legion-apollo 0.5.0 → 0.5.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +21 -0
- data/README.md +24 -3
- data/lib/legion/apollo/local/migrations/005_add_raw_content_temporal_windows.rb +22 -0
- data/lib/legion/apollo/local.rb +165 -43
- data/lib/legion/apollo/routes.rb +8 -2
- data/lib/legion/apollo/version.rb +1 -1
- data/lib/legion/apollo.rb +54 -26
- metadata +2 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: a45052ee6c52f3642d1247538a691ace760e67bb5ec35f59fab03f42f95e8768
|
|
4
|
+
data.tar.gz: 7adc4eba3d571ef523c0588be61179e27b519b4f0f5ec620e06b9253004468a5
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 37c9de5b076d469c36f43a3cb55e608a053c146e97e9da63b5b4009772943adecff5f56ce6e416d105c52664c2616346a25cf6f2857c42b561ef84b5a95a007c
|
|
7
|
+
data.tar.gz: d4e73cf548045b9fce1b6713aaeb0e0abaca29757534feaf484f62300ec2c80eed16a2a514d9388fa80fde9ada1d5321f97dad8904d4fc9864f81c6d0efe2a31
|
data/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,26 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## [0.5.3] - 2026-04-27
|
|
4
|
+
|
|
5
|
+
### Fixed
|
|
6
|
+
- Preserve temporal validity windows when promoting local knowledge to global Apollo.
|
|
7
|
+
- Treat nil or blank `raw_content` as absent so indexed content remains the raw-content fallback.
|
|
8
|
+
- Ignore unparseable temporal inputs instead of storing arbitrary strings that break lexical validity comparisons.
|
|
9
|
+
|
|
10
|
+
## [0.5.2] - 2026-04-27
|
|
11
|
+
|
|
12
|
+
### Added
|
|
13
|
+
- Store `raw_content` alongside indexed `content` in Apollo Local so callers can preserve verbatim source text separately from retrieval text (#25, #26)
|
|
14
|
+
- Add `valid_from`/`valid_to` temporal windows and `as_of:` query filtering for local knowledge entries (#27)
|
|
15
|
+
|
|
16
|
+
### Fixed
|
|
17
|
+
- Sanitize Apollo ingest and query text by scrubbing invalid UTF-8 and removing null bytes before routing to local or global backends (#29)
|
|
18
|
+
|
|
19
|
+
## [0.5.1] - 2026-04-27
|
|
20
|
+
|
|
21
|
+
### Fixed
|
|
22
|
+
- Guard Apollo Local tag queries and promotion against nil, shutdown, or unavailable local DB connections before SQL and Ruby fallback paths (#30)
|
|
23
|
+
|
|
3
24
|
## [0.5.0] - 2026-04-18
|
|
4
25
|
|
|
5
26
|
### Added
|
data/README.md
CHANGED
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
# legion-apollo
|
|
2
2
|
|
|
3
|
-
Apollo client
|
|
3
|
+
Apollo is the LegionIO knowledge client. It gives extensions one API for writing, retrieving, and merging knowledge across the global Apollo service and the node-local SQLite store.
|
|
4
4
|
|
|
5
|
-
**Version**: 0.
|
|
5
|
+
**Version**: 0.5.3
|
|
6
6
|
|
|
7
|
-
|
|
7
|
+
`legion-apollo` provides `query`, `ingest`, and `retrieve` with smart routing: co-located `lex-apollo`, RabbitMQ transport, node-local SQLite, or graceful failure. `Apollo::Local` mirrors the same public API for offline and low-latency retrieval without requiring remote infrastructure.
|
|
8
8
|
|
|
9
9
|
## Usage
|
|
10
10
|
|
|
@@ -21,6 +21,24 @@ results = Legion::Apollo.query(text: 'local note', scope: :local)
|
|
|
21
21
|
|
|
22
22
|
# Query both and merge (deduped by content hash, ranked by confidence)
|
|
23
23
|
results = Legion::Apollo.query(text: 'ruby', scope: :all)
|
|
24
|
+
|
|
25
|
+
# Preserve verbatim source text separately from indexed retrieval content
|
|
26
|
+
Legion::Apollo.ingest(
|
|
27
|
+
content: 'Summarized policy note for search',
|
|
28
|
+
raw_content: 'Exact source text from the original record',
|
|
29
|
+
tags: %w[policy source],
|
|
30
|
+
scope: :local
|
|
31
|
+
)
|
|
32
|
+
|
|
33
|
+
# Query the local store as it was valid at a point in time
|
|
34
|
+
Legion::Apollo.ingest(
|
|
35
|
+
content: 'Policy version active in Q2',
|
|
36
|
+
tags: %w[policy],
|
|
37
|
+
valid_from: '2026-04-01T00:00:00.000Z',
|
|
38
|
+
valid_to: '2026-06-30T23:59:59.999Z',
|
|
39
|
+
scope: :local
|
|
40
|
+
)
|
|
41
|
+
results = Legion::Apollo.query(text: 'policy', scope: :local, as_of: '2026-05-01T00:00:00.000Z')
|
|
24
42
|
```
|
|
25
43
|
|
|
26
44
|
## Scopes
|
|
@@ -37,9 +55,12 @@ results = Legion::Apollo.query(text: 'ruby', scope: :all)
|
|
|
37
55
|
|
|
38
56
|
Features:
|
|
39
57
|
- Content-hash dedup (MD5 of normalized content)
|
|
58
|
+
- `raw_content` preservation for verbatim source text
|
|
59
|
+
- `valid_from` / `valid_to` temporal windows with `as_of:` query filtering
|
|
40
60
|
- Optional LLM embeddings (1024-dim) with cosine rerank when `Legion::LLM.can_embed?`
|
|
41
61
|
- TTL expiry (default 5-year retention)
|
|
42
62
|
- FTS5 full-text search with a case-insensitive `LIKE` fallback
|
|
63
|
+
- Null-byte removal and invalid UTF-8 scrubbing before persistence or backend routing
|
|
43
64
|
|
|
44
65
|
## Configuration
|
|
45
66
|
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Migration 005: adds verbatim source text and temporal validity windows to
# the local_knowledge table.
#
# up   - adds nullable raw_content / valid_from / valid_to columns and one
#        index per temporal bound.
# down - removes the indexes, then the columns.
Sequel.migration do
  up do
    alter_table(:local_knowledge) do
      add_column :raw_content, :text, null: true
      add_column :valid_from, String, null: true
      add_column :valid_to, String, null: true

      add_index :valid_from, name: :idx_local_knowledge_valid_from
      add_index :valid_to, name: :idx_local_knowledge_valid_to
    end
  end

  down do
    alter_table(:local_knowledge) do
      # Drop the indexes before their columns: SQLite's native
      # ALTER TABLE ... DROP COLUMN refuses to remove an indexed column,
      # so the rollback would otherwise fail on valid_from / valid_to.
      drop_index :valid_from, name: :idx_local_knowledge_valid_from
      drop_index :valid_to, name: :idx_local_knowledge_valid_to

      drop_column :raw_content
      drop_column :valid_from
      drop_column :valid_to
    end
  end
end
|
data/lib/legion/apollo/local.rb
CHANGED
|
@@ -99,18 +99,19 @@ module Legion
|
|
|
99
99
|
limit ||= local_setting(:default_limit, 5)
|
|
100
100
|
min_confidence ||= local_setting(:min_confidence, 0.3)
|
|
101
101
|
multiplier = local_setting(:fts_candidate_multiplier, 3)
|
|
102
|
+
as_of = normalize_temporal_value(opts[:as_of])
|
|
102
103
|
log.info do
|
|
103
104
|
"Apollo::Local query executing text_length=#{text.to_s.length} " \
|
|
104
105
|
"limit=#{limit} min_confidence=#{min_confidence} tag_count=#{Array(tags).size}"
|
|
105
106
|
end
|
|
106
107
|
log.debug { "Apollo::Local query limit=#{limit} min_confidence=#{min_confidence} tags=#{Array(tags).size}" }
|
|
107
108
|
|
|
108
|
-
candidates = fts_search(text, limit: limit * multiplier)
|
|
109
|
+
candidates = fts_search(text, limit: limit * multiplier, as_of: as_of)
|
|
109
110
|
include_inferences = opts.fetch(:include_inferences, true)
|
|
110
111
|
include_history = opts.fetch(:include_history, false)
|
|
111
112
|
candidates = filter_candidates(candidates, min_confidence: min_confidence, tags: tags,
|
|
112
|
-
include_inferences: include_inferences,
|
|
113
|
-
|
|
113
|
+
options: { include_inferences: include_inferences,
|
|
114
|
+
include_history: include_history, as_of: as_of })
|
|
114
115
|
candidates = cosine_rerank(text, candidates) if can_rerank?
|
|
115
116
|
results = candidates.first(limit)
|
|
116
117
|
|
|
@@ -159,10 +160,11 @@ module Legion
|
|
|
159
160
|
end
|
|
160
161
|
|
|
161
162
|
def query_by_tags(tags:, limit: 50) # rubocop:disable Metrics/MethodLength
|
|
162
|
-
|
|
163
|
-
|
|
163
|
+
connection = local_db_connection
|
|
164
164
|
tags = normalize_tags_input(tags)
|
|
165
|
-
|
|
165
|
+
return { success: false, error: :not_started } unless local_db_usable?(connection)
|
|
166
|
+
|
|
167
|
+
results = query_by_tags_via_sql(connection, tags: tags, limit: limit)
|
|
166
168
|
|
|
167
169
|
log.info { "Apollo::Local query_by_tags completed tag_count=#{tags.size} count=#{results.size}" }
|
|
168
170
|
{ success: true, results: results, count: results.size }
|
|
@@ -178,11 +180,13 @@ module Legion
|
|
|
178
180
|
end
|
|
179
181
|
|
|
180
182
|
def promote_to_global(tags:, min_confidence: 0.6) # rubocop:disable Metrics/MethodLength,Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity
|
|
181
|
-
return { success: false, error: :not_started } unless
|
|
183
|
+
return { success: false, error: :not_started } unless local_db_usable?(local_db_connection)
|
|
182
184
|
|
|
183
185
|
tags = normalize_tags_input(tags)
|
|
184
186
|
entries = query_by_tags(tags: tags)
|
|
185
|
-
|
|
187
|
+
return entries unless entries[:success]
|
|
188
|
+
|
|
189
|
+
unless entries[:results]&.any?
|
|
186
190
|
log.info { "Apollo::Local promote_to_global skipped tag_count=#{tags.size} reason=no_entries" }
|
|
187
191
|
return { success: true, promoted: 0 }
|
|
188
192
|
end
|
|
@@ -200,6 +204,9 @@ module Legion
|
|
|
200
204
|
end
|
|
201
205
|
result = Legion::Apollo.ingest(
|
|
202
206
|
content: entry[:content],
|
|
207
|
+
raw_content: entry[:raw_content] || entry[:content],
|
|
208
|
+
valid_from: entry[:valid_from],
|
|
209
|
+
valid_to: entry[:valid_to],
|
|
203
210
|
tags: entry_tags + ['promoted_from_local'],
|
|
204
211
|
source_channel: 'local_promotion',
|
|
205
212
|
submitted_by: "node:#{hostname}",
|
|
@@ -337,6 +344,26 @@ module Legion
|
|
|
337
344
|
Legion::Data::Local.connection
|
|
338
345
|
end
|
|
339
346
|
|
|
347
|
+
# Returns the local database handle, or nil when the store is not started,
# local data is unavailable, or fetching the handle raises.
# Any StandardError is reported at debug level and converted to nil.
def local_db_connection
  ready = started? && data_local_available?
  ready ? db : nil
rescue StandardError => error
  handle_exception(error, level: :debug, operation: 'apollo.local.local_db_connection')
  nil
end
|
|
355
|
+
|
|
356
|
+
# Reports whether +connection+ can be queried right now.
#
# False when the store is not started, the connection is nil/false, the
# connection reports itself closed, or probing it raises. The probe
# (test_connection) runs only when the connection object supports it.
def local_db_usable?(connection)
  if !started? || !connection
    false
  elsif connection.respond_to?(:closed?) && connection.closed?
    false
  else
    connection.test_connection if connection.respond_to?(:test_connection)
    true
  end
rescue StandardError => error
  handle_exception(error, level: :debug, operation: 'apollo.local.local_db_usable')
  false
end
|
|
366
|
+
|
|
340
367
|
def content_hash(content)
|
|
341
368
|
normalized = content.to_s.strip.downcase.gsub(/\s+/, ' ')
|
|
342
369
|
Digest::MD5.hexdigest(normalized)
|
|
@@ -396,9 +423,12 @@ module Legion
|
|
|
396
423
|
|
|
397
424
|
result = ingest(
|
|
398
425
|
content: entry[:content],
|
|
426
|
+
raw_content: entry[:raw_content] || entry[:content],
|
|
399
427
|
tags: clean_tags,
|
|
400
428
|
confidence: ((entry[:confidence] || 0.5) * 0.9).round(10),
|
|
401
|
-
source_channel: 'global_hydration'
|
|
429
|
+
source_channel: 'global_hydration',
|
|
430
|
+
valid_from: entry[:valid_from],
|
|
431
|
+
valid_to: entry[:valid_to]
|
|
402
432
|
)
|
|
403
433
|
hydrated += 1 if result[:success]
|
|
404
434
|
end
|
|
@@ -408,6 +438,8 @@ module Legion
|
|
|
408
438
|
end
|
|
409
439
|
|
|
410
440
|
def ingest_without_lock(content:, tags:, **opts) # rubocop:disable Metrics/MethodLength,Metrics/AbcSize
|
|
441
|
+
content = normalize_text_input(content)
|
|
442
|
+
raw_content = normalize_raw_content_input(opts[:raw_content], fallback: content)
|
|
411
443
|
hash = content_hash(content)
|
|
412
444
|
return deduplicated_ingest(hash) if duplicate?(hash)
|
|
413
445
|
|
|
@@ -417,9 +449,11 @@ module Legion
|
|
|
417
449
|
end
|
|
418
450
|
log.debug { "Apollo::Local ingest hash=#{hash} tags=#{Array(tags).size} source_channel=#{opts[:source_channel]}" }
|
|
419
451
|
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
|
|
452
|
+
metadata = opts.dup
|
|
453
|
+
metadata.delete(:raw_content)
|
|
454
|
+
row = build_ingest_row(content: content, raw_content: raw_content, hash: hash, tags: tags, **metadata)
|
|
455
|
+
id = persist_ingest_row(row, metadata)
|
|
456
|
+
mark_parent_superseded(metadata[:parent_knowledge_id]) if metadata[:parent_knowledge_id]
|
|
423
457
|
|
|
424
458
|
log.info { "Apollo::Local ingest stored id=#{id} hash=#{hash}" }
|
|
425
459
|
{ success: true, mode: :local, id: id }
|
|
@@ -429,22 +463,56 @@ module Legion
|
|
|
429
463
|
deduplicated_ingest(hash)
|
|
430
464
|
end
|
|
431
465
|
|
|
432
|
-
def build_ingest_row(content:, hash:, tags:, **opts) # rubocop:disable Metrics/MethodLength
|
|
466
|
+
# Assembles the full row hash for a new local_knowledge entry.
#
# Metadata columns are built first, then merged with the embedding columns
# and finally the timestamp columns (later merges win on key collisions).
# Inference entries default to the initial inference confidence; everything
# else defaults to 1.0.
def build_ingest_row(content:, raw_content:, hash:, tags:, **opts) # rubocop:disable Metrics/MethodLength
  inference = opts[:is_inference] == true
  fallback_confidence = if inference
                          Legion::Apollo::Helpers::Confidence::INITIAL_INFERENCE_CONFIDENCE
                        else
                          1.0
                        end
  context = {
    content: content,
    raw_content: raw_content,
    hash: hash,
    tags: tags,
    opts: opts,
    is_inference: inference,
    default_confidence: fallback_confidence
  }

  row = ingest_metadata_columns(context)
  row = row.merge(embedding_columns(content, opts))
  row.merge(timestamp_columns)
end
|
|
479
|
+
|
|
480
|
+
# Combines the base, lineage, and temporal column groups for an ingest row.
# +context+ is the hash built by build_ingest_row; its :opts entry feeds the
# lineage and temporal groups.
def ingest_metadata_columns(context)
  columns = ingest_base_columns(context)
  options = context[:opts]
  columns = columns.merge(ingest_lineage_columns(options))
  columns.merge(ingest_temporal_columns(options))
end
|
|
485
|
+
|
|
486
|
+
# Builds the core columns of an ingest row: content, raw content, hash,
# serialized tags, confidence, and the inference flag, followed by the
# source attribution columns.
# An explicit opts[:confidence] takes precedence over the context default.
def ingest_base_columns(context)
  options = context[:opts]
  base = {}
  base[:content] = context[:content]
  base[:raw_content] = context[:raw_content]
  base[:content_hash] = context[:hash]
  base[:tags] = serialized_tags(context[:tags])
  base[:confidence] = options[:confidence] || context[:default_confidence]
  base[:is_inference] = context[:is_inference]
  base.merge(ingest_source_columns(options))
end
|
|
497
|
+
|
|
498
|
+
# Picks the source attribution columns out of +opts+.
# Keys missing from +opts+ come back as nil so the row shape is constant.
def ingest_source_columns(opts)
  %i[source_channel source_agent submitted_by].to_h { |column| [column, opts[column]] }
end
|
|
502
|
+
|
|
503
|
+
def ingest_lineage_columns(opts)
|
|
435
504
|
{
|
|
436
|
-
content: content,
|
|
437
|
-
content_hash: hash,
|
|
438
|
-
tags: serialized_tags(tags),
|
|
439
|
-
source_channel: opts[:source_channel],
|
|
440
|
-
source_agent: opts[:source_agent],
|
|
441
|
-
submitted_by: opts[:submitted_by],
|
|
442
|
-
confidence: opts[:confidence] || default_confidence,
|
|
443
|
-
is_inference: is_inference,
|
|
444
505
|
forget_reason: opts[:forget_reason],
|
|
445
506
|
parent_knowledge_id: opts[:parent_knowledge_id],
|
|
446
507
|
supersession_type: opts[:supersession_type]
|
|
447
|
-
}
|
|
508
|
+
}
|
|
509
|
+
end
|
|
510
|
+
|
|
511
|
+
# Builds the valid_from / valid_to columns for an ingest row, passing each
# bound from +opts+ through normalize_temporal_value.
def ingest_temporal_columns(opts)
  %i[valid_from valid_to].to_h { |bound| [bound, normalize_temporal_value(opts[bound])] }
end
|
|
449
517
|
|
|
450
518
|
def persist_ingest_row(row, opts = {})
|
|
@@ -539,41 +607,41 @@ module Legion
|
|
|
539
607
|
Legion::JSON.dump(normalize_tags_input(tags))
|
|
540
608
|
end
|
|
541
609
|
|
|
542
|
-
def fts_search(text, limit:) # rubocop:disable Metrics/MethodLength,Metrics/AbcSize
|
|
610
|
+
def fts_search(text, limit:, as_of: nil) # rubocop:disable Metrics/MethodLength,Metrics/AbcSize
|
|
543
611
|
now = Time.now.utc.strftime('%Y-%m-%dT%H:%M:%S.%LZ')
|
|
544
|
-
if text.to_s.strip.empty?
|
|
545
|
-
return db[:local_knowledge]
|
|
546
|
-
.where(Sequel.lit('expires_at > ?', now))
|
|
547
|
-
.limit(limit)
|
|
548
|
-
.all
|
|
549
|
-
end
|
|
612
|
+
return active_knowledge_dataset(now: now, as_of: as_of).limit(limit).all if text.to_s.strip.empty?
|
|
550
613
|
|
|
551
614
|
tokens = text.to_s.scan(/[\p{L}\p{N}_]+/)
|
|
552
|
-
return ilike_search(text, now: now, limit: limit) if tokens.empty?
|
|
615
|
+
return ilike_search(text, now: now, limit: limit, as_of: as_of) if tokens.empty?
|
|
553
616
|
|
|
554
617
|
escaped = tokens.map { |t| %("#{t}") }.join(' ')
|
|
618
|
+
temporal_sql, temporal_params = temporal_window_sql(as_of, table_alias: 'lk')
|
|
555
619
|
db.fetch(
|
|
556
620
|
'SELECT lk.* FROM local_knowledge lk ' \
|
|
557
621
|
'INNER JOIN local_knowledge_fts fts ON lk.id = fts.rowid ' \
|
|
558
|
-
|
|
559
|
-
|
|
622
|
+
"WHERE local_knowledge_fts MATCH ? AND lk.expires_at > ?#{temporal_sql} " \
|
|
623
|
+
'ORDER BY fts.rank LIMIT ?',
|
|
624
|
+
escaped, now, *temporal_params, limit
|
|
560
625
|
).all
|
|
561
626
|
rescue StandardError => e
|
|
562
627
|
handle_exception(e, level: :debug, operation: 'apollo.local.fts_search', limit: limit, fallback: :ilike)
|
|
563
|
-
ilike_search(text, now: Time.now.utc.strftime('%Y-%m-%dT%H:%M:%S.%LZ'), limit: limit)
|
|
628
|
+
ilike_search(text, now: Time.now.utc.strftime('%Y-%m-%dT%H:%M:%S.%LZ'), limit: limit, as_of: as_of)
|
|
564
629
|
end
|
|
565
630
|
|
|
566
|
-
def ilike_search(text, now:, limit:)
|
|
631
|
+
def ilike_search(text, now:, limit:, as_of: nil)
|
|
567
632
|
safe_text = text.to_s.gsub('\\', '\\\\\\\\').gsub('%', '\%').gsub('_', '\_')
|
|
568
|
-
|
|
569
|
-
.where(Sequel.lit('expires_at > ?', now))
|
|
633
|
+
active_knowledge_dataset(now: now, as_of: as_of)
|
|
570
634
|
.where(Sequel.lit("content LIKE ? ESCAPE '\\' COLLATE NOCASE", "%#{safe_text}%"))
|
|
571
635
|
.limit(limit)
|
|
572
636
|
.all
|
|
573
637
|
end
|
|
574
638
|
|
|
575
|
-
def filter_candidates(candidates, min_confidence:, tags:,
|
|
639
|
+
def filter_candidates(candidates, min_confidence:, tags:, options: {}) # rubocop:disable Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity,Metrics/MethodLength,Metrics/AbcSize
|
|
640
|
+
include_inferences = options.fetch(:include_inferences, true)
|
|
641
|
+
include_history = options.fetch(:include_history, false)
|
|
642
|
+
as_of = options[:as_of]
|
|
576
643
|
candidates = candidates.select { |c| (c[:confidence] || 0) >= min_confidence }
|
|
644
|
+
candidates = candidates.select { |c| temporally_valid?(c, as_of) }
|
|
577
645
|
candidates = candidates.reject { |c| [1, true].include?(c[:is_inference]) } unless include_inferences
|
|
578
646
|
unless include_history
|
|
579
647
|
candidates = candidates.select { |c| c[:is_latest].nil? || c[:is_latest] == 1 || c[:is_latest] == true }
|
|
@@ -588,6 +656,36 @@ module Legion
|
|
|
588
656
|
candidates
|
|
589
657
|
end
|
|
590
658
|
|
|
659
|
+
# Dataset of local_knowledge rows whose expires_at is later than +now+,
# additionally narrowed to rows valid at +as_of+ when one is given.
def active_knowledge_dataset(now:, as_of: nil)
  unexpired = db[:local_knowledge].where(Sequel.lit('expires_at > ?', now))
  apply_temporal_window(unexpired, as_of)
end
|
|
662
|
+
|
|
663
|
+
# Restricts +dataset+ to rows whose validity window contains +as_of+.
# A NULL bound is treated as open-ended. When +as_of+ is nil or empty the
# dataset is returned untouched.
def apply_temporal_window(dataset, as_of)
  if as_of.to_s.empty?
    dataset
  else
    window = Sequel.lit(
      '(valid_from IS NULL OR valid_from <= ?) AND (valid_to IS NULL OR valid_to >= ?)', as_of, as_of
    )
    dataset.where(window)
  end
end
|
|
670
|
+
|
|
671
|
+
# Raw-SQL counterpart of the temporal window filter, for hand-written queries.
#
# Returns a two-element array: an " AND ..." fragment to append to a WHERE
# clause (columns qualified with +table_alias+) and the bind parameters for
# its placeholders. Returns ['', []] when +as_of+ is nil or empty.
def temporal_window_sql(as_of, table_alias:)
  return ['', []] if as_of.to_s.empty?

  lower_bound = "(#{table_alias}.valid_from IS NULL OR #{table_alias}.valid_from <= ?)"
  upper_bound = "(#{table_alias}.valid_to IS NULL OR #{table_alias}.valid_to >= ?)"
  [" AND #{lower_bound} AND #{upper_bound}", [as_of, as_of]]
end
|
|
680
|
+
|
|
681
|
+
# In-Ruby check that +row+ is valid at +as_of+.
# Bounds are compared with <= / >= directly against +as_of+; a nil bound is
# open-ended. Always true when +as_of+ is nil or empty.
def temporally_valid?(row, as_of)
  return true if as_of.to_s.empty?

  starts_at = row[:valid_from]
  ends_at = row[:valid_to]
  return false unless starts_at.nil? || starts_at <= as_of

  ends_at.nil? || ends_at >= as_of
end
|
|
688
|
+
|
|
591
689
|
def parse_tags(tags_json)
|
|
592
690
|
return [] if tags_json.nil? || tags_json.empty?
|
|
593
691
|
|
|
@@ -656,6 +754,26 @@ module Legion
|
|
|
656
754
|
value.to_s
|
|
657
755
|
end
|
|
658
756
|
|
|
757
|
+
# Converts a temporal input into a UTC timestamp string with millisecond
# precision ("YYYY-MM-DDTHH:MM:SS.mmmZ").
#
# Objects responding to #utc are formatted directly; anything else is
# stringified, stripped, and parsed. Returns nil for nil, blank, or
# unparseable input (any StandardError is swallowed).
# NOTE(review): Time.parse comes from the 'time' stdlib extension; confirm it
# is required elsewhere, since a missing require would be hidden by the rescue.
def normalize_temporal_value(value)
  return if value.nil?

  layout = '%Y-%m-%dT%H:%M:%S.%LZ'
  candidate = if value.respond_to?(:utc)
                value.utc.strftime(layout)
              else
                value.to_s.strip
              end
  candidate.empty? ? nil : Time.parse(candidate).utc.strftime(layout)
rescue StandardError
  nil
end
|
|
767
|
+
|
|
768
|
+
# Normalizes raw content for storage, returning +fallback+ when the value is
# blank after normalization.
# Delegates to Legion::Apollo's own normalizer (including a private one) when
# that module is loaded and exposes it; otherwise normalizes locally.
def normalize_raw_content_input(value, fallback:)
  delegate = defined?(Legion::Apollo) && Legion::Apollo.respond_to?(:normalize_raw_content_input, true)
  return Legion::Apollo.send(:normalize_raw_content_input, value, fallback: fallback) if delegate

  cleaned = normalize_text_input(value)
  return fallback if cleaned.strip.empty?

  cleaned
end
|
|
776
|
+
|
|
659
777
|
def normalize_tags_input(tags)
|
|
660
778
|
Legion::Apollo::Helpers::TagNormalizer.normalize(Array(tags)).first(max_tags_limit)
|
|
661
779
|
rescue StandardError => e
|
|
@@ -674,9 +792,9 @@ module Legion
|
|
|
674
792
|
Legion::Apollo::Helpers::TagNormalizer::MAX_TAGS
|
|
675
793
|
end
|
|
676
794
|
|
|
677
|
-
def query_by_tags_via_sql(tags:, limit:) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
|
|
795
|
+
def query_by_tags_via_sql(connection, tags:, limit:) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
|
|
678
796
|
now = Time.now.utc.strftime('%Y-%m-%dT%H:%M:%S.%LZ')
|
|
679
|
-
dataset =
|
|
797
|
+
dataset = connection[:local_knowledge].where(Sequel.lit('expires_at > ?', now))
|
|
680
798
|
|
|
681
799
|
Array(tags).map(&:to_s).each do |tag|
|
|
682
800
|
dataset = dataset.where(
|
|
@@ -696,11 +814,15 @@ module Legion
|
|
|
696
814
|
tag_count: Array(tags).size,
|
|
697
815
|
limit: limit
|
|
698
816
|
)
|
|
699
|
-
|
|
817
|
+
raise unless local_db_usable?(connection)
|
|
818
|
+
|
|
819
|
+
query_by_tags_via_ruby(connection, tags: tags, limit: limit)
|
|
700
820
|
end
|
|
701
821
|
|
|
702
|
-
def query_by_tags_via_ruby(tags:, limit:)
|
|
703
|
-
|
|
822
|
+
def query_by_tags_via_ruby(connection, tags:, limit:)
|
|
823
|
+
raise Sequel::DatabaseConnectionError, 'local database unavailable' unless local_db_usable?(connection)
|
|
824
|
+
|
|
825
|
+
candidates = connection[:local_knowledge]
|
|
704
826
|
.where(Sequel.lit('expires_at > ?', Time.now.utc.strftime('%Y-%m-%dT%H:%M:%S.%LZ')))
|
|
705
827
|
.all
|
|
706
828
|
|
|
@@ -711,7 +833,7 @@ module Legion
|
|
|
711
833
|
end
|
|
712
834
|
|
|
713
835
|
def update_upsert_entry(existing, content, tags_json, opts) # rubocop:disable Metrics/MethodLength,Metrics/AbcSize
|
|
714
|
-
content = content
|
|
836
|
+
content = normalize_text_input(content)
|
|
715
837
|
new_hash = content_hash(content)
|
|
716
838
|
embedding, embedded_at = generate_embedding(content)
|
|
717
839
|
now = Time.now.utc.strftime('%Y-%m-%dT%H:%M:%S.%LZ')
|
data/lib/legion/apollo/routes.rb
CHANGED
|
@@ -68,6 +68,7 @@ module Legion
|
|
|
68
68
|
agent_id: body[:agent_id] || 'api',
|
|
69
69
|
scope: normalize_scope(body[:scope]),
|
|
70
70
|
tier: body[:tier]&.to_sym,
|
|
71
|
+
as_of: body[:as_of],
|
|
71
72
|
include_inferences: body.fetch(:include_inferences, true),
|
|
72
73
|
include_history: body.fetch(:include_history, false)
|
|
73
74
|
)
|
|
@@ -86,7 +87,7 @@ module Legion
|
|
|
86
87
|
# TagNormalizer hard-caps to MAX_TAGS=20 internally; clamp here to make that limit explicit.
|
|
87
88
|
effective_max_tags = [max_tags, Legion::Apollo::Helpers::TagNormalizer::MAX_TAGS].min
|
|
88
89
|
tags = Legion::Apollo::Helpers::TagNormalizer.normalize(Array(body[:tags])).first(effective_max_tags)
|
|
89
|
-
|
|
90
|
+
ingest_payload = {
|
|
90
91
|
content: body[:content],
|
|
91
92
|
content_type: body[:content_type] || :observation,
|
|
92
93
|
tags: tags,
|
|
@@ -99,13 +100,18 @@ module Legion
|
|
|
99
100
|
is_inference: body[:is_inference] == true,
|
|
100
101
|
forget_reason: body[:forget_reason],
|
|
101
102
|
expires_at: body[:expires_at],
|
|
103
|
+
valid_from: body[:valid_from],
|
|
104
|
+
valid_to: body[:valid_to],
|
|
102
105
|
parent_knowledge_id: body[:parent_knowledge_id],
|
|
103
106
|
supersession_type: body[:supersession_type],
|
|
104
107
|
source_uri: body[:source_uri],
|
|
105
108
|
source_hash: body[:source_hash],
|
|
106
109
|
relevance_score: body[:relevance_score],
|
|
107
110
|
extraction_method: body[:extraction_method]
|
|
108
|
-
|
|
111
|
+
}
|
|
112
|
+
raw_content = body[:raw_content].to_s
|
|
113
|
+
ingest_payload[:raw_content] = body[:raw_content] unless raw_content.strip.empty?
|
|
114
|
+
result = Legion::Apollo.ingest(**ingest_payload)
|
|
109
115
|
json_response(result, status_code: apollo_status_code(result, success_status: 201))
|
|
110
116
|
end
|
|
111
117
|
end
|
data/lib/legion/apollo.rb
CHANGED
|
@@ -97,9 +97,11 @@ module Legion
|
|
|
97
97
|
return not_started_error unless started?
|
|
98
98
|
|
|
99
99
|
normalized_tags = normalize_tags_input(tags)
|
|
100
|
-
|
|
100
|
+
normalized_content = normalize_text_input(content)
|
|
101
|
+
normalized_raw_content = normalize_raw_content_input(opts[:raw_content], fallback: normalized_content)
|
|
102
|
+
payload = { **opts, content: normalized_content, raw_content: normalized_raw_content, tags: normalized_tags }
|
|
101
103
|
log.info do
|
|
102
|
-
"Apollo ingest requested scope=#{scope} content_length=#{content.to_s.length} " \
|
|
104
|
+
"Apollo ingest requested scope=#{scope} content_length=#{payload[:content].to_s.length} " \
|
|
103
105
|
"tags=#{payload[:tags].size} source_channel=#{payload[:source_channel]}"
|
|
104
106
|
end
|
|
105
107
|
log.debug do
|
|
@@ -289,7 +291,8 @@ module Legion
|
|
|
289
291
|
"limit=#{payload[:limit]}"
|
|
290
292
|
end
|
|
291
293
|
result = Legion::Apollo::Local.query(**payload.slice(:text, :limit, :min_confidence, :tags,
|
|
292
|
-
:tier, :include_inferences, :include_history
|
|
294
|
+
:tier, :include_inferences, :include_history,
|
|
295
|
+
:as_of))
|
|
293
296
|
return result unless result[:success]
|
|
294
297
|
|
|
295
298
|
entries = normalize_local_entries(Array(result[:results]))
|
|
@@ -324,7 +327,8 @@ module Legion
|
|
|
324
327
|
if Legion::Apollo::Local.started?
|
|
325
328
|
attempted = true
|
|
326
329
|
local = Legion::Apollo::Local.query(**payload.slice(:text, :limit, :min_confidence, :tags,
|
|
327
|
-
:tier, :include_inferences, :include_history
|
|
330
|
+
:tier, :include_inferences, :include_history,
|
|
331
|
+
:as_of))
|
|
328
332
|
if local[:success]
|
|
329
333
|
any_success = true
|
|
330
334
|
entries.concat(normalize_local_entries(Array(local[:results]))) if local[:results]
|
|
@@ -377,18 +381,29 @@ module Legion
|
|
|
377
381
|
else
|
|
378
382
|
Array(e[:tags])
|
|
379
383
|
end
|
|
380
|
-
{ id: e[:id], content: e[:content], content_hash: hash,
|
|
381
|
-
confidence: e[:confidence] || 0.5, content_type: 'fact', tags: tags, source: :local
|
|
384
|
+
{ id: e[:id], content: e[:content], raw_content: e[:raw_content] || e[:content], content_hash: hash,
|
|
385
|
+
confidence: e[:confidence] || 0.5, content_type: 'fact', tags: tags, source: :local,
|
|
386
|
+
valid_from: e[:valid_from], valid_to: e[:valid_to] }
|
|
382
387
|
end
|
|
383
388
|
end
|
|
384
389
|
|
|
385
390
|
def normalize_global_entries(entries)
|
|
386
|
-
entries.map
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
391
|
+
entries.map { |entry| normalize_global_entry(entry) }
|
|
392
|
+
end
|
|
393
|
+
|
|
394
|
+
# Maps one global-backend entry onto the common result shape.
# Missing confidence defaults to 0.5 and missing content_type to 'fact';
# tags are coerced to an array and the entry is stamped source: :global.
def normalize_global_entry(entry)
  normalized = { id: entry[:id], content: entry[:content] }
  normalized[:raw_content] = normalized_raw_content(entry)
  normalized[:content_hash] = normalized_content_hash(entry)
  normalized[:confidence] = entry[:confidence] || 0.5
  normalized[:content_type] = entry[:content_type] || 'fact'
  normalized[:tags] = Array(entry[:tags])
  normalized[:source] = :global
  normalized[:valid_from] = entry[:valid_from]
  normalized[:valid_to] = entry[:valid_to]
  normalized
end
|
|
400
|
+
|
|
401
|
+
# Returns the entry's raw content, falling back to its indexed content.
#
# A nil, empty, or whitespace-only raw_content counts as absent, matching
# how blank raw content is treated on ingest. A non-blank value is returned
# unchanged (not stripped).
def normalized_raw_content(entry)
  raw = entry[:raw_content]
  raw.to_s.strip.empty? ? entry[:content] : raw
end
|
|
404
|
+
|
|
405
|
+
# Returns the entry's content hash, computing one when it is absent.
# The computed hash is the MD5 hex digest of the content after stripping,
# lowercasing, and collapsing each whitespace run to a single space.
def normalized_content_hash(entry)
  existing = entry[:content_hash]
  return existing if existing

  canonical = entry[:content].to_s.strip.downcase.gsub(/\s+/, ' ')
  Digest::MD5.hexdigest(canonical)
end
|
|
393
408
|
|
|
394
409
|
def dedup_and_rank(entries, limit:)
|
|
@@ -470,20 +485,33 @@ module Legion
|
|
|
470
485
|
end
|
|
471
486
|
|
|
472
487
|
def normalize_text_input(value) # rubocop:disable Metrics/CyclomaticComplexity,Metrics/MethodLength
|
|
473
|
-
case value
|
|
474
|
-
|
|
475
|
-
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
|
|
479
|
-
|
|
480
|
-
|
|
481
|
-
|
|
482
|
-
|
|
483
|
-
|
|
484
|
-
|
|
485
|
-
|
|
486
|
-
|
|
488
|
+
text = case value
|
|
489
|
+
when String
|
|
490
|
+
value
|
|
491
|
+
when Array
|
|
492
|
+
parts = value.filter_map { |entry| extract_text_fragment(entry) }
|
|
493
|
+
joined = parts.map(&:to_s).map(&:strip).reject(&:empty?).join("\n")
|
|
494
|
+
joined.empty? ? value.to_s : joined
|
|
495
|
+
when Hash
|
|
496
|
+
extract_text_fragment(value).to_s
|
|
497
|
+
when nil
|
|
498
|
+
''
|
|
499
|
+
else
|
|
500
|
+
value.to_s
|
|
501
|
+
end
|
|
502
|
+
sanitize_text_input(text)
|
|
503
|
+
end
|
|
504
|
+
|
|
505
|
+
# Produces a clean UTF-8 copy of +value+ (stringified via #to_s): invalid
# byte sequences are dropped and null bytes removed. The input is never
# mutated.
#
# Binary-tagged (ASCII-8BIT) strings carry no character encoding, so their
# bytes are reinterpreted as UTF-8 rather than transcoded. Transcoding from
# binary treats every high byte as undefined and would silently strip all
# non-ASCII characters, even when the bytes are valid UTF-8.
def sanitize_text_input(value)
  text = value.to_s.dup
  text.force_encoding(Encoding::UTF_8) if text.encoding == Encoding::BINARY
  unless text.encoding == Encoding::UTF_8
    # Genuinely different encodings are transcoded; unmappable bytes are dropped.
    text = text.encode(Encoding::UTF_8, invalid: :replace, undef: :replace, replace: '')
  end
  text = text.scrub('') unless text.valid_encoding?
  text.delete("\u0000")
end
|
|
511
|
+
|
|
512
|
+
# Normalizes raw content, returning +fallback+ when the normalized text is
# empty or whitespace-only. Non-blank text is returned as normalized, with
# surrounding whitespace kept.
def normalize_raw_content_input(value, fallback:)
  cleaned = normalize_text_input(value)
  return fallback if cleaned.strip.empty?

  cleaned
end
|
|
488
516
|
|
|
489
517
|
def normalize_tags_input(tags)
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: legion-apollo
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.5.
|
|
4
|
+
version: 0.5.3
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Esity
|
|
@@ -87,6 +87,7 @@ files:
|
|
|
87
87
|
- lib/legion/apollo/local/migrations/002_create_graph_tables.rb
|
|
88
88
|
- lib/legion/apollo/local/migrations/003_harden_graph_relationships.rb
|
|
89
89
|
- lib/legion/apollo/local/migrations/004_add_versioning_tiers_inference.rb
|
|
90
|
+
- lib/legion/apollo/local/migrations/005_add_raw_content_temporal_windows.rb
|
|
90
91
|
- lib/legion/apollo/messages/access_boost.rb
|
|
91
92
|
- lib/legion/apollo/messages/ingest.rb
|
|
92
93
|
- lib/legion/apollo/messages/query.rb
|