legion-apollo 0.5.0 → 0.5.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +14 -0
- data/README.md +24 -3
- data/lib/legion/apollo/local/migrations/005_add_raw_content_temporal_windows.rb +22 -0
- data/lib/legion/apollo/local.rb +154 -43
- data/lib/legion/apollo/routes.rb +4 -0
- data/lib/legion/apollo/version.rb +1 -1
- data/lib/legion/apollo.rb +49 -26
- metadata +2 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 9320cef3392467f9819e47c9cacc3893f45ffab9570825cb5d6b270ff3ce6a67
|
|
4
|
+
data.tar.gz: d38343aa96c9ba0b1d634f6e4d3850e22c899fe81c13550dce19809cec290a94
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 2dcaf3fc8fbf087b8e59c079e71921aebf8c53620e32d9cd8939a00981474747b78e978412d28a5c3ff4f204df01b9538f23a81c82a76a0d94afab8080219e46
|
|
7
|
+
data.tar.gz: 8d078c7e1c727bd23b08bfdb09af0abe7581296132d715b7ed7c4c5205d4645c004f1f980af6546239961c6649e9433e66a725341e2275e3cdb2e1de7f1687a3
|
data/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,19 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## [0.5.2] - 2026-04-27
|
|
4
|
+
|
|
5
|
+
### Added
|
|
6
|
+
- Store `raw_content` alongside indexed `content` in Apollo Local so callers can preserve verbatim source text separately from retrieval text (#25, #26)
|
|
7
|
+
- Add `valid_from`/`valid_to` temporal windows and `as_of:` query filtering for local knowledge entries (#27)
|
|
8
|
+
|
|
9
|
+
### Fixed
|
|
10
|
+
- Sanitize Apollo ingest and query text by scrubbing invalid UTF-8 and removing null bytes before routing to local or global backends (#29)
|
|
11
|
+
|
|
12
|
+
## [0.5.1] - 2026-04-27
|
|
13
|
+
|
|
14
|
+
### Fixed
|
|
15
|
+
- Guard Apollo Local tag queries and promotion against nil, shutdown, or unavailable local DB connections before SQL and Ruby fallback paths (#30)
|
|
16
|
+
|
|
3
17
|
## [0.5.0] - 2026-04-18
|
|
4
18
|
|
|
5
19
|
### Added
|
data/README.md
CHANGED
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
# legion-apollo
|
|
2
2
|
|
|
3
|
-
Apollo client
|
|
3
|
+
Apollo is the LegionIO knowledge client. It gives extensions one API for writing, retrieving, and merging knowledge across the global Apollo service and the node-local SQLite store.
|
|
4
4
|
|
|
5
|
-
**Version**: 0.
|
|
5
|
+
**Version**: 0.5.2
|
|
6
6
|
|
|
7
|
-
|
|
7
|
+
`legion-apollo` provides `query`, `ingest`, and `retrieve` with smart routing: co-located `lex-apollo`, RabbitMQ transport, node-local SQLite, or graceful failure. `Apollo::Local` mirrors the same public API for offline and low-latency retrieval without requiring remote infrastructure.
|
|
8
8
|
|
|
9
9
|
## Usage
|
|
10
10
|
|
|
@@ -21,6 +21,24 @@ results = Legion::Apollo.query(text: 'local note', scope: :local)
|
|
|
21
21
|
|
|
22
22
|
# Query both and merge (deduped by content hash, ranked by confidence)
|
|
23
23
|
results = Legion::Apollo.query(text: 'ruby', scope: :all)
|
|
24
|
+
|
|
25
|
+
# Preserve verbatim source text separately from indexed retrieval content
|
|
26
|
+
Legion::Apollo.ingest(
|
|
27
|
+
content: 'Summarized policy note for search',
|
|
28
|
+
raw_content: 'Exact source text from the original record',
|
|
29
|
+
tags: %w[policy source],
|
|
30
|
+
scope: :local
|
|
31
|
+
)
|
|
32
|
+
|
|
33
|
+
# Query the local store as it was valid at a point in time
|
|
34
|
+
Legion::Apollo.ingest(
|
|
35
|
+
content: 'Policy version active in Q2',
|
|
36
|
+
tags: %w[policy],
|
|
37
|
+
valid_from: '2026-04-01T00:00:00.000Z',
|
|
38
|
+
valid_to: '2026-06-30T23:59:59.999Z',
|
|
39
|
+
scope: :local
|
|
40
|
+
)
|
|
41
|
+
results = Legion::Apollo.query(text: 'policy', scope: :local, as_of: '2026-05-01T00:00:00.000Z')
|
|
24
42
|
```
|
|
25
43
|
|
|
26
44
|
## Scopes
|
|
@@ -37,9 +55,12 @@ results = Legion::Apollo.query(text: 'ruby', scope: :all)
|
|
|
37
55
|
|
|
38
56
|
Features:
|
|
39
57
|
- Content-hash dedup (MD5 of normalized content)
|
|
58
|
+
- `raw_content` preservation for verbatim source text
|
|
59
|
+
- `valid_from` / `valid_to` temporal windows with `as_of:` query filtering
|
|
40
60
|
- Optional LLM embeddings (1024-dim) with cosine rerank when `Legion::LLM.can_embed?`
|
|
41
61
|
- TTL expiry (default 5-year retention)
|
|
42
62
|
- FTS5 full-text search with `ILIKE` fallback
|
|
63
|
+
- Null-byte removal and invalid UTF-8 scrubbing before persistence or backend routing
|
|
43
64
|
|
|
44
65
|
## Configuration
|
|
45
66
|
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Migration 005: adds verbatim source text and temporal validity windows to
# the local_knowledge table.
#
# up   - adds nullable raw_content / valid_from / valid_to columns and one
#        index per temporal column.
# down - removes the indexes first and then the columns, so the rollback is
#        the exact inverse of `up` and never asks the database to drop a
#        column that is still referenced by an index.
Sequel.migration do
  up do
    alter_table(:local_knowledge) do
      add_column :raw_content, :text, null: true
      add_column :valid_from, String, null: true
      add_column :valid_to, String, null: true

      add_index :valid_from, name: :idx_local_knowledge_valid_from
      add_index :valid_to, name: :idx_local_knowledge_valid_to
    end
  end

  down do
    alter_table(:local_knowledge) do
      # Indexes must go before the columns they cover.
      drop_index :valid_from, name: :idx_local_knowledge_valid_from
      drop_index :valid_to, name: :idx_local_knowledge_valid_to

      drop_column :raw_content
      drop_column :valid_from
      drop_column :valid_to
    end
  end
end
|
data/lib/legion/apollo/local.rb
CHANGED
|
@@ -99,18 +99,19 @@ module Legion
|
|
|
99
99
|
limit ||= local_setting(:default_limit, 5)
|
|
100
100
|
min_confidence ||= local_setting(:min_confidence, 0.3)
|
|
101
101
|
multiplier = local_setting(:fts_candidate_multiplier, 3)
|
|
102
|
+
as_of = normalize_temporal_value(opts[:as_of])
|
|
102
103
|
log.info do
|
|
103
104
|
"Apollo::Local query executing text_length=#{text.to_s.length} " \
|
|
104
105
|
"limit=#{limit} min_confidence=#{min_confidence} tag_count=#{Array(tags).size}"
|
|
105
106
|
end
|
|
106
107
|
log.debug { "Apollo::Local query limit=#{limit} min_confidence=#{min_confidence} tags=#{Array(tags).size}" }
|
|
107
108
|
|
|
108
|
-
candidates = fts_search(text, limit: limit * multiplier)
|
|
109
|
+
candidates = fts_search(text, limit: limit * multiplier, as_of: as_of)
|
|
109
110
|
include_inferences = opts.fetch(:include_inferences, true)
|
|
110
111
|
include_history = opts.fetch(:include_history, false)
|
|
111
112
|
candidates = filter_candidates(candidates, min_confidence: min_confidence, tags: tags,
|
|
112
|
-
include_inferences: include_inferences,
|
|
113
|
-
|
|
113
|
+
options: { include_inferences: include_inferences,
|
|
114
|
+
include_history: include_history, as_of: as_of })
|
|
114
115
|
candidates = cosine_rerank(text, candidates) if can_rerank?
|
|
115
116
|
results = candidates.first(limit)
|
|
116
117
|
|
|
@@ -159,10 +160,11 @@ module Legion
|
|
|
159
160
|
end
|
|
160
161
|
|
|
161
162
|
def query_by_tags(tags:, limit: 50) # rubocop:disable Metrics/MethodLength
|
|
162
|
-
|
|
163
|
-
|
|
163
|
+
connection = local_db_connection
|
|
164
164
|
tags = normalize_tags_input(tags)
|
|
165
|
-
|
|
165
|
+
return { success: false, error: :not_started } unless local_db_usable?(connection)
|
|
166
|
+
|
|
167
|
+
results = query_by_tags_via_sql(connection, tags: tags, limit: limit)
|
|
166
168
|
|
|
167
169
|
log.info { "Apollo::Local query_by_tags completed tag_count=#{tags.size} count=#{results.size}" }
|
|
168
170
|
{ success: true, results: results, count: results.size }
|
|
@@ -178,11 +180,13 @@ module Legion
|
|
|
178
180
|
end
|
|
179
181
|
|
|
180
182
|
def promote_to_global(tags:, min_confidence: 0.6) # rubocop:disable Metrics/MethodLength,Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity
|
|
181
|
-
return { success: false, error: :not_started } unless
|
|
183
|
+
return { success: false, error: :not_started } unless local_db_usable?(local_db_connection)
|
|
182
184
|
|
|
183
185
|
tags = normalize_tags_input(tags)
|
|
184
186
|
entries = query_by_tags(tags: tags)
|
|
185
|
-
|
|
187
|
+
return entries unless entries[:success]
|
|
188
|
+
|
|
189
|
+
unless entries[:results]&.any?
|
|
186
190
|
log.info { "Apollo::Local promote_to_global skipped tag_count=#{tags.size} reason=no_entries" }
|
|
187
191
|
return { success: true, promoted: 0 }
|
|
188
192
|
end
|
|
@@ -200,6 +204,7 @@ module Legion
|
|
|
200
204
|
end
|
|
201
205
|
result = Legion::Apollo.ingest(
|
|
202
206
|
content: entry[:content],
|
|
207
|
+
raw_content: entry[:raw_content] || entry[:content],
|
|
203
208
|
tags: entry_tags + ['promoted_from_local'],
|
|
204
209
|
source_channel: 'local_promotion',
|
|
205
210
|
submitted_by: "node:#{hostname}",
|
|
@@ -337,6 +342,26 @@ module Legion
|
|
|
337
342
|
Legion::Data::Local.connection
|
|
338
343
|
end
|
|
339
344
|
|
|
345
|
+
def local_db_connection
|
|
346
|
+
return nil unless started? && data_local_available?
|
|
347
|
+
|
|
348
|
+
db
|
|
349
|
+
rescue StandardError => e
|
|
350
|
+
handle_exception(e, level: :debug, operation: 'apollo.local.local_db_connection')
|
|
351
|
+
nil
|
|
352
|
+
end
|
|
353
|
+
|
|
354
|
+
def local_db_usable?(connection)
|
|
355
|
+
return false unless started? && connection
|
|
356
|
+
return false if connection.respond_to?(:closed?) && connection.closed?
|
|
357
|
+
|
|
358
|
+
connection.test_connection if connection.respond_to?(:test_connection)
|
|
359
|
+
true
|
|
360
|
+
rescue StandardError => e
|
|
361
|
+
handle_exception(e, level: :debug, operation: 'apollo.local.local_db_usable')
|
|
362
|
+
false
|
|
363
|
+
end
|
|
364
|
+
|
|
340
365
|
def content_hash(content)
|
|
341
366
|
normalized = content.to_s.strip.downcase.gsub(/\s+/, ' ')
|
|
342
367
|
Digest::MD5.hexdigest(normalized)
|
|
@@ -396,9 +421,12 @@ module Legion
|
|
|
396
421
|
|
|
397
422
|
result = ingest(
|
|
398
423
|
content: entry[:content],
|
|
424
|
+
raw_content: entry[:raw_content] || entry[:content],
|
|
399
425
|
tags: clean_tags,
|
|
400
426
|
confidence: ((entry[:confidence] || 0.5) * 0.9).round(10),
|
|
401
|
-
source_channel: 'global_hydration'
|
|
427
|
+
source_channel: 'global_hydration',
|
|
428
|
+
valid_from: entry[:valid_from],
|
|
429
|
+
valid_to: entry[:valid_to]
|
|
402
430
|
)
|
|
403
431
|
hydrated += 1 if result[:success]
|
|
404
432
|
end
|
|
@@ -408,6 +436,8 @@ module Legion
|
|
|
408
436
|
end
|
|
409
437
|
|
|
410
438
|
def ingest_without_lock(content:, tags:, **opts) # rubocop:disable Metrics/MethodLength,Metrics/AbcSize
|
|
439
|
+
content = normalize_text_input(content)
|
|
440
|
+
raw_content = normalize_text_input(opts.key?(:raw_content) ? opts[:raw_content] : content)
|
|
411
441
|
hash = content_hash(content)
|
|
412
442
|
return deduplicated_ingest(hash) if duplicate?(hash)
|
|
413
443
|
|
|
@@ -417,9 +447,11 @@ module Legion
|
|
|
417
447
|
end
|
|
418
448
|
log.debug { "Apollo::Local ingest hash=#{hash} tags=#{Array(tags).size} source_channel=#{opts[:source_channel]}" }
|
|
419
449
|
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
|
|
450
|
+
metadata = opts.dup
|
|
451
|
+
metadata.delete(:raw_content)
|
|
452
|
+
row = build_ingest_row(content: content, raw_content: raw_content, hash: hash, tags: tags, **metadata)
|
|
453
|
+
id = persist_ingest_row(row, metadata)
|
|
454
|
+
mark_parent_superseded(metadata[:parent_knowledge_id]) if metadata[:parent_knowledge_id]
|
|
423
455
|
|
|
424
456
|
log.info { "Apollo::Local ingest stored id=#{id} hash=#{hash}" }
|
|
425
457
|
{ success: true, mode: :local, id: id }
|
|
@@ -429,22 +461,56 @@ module Legion
|
|
|
429
461
|
deduplicated_ingest(hash)
|
|
430
462
|
end
|
|
431
463
|
|
|
432
|
-
def build_ingest_row(content:, hash:, tags:, **opts) # rubocop:disable Metrics/MethodLength
|
|
464
|
+
def build_ingest_row(content:, raw_content:, hash:, tags:, **opts) # rubocop:disable Metrics/MethodLength
|
|
433
465
|
is_inference = opts[:is_inference] == true
|
|
434
466
|
default_confidence = is_inference ? Legion::Apollo::Helpers::Confidence::INITIAL_INFERENCE_CONFIDENCE : 1.0
|
|
467
|
+
ingest_metadata_columns(
|
|
468
|
+
content: content,
|
|
469
|
+
raw_content: raw_content,
|
|
470
|
+
hash: hash,
|
|
471
|
+
tags: tags,
|
|
472
|
+
opts: opts,
|
|
473
|
+
is_inference: is_inference,
|
|
474
|
+
default_confidence: default_confidence
|
|
475
|
+
).merge(embedding_columns(content, opts)).merge(timestamp_columns)
|
|
476
|
+
end
|
|
477
|
+
|
|
478
|
+
def ingest_metadata_columns(context)
|
|
479
|
+
ingest_base_columns(context)
|
|
480
|
+
.merge(ingest_lineage_columns(context[:opts]))
|
|
481
|
+
.merge(ingest_temporal_columns(context[:opts]))
|
|
482
|
+
end
|
|
483
|
+
|
|
484
|
+
def ingest_base_columns(context)
|
|
485
|
+
opts = context[:opts]
|
|
486
|
+
{
|
|
487
|
+
content: context[:content],
|
|
488
|
+
raw_content: context[:raw_content],
|
|
489
|
+
content_hash: context[:hash],
|
|
490
|
+
tags: serialized_tags(context[:tags]),
|
|
491
|
+
confidence: opts[:confidence] || context[:default_confidence],
|
|
492
|
+
is_inference: context[:is_inference]
|
|
493
|
+
}.merge(ingest_source_columns(opts))
|
|
494
|
+
end
|
|
495
|
+
|
|
496
|
+
def ingest_source_columns(opts)
|
|
497
|
+
{ source_channel: opts[:source_channel], source_agent: opts[:source_agent],
|
|
498
|
+
submitted_by: opts[:submitted_by] }
|
|
499
|
+
end
|
|
500
|
+
|
|
501
|
+
def ingest_lineage_columns(opts)
|
|
435
502
|
{
|
|
436
|
-
content: content,
|
|
437
|
-
content_hash: hash,
|
|
438
|
-
tags: serialized_tags(tags),
|
|
439
|
-
source_channel: opts[:source_channel],
|
|
440
|
-
source_agent: opts[:source_agent],
|
|
441
|
-
submitted_by: opts[:submitted_by],
|
|
442
|
-
confidence: opts[:confidence] || default_confidence,
|
|
443
|
-
is_inference: is_inference,
|
|
444
503
|
forget_reason: opts[:forget_reason],
|
|
445
504
|
parent_knowledge_id: opts[:parent_knowledge_id],
|
|
446
505
|
supersession_type: opts[:supersession_type]
|
|
447
|
-
}
|
|
506
|
+
}
|
|
507
|
+
end
|
|
508
|
+
|
|
509
|
+
def ingest_temporal_columns(opts)
|
|
510
|
+
{
|
|
511
|
+
valid_from: normalize_temporal_value(opts[:valid_from]),
|
|
512
|
+
valid_to: normalize_temporal_value(opts[:valid_to])
|
|
513
|
+
}
|
|
448
514
|
end
|
|
449
515
|
|
|
450
516
|
def persist_ingest_row(row, opts = {})
|
|
@@ -539,41 +605,41 @@ module Legion
|
|
|
539
605
|
Legion::JSON.dump(normalize_tags_input(tags))
|
|
540
606
|
end
|
|
541
607
|
|
|
542
|
-
def fts_search(text, limit:) # rubocop:disable Metrics/MethodLength,Metrics/AbcSize
|
|
608
|
+
def fts_search(text, limit:, as_of: nil) # rubocop:disable Metrics/MethodLength,Metrics/AbcSize
|
|
543
609
|
now = Time.now.utc.strftime('%Y-%m-%dT%H:%M:%S.%LZ')
|
|
544
|
-
if text.to_s.strip.empty?
|
|
545
|
-
return db[:local_knowledge]
|
|
546
|
-
.where(Sequel.lit('expires_at > ?', now))
|
|
547
|
-
.limit(limit)
|
|
548
|
-
.all
|
|
549
|
-
end
|
|
610
|
+
return active_knowledge_dataset(now: now, as_of: as_of).limit(limit).all if text.to_s.strip.empty?
|
|
550
611
|
|
|
551
612
|
tokens = text.to_s.scan(/[\p{L}\p{N}_]+/)
|
|
552
|
-
return ilike_search(text, now: now, limit: limit) if tokens.empty?
|
|
613
|
+
return ilike_search(text, now: now, limit: limit, as_of: as_of) if tokens.empty?
|
|
553
614
|
|
|
554
615
|
escaped = tokens.map { |t| %("#{t}") }.join(' ')
|
|
616
|
+
temporal_sql, temporal_params = temporal_window_sql(as_of, table_alias: 'lk')
|
|
555
617
|
db.fetch(
|
|
556
618
|
'SELECT lk.* FROM local_knowledge lk ' \
|
|
557
619
|
'INNER JOIN local_knowledge_fts fts ON lk.id = fts.rowid ' \
|
|
558
|
-
|
|
559
|
-
|
|
620
|
+
"WHERE local_knowledge_fts MATCH ? AND lk.expires_at > ?#{temporal_sql} " \
|
|
621
|
+
'ORDER BY fts.rank LIMIT ?',
|
|
622
|
+
escaped, now, *temporal_params, limit
|
|
560
623
|
).all
|
|
561
624
|
rescue StandardError => e
|
|
562
625
|
handle_exception(e, level: :debug, operation: 'apollo.local.fts_search', limit: limit, fallback: :ilike)
|
|
563
|
-
ilike_search(text, now: Time.now.utc.strftime('%Y-%m-%dT%H:%M:%S.%LZ'), limit: limit)
|
|
626
|
+
ilike_search(text, now: Time.now.utc.strftime('%Y-%m-%dT%H:%M:%S.%LZ'), limit: limit, as_of: as_of)
|
|
564
627
|
end
|
|
565
628
|
|
|
566
|
-
def ilike_search(text, now:, limit:)
|
|
629
|
+
def ilike_search(text, now:, limit:, as_of: nil)
|
|
567
630
|
safe_text = text.to_s.gsub('\\', '\\\\\\\\').gsub('%', '\%').gsub('_', '\_')
|
|
568
|
-
|
|
569
|
-
.where(Sequel.lit('expires_at > ?', now))
|
|
631
|
+
active_knowledge_dataset(now: now, as_of: as_of)
|
|
570
632
|
.where(Sequel.lit("content LIKE ? ESCAPE '\\' COLLATE NOCASE", "%#{safe_text}%"))
|
|
571
633
|
.limit(limit)
|
|
572
634
|
.all
|
|
573
635
|
end
|
|
574
636
|
|
|
575
|
-
def filter_candidates(candidates, min_confidence:, tags:,
|
|
637
|
+
def filter_candidates(candidates, min_confidence:, tags:, options: {}) # rubocop:disable Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity,Metrics/MethodLength,Metrics/AbcSize
|
|
638
|
+
include_inferences = options.fetch(:include_inferences, true)
|
|
639
|
+
include_history = options.fetch(:include_history, false)
|
|
640
|
+
as_of = options[:as_of]
|
|
576
641
|
candidates = candidates.select { |c| (c[:confidence] || 0) >= min_confidence }
|
|
642
|
+
candidates = candidates.select { |c| temporally_valid?(c, as_of) }
|
|
577
643
|
candidates = candidates.reject { |c| [1, true].include?(c[:is_inference]) } unless include_inferences
|
|
578
644
|
unless include_history
|
|
579
645
|
candidates = candidates.select { |c| c[:is_latest].nil? || c[:is_latest] == 1 || c[:is_latest] == true }
|
|
@@ -588,6 +654,36 @@ module Legion
|
|
|
588
654
|
candidates
|
|
589
655
|
end
|
|
590
656
|
|
|
657
|
+
def active_knowledge_dataset(now:, as_of: nil)
|
|
658
|
+
apply_temporal_window(db[:local_knowledge].where(Sequel.lit('expires_at > ?', now)), as_of)
|
|
659
|
+
end
|
|
660
|
+
|
|
661
|
+
def apply_temporal_window(dataset, as_of)
|
|
662
|
+
return dataset if as_of.to_s.empty?
|
|
663
|
+
|
|
664
|
+
dataset.where(
|
|
665
|
+
Sequel.lit('(valid_from IS NULL OR valid_from <= ?) AND (valid_to IS NULL OR valid_to >= ?)', as_of, as_of)
|
|
666
|
+
)
|
|
667
|
+
end
|
|
668
|
+
|
|
669
|
+
def temporal_window_sql(as_of, table_alias:)
|
|
670
|
+
return ['', []] if as_of.to_s.empty?
|
|
671
|
+
|
|
672
|
+
[
|
|
673
|
+
" AND (#{table_alias}.valid_from IS NULL OR #{table_alias}.valid_from <= ?) " \
|
|
674
|
+
"AND (#{table_alias}.valid_to IS NULL OR #{table_alias}.valid_to >= ?)",
|
|
675
|
+
[as_of, as_of]
|
|
676
|
+
]
|
|
677
|
+
end
|
|
678
|
+
|
|
679
|
+
def temporally_valid?(row, as_of)
|
|
680
|
+
return true if as_of.to_s.empty?
|
|
681
|
+
|
|
682
|
+
valid_from = row[:valid_from]
|
|
683
|
+
valid_to = row[:valid_to]
|
|
684
|
+
(valid_from.nil? || valid_from <= as_of) && (valid_to.nil? || valid_to >= as_of)
|
|
685
|
+
end
|
|
686
|
+
|
|
591
687
|
def parse_tags(tags_json)
|
|
592
688
|
return [] if tags_json.nil? || tags_json.empty?
|
|
593
689
|
|
|
@@ -656,6 +752,17 @@ module Legion
|
|
|
656
752
|
value.to_s
|
|
657
753
|
end
|
|
658
754
|
|
|
755
|
+
# Normalizes a temporal input to a UTC ISO-8601 string with millisecond
# precision ("YYYY-MM-DDTHH:MM:SS.mmmZ").
#
# @param value [Time, String, #to_s, nil] a time-like object or a timestamp string
# @return [String, nil] the normalized timestamp; nil for nil/blank input;
#   the stripped input text unchanged when it cannot be parsed
#
# Time-like values are converted with the non-mutating `getutc` when it is
# available: `Time#utc` converts the receiver in place, which would change
# the caller's object and raises FrozenError on a frozen non-UTC Time.
def normalize_temporal_value(value)
  return nil if value.nil?

  text = nil
  format = '%Y-%m-%dT%H:%M:%S.%LZ'
  return value.getutc.strftime(format) if value.respond_to?(:getutc)
  return value.utc.strftime(format) if value.respond_to?(:utc)

  text = value.to_s.strip
  return nil if text.empty?

  # Time.parse returns a fresh object, so the in-place `utc` is harmless here.
  Time.parse(text).utc.strftime(format)
rescue StandardError
  # Best effort: fall back to the raw text (nil if no text was derived yet).
  text
end
|
|
765
|
+
|
|
659
766
|
def normalize_tags_input(tags)
|
|
660
767
|
Legion::Apollo::Helpers::TagNormalizer.normalize(Array(tags)).first(max_tags_limit)
|
|
661
768
|
rescue StandardError => e
|
|
@@ -674,9 +781,9 @@ module Legion
|
|
|
674
781
|
Legion::Apollo::Helpers::TagNormalizer::MAX_TAGS
|
|
675
782
|
end
|
|
676
783
|
|
|
677
|
-
def query_by_tags_via_sql(tags:, limit:) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
|
|
784
|
+
def query_by_tags_via_sql(connection, tags:, limit:) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
|
|
678
785
|
now = Time.now.utc.strftime('%Y-%m-%dT%H:%M:%S.%LZ')
|
|
679
|
-
dataset =
|
|
786
|
+
dataset = connection[:local_knowledge].where(Sequel.lit('expires_at > ?', now))
|
|
680
787
|
|
|
681
788
|
Array(tags).map(&:to_s).each do |tag|
|
|
682
789
|
dataset = dataset.where(
|
|
@@ -696,11 +803,15 @@ module Legion
|
|
|
696
803
|
tag_count: Array(tags).size,
|
|
697
804
|
limit: limit
|
|
698
805
|
)
|
|
699
|
-
|
|
806
|
+
raise unless local_db_usable?(connection)
|
|
807
|
+
|
|
808
|
+
query_by_tags_via_ruby(connection, tags: tags, limit: limit)
|
|
700
809
|
end
|
|
701
810
|
|
|
702
|
-
def query_by_tags_via_ruby(tags:, limit:)
|
|
703
|
-
|
|
811
|
+
def query_by_tags_via_ruby(connection, tags:, limit:)
|
|
812
|
+
raise Sequel::DatabaseConnectionError, 'local database unavailable' unless local_db_usable?(connection)
|
|
813
|
+
|
|
814
|
+
candidates = connection[:local_knowledge]
|
|
704
815
|
.where(Sequel.lit('expires_at > ?', Time.now.utc.strftime('%Y-%m-%dT%H:%M:%S.%LZ')))
|
|
705
816
|
.all
|
|
706
817
|
|
|
@@ -711,7 +822,7 @@ module Legion
|
|
|
711
822
|
end
|
|
712
823
|
|
|
713
824
|
def update_upsert_entry(existing, content, tags_json, opts) # rubocop:disable Metrics/MethodLength,Metrics/AbcSize
|
|
714
|
-
content = content
|
|
825
|
+
content = normalize_text_input(content)
|
|
715
826
|
new_hash = content_hash(content)
|
|
716
827
|
embedding, embedded_at = generate_embedding(content)
|
|
717
828
|
now = Time.now.utc.strftime('%Y-%m-%dT%H:%M:%S.%LZ')
|
data/lib/legion/apollo/routes.rb
CHANGED
|
@@ -68,6 +68,7 @@ module Legion
|
|
|
68
68
|
agent_id: body[:agent_id] || 'api',
|
|
69
69
|
scope: normalize_scope(body[:scope]),
|
|
70
70
|
tier: body[:tier]&.to_sym,
|
|
71
|
+
as_of: body[:as_of],
|
|
71
72
|
include_inferences: body.fetch(:include_inferences, true),
|
|
72
73
|
include_history: body.fetch(:include_history, false)
|
|
73
74
|
)
|
|
@@ -88,6 +89,7 @@ module Legion
|
|
|
88
89
|
tags = Legion::Apollo::Helpers::TagNormalizer.normalize(Array(body[:tags])).first(effective_max_tags)
|
|
89
90
|
result = Legion::Apollo.ingest(
|
|
90
91
|
content: body[:content],
|
|
92
|
+
raw_content: body[:raw_content],
|
|
91
93
|
content_type: body[:content_type] || :observation,
|
|
92
94
|
tags: tags,
|
|
93
95
|
source_agent: body[:source_agent] || 'api',
|
|
@@ -99,6 +101,8 @@ module Legion
|
|
|
99
101
|
is_inference: body[:is_inference] == true,
|
|
100
102
|
forget_reason: body[:forget_reason],
|
|
101
103
|
expires_at: body[:expires_at],
|
|
104
|
+
valid_from: body[:valid_from],
|
|
105
|
+
valid_to: body[:valid_to],
|
|
102
106
|
parent_knowledge_id: body[:parent_knowledge_id],
|
|
103
107
|
supersession_type: body[:supersession_type],
|
|
104
108
|
source_uri: body[:source_uri],
|
data/lib/legion/apollo.rb
CHANGED
|
@@ -97,9 +97,11 @@ module Legion
|
|
|
97
97
|
return not_started_error unless started?
|
|
98
98
|
|
|
99
99
|
normalized_tags = normalize_tags_input(tags)
|
|
100
|
-
|
|
100
|
+
normalized_content = normalize_text_input(content)
|
|
101
|
+
normalized_raw_content = normalize_text_input(opts.key?(:raw_content) ? opts[:raw_content] : content)
|
|
102
|
+
payload = { **opts, content: normalized_content, raw_content: normalized_raw_content, tags: normalized_tags }
|
|
101
103
|
log.info do
|
|
102
|
-
"Apollo ingest requested scope=#{scope} content_length=#{content.to_s.length} " \
|
|
104
|
+
"Apollo ingest requested scope=#{scope} content_length=#{payload[:content].to_s.length} " \
|
|
103
105
|
"tags=#{payload[:tags].size} source_channel=#{payload[:source_channel]}"
|
|
104
106
|
end
|
|
105
107
|
log.debug do
|
|
@@ -289,7 +291,8 @@ module Legion
|
|
|
289
291
|
"limit=#{payload[:limit]}"
|
|
290
292
|
end
|
|
291
293
|
result = Legion::Apollo::Local.query(**payload.slice(:text, :limit, :min_confidence, :tags,
|
|
292
|
-
:tier, :include_inferences, :include_history
|
|
294
|
+
:tier, :include_inferences, :include_history,
|
|
295
|
+
:as_of))
|
|
293
296
|
return result unless result[:success]
|
|
294
297
|
|
|
295
298
|
entries = normalize_local_entries(Array(result[:results]))
|
|
@@ -324,7 +327,8 @@ module Legion
|
|
|
324
327
|
if Legion::Apollo::Local.started?
|
|
325
328
|
attempted = true
|
|
326
329
|
local = Legion::Apollo::Local.query(**payload.slice(:text, :limit, :min_confidence, :tags,
|
|
327
|
-
:tier, :include_inferences, :include_history
|
|
330
|
+
:tier, :include_inferences, :include_history,
|
|
331
|
+
:as_of))
|
|
328
332
|
if local[:success]
|
|
329
333
|
any_success = true
|
|
330
334
|
entries.concat(normalize_local_entries(Array(local[:results]))) if local[:results]
|
|
@@ -377,18 +381,29 @@ module Legion
|
|
|
377
381
|
else
|
|
378
382
|
Array(e[:tags])
|
|
379
383
|
end
|
|
380
|
-
{ id: e[:id], content: e[:content], content_hash: hash,
|
|
381
|
-
confidence: e[:confidence] || 0.5, content_type: 'fact', tags: tags, source: :local
|
|
384
|
+
{ id: e[:id], content: e[:content], raw_content: e[:raw_content] || e[:content], content_hash: hash,
|
|
385
|
+
confidence: e[:confidence] || 0.5, content_type: 'fact', tags: tags, source: :local,
|
|
386
|
+
valid_from: e[:valid_from], valid_to: e[:valid_to] }
|
|
382
387
|
end
|
|
383
388
|
end
|
|
384
389
|
|
|
385
390
|
def normalize_global_entries(entries)
|
|
386
|
-
entries.map
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
391
|
+
entries.map { |entry| normalize_global_entry(entry) }
|
|
392
|
+
end
|
|
393
|
+
|
|
394
|
+
def normalize_global_entry(entry)
|
|
395
|
+
{ id: entry[:id], content: entry[:content], raw_content: normalized_raw_content(entry),
|
|
396
|
+
content_hash: normalized_content_hash(entry), confidence: entry[:confidence] || 0.5,
|
|
397
|
+
content_type: entry[:content_type] || 'fact', tags: Array(entry[:tags]), source: :global,
|
|
398
|
+
valid_from: entry[:valid_from], valid_to: entry[:valid_to] }
|
|
399
|
+
end
|
|
400
|
+
|
|
401
|
+
def normalized_raw_content(entry)
|
|
402
|
+
entry[:raw_content] || entry[:content]
|
|
403
|
+
end
|
|
404
|
+
|
|
405
|
+
def normalized_content_hash(entry)
|
|
406
|
+
entry[:content_hash] || Digest::MD5.hexdigest(entry[:content].to_s.strip.downcase.gsub(/\s+/, ' '))
|
|
392
407
|
end
|
|
393
408
|
|
|
394
409
|
def dedup_and_rank(entries, limit:)
|
|
@@ -470,20 +485,28 @@ module Legion
|
|
|
470
485
|
end
|
|
471
486
|
|
|
472
487
|
def normalize_text_input(value) # rubocop:disable Metrics/CyclomaticComplexity,Metrics/MethodLength
|
|
473
|
-
case value
|
|
474
|
-
|
|
475
|
-
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
|
|
479
|
-
|
|
480
|
-
|
|
481
|
-
|
|
482
|
-
|
|
483
|
-
|
|
484
|
-
|
|
485
|
-
|
|
486
|
-
|
|
488
|
+
text = case value
|
|
489
|
+
when String
|
|
490
|
+
value
|
|
491
|
+
when Array
|
|
492
|
+
parts = value.filter_map { |entry| extract_text_fragment(entry) }
|
|
493
|
+
joined = parts.map(&:to_s).map(&:strip).reject(&:empty?).join("\n")
|
|
494
|
+
joined.empty? ? value.to_s : joined
|
|
495
|
+
when Hash
|
|
496
|
+
extract_text_fragment(value).to_s
|
|
497
|
+
when nil
|
|
498
|
+
''
|
|
499
|
+
else
|
|
500
|
+
value.to_s
|
|
501
|
+
end
|
|
502
|
+
sanitize_text_input(text)
|
|
503
|
+
end
|
|
504
|
+
|
|
505
|
+
# Produces a clean UTF-8 copy of +value+ that is safe to persist or route:
# invalid byte sequences are dropped and null bytes are removed.
#
# @param value [#to_s, nil] arbitrary input; nil becomes an empty string
# @return [String] a new, valid UTF-8 string without "\u0000"
#
# Binary-tagged (ASCII-8BIT) strings are reinterpreted as UTF-8 rather than
# transcoded: transcoding from binary treats every non-ASCII byte as
# undefined and would strip valid multibyte characters (e.g. "café".b).
def sanitize_text_input(value)
  text = value.to_s.dup
  text = if text.encoding == Encoding::ASCII_8BIT
           text.force_encoding(Encoding::UTF_8)
         else
           text.encode(Encoding::UTF_8, invalid: :replace, undef: :replace, replace: '')
         end
  # UTF-8 -> UTF-8 encode is a no-op, so invalid bytes are scrubbed explicitly.
  text = text.scrub('') unless text.valid_encoding?
  text.delete("\u0000")
end
|
|
488
511
|
|
|
489
512
|
def normalize_tags_input(tags)
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: legion-apollo
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.5.0
|
|
4
|
+
version: 0.5.2
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Esity
|
|
@@ -87,6 +87,7 @@ files:
|
|
|
87
87
|
- lib/legion/apollo/local/migrations/002_create_graph_tables.rb
|
|
88
88
|
- lib/legion/apollo/local/migrations/003_harden_graph_relationships.rb
|
|
89
89
|
- lib/legion/apollo/local/migrations/004_add_versioning_tiers_inference.rb
|
|
90
|
+
- lib/legion/apollo/local/migrations/005_add_raw_content_temporal_windows.rb
|
|
90
91
|
- lib/legion/apollo/messages/access_boost.rb
|
|
91
92
|
- lib/legion/apollo/messages/ingest.rb
|
|
92
93
|
- lib/legion/apollo/messages/query.rb
|