lex-apollo 0.4.19 → 0.4.21
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +14 -0
- data/lib/legion/extensions/apollo/actors/entity_watchdog.rb +8 -4
- data/lib/legion/extensions/apollo/actors/writeback_vectorize.rb +6 -0
- data/lib/legion/extensions/apollo/api.rb +5 -3
- data/lib/legion/extensions/apollo/runners/entity_extractor.rb +14 -3
- data/lib/legion/extensions/apollo/runners/expertise.rb +41 -28
- data/lib/legion/extensions/apollo/runners/gas.rb +65 -25
- data/lib/legion/extensions/apollo/runners/knowledge.rb +194 -45
- data/lib/legion/extensions/apollo/runners/maintenance.rb +14 -2
- data/lib/legion/extensions/apollo/version.rb +1 -1
- data/spec/legion/extensions/apollo/runners/knowledge_spec.rb +125 -0
- data/spec/spec_helper.rb +1 -0
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 31dc882a8d5fd761bceaba21ff853a3d3dd898df9d4377464b72b110d306af0a
|
|
4
|
+
data.tar.gz: 8dcfd2bc7c2557b298f3d503cc94b07e5a78b5a9777789f724dede41b9d0efdb
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 5e066b3c62555b4866bfe99d6bed13a034e2b6b23f613ae7906e8eaebee90ecb2f9dd937a5e928dd8337af8945bfba9a1528deb4d3200eb82b8a92b081b6cf59
|
|
7
|
+
data.tar.gz: dd1d625f4efc30df8951ee80db9ee0cfc1c25137bae5d8571dd09265bd5eec79add3482eba3cc88d77f43b052994597577abe634080cd995eb4dc0db7601458e
|
data/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,19 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## [0.4.21] - 2026-04-27
|
|
4
|
+
|
|
5
|
+
### Changed
|
|
6
|
+
- `Apollo::Runners::Knowledge#handle_ingest` now emits warn-level logs on the three early-return failure paths (nil/blank content, nil content_type, apollo_data_not_available). Companion to PR #15: that PR added `handle_exception` to the rescue paths; this PR closes the silent-failure window for the early-return paths that fire BEFORE any rescue would. Tag values in the log line are sanitized via `gsub(/[\r\n]+/, ' ')` to prevent log-line injection from caller-controlled tags.
|
|
7
|
+
|
|
8
|
+
## [0.4.20] - 2026-04-25
|
|
9
|
+
|
|
10
|
+
### Fixed
|
|
11
|
+
- `Knowledge#handle_ingest` now sanitizes stored content for PostgreSQL by scrubbing invalid UTF-8 and removing null bytes before inserts.
|
|
12
|
+
- `Knowledge#handle_ingest` now truncates short metadata fields to schema-safe lengths before writing entries.
|
|
13
|
+
- `Knowledge#handle_query` now treats very short queries as browse/list requests, returning recent non-archived entries without embedding noise while preserving explicit status filters.
|
|
14
|
+
- `Knowledge` Sequel rescue paths now log exception class, message, and a short backtrace before returning structured errors.
|
|
15
|
+
- Apollo runners and actors now emit debug/info lifecycle logs around ingest, query, browse, maintenance, expertise, entity extraction, writeback vectorization, entity watchdog, and GAS pipeline paths so debug-level logging can trace workflows end to end.
|
|
16
|
+
|
|
3
17
|
## [0.4.19] - 2026-04-24
|
|
4
18
|
|
|
5
19
|
### Fixed
|
|
@@ -34,6 +34,7 @@ module Legion
|
|
|
34
34
|
|
|
35
35
|
def scan_and_ingest
|
|
36
36
|
texts = recent_task_log_texts
|
|
37
|
+
log.debug("EntityWatchdog scan_and_ingest log_texts=#{texts.size}")
|
|
37
38
|
return { success: true, ingested: 0, reason: :no_logs } if texts.empty?
|
|
38
39
|
|
|
39
40
|
ingested = 0
|
|
@@ -53,10 +54,10 @@ module Legion
|
|
|
53
54
|
end
|
|
54
55
|
end
|
|
55
56
|
|
|
56
|
-
log.
|
|
57
|
+
log.info("EntityWatchdog ingested=#{ingested} logs_scanned=#{texts.size}")
|
|
57
58
|
{ success: true, ingested: ingested, logs_scanned: texts.size }
|
|
58
59
|
rescue StandardError => e
|
|
59
|
-
|
|
60
|
+
handle_exception(e, level: :error, operation: 'apollo.entity_watchdog.scan_and_ingest')
|
|
60
61
|
{ success: false, error: e.message }
|
|
61
62
|
end
|
|
62
63
|
|
|
@@ -71,7 +72,9 @@ module Legion
|
|
|
71
72
|
.order(Sequel.desc(:created_at))
|
|
72
73
|
.limit(log_limit)
|
|
73
74
|
.select_map(:message)
|
|
74
|
-
logs.map(&:to_s).reject(&:empty?).uniq
|
|
75
|
+
texts = logs.map(&:to_s).reject(&:empty?).uniq
|
|
76
|
+
log.debug("EntityWatchdog recent_task_log_texts lookback=#{lookback} limit=#{log_limit} raw=#{logs.size} unique=#{texts.size}")
|
|
77
|
+
texts
|
|
75
78
|
rescue StandardError => e
|
|
76
79
|
log.warn("EntityWatchdog recent_task_log_texts failed: #{e.message}")
|
|
77
80
|
[]
|
|
@@ -104,8 +107,9 @@ module Legion
|
|
|
104
107
|
source_agent: 'lex-apollo:entity_watchdog',
|
|
105
108
|
context: { entity_type: entity[:type], original_name: entity[:name] }
|
|
106
109
|
).publish
|
|
110
|
+
log.info("EntityWatchdog published entity type=#{entity[:type]} name=#{entity[:name]}")
|
|
107
111
|
rescue StandardError => e
|
|
108
|
-
|
|
112
|
+
handle_exception(e, level: :error, operation: 'apollo.entity_watchdog.publish_entity_ingest')
|
|
109
113
|
end
|
|
110
114
|
|
|
111
115
|
def entity_types
|
|
@@ -14,21 +14,27 @@ module Legion
|
|
|
14
14
|
|
|
15
15
|
def handle_vectorize(payload)
|
|
16
16
|
payload = symbolize(payload)
|
|
17
|
+
log.debug("WritebackVectorize handle_vectorize content_length=#{payload[:content].to_s.length} content_type=#{payload[:content_type] || 'nil'}")
|
|
17
18
|
result = Legion::LLM::Embeddings.generate(text: payload[:content])
|
|
18
19
|
vector = result.is_a?(Hash) ? result[:vector] : result
|
|
19
20
|
embedding = vector.is_a?(Array) && vector.any? ? vector : Array.new(1024, 0.0)
|
|
21
|
+
log.debug("WritebackVectorize embedding_dimensions=#{embedding.length} vector_generated=#{vector.is_a?(Array) && vector.any?}")
|
|
20
22
|
enriched = payload.merge(embedding: embedding)
|
|
21
23
|
|
|
22
24
|
if Helpers::Capability.can_write?
|
|
25
|
+
log.debug('WritebackVectorize route=direct_ingest')
|
|
23
26
|
Runners::Knowledge.handle_ingest(**enriched)
|
|
24
27
|
else
|
|
28
|
+
log.debug('WritebackVectorize route=transport_writeback')
|
|
25
29
|
Transport::Messages::Writeback.new(
|
|
26
30
|
**enriched, has_embedding: true
|
|
27
31
|
).publish
|
|
28
32
|
end
|
|
29
33
|
|
|
34
|
+
log.info('WritebackVectorize completed action=vectorized')
|
|
30
35
|
{ success: true, action: :vectorized }
|
|
31
36
|
rescue StandardError => e
|
|
37
|
+
handle_exception(e, level: :error, operation: 'apollo.writeback_vectorize.handle_vectorize')
|
|
32
38
|
{ success: false, error: e.message }
|
|
33
39
|
end
|
|
34
40
|
|
|
@@ -51,15 +51,17 @@ module Legion
|
|
|
51
51
|
req = json_body
|
|
52
52
|
halt 400, { error: 'query is required' }.to_json unless req[:query]
|
|
53
53
|
|
|
54
|
-
|
|
54
|
+
query_options = {
|
|
55
55
|
query: req[:query],
|
|
56
56
|
limit: req[:limit] || 10,
|
|
57
57
|
min_confidence: req[:min_confidence] || 0.3,
|
|
58
|
-
status: req[:status] || [:confirmed],
|
|
59
58
|
tags: req[:tags],
|
|
60
59
|
domain: req[:domain],
|
|
61
60
|
agent_id: req[:agent_id] || 'api'
|
|
62
|
-
|
|
61
|
+
}
|
|
62
|
+
query_options[:status] = req[:status] if req.key?(:status)
|
|
63
|
+
|
|
64
|
+
result = runner.handle_query(**query_options)
|
|
63
65
|
status result[:success] ? 200 : 500
|
|
64
66
|
result.to_json
|
|
65
67
|
end
|
|
@@ -4,17 +4,24 @@ module Legion
|
|
|
4
4
|
module Extensions
|
|
5
5
|
module Apollo
|
|
6
6
|
module Runners
|
|
7
|
-
module EntityExtractor
|
|
7
|
+
module EntityExtractor
|
|
8
8
|
DEFAULT_ENTITY_TYPES = %w[person service repository concept].freeze
|
|
9
9
|
DEFAULT_MIN_CONFIDENCE = 0.7
|
|
10
10
|
|
|
11
11
|
def extract_entities(text:, entity_types: nil, min_confidence: Helpers::Confidence.apollo_setting(:entity_extractor, :min_confidence, default: DEFAULT_MIN_CONFIDENCE), **) # rubocop:disable Layout/LineLength
|
|
12
|
-
|
|
12
|
+
if text.to_s.strip.empty?
|
|
13
|
+
log.debug('Apollo EntityExtractor.extract_entities skipped reason=empty_text')
|
|
14
|
+
return { success: true, entities: [], source: :empty }
|
|
15
|
+
end
|
|
13
16
|
|
|
14
|
-
|
|
17
|
+
unless defined?(Legion::LLM) && Legion::LLM.started?
|
|
18
|
+
log.debug('Apollo EntityExtractor.extract_entities skipped reason=llm_unavailable')
|
|
19
|
+
return { success: true, entities: [], source: :unavailable }
|
|
20
|
+
end
|
|
15
21
|
|
|
16
22
|
types = Array(entity_types).map(&:to_s)
|
|
17
23
|
types = DEFAULT_ENTITY_TYPES if types.empty?
|
|
24
|
+
log.debug("Apollo EntityExtractor.extract_entities text_length=#{text.to_s.length} types=#{types.join(',')} min_confidence=#{min_confidence}")
|
|
18
25
|
|
|
19
26
|
result = Legion::LLM.structured(
|
|
20
27
|
messages: [
|
|
@@ -29,9 +36,11 @@ module Legion
|
|
|
29
36
|
(entity[:confidence] || 0.0) >= min_confidence &&
|
|
30
37
|
(types.empty? || types.include?(entity[:type].to_s))
|
|
31
38
|
end
|
|
39
|
+
log.info("Apollo EntityExtractor.extract_entities raw=#{raw_entities.size} filtered=#{filtered.size}")
|
|
32
40
|
|
|
33
41
|
{ success: true, entities: filtered, source: :llm }
|
|
34
42
|
rescue StandardError => e
|
|
43
|
+
handle_exception(e, level: :error, operation: 'apollo.entity_extractor.extract_entities')
|
|
35
44
|
{ success: false, entities: [], error: e.message, source: :error }
|
|
36
45
|
end
|
|
37
46
|
|
|
@@ -70,6 +79,8 @@ module Legion
|
|
|
70
79
|
required: ['entities']
|
|
71
80
|
}
|
|
72
81
|
end
|
|
82
|
+
|
|
83
|
+
include Legion::Extensions::Helpers::Lex if defined?(Legion::Extensions::Helpers::Lex)
|
|
73
84
|
end
|
|
74
85
|
end
|
|
75
86
|
end
|
|
@@ -18,52 +18,65 @@ module Legion
|
|
|
18
18
|
end
|
|
19
19
|
|
|
20
20
|
def aggregate(**)
|
|
21
|
-
|
|
21
|
+
unless defined?(Legion::Data::Model::ApolloEntry)
|
|
22
|
+
log.warn('Apollo Expertise.aggregate skipped: apollo_data_not_available')
|
|
23
|
+
return { success: false, error: 'apollo_data_not_available' }
|
|
24
|
+
end
|
|
22
25
|
|
|
23
26
|
entries = Legion::Data::Model::ApolloEntry
|
|
24
27
|
.select(:source_agent, :tags, :confidence)
|
|
25
28
|
.exclude(source_agent: nil)
|
|
26
29
|
.all
|
|
27
|
-
|
|
28
|
-
groups = {}
|
|
29
|
-
entries.each do |entry|
|
|
30
|
-
agent = entry.source_agent
|
|
31
|
-
domain = entry.tags.is_a?(Array) ? (entry.tags.first || 'general') : 'general'
|
|
32
|
-
key = "#{agent}:#{domain}"
|
|
33
|
-
groups[key] ||= { agent_id: agent, domain: domain, confidences: [] }
|
|
34
|
-
groups[key][:confidences] << entry.confidence.to_f
|
|
35
|
-
end
|
|
30
|
+
log.debug("Apollo Expertise.aggregate entries=#{entries.size}")
|
|
36
31
|
|
|
37
32
|
agent_set = Set.new
|
|
38
33
|
domain_set = Set.new
|
|
39
34
|
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
count = group[:confidences].size
|
|
43
|
-
cap = Helpers::Confidence.apollo_setting(:expertise, :proficiency_cap, default: 1.0)
|
|
44
|
-
proficiency = [avg * Math.log2(count + 1), cap].min
|
|
45
|
-
|
|
46
|
-
existing = Legion::Data::Model::ApolloExpertise
|
|
47
|
-
.where(agent_id: group[:agent_id], domain: group[:domain]).first
|
|
48
|
-
|
|
49
|
-
if existing
|
|
50
|
-
existing.update(proficiency: proficiency, entry_count: count, last_active_at: Time.now)
|
|
51
|
-
else
|
|
52
|
-
Legion::Data::Model::ApolloExpertise.create(
|
|
53
|
-
agent_id: group[:agent_id], domain: group[:domain],
|
|
54
|
-
proficiency: proficiency, entry_count: count, last_active_at: Time.now
|
|
55
|
-
)
|
|
56
|
-
end
|
|
57
|
-
|
|
35
|
+
expertise_groups(entries).each_value do |group|
|
|
36
|
+
upsert_expertise_group(group)
|
|
58
37
|
agent_set << group[:agent_id]
|
|
59
38
|
domain_set << group[:domain]
|
|
60
39
|
end
|
|
61
40
|
|
|
62
41
|
{ success: true, agents: agent_set.size, domains: domain_set.size }
|
|
42
|
+
.tap { |result| log.info("Apollo Expertise.aggregate agents=#{result[:agents]} domains=#{result[:domains]}") }
|
|
63
43
|
rescue Sequel::Error => e
|
|
44
|
+
handle_exception(e, level: :error, operation: 'apollo.expertise.aggregate')
|
|
64
45
|
{ success: false, error: e.message }
|
|
65
46
|
end
|
|
66
47
|
|
|
48
|
+
def expertise_groups(entries)
|
|
49
|
+
entries.each_with_object({}) do |entry, groups|
|
|
50
|
+
agent = entry.source_agent
|
|
51
|
+
domain = entry.tags.is_a?(Array) ? (entry.tags.first || 'general') : 'general'
|
|
52
|
+
key = "#{agent}:#{domain}"
|
|
53
|
+
groups[key] ||= { agent_id: agent, domain: domain, confidences: [] }
|
|
54
|
+
groups[key][:confidences] << entry.confidence.to_f
|
|
55
|
+
end
|
|
56
|
+
end
|
|
57
|
+
|
|
58
|
+
def upsert_expertise_group(group)
|
|
59
|
+
count = group[:confidences].size
|
|
60
|
+
proficiency = expertise_proficiency(group[:confidences])
|
|
61
|
+
existing = Legion::Data::Model::ApolloExpertise
|
|
62
|
+
.where(agent_id: group[:agent_id], domain: group[:domain]).first
|
|
63
|
+
|
|
64
|
+
if existing
|
|
65
|
+
existing.update(proficiency: proficiency, entry_count: count, last_active_at: Time.now)
|
|
66
|
+
else
|
|
67
|
+
Legion::Data::Model::ApolloExpertise.create(
|
|
68
|
+
agent_id: group[:agent_id], domain: group[:domain],
|
|
69
|
+
proficiency: proficiency, entry_count: count, last_active_at: Time.now
|
|
70
|
+
)
|
|
71
|
+
end
|
|
72
|
+
end
|
|
73
|
+
|
|
74
|
+
def expertise_proficiency(confidences)
|
|
75
|
+
avg = confidences.sum / confidences.size
|
|
76
|
+
cap = Helpers::Confidence.apollo_setting(:expertise, :proficiency_cap, default: 1.0)
|
|
77
|
+
[avg * Math.log2(confidences.size + 1), cap].min
|
|
78
|
+
end
|
|
79
|
+
|
|
67
80
|
include Legion::Extensions::Helpers::Lex if defined?(Legion::Extensions::Helpers::Lex)
|
|
68
81
|
end
|
|
69
82
|
end
|
|
@@ -4,7 +4,10 @@ module Legion
|
|
|
4
4
|
module Extensions
|
|
5
5
|
module Apollo
|
|
6
6
|
module Runners
|
|
7
|
-
module Gas
|
|
7
|
+
module Gas
|
|
8
|
+
include Legion::Extensions::Helpers::Lex if defined?(Legion::Extensions::Helpers::Lex)
|
|
9
|
+
extend Legion::Extensions::Helpers::Lex if defined?(Legion::Extensions::Helpers::Lex)
|
|
10
|
+
|
|
8
11
|
RELATION_TYPES = %w[
|
|
9
12
|
similar_to contradicts depends_on causes
|
|
10
13
|
part_of supersedes supports_by extends
|
|
@@ -16,10 +19,6 @@ module Legion
|
|
|
16
19
|
|
|
17
20
|
module_function
|
|
18
21
|
|
|
19
|
-
def log
|
|
20
|
-
Legion::Logging
|
|
21
|
-
end
|
|
22
|
-
|
|
23
22
|
def json_load(str)
|
|
24
23
|
::JSON.parse(str, symbolize_names: true)
|
|
25
24
|
end
|
|
@@ -31,7 +30,12 @@ module Legion
|
|
|
31
30
|
def fallback_confidence = Helpers::Confidence.apollo_setting(:gas, :fallback_confidence, default: 0.5)
|
|
32
31
|
|
|
33
32
|
def process(audit_event)
|
|
34
|
-
|
|
33
|
+
unless processable?(audit_event)
|
|
34
|
+
log.debug('GAS process skipped reason=no_content')
|
|
35
|
+
return { phases_completed: 0, reason: 'no content' }
|
|
36
|
+
end
|
|
37
|
+
|
|
38
|
+
log.debug("GAS process start request_id=#{audit_event[:request_id] || 'nil'} messages=#{Array(audit_event[:messages]).size} response_length=#{audit_event[:response_content].to_s.length}") # rubocop:disable Layout/LineLength
|
|
35
39
|
|
|
36
40
|
facts = phase_comprehend(audit_event)
|
|
37
41
|
entities = phase_extract(audit_event, facts)
|
|
@@ -40,7 +44,7 @@ module Legion
|
|
|
40
44
|
deposit_result = phase_deposit(facts, entities, relations, synthesis, audit_event)
|
|
41
45
|
anticipations = phase_anticipate(facts, synthesis)
|
|
42
46
|
|
|
43
|
-
{
|
|
47
|
+
result = {
|
|
44
48
|
phases_completed: 6,
|
|
45
49
|
facts: facts.length,
|
|
46
50
|
entities: entities.length,
|
|
@@ -49,8 +53,10 @@ module Legion
|
|
|
49
53
|
deposited: deposit_result,
|
|
50
54
|
anticipations: anticipations.length
|
|
51
55
|
}
|
|
56
|
+
log.info("GAS process complete facts=#{result[:facts]} entities=#{result[:entities]} relations=#{result[:relations]} synthesis=#{result[:synthesis]} anticipations=#{result[:anticipations]}") # rubocop:disable Layout/LineLength
|
|
57
|
+
result
|
|
52
58
|
rescue StandardError => e
|
|
53
|
-
log.
|
|
59
|
+
log.error("GAS pipeline error: #{e.message}")
|
|
54
60
|
{ phases_completed: 0, error: e.message }
|
|
55
61
|
end
|
|
56
62
|
|
|
@@ -63,19 +69,24 @@ module Legion
|
|
|
63
69
|
messages = audit_event[:messages]
|
|
64
70
|
response = audit_event[:response_content]
|
|
65
71
|
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
72
|
+
mode = llm_available? ? :llm : :mechanical
|
|
73
|
+
log.debug("GAS phase_comprehend mode=#{mode} messages=#{Array(messages).size} response_length=#{response.to_s.length}")
|
|
74
|
+
facts = mode == :llm ? llm_comprehend(messages, response) : mechanical_comprehend(messages, response)
|
|
75
|
+
log.debug("GAS phase_comprehend facts=#{facts.size}")
|
|
76
|
+
facts
|
|
71
77
|
end
|
|
72
78
|
|
|
73
79
|
# Phase 2: Extract - entity extraction (delegates to existing EntityExtractor)
|
|
74
80
|
def phase_extract(audit_event, _facts)
|
|
75
|
-
|
|
81
|
+
unless defined?(Runners::EntityExtractor)
|
|
82
|
+
log.debug('GAS phase_extract skipped reason=entity_extractor_unavailable')
|
|
83
|
+
return []
|
|
84
|
+
end
|
|
76
85
|
|
|
77
86
|
result = Runners::EntityExtractor.extract_entities(text: audit_event[:response_content])
|
|
78
|
-
result[:success] ? (result[:entities] || []) : []
|
|
87
|
+
entities = result[:success] ? (result[:entities] || []) : []
|
|
88
|
+
log.debug("GAS phase_extract success=#{result[:success]} entities=#{entities.size}")
|
|
89
|
+
entities
|
|
79
90
|
rescue StandardError => e
|
|
80
91
|
log.warn("GAS phase_extract failed: #{e.message}")
|
|
81
92
|
[]
|
|
@@ -83,10 +94,16 @@ module Legion
|
|
|
83
94
|
|
|
84
95
|
# Phase 3: Relate - classify relationships between new and existing entries
|
|
85
96
|
def phase_relate(facts, _entities)
|
|
86
|
-
|
|
97
|
+
unless defined?(Runners::Knowledge)
|
|
98
|
+
log.debug('GAS phase_relate skipped reason=knowledge_runner_unavailable')
|
|
99
|
+
return []
|
|
100
|
+
end
|
|
87
101
|
|
|
88
102
|
existing = fetch_similar_entries(facts)
|
|
89
|
-
|
|
103
|
+
if existing.empty?
|
|
104
|
+
log.debug("GAS phase_relate skipped reason=no_existing_entries facts=#{facts.size}")
|
|
105
|
+
return []
|
|
106
|
+
end
|
|
90
107
|
|
|
91
108
|
relations = []
|
|
92
109
|
facts.each do |fact|
|
|
@@ -95,15 +112,24 @@ module Legion
|
|
|
95
112
|
relations << relation if relation
|
|
96
113
|
end
|
|
97
114
|
end
|
|
115
|
+
log.debug("GAS phase_relate facts=#{facts.size} existing=#{existing.size} relations=#{relations.size}")
|
|
98
116
|
relations
|
|
99
117
|
end
|
|
100
118
|
|
|
101
119
|
# Phase 4: Synthesize - generate derivative knowledge
|
|
102
120
|
def phase_synthesize(facts, _relations)
|
|
103
|
-
|
|
104
|
-
|
|
121
|
+
if facts.length < 2
|
|
122
|
+
log.debug("GAS phase_synthesize skipped reason=insufficient_facts facts=#{facts.length}")
|
|
123
|
+
return []
|
|
124
|
+
end
|
|
125
|
+
unless llm_available?
|
|
126
|
+
log.debug('GAS phase_synthesize skipped reason=llm_unavailable')
|
|
127
|
+
return []
|
|
128
|
+
end
|
|
105
129
|
|
|
106
|
-
llm_synthesize(facts)
|
|
130
|
+
synthesis = llm_synthesize(facts)
|
|
131
|
+
log.debug("GAS phase_synthesize synthesis=#{synthesis.size}")
|
|
132
|
+
synthesis
|
|
107
133
|
rescue StandardError => e
|
|
108
134
|
log.warn("GAS phase_synthesize failed: #{e.message}")
|
|
109
135
|
[]
|
|
@@ -111,7 +137,10 @@ module Legion
|
|
|
111
137
|
|
|
112
138
|
# Phase 5: Deposit - atomic write to Apollo
|
|
113
139
|
def phase_deposit(facts, _entities, _relations, _synthesis, audit_event)
|
|
114
|
-
|
|
140
|
+
unless defined?(Runners::Knowledge)
|
|
141
|
+
log.debug('GAS phase_deposit skipped reason=knowledge_runner_unavailable')
|
|
142
|
+
return { deposited: 0 }
|
|
143
|
+
end
|
|
115
144
|
|
|
116
145
|
deposited = 0
|
|
117
146
|
facts.each do |fact|
|
|
@@ -128,15 +157,24 @@ module Legion
|
|
|
128
157
|
rescue StandardError => e
|
|
129
158
|
log.warn("GAS deposit error: #{e.message}")
|
|
130
159
|
end
|
|
160
|
+
log.info("GAS phase_deposit deposited=#{deposited} facts=#{facts.size}")
|
|
131
161
|
{ deposited: deposited }
|
|
132
162
|
end
|
|
133
163
|
|
|
134
164
|
# Phase 6: Anticipate - pre-cache likely follow-up questions
|
|
135
165
|
def phase_anticipate(facts, _synthesis)
|
|
136
|
-
|
|
137
|
-
|
|
166
|
+
if facts.empty?
|
|
167
|
+
log.debug('GAS phase_anticipate skipped reason=no_facts')
|
|
168
|
+
return []
|
|
169
|
+
end
|
|
170
|
+
unless llm_available?
|
|
171
|
+
log.debug('GAS phase_anticipate skipped reason=llm_unavailable')
|
|
172
|
+
return []
|
|
173
|
+
end
|
|
138
174
|
|
|
139
|
-
llm_anticipate(facts)
|
|
175
|
+
anticipations = llm_anticipate(facts)
|
|
176
|
+
log.debug("GAS phase_anticipate anticipations=#{anticipations.size}")
|
|
177
|
+
anticipations
|
|
140
178
|
rescue StandardError => e
|
|
141
179
|
log.warn("GAS phase_anticipate failed: #{e.message}")
|
|
142
180
|
[]
|
|
@@ -153,7 +191,9 @@ module Legion
|
|
|
153
191
|
log.warn("GAS fetch_similar_entries failed for fact: #{e.message}")
|
|
154
192
|
next
|
|
155
193
|
end
|
|
156
|
-
entries.uniq { |e| e[:id] }
|
|
194
|
+
unique = entries.uniq { |e| e[:id] }
|
|
195
|
+
log.debug("GAS fetch_similar_entries facts=#{facts.size} entries=#{unique.size}")
|
|
196
|
+
unique
|
|
157
197
|
end
|
|
158
198
|
|
|
159
199
|
def classify_relation(fact, entry)
|
|
@@ -21,9 +21,12 @@ module Legion
|
|
|
21
21
|
link: :association, relation: :association, connection: :association,
|
|
22
22
|
inference: :association, implication: :association
|
|
23
23
|
}.freeze
|
|
24
|
+
DEFAULT_QUERY_STATUS = [:confirmed].freeze
|
|
25
|
+
UNSET = Object.new.freeze
|
|
24
26
|
|
|
25
27
|
def store_knowledge(content:, content_type:, tags: [], source_agent: nil, context: {}, **)
|
|
26
28
|
content_type = normalize_content_type(content_type)
|
|
29
|
+
log.debug("Apollo Knowledge.store_knowledge content_type=#{content_type} tags=#{Array(tags).size} source_agent=#{source_agent || 'nil'} data_available=#{defined?(Legion::Data::Model::ApolloEntry) ? true : false}") # rubocop:disable Layout/LineLength
|
|
27
30
|
|
|
28
31
|
if defined?(Legion::Data::Model::ApolloEntry)
|
|
29
32
|
return handle_ingest(content: content, content_type: content_type,
|
|
@@ -41,6 +44,7 @@ module Legion
|
|
|
41
44
|
end
|
|
42
45
|
|
|
43
46
|
def query_knowledge(query:, limit: Helpers::GraphQuery.default_query_limit, min_confidence: Helpers::GraphQuery.default_query_min_confidence, status: %i[confirmed candidate], tags: nil, **) # rubocop:disable Layout/LineLength
|
|
47
|
+
log.debug("Apollo Knowledge.query_knowledge query_length=#{query.to_s.length} limit=#{limit} statuses=#{Array(status).join(',')} tags=#{Array(tags).size} data_available=#{defined?(Legion::Data::Model::ApolloEntry) ? true : false}") # rubocop:disable Layout/LineLength
|
|
44
48
|
if defined?(Legion::Data::Model::ApolloEntry)
|
|
45
49
|
return handle_query(query: query, limit: limit, min_confidence: min_confidence,
|
|
46
50
|
status: status, tags: tags, **)
|
|
@@ -57,6 +61,7 @@ module Legion
|
|
|
57
61
|
end
|
|
58
62
|
|
|
59
63
|
def related_entries(entry_id:, relation_types: nil, depth: Helpers::GraphQuery.default_depth, **)
|
|
64
|
+
log.debug("Apollo Knowledge.related_entries entry_id=#{entry_id} depth=#{depth} relation_types=#{Array(relation_types).join(',')} data_available=#{defined?(Legion::Data::Model::ApolloEntry) ? true : false}") # rubocop:disable Layout/LineLength
|
|
60
65
|
return handle_traverse(entry_id: entry_id, depth: depth, relation_types: relation_types, **) if defined?(Legion::Data::Model::ApolloEntry)
|
|
61
66
|
|
|
62
67
|
{
|
|
@@ -75,74 +80,72 @@ module Legion
|
|
|
75
80
|
}
|
|
76
81
|
end
|
|
77
82
|
|
|
78
|
-
def handle_ingest(content: nil, content_type: nil, tags: [], source_agent: 'unknown', source_provider: nil, source_channel: nil, knowledge_domain: nil, submitted_by: nil, submitted_from: nil, content_hash: nil, context: {}, skip: false, **) # rubocop:disable Metrics/ParameterLists, Layout/LineLength
|
|
83
|
+
def handle_ingest(content: nil, content_type: nil, tags: [], source_agent: 'unknown', source_provider: nil, source_channel: nil, knowledge_domain: nil, submitted_by: nil, submitted_from: nil, content_hash: nil, context: {}, skip: false, **) # rubocop:disable Metrics/ParameterLists, Layout/LineLength
|
|
79
84
|
return { status: :skipped } if skip
|
|
80
|
-
return { success: false, error: 'content is required' } if content.nil? || content.to_s.strip.empty?
|
|
81
|
-
return { success: false, error: 'content_type is required' } if content_type.nil?
|
|
82
|
-
return { success: false, error: 'apollo_data_not_available' } unless defined?(Legion::Data::Model::ApolloEntry)
|
|
83
85
|
|
|
84
|
-
|
|
86
|
+
content = normalize_text_input(content)
|
|
87
|
+
log.debug("Apollo Knowledge.handle_ingest content_length=#{content.length} content_type=#{content_type} tags=#{Array(tags).size} source_agent=#{source_agent} source_channel=#{source_channel || 'nil'}") # rubocop:disable Layout/LineLength
|
|
88
|
+
early_error = ingest_early_return_error(content: content, content_type: content_type, tags: tags)
|
|
89
|
+
return early_error if early_error
|
|
90
|
+
|
|
85
91
|
hash = content_hash || (defined?(Helpers::Writeback) ? Helpers::Writeback.content_hash(content) : nil)
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
.first
|
|
91
|
-
if existing
|
|
92
|
-
existing.update(confidence: [existing.confidence + Helpers::Confidence.retrieval_boost, 1.0].min)
|
|
93
|
-
return { success: true, entry_id: existing.id, deduped: true }
|
|
94
|
-
end
|
|
92
|
+
existing = active_duplicate_for_hash(hash)
|
|
93
|
+
if existing
|
|
94
|
+
log.info("Apollo Knowledge.handle_ingest deduped entry_id=#{existing.id} source_agent=#{source_agent}")
|
|
95
|
+
return { success: true, entry_id: existing.id, deduped: true }
|
|
95
96
|
end
|
|
96
97
|
|
|
97
98
|
embedding = embed_text(content)
|
|
98
99
|
content_type_sym = content_type.to_s
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
tags: Sequel.pg_array(tag_array),
|
|
114
|
-
status: 'candidate',
|
|
115
|
-
knowledge_domain: domain,
|
|
116
|
-
submitted_by: submitted_by,
|
|
117
|
-
submitted_from: submitted_from,
|
|
118
|
-
content_hash: hash,
|
|
119
|
-
embedding: Sequel.lit("'[#{embedding.join(',')}]'::vector")
|
|
100
|
+
metadata = ingest_metadata(tags: tags, knowledge_domain: knowledge_domain, source_agent: source_agent,
|
|
101
|
+
source_provider: source_provider, source_channel: source_channel,
|
|
102
|
+
submitted_by: submitted_by, submitted_from: submitted_from)
|
|
103
|
+
|
|
104
|
+
corroborated, existing_id = find_corroboration(
|
|
105
|
+
embedding, content_type_sym, metadata[:source_agent], metadata[:source_channel]
|
|
106
|
+
)
|
|
107
|
+
|
|
108
|
+
if corroborated
|
|
109
|
+
log.info("Apollo Knowledge.handle_ingest corroborated entry_id=#{existing_id} source_agent=#{metadata[:source_agent]}")
|
|
110
|
+
else
|
|
111
|
+
existing_id = create_candidate_entry(
|
|
112
|
+
content: content, content_type: content_type_sym, context: context,
|
|
113
|
+
metadata: metadata, content_hash: hash, embedding: embedding
|
|
120
114
|
)
|
|
121
|
-
existing_id = new_entry.id
|
|
122
115
|
end
|
|
123
116
|
|
|
124
|
-
upsert_expertise(source_agent: source_agent, domain: domain)
|
|
117
|
+
upsert_expertise(source_agent: metadata[:source_agent], domain: metadata[:domain])
|
|
125
118
|
|
|
126
119
|
Legion::Data::Model::ApolloAccessLog.create(
|
|
127
|
-
entry_id: existing_id, agent_id: source_agent, action: 'ingest'
|
|
120
|
+
entry_id: existing_id, agent_id: metadata[:source_agent], action: 'ingest'
|
|
128
121
|
)
|
|
129
122
|
|
|
130
123
|
contradictions = detect_contradictions(existing_id, embedding, content)
|
|
124
|
+
log.debug("Apollo Knowledge.handle_ingest complete entry_id=#{existing_id} corroborated=#{corroborated} contradictions=#{contradictions.size}")
|
|
131
125
|
|
|
132
126
|
{ success: true, entry_id: existing_id, status: corroborated ? 'corroborated' : 'candidate',
|
|
133
127
|
corroborated: corroborated, contradictions: contradictions }
|
|
134
128
|
rescue Sequel::Error => e
|
|
129
|
+
handle_exception(e, level: :error, operation: 'apollo.knowledge.handle_ingest')
|
|
135
130
|
{ success: false, error: e.message }
|
|
136
131
|
end
|
|
137
132
|
|
|
138
|
-
def handle_query(query:, limit: Helpers::GraphQuery.default_query_limit, min_confidence: Helpers::GraphQuery.default_query_min_confidence, status:
|
|
133
|
+
def handle_query(query:, limit: Helpers::GraphQuery.default_query_limit, min_confidence: Helpers::GraphQuery.default_query_min_confidence, status: UNSET, tags: nil, domain: nil, agent_id: 'unknown', **) # rubocop:disable Layout/LineLength
|
|
139
134
|
return { success: false, error: 'apollo_data_not_available' } unless defined?(Legion::Data::Model::ApolloEntry)
|
|
140
135
|
|
|
141
136
|
query = normalize_text_input(query)
|
|
137
|
+
status_defaulted = status.equal?(UNSET)
|
|
138
|
+
requested_status = status_defaulted ? DEFAULT_QUERY_STATUS : status
|
|
139
|
+
log.debug("Apollo Knowledge.handle_query mode=#{browse_query?(query) ? 'browse' : 'semantic'} query_length=#{query.length} limit=#{limit} statuses=#{Array(requested_status).join(',')} status_defaulted=#{status_defaulted} tags=#{Array(tags).size} domain=#{domain || 'nil'} agent_id=#{agent_id}") # rubocop:disable Layout/LineLength
|
|
140
|
+
if browse_query?(query)
|
|
141
|
+
return list_entries_chronologically(query: query, limit: limit, status: requested_status,
|
|
142
|
+
status_defaulted: status_defaulted, tags: tags, domain: domain)
|
|
143
|
+
end
|
|
144
|
+
|
|
142
145
|
embedding = embed_text(query)
|
|
143
146
|
sql = Helpers::GraphQuery.build_semantic_search_sql(
|
|
144
147
|
limit: limit, min_confidence: min_confidence,
|
|
145
|
-
statuses: Array(
|
|
148
|
+
statuses: Array(requested_status).map(&:to_s), tags: tags, domain: domain
|
|
146
149
|
)
|
|
147
150
|
|
|
148
151
|
db = Legion::Data::Model::ApolloEntry.db
|
|
@@ -173,14 +176,17 @@ module Legion
|
|
|
173
176
|
knowledge_domain: entry[:knowledge_domain] }
|
|
174
177
|
end
|
|
175
178
|
|
|
179
|
+
log.info("Apollo Knowledge.handle_query results=#{formatted.size} mode=semantic agent_id=#{agent_id}")
|
|
176
180
|
{ success: true, entries: formatted, count: formatted.size }
|
|
177
181
|
rescue Sequel::Error => e
|
|
182
|
+
handle_exception(e, level: :error, operation: 'apollo.knowledge.handle_query')
|
|
178
183
|
{ success: false, error: e.message }
|
|
179
184
|
end
|
|
180
185
|
|
|
181
186
|
def handle_traverse(entry_id:, depth: Helpers::GraphQuery.default_depth, relation_types: nil, agent_id: 'unknown', **)
|
|
182
187
|
return { success: false, error: 'apollo_data_not_available' } unless defined?(Legion::Data::Model::ApolloEntry)
|
|
183
188
|
|
|
189
|
+
log.debug("Apollo Knowledge.handle_traverse entry_id=#{entry_id} depth=#{depth} relation_types=#{Array(relation_types).join(',')} agent_id=#{agent_id}") # rubocop:disable Layout/LineLength
|
|
184
190
|
# Whitelist relation_types to prevent SQL injection (they are string-interpolated in build_traversal_sql)
|
|
185
191
|
if relation_types
|
|
186
192
|
allowed = Helpers::Confidence::RELATION_TYPES
|
|
@@ -203,14 +209,17 @@ module Legion
|
|
|
203
209
|
depth: entry[:depth], activation: entry[:activation] }
|
|
204
210
|
end
|
|
205
211
|
|
|
212
|
+
log.info("Apollo Knowledge.handle_traverse results=#{formatted.size} entry_id=#{entry_id}")
|
|
206
213
|
{ success: true, entries: formatted, count: formatted.size }
|
|
207
214
|
rescue Sequel::Error => e
|
|
215
|
+
handle_exception(e, level: :error, operation: 'apollo.knowledge.handle_traverse')
|
|
208
216
|
{ success: false, error: e.message }
|
|
209
217
|
end
|
|
210
218
|
|
|
211
219
|
def redistribute_knowledge(agent_id:, min_confidence: Helpers::Confidence.apollo_setting(:query, :redistribute_min_confidence, default: 0.5), **)
|
|
212
220
|
return { success: false, error: 'apollo_data_not_available' } unless defined?(Legion::Data::Model::ApolloEntry)
|
|
213
221
|
|
|
222
|
+
log.debug("Apollo Knowledge.redistribute_knowledge agent_id=#{agent_id} min_confidence=#{min_confidence}")
|
|
214
223
|
entries = Legion::Data::Model::ApolloEntry
|
|
215
224
|
.where(source_agent: agent_id, status: 'confirmed')
|
|
216
225
|
.where { confidence > min_confidence }
|
|
@@ -238,6 +247,7 @@ module Legion
|
|
|
238
247
|
log.info("[apollo] redistributed #{redistributed} entries from departing agent=#{agent_id}")
|
|
239
248
|
{ success: true, redistributed: redistributed, agent_id: agent_id }
|
|
240
249
|
rescue Sequel::Error => e
|
|
250
|
+
handle_exception(e, level: :error, operation: 'apollo.knowledge.redistribute_knowledge')
|
|
241
251
|
{ success: false, error: e.message }
|
|
242
252
|
end
|
|
243
253
|
|
|
@@ -247,6 +257,7 @@ module Legion
|
|
|
247
257
|
return { success: false, error: 'apollo_data_not_available' } unless defined?(Legion::Data::Model::ApolloEntry)
|
|
248
258
|
|
|
249
259
|
query = normalize_text_input(query)
|
|
260
|
+
log.debug("Apollo Knowledge.retrieve_relevant query_length=#{query.length} limit=#{limit} min_confidence=#{min_confidence} tags=#{Array(tags).size} domain=#{domain || 'nil'}") # rubocop:disable Layout/LineLength
|
|
250
261
|
return { success: true, entries: [], count: 0 } if query.nil? || query.to_s.strip.empty?
|
|
251
262
|
|
|
252
263
|
embedding = embed_text(query)
|
|
@@ -273,8 +284,10 @@ module Legion
|
|
|
273
284
|
knowledge_domain: entry[:knowledge_domain] }
|
|
274
285
|
end
|
|
275
286
|
|
|
287
|
+
log.info("Apollo Knowledge.retrieve_relevant results=#{formatted.size} limit=#{limit}")
|
|
276
288
|
{ success: true, entries: formatted, count: formatted.size }
|
|
277
289
|
rescue Sequel::Error => e
|
|
290
|
+
handle_exception(e, level: :error, operation: 'apollo.knowledge.retrieve_relevant')
|
|
278
291
|
{ success: false, error: e.message }
|
|
279
292
|
end
|
|
280
293
|
|
|
@@ -283,6 +296,7 @@ module Legion
|
|
|
283
296
|
return { success: false, error: 'apollo_data_not_available' }
|
|
284
297
|
end
|
|
285
298
|
|
|
299
|
+
log.debug("Apollo Knowledge.prepare_mesh_export target_domain=#{target_domain} min_confidence=#{min_confidence} limit=#{limit}")
|
|
286
300
|
conn = Legion::Data.connection
|
|
287
301
|
allowed = allowed_domains_for(target_domain)
|
|
288
302
|
|
|
@@ -302,7 +316,9 @@ module Legion
|
|
|
302
316
|
end
|
|
303
317
|
|
|
304
318
|
{ success: true, entries: formatted, count: formatted.size, target_domain: target_domain }
|
|
319
|
+
.tap { |result| log.info("Apollo Knowledge.prepare_mesh_export results=#{result[:count]} target_domain=#{target_domain}") }
|
|
305
320
|
rescue Sequel::Error => e
|
|
321
|
+
handle_exception(e, level: :error, operation: 'apollo.knowledge.prepare_mesh_export')
|
|
306
322
|
{ success: false, error: e.message }
|
|
307
323
|
end
|
|
308
324
|
|
|
@@ -311,6 +327,7 @@ module Legion
|
|
|
311
327
|
return { deleted: 0, redacted: 0, error: 'apollo_data_not_available' }
|
|
312
328
|
end
|
|
313
329
|
|
|
330
|
+
log.warn("Apollo Knowledge.handle_erasure_request agent_id=#{agent_id}")
|
|
314
331
|
conn = Legion::Data.connection
|
|
315
332
|
|
|
316
333
|
# Delete entries solely from dead agent (not confirmed by others)
|
|
@@ -325,7 +342,9 @@ module Legion
|
|
|
325
342
|
.update(source_agent: 'redacted', source_provider: nil, source_channel: nil)
|
|
326
343
|
|
|
327
344
|
{ deleted: deleted, redacted: redacted, agent_id: agent_id }
|
|
345
|
+
.tap { |result| log.info("Apollo Knowledge.handle_erasure_request deleted=#{result[:deleted]} redacted=#{result[:redacted]} agent_id=#{agent_id}") } # rubocop:disable Layout/LineLength
|
|
328
346
|
rescue Sequel::Error => e
|
|
347
|
+
handle_exception(e, level: :error, operation: 'apollo.knowledge.handle_erasure_request')
|
|
329
348
|
{ deleted: 0, redacted: 0, error: e.message }
|
|
330
349
|
end
|
|
331
350
|
|
|
@@ -333,6 +352,27 @@ module Legion
|
|
|
333
352
|
|
|
334
353
|
private
|
|
335
354
|
|
|
355
|
+
# Checks the preconditions of an ingest request and reports the first one that fails.
#
# Every failure is logged at warn level before the failure hash is returned, so
# these early exits are visible in the logs.
#
# @param content [String, nil] text to ingest; nil and whitespace-only values are rejected
# @param content_type [Object, nil] declared content type; nil is rejected
# @param tags [Array, Object, nil] caller-supplied tags, used only in the log line
# @return [Hash, nil] `{ success: false, error: String }` for the first failed check,
#   or nil when every check passes
def ingest_early_return_error(content:, content_type:, tags:)
  # content_type is caller-controlled as well, so it gets the same newline
  # scrubbing as the tags before it is written to a log line.
  safe_type = content_type.to_s.gsub(/[\r\n]+/, ' ')

  # `to_s` keeps a nil content on the "content is required" path instead of
  # raising NoMethodError from `nil.strip`.
  if content.to_s.strip.empty?
    safe_tags = Array(tags).map { |tag| tag.to_s.gsub(/[\r\n]+/, ' ') }
    log.warn('[apollo][handle_ingest] early-return: content is required ' \
             "content_type=#{safe_type} tags=#{safe_tags.inspect}")
    return { success: false, error: 'content is required' }
  end

  if content_type.nil?
    log.warn('[apollo][handle_ingest] early-return: content_type is required ' \
             "content_length=#{content.to_s.length}")
    return { success: false, error: 'content_type is required' }
  end

  # The entry model is only defined when the data layer has been loaded.
  return nil if defined?(Legion::Data::Model::ApolloEntry)

  log.warn('[apollo][handle_ingest] early-return: apollo_data_not_available ' \
           "content_type=#{safe_type}")
  { success: false, error: 'apollo_data_not_available' }
end
|
|
375
|
+
|
|
336
376
|
def normalize_content_type(raw)
|
|
337
377
|
sym = raw.to_s.delete_prefix(':').gsub(%r{[/\s]}, '_').strip.downcase.to_sym
|
|
338
378
|
sym = CONTENT_TYPE_ALIASES.fetch(sym, sym)
|
|
@@ -341,21 +381,124 @@ module Legion
|
|
|
341
381
|
|
|
342
382
|
# Produces an embedding vector for the given text.
#
# The text is normalized first, then handed to Legion::LLM::Embeddings.generate.
# The generator may answer with a Hash carrying :vector or with the vector itself.
# Whenever no non-empty Array comes back, or anything raises, a 1024-element
# zero vector is returned instead so callers always receive an Array.
#
# @param text [Object] raw text to embed
# @return [Array] the generated vector, or Array.new(1024, 0.0) as fallback
def embed_text(text)
  normalized = normalize_text_input(text)
  log.debug("Apollo Knowledge.embed_text text_length=#{normalized.length}")

  response = Legion::LLM::Embeddings.generate(text: normalized)
  candidate = case response
              when Hash then response[:vector]
              else response
              end

  unless candidate.is_a?(Array) && candidate.any?
    log.warn('Apollo Knowledge.embed_text returned no vector; using zero-vector fallback')
    return Array.new(1024, 0.0)
  end

  log.debug("Apollo Knowledge.embed_text vector_dimensions=#{candidate.length}")
  candidate
rescue StandardError => e
  log.warn("Apollo Knowledge.embed_text failed: #{e.message}")
  Array.new(1024, 0.0)
end
|
|
351
398
|
|
|
352
399
|
# Normalizes arbitrary input into a String that has been passed through
# sanitize_for_postgres.
#
# When Legion::Apollo exposes a (possibly private) normalize_text_input, that
# implementation is used; otherwise the value is simply stringified. Any error
# is logged at warn level and an empty String is returned.
#
# @param value [Object] raw input
# @return [String] normalized text; never nil
def normalize_text_input(value)
  delegate = defined?(Legion::Apollo) && Legion::Apollo.respond_to?(:normalize_text_input, true)
  raw = delegate ? Legion::Apollo.send(:normalize_text_input, value) : value

  # Coerce to String on every path. The fallback and rescue branches already
  # return a String, and callers invoke String methods (`length`, `[0, n]`) on
  # the result without a nil check, so a nil from the delegate must not leak out.
  sanitize_for_postgres(raw.to_s)
rescue StandardError => e
  log.warn("Apollo Knowledge.normalize_text_input failed: #{e.message}")
  ''
end
|
|
411
|
+
|
|
412
|
+
# Strips content from a String that the visible tests name as a storage concern:
# NUL bytes and byte sequences that are not valid UTF-8.
#
# Non-String values are returned untouched. Strings are copied, tagged as UTF-8
# (bytes are reinterpreted, not transcoded), scrubbed of invalid sequences and
# stripped of "\x00". A debug line is emitted only when something was removed.
#
# @param value [Object] candidate value
# @return [Object] a cleaned copy for Strings, the original value otherwise
def sanitize_for_postgres(value)
  return value unless value.is_a?(String)

  utf8 = value.dup
  utf8.force_encoding(Encoding::UTF_8) unless utf8.encoding == Encoding::UTF_8

  # Decide whether to log before the string is modified below.
  needs_cleanup = utf8.include?("\x00") || !utf8.valid_encoding?

  # Scrub before deleting NULs, so the delete runs on a validly encoded string.
  utf8 = utf8.scrub('') unless utf8.valid_encoding?
  cleaned = utf8.delete("\x00")

  if needs_cleanup
    log.debug('Apollo Knowledge.sanitize_for_postgres sanitized ' \
              "original_length=#{value.bytesize} sanitized_length=#{cleaned.bytesize}")
  end

  cleaned
end
|
|
422
|
+
|
|
423
|
+
# Normalizes a value and cuts it to at most +max_length+ characters.
#
# @param value [Object, nil] raw column value
# @param max_length [Integer] maximum number of characters to keep
# @return [String, nil] the truncated text, or nil when +value+ is nil
def truncate_for_column(value, max_length)
  value.nil? ? nil : normalize_text_input(value).slice(0, max_length)
end
|
|
428
|
+
|
|
429
|
+
# Looks up a non-archived entry with the given content hash and, when one is
# found, raises its confidence by the retrieval boost (capped at 1.0).
#
# @param hash [String, nil] content hash to look for
# @return [Object, nil] the matching entry, or nil when +hash+ is falsy or nothing matches
def active_duplicate_for_hash(hash)
  return nil unless hash

  match = Legion::Data::Model::ApolloEntry.where(content_hash: hash).exclude(status: 'archived').first

  if match
    boosted = [match.confidence + Helpers::Confidence.retrieval_boost, 1.0].min
    match.update(confidence: boosted)
    log.debug("Apollo Knowledge.active_duplicate_for_hash matched entry_id=#{match.id}")
  end

  match
end
|
|
440
|
+
|
|
441
|
+
# Assembles the metadata stored alongside an ingested entry.
#
# Tags go through Helpers::TagNormalizer when that constant is defined and are
# otherwise wrapped with Array(). Text fields are cut with truncate_for_column
# (50 characters for agent, domain and provider, 100 for channel, 255 for the
# submitted_* fields). A missing agent becomes 'unknown', a missing domain falls
# back to the first tag and then 'general', and a missing provider is derived
# from the agent.
#
# @return [Hash] keys :tags, :domain, :source_agent, :source_provider,
#   :source_channel, :submitted_by, :submitted_from
def ingest_metadata(tags:, knowledge_domain:, source_agent:, source_provider:, source_channel:, submitted_by:, submitted_from:)
  normalized_tags = if defined?(Helpers::TagNormalizer)
                      Helpers::TagNormalizer.normalize_all(tags)
                    else
                      Array(tags)
                    end
  resolved_agent = truncate_for_column(source_agent, 50) || 'unknown'
  resolved_domain = truncate_for_column(knowledge_domain || normalized_tags.first || 'general', 50)
  # The provider is derived from the agent only when the caller supplied none.
  resolved_provider = truncate_for_column(source_provider || derive_provider_from_agent(resolved_agent), 50)

  {
    tags: normalized_tags,
    domain: resolved_domain,
    source_agent: resolved_agent,
    source_provider: resolved_provider,
    source_channel: truncate_for_column(source_channel, 100),
    submitted_by: truncate_for_column(submitted_by, 255),
    submitted_from: truncate_for_column(submitted_from, 255)
  }
end
|
|
453
|
+
|
|
454
|
+
# Persists a new ApolloEntry with status 'candidate' and the initial confidence,
# logs the creation at info level and returns the new entry's id.
#
# @param content [String] entry text
# @param content_type [Object] stored as-is in the content_type column
# @param context [Hash, Object] serialized to JSON; anything that is not a Hash is stored as {}
# @param metadata [Hash] reads :source_agent, :source_provider, :source_channel,
#   :tags, :domain, :submitted_by and :submitted_from
# @param content_hash [String, nil] stored as-is in the content_hash column
# @param embedding [Array] vector components, joined into a `'[...]'::vector` literal
# @return [Object] id of the created entry
def create_candidate_entry(content:, content_type:, context:, metadata:, content_hash:, embedding:)
  context_payload = context.is_a?(Hash) ? context : {}

  attributes = {
    content: content,
    content_type: content_type,
    confidence: Helpers::Confidence.initial_confidence,
    source_agent: metadata[:source_agent],
    source_provider: metadata[:source_provider],
    source_channel: metadata[:source_channel],
    source_context: ::JSON.dump(context_payload),
    tags: Sequel.pg_array(metadata[:tags]),
    status: 'candidate',
    knowledge_domain: metadata[:domain],
    submitted_by: metadata[:submitted_by],
    submitted_from: metadata[:submitted_from],
    content_hash: content_hash,
    # NOTE(review): the components are interpolated straight into a SQL literal;
    # this assumes every element is numeric -- confirm against embed_text's callers.
    embedding: Sequel.lit("'[#{embedding.join(',')}]'::vector")
  }

  created = Legion::Data::Model::ApolloEntry.create(**attributes)
  log.info("Apollo Knowledge.handle_ingest created entry_id=#{created.id} status=candidate " \
           "domain=#{metadata[:domain]} source_agent=#{metadata[:source_agent]}")
  created.id
end
|
|
474
|
+
|
|
475
|
+
# Tells whether a query, once stringified and stripped of surrounding
# whitespace, is shorter than three characters.
#
# @param query [Object, nil] raw query
# @return [Boolean] true when fewer than three characters remain
def browse_query?(query)
  trimmed = query.to_s.strip
  trimmed.length < 3
end
|
|
478
|
+
|
|
479
|
+
# Lists non-archived entries, newest first, without any embedding lookup.
#
# Optional filters: an explicit status list (skipped when +status_defaulted+ is
# truthy or the list is empty), overlapping tags, and an exact knowledge domain.
#
# @param query [Object] echoed back in the result; not used for filtering
# @param limit [Integer] maximum number of rows
# @param status [Array, Object, nil] requested statuses
# @param status_defaulted [Boolean] truthy when the status filter should be skipped
# @param tags [Array, Object, nil] tags to overlap with
# @param domain [String, nil] knowledge domain to match
# @return [Hash] `{ success: true, mode: :browse, query:, entries:, count: }`, or
#   `{ success: false, error: }` when Sequel raises
def list_entries_chronologically(query:, limit:, status:, status_defaulted:, tags:, domain:)
  tag_list = Array(tags)
  log.debug("Apollo Knowledge.list_entries_chronologically limit=#{limit} statuses=#{Array(status).join(',')} " \
            "status_defaulted=#{status_defaulted} tags=#{tag_list.size} domain=#{domain || 'nil'}")

  scope = Legion::Data::Model::ApolloEntry.exclude(status: 'archived')

  wanted_statuses = Array(status).map(&:to_s).reject(&:empty?)
  scope = scope.where(status: wanted_statuses) if !status_defaulted && !wanted_statuses.empty?
  scope = scope.where(Sequel.lit('tags && ?', Sequel.pg_array(tag_list))) if tags && !tag_list.empty?
  scope = scope.where(knowledge_domain: domain) if domain && !domain.to_s.empty?

  rows = scope.order(Sequel.desc(:created_at)).limit(limit).all
  # Rows may already be plain Hashes; model instances expose their columns via #values.
  entries = rows.map { |row| format_entry(row.is_a?(Hash) ? row : row.values) }

  log.info("Apollo Knowledge.list_entries_chronologically results=#{entries.size}")
  { success: true, mode: :browse, query: query, entries: entries, count: entries.size }
rescue Sequel::Error => e
  handle_exception(e, level: :error, operation: 'apollo.knowledge.list_entries_chronologically')
  { success: false, error: e.message }
end
|
|
496
|
+
|
|
497
|
+
# Projects an entry onto the fields returned to callers.
#
# @param entry [Hash, #[]] row keyed by symbols
# @return [Hash] :id, :content, :content_type, :confidence, :distance (converted
#   with to_f unless nil), :tags, :source_agent and :knowledge_domain, in that order
def format_entry(entry)
  formatted = %i[id content content_type confidence].to_h { |key| [key, entry[key]] }

  distance = entry[:distance]
  formatted[:distance] = distance.nil? ? nil : distance.to_f

  %i[tags source_agent knowledge_domain].each { |key| formatted[key] = entry[key] }
  formatted
end
|
|
360
503
|
|
|
361
504
|
def allowed_domains_for(target_domain)
|
|
@@ -379,6 +522,7 @@ module Legion
|
|
|
379
522
|
rel_weight = Helpers::Confidence.apollo_setting(:contradiction, :relation_weight, default: 0.8)
|
|
380
523
|
|
|
381
524
|
db = Legion::Data::Model::ApolloEntry.db
|
|
525
|
+
log.debug("Apollo Knowledge.detect_contradictions entry_id=#{entry_id} similar_limit=#{sim_limit} threshold=#{sim_threshold}")
|
|
382
526
|
similar = db.fetch(
|
|
383
527
|
"SELECT id, content, embedding FROM apollo_entries WHERE id != :entry_id AND embedding IS NOT NULL ORDER BY embedding <=> :embedding LIMIT #{sim_limit}", # rubocop:disable Layout/LineLength
|
|
384
528
|
entry_id: entry_id,
|
|
@@ -403,9 +547,10 @@ module Legion
|
|
|
403
547
|
Legion::Data::Model::ApolloEntry.where(id: [entry_id, existing[:id]]).update(status: 'disputed')
|
|
404
548
|
contradictions << existing[:id]
|
|
405
549
|
end
|
|
550
|
+
log.info("Apollo Knowledge.detect_contradictions entry_id=#{entry_id} contradictions=#{contradictions.size}") if contradictions.any?
|
|
406
551
|
contradictions
|
|
407
552
|
rescue Sequel::Error => e
|
|
408
|
-
|
|
553
|
+
handle_exception(e, level: :error, operation: 'apollo.knowledge.detect_contradictions')
|
|
409
554
|
[]
|
|
410
555
|
end
|
|
411
556
|
|
|
@@ -430,6 +575,7 @@ module Legion
|
|
|
430
575
|
|
|
431
576
|
def find_corroboration(embedding, content_type_sym, source_agent, source_channel = nil)
|
|
432
577
|
scan_limit = Helpers::Confidence.apollo_setting(:corroboration, :scan_limit, default: 50)
|
|
578
|
+
log.debug("Apollo Knowledge.find_corroboration content_type=#{content_type_sym} source_agent=#{source_agent} source_channel=#{source_channel || 'nil'} scan_limit=#{scan_limit}") # rubocop:disable Layout/LineLength
|
|
433
579
|
existing = Legion::Data::Model::ApolloEntry
|
|
434
580
|
.where(content_type: content_type_sym)
|
|
435
581
|
.exclude(embedding: nil)
|
|
@@ -462,9 +608,11 @@ module Legion
|
|
|
462
608
|
source_agent: source_agent,
|
|
463
609
|
weight: sim
|
|
464
610
|
)
|
|
611
|
+
log.info("Apollo Knowledge.find_corroboration matched entry_id=#{entry.id} source_agent=#{source_agent} similarity=#{sim}")
|
|
465
612
|
return [true, entry.id]
|
|
466
613
|
end
|
|
467
614
|
|
|
615
|
+
log.debug("Apollo Knowledge.find_corroboration no_match source_agent=#{source_agent}")
|
|
468
616
|
[false, nil]
|
|
469
617
|
end
|
|
470
618
|
|
|
@@ -483,6 +631,7 @@ module Legion
|
|
|
483
631
|
end
|
|
484
632
|
|
|
485
633
|
def upsert_expertise(source_agent:, domain:)
|
|
634
|
+
log.debug("Apollo Knowledge.upsert_expertise source_agent=#{source_agent} domain=#{domain}")
|
|
486
635
|
expertise = Legion::Data::Model::ApolloExpertise
|
|
487
636
|
.where(agent_id: source_agent, domain: domain).first
|
|
488
637
|
if expertise
|
|
@@ -25,7 +25,11 @@ module Legion
|
|
|
25
25
|
min_confidence ||= Helpers::Confidence.decay_threshold
|
|
26
26
|
min_age_hours = Helpers::Confidence.decay_min_age_hours
|
|
27
27
|
|
|
28
|
-
|
|
28
|
+
log.debug("Apollo Maintenance.run_decay_cycle alpha=#{alpha} min_confidence=#{min_confidence} min_age_hours=#{min_age_hours}")
|
|
29
|
+
unless defined?(Legion::Data) && Legion::Data.respond_to?(:connection) && Legion::Data.connection
|
|
30
|
+
log.warn('Apollo Maintenance.run_decay_cycle skipped: apollo_data_not_available')
|
|
31
|
+
return { decayed: 0, archived: 0 }
|
|
32
|
+
end
|
|
29
33
|
|
|
30
34
|
conn = Legion::Data.connection
|
|
31
35
|
|
|
@@ -54,15 +58,21 @@ module Legion
|
|
|
54
58
|
|
|
55
59
|
{ decayed: decayed, archived: archived, alpha: alpha, threshold: min_confidence,
|
|
56
60
|
min_age_hours: min_age_hours }
|
|
61
|
+
.tap { |result| log.info("Apollo Maintenance.run_decay_cycle decayed=#{result[:decayed]} archived=#{result[:archived]} alpha=#{alpha} threshold=#{min_confidence}") } # rubocop:disable Layout/LineLength
|
|
57
62
|
rescue Sequel::Error => e
|
|
63
|
+
handle_exception(e, level: :error, operation: 'apollo.maintenance.run_decay_cycle')
|
|
58
64
|
{ decayed: 0, archived: 0, error: e.message }
|
|
59
65
|
end
|
|
60
66
|
|
|
61
67
|
def check_corroboration(**) # rubocop:disable Metrics/CyclomaticComplexity
|
|
62
|
-
|
|
68
|
+
unless defined?(Legion::Data::Model::ApolloEntry)
|
|
69
|
+
log.warn('Apollo Maintenance.check_corroboration skipped: apollo_data_not_available')
|
|
70
|
+
return { success: false, error: 'apollo_data_not_available' }
|
|
71
|
+
end
|
|
63
72
|
|
|
64
73
|
candidates = Legion::Data::Model::ApolloEntry.where(status: 'candidate').exclude(embedding: nil).all
|
|
65
74
|
confirmed = Legion::Data::Model::ApolloEntry.where(status: 'confirmed').exclude(embedding: nil).all
|
|
75
|
+
log.debug("Apollo Maintenance.check_corroboration candidates=#{candidates.size} confirmed=#{confirmed.size}")
|
|
66
76
|
|
|
67
77
|
promoted = 0
|
|
68
78
|
|
|
@@ -106,7 +116,9 @@ module Legion
|
|
|
106
116
|
end
|
|
107
117
|
|
|
108
118
|
{ success: true, promoted: promoted, scanned: candidates.size }
|
|
119
|
+
.tap { |result| log.info("Apollo Maintenance.check_corroboration scanned=#{result[:scanned]} promoted=#{result[:promoted]}") }
|
|
109
120
|
rescue Sequel::Error => e
|
|
121
|
+
handle_exception(e, level: :error, operation: 'apollo.maintenance.check_corroboration')
|
|
110
122
|
{ success: false, error: e.message }
|
|
111
123
|
end
|
|
112
124
|
|
|
@@ -263,6 +263,36 @@ RSpec.describe Legion::Extensions::Apollo::Runners::Knowledge do
|
|
|
263
263
|
tags: ['RabbitMQ'], source_agent: 'agent-1')
|
|
264
264
|
end
|
|
265
265
|
|
|
266
|
+
it 'sanitizes null bytes before storing content' do
|
|
267
|
+
expect(mock_entry_class).to receive(:create).with(
|
|
268
|
+
hash_including(content: 'helloworld')
|
|
269
|
+
).and_return(mock_entry)
|
|
270
|
+
host.handle_ingest(content: "hello\x00world", content_type: 'fact', source_agent: 'agent-1')
|
|
271
|
+
end
|
|
272
|
+
|
|
273
|
+
it 'truncates short varchar metadata fields at the database boundary' do
|
|
274
|
+
expect(mock_entry_class).to receive(:create).with(
|
|
275
|
+
hash_including(
|
|
276
|
+
source_agent: 'a' * 50,
|
|
277
|
+
source_provider: 'p' * 50,
|
|
278
|
+
source_channel: 'c' * 100,
|
|
279
|
+
knowledge_domain: 'd' * 50,
|
|
280
|
+
submitted_by: 'u' * 255,
|
|
281
|
+
submitted_from: 'n' * 255
|
|
282
|
+
)
|
|
283
|
+
).and_return(mock_entry)
|
|
284
|
+
host.handle_ingest(
|
|
285
|
+
content: 'test',
|
|
286
|
+
content_type: 'fact',
|
|
287
|
+
source_agent: 'a' * 60,
|
|
288
|
+
source_provider: 'p' * 60,
|
|
289
|
+
source_channel: 'c' * 120,
|
|
290
|
+
knowledge_domain: 'd' * 60,
|
|
291
|
+
submitted_by: 'u' * 300,
|
|
292
|
+
submitted_from: 'n' * 300
|
|
293
|
+
)
|
|
294
|
+
end
|
|
295
|
+
|
|
266
296
|
context 'content hash dedup' do
|
|
267
297
|
let(:existing_entry) do
|
|
268
298
|
double('existing', id: 'uuid-existing', confidence: 0.6,
|
|
@@ -295,9 +325,46 @@ RSpec.describe Legion::Extensions::Apollo::Runners::Knowledge do
|
|
|
295
325
|
end
|
|
296
326
|
|
|
297
327
|
it 'returns a structured error' do
|
|
328
|
+
allow(host).to receive(:handle_exception)
|
|
329
|
+
|
|
298
330
|
result = host.handle_ingest(content: 'test', content_type: 'fact', source_agent: 'a')
|
|
299
331
|
expect(result[:success]).to be false
|
|
300
332
|
expect(result[:error]).to eq('connection lost')
|
|
333
|
+
expect(host).to have_received(:handle_exception).with(
|
|
334
|
+
instance_of(Sequel::Error),
|
|
335
|
+
level: :error,
|
|
336
|
+
operation: 'apollo.knowledge.handle_ingest'
|
|
337
|
+
)
|
|
338
|
+
end
|
|
339
|
+
end
|
|
340
|
+
|
|
341
|
+
context 'early-return warn logs' do
|
|
342
|
+
let(:logger) { instance_double('Logger', debug: nil, info: nil, warn: nil) }
|
|
343
|
+
|
|
344
|
+
before { allow(host).to receive(:log).and_return(logger) }
|
|
345
|
+
|
|
346
|
+
it 'emits a warn log when content is nil' do
|
|
347
|
+
host.handle_ingest(content: nil, content_type: 'fact')
|
|
348
|
+
expect(logger).to have_received(:warn).with(/early-return: content is required/)
|
|
349
|
+
end
|
|
350
|
+
|
|
351
|
+
it 'emits a warn log when content_type is nil' do
|
|
352
|
+
host.handle_ingest(content: 'something', content_type: nil)
|
|
353
|
+
expect(logger).to have_received(:warn).with(/early-return: content_type is required/)
|
|
354
|
+
end
|
|
355
|
+
|
|
356
|
+
it 'emits a warn log when apollo_data_not_available' do
|
|
357
|
+
hide_const('Legion::Data::Model::ApolloEntry') if defined?(Legion::Data::Model::ApolloEntry)
|
|
358
|
+
host.handle_ingest(content: 'something', content_type: 'fact')
|
|
359
|
+
expect(logger).to have_received(:warn).with(/early-return: apollo_data_not_available/)
|
|
360
|
+
end
|
|
361
|
+
|
|
362
|
+
it 'sanitizes newline-bearing tags in the warn log' do
|
|
363
|
+
host.handle_ingest(content: nil, content_type: 'fact', tags: ["evil\nFAKE LOG LINE", 'normal'])
|
|
364
|
+
expect(logger).to have_received(:warn) do |msg|
|
|
365
|
+
expect(msg).to include('evil FAKE LOG LINE')
|
|
366
|
+
expect(msg).not_to include("\n")
|
|
367
|
+
end
|
|
301
368
|
end
|
|
302
369
|
end
|
|
303
370
|
end
|
|
@@ -376,6 +443,64 @@ RSpec.describe Legion::Extensions::Apollo::Runners::Knowledge do
|
|
|
376
443
|
expect(result[:count]).to eq(0)
|
|
377
444
|
end
|
|
378
445
|
end
|
|
446
|
+
|
|
447
|
+
context 'when query is browse-shaped' do
|
|
448
|
+
let(:mock_entry_class) { double('ApolloEntry') }
|
|
449
|
+
let(:dataset) { double('dataset') }
|
|
450
|
+
let(:entries) do
|
|
451
|
+
[{ id: 'uuid-1', content: 'Candidate fact', content_type: 'fact',
|
|
452
|
+
confidence: 0.7, tags: ['ruby'], source_agent: 'agent-1',
|
|
453
|
+
knowledge_domain: 'general' }]
|
|
454
|
+
end
|
|
455
|
+
|
|
456
|
+
before do
|
|
457
|
+
stub_const('Legion::Data::Model::ApolloEntry', mock_entry_class)
|
|
458
|
+
allow(mock_entry_class).to receive(:exclude).with(status: 'archived').and_return(dataset)
|
|
459
|
+
allow(dataset).to receive(:where).and_return(dataset)
|
|
460
|
+
allow(dataset).to receive(:order).with(:created_at).and_return(dataset)
|
|
461
|
+
allow(dataset).to receive(:limit).with(50).and_return(dataset)
|
|
462
|
+
allow(dataset).to receive(:all).and_return(entries)
|
|
463
|
+
end
|
|
464
|
+
|
|
465
|
+
it 'lists recent non-archived entries without generating an embedding' do
|
|
466
|
+
expect(Legion::LLM::Embeddings).not_to receive(:generate)
|
|
467
|
+
|
|
468
|
+
result = host.handle_query(query: 'x', limit: 50)
|
|
469
|
+
|
|
470
|
+
expect(result[:success]).to be true
|
|
471
|
+
expect(result[:mode]).to eq(:browse)
|
|
472
|
+
expect(result[:count]).to eq(1)
|
|
473
|
+
expect(result[:entries].first[:content]).to eq('Candidate fact')
|
|
474
|
+
expect(dataset).not_to have_received(:where).with(status: ['confirmed'])
|
|
475
|
+
end
|
|
476
|
+
|
|
477
|
+
it 'respects an explicit confirmed status filter' do
|
|
478
|
+
host.handle_query(query: 'x', limit: 50, status: [:confirmed])
|
|
479
|
+
|
|
480
|
+
expect(dataset).to have_received(:where).with(status: ['confirmed'])
|
|
481
|
+
end
|
|
482
|
+
|
|
483
|
+
it 'applies tags and domain filters when provided' do
|
|
484
|
+
host.handle_query(query: 'x', limit: 50, tags: ['ruby'], domain: 'general')
|
|
485
|
+
|
|
486
|
+
expect(dataset).to have_received(:where).with('tags && ?')
|
|
487
|
+
expect(dataset).to have_received(:where).with(knowledge_domain: 'general')
|
|
488
|
+
end
|
|
489
|
+
end
|
|
490
|
+
end
|
|
491
|
+
|
|
492
|
+
describe '#normalize_text_input' do
|
|
493
|
+
let(:host) { Object.new.extend(described_class) }
|
|
494
|
+
|
|
495
|
+
it 'strips null bytes in the local fallback path' do
|
|
496
|
+
expect(host.send(:normalize_text_input, "hello\x00world")).to eq('helloworld')
|
|
497
|
+
end
|
|
498
|
+
|
|
499
|
+
it 'scrubs invalid UTF-8 in the local fallback path' do
|
|
500
|
+
invalid = "hello\xC3(world".dup.force_encoding(Encoding::UTF_8)
|
|
501
|
+
|
|
502
|
+
expect(host.send(:normalize_text_input, invalid)).to eq('hello(world')
|
|
503
|
+
end
|
|
379
504
|
end
|
|
380
505
|
|
|
381
506
|
describe '#retrieve_relevant' do
|
data/spec/spec_helper.rb
CHANGED