lex-apollo 0.4.19 → 0.4.20
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +9 -0
- data/lib/legion/extensions/apollo/actors/entity_watchdog.rb +8 -4
- data/lib/legion/extensions/apollo/actors/writeback_vectorize.rb +6 -0
- data/lib/legion/extensions/apollo/api.rb +5 -3
- data/lib/legion/extensions/apollo/runners/entity_extractor.rb +14 -3
- data/lib/legion/extensions/apollo/runners/expertise.rb +41 -28
- data/lib/legion/extensions/apollo/runners/gas.rb +65 -25
- data/lib/legion/extensions/apollo/runners/knowledge.rb +172 -43
- data/lib/legion/extensions/apollo/runners/maintenance.rb +14 -2
- data/lib/legion/extensions/apollo/version.rb +1 -1
- data/spec/legion/extensions/apollo/runners/knowledge_spec.rb +95 -0
- data/spec/spec_helper.rb +1 -0
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: f197e46e616eb71f939175480496d17775c67985bf70d631ac8d089724c7ac7d
|
|
4
|
+
data.tar.gz: c8f6ee951339eda218647c3bee95bfda3f32c6ee62a44abcaad8940b40214bd9
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 5b4822965e47e806bf21b1e07e3a675fff365bdea474514278ab61efc1be2e3fc557756f2ad8ab32e63b2d1eb24286dfe25c44ceacbba723bd877d1402d7a5ab
|
|
7
|
+
data.tar.gz: b67b970d6cf8734abd96c5de5e993e802ba924fd966307e60fd7911f7879559e3835d262ed653a269b740bbffd2574df9e156afe4e4c9dfd6a15b70afce6d658
|
data/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,14 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## [0.4.20] - 2026-04-25
|
|
4
|
+
|
|
5
|
+
### Fixed
|
|
6
|
+
- `Knowledge#handle_ingest` now sanitizes stored content for PostgreSQL by scrubbing invalid UTF-8 and removing null bytes before inserts.
|
|
7
|
+
- `Knowledge#handle_ingest` now truncates short metadata fields to schema-safe lengths before writing entries.
|
|
8
|
+
- `Knowledge#handle_query` now treats very short queries as browse/list requests, returning recent non-archived entries without embedding noise while preserving explicit status filters.
|
|
9
|
+
- `Knowledge` Sequel rescue paths now log exception class, message, and a short backtrace before returning structured errors.
|
|
10
|
+
- Apollo runners and actors now emit debug/info lifecycle logs around ingest, query, browse, maintenance, expertise, entity extraction, writeback vectorization, entity watchdog, and GAS pipeline paths so debug-level logging can trace workflows end to end.
|
|
11
|
+
|
|
3
12
|
## [0.4.19] - 2026-04-24
|
|
4
13
|
|
|
5
14
|
### Fixed
|
|
@@ -34,6 +34,7 @@ module Legion
|
|
|
34
34
|
|
|
35
35
|
def scan_and_ingest
|
|
36
36
|
texts = recent_task_log_texts
|
|
37
|
+
log.debug("EntityWatchdog scan_and_ingest log_texts=#{texts.size}")
|
|
37
38
|
return { success: true, ingested: 0, reason: :no_logs } if texts.empty?
|
|
38
39
|
|
|
39
40
|
ingested = 0
|
|
@@ -53,10 +54,10 @@ module Legion
|
|
|
53
54
|
end
|
|
54
55
|
end
|
|
55
56
|
|
|
56
|
-
log.
|
|
57
|
+
log.info("EntityWatchdog ingested=#{ingested} logs_scanned=#{texts.size}")
|
|
57
58
|
{ success: true, ingested: ingested, logs_scanned: texts.size }
|
|
58
59
|
rescue StandardError => e
|
|
59
|
-
|
|
60
|
+
handle_exception(e, level: :error, operation: 'apollo.entity_watchdog.scan_and_ingest')
|
|
60
61
|
{ success: false, error: e.message }
|
|
61
62
|
end
|
|
62
63
|
|
|
@@ -71,7 +72,9 @@ module Legion
|
|
|
71
72
|
.order(Sequel.desc(:created_at))
|
|
72
73
|
.limit(log_limit)
|
|
73
74
|
.select_map(:message)
|
|
74
|
-
logs.map(&:to_s).reject(&:empty?).uniq
|
|
75
|
+
texts = logs.map(&:to_s).reject(&:empty?).uniq
|
|
76
|
+
log.debug("EntityWatchdog recent_task_log_texts lookback=#{lookback} limit=#{log_limit} raw=#{logs.size} unique=#{texts.size}")
|
|
77
|
+
texts
|
|
75
78
|
rescue StandardError => e
|
|
76
79
|
log.warn("EntityWatchdog recent_task_log_texts failed: #{e.message}")
|
|
77
80
|
[]
|
|
@@ -104,8 +107,9 @@ module Legion
|
|
|
104
107
|
source_agent: 'lex-apollo:entity_watchdog',
|
|
105
108
|
context: { entity_type: entity[:type], original_name: entity[:name] }
|
|
106
109
|
).publish
|
|
110
|
+
log.info("EntityWatchdog published entity type=#{entity[:type]} name=#{entity[:name]}")
|
|
107
111
|
rescue StandardError => e
|
|
108
|
-
|
|
112
|
+
handle_exception(e, level: :error, operation: 'apollo.entity_watchdog.publish_entity_ingest')
|
|
109
113
|
end
|
|
110
114
|
|
|
111
115
|
def entity_types
|
|
@@ -14,21 +14,27 @@ module Legion
|
|
|
14
14
|
|
|
15
15
|
def handle_vectorize(payload)
|
|
16
16
|
payload = symbolize(payload)
|
|
17
|
+
log.debug("WritebackVectorize handle_vectorize content_length=#{payload[:content].to_s.length} content_type=#{payload[:content_type] || 'nil'}")
|
|
17
18
|
result = Legion::LLM::Embeddings.generate(text: payload[:content])
|
|
18
19
|
vector = result.is_a?(Hash) ? result[:vector] : result
|
|
19
20
|
embedding = vector.is_a?(Array) && vector.any? ? vector : Array.new(1024, 0.0)
|
|
21
|
+
log.debug("WritebackVectorize embedding_dimensions=#{embedding.length} vector_generated=#{vector.is_a?(Array) && vector.any?}")
|
|
20
22
|
enriched = payload.merge(embedding: embedding)
|
|
21
23
|
|
|
22
24
|
if Helpers::Capability.can_write?
|
|
25
|
+
log.debug('WritebackVectorize route=direct_ingest')
|
|
23
26
|
Runners::Knowledge.handle_ingest(**enriched)
|
|
24
27
|
else
|
|
28
|
+
log.debug('WritebackVectorize route=transport_writeback')
|
|
25
29
|
Transport::Messages::Writeback.new(
|
|
26
30
|
**enriched, has_embedding: true
|
|
27
31
|
).publish
|
|
28
32
|
end
|
|
29
33
|
|
|
34
|
+
log.info('WritebackVectorize completed action=vectorized')
|
|
30
35
|
{ success: true, action: :vectorized }
|
|
31
36
|
rescue StandardError => e
|
|
37
|
+
handle_exception(e, level: :error, operation: 'apollo.writeback_vectorize.handle_vectorize')
|
|
32
38
|
{ success: false, error: e.message }
|
|
33
39
|
end
|
|
34
40
|
|
|
@@ -51,15 +51,17 @@ module Legion
|
|
|
51
51
|
req = json_body
|
|
52
52
|
halt 400, { error: 'query is required' }.to_json unless req[:query]
|
|
53
53
|
|
|
54
|
-
|
|
54
|
+
query_options = {
|
|
55
55
|
query: req[:query],
|
|
56
56
|
limit: req[:limit] || 10,
|
|
57
57
|
min_confidence: req[:min_confidence] || 0.3,
|
|
58
|
-
status: req[:status] || [:confirmed],
|
|
59
58
|
tags: req[:tags],
|
|
60
59
|
domain: req[:domain],
|
|
61
60
|
agent_id: req[:agent_id] || 'api'
|
|
62
|
-
|
|
61
|
+
}
|
|
62
|
+
query_options[:status] = req[:status] if req.key?(:status)
|
|
63
|
+
|
|
64
|
+
result = runner.handle_query(**query_options)
|
|
63
65
|
status result[:success] ? 200 : 500
|
|
64
66
|
result.to_json
|
|
65
67
|
end
|
|
@@ -4,17 +4,24 @@ module Legion
|
|
|
4
4
|
module Extensions
|
|
5
5
|
module Apollo
|
|
6
6
|
module Runners
|
|
7
|
-
module EntityExtractor
|
|
7
|
+
module EntityExtractor
|
|
8
8
|
DEFAULT_ENTITY_TYPES = %w[person service repository concept].freeze
|
|
9
9
|
DEFAULT_MIN_CONFIDENCE = 0.7
|
|
10
10
|
|
|
11
11
|
def extract_entities(text:, entity_types: nil, min_confidence: Helpers::Confidence.apollo_setting(:entity_extractor, :min_confidence, default: DEFAULT_MIN_CONFIDENCE), **) # rubocop:disable Layout/LineLength
|
|
12
|
-
|
|
12
|
+
if text.to_s.strip.empty?
|
|
13
|
+
log.debug('Apollo EntityExtractor.extract_entities skipped reason=empty_text')
|
|
14
|
+
return { success: true, entities: [], source: :empty }
|
|
15
|
+
end
|
|
13
16
|
|
|
14
|
-
|
|
17
|
+
unless defined?(Legion::LLM) && Legion::LLM.started?
|
|
18
|
+
log.debug('Apollo EntityExtractor.extract_entities skipped reason=llm_unavailable')
|
|
19
|
+
return { success: true, entities: [], source: :unavailable }
|
|
20
|
+
end
|
|
15
21
|
|
|
16
22
|
types = Array(entity_types).map(&:to_s)
|
|
17
23
|
types = DEFAULT_ENTITY_TYPES if types.empty?
|
|
24
|
+
log.debug("Apollo EntityExtractor.extract_entities text_length=#{text.to_s.length} types=#{types.join(',')} min_confidence=#{min_confidence}")
|
|
18
25
|
|
|
19
26
|
result = Legion::LLM.structured(
|
|
20
27
|
messages: [
|
|
@@ -29,9 +36,11 @@ module Legion
|
|
|
29
36
|
(entity[:confidence] || 0.0) >= min_confidence &&
|
|
30
37
|
(types.empty? || types.include?(entity[:type].to_s))
|
|
31
38
|
end
|
|
39
|
+
log.info("Apollo EntityExtractor.extract_entities raw=#{raw_entities.size} filtered=#{filtered.size}")
|
|
32
40
|
|
|
33
41
|
{ success: true, entities: filtered, source: :llm }
|
|
34
42
|
rescue StandardError => e
|
|
43
|
+
handle_exception(e, level: :error, operation: 'apollo.entity_extractor.extract_entities')
|
|
35
44
|
{ success: false, entities: [], error: e.message, source: :error }
|
|
36
45
|
end
|
|
37
46
|
|
|
@@ -70,6 +79,8 @@ module Legion
|
|
|
70
79
|
required: ['entities']
|
|
71
80
|
}
|
|
72
81
|
end
|
|
82
|
+
|
|
83
|
+
include Legion::Extensions::Helpers::Lex if defined?(Legion::Extensions::Helpers::Lex)
|
|
73
84
|
end
|
|
74
85
|
end
|
|
75
86
|
end
|
|
@@ -18,52 +18,65 @@ module Legion
|
|
|
18
18
|
end
|
|
19
19
|
|
|
20
20
|
def aggregate(**)
|
|
21
|
-
|
|
21
|
+
unless defined?(Legion::Data::Model::ApolloEntry)
|
|
22
|
+
log.warn('Apollo Expertise.aggregate skipped: apollo_data_not_available')
|
|
23
|
+
return { success: false, error: 'apollo_data_not_available' }
|
|
24
|
+
end
|
|
22
25
|
|
|
23
26
|
entries = Legion::Data::Model::ApolloEntry
|
|
24
27
|
.select(:source_agent, :tags, :confidence)
|
|
25
28
|
.exclude(source_agent: nil)
|
|
26
29
|
.all
|
|
27
|
-
|
|
28
|
-
groups = {}
|
|
29
|
-
entries.each do |entry|
|
|
30
|
-
agent = entry.source_agent
|
|
31
|
-
domain = entry.tags.is_a?(Array) ? (entry.tags.first || 'general') : 'general'
|
|
32
|
-
key = "#{agent}:#{domain}"
|
|
33
|
-
groups[key] ||= { agent_id: agent, domain: domain, confidences: [] }
|
|
34
|
-
groups[key][:confidences] << entry.confidence.to_f
|
|
35
|
-
end
|
|
30
|
+
log.debug("Apollo Expertise.aggregate entries=#{entries.size}")
|
|
36
31
|
|
|
37
32
|
agent_set = Set.new
|
|
38
33
|
domain_set = Set.new
|
|
39
34
|
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
count = group[:confidences].size
|
|
43
|
-
cap = Helpers::Confidence.apollo_setting(:expertise, :proficiency_cap, default: 1.0)
|
|
44
|
-
proficiency = [avg * Math.log2(count + 1), cap].min
|
|
45
|
-
|
|
46
|
-
existing = Legion::Data::Model::ApolloExpertise
|
|
47
|
-
.where(agent_id: group[:agent_id], domain: group[:domain]).first
|
|
48
|
-
|
|
49
|
-
if existing
|
|
50
|
-
existing.update(proficiency: proficiency, entry_count: count, last_active_at: Time.now)
|
|
51
|
-
else
|
|
52
|
-
Legion::Data::Model::ApolloExpertise.create(
|
|
53
|
-
agent_id: group[:agent_id], domain: group[:domain],
|
|
54
|
-
proficiency: proficiency, entry_count: count, last_active_at: Time.now
|
|
55
|
-
)
|
|
56
|
-
end
|
|
57
|
-
|
|
35
|
+
expertise_groups(entries).each_value do |group|
|
|
36
|
+
upsert_expertise_group(group)
|
|
58
37
|
agent_set << group[:agent_id]
|
|
59
38
|
domain_set << group[:domain]
|
|
60
39
|
end
|
|
61
40
|
|
|
62
41
|
{ success: true, agents: agent_set.size, domains: domain_set.size }
|
|
42
|
+
.tap { |result| log.info("Apollo Expertise.aggregate agents=#{result[:agents]} domains=#{result[:domains]}") }
|
|
63
43
|
rescue Sequel::Error => e
|
|
44
|
+
handle_exception(e, level: :error, operation: 'apollo.expertise.aggregate')
|
|
64
45
|
{ success: false, error: e.message }
|
|
65
46
|
end
|
|
66
47
|
|
|
48
|
+
def expertise_groups(entries)
|
|
49
|
+
entries.each_with_object({}) do |entry, groups|
|
|
50
|
+
agent = entry.source_agent
|
|
51
|
+
domain = entry.tags.is_a?(Array) ? (entry.tags.first || 'general') : 'general'
|
|
52
|
+
key = "#{agent}:#{domain}"
|
|
53
|
+
groups[key] ||= { agent_id: agent, domain: domain, confidences: [] }
|
|
54
|
+
groups[key][:confidences] << entry.confidence.to_f
|
|
55
|
+
end
|
|
56
|
+
end
|
|
57
|
+
|
|
58
|
+
def upsert_expertise_group(group)
|
|
59
|
+
count = group[:confidences].size
|
|
60
|
+
proficiency = expertise_proficiency(group[:confidences])
|
|
61
|
+
existing = Legion::Data::Model::ApolloExpertise
|
|
62
|
+
.where(agent_id: group[:agent_id], domain: group[:domain]).first
|
|
63
|
+
|
|
64
|
+
if existing
|
|
65
|
+
existing.update(proficiency: proficiency, entry_count: count, last_active_at: Time.now)
|
|
66
|
+
else
|
|
67
|
+
Legion::Data::Model::ApolloExpertise.create(
|
|
68
|
+
agent_id: group[:agent_id], domain: group[:domain],
|
|
69
|
+
proficiency: proficiency, entry_count: count, last_active_at: Time.now
|
|
70
|
+
)
|
|
71
|
+
end
|
|
72
|
+
end
|
|
73
|
+
|
|
74
|
+
def expertise_proficiency(confidences)
|
|
75
|
+
avg = confidences.sum / confidences.size
|
|
76
|
+
cap = Helpers::Confidence.apollo_setting(:expertise, :proficiency_cap, default: 1.0)
|
|
77
|
+
[avg * Math.log2(confidences.size + 1), cap].min
|
|
78
|
+
end
|
|
79
|
+
|
|
67
80
|
include Legion::Extensions::Helpers::Lex if defined?(Legion::Extensions::Helpers::Lex)
|
|
68
81
|
end
|
|
69
82
|
end
|
|
@@ -4,7 +4,10 @@ module Legion
|
|
|
4
4
|
module Extensions
|
|
5
5
|
module Apollo
|
|
6
6
|
module Runners
|
|
7
|
-
module Gas
|
|
7
|
+
module Gas
|
|
8
|
+
include Legion::Extensions::Helpers::Lex if defined?(Legion::Extensions::Helpers::Lex)
|
|
9
|
+
extend Legion::Extensions::Helpers::Lex if defined?(Legion::Extensions::Helpers::Lex)
|
|
10
|
+
|
|
8
11
|
RELATION_TYPES = %w[
|
|
9
12
|
similar_to contradicts depends_on causes
|
|
10
13
|
part_of supersedes supports_by extends
|
|
@@ -16,10 +19,6 @@ module Legion
|
|
|
16
19
|
|
|
17
20
|
module_function
|
|
18
21
|
|
|
19
|
-
def log
|
|
20
|
-
Legion::Logging
|
|
21
|
-
end
|
|
22
|
-
|
|
23
22
|
def json_load(str)
|
|
24
23
|
::JSON.parse(str, symbolize_names: true)
|
|
25
24
|
end
|
|
@@ -31,7 +30,12 @@ module Legion
|
|
|
31
30
|
def fallback_confidence = Helpers::Confidence.apollo_setting(:gas, :fallback_confidence, default: 0.5)
|
|
32
31
|
|
|
33
32
|
def process(audit_event)
|
|
34
|
-
|
|
33
|
+
unless processable?(audit_event)
|
|
34
|
+
log.debug('GAS process skipped reason=no_content')
|
|
35
|
+
return { phases_completed: 0, reason: 'no content' }
|
|
36
|
+
end
|
|
37
|
+
|
|
38
|
+
log.debug("GAS process start request_id=#{audit_event[:request_id] || 'nil'} messages=#{Array(audit_event[:messages]).size} response_length=#{audit_event[:response_content].to_s.length}") # rubocop:disable Layout/LineLength
|
|
35
39
|
|
|
36
40
|
facts = phase_comprehend(audit_event)
|
|
37
41
|
entities = phase_extract(audit_event, facts)
|
|
@@ -40,7 +44,7 @@ module Legion
|
|
|
40
44
|
deposit_result = phase_deposit(facts, entities, relations, synthesis, audit_event)
|
|
41
45
|
anticipations = phase_anticipate(facts, synthesis)
|
|
42
46
|
|
|
43
|
-
{
|
|
47
|
+
result = {
|
|
44
48
|
phases_completed: 6,
|
|
45
49
|
facts: facts.length,
|
|
46
50
|
entities: entities.length,
|
|
@@ -49,8 +53,10 @@ module Legion
|
|
|
49
53
|
deposited: deposit_result,
|
|
50
54
|
anticipations: anticipations.length
|
|
51
55
|
}
|
|
56
|
+
log.info("GAS process complete facts=#{result[:facts]} entities=#{result[:entities]} relations=#{result[:relations]} synthesis=#{result[:synthesis]} anticipations=#{result[:anticipations]}") # rubocop:disable Layout/LineLength
|
|
57
|
+
result
|
|
52
58
|
rescue StandardError => e
|
|
53
|
-
log.
|
|
59
|
+
log.error("GAS pipeline error: #{e.message}")
|
|
54
60
|
{ phases_completed: 0, error: e.message }
|
|
55
61
|
end
|
|
56
62
|
|
|
@@ -63,19 +69,24 @@ module Legion
|
|
|
63
69
|
messages = audit_event[:messages]
|
|
64
70
|
response = audit_event[:response_content]
|
|
65
71
|
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
72
|
+
mode = llm_available? ? :llm : :mechanical
|
|
73
|
+
log.debug("GAS phase_comprehend mode=#{mode} messages=#{Array(messages).size} response_length=#{response.to_s.length}")
|
|
74
|
+
facts = mode == :llm ? llm_comprehend(messages, response) : mechanical_comprehend(messages, response)
|
|
75
|
+
log.debug("GAS phase_comprehend facts=#{facts.size}")
|
|
76
|
+
facts
|
|
71
77
|
end
|
|
72
78
|
|
|
73
79
|
# Phase 2: Extract - entity extraction (delegates to existing EntityExtractor)
|
|
74
80
|
def phase_extract(audit_event, _facts)
|
|
75
|
-
|
|
81
|
+
unless defined?(Runners::EntityExtractor)
|
|
82
|
+
log.debug('GAS phase_extract skipped reason=entity_extractor_unavailable')
|
|
83
|
+
return []
|
|
84
|
+
end
|
|
76
85
|
|
|
77
86
|
result = Runners::EntityExtractor.extract_entities(text: audit_event[:response_content])
|
|
78
|
-
result[:success] ? (result[:entities] || []) : []
|
|
87
|
+
entities = result[:success] ? (result[:entities] || []) : []
|
|
88
|
+
log.debug("GAS phase_extract success=#{result[:success]} entities=#{entities.size}")
|
|
89
|
+
entities
|
|
79
90
|
rescue StandardError => e
|
|
80
91
|
log.warn("GAS phase_extract failed: #{e.message}")
|
|
81
92
|
[]
|
|
@@ -83,10 +94,16 @@ module Legion
|
|
|
83
94
|
|
|
84
95
|
# Phase 3: Relate - classify relationships between new and existing entries
|
|
85
96
|
def phase_relate(facts, _entities)
|
|
86
|
-
|
|
97
|
+
unless defined?(Runners::Knowledge)
|
|
98
|
+
log.debug('GAS phase_relate skipped reason=knowledge_runner_unavailable')
|
|
99
|
+
return []
|
|
100
|
+
end
|
|
87
101
|
|
|
88
102
|
existing = fetch_similar_entries(facts)
|
|
89
|
-
|
|
103
|
+
if existing.empty?
|
|
104
|
+
log.debug("GAS phase_relate skipped reason=no_existing_entries facts=#{facts.size}")
|
|
105
|
+
return []
|
|
106
|
+
end
|
|
90
107
|
|
|
91
108
|
relations = []
|
|
92
109
|
facts.each do |fact|
|
|
@@ -95,15 +112,24 @@ module Legion
|
|
|
95
112
|
relations << relation if relation
|
|
96
113
|
end
|
|
97
114
|
end
|
|
115
|
+
log.debug("GAS phase_relate facts=#{facts.size} existing=#{existing.size} relations=#{relations.size}")
|
|
98
116
|
relations
|
|
99
117
|
end
|
|
100
118
|
|
|
101
119
|
# Phase 4: Synthesize - generate derivative knowledge
|
|
102
120
|
def phase_synthesize(facts, _relations)
|
|
103
|
-
|
|
104
|
-
|
|
121
|
+
if facts.length < 2
|
|
122
|
+
log.debug("GAS phase_synthesize skipped reason=insufficient_facts facts=#{facts.length}")
|
|
123
|
+
return []
|
|
124
|
+
end
|
|
125
|
+
unless llm_available?
|
|
126
|
+
log.debug('GAS phase_synthesize skipped reason=llm_unavailable')
|
|
127
|
+
return []
|
|
128
|
+
end
|
|
105
129
|
|
|
106
|
-
llm_synthesize(facts)
|
|
130
|
+
synthesis = llm_synthesize(facts)
|
|
131
|
+
log.debug("GAS phase_synthesize synthesis=#{synthesis.size}")
|
|
132
|
+
synthesis
|
|
107
133
|
rescue StandardError => e
|
|
108
134
|
log.warn("GAS phase_synthesize failed: #{e.message}")
|
|
109
135
|
[]
|
|
@@ -111,7 +137,10 @@ module Legion
|
|
|
111
137
|
|
|
112
138
|
# Phase 5: Deposit - atomic write to Apollo
|
|
113
139
|
def phase_deposit(facts, _entities, _relations, _synthesis, audit_event)
|
|
114
|
-
|
|
140
|
+
unless defined?(Runners::Knowledge)
|
|
141
|
+
log.debug('GAS phase_deposit skipped reason=knowledge_runner_unavailable')
|
|
142
|
+
return { deposited: 0 }
|
|
143
|
+
end
|
|
115
144
|
|
|
116
145
|
deposited = 0
|
|
117
146
|
facts.each do |fact|
|
|
@@ -128,15 +157,24 @@ module Legion
|
|
|
128
157
|
rescue StandardError => e
|
|
129
158
|
log.warn("GAS deposit error: #{e.message}")
|
|
130
159
|
end
|
|
160
|
+
log.info("GAS phase_deposit deposited=#{deposited} facts=#{facts.size}")
|
|
131
161
|
{ deposited: deposited }
|
|
132
162
|
end
|
|
133
163
|
|
|
134
164
|
# Phase 6: Anticipate - pre-cache likely follow-up questions
|
|
135
165
|
def phase_anticipate(facts, _synthesis)
|
|
136
|
-
|
|
137
|
-
|
|
166
|
+
if facts.empty?
|
|
167
|
+
log.debug('GAS phase_anticipate skipped reason=no_facts')
|
|
168
|
+
return []
|
|
169
|
+
end
|
|
170
|
+
unless llm_available?
|
|
171
|
+
log.debug('GAS phase_anticipate skipped reason=llm_unavailable')
|
|
172
|
+
return []
|
|
173
|
+
end
|
|
138
174
|
|
|
139
|
-
llm_anticipate(facts)
|
|
175
|
+
anticipations = llm_anticipate(facts)
|
|
176
|
+
log.debug("GAS phase_anticipate anticipations=#{anticipations.size}")
|
|
177
|
+
anticipations
|
|
140
178
|
rescue StandardError => e
|
|
141
179
|
log.warn("GAS phase_anticipate failed: #{e.message}")
|
|
142
180
|
[]
|
|
@@ -153,7 +191,9 @@ module Legion
|
|
|
153
191
|
log.warn("GAS fetch_similar_entries failed for fact: #{e.message}")
|
|
154
192
|
next
|
|
155
193
|
end
|
|
156
|
-
entries.uniq { |e| e[:id] }
|
|
194
|
+
unique = entries.uniq { |e| e[:id] }
|
|
195
|
+
log.debug("GAS fetch_similar_entries facts=#{facts.size} entries=#{unique.size}")
|
|
196
|
+
unique
|
|
157
197
|
end
|
|
158
198
|
|
|
159
199
|
def classify_relation(fact, entry)
|
|
@@ -21,9 +21,12 @@ module Legion
|
|
|
21
21
|
link: :association, relation: :association, connection: :association,
|
|
22
22
|
inference: :association, implication: :association
|
|
23
23
|
}.freeze
|
|
24
|
+
DEFAULT_QUERY_STATUS = [:confirmed].freeze
|
|
25
|
+
UNSET = Object.new.freeze
|
|
24
26
|
|
|
25
27
|
def store_knowledge(content:, content_type:, tags: [], source_agent: nil, context: {}, **)
|
|
26
28
|
content_type = normalize_content_type(content_type)
|
|
29
|
+
log.debug("Apollo Knowledge.store_knowledge content_type=#{content_type} tags=#{Array(tags).size} source_agent=#{source_agent || 'nil'} data_available=#{defined?(Legion::Data::Model::ApolloEntry) ? true : false}") # rubocop:disable Layout/LineLength
|
|
27
30
|
|
|
28
31
|
if defined?(Legion::Data::Model::ApolloEntry)
|
|
29
32
|
return handle_ingest(content: content, content_type: content_type,
|
|
@@ -41,6 +44,7 @@ module Legion
|
|
|
41
44
|
end
|
|
42
45
|
|
|
43
46
|
def query_knowledge(query:, limit: Helpers::GraphQuery.default_query_limit, min_confidence: Helpers::GraphQuery.default_query_min_confidence, status: %i[confirmed candidate], tags: nil, **) # rubocop:disable Layout/LineLength
|
|
47
|
+
log.debug("Apollo Knowledge.query_knowledge query_length=#{query.to_s.length} limit=#{limit} statuses=#{Array(status).join(',')} tags=#{Array(tags).size} data_available=#{defined?(Legion::Data::Model::ApolloEntry) ? true : false}") # rubocop:disable Layout/LineLength
|
|
44
48
|
if defined?(Legion::Data::Model::ApolloEntry)
|
|
45
49
|
return handle_query(query: query, limit: limit, min_confidence: min_confidence,
|
|
46
50
|
status: status, tags: tags, **)
|
|
@@ -57,6 +61,7 @@ module Legion
|
|
|
57
61
|
end
|
|
58
62
|
|
|
59
63
|
def related_entries(entry_id:, relation_types: nil, depth: Helpers::GraphQuery.default_depth, **)
|
|
64
|
+
log.debug("Apollo Knowledge.related_entries entry_id=#{entry_id} depth=#{depth} relation_types=#{Array(relation_types).join(',')} data_available=#{defined?(Legion::Data::Model::ApolloEntry) ? true : false}") # rubocop:disable Layout/LineLength
|
|
60
65
|
return handle_traverse(entry_id: entry_id, depth: depth, relation_types: relation_types, **) if defined?(Legion::Data::Model::ApolloEntry)
|
|
61
66
|
|
|
62
67
|
{
|
|
@@ -75,74 +80,73 @@ module Legion
|
|
|
75
80
|
}
|
|
76
81
|
end
|
|
77
82
|
|
|
78
|
-
def handle_ingest(content: nil, content_type: nil, tags: [], source_agent: 'unknown', source_provider: nil, source_channel: nil, knowledge_domain: nil, submitted_by: nil, submitted_from: nil, content_hash: nil, context: {}, skip: false, **) # rubocop:disable Metrics/ParameterLists, Layout/LineLength
|
|
83
|
+
def handle_ingest(content: nil, content_type: nil, tags: [], source_agent: 'unknown', source_provider: nil, source_channel: nil, knowledge_domain: nil, submitted_by: nil, submitted_from: nil, content_hash: nil, context: {}, skip: false, **) # rubocop:disable Metrics/ParameterLists, Layout/LineLength
|
|
79
84
|
return { status: :skipped } if skip
|
|
80
|
-
|
|
85
|
+
|
|
86
|
+
content = normalize_text_input(content)
|
|
87
|
+
log.debug("Apollo Knowledge.handle_ingest content_length=#{content.length} content_type=#{content_type} tags=#{Array(tags).size} source_agent=#{source_agent} source_channel=#{source_channel || 'nil'}") # rubocop:disable Layout/LineLength
|
|
88
|
+
return { success: false, error: 'content is required' } if content.strip.empty?
|
|
81
89
|
return { success: false, error: 'content_type is required' } if content_type.nil?
|
|
82
90
|
return { success: false, error: 'apollo_data_not_available' } unless defined?(Legion::Data::Model::ApolloEntry)
|
|
83
91
|
|
|
84
|
-
# Content hash dedup
|
|
85
92
|
hash = content_hash || (defined?(Helpers::Writeback) ? Helpers::Writeback.content_hash(content) : nil)
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
.first
|
|
91
|
-
if existing
|
|
92
|
-
existing.update(confidence: [existing.confidence + Helpers::Confidence.retrieval_boost, 1.0].min)
|
|
93
|
-
return { success: true, entry_id: existing.id, deduped: true }
|
|
94
|
-
end
|
|
93
|
+
existing = active_duplicate_for_hash(hash)
|
|
94
|
+
if existing
|
|
95
|
+
log.info("Apollo Knowledge.handle_ingest deduped entry_id=#{existing.id} source_agent=#{source_agent}")
|
|
96
|
+
return { success: true, entry_id: existing.id, deduped: true }
|
|
95
97
|
end
|
|
96
98
|
|
|
97
99
|
embedding = embed_text(content)
|
|
98
100
|
content_type_sym = content_type.to_s
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
tags: Sequel.pg_array(tag_array),
|
|
114
|
-
status: 'candidate',
|
|
115
|
-
knowledge_domain: domain,
|
|
116
|
-
submitted_by: submitted_by,
|
|
117
|
-
submitted_from: submitted_from,
|
|
118
|
-
content_hash: hash,
|
|
119
|
-
embedding: Sequel.lit("'[#{embedding.join(',')}]'::vector")
|
|
101
|
+
metadata = ingest_metadata(tags: tags, knowledge_domain: knowledge_domain, source_agent: source_agent,
|
|
102
|
+
source_provider: source_provider, source_channel: source_channel,
|
|
103
|
+
submitted_by: submitted_by, submitted_from: submitted_from)
|
|
104
|
+
|
|
105
|
+
corroborated, existing_id = find_corroboration(
|
|
106
|
+
embedding, content_type_sym, metadata[:source_agent], metadata[:source_channel]
|
|
107
|
+
)
|
|
108
|
+
|
|
109
|
+
if corroborated
|
|
110
|
+
log.info("Apollo Knowledge.handle_ingest corroborated entry_id=#{existing_id} source_agent=#{metadata[:source_agent]}")
|
|
111
|
+
else
|
|
112
|
+
existing_id = create_candidate_entry(
|
|
113
|
+
content: content, content_type: content_type_sym, context: context,
|
|
114
|
+
metadata: metadata, content_hash: hash, embedding: embedding
|
|
120
115
|
)
|
|
121
|
-
existing_id = new_entry.id
|
|
122
116
|
end
|
|
123
117
|
|
|
124
|
-
upsert_expertise(source_agent: source_agent, domain: domain)
|
|
118
|
+
upsert_expertise(source_agent: metadata[:source_agent], domain: metadata[:domain])
|
|
125
119
|
|
|
126
120
|
Legion::Data::Model::ApolloAccessLog.create(
|
|
127
|
-
entry_id: existing_id, agent_id: source_agent, action: 'ingest'
|
|
121
|
+
entry_id: existing_id, agent_id: metadata[:source_agent], action: 'ingest'
|
|
128
122
|
)
|
|
129
123
|
|
|
130
124
|
contradictions = detect_contradictions(existing_id, embedding, content)
|
|
125
|
+
log.debug("Apollo Knowledge.handle_ingest complete entry_id=#{existing_id} corroborated=#{corroborated} contradictions=#{contradictions.size}")
|
|
131
126
|
|
|
132
127
|
{ success: true, entry_id: existing_id, status: corroborated ? 'corroborated' : 'candidate',
|
|
133
128
|
corroborated: corroborated, contradictions: contradictions }
|
|
134
129
|
rescue Sequel::Error => e
|
|
130
|
+
handle_exception(e, level: :error, operation: 'apollo.knowledge.handle_ingest')
|
|
135
131
|
{ success: false, error: e.message }
|
|
136
132
|
end
|
|
137
133
|
|
|
138
|
-
def handle_query(query:, limit: Helpers::GraphQuery.default_query_limit, min_confidence: Helpers::GraphQuery.default_query_min_confidence, status:
|
|
134
|
+
def handle_query(query:, limit: Helpers::GraphQuery.default_query_limit, min_confidence: Helpers::GraphQuery.default_query_min_confidence, status: UNSET, tags: nil, domain: nil, agent_id: 'unknown', **) # rubocop:disable Layout/LineLength
|
|
139
135
|
return { success: false, error: 'apollo_data_not_available' } unless defined?(Legion::Data::Model::ApolloEntry)
|
|
140
136
|
|
|
141
137
|
query = normalize_text_input(query)
|
|
138
|
+
status_defaulted = status.equal?(UNSET)
|
|
139
|
+
requested_status = status_defaulted ? DEFAULT_QUERY_STATUS : status
|
|
140
|
+
log.debug("Apollo Knowledge.handle_query mode=#{browse_query?(query) ? 'browse' : 'semantic'} query_length=#{query.length} limit=#{limit} statuses=#{Array(requested_status).join(',')} status_defaulted=#{status_defaulted} tags=#{Array(tags).size} domain=#{domain || 'nil'} agent_id=#{agent_id}") # rubocop:disable Layout/LineLength
|
|
141
|
+
if browse_query?(query)
|
|
142
|
+
return list_entries_chronologically(query: query, limit: limit, status: requested_status,
|
|
143
|
+
status_defaulted: status_defaulted, tags: tags, domain: domain)
|
|
144
|
+
end
|
|
145
|
+
|
|
142
146
|
embedding = embed_text(query)
|
|
143
147
|
sql = Helpers::GraphQuery.build_semantic_search_sql(
|
|
144
148
|
limit: limit, min_confidence: min_confidence,
|
|
145
|
-
statuses: Array(
|
|
149
|
+
statuses: Array(requested_status).map(&:to_s), tags: tags, domain: domain
|
|
146
150
|
)
|
|
147
151
|
|
|
148
152
|
db = Legion::Data::Model::ApolloEntry.db
|
|
@@ -173,14 +177,17 @@ module Legion
|
|
|
173
177
|
knowledge_domain: entry[:knowledge_domain] }
|
|
174
178
|
end
|
|
175
179
|
|
|
180
|
+
log.info("Apollo Knowledge.handle_query results=#{formatted.size} mode=semantic agent_id=#{agent_id}")
|
|
176
181
|
{ success: true, entries: formatted, count: formatted.size }
|
|
177
182
|
rescue Sequel::Error => e
|
|
183
|
+
handle_exception(e, level: :error, operation: 'apollo.knowledge.handle_query')
|
|
178
184
|
{ success: false, error: e.message }
|
|
179
185
|
end
|
|
180
186
|
|
|
181
187
|
def handle_traverse(entry_id:, depth: Helpers::GraphQuery.default_depth, relation_types: nil, agent_id: 'unknown', **)
|
|
182
188
|
return { success: false, error: 'apollo_data_not_available' } unless defined?(Legion::Data::Model::ApolloEntry)
|
|
183
189
|
|
|
190
|
+
log.debug("Apollo Knowledge.handle_traverse entry_id=#{entry_id} depth=#{depth} relation_types=#{Array(relation_types).join(',')} agent_id=#{agent_id}") # rubocop:disable Layout/LineLength
|
|
184
191
|
# Whitelist relation_types to prevent SQL injection (they are string-interpolated in build_traversal_sql)
|
|
185
192
|
if relation_types
|
|
186
193
|
allowed = Helpers::Confidence::RELATION_TYPES
|
|
@@ -203,14 +210,17 @@ module Legion
|
|
|
203
210
|
depth: entry[:depth], activation: entry[:activation] }
|
|
204
211
|
end
|
|
205
212
|
|
|
213
|
+
log.info("Apollo Knowledge.handle_traverse results=#{formatted.size} entry_id=#{entry_id}")
|
|
206
214
|
{ success: true, entries: formatted, count: formatted.size }
|
|
207
215
|
rescue Sequel::Error => e
|
|
216
|
+
handle_exception(e, level: :error, operation: 'apollo.knowledge.handle_traverse')
|
|
208
217
|
{ success: false, error: e.message }
|
|
209
218
|
end
|
|
210
219
|
|
|
211
220
|
def redistribute_knowledge(agent_id:, min_confidence: Helpers::Confidence.apollo_setting(:query, :redistribute_min_confidence, default: 0.5), **)
|
|
212
221
|
return { success: false, error: 'apollo_data_not_available' } unless defined?(Legion::Data::Model::ApolloEntry)
|
|
213
222
|
|
|
223
|
+
log.debug("Apollo Knowledge.redistribute_knowledge agent_id=#{agent_id} min_confidence=#{min_confidence}")
|
|
214
224
|
entries = Legion::Data::Model::ApolloEntry
|
|
215
225
|
.where(source_agent: agent_id, status: 'confirmed')
|
|
216
226
|
.where { confidence > min_confidence }
|
|
@@ -238,6 +248,7 @@ module Legion
|
|
|
238
248
|
log.info("[apollo] redistributed #{redistributed} entries from departing agent=#{agent_id}")
|
|
239
249
|
{ success: true, redistributed: redistributed, agent_id: agent_id }
|
|
240
250
|
rescue Sequel::Error => e
|
|
251
|
+
handle_exception(e, level: :error, operation: 'apollo.knowledge.redistribute_knowledge')
|
|
241
252
|
{ success: false, error: e.message }
|
|
242
253
|
end
|
|
243
254
|
|
|
@@ -247,6 +258,7 @@ module Legion
|
|
|
247
258
|
return { success: false, error: 'apollo_data_not_available' } unless defined?(Legion::Data::Model::ApolloEntry)
|
|
248
259
|
|
|
249
260
|
query = normalize_text_input(query)
|
|
261
|
+
log.debug("Apollo Knowledge.retrieve_relevant query_length=#{query.length} limit=#{limit} min_confidence=#{min_confidence} tags=#{Array(tags).size} domain=#{domain || 'nil'}") # rubocop:disable Layout/LineLength
|
|
250
262
|
return { success: true, entries: [], count: 0 } if query.nil? || query.to_s.strip.empty?
|
|
251
263
|
|
|
252
264
|
embedding = embed_text(query)
|
|
@@ -273,8 +285,10 @@ module Legion
|
|
|
273
285
|
knowledge_domain: entry[:knowledge_domain] }
|
|
274
286
|
end
|
|
275
287
|
|
|
288
|
+
log.info("Apollo Knowledge.retrieve_relevant results=#{formatted.size} limit=#{limit}")
|
|
276
289
|
{ success: true, entries: formatted, count: formatted.size }
|
|
277
290
|
rescue Sequel::Error => e
|
|
291
|
+
handle_exception(e, level: :error, operation: 'apollo.knowledge.retrieve_relevant')
|
|
278
292
|
{ success: false, error: e.message }
|
|
279
293
|
end
|
|
280
294
|
|
|
@@ -283,6 +297,7 @@ module Legion
|
|
|
283
297
|
return { success: false, error: 'apollo_data_not_available' }
|
|
284
298
|
end
|
|
285
299
|
|
|
300
|
+
log.debug("Apollo Knowledge.prepare_mesh_export target_domain=#{target_domain} min_confidence=#{min_confidence} limit=#{limit}")
|
|
286
301
|
conn = Legion::Data.connection
|
|
287
302
|
allowed = allowed_domains_for(target_domain)
|
|
288
303
|
|
|
@@ -302,7 +317,9 @@ module Legion
|
|
|
302
317
|
end
|
|
303
318
|
|
|
304
319
|
{ success: true, entries: formatted, count: formatted.size, target_domain: target_domain }
|
|
320
|
+
.tap { |result| log.info("Apollo Knowledge.prepare_mesh_export results=#{result[:count]} target_domain=#{target_domain}") }
|
|
305
321
|
rescue Sequel::Error => e
|
|
322
|
+
handle_exception(e, level: :error, operation: 'apollo.knowledge.prepare_mesh_export')
|
|
306
323
|
{ success: false, error: e.message }
|
|
307
324
|
end
|
|
308
325
|
|
|
@@ -311,6 +328,7 @@ module Legion
|
|
|
311
328
|
return { deleted: 0, redacted: 0, error: 'apollo_data_not_available' }
|
|
312
329
|
end
|
|
313
330
|
|
|
331
|
+
log.warn("Apollo Knowledge.handle_erasure_request agent_id=#{agent_id}")
|
|
314
332
|
conn = Legion::Data.connection
|
|
315
333
|
|
|
316
334
|
# Delete entries solely from dead agent (not confirmed by others)
|
|
@@ -325,7 +343,9 @@ module Legion
|
|
|
325
343
|
.update(source_agent: 'redacted', source_provider: nil, source_channel: nil)
|
|
326
344
|
|
|
327
345
|
{ deleted: deleted, redacted: redacted, agent_id: agent_id }
|
|
346
|
+
.tap { |result| log.info("Apollo Knowledge.handle_erasure_request deleted=#{result[:deleted]} redacted=#{result[:redacted]} agent_id=#{agent_id}") } # rubocop:disable Layout/LineLength
|
|
328
347
|
rescue Sequel::Error => e
|
|
348
|
+
handle_exception(e, level: :error, operation: 'apollo.knowledge.handle_erasure_request')
|
|
329
349
|
{ deleted: 0, redacted: 0, error: e.message }
|
|
330
350
|
end
|
|
331
351
|
|
|
@@ -341,21 +361,124 @@ module Legion
|
|
|
341
361
|
|
|
342
362
|
def embed_text(text)
|
|
343
363
|
text = normalize_text_input(text)
|
|
364
|
+
log.debug("Apollo Knowledge.embed_text text_length=#{text.length}")
|
|
344
365
|
result = Legion::LLM::Embeddings.generate(text: text)
|
|
345
366
|
vector = result.is_a?(Hash) ? result[:vector] : result
|
|
346
|
-
vector.is_a?(Array) && vector.any?
|
|
367
|
+
if vector.is_a?(Array) && vector.any?
|
|
368
|
+
log.debug("Apollo Knowledge.embed_text vector_dimensions=#{vector.length}")
|
|
369
|
+
vector
|
|
370
|
+
else
|
|
371
|
+
log.warn('Apollo Knowledge.embed_text returned no vector; using zero-vector fallback')
|
|
372
|
+
Array.new(1024, 0.0)
|
|
373
|
+
end
|
|
347
374
|
rescue StandardError => e
|
|
348
375
|
log.warn("Apollo Knowledge.embed_text failed: #{e.message}")
|
|
349
376
|
Array.new(1024, 0.0)
|
|
350
377
|
end
|
|
351
378
|
|
|
352
379
|
def normalize_text_input(value)
|
|
353
|
-
|
|
380
|
+
result = if defined?(Legion::Apollo) && Legion::Apollo.respond_to?(:normalize_text_input, true)
|
|
381
|
+
Legion::Apollo.send(:normalize_text_input, value)
|
|
382
|
+
else
|
|
383
|
+
value.to_s
|
|
384
|
+
end
|
|
354
385
|
|
|
355
|
-
|
|
386
|
+
sanitize_for_postgres(result)
|
|
356
387
|
rescue StandardError => e
|
|
357
388
|
log.warn("Apollo Knowledge.normalize_text_input failed: #{e.message}")
|
|
358
|
-
|
|
389
|
+
''
|
|
390
|
+
end
|
|
391
|
+
|
|
392
|
+
def sanitize_for_postgres(value)
|
|
393
|
+
return value unless value.is_a?(String)
|
|
394
|
+
|
|
395
|
+
string = value.encoding == Encoding::UTF_8 ? value.dup : value.dup.force_encoding(Encoding::UTF_8)
|
|
396
|
+
changed = string.include?("\x00") || !string.valid_encoding?
|
|
397
|
+
string = string.scrub('') unless string.valid_encoding?
|
|
398
|
+
sanitized = string.delete("\x00")
|
|
399
|
+
log.debug("Apollo Knowledge.sanitize_for_postgres sanitized original_length=#{value.bytesize} sanitized_length=#{sanitized.bytesize}") if changed
|
|
400
|
+
sanitized
|
|
401
|
+
end
|
|
402
|
+
|
|
403
|
+
def truncate_for_column(value, max_length)
|
|
404
|
+
return nil if value.nil?
|
|
405
|
+
|
|
406
|
+
normalize_text_input(value)[0, max_length]
|
|
407
|
+
end
|
|
408
|
+
|
|
409
|
+
def active_duplicate_for_hash(hash)
|
|
410
|
+
return nil unless hash
|
|
411
|
+
|
|
412
|
+
existing = Legion::Data::Model::ApolloEntry
|
|
413
|
+
.where(content_hash: hash)
|
|
414
|
+
.exclude(status: 'archived')
|
|
415
|
+
.first
|
|
416
|
+
existing&.update(confidence: [existing.confidence + Helpers::Confidence.retrieval_boost, 1.0].min)
|
|
417
|
+
log.debug("Apollo Knowledge.active_duplicate_for_hash matched entry_id=#{existing.id}") if existing
|
|
418
|
+
existing
|
|
419
|
+
end
|
|
420
|
+
|
|
421
|
+
def ingest_metadata(tags:, knowledge_domain:, source_agent:, source_provider:, source_channel:, submitted_by:, submitted_from:)
|
|
422
|
+
tag_array = defined?(Helpers::TagNormalizer) ? Helpers::TagNormalizer.normalize_all(tags) : Array(tags)
|
|
423
|
+
agent = truncate_for_column(source_agent, 50) || 'unknown'
|
|
424
|
+
|
|
425
|
+
{ tags: tag_array,
|
|
426
|
+
domain: truncate_for_column(knowledge_domain || tag_array.first || 'general', 50),
|
|
427
|
+
source_agent: agent,
|
|
428
|
+
source_provider: truncate_for_column(source_provider || derive_provider_from_agent(agent), 50),
|
|
429
|
+
source_channel: truncate_for_column(source_channel, 100),
|
|
430
|
+
submitted_by: truncate_for_column(submitted_by, 255),
|
|
431
|
+
submitted_from: truncate_for_column(submitted_from, 255) }
|
|
432
|
+
end
|
|
433
|
+
|
|
434
|
+
def create_candidate_entry(content:, content_type:, context:, metadata:, content_hash:, embedding:)
|
|
435
|
+
new_entry = Legion::Data::Model::ApolloEntry.create(
|
|
436
|
+
content: content,
|
|
437
|
+
content_type: content_type,
|
|
438
|
+
confidence: Helpers::Confidence.initial_confidence,
|
|
439
|
+
source_agent: metadata[:source_agent],
|
|
440
|
+
source_provider: metadata[:source_provider],
|
|
441
|
+
source_channel: metadata[:source_channel],
|
|
442
|
+
source_context: ::JSON.dump(context.is_a?(Hash) ? context : {}),
|
|
443
|
+
tags: Sequel.pg_array(metadata[:tags]),
|
|
444
|
+
status: 'candidate',
|
|
445
|
+
knowledge_domain: metadata[:domain],
|
|
446
|
+
submitted_by: metadata[:submitted_by],
|
|
447
|
+
submitted_from: metadata[:submitted_from],
|
|
448
|
+
content_hash: content_hash,
|
|
449
|
+
embedding: Sequel.lit("'[#{embedding.join(',')}]'::vector")
|
|
450
|
+
)
|
|
451
|
+
log.info("Apollo Knowledge.handle_ingest created entry_id=#{new_entry.id} status=candidate domain=#{metadata[:domain]} source_agent=#{metadata[:source_agent]}") # rubocop:disable Layout/LineLength
|
|
452
|
+
new_entry.id
|
|
453
|
+
end
|
|
454
|
+
|
|
455
|
+
def browse_query?(query)
|
|
456
|
+
query.to_s.strip.length < 3
|
|
457
|
+
end
|
|
458
|
+
|
|
459
|
+
def list_entries_chronologically(query:, limit:, status:, status_defaulted:, tags:, domain:)
|
|
460
|
+
log.debug("Apollo Knowledge.list_entries_chronologically limit=#{limit} statuses=#{Array(status).join(',')} status_defaulted=#{status_defaulted} tags=#{Array(tags).size} domain=#{domain || 'nil'}") # rubocop:disable Layout/LineLength
|
|
461
|
+
dataset = Legion::Data::Model::ApolloEntry.exclude(status: 'archived')
|
|
462
|
+
requested = Array(status).map(&:to_s).reject(&:empty?)
|
|
463
|
+
dataset = dataset.where(status: requested) unless status_defaulted || requested.empty?
|
|
464
|
+
dataset = dataset.where(Sequel.lit('tags && ?', Sequel.pg_array(Array(tags)))) if tags && !Array(tags).empty?
|
|
465
|
+
dataset = dataset.where(knowledge_domain: domain) if domain && !domain.to_s.empty?
|
|
466
|
+
|
|
467
|
+
entries = dataset.order(Sequel.desc(:created_at)).limit(limit).all.map do |entry|
|
|
468
|
+
format_entry(entry.is_a?(Hash) ? entry : entry.values)
|
|
469
|
+
end
|
|
470
|
+
log.info("Apollo Knowledge.list_entries_chronologically results=#{entries.size}")
|
|
471
|
+
{ success: true, mode: :browse, query: query, entries: entries, count: entries.size }
|
|
472
|
+
rescue Sequel::Error => e
|
|
473
|
+
handle_exception(e, level: :error, operation: 'apollo.knowledge.list_entries_chronologically')
|
|
474
|
+
{ success: false, error: e.message }
|
|
475
|
+
end
|
|
476
|
+
|
|
477
|
+
def format_entry(entry)
|
|
478
|
+
{ id: entry[:id], content: entry[:content], content_type: entry[:content_type],
|
|
479
|
+
confidence: entry[:confidence], distance: entry[:distance]&.to_f,
|
|
480
|
+
tags: entry[:tags], source_agent: entry[:source_agent],
|
|
481
|
+
knowledge_domain: entry[:knowledge_domain] }
|
|
359
482
|
end
|
|
360
483
|
|
|
361
484
|
def allowed_domains_for(target_domain)
|
|
@@ -379,6 +502,7 @@ module Legion
|
|
|
379
502
|
rel_weight = Helpers::Confidence.apollo_setting(:contradiction, :relation_weight, default: 0.8)
|
|
380
503
|
|
|
381
504
|
db = Legion::Data::Model::ApolloEntry.db
|
|
505
|
+
log.debug("Apollo Knowledge.detect_contradictions entry_id=#{entry_id} similar_limit=#{sim_limit} threshold=#{sim_threshold}")
|
|
382
506
|
similar = db.fetch(
|
|
383
507
|
"SELECT id, content, embedding FROM apollo_entries WHERE id != :entry_id AND embedding IS NOT NULL ORDER BY embedding <=> :embedding LIMIT #{sim_limit}", # rubocop:disable Layout/LineLength
|
|
384
508
|
entry_id: entry_id,
|
|
@@ -403,9 +527,10 @@ module Legion
|
|
|
403
527
|
Legion::Data::Model::ApolloEntry.where(id: [entry_id, existing[:id]]).update(status: 'disputed')
|
|
404
528
|
contradictions << existing[:id]
|
|
405
529
|
end
|
|
530
|
+
log.info("Apollo Knowledge.detect_contradictions entry_id=#{entry_id} contradictions=#{contradictions.size}") if contradictions.any?
|
|
406
531
|
contradictions
|
|
407
532
|
rescue Sequel::Error => e
|
|
408
|
-
|
|
533
|
+
handle_exception(e, level: :error, operation: 'apollo.knowledge.detect_contradictions')
|
|
409
534
|
[]
|
|
410
535
|
end
|
|
411
536
|
|
|
@@ -430,6 +555,7 @@ module Legion
|
|
|
430
555
|
|
|
431
556
|
def find_corroboration(embedding, content_type_sym, source_agent, source_channel = nil)
|
|
432
557
|
scan_limit = Helpers::Confidence.apollo_setting(:corroboration, :scan_limit, default: 50)
|
|
558
|
+
log.debug("Apollo Knowledge.find_corroboration content_type=#{content_type_sym} source_agent=#{source_agent} source_channel=#{source_channel || 'nil'} scan_limit=#{scan_limit}") # rubocop:disable Layout/LineLength
|
|
433
559
|
existing = Legion::Data::Model::ApolloEntry
|
|
434
560
|
.where(content_type: content_type_sym)
|
|
435
561
|
.exclude(embedding: nil)
|
|
@@ -462,9 +588,11 @@ module Legion
|
|
|
462
588
|
source_agent: source_agent,
|
|
463
589
|
weight: sim
|
|
464
590
|
)
|
|
591
|
+
log.info("Apollo Knowledge.find_corroboration matched entry_id=#{entry.id} source_agent=#{source_agent} similarity=#{sim}")
|
|
465
592
|
return [true, entry.id]
|
|
466
593
|
end
|
|
467
594
|
|
|
595
|
+
log.debug("Apollo Knowledge.find_corroboration no_match source_agent=#{source_agent}")
|
|
468
596
|
[false, nil]
|
|
469
597
|
end
|
|
470
598
|
|
|
@@ -483,6 +611,7 @@ module Legion
|
|
|
483
611
|
end
|
|
484
612
|
|
|
485
613
|
def upsert_expertise(source_agent:, domain:)
|
|
614
|
+
log.debug("Apollo Knowledge.upsert_expertise source_agent=#{source_agent} domain=#{domain}")
|
|
486
615
|
expertise = Legion::Data::Model::ApolloExpertise
|
|
487
616
|
.where(agent_id: source_agent, domain: domain).first
|
|
488
617
|
if expertise
|
|
@@ -25,7 +25,11 @@ module Legion
|
|
|
25
25
|
min_confidence ||= Helpers::Confidence.decay_threshold
|
|
26
26
|
min_age_hours = Helpers::Confidence.decay_min_age_hours
|
|
27
27
|
|
|
28
|
-
|
|
28
|
+
log.debug("Apollo Maintenance.run_decay_cycle alpha=#{alpha} min_confidence=#{min_confidence} min_age_hours=#{min_age_hours}")
|
|
29
|
+
unless defined?(Legion::Data) && Legion::Data.respond_to?(:connection) && Legion::Data.connection
|
|
30
|
+
log.warn('Apollo Maintenance.run_decay_cycle skipped: apollo_data_not_available')
|
|
31
|
+
return { decayed: 0, archived: 0 }
|
|
32
|
+
end
|
|
29
33
|
|
|
30
34
|
conn = Legion::Data.connection
|
|
31
35
|
|
|
@@ -54,15 +58,21 @@ module Legion
|
|
|
54
58
|
|
|
55
59
|
{ decayed: decayed, archived: archived, alpha: alpha, threshold: min_confidence,
|
|
56
60
|
min_age_hours: min_age_hours }
|
|
61
|
+
.tap { |result| log.info("Apollo Maintenance.run_decay_cycle decayed=#{result[:decayed]} archived=#{result[:archived]} alpha=#{alpha} threshold=#{min_confidence}") } # rubocop:disable Layout/LineLength
|
|
57
62
|
rescue Sequel::Error => e
|
|
63
|
+
handle_exception(e, level: :error, operation: 'apollo.maintenance.run_decay_cycle')
|
|
58
64
|
{ decayed: 0, archived: 0, error: e.message }
|
|
59
65
|
end
|
|
60
66
|
|
|
61
67
|
def check_corroboration(**) # rubocop:disable Metrics/CyclomaticComplexity
|
|
62
|
-
|
|
68
|
+
unless defined?(Legion::Data::Model::ApolloEntry)
|
|
69
|
+
log.warn('Apollo Maintenance.check_corroboration skipped: apollo_data_not_available')
|
|
70
|
+
return { success: false, error: 'apollo_data_not_available' }
|
|
71
|
+
end
|
|
63
72
|
|
|
64
73
|
candidates = Legion::Data::Model::ApolloEntry.where(status: 'candidate').exclude(embedding: nil).all
|
|
65
74
|
confirmed = Legion::Data::Model::ApolloEntry.where(status: 'confirmed').exclude(embedding: nil).all
|
|
75
|
+
log.debug("Apollo Maintenance.check_corroboration candidates=#{candidates.size} confirmed=#{confirmed.size}")
|
|
66
76
|
|
|
67
77
|
promoted = 0
|
|
68
78
|
|
|
@@ -106,7 +116,9 @@ module Legion
|
|
|
106
116
|
end
|
|
107
117
|
|
|
108
118
|
{ success: true, promoted: promoted, scanned: candidates.size }
|
|
119
|
+
.tap { |result| log.info("Apollo Maintenance.check_corroboration scanned=#{result[:scanned]} promoted=#{result[:promoted]}") }
|
|
109
120
|
rescue Sequel::Error => e
|
|
121
|
+
handle_exception(e, level: :error, operation: 'apollo.maintenance.check_corroboration')
|
|
110
122
|
{ success: false, error: e.message }
|
|
111
123
|
end
|
|
112
124
|
|
|
@@ -263,6 +263,36 @@ RSpec.describe Legion::Extensions::Apollo::Runners::Knowledge do
|
|
|
263
263
|
tags: ['RabbitMQ'], source_agent: 'agent-1')
|
|
264
264
|
end
|
|
265
265
|
|
|
266
|
+
it 'sanitizes null bytes before storing content' do
|
|
267
|
+
expect(mock_entry_class).to receive(:create).with(
|
|
268
|
+
hash_including(content: 'helloworld')
|
|
269
|
+
).and_return(mock_entry)
|
|
270
|
+
host.handle_ingest(content: "hello\x00world", content_type: 'fact', source_agent: 'agent-1')
|
|
271
|
+
end
|
|
272
|
+
|
|
273
|
+
it 'truncates short varchar metadata fields at the database boundary' do
|
|
274
|
+
expect(mock_entry_class).to receive(:create).with(
|
|
275
|
+
hash_including(
|
|
276
|
+
source_agent: 'a' * 50,
|
|
277
|
+
source_provider: 'p' * 50,
|
|
278
|
+
source_channel: 'c' * 100,
|
|
279
|
+
knowledge_domain: 'd' * 50,
|
|
280
|
+
submitted_by: 'u' * 255,
|
|
281
|
+
submitted_from: 'n' * 255
|
|
282
|
+
)
|
|
283
|
+
).and_return(mock_entry)
|
|
284
|
+
host.handle_ingest(
|
|
285
|
+
content: 'test',
|
|
286
|
+
content_type: 'fact',
|
|
287
|
+
source_agent: 'a' * 60,
|
|
288
|
+
source_provider: 'p' * 60,
|
|
289
|
+
source_channel: 'c' * 120,
|
|
290
|
+
knowledge_domain: 'd' * 60,
|
|
291
|
+
submitted_by: 'u' * 300,
|
|
292
|
+
submitted_from: 'n' * 300
|
|
293
|
+
)
|
|
294
|
+
end
|
|
295
|
+
|
|
266
296
|
context 'content hash dedup' do
|
|
267
297
|
let(:existing_entry) do
|
|
268
298
|
double('existing', id: 'uuid-existing', confidence: 0.6,
|
|
@@ -295,9 +325,16 @@ RSpec.describe Legion::Extensions::Apollo::Runners::Knowledge do
|
|
|
295
325
|
end
|
|
296
326
|
|
|
297
327
|
it 'returns a structured error' do
|
|
328
|
+
allow(host).to receive(:handle_exception)
|
|
329
|
+
|
|
298
330
|
result = host.handle_ingest(content: 'test', content_type: 'fact', source_agent: 'a')
|
|
299
331
|
expect(result[:success]).to be false
|
|
300
332
|
expect(result[:error]).to eq('connection lost')
|
|
333
|
+
expect(host).to have_received(:handle_exception).with(
|
|
334
|
+
instance_of(Sequel::Error),
|
|
335
|
+
level: :error,
|
|
336
|
+
operation: 'apollo.knowledge.handle_ingest'
|
|
337
|
+
)
|
|
301
338
|
end
|
|
302
339
|
end
|
|
303
340
|
end
|
|
@@ -376,6 +413,64 @@ RSpec.describe Legion::Extensions::Apollo::Runners::Knowledge do
|
|
|
376
413
|
expect(result[:count]).to eq(0)
|
|
377
414
|
end
|
|
378
415
|
end
|
|
416
|
+
|
|
417
|
+
context 'when query is browse-shaped' do
|
|
418
|
+
let(:mock_entry_class) { double('ApolloEntry') }
|
|
419
|
+
let(:dataset) { double('dataset') }
|
|
420
|
+
let(:entries) do
|
|
421
|
+
[{ id: 'uuid-1', content: 'Candidate fact', content_type: 'fact',
|
|
422
|
+
confidence: 0.7, tags: ['ruby'], source_agent: 'agent-1',
|
|
423
|
+
knowledge_domain: 'general' }]
|
|
424
|
+
end
|
|
425
|
+
|
|
426
|
+
before do
|
|
427
|
+
stub_const('Legion::Data::Model::ApolloEntry', mock_entry_class)
|
|
428
|
+
allow(mock_entry_class).to receive(:exclude).with(status: 'archived').and_return(dataset)
|
|
429
|
+
allow(dataset).to receive(:where).and_return(dataset)
|
|
430
|
+
allow(dataset).to receive(:order).with(:created_at).and_return(dataset)
|
|
431
|
+
allow(dataset).to receive(:limit).with(50).and_return(dataset)
|
|
432
|
+
allow(dataset).to receive(:all).and_return(entries)
|
|
433
|
+
end
|
|
434
|
+
|
|
435
|
+
it 'lists recent non-archived entries without generating an embedding' do
|
|
436
|
+
expect(Legion::LLM::Embeddings).not_to receive(:generate)
|
|
437
|
+
|
|
438
|
+
result = host.handle_query(query: 'x', limit: 50)
|
|
439
|
+
|
|
440
|
+
expect(result[:success]).to be true
|
|
441
|
+
expect(result[:mode]).to eq(:browse)
|
|
442
|
+
expect(result[:count]).to eq(1)
|
|
443
|
+
expect(result[:entries].first[:content]).to eq('Candidate fact')
|
|
444
|
+
expect(dataset).not_to have_received(:where).with(status: ['confirmed'])
|
|
445
|
+
end
|
|
446
|
+
|
|
447
|
+
it 'respects an explicit confirmed status filter' do
|
|
448
|
+
host.handle_query(query: 'x', limit: 50, status: [:confirmed])
|
|
449
|
+
|
|
450
|
+
expect(dataset).to have_received(:where).with(status: ['confirmed'])
|
|
451
|
+
end
|
|
452
|
+
|
|
453
|
+
it 'applies tags and domain filters when provided' do
|
|
454
|
+
host.handle_query(query: 'x', limit: 50, tags: ['ruby'], domain: 'general')
|
|
455
|
+
|
|
456
|
+
expect(dataset).to have_received(:where).with('tags && ?')
|
|
457
|
+
expect(dataset).to have_received(:where).with(knowledge_domain: 'general')
|
|
458
|
+
end
|
|
459
|
+
end
|
|
460
|
+
end
|
|
461
|
+
|
|
462
|
+
describe '#normalize_text_input' do
|
|
463
|
+
let(:host) { Object.new.extend(described_class) }
|
|
464
|
+
|
|
465
|
+
it 'strips null bytes in the local fallback path' do
|
|
466
|
+
expect(host.send(:normalize_text_input, "hello\x00world")).to eq('helloworld')
|
|
467
|
+
end
|
|
468
|
+
|
|
469
|
+
it 'scrubs invalid UTF-8 in the local fallback path' do
|
|
470
|
+
invalid = "hello\xC3(world".dup.force_encoding(Encoding::UTF_8)
|
|
471
|
+
|
|
472
|
+
expect(host.send(:normalize_text_input, invalid)).to eq('hello(world')
|
|
473
|
+
end
|
|
379
474
|
end
|
|
380
475
|
|
|
381
476
|
describe '#retrieve_relevant' do
|
data/spec/spec_helper.rb
CHANGED