lex-apollo 0.4.18 → 0.4.20

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 87724ea9011349f5f7c008a360728975567903827b5cdcf9a42e2678ca84b134
4
- data.tar.gz: ae10f48baf9522bd7087a20e1886fc26170c1f5e0a0c36a3ccac26c836f11885
3
+ metadata.gz: f197e46e616eb71f939175480496d17775c67985bf70d631ac8d089724c7ac7d
4
+ data.tar.gz: c8f6ee951339eda218647c3bee95bfda3f32c6ee62a44abcaad8940b40214bd9
5
5
  SHA512:
6
- metadata.gz: b21c5413da8dfe9f60d9845a578bcfe8337982d128a5041df214004e144b9913dd34cd2da1514a30ea67e21d52604a11218c362a46f9c990c472ac4ec14a0fa9
7
- data.tar.gz: ae07a4aba39ce05f20b6a495310b6a20c1c468e0d627393ab931af76d9cd57da5b52075a33706a8b94fadae01c32b65d818d4f5c1018f9eac08a8ffdc10c7ace
6
+ metadata.gz: 5b4822965e47e806bf21b1e07e3a675fff365bdea474514278ab61efc1be2e3fc557756f2ad8ab32e63b2d1eb24286dfe25c44ceacbba723bd877d1402d7a5ab
7
+ data.tar.gz: b67b970d6cf8734abd96c5de5e993e802ba924fd966307e60fd7911f7879559e3835d262ed653a269b740bbffd2574df9e156afe4e4c9dfd6a15b70afce6d658
data/CHANGELOG.md CHANGED
@@ -1,5 +1,21 @@
1
1
  # Changelog
2
2
 
3
+ ## [0.4.20] - 2026-04-25
4
+
5
+ ### Fixed
6
+ - `Knowledge#handle_ingest` now sanitizes stored content for PostgreSQL by scrubbing invalid UTF-8 and removing null bytes before inserts.
7
+ - `Knowledge#handle_ingest` now truncates short metadata fields to schema-safe lengths before writing entries.
8
+ - `Knowledge#handle_query` now treats very short queries as browse/list requests, returning recent non-archived entries without embedding noise while preserving explicit status filters.
9
+ - `Knowledge` Sequel rescue paths now log exception class, message, and a short backtrace before returning structured errors.
10
+ - Apollo runners and actors now emit debug/info lifecycle logs around ingest, query, browse, maintenance, expertise, entity extraction, writeback vectorization, entity watchdog, and GAS pipeline paths so debug-level logging can trace workflows end to end.
11
+
12
+ ## [0.4.19] - 2026-04-24
13
+
14
+ ### Fixed
15
+ - `store_knowledge` no longer rejects LLM-provided content_type values — normalizes free-form strings (`"reasoning"`, `"text"`, `"text/plain"`, `":fact"`, `"inference"`) to valid symbols via alias map with `:observation` fallback
16
+ - `GaiaIntegration.publish_insight` now passes `:observation` instead of the domain string as content_type (was sending `"general"` or domain names which failed validation)
17
+ - `llm_detects_conflict?` truncates content to 4000 chars before sending to LLM to prevent context overflow errors (was passing full entry content, hitting 65536-token limit)
18
+
3
19
  ## [0.4.18] - 2026-04-24
4
20
 
5
21
  ### Fixed
@@ -34,6 +34,7 @@ module Legion
34
34
 
35
35
  def scan_and_ingest
36
36
  texts = recent_task_log_texts
37
+ log.debug("EntityWatchdog scan_and_ingest log_texts=#{texts.size}")
37
38
  return { success: true, ingested: 0, reason: :no_logs } if texts.empty?
38
39
 
39
40
  ingested = 0
@@ -53,10 +54,10 @@ module Legion
53
54
  end
54
55
  end
55
56
 
56
- log.debug("EntityWatchdog: ingested #{ingested} new entities from #{texts.size} log entries")
57
+ log.info("EntityWatchdog ingested=#{ingested} logs_scanned=#{texts.size}")
57
58
  { success: true, ingested: ingested, logs_scanned: texts.size }
58
59
  rescue StandardError => e
59
- log.error("EntityWatchdog scan_and_ingest failed: #{e.message}")
60
+ handle_exception(e, level: :error, operation: 'apollo.entity_watchdog.scan_and_ingest')
60
61
  { success: false, error: e.message }
61
62
  end
62
63
 
@@ -71,7 +72,9 @@ module Legion
71
72
  .order(Sequel.desc(:created_at))
72
73
  .limit(log_limit)
73
74
  .select_map(:message)
74
- logs.map(&:to_s).reject(&:empty?).uniq
75
+ texts = logs.map(&:to_s).reject(&:empty?).uniq
76
+ log.debug("EntityWatchdog recent_task_log_texts lookback=#{lookback} limit=#{log_limit} raw=#{logs.size} unique=#{texts.size}")
77
+ texts
75
78
  rescue StandardError => e
76
79
  log.warn("EntityWatchdog recent_task_log_texts failed: #{e.message}")
77
80
  []
@@ -104,8 +107,9 @@ module Legion
104
107
  source_agent: 'lex-apollo:entity_watchdog',
105
108
  context: { entity_type: entity[:type], original_name: entity[:name] }
106
109
  ).publish
110
+ log.info("EntityWatchdog published entity type=#{entity[:type]} name=#{entity[:name]}")
107
111
  rescue StandardError => e
108
- log.error("EntityWatchdog publish failed: #{e.message}")
112
+ handle_exception(e, level: :error, operation: 'apollo.entity_watchdog.publish_entity_ingest')
109
113
  end
110
114
 
111
115
  def entity_types
@@ -14,21 +14,27 @@ module Legion
14
14
 
15
15
  def handle_vectorize(payload)
16
16
  payload = symbolize(payload)
17
+ log.debug("WritebackVectorize handle_vectorize content_length=#{payload[:content].to_s.length} content_type=#{payload[:content_type] || 'nil'}")
17
18
  result = Legion::LLM::Embeddings.generate(text: payload[:content])
18
19
  vector = result.is_a?(Hash) ? result[:vector] : result
19
20
  embedding = vector.is_a?(Array) && vector.any? ? vector : Array.new(1024, 0.0)
21
+ log.debug("WritebackVectorize embedding_dimensions=#{embedding.length} vector_generated=#{vector.is_a?(Array) && vector.any?}")
20
22
  enriched = payload.merge(embedding: embedding)
21
23
 
22
24
  if Helpers::Capability.can_write?
25
+ log.debug('WritebackVectorize route=direct_ingest')
23
26
  Runners::Knowledge.handle_ingest(**enriched)
24
27
  else
28
+ log.debug('WritebackVectorize route=transport_writeback')
25
29
  Transport::Messages::Writeback.new(
26
30
  **enriched, has_embedding: true
27
31
  ).publish
28
32
  end
29
33
 
34
+ log.info('WritebackVectorize completed action=vectorized')
30
35
  { success: true, action: :vectorized }
31
36
  rescue StandardError => e
37
+ handle_exception(e, level: :error, operation: 'apollo.writeback_vectorize.handle_vectorize')
32
38
  { success: false, error: e.message }
33
39
  end
34
40
 
@@ -51,15 +51,17 @@ module Legion
51
51
  req = json_body
52
52
  halt 400, { error: 'query is required' }.to_json unless req[:query]
53
53
 
54
- result = runner.handle_query(
54
+ query_options = {
55
55
  query: req[:query],
56
56
  limit: req[:limit] || 10,
57
57
  min_confidence: req[:min_confidence] || 0.3,
58
- status: req[:status] || [:confirmed],
59
58
  tags: req[:tags],
60
59
  domain: req[:domain],
61
60
  agent_id: req[:agent_id] || 'api'
62
- )
61
+ }
62
+ query_options[:status] = req[:status] if req.key?(:status)
63
+
64
+ result = runner.handle_query(**query_options)
63
65
  status result[:success] ? 200 : 500
64
66
  result.to_json
65
67
  end
@@ -15,7 +15,7 @@ module Legion
15
15
  client = Legion::Extensions::Apollo::Client.new(agent_id: agent_id)
16
16
  client.store_knowledge(
17
17
  content: insight[:content],
18
- content_type: insight[:domain] || 'general',
18
+ content_type: :observation,
19
19
  source_agent: agent_id,
20
20
  tags: Array(insight[:tags])
21
21
  )
@@ -4,17 +4,24 @@ module Legion
4
4
  module Extensions
5
5
  module Apollo
6
6
  module Runners
7
- module EntityExtractor # rubocop:disable Legion/Extension/RunnerIncludeHelpers
7
+ module EntityExtractor
8
8
  DEFAULT_ENTITY_TYPES = %w[person service repository concept].freeze
9
9
  DEFAULT_MIN_CONFIDENCE = 0.7
10
10
 
11
11
  def extract_entities(text:, entity_types: nil, min_confidence: Helpers::Confidence.apollo_setting(:entity_extractor, :min_confidence, default: DEFAULT_MIN_CONFIDENCE), **) # rubocop:disable Layout/LineLength
12
- return { success: true, entities: [], source: :empty } if text.to_s.strip.empty?
12
+ if text.to_s.strip.empty?
13
+ log.debug('Apollo EntityExtractor.extract_entities skipped reason=empty_text')
14
+ return { success: true, entities: [], source: :empty }
15
+ end
13
16
 
14
- return { success: true, entities: [], source: :unavailable } unless defined?(Legion::LLM) && Legion::LLM.started?
17
+ unless defined?(Legion::LLM) && Legion::LLM.started?
18
+ log.debug('Apollo EntityExtractor.extract_entities skipped reason=llm_unavailable')
19
+ return { success: true, entities: [], source: :unavailable }
20
+ end
15
21
 
16
22
  types = Array(entity_types).map(&:to_s)
17
23
  types = DEFAULT_ENTITY_TYPES if types.empty?
24
+ log.debug("Apollo EntityExtractor.extract_entities text_length=#{text.to_s.length} types=#{types.join(',')} min_confidence=#{min_confidence}")
18
25
 
19
26
  result = Legion::LLM.structured(
20
27
  messages: [
@@ -29,9 +36,11 @@ module Legion
29
36
  (entity[:confidence] || 0.0) >= min_confidence &&
30
37
  (types.empty? || types.include?(entity[:type].to_s))
31
38
  end
39
+ log.info("Apollo EntityExtractor.extract_entities raw=#{raw_entities.size} filtered=#{filtered.size}")
32
40
 
33
41
  { success: true, entities: filtered, source: :llm }
34
42
  rescue StandardError => e
43
+ handle_exception(e, level: :error, operation: 'apollo.entity_extractor.extract_entities')
35
44
  { success: false, entities: [], error: e.message, source: :error }
36
45
  end
37
46
 
@@ -70,6 +79,8 @@ module Legion
70
79
  required: ['entities']
71
80
  }
72
81
  end
82
+
83
+ include Legion::Extensions::Helpers::Lex if defined?(Legion::Extensions::Helpers::Lex)
73
84
  end
74
85
  end
75
86
  end
@@ -18,52 +18,65 @@ module Legion
18
18
  end
19
19
 
20
20
  def aggregate(**)
21
- return { success: false, error: 'apollo_data_not_available' } unless defined?(Legion::Data::Model::ApolloEntry)
21
+ unless defined?(Legion::Data::Model::ApolloEntry)
22
+ log.warn('Apollo Expertise.aggregate skipped: apollo_data_not_available')
23
+ return { success: false, error: 'apollo_data_not_available' }
24
+ end
22
25
 
23
26
  entries = Legion::Data::Model::ApolloEntry
24
27
  .select(:source_agent, :tags, :confidence)
25
28
  .exclude(source_agent: nil)
26
29
  .all
27
-
28
- groups = {}
29
- entries.each do |entry|
30
- agent = entry.source_agent
31
- domain = entry.tags.is_a?(Array) ? (entry.tags.first || 'general') : 'general'
32
- key = "#{agent}:#{domain}"
33
- groups[key] ||= { agent_id: agent, domain: domain, confidences: [] }
34
- groups[key][:confidences] << entry.confidence.to_f
35
- end
30
+ log.debug("Apollo Expertise.aggregate entries=#{entries.size}")
36
31
 
37
32
  agent_set = Set.new
38
33
  domain_set = Set.new
39
34
 
40
- groups.each_value do |group|
41
- avg = group[:confidences].sum / group[:confidences].size
42
- count = group[:confidences].size
43
- cap = Helpers::Confidence.apollo_setting(:expertise, :proficiency_cap, default: 1.0)
44
- proficiency = [avg * Math.log2(count + 1), cap].min
45
-
46
- existing = Legion::Data::Model::ApolloExpertise
47
- .where(agent_id: group[:agent_id], domain: group[:domain]).first
48
-
49
- if existing
50
- existing.update(proficiency: proficiency, entry_count: count, last_active_at: Time.now)
51
- else
52
- Legion::Data::Model::ApolloExpertise.create(
53
- agent_id: group[:agent_id], domain: group[:domain],
54
- proficiency: proficiency, entry_count: count, last_active_at: Time.now
55
- )
56
- end
57
-
35
+ expertise_groups(entries).each_value do |group|
36
+ upsert_expertise_group(group)
58
37
  agent_set << group[:agent_id]
59
38
  domain_set << group[:domain]
60
39
  end
61
40
 
62
41
  { success: true, agents: agent_set.size, domains: domain_set.size }
42
+ .tap { |result| log.info("Apollo Expertise.aggregate agents=#{result[:agents]} domains=#{result[:domains]}") }
63
43
  rescue Sequel::Error => e
44
+ handle_exception(e, level: :error, operation: 'apollo.expertise.aggregate')
64
45
  { success: false, error: e.message }
65
46
  end
66
47
 
48
+ def expertise_groups(entries)
49
+ entries.each_with_object({}) do |entry, groups|
50
+ agent = entry.source_agent
51
+ domain = entry.tags.is_a?(Array) ? (entry.tags.first || 'general') : 'general'
52
+ key = "#{agent}:#{domain}"
53
+ groups[key] ||= { agent_id: agent, domain: domain, confidences: [] }
54
+ groups[key][:confidences] << entry.confidence.to_f
55
+ end
56
+ end
57
+
58
+ def upsert_expertise_group(group)
59
+ count = group[:confidences].size
60
+ proficiency = expertise_proficiency(group[:confidences])
61
+ existing = Legion::Data::Model::ApolloExpertise
62
+ .where(agent_id: group[:agent_id], domain: group[:domain]).first
63
+
64
+ if existing
65
+ existing.update(proficiency: proficiency, entry_count: count, last_active_at: Time.now)
66
+ else
67
+ Legion::Data::Model::ApolloExpertise.create(
68
+ agent_id: group[:agent_id], domain: group[:domain],
69
+ proficiency: proficiency, entry_count: count, last_active_at: Time.now
70
+ )
71
+ end
72
+ end
73
+
74
+ def expertise_proficiency(confidences)
75
+ avg = confidences.sum / confidences.size
76
+ cap = Helpers::Confidence.apollo_setting(:expertise, :proficiency_cap, default: 1.0)
77
+ [avg * Math.log2(confidences.size + 1), cap].min
78
+ end
79
+
67
80
  include Legion::Extensions::Helpers::Lex if defined?(Legion::Extensions::Helpers::Lex)
68
81
  end
69
82
  end
@@ -4,7 +4,10 @@ module Legion
4
4
  module Extensions
5
5
  module Apollo
6
6
  module Runners
7
- module Gas # rubocop:disable Legion/Extension/RunnerIncludeHelpers
7
+ module Gas
8
+ include Legion::Extensions::Helpers::Lex if defined?(Legion::Extensions::Helpers::Lex)
9
+ extend Legion::Extensions::Helpers::Lex if defined?(Legion::Extensions::Helpers::Lex)
10
+
8
11
  RELATION_TYPES = %w[
9
12
  similar_to contradicts depends_on causes
10
13
  part_of supersedes supports_by extends
@@ -16,10 +19,6 @@ module Legion
16
19
 
17
20
  module_function
18
21
 
19
- def log
20
- Legion::Logging
21
- end
22
-
23
22
  def json_load(str)
24
23
  ::JSON.parse(str, symbolize_names: true)
25
24
  end
@@ -31,7 +30,12 @@ module Legion
31
30
  def fallback_confidence = Helpers::Confidence.apollo_setting(:gas, :fallback_confidence, default: 0.5)
32
31
 
33
32
  def process(audit_event)
34
- return { phases_completed: 0, reason: 'no content' } unless processable?(audit_event)
33
+ unless processable?(audit_event)
34
+ log.debug('GAS process skipped reason=no_content')
35
+ return { phases_completed: 0, reason: 'no content' }
36
+ end
37
+
38
+ log.debug("GAS process start request_id=#{audit_event[:request_id] || 'nil'} messages=#{Array(audit_event[:messages]).size} response_length=#{audit_event[:response_content].to_s.length}") # rubocop:disable Layout/LineLength
35
39
 
36
40
  facts = phase_comprehend(audit_event)
37
41
  entities = phase_extract(audit_event, facts)
@@ -40,7 +44,7 @@ module Legion
40
44
  deposit_result = phase_deposit(facts, entities, relations, synthesis, audit_event)
41
45
  anticipations = phase_anticipate(facts, synthesis)
42
46
 
43
- {
47
+ result = {
44
48
  phases_completed: 6,
45
49
  facts: facts.length,
46
50
  entities: entities.length,
@@ -49,8 +53,10 @@ module Legion
49
53
  deposited: deposit_result,
50
54
  anticipations: anticipations.length
51
55
  }
56
+ log.info("GAS process complete facts=#{result[:facts]} entities=#{result[:entities]} relations=#{result[:relations]} synthesis=#{result[:synthesis]} anticipations=#{result[:anticipations]}") # rubocop:disable Layout/LineLength
57
+ result
52
58
  rescue StandardError => e
53
- log.warn("GAS pipeline error: #{e.message}")
59
+ log.error("GAS pipeline error: #{e.message}")
54
60
  { phases_completed: 0, error: e.message }
55
61
  end
56
62
 
@@ -63,19 +69,24 @@ module Legion
63
69
  messages = audit_event[:messages]
64
70
  response = audit_event[:response_content]
65
71
 
66
- if llm_available?
67
- llm_comprehend(messages, response)
68
- else
69
- mechanical_comprehend(messages, response)
70
- end
72
+ mode = llm_available? ? :llm : :mechanical
73
+ log.debug("GAS phase_comprehend mode=#{mode} messages=#{Array(messages).size} response_length=#{response.to_s.length}")
74
+ facts = mode == :llm ? llm_comprehend(messages, response) : mechanical_comprehend(messages, response)
75
+ log.debug("GAS phase_comprehend facts=#{facts.size}")
76
+ facts
71
77
  end
72
78
 
73
79
  # Phase 2: Extract - entity extraction (delegates to existing EntityExtractor)
74
80
  def phase_extract(audit_event, _facts)
75
- return [] unless defined?(Runners::EntityExtractor)
81
+ unless defined?(Runners::EntityExtractor)
82
+ log.debug('GAS phase_extract skipped reason=entity_extractor_unavailable')
83
+ return []
84
+ end
76
85
 
77
86
  result = Runners::EntityExtractor.extract_entities(text: audit_event[:response_content])
78
- result[:success] ? (result[:entities] || []) : []
87
+ entities = result[:success] ? (result[:entities] || []) : []
88
+ log.debug("GAS phase_extract success=#{result[:success]} entities=#{entities.size}")
89
+ entities
79
90
  rescue StandardError => e
80
91
  log.warn("GAS phase_extract failed: #{e.message}")
81
92
  []
@@ -83,10 +94,16 @@ module Legion
83
94
 
84
95
  # Phase 3: Relate - classify relationships between new and existing entries
85
96
  def phase_relate(facts, _entities)
86
- return [] unless defined?(Runners::Knowledge)
97
+ unless defined?(Runners::Knowledge)
98
+ log.debug('GAS phase_relate skipped reason=knowledge_runner_unavailable')
99
+ return []
100
+ end
87
101
 
88
102
  existing = fetch_similar_entries(facts)
89
- return [] if existing.empty?
103
+ if existing.empty?
104
+ log.debug("GAS phase_relate skipped reason=no_existing_entries facts=#{facts.size}")
105
+ return []
106
+ end
90
107
 
91
108
  relations = []
92
109
  facts.each do |fact|
@@ -95,15 +112,24 @@ module Legion
95
112
  relations << relation if relation
96
113
  end
97
114
  end
115
+ log.debug("GAS phase_relate facts=#{facts.size} existing=#{existing.size} relations=#{relations.size}")
98
116
  relations
99
117
  end
100
118
 
101
119
  # Phase 4: Synthesize - generate derivative knowledge
102
120
  def phase_synthesize(facts, _relations)
103
- return [] if facts.length < 2
104
- return [] unless llm_available?
121
+ if facts.length < 2
122
+ log.debug("GAS phase_synthesize skipped reason=insufficient_facts facts=#{facts.length}")
123
+ return []
124
+ end
125
+ unless llm_available?
126
+ log.debug('GAS phase_synthesize skipped reason=llm_unavailable')
127
+ return []
128
+ end
105
129
 
106
- llm_synthesize(facts)
130
+ synthesis = llm_synthesize(facts)
131
+ log.debug("GAS phase_synthesize synthesis=#{synthesis.size}")
132
+ synthesis
107
133
  rescue StandardError => e
108
134
  log.warn("GAS phase_synthesize failed: #{e.message}")
109
135
  []
@@ -111,7 +137,10 @@ module Legion
111
137
 
112
138
  # Phase 5: Deposit - atomic write to Apollo
113
139
  def phase_deposit(facts, _entities, _relations, _synthesis, audit_event)
114
- return { deposited: 0 } unless defined?(Runners::Knowledge)
140
+ unless defined?(Runners::Knowledge)
141
+ log.debug('GAS phase_deposit skipped reason=knowledge_runner_unavailable')
142
+ return { deposited: 0 }
143
+ end
115
144
 
116
145
  deposited = 0
117
146
  facts.each do |fact|
@@ -128,15 +157,24 @@ module Legion
128
157
  rescue StandardError => e
129
158
  log.warn("GAS deposit error: #{e.message}")
130
159
  end
160
+ log.info("GAS phase_deposit deposited=#{deposited} facts=#{facts.size}")
131
161
  { deposited: deposited }
132
162
  end
133
163
 
134
164
  # Phase 6: Anticipate - pre-cache likely follow-up questions
135
165
  def phase_anticipate(facts, _synthesis)
136
- return [] if facts.empty?
137
- return [] unless llm_available?
166
+ if facts.empty?
167
+ log.debug('GAS phase_anticipate skipped reason=no_facts')
168
+ return []
169
+ end
170
+ unless llm_available?
171
+ log.debug('GAS phase_anticipate skipped reason=llm_unavailable')
172
+ return []
173
+ end
138
174
 
139
- llm_anticipate(facts)
175
+ anticipations = llm_anticipate(facts)
176
+ log.debug("GAS phase_anticipate anticipations=#{anticipations.size}")
177
+ anticipations
140
178
  rescue StandardError => e
141
179
  log.warn("GAS phase_anticipate failed: #{e.message}")
142
180
  []
@@ -153,7 +191,9 @@ module Legion
153
191
  log.warn("GAS fetch_similar_entries failed for fact: #{e.message}")
154
192
  next
155
193
  end
156
- entries.uniq { |e| e[:id] }
194
+ unique = entries.uniq { |e| e[:id] }
195
+ log.debug("GAS fetch_similar_entries facts=#{facts.size} entries=#{unique.size}")
196
+ unique
157
197
  end
158
198
 
159
199
  def classify_relation(fact, entry)
@@ -14,11 +14,19 @@ module Legion
14
14
  'general' => :all
15
15
  }.freeze
16
16
 
17
+ CONTENT_TYPE_ALIASES = {
18
+ reasoning: :concept, analysis: :concept, explanation: :concept,
19
+ text: :observation, general: :observation, note: :observation, summary: :observation,
20
+ rule: :procedure, step: :procedure, instruction: :procedure,
21
+ link: :association, relation: :association, connection: :association,
22
+ inference: :association, implication: :association
23
+ }.freeze
24
+ DEFAULT_QUERY_STATUS = [:confirmed].freeze
25
+ UNSET = Object.new.freeze
26
+
17
27
  def store_knowledge(content:, content_type:, tags: [], source_agent: nil, context: {}, **)
18
- content_type = content_type.to_sym
19
- unless Helpers::Confidence::CONTENT_TYPES.include?(content_type)
20
- raise ArgumentError, "invalid content_type: #{content_type}. Must be one of #{Helpers::Confidence::CONTENT_TYPES}"
21
- end
28
+ content_type = normalize_content_type(content_type)
29
+ log.debug("Apollo Knowledge.store_knowledge content_type=#{content_type} tags=#{Array(tags).size} source_agent=#{source_agent || 'nil'} data_available=#{defined?(Legion::Data::Model::ApolloEntry) ? true : false}") # rubocop:disable Layout/LineLength
22
30
 
23
31
  if defined?(Legion::Data::Model::ApolloEntry)
24
32
  return handle_ingest(content: content, content_type: content_type,
@@ -36,6 +44,7 @@ module Legion
36
44
  end
37
45
 
38
46
  def query_knowledge(query:, limit: Helpers::GraphQuery.default_query_limit, min_confidence: Helpers::GraphQuery.default_query_min_confidence, status: %i[confirmed candidate], tags: nil, **) # rubocop:disable Layout/LineLength
47
+ log.debug("Apollo Knowledge.query_knowledge query_length=#{query.to_s.length} limit=#{limit} statuses=#{Array(status).join(',')} tags=#{Array(tags).size} data_available=#{defined?(Legion::Data::Model::ApolloEntry) ? true : false}") # rubocop:disable Layout/LineLength
39
48
  if defined?(Legion::Data::Model::ApolloEntry)
40
49
  return handle_query(query: query, limit: limit, min_confidence: min_confidence,
41
50
  status: status, tags: tags, **)
@@ -52,6 +61,7 @@ module Legion
52
61
  end
53
62
 
54
63
  def related_entries(entry_id:, relation_types: nil, depth: Helpers::GraphQuery.default_depth, **)
64
+ log.debug("Apollo Knowledge.related_entries entry_id=#{entry_id} depth=#{depth} relation_types=#{Array(relation_types).join(',')} data_available=#{defined?(Legion::Data::Model::ApolloEntry) ? true : false}") # rubocop:disable Layout/LineLength
55
65
  return handle_traverse(entry_id: entry_id, depth: depth, relation_types: relation_types, **) if defined?(Legion::Data::Model::ApolloEntry)
56
66
 
57
67
  {
@@ -70,74 +80,73 @@ module Legion
70
80
  }
71
81
  end
72
82
 
73
- def handle_ingest(content: nil, content_type: nil, tags: [], source_agent: 'unknown', source_provider: nil, source_channel: nil, knowledge_domain: nil, submitted_by: nil, submitted_from: nil, content_hash: nil, context: {}, skip: false, **) # rubocop:disable Metrics/ParameterLists, Layout/LineLength, Metrics/PerceivedComplexity, Metrics/CyclomaticComplexity
83
+ def handle_ingest(content: nil, content_type: nil, tags: [], source_agent: 'unknown', source_provider: nil, source_channel: nil, knowledge_domain: nil, submitted_by: nil, submitted_from: nil, content_hash: nil, context: {}, skip: false, **) # rubocop:disable Metrics/ParameterLists, Layout/LineLength
74
84
  return { status: :skipped } if skip
75
- return { success: false, error: 'content is required' } if content.nil? || content.to_s.strip.empty?
85
+
86
+ content = normalize_text_input(content)
87
+ log.debug("Apollo Knowledge.handle_ingest content_length=#{content.length} content_type=#{content_type} tags=#{Array(tags).size} source_agent=#{source_agent} source_channel=#{source_channel || 'nil'}") # rubocop:disable Layout/LineLength
88
+ return { success: false, error: 'content is required' } if content.strip.empty?
76
89
  return { success: false, error: 'content_type is required' } if content_type.nil?
77
90
  return { success: false, error: 'apollo_data_not_available' } unless defined?(Legion::Data::Model::ApolloEntry)
78
91
 
79
- # Content hash dedup
80
92
  hash = content_hash || (defined?(Helpers::Writeback) ? Helpers::Writeback.content_hash(content) : nil)
81
- if hash
82
- existing = Legion::Data::Model::ApolloEntry
83
- .where(content_hash: hash)
84
- .exclude(status: 'archived')
85
- .first
86
- if existing
87
- existing.update(confidence: [existing.confidence + Helpers::Confidence.retrieval_boost, 1.0].min)
88
- return { success: true, entry_id: existing.id, deduped: true }
89
- end
93
+ existing = active_duplicate_for_hash(hash)
94
+ if existing
95
+ log.info("Apollo Knowledge.handle_ingest deduped entry_id=#{existing.id} source_agent=#{source_agent}")
96
+ return { success: true, entry_id: existing.id, deduped: true }
90
97
  end
91
98
 
92
99
  embedding = embed_text(content)
93
100
  content_type_sym = content_type.to_s
94
- tag_array = defined?(Helpers::TagNormalizer) ? Helpers::TagNormalizer.normalize_all(tags) : Array(tags)
95
- domain = knowledge_domain || tag_array.first || 'general'
96
-
97
- corroborated, existing_id = find_corroboration(embedding, content_type_sym, source_agent, source_channel)
98
-
99
- unless corroborated
100
- new_entry = Legion::Data::Model::ApolloEntry.create(
101
- content: content,
102
- content_type: content_type_sym,
103
- confidence: Helpers::Confidence.initial_confidence,
104
- source_agent: source_agent,
105
- source_provider: source_provider || derive_provider_from_agent(source_agent),
106
- source_channel: source_channel,
107
- source_context: ::JSON.dump(context.is_a?(Hash) ? context : {}),
108
- tags: Sequel.pg_array(tag_array),
109
- status: 'candidate',
110
- knowledge_domain: domain,
111
- submitted_by: submitted_by,
112
- submitted_from: submitted_from,
113
- content_hash: hash,
114
- embedding: Sequel.lit("'[#{embedding.join(',')}]'::vector")
101
+ metadata = ingest_metadata(tags: tags, knowledge_domain: knowledge_domain, source_agent: source_agent,
102
+ source_provider: source_provider, source_channel: source_channel,
103
+ submitted_by: submitted_by, submitted_from: submitted_from)
104
+
105
+ corroborated, existing_id = find_corroboration(
106
+ embedding, content_type_sym, metadata[:source_agent], metadata[:source_channel]
107
+ )
108
+
109
+ if corroborated
110
+ log.info("Apollo Knowledge.handle_ingest corroborated entry_id=#{existing_id} source_agent=#{metadata[:source_agent]}")
111
+ else
112
+ existing_id = create_candidate_entry(
113
+ content: content, content_type: content_type_sym, context: context,
114
+ metadata: metadata, content_hash: hash, embedding: embedding
115
115
  )
116
- existing_id = new_entry.id
117
116
  end
118
117
 
119
- upsert_expertise(source_agent: source_agent, domain: domain)
118
+ upsert_expertise(source_agent: metadata[:source_agent], domain: metadata[:domain])
120
119
 
121
120
  Legion::Data::Model::ApolloAccessLog.create(
122
- entry_id: existing_id, agent_id: source_agent, action: 'ingest'
121
+ entry_id: existing_id, agent_id: metadata[:source_agent], action: 'ingest'
123
122
  )
124
123
 
125
124
  contradictions = detect_contradictions(existing_id, embedding, content)
125
+ log.debug("Apollo Knowledge.handle_ingest complete entry_id=#{existing_id} corroborated=#{corroborated} contradictions=#{contradictions.size}")
126
126
 
127
127
  { success: true, entry_id: existing_id, status: corroborated ? 'corroborated' : 'candidate',
128
128
  corroborated: corroborated, contradictions: contradictions }
129
129
  rescue Sequel::Error => e
130
+ handle_exception(e, level: :error, operation: 'apollo.knowledge.handle_ingest')
130
131
  { success: false, error: e.message }
131
132
  end
132
133
 
133
- def handle_query(query:, limit: Helpers::GraphQuery.default_query_limit, min_confidence: Helpers::GraphQuery.default_query_min_confidence, status: [:confirmed], tags: nil, domain: nil, agent_id: 'unknown', **) # rubocop:disable Layout/LineLength
134
+ def handle_query(query:, limit: Helpers::GraphQuery.default_query_limit, min_confidence: Helpers::GraphQuery.default_query_min_confidence, status: UNSET, tags: nil, domain: nil, agent_id: 'unknown', **) # rubocop:disable Layout/LineLength
134
135
  return { success: false, error: 'apollo_data_not_available' } unless defined?(Legion::Data::Model::ApolloEntry)
135
136
 
136
137
  query = normalize_text_input(query)
138
+ status_defaulted = status.equal?(UNSET)
139
+ requested_status = status_defaulted ? DEFAULT_QUERY_STATUS : status
140
+ log.debug("Apollo Knowledge.handle_query mode=#{browse_query?(query) ? 'browse' : 'semantic'} query_length=#{query.length} limit=#{limit} statuses=#{Array(requested_status).join(',')} status_defaulted=#{status_defaulted} tags=#{Array(tags).size} domain=#{domain || 'nil'} agent_id=#{agent_id}") # rubocop:disable Layout/LineLength
141
+ if browse_query?(query)
142
+ return list_entries_chronologically(query: query, limit: limit, status: requested_status,
143
+ status_defaulted: status_defaulted, tags: tags, domain: domain)
144
+ end
145
+
137
146
  embedding = embed_text(query)
138
147
  sql = Helpers::GraphQuery.build_semantic_search_sql(
139
148
  limit: limit, min_confidence: min_confidence,
140
- statuses: Array(status).map(&:to_s), tags: tags, domain: domain
149
+ statuses: Array(requested_status).map(&:to_s), tags: tags, domain: domain
141
150
  )
142
151
 
143
152
  db = Legion::Data::Model::ApolloEntry.db
@@ -168,14 +177,17 @@ module Legion
168
177
  knowledge_domain: entry[:knowledge_domain] }
169
178
  end
170
179
 
180
+ log.info("Apollo Knowledge.handle_query results=#{formatted.size} mode=semantic agent_id=#{agent_id}")
171
181
  { success: true, entries: formatted, count: formatted.size }
172
182
  rescue Sequel::Error => e
183
+ handle_exception(e, level: :error, operation: 'apollo.knowledge.handle_query')
173
184
  { success: false, error: e.message }
174
185
  end
175
186
 
176
187
  def handle_traverse(entry_id:, depth: Helpers::GraphQuery.default_depth, relation_types: nil, agent_id: 'unknown', **)
177
188
  return { success: false, error: 'apollo_data_not_available' } unless defined?(Legion::Data::Model::ApolloEntry)
178
189
 
190
+ log.debug("Apollo Knowledge.handle_traverse entry_id=#{entry_id} depth=#{depth} relation_types=#{Array(relation_types).join(',')} agent_id=#{agent_id}") # rubocop:disable Layout/LineLength
179
191
  # Whitelist relation_types to prevent SQL injection (they are string-interpolated in build_traversal_sql)
180
192
  if relation_types
181
193
  allowed = Helpers::Confidence::RELATION_TYPES
@@ -198,14 +210,17 @@ module Legion
198
210
  depth: entry[:depth], activation: entry[:activation] }
199
211
  end
200
212
 
213
+ log.info("Apollo Knowledge.handle_traverse results=#{formatted.size} entry_id=#{entry_id}")
201
214
  { success: true, entries: formatted, count: formatted.size }
202
215
  rescue Sequel::Error => e
216
+ handle_exception(e, level: :error, operation: 'apollo.knowledge.handle_traverse')
203
217
  { success: false, error: e.message }
204
218
  end
205
219
 
206
220
  def redistribute_knowledge(agent_id:, min_confidence: Helpers::Confidence.apollo_setting(:query, :redistribute_min_confidence, default: 0.5), **)
207
221
  return { success: false, error: 'apollo_data_not_available' } unless defined?(Legion::Data::Model::ApolloEntry)
208
222
 
223
+ log.debug("Apollo Knowledge.redistribute_knowledge agent_id=#{agent_id} min_confidence=#{min_confidence}")
209
224
  entries = Legion::Data::Model::ApolloEntry
210
225
  .where(source_agent: agent_id, status: 'confirmed')
211
226
  .where { confidence > min_confidence }
@@ -233,6 +248,7 @@ module Legion
233
248
  log.info("[apollo] redistributed #{redistributed} entries from departing agent=#{agent_id}")
234
249
  { success: true, redistributed: redistributed, agent_id: agent_id }
235
250
  rescue Sequel::Error => e
251
+ handle_exception(e, level: :error, operation: 'apollo.knowledge.redistribute_knowledge')
236
252
  { success: false, error: e.message }
237
253
  end
238
254
 
@@ -242,6 +258,7 @@ module Legion
242
258
  return { success: false, error: 'apollo_data_not_available' } unless defined?(Legion::Data::Model::ApolloEntry)
243
259
 
244
260
  query = normalize_text_input(query)
261
+ log.debug("Apollo Knowledge.retrieve_relevant query_length=#{query.length} limit=#{limit} min_confidence=#{min_confidence} tags=#{Array(tags).size} domain=#{domain || 'nil'}") # rubocop:disable Layout/LineLength
245
262
  return { success: true, entries: [], count: 0 } if query.nil? || query.to_s.strip.empty?
246
263
 
247
264
  embedding = embed_text(query)
@@ -268,8 +285,10 @@ module Legion
268
285
  knowledge_domain: entry[:knowledge_domain] }
269
286
  end
270
287
 
288
+ log.info("Apollo Knowledge.retrieve_relevant results=#{formatted.size} limit=#{limit}")
271
289
  { success: true, entries: formatted, count: formatted.size }
272
290
  rescue Sequel::Error => e
291
+ handle_exception(e, level: :error, operation: 'apollo.knowledge.retrieve_relevant')
273
292
  { success: false, error: e.message }
274
293
  end
275
294
 
@@ -278,6 +297,7 @@ module Legion
278
297
  return { success: false, error: 'apollo_data_not_available' }
279
298
  end
280
299
 
300
+ log.debug("Apollo Knowledge.prepare_mesh_export target_domain=#{target_domain} min_confidence=#{min_confidence} limit=#{limit}")
281
301
  conn = Legion::Data.connection
282
302
  allowed = allowed_domains_for(target_domain)
283
303
 
@@ -297,7 +317,9 @@ module Legion
297
317
  end
298
318
 
299
319
  { success: true, entries: formatted, count: formatted.size, target_domain: target_domain }
320
+ .tap { |result| log.info("Apollo Knowledge.prepare_mesh_export results=#{result[:count]} target_domain=#{target_domain}") }
300
321
  rescue Sequel::Error => e
322
+ handle_exception(e, level: :error, operation: 'apollo.knowledge.prepare_mesh_export')
301
323
  { success: false, error: e.message }
302
324
  end
303
325
 
@@ -306,6 +328,7 @@ module Legion
306
328
  return { deleted: 0, redacted: 0, error: 'apollo_data_not_available' }
307
329
  end
308
330
 
331
+ log.warn("Apollo Knowledge.handle_erasure_request agent_id=#{agent_id}")
309
332
  conn = Legion::Data.connection
310
333
 
311
334
  # Delete entries solely from dead agent (not confirmed by others)
@@ -320,29 +343,142 @@ module Legion
320
343
  .update(source_agent: 'redacted', source_provider: nil, source_channel: nil)
321
344
 
322
345
  { deleted: deleted, redacted: redacted, agent_id: agent_id }
346
+ .tap { |result| log.info("Apollo Knowledge.handle_erasure_request deleted=#{result[:deleted]} redacted=#{result[:redacted]} agent_id=#{agent_id}") } # rubocop:disable Layout/LineLength
323
347
  rescue Sequel::Error => e
348
+ handle_exception(e, level: :error, operation: 'apollo.knowledge.handle_erasure_request')
324
349
  { deleted: 0, redacted: 0, error: e.message }
325
350
  end
326
351
 
352
+ CONFLICT_CHECK_MAX_CHARS = 4000
353
+
327
354
  private
328
355
 
356
# Coerces a caller- or LLM-supplied content type into a supported symbol.
#
# The raw value is stringified, trimmed, relieved of one leading ':',
# has '/' and whitespace rewritten to '_', and is lower-cased. The token is
# then looked up in CONTENT_TYPE_ALIASES and finally checked against
# Helpers::Confidence::CONTENT_TYPES.
#
# @param raw [#to_s] content type as provided by the caller
# @return [Symbol] a member of Helpers::Confidence::CONTENT_TYPES, or
#   :observation when the value is not recognised
def normalize_content_type(raw)
  # Trim BEFORE rewriting whitespace: once blanks have become '_' there is
  # nothing left for #strip to remove, so " fact " used to yield :_fact_.
  token = raw.to_s.strip.delete_prefix(':').gsub(%r{[/\s]}, '_').downcase
  sym = token.to_sym
  sym = CONTENT_TYPE_ALIASES.fetch(sym, sym)
  Helpers::Confidence::CONTENT_TYPES.include?(sym) ? sym : :observation
end
361
+
329
362
# Generates an embedding vector for +text+ through Legion::LLM::Embeddings.
#
# The text is normalized first. The generator may answer with either a Hash
# carrying :vector or the vector itself. Whenever no non-empty Array comes
# back, or anything raises a StandardError, a 1024-element zero vector is
# returned and a warning is logged.
#
# @param text [Object] text to embed
# @return [Array] the generated vector, or Array.new(1024, 0.0) as fallback
def embed_text(text)
  normalized = normalize_text_input(text)
  log.debug("Apollo Knowledge.embed_text text_length=#{normalized.length}")
  response = Legion::LLM::Embeddings.generate(text: normalized)
  candidate = response.is_a?(Hash) ? response[:vector] : response

  unless candidate.is_a?(Array) && candidate.any?
    log.warn('Apollo Knowledge.embed_text returned no vector; using zero-vector fallback')
    return Array.new(1024, 0.0)
  end

  log.debug("Apollo Knowledge.embed_text vector_dimensions=#{candidate.length}")
  candidate
rescue StandardError => e
  log.warn("Apollo Knowledge.embed_text failed: #{e.message}")
  Array.new(1024, 0.0)
end
338
378
 
339
379
# Normalizes arbitrary input into a String that is safe to store.
#
# Delegates to Legion::Apollo.normalize_text_input when that (possibly
# private) method is available, otherwise uses the value itself. The result
# is always coerced with #to_s and passed through sanitize_for_postgres.
#
# @param value [Object] raw text-like input
# @return [String] sanitized text; '' when normalization raises
def normalize_text_input(value)
  normalized = if defined?(Legion::Apollo) && Legion::Apollo.respond_to?(:normalize_text_input, true)
                 Legion::Apollo.send(:normalize_text_input, value)
               else
                 value
               end

  # Callers invoke String methods (#length, #[]) on the result, and
  # sanitize_for_postgres passes non-Strings through untouched, so coerce
  # here to guarantee a String even if the delegate hands back nil.
  sanitize_for_postgres(normalized.to_s)
rescue StandardError => e
  log.warn("Apollo Knowledge.normalize_text_input failed: #{e.message}")
  ''
end
391
+
392
# Returns a UTF-8 copy of +value+ with invalid byte sequences and NUL bytes
# removed. Non-String values are returned unchanged.
#
# Strings tagged UTF-8, BINARY or US-ASCII are reinterpreted as UTF-8, since
# those tags commonly carry raw UTF-8 bytes. Strings in any other encoding
# are transcoded, so validly encoded characters (e.g. ISO-8859-1 "é") are
# preserved instead of being scrubbed away as invalid UTF-8.
#
# @param value [Object] candidate value for a text column
# @return [Object] sanitized String, or +value+ itself when not a String
def sanitize_for_postgres(value)
  return value unless value.is_a?(String)

  reinterpret = [Encoding::UTF_8, Encoding::BINARY, Encoding::US_ASCII].include?(value.encoding)
  string = if reinterpret
             value.dup.force_encoding(Encoding::UTF_8)
           else
             begin
               value.encode(Encoding::UTF_8, invalid: :replace, undef: :replace, replace: '')
             rescue EncodingError
               # No converter for this encoding; fall back to treating the bytes as UTF-8.
               value.dup.force_encoding(Encoding::UTF_8)
             end
           end

  # Scrub before deleting so #delete never sees a broken string.
  sanitized = string.scrub('').delete("\x00")
  # Both steps only ever remove bytes, so a size difference means something was stripped.
  if sanitized.bytesize != string.bytesize
    log.debug("Apollo Knowledge.sanitize_for_postgres sanitized original_length=#{value.bytesize} sanitized_length=#{sanitized.bytesize}") # rubocop:disable Layout/LineLength
  end
  sanitized
end
402
+
403
# Normalizes +value+ and cuts it to at most +max_length+ characters.
#
# @param value [Object, nil] value destined for a length-limited column
# @param max_length [Integer] maximum number of characters to keep
# @return [String, nil] the shortened text, or nil when +value+ is nil
def truncate_for_column(value, max_length)
  value.nil? ? nil : normalize_text_input(value).slice(0, max_length)
end
408
+
409
# Looks up a non-archived entry with the given content hash and, when one
# exists, raises its confidence by the retrieval boost (capped at 1.0).
#
# @param hash [String, nil] content hash to look up
# @return [Object, nil] the matching entry, or nil when +hash+ is falsy or
#   nothing matches
def active_duplicate_for_hash(hash)
  return nil unless hash

  match = Legion::Data::Model::ApolloEntry.where(content_hash: hash).exclude(status: 'archived').first
  return unless match

  boosted = match.confidence + Helpers::Confidence.retrieval_boost
  match.update(confidence: [boosted, 1.0].min)
  log.debug("Apollo Knowledge.active_duplicate_for_hash matched entry_id=#{match.id}")
  match
end
420
+
421
# Assembles the metadata hash written alongside an ingested entry.
#
# Tags go through Helpers::TagNormalizer when that constant is defined,
# otherwise they are wrapped with Array(). Each text field is shortened via
# truncate_for_column (50, 100 or 255 characters, as listed below). Missing
# values default as follows: agent -> 'unknown', domain -> first tag or
# 'general', provider -> derive_provider_from_agent(agent).
#
# @return [Hash] keys :tags, :domain, :source_agent, :source_provider,
#   :source_channel, :submitted_by, :submitted_from
def ingest_metadata(tags:, knowledge_domain:, source_agent:, source_provider:, source_channel:, submitted_by:, submitted_from:)
  normalized_tags = if defined?(Helpers::TagNormalizer)
                      Helpers::TagNormalizer.normalize_all(tags)
                    else
                      Array(tags)
                    end
  agent_name = truncate_for_column(source_agent, 50) || 'unknown'

  domain = truncate_for_column(knowledge_domain || normalized_tags.first || 'general', 50)
  provider = truncate_for_column(source_provider || derive_provider_from_agent(agent_name), 50)
  channel = truncate_for_column(source_channel, 100)
  author = truncate_for_column(submitted_by, 255)
  origin = truncate_for_column(submitted_from, 255)

  {
    tags: normalized_tags,
    domain: domain,
    source_agent: agent_name,
    source_provider: provider,
    source_channel: channel,
    submitted_by: author,
    submitted_from: origin
  }
end
433
+
434
# Inserts a new ApolloEntry with status 'candidate' and returns its id.
#
# +context+ is stored as JSON; anything that is not a Hash is stored as an
# empty object. The embedding is written as a literal "'[..]'::vector"
# expression built by joining the elements with commas.
# NOTE(review): the vector literal is string-interpolated; this assumes the
# embedding only ever contains numbers - confirm against the embedding source.
#
# @param metadata [Hash] as produced by ingest_metadata (reads :source_agent,
#   :source_provider, :source_channel, :tags, :domain, :submitted_by,
#   :submitted_from)
# @return [Object] id of the created entry
def create_candidate_entry(content:, content_type:, context:, metadata:, content_hash:, embedding:)
  attributes = {
    content: content,
    content_type: content_type,
    confidence: Helpers::Confidence.initial_confidence,
    source_agent: metadata[:source_agent],
    source_provider: metadata[:source_provider],
    source_channel: metadata[:source_channel],
    source_context: ::JSON.dump(context.is_a?(Hash) ? context : {}),
    tags: Sequel.pg_array(metadata[:tags]),
    status: 'candidate',
    knowledge_domain: metadata[:domain],
    submitted_by: metadata[:submitted_by],
    submitted_from: metadata[:submitted_from],
    content_hash: content_hash,
    embedding: Sequel.lit("'[#{embedding.join(',')}]'::vector")
  }
  created = Legion::Data::Model::ApolloEntry.create(attributes)
  log.info("Apollo Knowledge.handle_ingest created entry_id=#{created.id} status=candidate domain=#{metadata[:domain]} source_agent=#{metadata[:source_agent]}") # rubocop:disable Layout/LineLength
  created.id
end
454
+
455
# Tells whether +query+ should be handled as a browse/list request: true
# when its stringified, stripped form has fewer than three characters.
#
# @param query [Object] the raw query
# @return [Boolean]
def browse_query?(query)
  trimmed = query.to_s.strip
  trimmed.size <= 2
end
458
+
459
# Lists non-archived entries ordered by created_at descending, without any
# embedding lookup.
#
# The status filter is applied only when the caller passed statuses
# explicitly (+status_defaulted+ is falsy) and at least one non-empty status
# remains. Tag and domain filters are applied only when non-empty.
#
# @return [Hash] { success: true, mode: :browse, query:, entries:, count: },
#   or { success: false, error: } when a Sequel::Error is raised
def list_entries_chronologically(query:, limit:, status:, status_defaulted:, tags:, domain:)
  log.debug("Apollo Knowledge.list_entries_chronologically limit=#{limit} statuses=#{Array(status).join(',')} status_defaulted=#{status_defaulted} tags=#{Array(tags).size} domain=#{domain || 'nil'}") # rubocop:disable Layout/LineLength
  scope = Legion::Data::Model::ApolloEntry.exclude(status: 'archived')

  statuses = Array(status).map(&:to_s).reject(&:empty?)
  scope = scope.where(status: statuses) if !status_defaulted && !statuses.empty?

  tag_list = Array(tags)
  scope = scope.where(Sequel.lit('tags && ?', Sequel.pg_array(tag_list))) if tags && !tag_list.empty?

  scope = scope.where(knowledge_domain: domain) unless !domain || domain.to_s.empty?

  rows = scope.order(Sequel.desc(:created_at)).limit(limit).all
  # Rows may already be plain hashes; model instances expose their columns via #values.
  entries = rows.map { |row| format_entry(row.is_a?(Hash) ? row : row.values) }
  log.info("Apollo Knowledge.list_entries_chronologically results=#{entries.size}")
  { success: true, mode: :browse, query: query, entries: entries, count: entries.size }
rescue Sequel::Error => e
  handle_exception(e, level: :error, operation: 'apollo.knowledge.list_entries_chronologically')
  { success: false, error: e.message }
end
476
+
477
# Projects a stored entry onto the fields returned to callers.
#
# Only the listed keys are copied, in this order; :distance is converted
# with #to_f when it is not nil.
#
# @param entry [#[]] row data addressable by symbol keys
# @return [Hash] keys :id, :content, :content_type, :confidence, :distance,
#   :tags, :source_agent, :knowledge_domain
def format_entry(entry)
  fields = %i[id content content_type confidence distance tags source_agent knowledge_domain]
  formatted = fields.to_h { |field| [field, entry[field]] }
  formatted[:distance] = formatted[:distance]&.to_f
  formatted
end
347
483
 
348
484
  def allowed_domains_for(target_domain)
@@ -366,6 +502,7 @@ module Legion
366
502
  rel_weight = Helpers::Confidence.apollo_setting(:contradiction, :relation_weight, default: 0.8)
367
503
 
368
504
  db = Legion::Data::Model::ApolloEntry.db
505
+ log.debug("Apollo Knowledge.detect_contradictions entry_id=#{entry_id} similar_limit=#{sim_limit} threshold=#{sim_threshold}")
369
506
  similar = db.fetch(
370
507
  "SELECT id, content, embedding FROM apollo_entries WHERE id != :entry_id AND embedding IS NOT NULL ORDER BY embedding <=> :embedding LIMIT #{sim_limit}", # rubocop:disable Layout/LineLength
371
508
  entry_id: entry_id,
@@ -390,19 +527,22 @@ module Legion
390
527
  Legion::Data::Model::ApolloEntry.where(id: [entry_id, existing[:id]]).update(status: 'disputed')
391
528
  contradictions << existing[:id]
392
529
  end
530
+ log.info("Apollo Knowledge.detect_contradictions entry_id=#{entry_id} contradictions=#{contradictions.size}") if contradictions.any?
393
531
  contradictions
394
532
  rescue Sequel::Error => e
395
- log.warn("Apollo Knowledge.detect_contradictions failed: #{e.message}")
533
+ handle_exception(e, level: :error, operation: 'apollo.knowledge.detect_contradictions')
396
534
  []
397
535
  end
398
536
 
399
537
  def llm_detects_conflict?(content_a, content_b)
400
538
  return false unless defined?(Legion::LLM) && Legion::LLM.respond_to?(:structured)
401
539
 
540
+ a = content_a.to_s[0, CONFLICT_CHECK_MAX_CHARS]
541
+ b = content_b.to_s[0, CONFLICT_CHECK_MAX_CHARS]
402
542
  result = Legion::LLM.structured(
403
543
  messages: [
404
544
  { role: 'system', content: 'Do these two statements contradict each other? Return JSON.' },
405
- { role: 'user', content: "A: #{content_a}\n\nB: #{content_b}" }
545
+ { role: 'user', content: "A: #{a}\n\nB: #{b}" }
406
546
  ],
407
547
  schema: { type: 'object', properties: { contradicts: { type: 'boolean' } } },
408
548
  caller: { extension: 'lex-apollo', runner: 'knowledge' }
@@ -415,6 +555,7 @@ module Legion
415
555
 
416
556
  def find_corroboration(embedding, content_type_sym, source_agent, source_channel = nil)
417
557
  scan_limit = Helpers::Confidence.apollo_setting(:corroboration, :scan_limit, default: 50)
558
+ log.debug("Apollo Knowledge.find_corroboration content_type=#{content_type_sym} source_agent=#{source_agent} source_channel=#{source_channel || 'nil'} scan_limit=#{scan_limit}") # rubocop:disable Layout/LineLength
418
559
  existing = Legion::Data::Model::ApolloEntry
419
560
  .where(content_type: content_type_sym)
420
561
  .exclude(embedding: nil)
@@ -447,9 +588,11 @@ module Legion
447
588
  source_agent: source_agent,
448
589
  weight: sim
449
590
  )
591
+ log.info("Apollo Knowledge.find_corroboration matched entry_id=#{entry.id} source_agent=#{source_agent} similarity=#{sim}")
450
592
  return [true, entry.id]
451
593
  end
452
594
 
595
+ log.debug("Apollo Knowledge.find_corroboration no_match source_agent=#{source_agent}")
453
596
  [false, nil]
454
597
  end
455
598
 
@@ -468,6 +611,7 @@ module Legion
468
611
  end
469
612
 
470
613
  def upsert_expertise(source_agent:, domain:)
614
+ log.debug("Apollo Knowledge.upsert_expertise source_agent=#{source_agent} domain=#{domain}")
471
615
  expertise = Legion::Data::Model::ApolloExpertise
472
616
  .where(agent_id: source_agent, domain: domain).first
473
617
  if expertise
@@ -25,7 +25,11 @@ module Legion
25
25
  min_confidence ||= Helpers::Confidence.decay_threshold
26
26
  min_age_hours = Helpers::Confidence.decay_min_age_hours
27
27
 
28
- return { decayed: 0, archived: 0 } unless defined?(Legion::Data) && Legion::Data.respond_to?(:connection) && Legion::Data.connection
28
+ log.debug("Apollo Maintenance.run_decay_cycle alpha=#{alpha} min_confidence=#{min_confidence} min_age_hours=#{min_age_hours}")
29
+ unless defined?(Legion::Data) && Legion::Data.respond_to?(:connection) && Legion::Data.connection
30
+ log.warn('Apollo Maintenance.run_decay_cycle skipped: apollo_data_not_available')
31
+ return { decayed: 0, archived: 0 }
32
+ end
29
33
 
30
34
  conn = Legion::Data.connection
31
35
 
@@ -54,15 +58,21 @@ module Legion
54
58
 
55
59
  { decayed: decayed, archived: archived, alpha: alpha, threshold: min_confidence,
56
60
  min_age_hours: min_age_hours }
61
+ .tap { |result| log.info("Apollo Maintenance.run_decay_cycle decayed=#{result[:decayed]} archived=#{result[:archived]} alpha=#{alpha} threshold=#{min_confidence}") } # rubocop:disable Layout/LineLength
57
62
  rescue Sequel::Error => e
63
+ handle_exception(e, level: :error, operation: 'apollo.maintenance.run_decay_cycle')
58
64
  { decayed: 0, archived: 0, error: e.message }
59
65
  end
60
66
 
61
67
  def check_corroboration(**) # rubocop:disable Metrics/CyclomaticComplexity
62
- return { success: false, error: 'apollo_data_not_available' } unless defined?(Legion::Data::Model::ApolloEntry)
68
+ unless defined?(Legion::Data::Model::ApolloEntry)
69
+ log.warn('Apollo Maintenance.check_corroboration skipped: apollo_data_not_available')
70
+ return { success: false, error: 'apollo_data_not_available' }
71
+ end
63
72
 
64
73
  candidates = Legion::Data::Model::ApolloEntry.where(status: 'candidate').exclude(embedding: nil).all
65
74
  confirmed = Legion::Data::Model::ApolloEntry.where(status: 'confirmed').exclude(embedding: nil).all
75
+ log.debug("Apollo Maintenance.check_corroboration candidates=#{candidates.size} confirmed=#{confirmed.size}")
66
76
 
67
77
  promoted = 0
68
78
 
@@ -106,7 +116,9 @@ module Legion
106
116
  end
107
117
 
108
118
  { success: true, promoted: promoted, scanned: candidates.size }
119
+ .tap { |result| log.info("Apollo Maintenance.check_corroboration scanned=#{result[:scanned]} promoted=#{result[:promoted]}") }
109
120
  rescue Sequel::Error => e
121
+ handle_exception(e, level: :error, operation: 'apollo.maintenance.check_corroboration')
110
122
  { success: false, error: e.message }
111
123
  end
112
124
 
@@ -3,7 +3,7 @@
3
3
  module Legion
4
4
  module Extensions
5
5
  module Apollo
6
- VERSION = '0.4.18'
6
+ VERSION = '0.4.20'
7
7
  end
8
8
  end
9
9
  end
@@ -9,6 +9,27 @@ RSpec.describe 'Apollo Contradiction Detection' do
9
9
  it 'returns false when LLM unavailable' do
10
10
  expect(knowledge.send(:llm_detects_conflict?, 'sky is blue', 'sky is red')).to be false
11
11
  end
12
+
13
+ context 'when LLM is available' do
14
+ let(:llm_mod) do
15
+ Module.new do
16
+ def self.respond_to?(*) = true
17
+ def self.structured(**) = { data: { contradicts: true } }
18
+ end
19
+ end
20
+
21
+ before { stub_const('Legion::LLM', llm_mod) }
22
+
23
+ it 'truncates content longer than CONFLICT_CHECK_MAX_CHARS' do
24
+ long_text = 'x' * 10_000
25
+ allow(llm_mod).to receive(:structured).and_return({ data: { contradicts: false } })
26
+ knowledge.send(:llm_detects_conflict?, long_text, long_text)
27
+ expect(llm_mod).to have_received(:structured) do |**kwargs|
28
+ user_msg = kwargs[:messages].find { |m| m[:role] == 'user' }[:content]
29
+ expect(user_msg.length).to be < 10_000
30
+ end
31
+ end
32
+ end
12
33
  end
13
34
 
14
35
  describe '#detect_contradictions' do
@@ -44,6 +44,20 @@ RSpec.describe Legion::Extensions::Apollo::GaiaIntegration do
44
44
  )
45
45
  expect(result).to eq({ success: true })
46
46
  end
47
+
48
+ it 'passes :observation as content_type regardless of domain' do
49
+ client_double = instance_double(Legion::Extensions::Apollo::Client)
50
+ allow(Legion::Extensions::Apollo::Client).to receive(:new).and_return(client_double)
51
+ allow(client_double).to receive(:store_knowledge).and_return({ success: true })
52
+
53
+ described_class.publish_insight(
54
+ { confidence: 0.9, novelty: 0.5, content: 'insight', domain: 'clinical' },
55
+ agent_id: 'test-agent'
56
+ )
57
+ expect(client_double).to have_received(:store_knowledge).with(
58
+ hash_including(content_type: :observation)
59
+ )
60
+ end
47
61
  end
48
62
 
49
63
  describe 'entity watchdog phase handler' do
@@ -45,10 +45,41 @@ RSpec.describe Legion::Extensions::Apollo::Runners::Knowledge do
45
45
  expect(result[:source_agent]).to eq('worker-1')
46
46
  end
47
47
 
48
- it 'rejects invalid content_type' do
49
- expect do
50
- runner.store_knowledge(content: 'test', content_type: :invalid)
51
- end.to raise_error(ArgumentError, /content_type/)
48
+ it 'falls back to :observation for unrecognized content_type' do
49
+ result = runner.store_knowledge(content: 'test', content_type: 'invalid_type')
50
+ expect(result[:content_type]).to eq(:observation)
51
+ end
52
+
53
+ it 'normalizes LLM-provided content_type "reasoning" to :concept' do
54
+ result = runner.store_knowledge(content: 'test', content_type: 'reasoning')
55
+ expect(result[:content_type]).to eq(:concept)
56
+ end
57
+
58
+ it 'normalizes "text" to :observation' do
59
+ result = runner.store_knowledge(content: 'test', content_type: 'text')
60
+ expect(result[:content_type]).to eq(:observation)
61
+ end
62
+
63
+ it 'normalizes "text/plain" to :observation' do
64
+ result = runner.store_knowledge(content: 'test', content_type: 'text/plain')
65
+ expect(result[:content_type]).to eq(:observation)
66
+ end
67
+
68
+ it 'strips leading colon from ":fact"' do
69
+ result = runner.store_knowledge(content: 'test', content_type: ':fact')
70
+ expect(result[:content_type]).to eq(:fact)
71
+ end
72
+
73
+ it 'normalizes "inference" to :association' do
74
+ result = runner.store_knowledge(content: 'test', content_type: 'inference')
75
+ expect(result[:content_type]).to eq(:association)
76
+ end
77
+
78
+ it 'accepts all valid CONTENT_TYPES unchanged' do
79
+ %i[fact concept procedure association observation].each do |ct|
80
+ result = runner.store_knowledge(content: 'test', content_type: ct)
81
+ expect(result[:content_type]).to eq(ct)
82
+ end
52
83
  end
53
84
  end
54
85
 
@@ -232,6 +263,36 @@ RSpec.describe Legion::Extensions::Apollo::Runners::Knowledge do
232
263
  tags: ['RabbitMQ'], source_agent: 'agent-1')
233
264
  end
234
265
 
266
+ it 'sanitizes null bytes before storing content' do
267
+ expect(mock_entry_class).to receive(:create).with(
268
+ hash_including(content: 'helloworld')
269
+ ).and_return(mock_entry)
270
+ host.handle_ingest(content: "hello\x00world", content_type: 'fact', source_agent: 'agent-1')
271
+ end
272
+
273
+ it 'truncates short varchar metadata fields at the database boundary' do
274
+ expect(mock_entry_class).to receive(:create).with(
275
+ hash_including(
276
+ source_agent: 'a' * 50,
277
+ source_provider: 'p' * 50,
278
+ source_channel: 'c' * 100,
279
+ knowledge_domain: 'd' * 50,
280
+ submitted_by: 'u' * 255,
281
+ submitted_from: 'n' * 255
282
+ )
283
+ ).and_return(mock_entry)
284
+ host.handle_ingest(
285
+ content: 'test',
286
+ content_type: 'fact',
287
+ source_agent: 'a' * 60,
288
+ source_provider: 'p' * 60,
289
+ source_channel: 'c' * 120,
290
+ knowledge_domain: 'd' * 60,
291
+ submitted_by: 'u' * 300,
292
+ submitted_from: 'n' * 300
293
+ )
294
+ end
295
+
235
296
  context 'content hash dedup' do
236
297
  let(:existing_entry) do
237
298
  double('existing', id: 'uuid-existing', confidence: 0.6,
@@ -264,9 +325,16 @@ RSpec.describe Legion::Extensions::Apollo::Runners::Knowledge do
264
325
  end
265
326
 
266
327
  it 'returns a structured error' do
328
+ allow(host).to receive(:handle_exception)
329
+
267
330
  result = host.handle_ingest(content: 'test', content_type: 'fact', source_agent: 'a')
268
331
  expect(result[:success]).to be false
269
332
  expect(result[:error]).to eq('connection lost')
333
+ expect(host).to have_received(:handle_exception).with(
334
+ instance_of(Sequel::Error),
335
+ level: :error,
336
+ operation: 'apollo.knowledge.handle_ingest'
337
+ )
270
338
  end
271
339
  end
272
340
  end
@@ -345,6 +413,64 @@ RSpec.describe Legion::Extensions::Apollo::Runners::Knowledge do
345
413
  expect(result[:count]).to eq(0)
346
414
  end
347
415
  end
416
+
417
+ context 'when query is browse-shaped' do
418
+ let(:mock_entry_class) { double('ApolloEntry') }
419
+ let(:dataset) { double('dataset') }
420
+ let(:entries) do
421
+ [{ id: 'uuid-1', content: 'Candidate fact', content_type: 'fact',
422
+ confidence: 0.7, tags: ['ruby'], source_agent: 'agent-1',
423
+ knowledge_domain: 'general' }]
424
+ end
425
+
426
+ before do
427
+ stub_const('Legion::Data::Model::ApolloEntry', mock_entry_class)
428
+ allow(mock_entry_class).to receive(:exclude).with(status: 'archived').and_return(dataset)
429
+ allow(dataset).to receive(:where).and_return(dataset)
430
+ allow(dataset).to receive(:order).with(:created_at).and_return(dataset)
431
+ allow(dataset).to receive(:limit).with(50).and_return(dataset)
432
+ allow(dataset).to receive(:all).and_return(entries)
433
+ end
434
+
435
+ it 'lists recent non-archived entries without generating an embedding' do
436
+ expect(Legion::LLM::Embeddings).not_to receive(:generate)
437
+
438
+ result = host.handle_query(query: 'x', limit: 50)
439
+
440
+ expect(result[:success]).to be true
441
+ expect(result[:mode]).to eq(:browse)
442
+ expect(result[:count]).to eq(1)
443
+ expect(result[:entries].first[:content]).to eq('Candidate fact')
444
+ expect(dataset).not_to have_received(:where).with(status: ['confirmed'])
445
+ end
446
+
447
+ it 'respects an explicit confirmed status filter' do
448
+ host.handle_query(query: 'x', limit: 50, status: [:confirmed])
449
+
450
+ expect(dataset).to have_received(:where).with(status: ['confirmed'])
451
+ end
452
+
453
+ it 'applies tags and domain filters when provided' do
454
+ host.handle_query(query: 'x', limit: 50, tags: ['ruby'], domain: 'general')
455
+
456
+ expect(dataset).to have_received(:where).with('tags && ?')
457
+ expect(dataset).to have_received(:where).with(knowledge_domain: 'general')
458
+ end
459
+ end
460
+ end
461
+
462
+ describe '#normalize_text_input' do
463
+ let(:host) { Object.new.extend(described_class) }
464
+
465
+ it 'strips null bytes in the local fallback path' do
466
+ expect(host.send(:normalize_text_input, "hello\x00world")).to eq('helloworld')
467
+ end
468
+
469
+ it 'scrubs invalid UTF-8 in the local fallback path' do
470
+ invalid = "hello\xC3(world".dup.force_encoding(Encoding::UTF_8)
471
+
472
+ expect(host.send(:normalize_text_input, invalid)).to eq('hello(world')
473
+ end
348
474
  end
349
475
 
350
476
  describe '#retrieve_relevant' do
data/spec/spec_helper.rb CHANGED
@@ -16,6 +16,7 @@ unless defined?(Sequel)
16
16
 
17
17
  def self.pg_array(arr) = arr
18
18
  def self.lit(str, *) = str
19
+ def self.desc(sym) = sym
19
20
  Expr = Struct.new(:value) do
20
21
  def +(other) = "#{value} + #{other}"
21
22
  def *(other) = "#{value} * #{other}"
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: lex-apollo
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.18
4
+ version: 0.4.20
5
5
  platform: ruby
6
6
  authors:
7
7
  - Esity