lex-apollo 0.4.19 → 0.4.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 1bf0ca8790d13fc3d262ede810ce50a47146dc37743e8806696de970b8a0385a
4
- data.tar.gz: 1f1a9115e1a1bb36423150a7290a82352f07aef7aa41f0b4b7da8e0ff5b76f94
3
+ metadata.gz: 31dc882a8d5fd761bceaba21ff853a3d3dd898df9d4377464b72b110d306af0a
4
+ data.tar.gz: 8dcfd2bc7c2557b298f3d503cc94b07e5a78b5a9777789f724dede41b9d0efdb
5
5
  SHA512:
6
- metadata.gz: 3d92269898b53825fff831ef253fc44ac9e9a45602da25628f32fc95c1e6a53edfd86add2f02f7131c449cd1b940258d2c8fc4df3b90ca6306affbf8a76d4ccb
7
- data.tar.gz: d6c98fc7a7f351dc929e2d2dac4c9d7d119e32328706c9880e19a6bc3edb53448b008d8a8fefd7f7e8b8a201084f4ec78006088954cb7e56ee72cb312f0d5ce6
6
+ metadata.gz: 5e066b3c62555b4866bfe99d6bed13a034e2b6b23f613ae7906e8eaebee90ecb2f9dd937a5e928dd8337af8945bfba9a1528deb4d3200eb82b8a92b081b6cf59
7
+ data.tar.gz: dd1d625f4efc30df8951ee80db9ee0cfc1c25137bae5d8571dd09265bd5eec79add3482eba3cc88d77f43b052994597577abe634080cd995eb4dc0db7601458e
data/CHANGELOG.md CHANGED
@@ -1,5 +1,19 @@
1
1
  # Changelog
2
2
 
3
+ ## [0.4.21] - 2026-04-27
4
+
5
+ ### Changed
6
+ - `Apollo::Runners::Knowledge#handle_ingest` now emits warn-level logs on the three early-return failure paths (nil/blank content, nil content_type, apollo_data_not_available). Companion to PR #15: that PR added `handle_exception` to the rescue paths; this PR closes the silent-failure window for the early-return paths that fire BEFORE any rescue would. Tag values in the log line are sanitized via `gsub(/[\r\n]+/, ' ')` to prevent log-line injection from caller-controlled tags.
7
+
8
+ ## [0.4.20] - 2026-04-25
9
+
10
+ ### Fixed
11
+ - `Knowledge#handle_ingest` now sanitizes stored content for PostgreSQL by scrubbing invalid UTF-8 and removing null bytes before inserts.
12
+ - `Knowledge#handle_ingest` now truncates short metadata fields to schema-safe lengths before writing entries.
13
+ - `Knowledge#handle_query` now treats very short queries as browse/list requests, returning recent non-archived entries without embedding noise while preserving explicit status filters.
14
+ - `Knowledge` Sequel rescue paths now log exception class, message, and a short backtrace before returning structured errors.
15
+ - Apollo runners and actors now emit debug/info lifecycle logs around ingest, query, browse, maintenance, expertise, entity extraction, writeback vectorization, entity watchdog, and GAS pipeline paths so debug-level logging can trace workflows end to end.
16
+
3
17
  ## [0.4.19] - 2026-04-24
4
18
 
5
19
  ### Fixed
@@ -34,6 +34,7 @@ module Legion
34
34
 
35
35
  def scan_and_ingest
36
36
  texts = recent_task_log_texts
37
+ log.debug("EntityWatchdog scan_and_ingest log_texts=#{texts.size}")
37
38
  return { success: true, ingested: 0, reason: :no_logs } if texts.empty?
38
39
 
39
40
  ingested = 0
@@ -53,10 +54,10 @@ module Legion
53
54
  end
54
55
  end
55
56
 
56
- log.debug("EntityWatchdog: ingested #{ingested} new entities from #{texts.size} log entries")
57
+ log.info("EntityWatchdog ingested=#{ingested} logs_scanned=#{texts.size}")
57
58
  { success: true, ingested: ingested, logs_scanned: texts.size }
58
59
  rescue StandardError => e
59
- log.error("EntityWatchdog scan_and_ingest failed: #{e.message}")
60
+ handle_exception(e, level: :error, operation: 'apollo.entity_watchdog.scan_and_ingest')
60
61
  { success: false, error: e.message }
61
62
  end
62
63
 
@@ -71,7 +72,9 @@ module Legion
71
72
  .order(Sequel.desc(:created_at))
72
73
  .limit(log_limit)
73
74
  .select_map(:message)
74
- logs.map(&:to_s).reject(&:empty?).uniq
75
+ texts = logs.map(&:to_s).reject(&:empty?).uniq
76
+ log.debug("EntityWatchdog recent_task_log_texts lookback=#{lookback} limit=#{log_limit} raw=#{logs.size} unique=#{texts.size}")
77
+ texts
75
78
  rescue StandardError => e
76
79
  log.warn("EntityWatchdog recent_task_log_texts failed: #{e.message}")
77
80
  []
@@ -104,8 +107,9 @@ module Legion
104
107
  source_agent: 'lex-apollo:entity_watchdog',
105
108
  context: { entity_type: entity[:type], original_name: entity[:name] }
106
109
  ).publish
110
+ log.info("EntityWatchdog published entity type=#{entity[:type]} name=#{entity[:name]}")
107
111
  rescue StandardError => e
108
- log.error("EntityWatchdog publish failed: #{e.message}")
112
+ handle_exception(e, level: :error, operation: 'apollo.entity_watchdog.publish_entity_ingest')
109
113
  end
110
114
 
111
115
  def entity_types
@@ -14,21 +14,27 @@ module Legion
14
14
 
15
15
  def handle_vectorize(payload)
16
16
  payload = symbolize(payload)
17
+ log.debug("WritebackVectorize handle_vectorize content_length=#{payload[:content].to_s.length} content_type=#{payload[:content_type] || 'nil'}")
17
18
  result = Legion::LLM::Embeddings.generate(text: payload[:content])
18
19
  vector = result.is_a?(Hash) ? result[:vector] : result
19
20
  embedding = vector.is_a?(Array) && vector.any? ? vector : Array.new(1024, 0.0)
21
+ log.debug("WritebackVectorize embedding_dimensions=#{embedding.length} vector_generated=#{vector.is_a?(Array) && vector.any?}")
20
22
  enriched = payload.merge(embedding: embedding)
21
23
 
22
24
  if Helpers::Capability.can_write?
25
+ log.debug('WritebackVectorize route=direct_ingest')
23
26
  Runners::Knowledge.handle_ingest(**enriched)
24
27
  else
28
+ log.debug('WritebackVectorize route=transport_writeback')
25
29
  Transport::Messages::Writeback.new(
26
30
  **enriched, has_embedding: true
27
31
  ).publish
28
32
  end
29
33
 
34
+ log.info('WritebackVectorize completed action=vectorized')
30
35
  { success: true, action: :vectorized }
31
36
  rescue StandardError => e
37
+ handle_exception(e, level: :error, operation: 'apollo.writeback_vectorize.handle_vectorize')
32
38
  { success: false, error: e.message }
33
39
  end
34
40
 
@@ -51,15 +51,17 @@ module Legion
51
51
  req = json_body
52
52
  halt 400, { error: 'query is required' }.to_json unless req[:query]
53
53
 
54
- result = runner.handle_query(
54
+ query_options = {
55
55
  query: req[:query],
56
56
  limit: req[:limit] || 10,
57
57
  min_confidence: req[:min_confidence] || 0.3,
58
- status: req[:status] || [:confirmed],
59
58
  tags: req[:tags],
60
59
  domain: req[:domain],
61
60
  agent_id: req[:agent_id] || 'api'
62
- )
61
+ }
62
+ query_options[:status] = req[:status] if req.key?(:status)
63
+
64
+ result = runner.handle_query(**query_options)
63
65
  status result[:success] ? 200 : 500
64
66
  result.to_json
65
67
  end
@@ -4,17 +4,24 @@ module Legion
4
4
  module Extensions
5
5
  module Apollo
6
6
  module Runners
7
- module EntityExtractor # rubocop:disable Legion/Extension/RunnerIncludeHelpers
7
+ module EntityExtractor
8
8
  DEFAULT_ENTITY_TYPES = %w[person service repository concept].freeze
9
9
  DEFAULT_MIN_CONFIDENCE = 0.7
10
10
 
11
11
  def extract_entities(text:, entity_types: nil, min_confidence: Helpers::Confidence.apollo_setting(:entity_extractor, :min_confidence, default: DEFAULT_MIN_CONFIDENCE), **) # rubocop:disable Layout/LineLength
12
- return { success: true, entities: [], source: :empty } if text.to_s.strip.empty?
12
+ if text.to_s.strip.empty?
13
+ log.debug('Apollo EntityExtractor.extract_entities skipped reason=empty_text')
14
+ return { success: true, entities: [], source: :empty }
15
+ end
13
16
 
14
- return { success: true, entities: [], source: :unavailable } unless defined?(Legion::LLM) && Legion::LLM.started?
17
+ unless defined?(Legion::LLM) && Legion::LLM.started?
18
+ log.debug('Apollo EntityExtractor.extract_entities skipped reason=llm_unavailable')
19
+ return { success: true, entities: [], source: :unavailable }
20
+ end
15
21
 
16
22
  types = Array(entity_types).map(&:to_s)
17
23
  types = DEFAULT_ENTITY_TYPES if types.empty?
24
+ log.debug("Apollo EntityExtractor.extract_entities text_length=#{text.to_s.length} types=#{types.join(',')} min_confidence=#{min_confidence}")
18
25
 
19
26
  result = Legion::LLM.structured(
20
27
  messages: [
@@ -29,9 +36,11 @@ module Legion
29
36
  (entity[:confidence] || 0.0) >= min_confidence &&
30
37
  (types.empty? || types.include?(entity[:type].to_s))
31
38
  end
39
+ log.info("Apollo EntityExtractor.extract_entities raw=#{raw_entities.size} filtered=#{filtered.size}")
32
40
 
33
41
  { success: true, entities: filtered, source: :llm }
34
42
  rescue StandardError => e
43
+ handle_exception(e, level: :error, operation: 'apollo.entity_extractor.extract_entities')
35
44
  { success: false, entities: [], error: e.message, source: :error }
36
45
  end
37
46
 
@@ -70,6 +79,8 @@ module Legion
70
79
  required: ['entities']
71
80
  }
72
81
  end
82
+
83
+ include Legion::Extensions::Helpers::Lex if defined?(Legion::Extensions::Helpers::Lex)
73
84
  end
74
85
  end
75
86
  end
@@ -18,52 +18,65 @@ module Legion
18
18
  end
19
19
 
20
20
  def aggregate(**)
21
- return { success: false, error: 'apollo_data_not_available' } unless defined?(Legion::Data::Model::ApolloEntry)
21
+ unless defined?(Legion::Data::Model::ApolloEntry)
22
+ log.warn('Apollo Expertise.aggregate skipped: apollo_data_not_available')
23
+ return { success: false, error: 'apollo_data_not_available' }
24
+ end
22
25
 
23
26
  entries = Legion::Data::Model::ApolloEntry
24
27
  .select(:source_agent, :tags, :confidence)
25
28
  .exclude(source_agent: nil)
26
29
  .all
27
-
28
- groups = {}
29
- entries.each do |entry|
30
- agent = entry.source_agent
31
- domain = entry.tags.is_a?(Array) ? (entry.tags.first || 'general') : 'general'
32
- key = "#{agent}:#{domain}"
33
- groups[key] ||= { agent_id: agent, domain: domain, confidences: [] }
34
- groups[key][:confidences] << entry.confidence.to_f
35
- end
30
+ log.debug("Apollo Expertise.aggregate entries=#{entries.size}")
36
31
 
37
32
  agent_set = Set.new
38
33
  domain_set = Set.new
39
34
 
40
- groups.each_value do |group|
41
- avg = group[:confidences].sum / group[:confidences].size
42
- count = group[:confidences].size
43
- cap = Helpers::Confidence.apollo_setting(:expertise, :proficiency_cap, default: 1.0)
44
- proficiency = [avg * Math.log2(count + 1), cap].min
45
-
46
- existing = Legion::Data::Model::ApolloExpertise
47
- .where(agent_id: group[:agent_id], domain: group[:domain]).first
48
-
49
- if existing
50
- existing.update(proficiency: proficiency, entry_count: count, last_active_at: Time.now)
51
- else
52
- Legion::Data::Model::ApolloExpertise.create(
53
- agent_id: group[:agent_id], domain: group[:domain],
54
- proficiency: proficiency, entry_count: count, last_active_at: Time.now
55
- )
56
- end
57
-
35
+ expertise_groups(entries).each_value do |group|
36
+ upsert_expertise_group(group)
58
37
  agent_set << group[:agent_id]
59
38
  domain_set << group[:domain]
60
39
  end
61
40
 
62
41
  { success: true, agents: agent_set.size, domains: domain_set.size }
42
+ .tap { |result| log.info("Apollo Expertise.aggregate agents=#{result[:agents]} domains=#{result[:domains]}") }
63
43
  rescue Sequel::Error => e
44
+ handle_exception(e, level: :error, operation: 'apollo.expertise.aggregate')
64
45
  { success: false, error: e.message }
65
46
  end
66
47
 
48
+ def expertise_groups(entries)
49
+ entries.each_with_object({}) do |entry, groups|
50
+ agent = entry.source_agent
51
+ domain = entry.tags.is_a?(Array) ? (entry.tags.first || 'general') : 'general'
52
+ key = "#{agent}:#{domain}"
53
+ groups[key] ||= { agent_id: agent, domain: domain, confidences: [] }
54
+ groups[key][:confidences] << entry.confidence.to_f
55
+ end
56
+ end
57
+
58
+ def upsert_expertise_group(group)
59
+ count = group[:confidences].size
60
+ proficiency = expertise_proficiency(group[:confidences])
61
+ existing = Legion::Data::Model::ApolloExpertise
62
+ .where(agent_id: group[:agent_id], domain: group[:domain]).first
63
+
64
+ if existing
65
+ existing.update(proficiency: proficiency, entry_count: count, last_active_at: Time.now)
66
+ else
67
+ Legion::Data::Model::ApolloExpertise.create(
68
+ agent_id: group[:agent_id], domain: group[:domain],
69
+ proficiency: proficiency, entry_count: count, last_active_at: Time.now
70
+ )
71
+ end
72
+ end
73
+
74
+ def expertise_proficiency(confidences)
75
+ avg = confidences.sum / confidences.size
76
+ cap = Helpers::Confidence.apollo_setting(:expertise, :proficiency_cap, default: 1.0)
77
+ [avg * Math.log2(confidences.size + 1), cap].min
78
+ end
79
+
67
80
  include Legion::Extensions::Helpers::Lex if defined?(Legion::Extensions::Helpers::Lex)
68
81
  end
69
82
  end
@@ -4,7 +4,10 @@ module Legion
4
4
  module Extensions
5
5
  module Apollo
6
6
  module Runners
7
- module Gas # rubocop:disable Legion/Extension/RunnerIncludeHelpers
7
+ module Gas
8
+ include Legion::Extensions::Helpers::Lex if defined?(Legion::Extensions::Helpers::Lex)
9
+ extend Legion::Extensions::Helpers::Lex if defined?(Legion::Extensions::Helpers::Lex)
10
+
8
11
  RELATION_TYPES = %w[
9
12
  similar_to contradicts depends_on causes
10
13
  part_of supersedes supports_by extends
@@ -16,10 +19,6 @@ module Legion
16
19
 
17
20
  module_function
18
21
 
19
- def log
20
- Legion::Logging
21
- end
22
-
23
22
  def json_load(str)
24
23
  ::JSON.parse(str, symbolize_names: true)
25
24
  end
@@ -31,7 +30,12 @@ module Legion
31
30
  def fallback_confidence = Helpers::Confidence.apollo_setting(:gas, :fallback_confidence, default: 0.5)
32
31
 
33
32
  def process(audit_event)
34
- return { phases_completed: 0, reason: 'no content' } unless processable?(audit_event)
33
+ unless processable?(audit_event)
34
+ log.debug('GAS process skipped reason=no_content')
35
+ return { phases_completed: 0, reason: 'no content' }
36
+ end
37
+
38
+ log.debug("GAS process start request_id=#{audit_event[:request_id] || 'nil'} messages=#{Array(audit_event[:messages]).size} response_length=#{audit_event[:response_content].to_s.length}") # rubocop:disable Layout/LineLength
35
39
 
36
40
  facts = phase_comprehend(audit_event)
37
41
  entities = phase_extract(audit_event, facts)
@@ -40,7 +44,7 @@ module Legion
40
44
  deposit_result = phase_deposit(facts, entities, relations, synthesis, audit_event)
41
45
  anticipations = phase_anticipate(facts, synthesis)
42
46
 
43
- {
47
+ result = {
44
48
  phases_completed: 6,
45
49
  facts: facts.length,
46
50
  entities: entities.length,
@@ -49,8 +53,10 @@ module Legion
49
53
  deposited: deposit_result,
50
54
  anticipations: anticipations.length
51
55
  }
56
+ log.info("GAS process complete facts=#{result[:facts]} entities=#{result[:entities]} relations=#{result[:relations]} synthesis=#{result[:synthesis]} anticipations=#{result[:anticipations]}") # rubocop:disable Layout/LineLength
57
+ result
52
58
  rescue StandardError => e
53
- log.warn("GAS pipeline error: #{e.message}")
59
+ log.error("GAS pipeline error: #{e.message}")
54
60
  { phases_completed: 0, error: e.message }
55
61
  end
56
62
 
@@ -63,19 +69,24 @@ module Legion
63
69
  messages = audit_event[:messages]
64
70
  response = audit_event[:response_content]
65
71
 
66
- if llm_available?
67
- llm_comprehend(messages, response)
68
- else
69
- mechanical_comprehend(messages, response)
70
- end
72
+ mode = llm_available? ? :llm : :mechanical
73
+ log.debug("GAS phase_comprehend mode=#{mode} messages=#{Array(messages).size} response_length=#{response.to_s.length}")
74
+ facts = mode == :llm ? llm_comprehend(messages, response) : mechanical_comprehend(messages, response)
75
+ log.debug("GAS phase_comprehend facts=#{facts.size}")
76
+ facts
71
77
  end
72
78
 
73
79
  # Phase 2: Extract - entity extraction (delegates to existing EntityExtractor)
74
80
  def phase_extract(audit_event, _facts)
75
- return [] unless defined?(Runners::EntityExtractor)
81
+ unless defined?(Runners::EntityExtractor)
82
+ log.debug('GAS phase_extract skipped reason=entity_extractor_unavailable')
83
+ return []
84
+ end
76
85
 
77
86
  result = Runners::EntityExtractor.extract_entities(text: audit_event[:response_content])
78
- result[:success] ? (result[:entities] || []) : []
87
+ entities = result[:success] ? (result[:entities] || []) : []
88
+ log.debug("GAS phase_extract success=#{result[:success]} entities=#{entities.size}")
89
+ entities
79
90
  rescue StandardError => e
80
91
  log.warn("GAS phase_extract failed: #{e.message}")
81
92
  []
@@ -83,10 +94,16 @@ module Legion
83
94
 
84
95
  # Phase 3: Relate - classify relationships between new and existing entries
85
96
  def phase_relate(facts, _entities)
86
- return [] unless defined?(Runners::Knowledge)
97
+ unless defined?(Runners::Knowledge)
98
+ log.debug('GAS phase_relate skipped reason=knowledge_runner_unavailable')
99
+ return []
100
+ end
87
101
 
88
102
  existing = fetch_similar_entries(facts)
89
- return [] if existing.empty?
103
+ if existing.empty?
104
+ log.debug("GAS phase_relate skipped reason=no_existing_entries facts=#{facts.size}")
105
+ return []
106
+ end
90
107
 
91
108
  relations = []
92
109
  facts.each do |fact|
@@ -95,15 +112,24 @@ module Legion
95
112
  relations << relation if relation
96
113
  end
97
114
  end
115
+ log.debug("GAS phase_relate facts=#{facts.size} existing=#{existing.size} relations=#{relations.size}")
98
116
  relations
99
117
  end
100
118
 
101
119
  # Phase 4: Synthesize - generate derivative knowledge
102
120
  def phase_synthesize(facts, _relations)
103
- return [] if facts.length < 2
104
- return [] unless llm_available?
121
+ if facts.length < 2
122
+ log.debug("GAS phase_synthesize skipped reason=insufficient_facts facts=#{facts.length}")
123
+ return []
124
+ end
125
+ unless llm_available?
126
+ log.debug('GAS phase_synthesize skipped reason=llm_unavailable')
127
+ return []
128
+ end
105
129
 
106
- llm_synthesize(facts)
130
+ synthesis = llm_synthesize(facts)
131
+ log.debug("GAS phase_synthesize synthesis=#{synthesis.size}")
132
+ synthesis
107
133
  rescue StandardError => e
108
134
  log.warn("GAS phase_synthesize failed: #{e.message}")
109
135
  []
@@ -111,7 +137,10 @@ module Legion
111
137
 
112
138
  # Phase 5: Deposit - atomic write to Apollo
113
139
  def phase_deposit(facts, _entities, _relations, _synthesis, audit_event)
114
- return { deposited: 0 } unless defined?(Runners::Knowledge)
140
+ unless defined?(Runners::Knowledge)
141
+ log.debug('GAS phase_deposit skipped reason=knowledge_runner_unavailable')
142
+ return { deposited: 0 }
143
+ end
115
144
 
116
145
  deposited = 0
117
146
  facts.each do |fact|
@@ -128,15 +157,24 @@ module Legion
128
157
  rescue StandardError => e
129
158
  log.warn("GAS deposit error: #{e.message}")
130
159
  end
160
+ log.info("GAS phase_deposit deposited=#{deposited} facts=#{facts.size}")
131
161
  { deposited: deposited }
132
162
  end
133
163
 
134
164
  # Phase 6: Anticipate - pre-cache likely follow-up questions
135
165
  def phase_anticipate(facts, _synthesis)
136
- return [] if facts.empty?
137
- return [] unless llm_available?
166
+ if facts.empty?
167
+ log.debug('GAS phase_anticipate skipped reason=no_facts')
168
+ return []
169
+ end
170
+ unless llm_available?
171
+ log.debug('GAS phase_anticipate skipped reason=llm_unavailable')
172
+ return []
173
+ end
138
174
 
139
- llm_anticipate(facts)
175
+ anticipations = llm_anticipate(facts)
176
+ log.debug("GAS phase_anticipate anticipations=#{anticipations.size}")
177
+ anticipations
140
178
  rescue StandardError => e
141
179
  log.warn("GAS phase_anticipate failed: #{e.message}")
142
180
  []
@@ -153,7 +191,9 @@ module Legion
153
191
  log.warn("GAS fetch_similar_entries failed for fact: #{e.message}")
154
192
  next
155
193
  end
156
- entries.uniq { |e| e[:id] }
194
+ unique = entries.uniq { |e| e[:id] }
195
+ log.debug("GAS fetch_similar_entries facts=#{facts.size} entries=#{unique.size}")
196
+ unique
157
197
  end
158
198
 
159
199
  def classify_relation(fact, entry)
@@ -21,9 +21,12 @@ module Legion
21
21
  link: :association, relation: :association, connection: :association,
22
22
  inference: :association, implication: :association
23
23
  }.freeze
24
+ DEFAULT_QUERY_STATUS = [:confirmed].freeze
25
+ UNSET = Object.new.freeze
24
26
 
25
27
  def store_knowledge(content:, content_type:, tags: [], source_agent: nil, context: {}, **)
26
28
  content_type = normalize_content_type(content_type)
29
+ log.debug("Apollo Knowledge.store_knowledge content_type=#{content_type} tags=#{Array(tags).size} source_agent=#{source_agent || 'nil'} data_available=#{defined?(Legion::Data::Model::ApolloEntry) ? true : false}") # rubocop:disable Layout/LineLength
27
30
 
28
31
  if defined?(Legion::Data::Model::ApolloEntry)
29
32
  return handle_ingest(content: content, content_type: content_type,
@@ -41,6 +44,7 @@ module Legion
41
44
  end
42
45
 
43
46
  def query_knowledge(query:, limit: Helpers::GraphQuery.default_query_limit, min_confidence: Helpers::GraphQuery.default_query_min_confidence, status: %i[confirmed candidate], tags: nil, **) # rubocop:disable Layout/LineLength
47
+ log.debug("Apollo Knowledge.query_knowledge query_length=#{query.to_s.length} limit=#{limit} statuses=#{Array(status).join(',')} tags=#{Array(tags).size} data_available=#{defined?(Legion::Data::Model::ApolloEntry) ? true : false}") # rubocop:disable Layout/LineLength
44
48
  if defined?(Legion::Data::Model::ApolloEntry)
45
49
  return handle_query(query: query, limit: limit, min_confidence: min_confidence,
46
50
  status: status, tags: tags, **)
@@ -57,6 +61,7 @@ module Legion
57
61
  end
58
62
 
59
63
  def related_entries(entry_id:, relation_types: nil, depth: Helpers::GraphQuery.default_depth, **)
64
+ log.debug("Apollo Knowledge.related_entries entry_id=#{entry_id} depth=#{depth} relation_types=#{Array(relation_types).join(',')} data_available=#{defined?(Legion::Data::Model::ApolloEntry) ? true : false}") # rubocop:disable Layout/LineLength
60
65
  return handle_traverse(entry_id: entry_id, depth: depth, relation_types: relation_types, **) if defined?(Legion::Data::Model::ApolloEntry)
61
66
 
62
67
  {
@@ -75,74 +80,72 @@ module Legion
75
80
  }
76
81
  end
77
82
 
78
- def handle_ingest(content: nil, content_type: nil, tags: [], source_agent: 'unknown', source_provider: nil, source_channel: nil, knowledge_domain: nil, submitted_by: nil, submitted_from: nil, content_hash: nil, context: {}, skip: false, **) # rubocop:disable Metrics/ParameterLists, Layout/LineLength, Metrics/PerceivedComplexity, Metrics/CyclomaticComplexity
83
+ def handle_ingest(content: nil, content_type: nil, tags: [], source_agent: 'unknown', source_provider: nil, source_channel: nil, knowledge_domain: nil, submitted_by: nil, submitted_from: nil, content_hash: nil, context: {}, skip: false, **) # rubocop:disable Metrics/ParameterLists, Layout/LineLength
79
84
  return { status: :skipped } if skip
80
- return { success: false, error: 'content is required' } if content.nil? || content.to_s.strip.empty?
81
- return { success: false, error: 'content_type is required' } if content_type.nil?
82
- return { success: false, error: 'apollo_data_not_available' } unless defined?(Legion::Data::Model::ApolloEntry)
83
85
 
84
- # Content hash dedup
86
+ content = normalize_text_input(content)
87
+ log.debug("Apollo Knowledge.handle_ingest content_length=#{content.length} content_type=#{content_type} tags=#{Array(tags).size} source_agent=#{source_agent} source_channel=#{source_channel || 'nil'}") # rubocop:disable Layout/LineLength
88
+ early_error = ingest_early_return_error(content: content, content_type: content_type, tags: tags)
89
+ return early_error if early_error
90
+
85
91
  hash = content_hash || (defined?(Helpers::Writeback) ? Helpers::Writeback.content_hash(content) : nil)
86
- if hash
87
- existing = Legion::Data::Model::ApolloEntry
88
- .where(content_hash: hash)
89
- .exclude(status: 'archived')
90
- .first
91
- if existing
92
- existing.update(confidence: [existing.confidence + Helpers::Confidence.retrieval_boost, 1.0].min)
93
- return { success: true, entry_id: existing.id, deduped: true }
94
- end
92
+ existing = active_duplicate_for_hash(hash)
93
+ if existing
94
+ log.info("Apollo Knowledge.handle_ingest deduped entry_id=#{existing.id} source_agent=#{source_agent}")
95
+ return { success: true, entry_id: existing.id, deduped: true }
95
96
  end
96
97
 
97
98
  embedding = embed_text(content)
98
99
  content_type_sym = content_type.to_s
99
- tag_array = defined?(Helpers::TagNormalizer) ? Helpers::TagNormalizer.normalize_all(tags) : Array(tags)
100
- domain = knowledge_domain || tag_array.first || 'general'
101
-
102
- corroborated, existing_id = find_corroboration(embedding, content_type_sym, source_agent, source_channel)
103
-
104
- unless corroborated
105
- new_entry = Legion::Data::Model::ApolloEntry.create(
106
- content: content,
107
- content_type: content_type_sym,
108
- confidence: Helpers::Confidence.initial_confidence,
109
- source_agent: source_agent,
110
- source_provider: source_provider || derive_provider_from_agent(source_agent),
111
- source_channel: source_channel,
112
- source_context: ::JSON.dump(context.is_a?(Hash) ? context : {}),
113
- tags: Sequel.pg_array(tag_array),
114
- status: 'candidate',
115
- knowledge_domain: domain,
116
- submitted_by: submitted_by,
117
- submitted_from: submitted_from,
118
- content_hash: hash,
119
- embedding: Sequel.lit("'[#{embedding.join(',')}]'::vector")
100
+ metadata = ingest_metadata(tags: tags, knowledge_domain: knowledge_domain, source_agent: source_agent,
101
+ source_provider: source_provider, source_channel: source_channel,
102
+ submitted_by: submitted_by, submitted_from: submitted_from)
103
+
104
+ corroborated, existing_id = find_corroboration(
105
+ embedding, content_type_sym, metadata[:source_agent], metadata[:source_channel]
106
+ )
107
+
108
+ if corroborated
109
+ log.info("Apollo Knowledge.handle_ingest corroborated entry_id=#{existing_id} source_agent=#{metadata[:source_agent]}")
110
+ else
111
+ existing_id = create_candidate_entry(
112
+ content: content, content_type: content_type_sym, context: context,
113
+ metadata: metadata, content_hash: hash, embedding: embedding
120
114
  )
121
- existing_id = new_entry.id
122
115
  end
123
116
 
124
- upsert_expertise(source_agent: source_agent, domain: domain)
117
+ upsert_expertise(source_agent: metadata[:source_agent], domain: metadata[:domain])
125
118
 
126
119
  Legion::Data::Model::ApolloAccessLog.create(
127
- entry_id: existing_id, agent_id: source_agent, action: 'ingest'
120
+ entry_id: existing_id, agent_id: metadata[:source_agent], action: 'ingest'
128
121
  )
129
122
 
130
123
  contradictions = detect_contradictions(existing_id, embedding, content)
124
+ log.debug("Apollo Knowledge.handle_ingest complete entry_id=#{existing_id} corroborated=#{corroborated} contradictions=#{contradictions.size}")
131
125
 
132
126
  { success: true, entry_id: existing_id, status: corroborated ? 'corroborated' : 'candidate',
133
127
  corroborated: corroborated, contradictions: contradictions }
134
128
  rescue Sequel::Error => e
129
+ handle_exception(e, level: :error, operation: 'apollo.knowledge.handle_ingest')
135
130
  { success: false, error: e.message }
136
131
  end
137
132
 
138
- def handle_query(query:, limit: Helpers::GraphQuery.default_query_limit, min_confidence: Helpers::GraphQuery.default_query_min_confidence, status: [:confirmed], tags: nil, domain: nil, agent_id: 'unknown', **) # rubocop:disable Layout/LineLength
133
+ def handle_query(query:, limit: Helpers::GraphQuery.default_query_limit, min_confidence: Helpers::GraphQuery.default_query_min_confidence, status: UNSET, tags: nil, domain: nil, agent_id: 'unknown', **) # rubocop:disable Layout/LineLength
139
134
  return { success: false, error: 'apollo_data_not_available' } unless defined?(Legion::Data::Model::ApolloEntry)
140
135
 
141
136
  query = normalize_text_input(query)
137
+ status_defaulted = status.equal?(UNSET)
138
+ requested_status = status_defaulted ? DEFAULT_QUERY_STATUS : status
139
+ log.debug("Apollo Knowledge.handle_query mode=#{browse_query?(query) ? 'browse' : 'semantic'} query_length=#{query.length} limit=#{limit} statuses=#{Array(requested_status).join(',')} status_defaulted=#{status_defaulted} tags=#{Array(tags).size} domain=#{domain || 'nil'} agent_id=#{agent_id}") # rubocop:disable Layout/LineLength
140
+ if browse_query?(query)
141
+ return list_entries_chronologically(query: query, limit: limit, status: requested_status,
142
+ status_defaulted: status_defaulted, tags: tags, domain: domain)
143
+ end
144
+
142
145
  embedding = embed_text(query)
143
146
  sql = Helpers::GraphQuery.build_semantic_search_sql(
144
147
  limit: limit, min_confidence: min_confidence,
145
- statuses: Array(status).map(&:to_s), tags: tags, domain: domain
148
+ statuses: Array(requested_status).map(&:to_s), tags: tags, domain: domain
146
149
  )
147
150
 
148
151
  db = Legion::Data::Model::ApolloEntry.db
@@ -173,14 +176,17 @@ module Legion
173
176
  knowledge_domain: entry[:knowledge_domain] }
174
177
  end
175
178
 
179
+ log.info("Apollo Knowledge.handle_query results=#{formatted.size} mode=semantic agent_id=#{agent_id}")
176
180
  { success: true, entries: formatted, count: formatted.size }
177
181
  rescue Sequel::Error => e
182
+ handle_exception(e, level: :error, operation: 'apollo.knowledge.handle_query')
178
183
  { success: false, error: e.message }
179
184
  end
180
185
 
181
186
  def handle_traverse(entry_id:, depth: Helpers::GraphQuery.default_depth, relation_types: nil, agent_id: 'unknown', **)
182
187
  return { success: false, error: 'apollo_data_not_available' } unless defined?(Legion::Data::Model::ApolloEntry)
183
188
 
189
+ log.debug("Apollo Knowledge.handle_traverse entry_id=#{entry_id} depth=#{depth} relation_types=#{Array(relation_types).join(',')} agent_id=#{agent_id}") # rubocop:disable Layout/LineLength
184
190
  # Whitelist relation_types to prevent SQL injection (they are string-interpolated in build_traversal_sql)
185
191
  if relation_types
186
192
  allowed = Helpers::Confidence::RELATION_TYPES
@@ -203,14 +209,17 @@ module Legion
203
209
  depth: entry[:depth], activation: entry[:activation] }
204
210
  end
205
211
 
212
+ log.info("Apollo Knowledge.handle_traverse results=#{formatted.size} entry_id=#{entry_id}")
206
213
  { success: true, entries: formatted, count: formatted.size }
207
214
  rescue Sequel::Error => e
215
+ handle_exception(e, level: :error, operation: 'apollo.knowledge.handle_traverse')
208
216
  { success: false, error: e.message }
209
217
  end
210
218
 
211
219
  def redistribute_knowledge(agent_id:, min_confidence: Helpers::Confidence.apollo_setting(:query, :redistribute_min_confidence, default: 0.5), **)
212
220
  return { success: false, error: 'apollo_data_not_available' } unless defined?(Legion::Data::Model::ApolloEntry)
213
221
 
222
+ log.debug("Apollo Knowledge.redistribute_knowledge agent_id=#{agent_id} min_confidence=#{min_confidence}")
214
223
  entries = Legion::Data::Model::ApolloEntry
215
224
  .where(source_agent: agent_id, status: 'confirmed')
216
225
  .where { confidence > min_confidence }
@@ -238,6 +247,7 @@ module Legion
238
247
  log.info("[apollo] redistributed #{redistributed} entries from departing agent=#{agent_id}")
239
248
  { success: true, redistributed: redistributed, agent_id: agent_id }
240
249
  rescue Sequel::Error => e
250
+ handle_exception(e, level: :error, operation: 'apollo.knowledge.redistribute_knowledge')
241
251
  { success: false, error: e.message }
242
252
  end
243
253
 
@@ -247,6 +257,7 @@ module Legion
247
257
  return { success: false, error: 'apollo_data_not_available' } unless defined?(Legion::Data::Model::ApolloEntry)
248
258
 
249
259
  query = normalize_text_input(query)
260
+ log.debug("Apollo Knowledge.retrieve_relevant query_length=#{query.length} limit=#{limit} min_confidence=#{min_confidence} tags=#{Array(tags).size} domain=#{domain || 'nil'}") # rubocop:disable Layout/LineLength
250
261
  return { success: true, entries: [], count: 0 } if query.nil? || query.to_s.strip.empty?
251
262
 
252
263
  embedding = embed_text(query)
@@ -273,8 +284,10 @@ module Legion
273
284
  knowledge_domain: entry[:knowledge_domain] }
274
285
  end
275
286
 
287
+ log.info("Apollo Knowledge.retrieve_relevant results=#{formatted.size} limit=#{limit}")
276
288
  { success: true, entries: formatted, count: formatted.size }
277
289
  rescue Sequel::Error => e
290
+ handle_exception(e, level: :error, operation: 'apollo.knowledge.retrieve_relevant')
278
291
  { success: false, error: e.message }
279
292
  end
280
293
 
@@ -283,6 +296,7 @@ module Legion
283
296
  return { success: false, error: 'apollo_data_not_available' }
284
297
  end
285
298
 
299
+ log.debug("Apollo Knowledge.prepare_mesh_export target_domain=#{target_domain} min_confidence=#{min_confidence} limit=#{limit}")
286
300
  conn = Legion::Data.connection
287
301
  allowed = allowed_domains_for(target_domain)
288
302
 
@@ -302,7 +316,9 @@ module Legion
302
316
  end
303
317
 
304
318
  { success: true, entries: formatted, count: formatted.size, target_domain: target_domain }
319
+ .tap { |result| log.info("Apollo Knowledge.prepare_mesh_export results=#{result[:count]} target_domain=#{target_domain}") }
305
320
  rescue Sequel::Error => e
321
+ handle_exception(e, level: :error, operation: 'apollo.knowledge.prepare_mesh_export')
306
322
  { success: false, error: e.message }
307
323
  end
308
324
 
@@ -311,6 +327,7 @@ module Legion
311
327
  return { deleted: 0, redacted: 0, error: 'apollo_data_not_available' }
312
328
  end
313
329
 
330
+ log.warn("Apollo Knowledge.handle_erasure_request agent_id=#{agent_id}")
314
331
  conn = Legion::Data.connection
315
332
 
316
333
  # Delete entries solely from dead agent (not confirmed by others)
@@ -325,7 +342,9 @@ module Legion
325
342
  .update(source_agent: 'redacted', source_provider: nil, source_channel: nil)
326
343
 
327
344
  { deleted: deleted, redacted: redacted, agent_id: agent_id }
345
+ .tap { |result| log.info("Apollo Knowledge.handle_erasure_request deleted=#{result[:deleted]} redacted=#{result[:redacted]} agent_id=#{agent_id}") } # rubocop:disable Layout/LineLength
328
346
  rescue Sequel::Error => e
347
+ handle_exception(e, level: :error, operation: 'apollo.knowledge.handle_erasure_request')
329
348
  { deleted: 0, redacted: 0, error: e.message }
330
349
  end
331
350
 
@@ -333,6 +352,27 @@ module Legion
333
352
 
334
353
  private
335
354
 
355
# Checks the preconditions of an ingest and logs a warning for the first one
# that fails.
#
# The checks run in this order: blank content, missing content_type, and
# whether the Legion::Data::Model::ApolloEntry constant is defined.
#
# @param content [#to_s, nil] content to ingest; nil is treated as blank
# @param content_type [Object, nil] caller-supplied content type
# @param tags [Array, Object, nil] caller-supplied tags (only used in the log line)
# @return [Hash, nil] `{ success: false, error: String }` for the first failed
#   check, or nil when every check passes
def ingest_early_return_error(content:, content_type:, tags:)
  # CR/LF runs are collapsed so caller-controlled values cannot forge extra log lines.
  safe_type = content_type.to_s.gsub(/[\r\n]+/, ' ')

  # to_s keeps nil (or any non-String) content on the warn path instead of
  # raising NoMethodError from nil.strip.
  if content.to_s.strip.empty?
    safe_tags = Array(tags).map { |tag| tag.to_s.gsub(/[\r\n]+/, ' ') }
    log.warn('[apollo][handle_ingest] early-return: content is required ' \
             "content_type=#{safe_type} tags=#{safe_tags.inspect}")
    return { success: false, error: 'content is required' }
  end

  if content_type.nil?
    log.warn('[apollo][handle_ingest] early-return: content_type is required ' \
             "content_length=#{content.to_s.length}")
    return { success: false, error: 'content_type is required' }
  end

  return nil if defined?(Legion::Data::Model::ApolloEntry)

  log.warn('[apollo][handle_ingest] early-return: apollo_data_not_available ' \
           "content_type=#{safe_type}")
  { success: false, error: 'apollo_data_not_available' }
end
375
+
336
376
  def normalize_content_type(raw)
337
377
  sym = raw.to_s.delete_prefix(':').gsub(%r{[/\s]}, '_').strip.downcase.to_sym
338
378
  sym = CONTENT_TYPE_ALIASES.fetch(sym, sym)
@@ -341,21 +381,124 @@ module Legion
341
381
 
342
382
  def embed_text(text)
343
383
  text = normalize_text_input(text)
384
+ log.debug("Apollo Knowledge.embed_text text_length=#{text.length}")
344
385
  result = Legion::LLM::Embeddings.generate(text: text)
345
386
  vector = result.is_a?(Hash) ? result[:vector] : result
346
- vector.is_a?(Array) && vector.any? ? vector : Array.new(1024, 0.0)
387
+ if vector.is_a?(Array) && vector.any?
388
+ log.debug("Apollo Knowledge.embed_text vector_dimensions=#{vector.length}")
389
+ vector
390
+ else
391
+ log.warn('Apollo Knowledge.embed_text returned no vector; using zero-vector fallback')
392
+ Array.new(1024, 0.0)
393
+ end
347
394
  rescue StandardError => e
348
395
  log.warn("Apollo Knowledge.embed_text failed: #{e.message}")
349
396
  Array.new(1024, 0.0)
350
397
  end
351
398
 
352
399
  def normalize_text_input(value)
353
- return Legion::Apollo.send(:normalize_text_input, value) if defined?(Legion::Apollo) && Legion::Apollo.respond_to?(:normalize_text_input, true)
400
+ result = if defined?(Legion::Apollo) && Legion::Apollo.respond_to?(:normalize_text_input, true)
401
+ Legion::Apollo.send(:normalize_text_input, value)
402
+ else
403
+ value.to_s
404
+ end
354
405
 
355
- value.to_s
406
+ sanitize_for_postgres(result)
356
407
  rescue StandardError => e
357
408
  log.warn("Apollo Knowledge.normalize_text_input failed: #{e.message}")
358
- value.to_s
409
+ ''
410
+ end
411
+
412
# Returns a copy of +value+ tagged as UTF-8 with invalid byte sequences and
# NUL bytes removed. Non-String values are handed back unchanged.
#
# A String whose encoding is not UTF-8 is re-tagged with force_encoding; its
# bytes are not transcoded. A debug line is logged only when the input held a
# NUL byte or invalid bytes.
#
# @param value [Object] candidate text
# @return [Object] a new sanitized String, or +value+ itself when it is not a String
def sanitize_for_postgres(value)
  return value unless value.is_a?(String)

  text = value.dup
  text.force_encoding(Encoding::UTF_8) unless text.encoding == Encoding::UTF_8
  malformed = !text.valid_encoding?
  dirty = text.include?("\x00") || malformed
  text = text.scrub('') if malformed
  cleaned = text.delete("\x00")
  if dirty
    log.debug("Apollo Knowledge.sanitize_for_postgres sanitized original_length=#{value.bytesize} sanitized_length=#{cleaned.bytesize}") # rubocop:disable Layout/LineLength
  end
  cleaned
end
422
+
423
# Normalizes +value+ and keeps at most +max_length+ leading characters.
#
# @param value [Object, nil] raw column value
# @param max_length [Integer] maximum number of characters to keep
# @return [String, nil] the truncated text, or nil when +value+ is nil
def truncate_for_column(value, max_length)
  normalize_text_input(value)[0, max_length] unless value.nil?
end
428
+
429
# Looks up the first non-archived ApolloEntry with the given content hash and,
# when one exists, raises its confidence by Helpers::Confidence.retrieval_boost
# (capped at 1.0).
#
# @param hash [Object, nil] content hash to look up; a falsy value skips the lookup
# @return [Object, nil] the matching entry, or nil when there is none
def active_duplicate_for_hash(hash)
  return nil unless hash

  match = Legion::Data::Model::ApolloEntry
          .where(content_hash: hash)
          .exclude(status: 'archived')
          .first
  return nil if match.nil?

  boosted = match.confidence + Helpers::Confidence.retrieval_boost
  match.update(confidence: [boosted, 1.0].min)
  log.debug("Apollo Knowledge.active_duplicate_for_hash matched entry_id=#{match.id}")
  match
end
440
+
441
# Builds the metadata hash stored alongside an ingested entry.
#
# Tags go through Helpers::TagNormalizer.normalize_all when that constant is
# defined, otherwise through Array(). Each text field is passed to
# truncate_for_column with the length limit shown below.
#
# @return [Hash] with keys :tags, :domain, :source_agent, :source_provider,
#   :source_channel, :submitted_by and :submitted_from
def ingest_metadata(tags:, knowledge_domain:, source_agent:, source_provider:,
                    source_channel:, submitted_by:, submitted_from:)
  normalized_tags = if defined?(Helpers::TagNormalizer)
                      Helpers::TagNormalizer.normalize_all(tags)
                    else
                      Array(tags)
                    end
  agent = truncate_for_column(source_agent, 50) || 'unknown'
  # Domain falls back to the first tag, then to 'general'.
  domain = truncate_for_column(knowledge_domain || normalized_tags.first || 'general', 50)
  # The provider is derived from the agent name only when none was supplied.
  provider = truncate_for_column(source_provider || derive_provider_from_agent(agent), 50)

  {
    tags: normalized_tags,
    domain: domain,
    source_agent: agent,
    source_provider: provider,
    source_channel: truncate_for_column(source_channel, 100),
    submitted_by: truncate_for_column(submitted_by, 255),
    submitted_from: truncate_for_column(submitted_from, 255)
  }
end
453
+
454
# Inserts a new ApolloEntry with status 'candidate' and logs its creation.
#
# @param content [String] entry content
# @param content_type [Object] content type stored on the entry
# @param context [Hash, Object] stored as JSON in source_context; anything that
#   is not a Hash is stored as an empty object
# @param metadata [Hash] reads :source_agent, :source_provider, :source_channel,
#   :tags, :domain, :submitted_by and :submitted_from
# @param content_hash [Object] value stored in content_hash
# @param embedding [#join] vector components
# @return [Object] id of the created entry
def create_candidate_entry(content:, content_type:, context:, metadata:, content_hash:, embedding:)
  # Bind the vector text through a placeholder rather than interpolating it
  # into the SQL literal, so a malformed component cannot break out of the
  # quoted value.
  vector_literal = Sequel.lit('?::vector', "[#{embedding.join(',')}]")

  new_entry = Legion::Data::Model::ApolloEntry.create(
    content: content,
    content_type: content_type,
    confidence: Helpers::Confidence.initial_confidence,
    source_agent: metadata[:source_agent],
    source_provider: metadata[:source_provider],
    source_channel: metadata[:source_channel],
    source_context: ::JSON.dump(context.is_a?(Hash) ? context : {}),
    tags: Sequel.pg_array(metadata[:tags]),
    status: 'candidate',
    knowledge_domain: metadata[:domain],
    submitted_by: metadata[:submitted_by],
    submitted_from: metadata[:submitted_from],
    content_hash: content_hash,
    embedding: vector_literal
  )
  log.info("Apollo Knowledge.handle_ingest created entry_id=#{new_entry.id} status=candidate domain=#{metadata[:domain]} source_agent=#{metadata[:source_agent]}") # rubocop:disable Layout/LineLength
  new_entry.id
end
474
+
475
# Tells whether +query+, once stringified and stripped, is shorter than three
# characters.
#
# @param query [Object, nil] raw query
# @return [Boolean]
def browse_query?(query)
  trimmed = query.to_s.strip
  trimmed.size < 3
end
478
+
479
# Lists non-archived entries ordered by created_at descending, with optional
# status, tag-overlap and domain filters.
#
# @param query [Object] echoed back in the result
# @param limit [Integer] maximum number of rows
# @param status [Array, Object, nil] requested statuses; ignored when
#   +status_defaulted+ is truthy or no non-empty status remains
# @param status_defaulted [Boolean] whether +status+ is only the caller's default
# @param tags [Array, Object, nil] tags to overlap-match, when present and non-empty
# @param domain [Object, nil] knowledge domain filter, when present and non-empty
# @return [Hash] `{ success: true, mode: :browse, query:, entries:, count: }`, or
#   `{ success: false, error: String }` when a Sequel::Error is raised
def list_entries_chronologically(query:, limit:, status:, status_defaulted:, tags:, domain:)
  log.debug("Apollo Knowledge.list_entries_chronologically limit=#{limit} statuses=#{Array(status).join(',')} status_defaulted=#{status_defaulted} tags=#{Array(tags).size} domain=#{domain || 'nil'}") # rubocop:disable Layout/LineLength
  scope = Legion::Data::Model::ApolloEntry.exclude(status: 'archived')

  wanted_statuses = Array(status).map(&:to_s).reject(&:empty?)
  scope = scope.where(status: wanted_statuses) if !status_defaulted && !wanted_statuses.empty?

  filter_by_tags = tags && !Array(tags).empty?
  scope = scope.where(Sequel.lit('tags && ?', Sequel.pg_array(Array(tags)))) if filter_by_tags

  filter_by_domain = domain && !domain.to_s.empty?
  scope = scope.where(knowledge_domain: domain) if filter_by_domain

  rows = scope.order(Sequel.desc(:created_at)).limit(limit).all
  # Rows may be plain Hashes or objects that expose their columns through #values.
  entries = rows.map { |row| format_entry(row.is_a?(Hash) ? row : row.values) }
  log.info("Apollo Knowledge.list_entries_chronologically results=#{entries.size}")
  { success: true, mode: :browse, query: query, entries: entries, count: entries.size }
rescue Sequel::Error => e
  handle_exception(e, level: :error, operation: 'apollo.knowledge.list_entries_chronologically')
  { success: false, error: e.message }
end
496
+
497
# Projects an entry row onto the keys returned to callers; a non-nil :distance
# is converted with to_f.
#
# @param entry [#[]] row addressed by Symbol keys
# @return [Hash] with keys :id, :content, :content_type, :confidence, :distance,
#   :tags, :source_agent and :knowledge_domain
def format_entry(entry)
  formatted = %i[id content content_type confidence distance tags source_agent knowledge_domain]
              .to_h { |key| [key, entry[key]] }
  formatted[:distance] = formatted[:distance]&.to_f
  formatted
end
360
503
 
361
504
  def allowed_domains_for(target_domain)
@@ -379,6 +522,7 @@ module Legion
379
522
  rel_weight = Helpers::Confidence.apollo_setting(:contradiction, :relation_weight, default: 0.8)
380
523
 
381
524
  db = Legion::Data::Model::ApolloEntry.db
525
+ log.debug("Apollo Knowledge.detect_contradictions entry_id=#{entry_id} similar_limit=#{sim_limit} threshold=#{sim_threshold}")
382
526
  similar = db.fetch(
383
527
  "SELECT id, content, embedding FROM apollo_entries WHERE id != :entry_id AND embedding IS NOT NULL ORDER BY embedding <=> :embedding LIMIT #{sim_limit}", # rubocop:disable Layout/LineLength
384
528
  entry_id: entry_id,
@@ -403,9 +547,10 @@ module Legion
403
547
  Legion::Data::Model::ApolloEntry.where(id: [entry_id, existing[:id]]).update(status: 'disputed')
404
548
  contradictions << existing[:id]
405
549
  end
550
+ log.info("Apollo Knowledge.detect_contradictions entry_id=#{entry_id} contradictions=#{contradictions.size}") if contradictions.any?
406
551
  contradictions
407
552
  rescue Sequel::Error => e
408
- log.warn("Apollo Knowledge.detect_contradictions failed: #{e.message}")
553
+ handle_exception(e, level: :error, operation: 'apollo.knowledge.detect_contradictions')
409
554
  []
410
555
  end
411
556
 
@@ -430,6 +575,7 @@ module Legion
430
575
 
431
576
  def find_corroboration(embedding, content_type_sym, source_agent, source_channel = nil)
432
577
  scan_limit = Helpers::Confidence.apollo_setting(:corroboration, :scan_limit, default: 50)
578
+ log.debug("Apollo Knowledge.find_corroboration content_type=#{content_type_sym} source_agent=#{source_agent} source_channel=#{source_channel || 'nil'} scan_limit=#{scan_limit}") # rubocop:disable Layout/LineLength
433
579
  existing = Legion::Data::Model::ApolloEntry
434
580
  .where(content_type: content_type_sym)
435
581
  .exclude(embedding: nil)
@@ -462,9 +608,11 @@ module Legion
462
608
  source_agent: source_agent,
463
609
  weight: sim
464
610
  )
611
+ log.info("Apollo Knowledge.find_corroboration matched entry_id=#{entry.id} source_agent=#{source_agent} similarity=#{sim}")
465
612
  return [true, entry.id]
466
613
  end
467
614
 
615
+ log.debug("Apollo Knowledge.find_corroboration no_match source_agent=#{source_agent}")
468
616
  [false, nil]
469
617
  end
470
618
 
@@ -483,6 +631,7 @@ module Legion
483
631
  end
484
632
 
485
633
  def upsert_expertise(source_agent:, domain:)
634
+ log.debug("Apollo Knowledge.upsert_expertise source_agent=#{source_agent} domain=#{domain}")
486
635
  expertise = Legion::Data::Model::ApolloExpertise
487
636
  .where(agent_id: source_agent, domain: domain).first
488
637
  if expertise
@@ -25,7 +25,11 @@ module Legion
25
25
  min_confidence ||= Helpers::Confidence.decay_threshold
26
26
  min_age_hours = Helpers::Confidence.decay_min_age_hours
27
27
 
28
- return { decayed: 0, archived: 0 } unless defined?(Legion::Data) && Legion::Data.respond_to?(:connection) && Legion::Data.connection
28
+ log.debug("Apollo Maintenance.run_decay_cycle alpha=#{alpha} min_confidence=#{min_confidence} min_age_hours=#{min_age_hours}")
29
+ unless defined?(Legion::Data) && Legion::Data.respond_to?(:connection) && Legion::Data.connection
30
+ log.warn('Apollo Maintenance.run_decay_cycle skipped: apollo_data_not_available')
31
+ return { decayed: 0, archived: 0 }
32
+ end
29
33
 
30
34
  conn = Legion::Data.connection
31
35
 
@@ -54,15 +58,21 @@ module Legion
54
58
 
55
59
  { decayed: decayed, archived: archived, alpha: alpha, threshold: min_confidence,
56
60
  min_age_hours: min_age_hours }
61
+ .tap { |result| log.info("Apollo Maintenance.run_decay_cycle decayed=#{result[:decayed]} archived=#{result[:archived]} alpha=#{alpha} threshold=#{min_confidence}") } # rubocop:disable Layout/LineLength
57
62
  rescue Sequel::Error => e
63
+ handle_exception(e, level: :error, operation: 'apollo.maintenance.run_decay_cycle')
58
64
  { decayed: 0, archived: 0, error: e.message }
59
65
  end
60
66
 
61
67
  def check_corroboration(**) # rubocop:disable Metrics/CyclomaticComplexity
62
- return { success: false, error: 'apollo_data_not_available' } unless defined?(Legion::Data::Model::ApolloEntry)
68
+ unless defined?(Legion::Data::Model::ApolloEntry)
69
+ log.warn('Apollo Maintenance.check_corroboration skipped: apollo_data_not_available')
70
+ return { success: false, error: 'apollo_data_not_available' }
71
+ end
63
72
 
64
73
  candidates = Legion::Data::Model::ApolloEntry.where(status: 'candidate').exclude(embedding: nil).all
65
74
  confirmed = Legion::Data::Model::ApolloEntry.where(status: 'confirmed').exclude(embedding: nil).all
75
+ log.debug("Apollo Maintenance.check_corroboration candidates=#{candidates.size} confirmed=#{confirmed.size}")
66
76
 
67
77
  promoted = 0
68
78
 
@@ -106,7 +116,9 @@ module Legion
106
116
  end
107
117
 
108
118
  { success: true, promoted: promoted, scanned: candidates.size }
119
+ .tap { |result| log.info("Apollo Maintenance.check_corroboration scanned=#{result[:scanned]} promoted=#{result[:promoted]}") }
109
120
  rescue Sequel::Error => e
121
+ handle_exception(e, level: :error, operation: 'apollo.maintenance.check_corroboration')
110
122
  { success: false, error: e.message }
111
123
  end
112
124
 
@@ -3,7 +3,7 @@
3
3
  module Legion
4
4
  module Extensions
5
5
  module Apollo
6
- VERSION = '0.4.19'
6
+ VERSION = '0.4.21'
7
7
  end
8
8
  end
9
9
  end
@@ -263,6 +263,36 @@ RSpec.describe Legion::Extensions::Apollo::Runners::Knowledge do
263
263
  tags: ['RabbitMQ'], source_agent: 'agent-1')
264
264
  end
265
265
 
266
+ it 'sanitizes null bytes before storing content' do
267
+ expect(mock_entry_class).to receive(:create).with(
268
+ hash_including(content: 'helloworld')
269
+ ).and_return(mock_entry)
270
+ host.handle_ingest(content: "hello\x00world", content_type: 'fact', source_agent: 'agent-1')
271
+ end
272
+
273
+ it 'truncates short varchar metadata fields at the database boundary' do
274
+ expect(mock_entry_class).to receive(:create).with(
275
+ hash_including(
276
+ source_agent: 'a' * 50,
277
+ source_provider: 'p' * 50,
278
+ source_channel: 'c' * 100,
279
+ knowledge_domain: 'd' * 50,
280
+ submitted_by: 'u' * 255,
281
+ submitted_from: 'n' * 255
282
+ )
283
+ ).and_return(mock_entry)
284
+ host.handle_ingest(
285
+ content: 'test',
286
+ content_type: 'fact',
287
+ source_agent: 'a' * 60,
288
+ source_provider: 'p' * 60,
289
+ source_channel: 'c' * 120,
290
+ knowledge_domain: 'd' * 60,
291
+ submitted_by: 'u' * 300,
292
+ submitted_from: 'n' * 300
293
+ )
294
+ end
295
+
266
296
  context 'content hash dedup' do
267
297
  let(:existing_entry) do
268
298
  double('existing', id: 'uuid-existing', confidence: 0.6,
@@ -295,9 +325,46 @@ RSpec.describe Legion::Extensions::Apollo::Runners::Knowledge do
295
325
  end
296
326
 
297
327
  it 'returns a structured error' do
328
+ allow(host).to receive(:handle_exception)
329
+
298
330
  result = host.handle_ingest(content: 'test', content_type: 'fact', source_agent: 'a')
299
331
  expect(result[:success]).to be false
300
332
  expect(result[:error]).to eq('connection lost')
333
+ expect(host).to have_received(:handle_exception).with(
334
+ instance_of(Sequel::Error),
335
+ level: :error,
336
+ operation: 'apollo.knowledge.handle_ingest'
337
+ )
338
+ end
339
+ end
340
+
341
+ context 'early-return warn logs' do
342
+ let(:logger) { instance_double('Logger', debug: nil, info: nil, warn: nil) }
343
+
344
+ before { allow(host).to receive(:log).and_return(logger) }
345
+
346
+ it 'emits a warn log when content is nil' do
347
+ host.handle_ingest(content: nil, content_type: 'fact')
348
+ expect(logger).to have_received(:warn).with(/early-return: content is required/)
349
+ end
350
+
351
+ it 'emits a warn log when content_type is nil' do
352
+ host.handle_ingest(content: 'something', content_type: nil)
353
+ expect(logger).to have_received(:warn).with(/early-return: content_type is required/)
354
+ end
355
+
356
+ it 'emits a warn log when apollo_data_not_available' do
357
+ hide_const('Legion::Data::Model::ApolloEntry') if defined?(Legion::Data::Model::ApolloEntry)
358
+ host.handle_ingest(content: 'something', content_type: 'fact')
359
+ expect(logger).to have_received(:warn).with(/early-return: apollo_data_not_available/)
360
+ end
361
+
362
+ it 'sanitizes newline-bearing tags in the warn log' do
363
+ host.handle_ingest(content: nil, content_type: 'fact', tags: ["evil\nFAKE LOG LINE", 'normal'])
364
+ expect(logger).to have_received(:warn) do |msg|
365
+ expect(msg).to include('evil FAKE LOG LINE')
366
+ expect(msg).not_to include("\n")
367
+ end
301
368
  end
302
369
  end
303
370
  end
@@ -376,6 +443,64 @@ RSpec.describe Legion::Extensions::Apollo::Runners::Knowledge do
376
443
  expect(result[:count]).to eq(0)
377
444
  end
378
445
  end
446
+
447
+ context 'when query is browse-shaped' do
448
+ let(:mock_entry_class) { double('ApolloEntry') }
449
+ let(:dataset) { double('dataset') }
450
+ let(:entries) do
451
+ [{ id: 'uuid-1', content: 'Candidate fact', content_type: 'fact',
452
+ confidence: 0.7, tags: ['ruby'], source_agent: 'agent-1',
453
+ knowledge_domain: 'general' }]
454
+ end
455
+
456
+ before do
457
+ stub_const('Legion::Data::Model::ApolloEntry', mock_entry_class)
458
+ allow(mock_entry_class).to receive(:exclude).with(status: 'archived').and_return(dataset)
459
+ allow(dataset).to receive(:where).and_return(dataset)
460
+ allow(dataset).to receive(:order).with(:created_at).and_return(dataset)
461
+ allow(dataset).to receive(:limit).with(50).and_return(dataset)
462
+ allow(dataset).to receive(:all).and_return(entries)
463
+ end
464
+
465
+ it 'lists recent non-archived entries without generating an embedding' do
466
+ expect(Legion::LLM::Embeddings).not_to receive(:generate)
467
+
468
+ result = host.handle_query(query: 'x', limit: 50)
469
+
470
+ expect(result[:success]).to be true
471
+ expect(result[:mode]).to eq(:browse)
472
+ expect(result[:count]).to eq(1)
473
+ expect(result[:entries].first[:content]).to eq('Candidate fact')
474
+ expect(dataset).not_to have_received(:where).with(status: ['confirmed'])
475
+ end
476
+
477
+ it 'respects an explicit confirmed status filter' do
478
+ host.handle_query(query: 'x', limit: 50, status: [:confirmed])
479
+
480
+ expect(dataset).to have_received(:where).with(status: ['confirmed'])
481
+ end
482
+
483
+ it 'applies tags and domain filters when provided' do
484
+ host.handle_query(query: 'x', limit: 50, tags: ['ruby'], domain: 'general')
485
+
486
+ expect(dataset).to have_received(:where).with('tags && ?')
487
+ expect(dataset).to have_received(:where).with(knowledge_domain: 'general')
488
+ end
489
+ end
490
+ end
491
+
492
+ describe '#normalize_text_input' do
493
+ let(:host) { Object.new.extend(described_class) }
494
+
495
+ it 'strips null bytes in the local fallback path' do
496
+ expect(host.send(:normalize_text_input, "hello\x00world")).to eq('helloworld')
497
+ end
498
+
499
+ it 'scrubs invalid UTF-8 in the local fallback path' do
500
+ invalid = "hello\xC3(world".dup.force_encoding(Encoding::UTF_8)
501
+
502
+ expect(host.send(:normalize_text_input, invalid)).to eq('hello(world')
503
+ end
379
504
  end
380
505
 
381
506
  describe '#retrieve_relevant' do
data/spec/spec_helper.rb CHANGED
@@ -16,6 +16,7 @@ unless defined?(Sequel)
16
16
 
17
17
  def self.pg_array(arr) = arr
18
18
  def self.lit(str, *) = str
19
+ def self.desc(sym) = sym
19
20
  Expr = Struct.new(:value) do
20
21
  def +(other) = "#{value} + #{other}"
21
22
  def *(other) = "#{value} * #{other}"
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: lex-apollo
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.19
4
+ version: 0.4.21
5
5
  platform: ruby
6
6
  authors:
7
7
  - Esity