lex-apollo 0.4.19 → 0.4.20

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 1bf0ca8790d13fc3d262ede810ce50a47146dc37743e8806696de970b8a0385a
4
- data.tar.gz: 1f1a9115e1a1bb36423150a7290a82352f07aef7aa41f0b4b7da8e0ff5b76f94
3
+ metadata.gz: f197e46e616eb71f939175480496d17775c67985bf70d631ac8d089724c7ac7d
4
+ data.tar.gz: c8f6ee951339eda218647c3bee95bfda3f32c6ee62a44abcaad8940b40214bd9
5
5
  SHA512:
6
- metadata.gz: 3d92269898b53825fff831ef253fc44ac9e9a45602da25628f32fc95c1e6a53edfd86add2f02f7131c449cd1b940258d2c8fc4df3b90ca6306affbf8a76d4ccb
7
- data.tar.gz: d6c98fc7a7f351dc929e2d2dac4c9d7d119e32328706c9880e19a6bc3edb53448b008d8a8fefd7f7e8b8a201084f4ec78006088954cb7e56ee72cb312f0d5ce6
6
+ metadata.gz: 5b4822965e47e806bf21b1e07e3a675fff365bdea474514278ab61efc1be2e3fc557756f2ad8ab32e63b2d1eb24286dfe25c44ceacbba723bd877d1402d7a5ab
7
+ data.tar.gz: b67b970d6cf8734abd96c5de5e993e802ba924fd966307e60fd7911f7879559e3835d262ed653a269b740bbffd2574df9e156afe4e4c9dfd6a15b70afce6d658
data/CHANGELOG.md CHANGED
@@ -1,5 +1,14 @@
1
1
  # Changelog
2
2
 
3
+ ## [0.4.20] - 2026-04-25
4
+
5
+ ### Fixed
6
+ - `Knowledge#handle_ingest` now sanitizes stored content for PostgreSQL by scrubbing invalid UTF-8 and removing null bytes before inserts.
7
+ - `Knowledge#handle_ingest` now truncates short metadata fields to schema-safe lengths before writing entries.
8
+ - `Knowledge#handle_query` now treats very short queries as browse/list requests, returning recent non-archived entries without embedding noise while preserving explicit status filters.
9
+ - `Knowledge` Sequel rescue paths now log exception class, message, and a short backtrace before returning structured errors.
10
+ - Apollo runners and actors now emit debug/info lifecycle logs around ingest, query, browse, maintenance, expertise, entity extraction, writeback vectorization, entity watchdog, and GAS pipeline paths so debug-level logging can trace workflows end to end.
11
+
3
12
  ## [0.4.19] - 2026-04-24
4
13
 
5
14
  ### Fixed
@@ -34,6 +34,7 @@ module Legion
34
34
 
35
35
  def scan_and_ingest
36
36
  texts = recent_task_log_texts
37
+ log.debug("EntityWatchdog scan_and_ingest log_texts=#{texts.size}")
37
38
  return { success: true, ingested: 0, reason: :no_logs } if texts.empty?
38
39
 
39
40
  ingested = 0
@@ -53,10 +54,10 @@ module Legion
53
54
  end
54
55
  end
55
56
 
56
- log.debug("EntityWatchdog: ingested #{ingested} new entities from #{texts.size} log entries")
57
+ log.info("EntityWatchdog ingested=#{ingested} logs_scanned=#{texts.size}")
57
58
  { success: true, ingested: ingested, logs_scanned: texts.size }
58
59
  rescue StandardError => e
59
- log.error("EntityWatchdog scan_and_ingest failed: #{e.message}")
60
+ handle_exception(e, level: :error, operation: 'apollo.entity_watchdog.scan_and_ingest')
60
61
  { success: false, error: e.message }
61
62
  end
62
63
 
@@ -71,7 +72,9 @@ module Legion
71
72
  .order(Sequel.desc(:created_at))
72
73
  .limit(log_limit)
73
74
  .select_map(:message)
74
- logs.map(&:to_s).reject(&:empty?).uniq
75
+ texts = logs.map(&:to_s).reject(&:empty?).uniq
76
+ log.debug("EntityWatchdog recent_task_log_texts lookback=#{lookback} limit=#{log_limit} raw=#{logs.size} unique=#{texts.size}")
77
+ texts
75
78
  rescue StandardError => e
76
79
  log.warn("EntityWatchdog recent_task_log_texts failed: #{e.message}")
77
80
  []
@@ -104,8 +107,9 @@ module Legion
104
107
  source_agent: 'lex-apollo:entity_watchdog',
105
108
  context: { entity_type: entity[:type], original_name: entity[:name] }
106
109
  ).publish
110
+ log.info("EntityWatchdog published entity type=#{entity[:type]} name=#{entity[:name]}")
107
111
  rescue StandardError => e
108
- log.error("EntityWatchdog publish failed: #{e.message}")
112
+ handle_exception(e, level: :error, operation: 'apollo.entity_watchdog.publish_entity_ingest')
109
113
  end
110
114
 
111
115
  def entity_types
@@ -14,21 +14,27 @@ module Legion
14
14
 
15
15
  def handle_vectorize(payload)
16
16
  payload = symbolize(payload)
17
+ log.debug("WritebackVectorize handle_vectorize content_length=#{payload[:content].to_s.length} content_type=#{payload[:content_type] || 'nil'}")
17
18
  result = Legion::LLM::Embeddings.generate(text: payload[:content])
18
19
  vector = result.is_a?(Hash) ? result[:vector] : result
19
20
  embedding = vector.is_a?(Array) && vector.any? ? vector : Array.new(1024, 0.0)
21
+ log.debug("WritebackVectorize embedding_dimensions=#{embedding.length} vector_generated=#{vector.is_a?(Array) && vector.any?}")
20
22
  enriched = payload.merge(embedding: embedding)
21
23
 
22
24
  if Helpers::Capability.can_write?
25
+ log.debug('WritebackVectorize route=direct_ingest')
23
26
  Runners::Knowledge.handle_ingest(**enriched)
24
27
  else
28
+ log.debug('WritebackVectorize route=transport_writeback')
25
29
  Transport::Messages::Writeback.new(
26
30
  **enriched, has_embedding: true
27
31
  ).publish
28
32
  end
29
33
 
34
+ log.info('WritebackVectorize completed action=vectorized')
30
35
  { success: true, action: :vectorized }
31
36
  rescue StandardError => e
37
+ handle_exception(e, level: :error, operation: 'apollo.writeback_vectorize.handle_vectorize')
32
38
  { success: false, error: e.message }
33
39
  end
34
40
 
@@ -51,15 +51,17 @@ module Legion
51
51
  req = json_body
52
52
  halt 400, { error: 'query is required' }.to_json unless req[:query]
53
53
 
54
- result = runner.handle_query(
54
+ query_options = {
55
55
  query: req[:query],
56
56
  limit: req[:limit] || 10,
57
57
  min_confidence: req[:min_confidence] || 0.3,
58
- status: req[:status] || [:confirmed],
59
58
  tags: req[:tags],
60
59
  domain: req[:domain],
61
60
  agent_id: req[:agent_id] || 'api'
62
- )
61
+ }
62
+ query_options[:status] = req[:status] if req.key?(:status)
63
+
64
+ result = runner.handle_query(**query_options)
63
65
  status result[:success] ? 200 : 500
64
66
  result.to_json
65
67
  end
@@ -4,17 +4,24 @@ module Legion
4
4
  module Extensions
5
5
  module Apollo
6
6
  module Runners
7
- module EntityExtractor # rubocop:disable Legion/Extension/RunnerIncludeHelpers
7
+ module EntityExtractor
8
8
  DEFAULT_ENTITY_TYPES = %w[person service repository concept].freeze
9
9
  DEFAULT_MIN_CONFIDENCE = 0.7
10
10
 
11
11
  def extract_entities(text:, entity_types: nil, min_confidence: Helpers::Confidence.apollo_setting(:entity_extractor, :min_confidence, default: DEFAULT_MIN_CONFIDENCE), **) # rubocop:disable Layout/LineLength
12
- return { success: true, entities: [], source: :empty } if text.to_s.strip.empty?
12
+ if text.to_s.strip.empty?
13
+ log.debug('Apollo EntityExtractor.extract_entities skipped reason=empty_text')
14
+ return { success: true, entities: [], source: :empty }
15
+ end
13
16
 
14
- return { success: true, entities: [], source: :unavailable } unless defined?(Legion::LLM) && Legion::LLM.started?
17
+ unless defined?(Legion::LLM) && Legion::LLM.started?
18
+ log.debug('Apollo EntityExtractor.extract_entities skipped reason=llm_unavailable')
19
+ return { success: true, entities: [], source: :unavailable }
20
+ end
15
21
 
16
22
  types = Array(entity_types).map(&:to_s)
17
23
  types = DEFAULT_ENTITY_TYPES if types.empty?
24
+ log.debug("Apollo EntityExtractor.extract_entities text_length=#{text.to_s.length} types=#{types.join(',')} min_confidence=#{min_confidence}")
18
25
 
19
26
  result = Legion::LLM.structured(
20
27
  messages: [
@@ -29,9 +36,11 @@ module Legion
29
36
  (entity[:confidence] || 0.0) >= min_confidence &&
30
37
  (types.empty? || types.include?(entity[:type].to_s))
31
38
  end
39
+ log.info("Apollo EntityExtractor.extract_entities raw=#{raw_entities.size} filtered=#{filtered.size}")
32
40
 
33
41
  { success: true, entities: filtered, source: :llm }
34
42
  rescue StandardError => e
43
+ handle_exception(e, level: :error, operation: 'apollo.entity_extractor.extract_entities')
35
44
  { success: false, entities: [], error: e.message, source: :error }
36
45
  end
37
46
 
@@ -70,6 +79,8 @@ module Legion
70
79
  required: ['entities']
71
80
  }
72
81
  end
82
+
83
+ include Legion::Extensions::Helpers::Lex if defined?(Legion::Extensions::Helpers::Lex)
73
84
  end
74
85
  end
75
86
  end
@@ -18,52 +18,65 @@ module Legion
18
18
  end
19
19
 
20
20
  def aggregate(**)
21
- return { success: false, error: 'apollo_data_not_available' } unless defined?(Legion::Data::Model::ApolloEntry)
21
+ unless defined?(Legion::Data::Model::ApolloEntry)
22
+ log.warn('Apollo Expertise.aggregate skipped: apollo_data_not_available')
23
+ return { success: false, error: 'apollo_data_not_available' }
24
+ end
22
25
 
23
26
  entries = Legion::Data::Model::ApolloEntry
24
27
  .select(:source_agent, :tags, :confidence)
25
28
  .exclude(source_agent: nil)
26
29
  .all
27
-
28
- groups = {}
29
- entries.each do |entry|
30
- agent = entry.source_agent
31
- domain = entry.tags.is_a?(Array) ? (entry.tags.first || 'general') : 'general'
32
- key = "#{agent}:#{domain}"
33
- groups[key] ||= { agent_id: agent, domain: domain, confidences: [] }
34
- groups[key][:confidences] << entry.confidence.to_f
35
- end
30
+ log.debug("Apollo Expertise.aggregate entries=#{entries.size}")
36
31
 
37
32
  agent_set = Set.new
38
33
  domain_set = Set.new
39
34
 
40
- groups.each_value do |group|
41
- avg = group[:confidences].sum / group[:confidences].size
42
- count = group[:confidences].size
43
- cap = Helpers::Confidence.apollo_setting(:expertise, :proficiency_cap, default: 1.0)
44
- proficiency = [avg * Math.log2(count + 1), cap].min
45
-
46
- existing = Legion::Data::Model::ApolloExpertise
47
- .where(agent_id: group[:agent_id], domain: group[:domain]).first
48
-
49
- if existing
50
- existing.update(proficiency: proficiency, entry_count: count, last_active_at: Time.now)
51
- else
52
- Legion::Data::Model::ApolloExpertise.create(
53
- agent_id: group[:agent_id], domain: group[:domain],
54
- proficiency: proficiency, entry_count: count, last_active_at: Time.now
55
- )
56
- end
57
-
35
+ expertise_groups(entries).each_value do |group|
36
+ upsert_expertise_group(group)
58
37
  agent_set << group[:agent_id]
59
38
  domain_set << group[:domain]
60
39
  end
61
40
 
62
41
  { success: true, agents: agent_set.size, domains: domain_set.size }
42
+ .tap { |result| log.info("Apollo Expertise.aggregate agents=#{result[:agents]} domains=#{result[:domains]}") }
63
43
  rescue Sequel::Error => e
44
+ handle_exception(e, level: :error, operation: 'apollo.expertise.aggregate')
64
45
  { success: false, error: e.message }
65
46
  end
66
47
 
48
+ def expertise_groups(entries)
49
+ entries.each_with_object({}) do |entry, groups|
50
+ agent = entry.source_agent
51
+ domain = entry.tags.is_a?(Array) ? (entry.tags.first || 'general') : 'general'
52
+ key = "#{agent}:#{domain}"
53
+ groups[key] ||= { agent_id: agent, domain: domain, confidences: [] }
54
+ groups[key][:confidences] << entry.confidence.to_f
55
+ end
56
+ end
57
+
58
+ def upsert_expertise_group(group)
59
+ count = group[:confidences].size
60
+ proficiency = expertise_proficiency(group[:confidences])
61
+ existing = Legion::Data::Model::ApolloExpertise
62
+ .where(agent_id: group[:agent_id], domain: group[:domain]).first
63
+
64
+ if existing
65
+ existing.update(proficiency: proficiency, entry_count: count, last_active_at: Time.now)
66
+ else
67
+ Legion::Data::Model::ApolloExpertise.create(
68
+ agent_id: group[:agent_id], domain: group[:domain],
69
+ proficiency: proficiency, entry_count: count, last_active_at: Time.now
70
+ )
71
+ end
72
+ end
73
+
74
+ def expertise_proficiency(confidences)
75
+ avg = confidences.sum / confidences.size
76
+ cap = Helpers::Confidence.apollo_setting(:expertise, :proficiency_cap, default: 1.0)
77
+ [avg * Math.log2(confidences.size + 1), cap].min
78
+ end
79
+
67
80
  include Legion::Extensions::Helpers::Lex if defined?(Legion::Extensions::Helpers::Lex)
68
81
  end
69
82
  end
@@ -4,7 +4,10 @@ module Legion
4
4
  module Extensions
5
5
  module Apollo
6
6
  module Runners
7
- module Gas # rubocop:disable Legion/Extension/RunnerIncludeHelpers
7
+ module Gas
8
+ include Legion::Extensions::Helpers::Lex if defined?(Legion::Extensions::Helpers::Lex)
9
+ extend Legion::Extensions::Helpers::Lex if defined?(Legion::Extensions::Helpers::Lex)
10
+
8
11
  RELATION_TYPES = %w[
9
12
  similar_to contradicts depends_on causes
10
13
  part_of supersedes supports_by extends
@@ -16,10 +19,6 @@ module Legion
16
19
 
17
20
  module_function
18
21
 
19
- def log
20
- Legion::Logging
21
- end
22
-
23
22
  def json_load(str)
24
23
  ::JSON.parse(str, symbolize_names: true)
25
24
  end
@@ -31,7 +30,12 @@ module Legion
31
30
  def fallback_confidence = Helpers::Confidence.apollo_setting(:gas, :fallback_confidence, default: 0.5)
32
31
 
33
32
  def process(audit_event)
34
- return { phases_completed: 0, reason: 'no content' } unless processable?(audit_event)
33
+ unless processable?(audit_event)
34
+ log.debug('GAS process skipped reason=no_content')
35
+ return { phases_completed: 0, reason: 'no content' }
36
+ end
37
+
38
+ log.debug("GAS process start request_id=#{audit_event[:request_id] || 'nil'} messages=#{Array(audit_event[:messages]).size} response_length=#{audit_event[:response_content].to_s.length}") # rubocop:disable Layout/LineLength
35
39
 
36
40
  facts = phase_comprehend(audit_event)
37
41
  entities = phase_extract(audit_event, facts)
@@ -40,7 +44,7 @@ module Legion
40
44
  deposit_result = phase_deposit(facts, entities, relations, synthesis, audit_event)
41
45
  anticipations = phase_anticipate(facts, synthesis)
42
46
 
43
- {
47
+ result = {
44
48
  phases_completed: 6,
45
49
  facts: facts.length,
46
50
  entities: entities.length,
@@ -49,8 +53,10 @@ module Legion
49
53
  deposited: deposit_result,
50
54
  anticipations: anticipations.length
51
55
  }
56
+ log.info("GAS process complete facts=#{result[:facts]} entities=#{result[:entities]} relations=#{result[:relations]} synthesis=#{result[:synthesis]} anticipations=#{result[:anticipations]}") # rubocop:disable Layout/LineLength
57
+ result
52
58
  rescue StandardError => e
53
- log.warn("GAS pipeline error: #{e.message}")
59
+ log.error("GAS pipeline error: #{e.message}")
54
60
  { phases_completed: 0, error: e.message }
55
61
  end
56
62
 
@@ -63,19 +69,24 @@ module Legion
63
69
  messages = audit_event[:messages]
64
70
  response = audit_event[:response_content]
65
71
 
66
- if llm_available?
67
- llm_comprehend(messages, response)
68
- else
69
- mechanical_comprehend(messages, response)
70
- end
72
+ mode = llm_available? ? :llm : :mechanical
73
+ log.debug("GAS phase_comprehend mode=#{mode} messages=#{Array(messages).size} response_length=#{response.to_s.length}")
74
+ facts = mode == :llm ? llm_comprehend(messages, response) : mechanical_comprehend(messages, response)
75
+ log.debug("GAS phase_comprehend facts=#{facts.size}")
76
+ facts
71
77
  end
72
78
 
73
79
  # Phase 2: Extract - entity extraction (delegates to existing EntityExtractor)
74
80
  def phase_extract(audit_event, _facts)
75
- return [] unless defined?(Runners::EntityExtractor)
81
+ unless defined?(Runners::EntityExtractor)
82
+ log.debug('GAS phase_extract skipped reason=entity_extractor_unavailable')
83
+ return []
84
+ end
76
85
 
77
86
  result = Runners::EntityExtractor.extract_entities(text: audit_event[:response_content])
78
- result[:success] ? (result[:entities] || []) : []
87
+ entities = result[:success] ? (result[:entities] || []) : []
88
+ log.debug("GAS phase_extract success=#{result[:success]} entities=#{entities.size}")
89
+ entities
79
90
  rescue StandardError => e
80
91
  log.warn("GAS phase_extract failed: #{e.message}")
81
92
  []
@@ -83,10 +94,16 @@ module Legion
83
94
 
84
95
  # Phase 3: Relate - classify relationships between new and existing entries
85
96
  def phase_relate(facts, _entities)
86
- return [] unless defined?(Runners::Knowledge)
97
+ unless defined?(Runners::Knowledge)
98
+ log.debug('GAS phase_relate skipped reason=knowledge_runner_unavailable')
99
+ return []
100
+ end
87
101
 
88
102
  existing = fetch_similar_entries(facts)
89
- return [] if existing.empty?
103
+ if existing.empty?
104
+ log.debug("GAS phase_relate skipped reason=no_existing_entries facts=#{facts.size}")
105
+ return []
106
+ end
90
107
 
91
108
  relations = []
92
109
  facts.each do |fact|
@@ -95,15 +112,24 @@ module Legion
95
112
  relations << relation if relation
96
113
  end
97
114
  end
115
+ log.debug("GAS phase_relate facts=#{facts.size} existing=#{existing.size} relations=#{relations.size}")
98
116
  relations
99
117
  end
100
118
 
101
119
  # Phase 4: Synthesize - generate derivative knowledge
102
120
  def phase_synthesize(facts, _relations)
103
- return [] if facts.length < 2
104
- return [] unless llm_available?
121
+ if facts.length < 2
122
+ log.debug("GAS phase_synthesize skipped reason=insufficient_facts facts=#{facts.length}")
123
+ return []
124
+ end
125
+ unless llm_available?
126
+ log.debug('GAS phase_synthesize skipped reason=llm_unavailable')
127
+ return []
128
+ end
105
129
 
106
- llm_synthesize(facts)
130
+ synthesis = llm_synthesize(facts)
131
+ log.debug("GAS phase_synthesize synthesis=#{synthesis.size}")
132
+ synthesis
107
133
  rescue StandardError => e
108
134
  log.warn("GAS phase_synthesize failed: #{e.message}")
109
135
  []
@@ -111,7 +137,10 @@ module Legion
111
137
 
112
138
  # Phase 5: Deposit - atomic write to Apollo
113
139
  def phase_deposit(facts, _entities, _relations, _synthesis, audit_event)
114
- return { deposited: 0 } unless defined?(Runners::Knowledge)
140
+ unless defined?(Runners::Knowledge)
141
+ log.debug('GAS phase_deposit skipped reason=knowledge_runner_unavailable')
142
+ return { deposited: 0 }
143
+ end
115
144
 
116
145
  deposited = 0
117
146
  facts.each do |fact|
@@ -128,15 +157,24 @@ module Legion
128
157
  rescue StandardError => e
129
158
  log.warn("GAS deposit error: #{e.message}")
130
159
  end
160
+ log.info("GAS phase_deposit deposited=#{deposited} facts=#{facts.size}")
131
161
  { deposited: deposited }
132
162
  end
133
163
 
134
164
  # Phase 6: Anticipate - pre-cache likely follow-up questions
135
165
  def phase_anticipate(facts, _synthesis)
136
- return [] if facts.empty?
137
- return [] unless llm_available?
166
+ if facts.empty?
167
+ log.debug('GAS phase_anticipate skipped reason=no_facts')
168
+ return []
169
+ end
170
+ unless llm_available?
171
+ log.debug('GAS phase_anticipate skipped reason=llm_unavailable')
172
+ return []
173
+ end
138
174
 
139
- llm_anticipate(facts)
175
+ anticipations = llm_anticipate(facts)
176
+ log.debug("GAS phase_anticipate anticipations=#{anticipations.size}")
177
+ anticipations
140
178
  rescue StandardError => e
141
179
  log.warn("GAS phase_anticipate failed: #{e.message}")
142
180
  []
@@ -153,7 +191,9 @@ module Legion
153
191
  log.warn("GAS fetch_similar_entries failed for fact: #{e.message}")
154
192
  next
155
193
  end
156
- entries.uniq { |e| e[:id] }
194
+ unique = entries.uniq { |e| e[:id] }
195
+ log.debug("GAS fetch_similar_entries facts=#{facts.size} entries=#{unique.size}")
196
+ unique
157
197
  end
158
198
 
159
199
  def classify_relation(fact, entry)
@@ -21,9 +21,12 @@ module Legion
21
21
  link: :association, relation: :association, connection: :association,
22
22
  inference: :association, implication: :association
23
23
  }.freeze
24
+ DEFAULT_QUERY_STATUS = [:confirmed].freeze
25
+ UNSET = Object.new.freeze
24
26
 
25
27
  def store_knowledge(content:, content_type:, tags: [], source_agent: nil, context: {}, **)
26
28
  content_type = normalize_content_type(content_type)
29
+ log.debug("Apollo Knowledge.store_knowledge content_type=#{content_type} tags=#{Array(tags).size} source_agent=#{source_agent || 'nil'} data_available=#{defined?(Legion::Data::Model::ApolloEntry) ? true : false}") # rubocop:disable Layout/LineLength
27
30
 
28
31
  if defined?(Legion::Data::Model::ApolloEntry)
29
32
  return handle_ingest(content: content, content_type: content_type,
@@ -41,6 +44,7 @@ module Legion
41
44
  end
42
45
 
43
46
  def query_knowledge(query:, limit: Helpers::GraphQuery.default_query_limit, min_confidence: Helpers::GraphQuery.default_query_min_confidence, status: %i[confirmed candidate], tags: nil, **) # rubocop:disable Layout/LineLength
47
+ log.debug("Apollo Knowledge.query_knowledge query_length=#{query.to_s.length} limit=#{limit} statuses=#{Array(status).join(',')} tags=#{Array(tags).size} data_available=#{defined?(Legion::Data::Model::ApolloEntry) ? true : false}") # rubocop:disable Layout/LineLength
44
48
  if defined?(Legion::Data::Model::ApolloEntry)
45
49
  return handle_query(query: query, limit: limit, min_confidence: min_confidence,
46
50
  status: status, tags: tags, **)
@@ -57,6 +61,7 @@ module Legion
57
61
  end
58
62
 
59
63
  def related_entries(entry_id:, relation_types: nil, depth: Helpers::GraphQuery.default_depth, **)
64
+ log.debug("Apollo Knowledge.related_entries entry_id=#{entry_id} depth=#{depth} relation_types=#{Array(relation_types).join(',')} data_available=#{defined?(Legion::Data::Model::ApolloEntry) ? true : false}") # rubocop:disable Layout/LineLength
60
65
  return handle_traverse(entry_id: entry_id, depth: depth, relation_types: relation_types, **) if defined?(Legion::Data::Model::ApolloEntry)
61
66
 
62
67
  {
@@ -75,74 +80,73 @@ module Legion
75
80
  }
76
81
  end
77
82
 
78
- def handle_ingest(content: nil, content_type: nil, tags: [], source_agent: 'unknown', source_provider: nil, source_channel: nil, knowledge_domain: nil, submitted_by: nil, submitted_from: nil, content_hash: nil, context: {}, skip: false, **) # rubocop:disable Metrics/ParameterLists, Layout/LineLength, Metrics/PerceivedComplexity, Metrics/CyclomaticComplexity
83
+ def handle_ingest(content: nil, content_type: nil, tags: [], source_agent: 'unknown', source_provider: nil, source_channel: nil, knowledge_domain: nil, submitted_by: nil, submitted_from: nil, content_hash: nil, context: {}, skip: false, **) # rubocop:disable Metrics/ParameterLists, Layout/LineLength
79
84
  return { status: :skipped } if skip
80
- return { success: false, error: 'content is required' } if content.nil? || content.to_s.strip.empty?
85
+
86
+ content = normalize_text_input(content)
87
+ log.debug("Apollo Knowledge.handle_ingest content_length=#{content.length} content_type=#{content_type} tags=#{Array(tags).size} source_agent=#{source_agent} source_channel=#{source_channel || 'nil'}") # rubocop:disable Layout/LineLength
88
+ return { success: false, error: 'content is required' } if content.strip.empty?
81
89
  return { success: false, error: 'content_type is required' } if content_type.nil?
82
90
  return { success: false, error: 'apollo_data_not_available' } unless defined?(Legion::Data::Model::ApolloEntry)
83
91
 
84
- # Content hash dedup
85
92
  hash = content_hash || (defined?(Helpers::Writeback) ? Helpers::Writeback.content_hash(content) : nil)
86
- if hash
87
- existing = Legion::Data::Model::ApolloEntry
88
- .where(content_hash: hash)
89
- .exclude(status: 'archived')
90
- .first
91
- if existing
92
- existing.update(confidence: [existing.confidence + Helpers::Confidence.retrieval_boost, 1.0].min)
93
- return { success: true, entry_id: existing.id, deduped: true }
94
- end
93
+ existing = active_duplicate_for_hash(hash)
94
+ if existing
95
+ log.info("Apollo Knowledge.handle_ingest deduped entry_id=#{existing.id} source_agent=#{source_agent}")
96
+ return { success: true, entry_id: existing.id, deduped: true }
95
97
  end
96
98
 
97
99
  embedding = embed_text(content)
98
100
  content_type_sym = content_type.to_s
99
- tag_array = defined?(Helpers::TagNormalizer) ? Helpers::TagNormalizer.normalize_all(tags) : Array(tags)
100
- domain = knowledge_domain || tag_array.first || 'general'
101
-
102
- corroborated, existing_id = find_corroboration(embedding, content_type_sym, source_agent, source_channel)
103
-
104
- unless corroborated
105
- new_entry = Legion::Data::Model::ApolloEntry.create(
106
- content: content,
107
- content_type: content_type_sym,
108
- confidence: Helpers::Confidence.initial_confidence,
109
- source_agent: source_agent,
110
- source_provider: source_provider || derive_provider_from_agent(source_agent),
111
- source_channel: source_channel,
112
- source_context: ::JSON.dump(context.is_a?(Hash) ? context : {}),
113
- tags: Sequel.pg_array(tag_array),
114
- status: 'candidate',
115
- knowledge_domain: domain,
116
- submitted_by: submitted_by,
117
- submitted_from: submitted_from,
118
- content_hash: hash,
119
- embedding: Sequel.lit("'[#{embedding.join(',')}]'::vector")
101
+ metadata = ingest_metadata(tags: tags, knowledge_domain: knowledge_domain, source_agent: source_agent,
102
+ source_provider: source_provider, source_channel: source_channel,
103
+ submitted_by: submitted_by, submitted_from: submitted_from)
104
+
105
+ corroborated, existing_id = find_corroboration(
106
+ embedding, content_type_sym, metadata[:source_agent], metadata[:source_channel]
107
+ )
108
+
109
+ if corroborated
110
+ log.info("Apollo Knowledge.handle_ingest corroborated entry_id=#{existing_id} source_agent=#{metadata[:source_agent]}")
111
+ else
112
+ existing_id = create_candidate_entry(
113
+ content: content, content_type: content_type_sym, context: context,
114
+ metadata: metadata, content_hash: hash, embedding: embedding
120
115
  )
121
- existing_id = new_entry.id
122
116
  end
123
117
 
124
- upsert_expertise(source_agent: source_agent, domain: domain)
118
+ upsert_expertise(source_agent: metadata[:source_agent], domain: metadata[:domain])
125
119
 
126
120
  Legion::Data::Model::ApolloAccessLog.create(
127
- entry_id: existing_id, agent_id: source_agent, action: 'ingest'
121
+ entry_id: existing_id, agent_id: metadata[:source_agent], action: 'ingest'
128
122
  )
129
123
 
130
124
  contradictions = detect_contradictions(existing_id, embedding, content)
125
+ log.debug("Apollo Knowledge.handle_ingest complete entry_id=#{existing_id} corroborated=#{corroborated} contradictions=#{contradictions.size}")
131
126
 
132
127
  { success: true, entry_id: existing_id, status: corroborated ? 'corroborated' : 'candidate',
133
128
  corroborated: corroborated, contradictions: contradictions }
134
129
  rescue Sequel::Error => e
130
+ handle_exception(e, level: :error, operation: 'apollo.knowledge.handle_ingest')
135
131
  { success: false, error: e.message }
136
132
  end
137
133
 
138
- def handle_query(query:, limit: Helpers::GraphQuery.default_query_limit, min_confidence: Helpers::GraphQuery.default_query_min_confidence, status: [:confirmed], tags: nil, domain: nil, agent_id: 'unknown', **) # rubocop:disable Layout/LineLength
134
+ def handle_query(query:, limit: Helpers::GraphQuery.default_query_limit, min_confidence: Helpers::GraphQuery.default_query_min_confidence, status: UNSET, tags: nil, domain: nil, agent_id: 'unknown', **) # rubocop:disable Layout/LineLength
139
135
  return { success: false, error: 'apollo_data_not_available' } unless defined?(Legion::Data::Model::ApolloEntry)
140
136
 
141
137
  query = normalize_text_input(query)
138
+ status_defaulted = status.equal?(UNSET)
139
+ requested_status = status_defaulted ? DEFAULT_QUERY_STATUS : status
140
+ log.debug("Apollo Knowledge.handle_query mode=#{browse_query?(query) ? 'browse' : 'semantic'} query_length=#{query.length} limit=#{limit} statuses=#{Array(requested_status).join(',')} status_defaulted=#{status_defaulted} tags=#{Array(tags).size} domain=#{domain || 'nil'} agent_id=#{agent_id}") # rubocop:disable Layout/LineLength
141
+ if browse_query?(query)
142
+ return list_entries_chronologically(query: query, limit: limit, status: requested_status,
143
+ status_defaulted: status_defaulted, tags: tags, domain: domain)
144
+ end
145
+
142
146
  embedding = embed_text(query)
143
147
  sql = Helpers::GraphQuery.build_semantic_search_sql(
144
148
  limit: limit, min_confidence: min_confidence,
145
- statuses: Array(status).map(&:to_s), tags: tags, domain: domain
149
+ statuses: Array(requested_status).map(&:to_s), tags: tags, domain: domain
146
150
  )
147
151
 
148
152
  db = Legion::Data::Model::ApolloEntry.db
@@ -173,14 +177,17 @@ module Legion
173
177
  knowledge_domain: entry[:knowledge_domain] }
174
178
  end
175
179
 
180
+ log.info("Apollo Knowledge.handle_query results=#{formatted.size} mode=semantic agent_id=#{agent_id}")
176
181
  { success: true, entries: formatted, count: formatted.size }
177
182
  rescue Sequel::Error => e
183
+ handle_exception(e, level: :error, operation: 'apollo.knowledge.handle_query')
178
184
  { success: false, error: e.message }
179
185
  end
180
186
 
181
187
  def handle_traverse(entry_id:, depth: Helpers::GraphQuery.default_depth, relation_types: nil, agent_id: 'unknown', **)
182
188
  return { success: false, error: 'apollo_data_not_available' } unless defined?(Legion::Data::Model::ApolloEntry)
183
189
 
190
+ log.debug("Apollo Knowledge.handle_traverse entry_id=#{entry_id} depth=#{depth} relation_types=#{Array(relation_types).join(',')} agent_id=#{agent_id}") # rubocop:disable Layout/LineLength
184
191
  # Whitelist relation_types to prevent SQL injection (they are string-interpolated in build_traversal_sql)
185
192
  if relation_types
186
193
  allowed = Helpers::Confidence::RELATION_TYPES
@@ -203,14 +210,17 @@ module Legion
203
210
  depth: entry[:depth], activation: entry[:activation] }
204
211
  end
205
212
 
213
+ log.info("Apollo Knowledge.handle_traverse results=#{formatted.size} entry_id=#{entry_id}")
206
214
  { success: true, entries: formatted, count: formatted.size }
207
215
  rescue Sequel::Error => e
216
+ handle_exception(e, level: :error, operation: 'apollo.knowledge.handle_traverse')
208
217
  { success: false, error: e.message }
209
218
  end
210
219
 
211
220
  def redistribute_knowledge(agent_id:, min_confidence: Helpers::Confidence.apollo_setting(:query, :redistribute_min_confidence, default: 0.5), **)
212
221
  return { success: false, error: 'apollo_data_not_available' } unless defined?(Legion::Data::Model::ApolloEntry)
213
222
 
223
+ log.debug("Apollo Knowledge.redistribute_knowledge agent_id=#{agent_id} min_confidence=#{min_confidence}")
214
224
  entries = Legion::Data::Model::ApolloEntry
215
225
  .where(source_agent: agent_id, status: 'confirmed')
216
226
  .where { confidence > min_confidence }
@@ -238,6 +248,7 @@ module Legion
238
248
  log.info("[apollo] redistributed #{redistributed} entries from departing agent=#{agent_id}")
239
249
  { success: true, redistributed: redistributed, agent_id: agent_id }
240
250
  rescue Sequel::Error => e
251
+ handle_exception(e, level: :error, operation: 'apollo.knowledge.redistribute_knowledge')
241
252
  { success: false, error: e.message }
242
253
  end
243
254
 
@@ -247,6 +258,7 @@ module Legion
247
258
  return { success: false, error: 'apollo_data_not_available' } unless defined?(Legion::Data::Model::ApolloEntry)
248
259
 
249
260
  query = normalize_text_input(query)
261
+ log.debug("Apollo Knowledge.retrieve_relevant query_length=#{query.length} limit=#{limit} min_confidence=#{min_confidence} tags=#{Array(tags).size} domain=#{domain || 'nil'}") # rubocop:disable Layout/LineLength
250
262
  return { success: true, entries: [], count: 0 } if query.nil? || query.to_s.strip.empty?
251
263
 
252
264
  embedding = embed_text(query)
@@ -273,8 +285,10 @@ module Legion
273
285
  knowledge_domain: entry[:knowledge_domain] }
274
286
  end
275
287
 
288
+ log.info("Apollo Knowledge.retrieve_relevant results=#{formatted.size} limit=#{limit}")
276
289
  { success: true, entries: formatted, count: formatted.size }
277
290
  rescue Sequel::Error => e
291
+ handle_exception(e, level: :error, operation: 'apollo.knowledge.retrieve_relevant')
278
292
  { success: false, error: e.message }
279
293
  end
280
294
 
@@ -283,6 +297,7 @@ module Legion
283
297
  return { success: false, error: 'apollo_data_not_available' }
284
298
  end
285
299
 
300
+ log.debug("Apollo Knowledge.prepare_mesh_export target_domain=#{target_domain} min_confidence=#{min_confidence} limit=#{limit}")
286
301
  conn = Legion::Data.connection
287
302
  allowed = allowed_domains_for(target_domain)
288
303
 
@@ -302,7 +317,9 @@ module Legion
302
317
  end
303
318
 
304
319
  { success: true, entries: formatted, count: formatted.size, target_domain: target_domain }
320
+ .tap { |result| log.info("Apollo Knowledge.prepare_mesh_export results=#{result[:count]} target_domain=#{target_domain}") }
305
321
  rescue Sequel::Error => e
322
+ handle_exception(e, level: :error, operation: 'apollo.knowledge.prepare_mesh_export')
306
323
  { success: false, error: e.message }
307
324
  end
308
325
 
@@ -311,6 +328,7 @@ module Legion
311
328
  return { deleted: 0, redacted: 0, error: 'apollo_data_not_available' }
312
329
  end
313
330
 
331
+ log.warn("Apollo Knowledge.handle_erasure_request agent_id=#{agent_id}")
314
332
  conn = Legion::Data.connection
315
333
 
316
334
  # Delete entries solely from dead agent (not confirmed by others)
@@ -325,7 +343,9 @@ module Legion
325
343
  .update(source_agent: 'redacted', source_provider: nil, source_channel: nil)
326
344
 
327
345
  { deleted: deleted, redacted: redacted, agent_id: agent_id }
346
+ .tap { |result| log.info("Apollo Knowledge.handle_erasure_request deleted=#{result[:deleted]} redacted=#{result[:redacted]} agent_id=#{agent_id}") } # rubocop:disable Layout/LineLength
328
347
  rescue Sequel::Error => e
348
+ handle_exception(e, level: :error, operation: 'apollo.knowledge.handle_erasure_request')
329
349
  { deleted: 0, redacted: 0, error: e.message }
330
350
  end
331
351
 
@@ -341,21 +361,124 @@ module Legion
341
361
 
342
362
  def embed_text(text)
343
363
  text = normalize_text_input(text)
364
+ log.debug("Apollo Knowledge.embed_text text_length=#{text.length}")
344
365
  result = Legion::LLM::Embeddings.generate(text: text)
345
366
  vector = result.is_a?(Hash) ? result[:vector] : result
346
- vector.is_a?(Array) && vector.any? ? vector : Array.new(1024, 0.0)
367
+ if vector.is_a?(Array) && vector.any?
368
+ log.debug("Apollo Knowledge.embed_text vector_dimensions=#{vector.length}")
369
+ vector
370
+ else
371
+ log.warn('Apollo Knowledge.embed_text returned no vector; using zero-vector fallback')
372
+ Array.new(1024, 0.0)
373
+ end
347
374
  rescue StandardError => e
348
375
  log.warn("Apollo Knowledge.embed_text failed: #{e.message}")
349
376
  Array.new(1024, 0.0)
350
377
  end
351
378
 
352
379
  def normalize_text_input(value)
353
- return Legion::Apollo.send(:normalize_text_input, value) if defined?(Legion::Apollo) && Legion::Apollo.respond_to?(:normalize_text_input, true)
380
+ result = if defined?(Legion::Apollo) && Legion::Apollo.respond_to?(:normalize_text_input, true)
381
+ Legion::Apollo.send(:normalize_text_input, value)
382
+ else
383
+ value.to_s
384
+ end
354
385
 
355
- value.to_s
386
+ sanitize_for_postgres(result)
356
387
  rescue StandardError => e
357
388
  log.warn("Apollo Knowledge.normalize_text_input failed: #{e.message}")
358
- value.to_s
389
+ ''
390
+ end
391
+
392
+ def sanitize_for_postgres(value)
393
+ return value unless value.is_a?(String)
394
+
395
+ string = value.encoding == Encoding::UTF_8 ? value.dup : value.dup.force_encoding(Encoding::UTF_8)
396
+ changed = string.include?("\x00") || !string.valid_encoding?
397
+ string = string.scrub('') unless string.valid_encoding?
398
+ sanitized = string.delete("\x00")
399
+ log.debug("Apollo Knowledge.sanitize_for_postgres sanitized original_length=#{value.bytesize} sanitized_length=#{sanitized.bytesize}") if changed
400
+ sanitized
401
+ end
402
+
403
+ def truncate_for_column(value, max_length)
404
+ return nil if value.nil?
405
+
406
+ normalize_text_input(value)[0, max_length]
407
+ end
408
+
409
+ def active_duplicate_for_hash(hash)
410
+ return nil unless hash
411
+
412
+ existing = Legion::Data::Model::ApolloEntry
413
+ .where(content_hash: hash)
414
+ .exclude(status: 'archived')
415
+ .first
416
+ existing&.update(confidence: [existing.confidence + Helpers::Confidence.retrieval_boost, 1.0].min)
417
+ log.debug("Apollo Knowledge.active_duplicate_for_hash matched entry_id=#{existing.id}") if existing
418
+ existing
419
+ end
420
+
421
+ def ingest_metadata(tags:, knowledge_domain:, source_agent:, source_provider:, source_channel:, submitted_by:, submitted_from:)
422
+ tag_array = defined?(Helpers::TagNormalizer) ? Helpers::TagNormalizer.normalize_all(tags) : Array(tags)
423
+ agent = truncate_for_column(source_agent, 50) || 'unknown'
424
+
425
+ { tags: tag_array,
426
+ domain: truncate_for_column(knowledge_domain || tag_array.first || 'general', 50),
427
+ source_agent: agent,
428
+ source_provider: truncate_for_column(source_provider || derive_provider_from_agent(agent), 50),
429
+ source_channel: truncate_for_column(source_channel, 100),
430
+ submitted_by: truncate_for_column(submitted_by, 255),
431
+ submitted_from: truncate_for_column(submitted_from, 255) }
432
+ end
433
+
434
+ def create_candidate_entry(content:, content_type:, context:, metadata:, content_hash:, embedding:)
435
+ new_entry = Legion::Data::Model::ApolloEntry.create(
436
+ content: content,
437
+ content_type: content_type,
438
+ confidence: Helpers::Confidence.initial_confidence,
439
+ source_agent: metadata[:source_agent],
440
+ source_provider: metadata[:source_provider],
441
+ source_channel: metadata[:source_channel],
442
+ source_context: ::JSON.dump(context.is_a?(Hash) ? context : {}),
443
+ tags: Sequel.pg_array(metadata[:tags]),
444
+ status: 'candidate',
445
+ knowledge_domain: metadata[:domain],
446
+ submitted_by: metadata[:submitted_by],
447
+ submitted_from: metadata[:submitted_from],
448
+ content_hash: content_hash,
449
+ embedding: Sequel.lit("'[#{embedding.join(',')}]'::vector")
450
+ )
451
+ log.info("Apollo Knowledge.handle_ingest created entry_id=#{new_entry.id} status=candidate domain=#{metadata[:domain]} source_agent=#{metadata[:source_agent]}") # rubocop:disable Layout/LineLength
452
+ new_entry.id
453
+ end
454
+
455
+ def browse_query?(query)
456
+ query.to_s.strip.length < 3
457
+ end
458
+
459
+ def list_entries_chronologically(query:, limit:, status:, status_defaulted:, tags:, domain:)
460
+ log.debug("Apollo Knowledge.list_entries_chronologically limit=#{limit} statuses=#{Array(status).join(',')} status_defaulted=#{status_defaulted} tags=#{Array(tags).size} domain=#{domain || 'nil'}") # rubocop:disable Layout/LineLength
461
+ dataset = Legion::Data::Model::ApolloEntry.exclude(status: 'archived')
462
+ requested = Array(status).map(&:to_s).reject(&:empty?)
463
+ dataset = dataset.where(status: requested) unless status_defaulted || requested.empty?
464
+ dataset = dataset.where(Sequel.lit('tags && ?', Sequel.pg_array(Array(tags)))) if tags && !Array(tags).empty?
465
+ dataset = dataset.where(knowledge_domain: domain) if domain && !domain.to_s.empty?
466
+
467
+ entries = dataset.order(Sequel.desc(:created_at)).limit(limit).all.map do |entry|
468
+ format_entry(entry.is_a?(Hash) ? entry : entry.values)
469
+ end
470
+ log.info("Apollo Knowledge.list_entries_chronologically results=#{entries.size}")
471
+ { success: true, mode: :browse, query: query, entries: entries, count: entries.size }
472
+ rescue Sequel::Error => e
473
+ handle_exception(e, level: :error, operation: 'apollo.knowledge.list_entries_chronologically')
474
+ { success: false, error: e.message }
475
+ end
476
+
477
+ def format_entry(entry)
478
+ { id: entry[:id], content: entry[:content], content_type: entry[:content_type],
479
+ confidence: entry[:confidence], distance: entry[:distance]&.to_f,
480
+ tags: entry[:tags], source_agent: entry[:source_agent],
481
+ knowledge_domain: entry[:knowledge_domain] }
359
482
  end
360
483
 
361
484
  def allowed_domains_for(target_domain)
@@ -379,6 +502,7 @@ module Legion
379
502
  rel_weight = Helpers::Confidence.apollo_setting(:contradiction, :relation_weight, default: 0.8)
380
503
 
381
504
  db = Legion::Data::Model::ApolloEntry.db
505
+ log.debug("Apollo Knowledge.detect_contradictions entry_id=#{entry_id} similar_limit=#{sim_limit} threshold=#{sim_threshold}")
382
506
  similar = db.fetch(
383
507
  "SELECT id, content, embedding FROM apollo_entries WHERE id != :entry_id AND embedding IS NOT NULL ORDER BY embedding <=> :embedding LIMIT #{sim_limit}", # rubocop:disable Layout/LineLength
384
508
  entry_id: entry_id,
@@ -403,9 +527,10 @@ module Legion
403
527
  Legion::Data::Model::ApolloEntry.where(id: [entry_id, existing[:id]]).update(status: 'disputed')
404
528
  contradictions << existing[:id]
405
529
  end
530
+ log.info("Apollo Knowledge.detect_contradictions entry_id=#{entry_id} contradictions=#{contradictions.size}") if contradictions.any?
406
531
  contradictions
407
532
  rescue Sequel::Error => e
408
- log.warn("Apollo Knowledge.detect_contradictions failed: #{e.message}")
533
+ handle_exception(e, level: :error, operation: 'apollo.knowledge.detect_contradictions')
409
534
  []
410
535
  end
411
536
 
@@ -430,6 +555,7 @@ module Legion
430
555
 
431
556
  def find_corroboration(embedding, content_type_sym, source_agent, source_channel = nil)
432
557
  scan_limit = Helpers::Confidence.apollo_setting(:corroboration, :scan_limit, default: 50)
558
+ log.debug("Apollo Knowledge.find_corroboration content_type=#{content_type_sym} source_agent=#{source_agent} source_channel=#{source_channel || 'nil'} scan_limit=#{scan_limit}") # rubocop:disable Layout/LineLength
433
559
  existing = Legion::Data::Model::ApolloEntry
434
560
  .where(content_type: content_type_sym)
435
561
  .exclude(embedding: nil)
@@ -462,9 +588,11 @@ module Legion
462
588
  source_agent: source_agent,
463
589
  weight: sim
464
590
  )
591
+ log.info("Apollo Knowledge.find_corroboration matched entry_id=#{entry.id} source_agent=#{source_agent} similarity=#{sim}")
465
592
  return [true, entry.id]
466
593
  end
467
594
 
595
+ log.debug("Apollo Knowledge.find_corroboration no_match source_agent=#{source_agent}")
468
596
  [false, nil]
469
597
  end
470
598
 
@@ -483,6 +611,7 @@ module Legion
483
611
  end
484
612
 
485
613
  def upsert_expertise(source_agent:, domain:)
614
+ log.debug("Apollo Knowledge.upsert_expertise source_agent=#{source_agent} domain=#{domain}")
486
615
  expertise = Legion::Data::Model::ApolloExpertise
487
616
  .where(agent_id: source_agent, domain: domain).first
488
617
  if expertise
@@ -25,7 +25,11 @@ module Legion
25
25
  min_confidence ||= Helpers::Confidence.decay_threshold
26
26
  min_age_hours = Helpers::Confidence.decay_min_age_hours
27
27
 
28
- return { decayed: 0, archived: 0 } unless defined?(Legion::Data) && Legion::Data.respond_to?(:connection) && Legion::Data.connection
28
+ log.debug("Apollo Maintenance.run_decay_cycle alpha=#{alpha} min_confidence=#{min_confidence} min_age_hours=#{min_age_hours}")
29
+ unless defined?(Legion::Data) && Legion::Data.respond_to?(:connection) && Legion::Data.connection
30
+ log.warn('Apollo Maintenance.run_decay_cycle skipped: apollo_data_not_available')
31
+ return { decayed: 0, archived: 0 }
32
+ end
29
33
 
30
34
  conn = Legion::Data.connection
31
35
 
@@ -54,15 +58,21 @@ module Legion
54
58
 
55
59
  { decayed: decayed, archived: archived, alpha: alpha, threshold: min_confidence,
56
60
  min_age_hours: min_age_hours }
61
+ .tap { |result| log.info("Apollo Maintenance.run_decay_cycle decayed=#{result[:decayed]} archived=#{result[:archived]} alpha=#{alpha} threshold=#{min_confidence}") } # rubocop:disable Layout/LineLength
57
62
  rescue Sequel::Error => e
63
+ handle_exception(e, level: :error, operation: 'apollo.maintenance.run_decay_cycle')
58
64
  { decayed: 0, archived: 0, error: e.message }
59
65
  end
60
66
 
61
67
  def check_corroboration(**) # rubocop:disable Metrics/CyclomaticComplexity
62
- return { success: false, error: 'apollo_data_not_available' } unless defined?(Legion::Data::Model::ApolloEntry)
68
+ unless defined?(Legion::Data::Model::ApolloEntry)
69
+ log.warn('Apollo Maintenance.check_corroboration skipped: apollo_data_not_available')
70
+ return { success: false, error: 'apollo_data_not_available' }
71
+ end
63
72
 
64
73
  candidates = Legion::Data::Model::ApolloEntry.where(status: 'candidate').exclude(embedding: nil).all
65
74
  confirmed = Legion::Data::Model::ApolloEntry.where(status: 'confirmed').exclude(embedding: nil).all
75
+ log.debug("Apollo Maintenance.check_corroboration candidates=#{candidates.size} confirmed=#{confirmed.size}")
66
76
 
67
77
  promoted = 0
68
78
 
@@ -106,7 +116,9 @@ module Legion
106
116
  end
107
117
 
108
118
  { success: true, promoted: promoted, scanned: candidates.size }
119
+ .tap { |result| log.info("Apollo Maintenance.check_corroboration scanned=#{result[:scanned]} promoted=#{result[:promoted]}") }
109
120
  rescue Sequel::Error => e
121
+ handle_exception(e, level: :error, operation: 'apollo.maintenance.check_corroboration')
110
122
  { success: false, error: e.message }
111
123
  end
112
124
 
@@ -3,7 +3,7 @@
3
3
  module Legion
4
4
  module Extensions
5
5
  module Apollo
6
- VERSION = '0.4.19'
6
+ VERSION = '0.4.20'
7
7
  end
8
8
  end
9
9
  end
@@ -263,6 +263,36 @@ RSpec.describe Legion::Extensions::Apollo::Runners::Knowledge do
263
263
  tags: ['RabbitMQ'], source_agent: 'agent-1')
264
264
  end
265
265
 
266
+ it 'sanitizes null bytes before storing content' do
267
+ expect(mock_entry_class).to receive(:create).with(
268
+ hash_including(content: 'helloworld')
269
+ ).and_return(mock_entry)
270
+ host.handle_ingest(content: "hello\x00world", content_type: 'fact', source_agent: 'agent-1')
271
+ end
272
+
273
+ it 'truncates short varchar metadata fields at the database boundary' do
274
+ expect(mock_entry_class).to receive(:create).with(
275
+ hash_including(
276
+ source_agent: 'a' * 50,
277
+ source_provider: 'p' * 50,
278
+ source_channel: 'c' * 100,
279
+ knowledge_domain: 'd' * 50,
280
+ submitted_by: 'u' * 255,
281
+ submitted_from: 'n' * 255
282
+ )
283
+ ).and_return(mock_entry)
284
+ host.handle_ingest(
285
+ content: 'test',
286
+ content_type: 'fact',
287
+ source_agent: 'a' * 60,
288
+ source_provider: 'p' * 60,
289
+ source_channel: 'c' * 120,
290
+ knowledge_domain: 'd' * 60,
291
+ submitted_by: 'u' * 300,
292
+ submitted_from: 'n' * 300
293
+ )
294
+ end
295
+
266
296
  context 'content hash dedup' do
267
297
  let(:existing_entry) do
268
298
  double('existing', id: 'uuid-existing', confidence: 0.6,
@@ -295,9 +325,16 @@ RSpec.describe Legion::Extensions::Apollo::Runners::Knowledge do
295
325
  end
296
326
 
297
327
  it 'returns a structured error' do
328
+ allow(host).to receive(:handle_exception)
329
+
298
330
  result = host.handle_ingest(content: 'test', content_type: 'fact', source_agent: 'a')
299
331
  expect(result[:success]).to be false
300
332
  expect(result[:error]).to eq('connection lost')
333
+ expect(host).to have_received(:handle_exception).with(
334
+ instance_of(Sequel::Error),
335
+ level: :error,
336
+ operation: 'apollo.knowledge.handle_ingest'
337
+ )
301
338
  end
302
339
  end
303
340
  end
@@ -376,6 +413,64 @@ RSpec.describe Legion::Extensions::Apollo::Runners::Knowledge do
376
413
  expect(result[:count]).to eq(0)
377
414
  end
378
415
  end
416
+
417
+ context 'when query is browse-shaped' do
418
+ let(:mock_entry_class) { double('ApolloEntry') }
419
+ let(:dataset) { double('dataset') }
420
+ let(:entries) do
421
+ [{ id: 'uuid-1', content: 'Candidate fact', content_type: 'fact',
422
+ confidence: 0.7, tags: ['ruby'], source_agent: 'agent-1',
423
+ knowledge_domain: 'general' }]
424
+ end
425
+
426
+ before do
427
+ stub_const('Legion::Data::Model::ApolloEntry', mock_entry_class)
428
+ allow(mock_entry_class).to receive(:exclude).with(status: 'archived').and_return(dataset)
429
+ allow(dataset).to receive(:where).and_return(dataset)
430
+ allow(dataset).to receive(:order).with(:created_at).and_return(dataset)
431
+ allow(dataset).to receive(:limit).with(50).and_return(dataset)
432
+ allow(dataset).to receive(:all).and_return(entries)
433
+ end
434
+
435
+ it 'lists recent non-archived entries without generating an embedding' do
436
+ expect(Legion::LLM::Embeddings).not_to receive(:generate)
437
+
438
+ result = host.handle_query(query: 'x', limit: 50)
439
+
440
+ expect(result[:success]).to be true
441
+ expect(result[:mode]).to eq(:browse)
442
+ expect(result[:count]).to eq(1)
443
+ expect(result[:entries].first[:content]).to eq('Candidate fact')
444
+ expect(dataset).not_to have_received(:where).with(status: ['confirmed'])
445
+ end
446
+
447
+ it 'respects an explicit confirmed status filter' do
448
+ host.handle_query(query: 'x', limit: 50, status: [:confirmed])
449
+
450
+ expect(dataset).to have_received(:where).with(status: ['confirmed'])
451
+ end
452
+
453
+ it 'applies tags and domain filters when provided' do
454
+ host.handle_query(query: 'x', limit: 50, tags: ['ruby'], domain: 'general')
455
+
456
+ expect(dataset).to have_received(:where).with('tags && ?')
457
+ expect(dataset).to have_received(:where).with(knowledge_domain: 'general')
458
+ end
459
+ end
460
+ end
461
+
462
+ describe '#normalize_text_input' do
463
+ let(:host) { Object.new.extend(described_class) }
464
+
465
+ it 'strips null bytes in the local fallback path' do
466
+ expect(host.send(:normalize_text_input, "hello\x00world")).to eq('helloworld')
467
+ end
468
+
469
+ it 'scrubs invalid UTF-8 in the local fallback path' do
470
+ invalid = "hello\xC3(world".dup.force_encoding(Encoding::UTF_8)
471
+
472
+ expect(host.send(:normalize_text_input, invalid)).to eq('hello(world')
473
+ end
379
474
  end
380
475
 
381
476
  describe '#retrieve_relevant' do
data/spec/spec_helper.rb CHANGED
@@ -16,6 +16,7 @@ unless defined?(Sequel)
16
16
 
17
17
  def self.pg_array(arr) = arr
18
18
  def self.lit(str, *) = str
19
+ def self.desc(sym) = sym
19
20
  Expr = Struct.new(:value) do
20
21
  def +(other) = "#{value} + #{other}"
21
22
  def *(other) = "#{value} * #{other}"
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: lex-apollo
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.19
4
+ version: 0.4.20
5
5
  platform: ruby
6
6
  authors:
7
7
  - Esity