lex-knowledge 0.6.9 → 0.6.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 29631e6f2778ceedf74b69197379edc4c215d9b7ff4b2e088790dedc227fd90f
4
- data.tar.gz: d96dd08d5a1450e5e1f3db0936729d3a40c834edc166126d6c9dd73a1839518f
3
+ metadata.gz: 21c773508b11dfb12b4d07bf2e37705483b82f4100382c29d84d883f3790ce5c
4
+ data.tar.gz: 96c6e3da83bb20eabb5db9f55b8ef4b7715ac08d3110ec4c6680ccf6b62210e4
5
5
  SHA512:
6
- metadata.gz: 971edc3c4347a949a89b690cccda16150e5644be27eb201d22daeaab0e293cc03ebf530e509712a514eb2f40e3e86c201ce96475c2cbf398252fcae73bc0af64
7
- data.tar.gz: 980eb54c45d386db31c1ed817f41f2f12d6a7993e07db910c016398ddb5229274622c720e78eb1aabcb0920d01772de63627ecbff6653bb051b893b7e230d766
6
+ metadata.gz: 5e2a5993ca4b4213c1dfb25eff797aaf7b89af3b5bc4c37072683d564bc49db49aad6f30702f47c19bd66c008bcdcc9b9a29b075920b66c03ab237dd4e9070e0
7
+ data.tar.gz: 04f3d33195e5c81c4b48c361170909b6f8a53e28660a0d30370584453a847f8d787443261ffa1b47a383c58aa387281616a29f1a2366c6c46e43308d82284808
@@ -5,6 +5,9 @@ module Legion
5
5
  module Knowledge
6
6
  module Actor
7
7
  class CorpusIngest < Legion::Extensions::Actors::Subscription
8
+ include Legion::Logging::Helper
9
+ include Legion::Settings::Helper
10
+
8
11
  def runner_class = 'Legion::Extensions::Knowledge::Runners::Ingest'
9
12
  def runner_function = 'ingest_file'
10
13
  def check_subtask? = false
@@ -13,7 +16,8 @@ module Legion
13
16
  def enabled? # rubocop:disable Legion/Extension/ActorEnabledSideEffects
14
17
  Legion.const_defined?(:Transport, false) &&
15
18
  defined?(Legion::Extensions::Knowledge::Runners::Ingest)
16
- rescue StandardError => _e
19
+ rescue StandardError => e
20
+ handle_exception(e, level: :warn, operation: 'knowledge.corpus_ingest.enabled')
17
21
  false
18
22
  end
19
23
  end
@@ -5,26 +5,25 @@ module Legion
5
5
  module Knowledge
6
6
  module Actor
7
7
  class CorpusWatcher < Legion::Extensions::Actors::Every # rubocop:disable Legion/Extension/EveryActorRequiresTime
8
+ include Legion::Logging::Helper
9
+ include Legion::Settings::Helper
10
+
8
11
  def runner_class = 'Legion::Extensions::Knowledge::Runners::Ingest'
9
12
  def runner_function = 'ingest_corpus'
10
13
  def check_subtask? = false
11
14
  def generate_task? = false
12
15
 
13
16
  def time
14
- if defined?(Legion::Settings) && !Legion::Settings[:knowledge].nil?
15
- Legion::Settings.dig(:knowledge, :actors, :watcher_interval) || 300
16
- else
17
- 300
18
- end
17
+ settings[:actors][:watcher_interval]
19
18
  rescue StandardError => e
20
- log.warn(e.message)
19
+ handle_exception(e, level: :warn, operation: 'knowledge.corpus_watcher.time')
21
20
  300
22
21
  end
23
22
 
24
23
  def enabled? # rubocop:disable Legion/Extension/ActorEnabledSideEffects
25
24
  resolve_monitors.any?
26
25
  rescue StandardError => e
27
- log.warn(e.message)
26
+ handle_exception(e, level: :warn, operation: 'knowledge.corpus_watcher.enabled')
28
27
  false
29
28
  end
30
29
 
@@ -34,14 +33,10 @@ module Legion
34
33
 
35
34
  private
36
35
 
37
- def log
38
- Legion::Logging
39
- end
40
-
41
36
  def resolve_monitors
42
37
  Runners::Monitor.resolve_monitors
43
38
  rescue StandardError => e
44
- log.warn(e.message)
39
+ handle_exception(e, level: :warn, operation: 'knowledge.corpus_watcher.resolve_monitors')
45
40
  []
46
41
  end
47
42
  end
@@ -5,19 +5,18 @@ module Legion
5
5
  module Knowledge
6
6
  module Actor
7
7
  class MaintenanceRunner < Legion::Extensions::Actors::Every # rubocop:disable Legion/Extension/EveryActorRequiresTime
8
+ include Legion::Logging::Helper
9
+ include Legion::Settings::Helper
10
+
8
11
  def runner_class = 'Legion::Extensions::Knowledge::Runners::Maintenance'
9
12
  def runner_function = 'health'
10
13
  def check_subtask? = false
11
14
  def generate_task? = false
12
15
 
13
16
  def time
14
- if defined?(Legion::Settings) && !Legion::Settings[:knowledge].nil?
15
- Legion::Settings.dig(:knowledge, :actors, :maintenance_interval) || 21_600
16
- else
17
- 21_600
18
- end
17
+ settings[:actors][:maintenance_interval]
19
18
  rescue StandardError => e
20
- log.warn(e.message)
19
+ handle_exception(e, level: :warn, operation: 'knowledge.maintenance_runner.time')
21
20
  21_600
22
21
  end
23
22
 
@@ -26,7 +25,7 @@ module Legion
26
25
 
27
26
  true
28
27
  rescue StandardError => e
29
- log.warn(e.message)
28
+ handle_exception(e, level: :warn, operation: 'knowledge.maintenance_runner.enabled')
30
29
  false
31
30
  end
32
31
 
@@ -36,16 +35,10 @@ module Legion
36
35
 
37
36
  private
38
37
 
39
- def log
40
- Legion::Logging
41
- end
42
-
43
38
  def corpus_path
44
- return nil unless defined?(Legion::Settings) && !Legion::Settings[:knowledge].nil?
45
-
46
- Legion::Settings.dig(:knowledge, :corpus_path)
39
+ settings[:corpus_path]
47
40
  rescue StandardError => e
48
- log.warn(e.message)
41
+ handle_exception(e, level: :warn, operation: 'knowledge.maintenance_runner.corpus_path')
49
42
  nil
50
43
  end
51
44
  end
@@ -0,0 +1,45 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Legion
4
+ module Extensions
5
+ module Knowledge
6
+ module Helpers
7
+ module ApolloModels
8
+ class << self
9
+ def entry
10
+ namespaced_apollo_model(:Entry) || legacy_model(:ApolloEntry)
11
+ end
12
+
13
+ def access_log
14
+ namespaced_apollo_model(:AccessLog) || legacy_model(:ApolloAccessLog)
15
+ end
16
+
17
+ def entry_available?
18
+ !entry.nil?
19
+ end
20
+
21
+ def access_log_available?
22
+ !access_log.nil?
23
+ end
24
+
25
+ private
26
+
27
+ def namespaced_apollo_model(name)
28
+ return nil unless defined?(Legion::Data::Model::Apollo)
29
+ return nil unless Legion::Data::Model::Apollo.const_defined?(name, false)
30
+
31
+ Legion::Data::Model::Apollo.const_get(name, false)
32
+ end
33
+
34
+ def legacy_model(name)
35
+ return nil unless defined?(Legion::Data::Model)
36
+ return nil unless Legion::Data::Model.const_defined?(name, false)
37
+
38
+ Legion::Data::Model.const_get(name, false)
39
+ end
40
+ end
41
+ end
42
+ end
43
+ end
44
+ end
45
+ end
@@ -7,13 +7,16 @@ module Legion
7
7
  module Knowledge
8
8
  module Helpers
9
9
  module Chunker
10
+ extend Legion::Logging::Helper
11
+ extend Legion::Settings::Helper
12
+
10
13
  CHARS_PER_TOKEN = 4
11
14
 
12
15
  module_function
13
16
 
14
17
  def chunk(sections:, max_tokens: nil, overlap_tokens: nil)
15
- resolved_max = max_tokens || settings_max_tokens || 512
16
- resolved_overlap = overlap_tokens || settings_overlap_tokens || 128
18
+ resolved_max = max_tokens || settings[:chunker][:max_tokens]
19
+ resolved_overlap = overlap_tokens || settings[:chunker][:overlap_tokens]
17
20
 
18
21
  max_chars = resolved_max * CHARS_PER_TOKEN
19
22
  overlap_chars = resolved_overlap * CHARS_PER_TOKEN
@@ -62,6 +65,9 @@ module Legion
62
65
  end
63
66
  private_class_method :split_section
64
67
 
68
+ # Hash must match Legion::Extensions::Apollo::Helpers::Writeback.content_hash
69
+ # so knowledge chunks deduplicate consistently with Apollo writeback and still
70
+ # fit older apollo_entries.content_hash columns fixed at MD5 length.
65
71
  def build_chunk(section, content, index)
66
72
  {
67
73
  content: content,
@@ -70,28 +76,22 @@ module Legion
70
76
  source_file: section[:source_file],
71
77
  token_count: (content.length.to_f / CHARS_PER_TOKEN).ceil,
72
78
  chunk_index: index,
73
- content_hash: ::Digest::SHA256.hexdigest(content)
79
+ content_hash: apollo_compatible_content_hash(content)
74
80
  }
75
81
  end
76
82
  private_class_method :build_chunk
77
83
 
78
- def settings_max_tokens
79
- return nil unless defined?(Legion::Settings)
80
-
81
- Legion::Settings.dig(:knowledge, :chunker, :max_tokens)
82
- rescue StandardError => _e
83
- nil
84
- end
85
- private_class_method :settings_max_tokens
86
-
87
- def settings_overlap_tokens
88
- return nil unless defined?(Legion::Settings)
89
-
90
- Legion::Settings.dig(:knowledge, :chunker, :overlap_tokens)
91
- rescue StandardError => _e
92
- nil
84
+ def apollo_compatible_content_hash(content)
85
+ if defined?(Legion::Extensions::Apollo::Helpers::Writeback)
86
+ Legion::Extensions::Apollo::Helpers::Writeback.content_hash(content)
87
+ else
88
+ # Fallback when apollo isn't loaded - match its MD5+normalize semantics
89
+ # so future apollo-backed lookups still work.
90
+ normalized = content.to_s.strip.downcase.gsub(/\s+/, ' ')
91
+ ::Digest::MD5.hexdigest(normalized)
92
+ end
93
93
  end
94
- private_class_method :settings_overlap_tokens
94
+ private_class_method :apollo_compatible_content_hash
95
95
  end
96
96
  end
97
97
  end
@@ -7,6 +7,8 @@ module Legion
7
7
  module Knowledge
8
8
  module Helpers
9
9
  module Manifest
10
+ extend Legion::Logging::Helper
11
+
10
12
  module_function
11
13
 
12
14
  def scan(path:, extensions: %w[.md .txt .docx .pdf])
@@ -25,15 +27,10 @@ module Legion
25
27
  results << build_entry(entry)
26
28
  end
27
29
  rescue Errno::EPERM, Errno::EACCES, Errno::ELOOP, Errno::ENOENT => e
28
- log.debug("[manifest] skipping unreadable #{entry}: #{e.class}: #{e.message}")
30
+ handle_exception(e, level: :warn, operation: 'knowledge.manifest.walk', entry: entry)
29
31
  end
30
32
  private_class_method :walk
31
33
 
32
- def log
33
- Legion::Logging
34
- end
35
- private_class_method :log
36
-
37
34
  def diff(current:, previous:)
38
35
  current_map = current.to_h { |e| [e[:path], e[:sha256]] }
39
36
  previous_map = previous.to_h { |e| [e[:path], e[:sha256]] }
@@ -2,7 +2,7 @@
2
2
 
3
3
  require 'digest'
4
4
  require 'fileutils'
5
- require 'json'
5
+ require 'legion/json'
6
6
  require 'tempfile'
7
7
 
8
8
  module Legion
@@ -10,6 +10,9 @@ module Legion
10
10
  module Knowledge
11
11
  module Helpers
12
12
  module ManifestStore
13
+ extend Legion::Logging::Helper
14
+ extend Legion::JSON::Helper
15
+
13
16
  module_function
14
17
 
15
18
  STORE_DIR = ::File.expand_path('~/.legionio/knowledge').freeze
@@ -19,8 +22,9 @@ module Legion
19
22
  return [] unless ::File.exist?(path)
20
23
 
21
24
  raw = ::File.read(path, encoding: 'utf-8')
22
- ::JSON.parse(raw, symbolize_names: true)
23
- rescue StandardError => _e
25
+ json_parse(raw)
26
+ rescue StandardError => e
27
+ handle_exception(e, level: :warn, operation: 'knowledge.manifest_store.load', corpus_path: corpus_path)
24
28
  []
25
29
  end
26
30
 
@@ -28,10 +32,11 @@ module Legion
28
32
  ::FileUtils.mkdir_p(STORE_DIR)
29
33
  path = store_path(corpus_path: corpus_path)
30
34
  tmp = "#{path}.tmp"
31
- ::File.write(tmp, ::JSON.generate(manifest.map { |e| serialize_entry(e) }))
35
+ ::File.write(tmp, json_generate(manifest.map { |e| serialize_entry(e) }))
32
36
  ::File.rename(tmp, path)
33
37
  true
34
- rescue StandardError => _e
38
+ rescue StandardError => e
39
+ handle_exception(e, level: :warn, operation: 'knowledge.manifest_store.save', corpus_path: corpus_path)
35
40
  false
36
41
  end
37
42
 
@@ -5,6 +5,8 @@ module Legion
5
5
  module Knowledge
6
6
  module Helpers
7
7
  module Parser
8
+ extend Legion::Logging::Helper
9
+
8
10
  module_function
9
11
 
10
12
  def parse(file_path:)
@@ -57,6 +59,7 @@ module Legion
57
59
  heading = ::File.basename(file_path, '.*')
58
60
  [{ heading: heading, section_path: [], content: result[:text].strip, source_file: file_path }]
59
61
  rescue StandardError => e
62
+ handle_exception(e, level: :warn, operation: 'knowledge.parser.extract_via_data', file_path: file_path)
60
63
  [{ error: 'extraction_failed', source_file: file_path, detail: e.message }]
61
64
  end
62
65
 
@@ -5,6 +5,8 @@ module Legion
5
5
  module Knowledge
6
6
  module Runners
7
7
  module Corpus # rubocop:disable Legion/Extension/RunnerIncludeHelpers
8
+ extend Legion::Logging::Helper
9
+
8
10
  module_function
9
11
 
10
12
  def manifest_path(path:)
@@ -32,6 +34,7 @@ module Legion
32
34
  total_bytes: entries.sum { |e| e[:size] }
33
35
  }
34
36
  rescue StandardError => e
37
+ handle_exception(e, level: :warn, operation: 'knowledge.corpus.corpus_stats', path: path)
35
38
  { success: false, error: e.message }
36
39
  end
37
40
  end
@@ -1,5 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require_relative '../helpers/apollo_models'
4
+
3
5
  require 'securerandom'
4
6
 
5
7
  module Legion
@@ -7,12 +9,10 @@ module Legion
7
9
  module Knowledge
8
10
  module Runners
9
11
  module Ingest # rubocop:disable Legion/Extension/RunnerIncludeHelpers
10
- module_function
12
+ extend Legion::Logging::Helper
13
+ extend Legion::Settings::Helper
11
14
 
12
- def log
13
- Legion::Logging
14
- end
15
- private_class_method :log
15
+ module_function
16
16
 
17
17
  def scan_corpus(path:, extensions: nil)
18
18
  opts = { path: path }
@@ -29,17 +29,27 @@ module Legion
29
29
  }
30
30
  end
31
31
 
32
- def ingest_corpus(path: nil, monitors: nil, dry_run: false, force: false)
33
- return ingest_monitors(monitors: monitors, dry_run: dry_run, force: force) if monitors&.any?
32
+ FILTER_SCHEMA = {
33
+ type: 'object',
34
+ properties: {
35
+ relevant: { type: 'boolean' },
36
+ confidence: { type: 'number' },
37
+ reason: { type: 'string' }
38
+ },
39
+ required: %w[relevant confidence]
40
+ }.freeze
41
+
42
+ def ingest_corpus(path: nil, monitors: nil, dry_run: false, force: false, filter: true)
43
+ return ingest_monitors(monitors: monitors, dry_run: dry_run, force: force, filter: filter) if monitors&.any?
34
44
  raise ArgumentError, 'path is required when monitors is not provided' if path.nil?
35
45
 
36
- ingest_corpus_path(path: path, dry_run: dry_run, force: force)
46
+ ingest_corpus_path(path: path, dry_run: dry_run, force: force, filter: filter)
37
47
  rescue ArgumentError => e
38
- log.warn(e.message)
48
+ handle_exception(e, level: :warn, operation: 'knowledge.ingest.ingest_corpus')
39
49
  { success: false, error: e.message }
40
50
  end
41
51
 
42
- def ingest_corpus_path(path:, dry_run: false, force: false)
52
+ def ingest_corpus_path(path:, dry_run: false, force: false, filter: true)
43
53
  current = Helpers::Manifest.scan(path: path)
44
54
  previous = force ? [] : Helpers::ManifestStore.load(corpus_path: path)
45
55
  delta = Helpers::Manifest.diff(current: current, previous: previous)
@@ -50,7 +60,7 @@ module Legion
50
60
  chunks_updated = 0
51
61
 
52
62
  to_process.each do |file_path|
53
- result = process_file(file_path, dry_run: dry_run, force: force)
63
+ result = process_file(file_path, dry_run: dry_run, force: force, filter: filter)
54
64
  chunks_created += result[:created]
55
65
  chunks_skipped += result[:skipped]
56
66
  chunks_updated += result[:updated]
@@ -71,16 +81,16 @@ module Legion
71
81
  chunks_updated: chunks_updated
72
82
  }
73
83
  rescue StandardError => e
74
- log.warn(e.message)
84
+ handle_exception(e, level: :warn, operation: 'knowledge.ingest.ingest_corpus_path', path: path)
75
85
  { success: false, error: e.message }
76
86
  end
77
87
  private_class_method :ingest_corpus_path
78
88
 
79
- def ingest_monitors(monitors:, dry_run: false, force: false)
89
+ def ingest_monitors(monitors:, dry_run: false, force: false, filter: true)
80
90
  results = monitors.map do |monitor|
81
- ingest_corpus(path: monitor[:path], dry_run: dry_run, force: force)
91
+ ingest_corpus(path: monitor[:path], dry_run: dry_run, force: force, filter: filter)
82
92
  rescue StandardError => e
83
- log.warn(e.message)
93
+ handle_exception(e, level: :warn, operation: 'knowledge.ingest.ingest_monitor', path: monitor[:path])
84
94
  { success: false, path: monitor[:path], error: e.message }
85
95
  end
86
96
 
@@ -101,7 +111,7 @@ module Legion
101
111
 
102
112
  { success: true, monitors_processed: results.size, **total }
103
113
  rescue StandardError => e
104
- log.warn(e.message)
114
+ handle_exception(e, level: :warn, operation: 'knowledge.ingest.ingest_monitors')
105
115
  { success: false, error: e.message }
106
116
  end
107
117
  private_class_method :ingest_monitors
@@ -114,17 +124,17 @@ module Legion
114
124
  section_path: [source_type.to_s],
115
125
  source_file: source_path
116
126
  }
117
- chunks = Helpers::Chunker.chunk(sections: [section])
127
+ chunks = filter_chunks(Helpers::Chunker.chunk(sections: [section]), filter: true)
118
128
  paired = batch_embed_chunks(chunks, force: false)
119
129
  paired.each { |p| upsert_chunk_with_embedding(p[:chunk], p[:embedding], force: false, exists: p[:exists] || false) }
120
130
  { status: :ingested, chunks: chunks.size, source_type: source_type, metadata: metadata }
121
131
  rescue StandardError => e
122
- log.warn(e.message)
132
+ handle_exception(e, level: :warn, operation: 'knowledge.ingest.ingest_content', source_type: source_type)
123
133
  { status: :failed, error: e.message, source_type: source_type, metadata: metadata }
124
134
  end
125
135
 
126
- def ingest_file(file_path:, force: false)
127
- result = process_file(file_path, dry_run: false, force: force)
136
+ def ingest_file(file_path:, force: false, filter: true)
137
+ result = process_file(file_path, dry_run: false, force: force, filter: filter)
128
138
 
129
139
  {
130
140
  success: true,
@@ -134,23 +144,24 @@ module Legion
134
144
  chunks_updated: result[:updated]
135
145
  }
136
146
  rescue StandardError => e
137
- log.warn(e.message)
147
+ handle_exception(e, level: :warn, operation: 'knowledge.ingest.ingest_file', file_path: file_path)
138
148
  { success: false, error: e.message }
139
149
  end
140
150
 
141
- def process_file(file_path, dry_run: false, force: false)
151
+ def process_file(file_path, dry_run: false, force: false, filter: true)
142
152
  sections = Helpers::Parser.parse(file_path: file_path)
143
153
  return { created: 0, skipped: 0, updated: 0 } if sections.first&.key?(:error)
144
154
 
145
- chunks = Helpers::Chunker.chunk(sections: sections)
155
+ chunks = Helpers::Chunker.chunk(sections: sections)
156
+ filtered_chunks = filter_chunks(chunks, filter: filter)
146
157
  paired = if dry_run
147
- chunks.map { |c| { chunk: c, embedding: nil } }
158
+ filtered_chunks.map { |c| { chunk: c, embedding: nil } }
148
159
  else
149
- batch_embed_chunks(chunks, force: force)
160
+ batch_embed_chunks(filtered_chunks, force: force)
150
161
  end
151
162
 
152
163
  created = 0
153
- skipped = 0
164
+ skipped = chunks.size - filtered_chunks.size
154
165
  updated = 0
155
166
 
156
167
  paired.each do |p|
@@ -166,6 +177,49 @@ module Legion
166
177
  end
167
178
  private_class_method :process_file
168
179
 
180
+ def filter_chunks(chunks, filter:)
181
+ return chunks unless filter
182
+
183
+ prompt = settings[:ingest][:filter_prompt]
184
+ return chunks if prompt.to_s.strip.empty? || !llm_structured_available?
185
+
186
+ chunks.select { |chunk| chunk_allowed_by_filter?(chunk, prompt: prompt, threshold: settings[:ingest][:filter_threshold]) }
187
+ rescue StandardError => e
188
+ handle_exception(e, level: :warn, operation: 'knowledge.ingest.filter_chunks')
189
+ chunks
190
+ end
191
+ private_class_method :filter_chunks
192
+
193
+ def chunk_allowed_by_filter?(chunk, prompt:, threshold:)
194
+ hash = chunk[:content_hash] || Helpers::Chunker.send(:apollo_compatible_content_hash, chunk[:content].to_s)
195
+ return filter_cache[hash] if filter_cache.key?(hash)
196
+
197
+ result = Legion::LLM.structured( # rubocop:disable Legion/HelperMigration/DirectLlm
198
+ messages: [
199
+ { role: 'system', content: prompt },
200
+ { role: 'user', content: chunk[:content].to_s }
201
+ ],
202
+ schema: FILTER_SCHEMA,
203
+ caller: { extension: 'lex-knowledge', runner: 'ingest', operation: 'filter_chunk' }
204
+ )
205
+ data = result.is_a?(Hash) ? (result[:data] || result) : {}
206
+ filter_cache[hash] = data[:relevant] == true && data[:confidence].to_f >= threshold.to_f
207
+ rescue StandardError => e
208
+ handle_exception(e, level: :warn, operation: 'knowledge.ingest.filter_chunk', content_hash: hash)
209
+ filter_cache[hash] = true
210
+ end
211
+ private_class_method :chunk_allowed_by_filter?
212
+
213
+ def filter_cache
214
+ Thread.current[:lex_knowledge_filter_cache] ||= {}
215
+ end
216
+ private_class_method :filter_cache
217
+
218
+ def llm_structured_available?
219
+ defined?(Legion::LLM) && Legion::LLM.respond_to?(:structured)
220
+ end
221
+ private_class_method :llm_structured_available?
222
+
169
223
  def batch_embed_chunks(chunks, force:)
170
224
  exists_map = force ? {} : build_exists_map(chunks)
171
225
  return paired_without_embed(chunks, exists_map) unless llm_embed_available?
@@ -175,7 +229,7 @@ module Legion
175
229
 
176
230
  chunks.map { |c| { chunk: c, embedding: embed_map[c[:content_hash]], exists: exists_map.fetch(c[:content_hash], false) } }
177
231
  rescue StandardError => e
178
- log.warn(e.message)
232
+ handle_exception(e, level: :warn, operation: 'knowledge.ingest.batch_embed_chunks')
179
233
  paired_without_embed(chunks, {})
180
234
  end
181
235
  private_class_method :batch_embed_chunks
@@ -201,7 +255,7 @@ module Legion
201
255
  h[needs_embed[r[:index]][:content_hash]] = r[:vector] unless r[:error]
202
256
  end
203
257
  rescue StandardError => e
204
- log.warn(e.message)
258
+ handle_exception(e, level: :warn, operation: 'knowledge.ingest.build_embed_map')
205
259
  {}
206
260
  end
207
261
  private_class_method :build_embed_map
@@ -211,22 +265,36 @@ module Legion
211
265
  return :created unless defined?(Legion::Extensions::Apollo)
212
266
  return :skipped if !force && exists
213
267
 
214
- ingest_to_apollo(chunk, embedding)
268
+ result = ingest_to_apollo(chunk, embedding)
269
+ # handle_ingest returns a Hash on both success and failure paths; the upsert
270
+ # status must reflect the actual persistence outcome, not just the `force` flag.
271
+ # Previously any non-raising return was treated as success, producing
272
+ # false-positive :created/:updated responses to callers.
273
+ unless result.is_a?(Hash) && result[:success] == true
274
+ hash_prefix = chunk[:content_hash]&.slice(0, 12)
275
+ content_len = chunk[:content]&.length
276
+ error = result.is_a?(Hash) ? result[:error].inspect : "non-hash result class=#{result.class}"
277
+ log.warn(
278
+ '[knowledge][upsert_chunk] apollo persistence not confirmed ' \
279
+ "error=#{error} chunk_hash=#{hash_prefix} chunk_len=#{content_len}"
280
+ )
281
+ return :skipped
282
+ end
215
283
  force ? :updated : :created
216
284
  rescue StandardError => e
217
- log.warn(e.message)
285
+ handle_exception(e, level: :warn, operation: 'knowledge.ingest.upsert_chunk', content_hash: chunk[:content_hash]&.slice(0, 12))
218
286
  :skipped
219
287
  end
220
288
  private_class_method :upsert_chunk_with_embedding
221
289
 
222
290
  def chunk_exists?(content_hash)
223
- return false unless defined?(Legion::Data::Model::ApolloEntry)
291
+ return false unless Helpers::ApolloModels.entry_available?
224
292
 
225
- Legion::Data::Model::ApolloEntry
226
- .where(content_hash: content_hash)
227
- .any?
293
+ Helpers::ApolloModels.entry
294
+ .where(content_hash: content_hash)
295
+ .any?
228
296
  rescue StandardError => e
229
- log.warn(e.message)
297
+ handle_exception(e, level: :warn, operation: 'knowledge.ingest.chunk_exists', content_hash: content_hash)
230
298
  false
231
299
  end
232
300
  private_class_method :chunk_exists?
@@ -234,18 +302,20 @@ module Legion
234
302
  def ingest_to_apollo(chunk, embedding)
235
303
  return unless defined?(Legion::Extensions::Apollo)
236
304
 
305
+ context = {
306
+ source_file: chunk[:source_file],
307
+ heading: chunk[:heading],
308
+ section_path: chunk[:section_path],
309
+ chunk_index: chunk[:chunk_index],
310
+ token_count: chunk[:token_count]
311
+ }
237
312
  payload = {
238
313
  content: chunk[:content],
239
314
  content_type: 'document_chunk',
240
315
  content_hash: chunk[:content_hash],
241
316
  tags: [chunk[:source_file], chunk[:heading], 'document_chunk'].compact.uniq,
242
- metadata: {
243
- source_file: chunk[:source_file],
244
- heading: chunk[:heading],
245
- section_path: chunk[:section_path],
246
- chunk_index: chunk[:chunk_index],
247
- token_count: chunk[:token_count]
248
- }
317
+ context: context,
318
+ metadata: context
249
319
  }
250
320
  payload[:embedding] = embedding if embedding
251
321
 
@@ -264,7 +334,7 @@ module Legion
264
334
  metadata: { source_file: file_path, retired: true }
265
335
  )
266
336
  rescue StandardError => e
267
- log.warn(e.message)
337
+ handle_exception(e, level: :warn, operation: 'knowledge.ingest.retire_file', file_path: file_path)
268
338
  nil
269
339
  end
270
340
  private_class_method :retire_file