lex-knowledge 0.6.10 → 0.6.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 319636e39b5a3f036355f455f03ae585a9902e7510028ae2d37ce88387d09ee9
4
- data.tar.gz: 7a6c81ac4db6261d2388b3a16b8a6d28dc7b81571d4bff5e49c4cb623ca40576
3
+ metadata.gz: 21c773508b11dfb12b4d07bf2e37705483b82f4100382c29d84d883f3790ce5c
4
+ data.tar.gz: 96c6e3da83bb20eabb5db9f55b8ef4b7715ac08d3110ec4c6680ccf6b62210e4
5
5
  SHA512:
6
- metadata.gz: 763094b4aad5b43d300823e8c74843cb8e7a199401a5a61cfe308f9d77a3461445d8175e3eb8e0b5599ad28c4c797d77bc37b038464f9ddb93f86918de6b40a9
7
- data.tar.gz: a4d39884a98025e49509f281dfab9c94501a12c824f4ea08ec20795bea80d36b9bde7ec54c657b965f1b489f36a04f3e48bda6df85471246eb59d42a5bb10c27
6
+ metadata.gz: 5e2a5993ca4b4213c1dfb25eff797aaf7b89af3b5bc4c37072683d564bc49db49aad6f30702f47c19bd66c008bcdcc9b9a29b075920b66c03ab237dd4e9070e0
7
+ data.tar.gz: 04f3d33195e5c81c4b48c361170909b6f8a53e28660a0d30370584453a847f8d787443261ffa1b47a383c58aa387281616a29f1a2366c6c46e43308d82284808
@@ -5,6 +5,9 @@ module Legion
5
5
  module Knowledge
6
6
  module Actor
7
7
  class CorpusIngest < Legion::Extensions::Actors::Subscription
8
+ include Legion::Logging::Helper
9
+ include Legion::Settings::Helper
10
+
8
11
  def runner_class = 'Legion::Extensions::Knowledge::Runners::Ingest'
9
12
  def runner_function = 'ingest_file'
10
13
  def check_subtask? = false
@@ -13,7 +16,8 @@ module Legion
13
16
  def enabled? # rubocop:disable Legion/Extension/ActorEnabledSideEffects
14
17
  Legion.const_defined?(:Transport, false) &&
15
18
  defined?(Legion::Extensions::Knowledge::Runners::Ingest)
16
- rescue StandardError => _e
19
+ rescue StandardError => e
20
+ handle_exception(e, level: :warn, operation: 'knowledge.corpus_ingest.enabled')
17
21
  false
18
22
  end
19
23
  end
@@ -5,26 +5,25 @@ module Legion
5
5
  module Knowledge
6
6
  module Actor
7
7
  class CorpusWatcher < Legion::Extensions::Actors::Every # rubocop:disable Legion/Extension/EveryActorRequiresTime
8
+ include Legion::Logging::Helper
9
+ include Legion::Settings::Helper
10
+
8
11
  def runner_class = 'Legion::Extensions::Knowledge::Runners::Ingest'
9
12
  def runner_function = 'ingest_corpus'
10
13
  def check_subtask? = false
11
14
  def generate_task? = false
12
15
 
13
16
  def time
14
- if defined?(Legion::Settings) && !Legion::Settings[:knowledge].nil?
15
- Legion::Settings.dig(:knowledge, :actors, :watcher_interval) || 300
16
- else
17
- 300
18
- end
17
+ settings[:actors][:watcher_interval]
19
18
  rescue StandardError => e
20
- log.warn(e.message)
19
+ handle_exception(e, level: :warn, operation: 'knowledge.corpus_watcher.time')
21
20
  300
22
21
  end
23
22
 
24
23
  def enabled? # rubocop:disable Legion/Extension/ActorEnabledSideEffects
25
24
  resolve_monitors.any?
26
25
  rescue StandardError => e
27
- log.warn(e.message)
26
+ handle_exception(e, level: :warn, operation: 'knowledge.corpus_watcher.enabled')
28
27
  false
29
28
  end
30
29
 
@@ -34,14 +33,10 @@ module Legion
34
33
 
35
34
  private
36
35
 
37
- def log
38
- Legion::Logging
39
- end
40
-
41
36
  def resolve_monitors
42
37
  Runners::Monitor.resolve_monitors
43
38
  rescue StandardError => e
44
- log.warn(e.message)
39
+ handle_exception(e, level: :warn, operation: 'knowledge.corpus_watcher.resolve_monitors')
45
40
  []
46
41
  end
47
42
  end
@@ -5,19 +5,18 @@ module Legion
5
5
  module Knowledge
6
6
  module Actor
7
7
  class MaintenanceRunner < Legion::Extensions::Actors::Every # rubocop:disable Legion/Extension/EveryActorRequiresTime
8
+ include Legion::Logging::Helper
9
+ include Legion::Settings::Helper
10
+
8
11
  def runner_class = 'Legion::Extensions::Knowledge::Runners::Maintenance'
9
12
  def runner_function = 'health'
10
13
  def check_subtask? = false
11
14
  def generate_task? = false
12
15
 
13
16
  def time
14
- if defined?(Legion::Settings) && !Legion::Settings[:knowledge].nil?
15
- Legion::Settings.dig(:knowledge, :actors, :maintenance_interval) || 21_600
16
- else
17
- 21_600
18
- end
17
+ settings[:actors][:maintenance_interval]
19
18
  rescue StandardError => e
20
- log.warn(e.message)
19
+ handle_exception(e, level: :warn, operation: 'knowledge.maintenance_runner.time')
21
20
  21_600
22
21
  end
23
22
 
@@ -26,7 +25,7 @@ module Legion
26
25
 
27
26
  true
28
27
  rescue StandardError => e
29
- log.warn(e.message)
28
+ handle_exception(e, level: :warn, operation: 'knowledge.maintenance_runner.enabled')
30
29
  false
31
30
  end
32
31
 
@@ -36,16 +35,10 @@ module Legion
36
35
 
37
36
  private
38
37
 
39
- def log
40
- Legion::Logging
41
- end
42
-
43
38
  def corpus_path
44
- return nil unless defined?(Legion::Settings) && !Legion::Settings[:knowledge].nil?
45
-
46
- Legion::Settings.dig(:knowledge, :corpus_path)
39
+ settings[:corpus_path]
47
40
  rescue StandardError => e
48
- log.warn(e.message)
41
+ handle_exception(e, level: :warn, operation: 'knowledge.maintenance_runner.corpus_path')
49
42
  nil
50
43
  end
51
44
  end
@@ -0,0 +1,45 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Legion
4
+ module Extensions
5
+ module Knowledge
6
+ module Helpers
7
+ module ApolloModels
8
+ class << self
9
+ def entry
10
+ namespaced_apollo_model(:Entry) || legacy_model(:ApolloEntry)
11
+ end
12
+
13
+ def access_log
14
+ namespaced_apollo_model(:AccessLog) || legacy_model(:ApolloAccessLog)
15
+ end
16
+
17
+ def entry_available?
18
+ !entry.nil?
19
+ end
20
+
21
+ def access_log_available?
22
+ !access_log.nil?
23
+ end
24
+
25
+ private
26
+
27
+ def namespaced_apollo_model(name)
28
+ return nil unless defined?(Legion::Data::Model::Apollo)
29
+ return nil unless Legion::Data::Model::Apollo.const_defined?(name, false)
30
+
31
+ Legion::Data::Model::Apollo.const_get(name, false)
32
+ end
33
+
34
+ def legacy_model(name)
35
+ return nil unless defined?(Legion::Data::Model)
36
+ return nil unless Legion::Data::Model.const_defined?(name, false)
37
+
38
+ Legion::Data::Model.const_get(name, false)
39
+ end
40
+ end
41
+ end
42
+ end
43
+ end
44
+ end
45
+ end
@@ -7,13 +7,16 @@ module Legion
7
7
  module Knowledge
8
8
  module Helpers
9
9
  module Chunker
10
+ extend Legion::Logging::Helper
11
+ extend Legion::Settings::Helper
12
+
10
13
  CHARS_PER_TOKEN = 4
11
14
 
12
15
  module_function
13
16
 
14
17
  def chunk(sections:, max_tokens: nil, overlap_tokens: nil)
15
- resolved_max = max_tokens || settings_max_tokens || 512
16
- resolved_overlap = overlap_tokens || settings_overlap_tokens || 128
18
+ resolved_max = max_tokens || settings[:chunker][:max_tokens]
19
+ resolved_overlap = overlap_tokens || settings[:chunker][:overlap_tokens]
17
20
 
18
21
  max_chars = resolved_max * CHARS_PER_TOKEN
19
22
  overlap_chars = resolved_overlap * CHARS_PER_TOKEN
@@ -89,24 +92,6 @@ module Legion
89
92
  end
90
93
  end
91
94
  private_class_method :apollo_compatible_content_hash
92
-
93
- def settings_max_tokens
94
- return nil unless defined?(Legion::Settings)
95
-
96
- Legion::Settings.dig(:knowledge, :chunker, :max_tokens)
97
- rescue StandardError => _e
98
- nil
99
- end
100
- private_class_method :settings_max_tokens
101
-
102
- def settings_overlap_tokens
103
- return nil unless defined?(Legion::Settings)
104
-
105
- Legion::Settings.dig(:knowledge, :chunker, :overlap_tokens)
106
- rescue StandardError => _e
107
- nil
108
- end
109
- private_class_method :settings_overlap_tokens
110
95
  end
111
96
  end
112
97
  end
@@ -7,6 +7,8 @@ module Legion
7
7
  module Knowledge
8
8
  module Helpers
9
9
  module Manifest
10
+ extend Legion::Logging::Helper
11
+
10
12
  module_function
11
13
 
12
14
  def scan(path:, extensions: %w[.md .txt .docx .pdf])
@@ -25,15 +27,10 @@ module Legion
25
27
  results << build_entry(entry)
26
28
  end
27
29
  rescue Errno::EPERM, Errno::EACCES, Errno::ELOOP, Errno::ENOENT => e
28
- log.debug("[manifest] skipping unreadable #{entry}: #{e.class}: #{e.message}")
30
+ handle_exception(e, level: :warn, operation: 'knowledge.manifest.walk', entry: entry)
29
31
  end
30
32
  private_class_method :walk
31
33
 
32
- def log
33
- Legion::Logging
34
- end
35
- private_class_method :log
36
-
37
34
  def diff(current:, previous:)
38
35
  current_map = current.to_h { |e| [e[:path], e[:sha256]] }
39
36
  previous_map = previous.to_h { |e| [e[:path], e[:sha256]] }
@@ -2,7 +2,7 @@
2
2
 
3
3
  require 'digest'
4
4
  require 'fileutils'
5
- require 'json'
5
+ require 'legion/json'
6
6
  require 'tempfile'
7
7
 
8
8
  module Legion
@@ -10,6 +10,9 @@ module Legion
10
10
  module Knowledge
11
11
  module Helpers
12
12
  module ManifestStore
13
+ extend Legion::Logging::Helper
14
+ extend Legion::JSON::Helper
15
+
13
16
  module_function
14
17
 
15
18
  STORE_DIR = ::File.expand_path('~/.legionio/knowledge').freeze
@@ -19,8 +22,9 @@ module Legion
19
22
  return [] unless ::File.exist?(path)
20
23
 
21
24
  raw = ::File.read(path, encoding: 'utf-8')
22
- ::JSON.parse(raw, symbolize_names: true)
23
- rescue StandardError => _e
25
+ json_parse(raw)
26
+ rescue StandardError => e
27
+ handle_exception(e, level: :warn, operation: 'knowledge.manifest_store.load', corpus_path: corpus_path)
24
28
  []
25
29
  end
26
30
 
@@ -28,10 +32,11 @@ module Legion
28
32
  ::FileUtils.mkdir_p(STORE_DIR)
29
33
  path = store_path(corpus_path: corpus_path)
30
34
  tmp = "#{path}.tmp"
31
- ::File.write(tmp, ::JSON.generate(manifest.map { |e| serialize_entry(e) }))
35
+ ::File.write(tmp, json_generate(manifest.map { |e| serialize_entry(e) }))
32
36
  ::File.rename(tmp, path)
33
37
  true
34
- rescue StandardError => _e
38
+ rescue StandardError => e
39
+ handle_exception(e, level: :warn, operation: 'knowledge.manifest_store.save', corpus_path: corpus_path)
35
40
  false
36
41
  end
37
42
 
@@ -5,6 +5,8 @@ module Legion
5
5
  module Knowledge
6
6
  module Helpers
7
7
  module Parser
8
+ extend Legion::Logging::Helper
9
+
8
10
  module_function
9
11
 
10
12
  def parse(file_path:)
@@ -57,6 +59,7 @@ module Legion
57
59
  heading = ::File.basename(file_path, '.*')
58
60
  [{ heading: heading, section_path: [], content: result[:text].strip, source_file: file_path }]
59
61
  rescue StandardError => e
62
+ handle_exception(e, level: :warn, operation: 'knowledge.parser.extract_via_data', file_path: file_path)
60
63
  [{ error: 'extraction_failed', source_file: file_path, detail: e.message }]
61
64
  end
62
65
 
@@ -5,6 +5,8 @@ module Legion
5
5
  module Knowledge
6
6
  module Runners
7
7
  module Corpus # rubocop:disable Legion/Extension/RunnerIncludeHelpers
8
+ extend Legion::Logging::Helper
9
+
8
10
  module_function
9
11
 
10
12
  def manifest_path(path:)
@@ -32,6 +34,7 @@ module Legion
32
34
  total_bytes: entries.sum { |e| e[:size] }
33
35
  }
34
36
  rescue StandardError => e
37
+ handle_exception(e, level: :warn, operation: 'knowledge.corpus.corpus_stats', path: path)
35
38
  { success: false, error: e.message }
36
39
  end
37
40
  end
@@ -1,5 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require_relative '../helpers/apollo_models'
4
+
3
5
  require 'securerandom'
4
6
 
5
7
  module Legion
@@ -7,12 +9,10 @@ module Legion
7
9
  module Knowledge
8
10
  module Runners
9
11
  module Ingest # rubocop:disable Legion/Extension/RunnerIncludeHelpers
10
- module_function
12
+ extend Legion::Logging::Helper
13
+ extend Legion::Settings::Helper
11
14
 
12
- def log
13
- Legion::Logging
14
- end
15
- private_class_method :log
15
+ module_function
16
16
 
17
17
  def scan_corpus(path:, extensions: nil)
18
18
  opts = { path: path }
@@ -29,17 +29,27 @@ module Legion
29
29
  }
30
30
  end
31
31
 
32
- def ingest_corpus(path: nil, monitors: nil, dry_run: false, force: false)
33
- return ingest_monitors(monitors: monitors, dry_run: dry_run, force: force) if monitors&.any?
32
+ FILTER_SCHEMA = {
33
+ type: 'object',
34
+ properties: {
35
+ relevant: { type: 'boolean' },
36
+ confidence: { type: 'number' },
37
+ reason: { type: 'string' }
38
+ },
39
+ required: %w[relevant confidence]
40
+ }.freeze
41
+
42
+ def ingest_corpus(path: nil, monitors: nil, dry_run: false, force: false, filter: true)
43
+ return ingest_monitors(monitors: monitors, dry_run: dry_run, force: force, filter: filter) if monitors&.any?
34
44
  raise ArgumentError, 'path is required when monitors is not provided' if path.nil?
35
45
 
36
- ingest_corpus_path(path: path, dry_run: dry_run, force: force)
46
+ ingest_corpus_path(path: path, dry_run: dry_run, force: force, filter: filter)
37
47
  rescue ArgumentError => e
38
- log.warn(e.message)
48
+ handle_exception(e, level: :warn, operation: 'knowledge.ingest.ingest_corpus')
39
49
  { success: false, error: e.message }
40
50
  end
41
51
 
42
- def ingest_corpus_path(path:, dry_run: false, force: false)
52
+ def ingest_corpus_path(path:, dry_run: false, force: false, filter: true)
43
53
  current = Helpers::Manifest.scan(path: path)
44
54
  previous = force ? [] : Helpers::ManifestStore.load(corpus_path: path)
45
55
  delta = Helpers::Manifest.diff(current: current, previous: previous)
@@ -50,7 +60,7 @@ module Legion
50
60
  chunks_updated = 0
51
61
 
52
62
  to_process.each do |file_path|
53
- result = process_file(file_path, dry_run: dry_run, force: force)
63
+ result = process_file(file_path, dry_run: dry_run, force: force, filter: filter)
54
64
  chunks_created += result[:created]
55
65
  chunks_skipped += result[:skipped]
56
66
  chunks_updated += result[:updated]
@@ -71,16 +81,16 @@ module Legion
71
81
  chunks_updated: chunks_updated
72
82
  }
73
83
  rescue StandardError => e
74
- log.warn(e.message)
84
+ handle_exception(e, level: :warn, operation: 'knowledge.ingest.ingest_corpus_path', path: path)
75
85
  { success: false, error: e.message }
76
86
  end
77
87
  private_class_method :ingest_corpus_path
78
88
 
79
- def ingest_monitors(monitors:, dry_run: false, force: false)
89
+ def ingest_monitors(monitors:, dry_run: false, force: false, filter: true)
80
90
  results = monitors.map do |monitor|
81
- ingest_corpus(path: monitor[:path], dry_run: dry_run, force: force)
91
+ ingest_corpus(path: monitor[:path], dry_run: dry_run, force: force, filter: filter)
82
92
  rescue StandardError => e
83
- log.warn(e.message)
93
+ handle_exception(e, level: :warn, operation: 'knowledge.ingest.ingest_monitor', path: monitor[:path])
84
94
  { success: false, path: monitor[:path], error: e.message }
85
95
  end
86
96
 
@@ -101,7 +111,7 @@ module Legion
101
111
 
102
112
  { success: true, monitors_processed: results.size, **total }
103
113
  rescue StandardError => e
104
- log.warn(e.message)
114
+ handle_exception(e, level: :warn, operation: 'knowledge.ingest.ingest_monitors')
105
115
  { success: false, error: e.message }
106
116
  end
107
117
  private_class_method :ingest_monitors
@@ -114,17 +124,17 @@ module Legion
114
124
  section_path: [source_type.to_s],
115
125
  source_file: source_path
116
126
  }
117
- chunks = Helpers::Chunker.chunk(sections: [section])
127
+ chunks = filter_chunks(Helpers::Chunker.chunk(sections: [section]), filter: true)
118
128
  paired = batch_embed_chunks(chunks, force: false)
119
129
  paired.each { |p| upsert_chunk_with_embedding(p[:chunk], p[:embedding], force: false, exists: p[:exists] || false) }
120
130
  { status: :ingested, chunks: chunks.size, source_type: source_type, metadata: metadata }
121
131
  rescue StandardError => e
122
- log.warn(e.message)
132
+ handle_exception(e, level: :warn, operation: 'knowledge.ingest.ingest_content', source_type: source_type)
123
133
  { status: :failed, error: e.message, source_type: source_type, metadata: metadata }
124
134
  end
125
135
 
126
- def ingest_file(file_path:, force: false)
127
- result = process_file(file_path, dry_run: false, force: force)
136
+ def ingest_file(file_path:, force: false, filter: true)
137
+ result = process_file(file_path, dry_run: false, force: force, filter: filter)
128
138
 
129
139
  {
130
140
  success: true,
@@ -134,23 +144,24 @@ module Legion
134
144
  chunks_updated: result[:updated]
135
145
  }
136
146
  rescue StandardError => e
137
- log.warn(e.message)
147
+ handle_exception(e, level: :warn, operation: 'knowledge.ingest.ingest_file', file_path: file_path)
138
148
  { success: false, error: e.message }
139
149
  end
140
150
 
141
- def process_file(file_path, dry_run: false, force: false)
151
+ def process_file(file_path, dry_run: false, force: false, filter: true)
142
152
  sections = Helpers::Parser.parse(file_path: file_path)
143
153
  return { created: 0, skipped: 0, updated: 0 } if sections.first&.key?(:error)
144
154
 
145
- chunks = Helpers::Chunker.chunk(sections: sections)
155
+ chunks = Helpers::Chunker.chunk(sections: sections)
156
+ filtered_chunks = filter_chunks(chunks, filter: filter)
146
157
  paired = if dry_run
147
- chunks.map { |c| { chunk: c, embedding: nil } }
158
+ filtered_chunks.map { |c| { chunk: c, embedding: nil } }
148
159
  else
149
- batch_embed_chunks(chunks, force: force)
160
+ batch_embed_chunks(filtered_chunks, force: force)
150
161
  end
151
162
 
152
163
  created = 0
153
- skipped = 0
164
+ skipped = chunks.size - filtered_chunks.size
154
165
  updated = 0
155
166
 
156
167
  paired.each do |p|
@@ -166,6 +177,49 @@ module Legion
166
177
  end
167
178
  private_class_method :process_file
168
179
 
180
+ def filter_chunks(chunks, filter:)
181
+ return chunks unless filter
182
+
183
+ prompt = settings[:ingest][:filter_prompt]
184
+ return chunks if prompt.to_s.strip.empty? || !llm_structured_available?
185
+
186
+ chunks.select { |chunk| chunk_allowed_by_filter?(chunk, prompt: prompt, threshold: settings[:ingest][:filter_threshold]) }
187
+ rescue StandardError => e
188
+ handle_exception(e, level: :warn, operation: 'knowledge.ingest.filter_chunks')
189
+ chunks
190
+ end
191
+ private_class_method :filter_chunks
192
+
193
+ def chunk_allowed_by_filter?(chunk, prompt:, threshold:)
194
+ hash = chunk[:content_hash] || Helpers::Chunker.send(:apollo_compatible_content_hash, chunk[:content].to_s)
195
+ return filter_cache[hash] if filter_cache.key?(hash)
196
+
197
+ result = Legion::LLM.structured( # rubocop:disable Legion/HelperMigration/DirectLlm
198
+ messages: [
199
+ { role: 'system', content: prompt },
200
+ { role: 'user', content: chunk[:content].to_s }
201
+ ],
202
+ schema: FILTER_SCHEMA,
203
+ caller: { extension: 'lex-knowledge', runner: 'ingest', operation: 'filter_chunk' }
204
+ )
205
+ data = result.is_a?(Hash) ? (result[:data] || result) : {}
206
+ filter_cache[hash] = data[:relevant] == true && data[:confidence].to_f >= threshold.to_f
207
+ rescue StandardError => e
208
+ handle_exception(e, level: :warn, operation: 'knowledge.ingest.filter_chunk', content_hash: hash)
209
+ filter_cache[hash] = true
210
+ end
211
+ private_class_method :chunk_allowed_by_filter?
212
+
213
+ def filter_cache
214
+ Thread.current[:lex_knowledge_filter_cache] ||= {}
215
+ end
216
+ private_class_method :filter_cache
217
+
218
+ def llm_structured_available?
219
+ defined?(Legion::LLM) && Legion::LLM.respond_to?(:structured)
220
+ end
221
+ private_class_method :llm_structured_available?
222
+
169
223
  def batch_embed_chunks(chunks, force:)
170
224
  exists_map = force ? {} : build_exists_map(chunks)
171
225
  return paired_without_embed(chunks, exists_map) unless llm_embed_available?
@@ -175,7 +229,7 @@ module Legion
175
229
 
176
230
  chunks.map { |c| { chunk: c, embedding: embed_map[c[:content_hash]], exists: exists_map.fetch(c[:content_hash], false) } }
177
231
  rescue StandardError => e
178
- log.warn(e.message)
232
+ handle_exception(e, level: :warn, operation: 'knowledge.ingest.batch_embed_chunks')
179
233
  paired_without_embed(chunks, {})
180
234
  end
181
235
  private_class_method :batch_embed_chunks
@@ -201,7 +255,7 @@ module Legion
201
255
  h[needs_embed[r[:index]][:content_hash]] = r[:vector] unless r[:error]
202
256
  end
203
257
  rescue StandardError => e
204
- log.warn(e.message)
258
+ handle_exception(e, level: :warn, operation: 'knowledge.ingest.build_embed_map')
205
259
  {}
206
260
  end
207
261
  private_class_method :build_embed_map
@@ -228,19 +282,19 @@ module Legion
228
282
  end
229
283
  force ? :updated : :created
230
284
  rescue StandardError => e
231
- log.warn("[knowledge][upsert_chunk] unexpected error class=#{e.class} message=#{e.message} chunk_hash=#{chunk[:content_hash]&.slice(0, 12)}")
285
+ handle_exception(e, level: :warn, operation: 'knowledge.ingest.upsert_chunk', content_hash: chunk[:content_hash]&.slice(0, 12))
232
286
  :skipped
233
287
  end
234
288
  private_class_method :upsert_chunk_with_embedding
235
289
 
236
290
  def chunk_exists?(content_hash)
237
- return false unless defined?(Legion::Data::Model::ApolloEntry)
291
+ return false unless Helpers::ApolloModels.entry_available?
238
292
 
239
- Legion::Data::Model::ApolloEntry
240
- .where(content_hash: content_hash)
241
- .any?
293
+ Helpers::ApolloModels.entry
294
+ .where(content_hash: content_hash)
295
+ .any?
242
296
  rescue StandardError => e
243
- log.warn(e.message)
297
+ handle_exception(e, level: :warn, operation: 'knowledge.ingest.chunk_exists', content_hash: content_hash)
244
298
  false
245
299
  end
246
300
  private_class_method :chunk_exists?
@@ -248,18 +302,20 @@ module Legion
248
302
  def ingest_to_apollo(chunk, embedding)
249
303
  return unless defined?(Legion::Extensions::Apollo)
250
304
 
305
+ context = {
306
+ source_file: chunk[:source_file],
307
+ heading: chunk[:heading],
308
+ section_path: chunk[:section_path],
309
+ chunk_index: chunk[:chunk_index],
310
+ token_count: chunk[:token_count]
311
+ }
251
312
  payload = {
252
313
  content: chunk[:content],
253
314
  content_type: 'document_chunk',
254
315
  content_hash: chunk[:content_hash],
255
316
  tags: [chunk[:source_file], chunk[:heading], 'document_chunk'].compact.uniq,
256
- metadata: {
257
- source_file: chunk[:source_file],
258
- heading: chunk[:heading],
259
- section_path: chunk[:section_path],
260
- chunk_index: chunk[:chunk_index],
261
- token_count: chunk[:token_count]
262
- }
317
+ context: context,
318
+ metadata: context
263
319
  }
264
320
  payload[:embedding] = embedding if embedding
265
321
 
@@ -278,7 +334,7 @@ module Legion
278
334
  metadata: { source_file: file_path, retired: true }
279
335
  )
280
336
  rescue StandardError => e
281
- log.warn(e.message)
337
+ handle_exception(e, level: :warn, operation: 'knowledge.ingest.retire_file', file_path: file_path)
282
338
  nil
283
339
  end
284
340
  private_class_method :retire_file