lex-knowledge 0.6.10 → 0.6.15

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the contents of those package versions as they appear in their respective public registries.
@@ -1,10 +1,15 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require_relative '../helpers/apollo_models'
4
+
3
5
  module Legion
4
6
  module Extensions
5
7
  module Knowledge
6
8
  module Runners
7
9
  module Maintenance # rubocop:disable Legion/Extension/RunnerIncludeHelpers
10
+ extend Legion::Logging::Helper
11
+ extend Legion::Settings::Helper
12
+
8
13
  module_function
9
14
 
10
15
  def detect_orphans(path:)
@@ -21,6 +26,7 @@ module Legion
21
26
  total_manifest_files: manifest_files.size
22
27
  }
23
28
  rescue StandardError => e
29
+ handle_exception(e, level: :warn, operation: 'knowledge.maintenance.detect_orphans', path: path)
24
30
  { success: false, error: e.message }
25
31
  end
26
32
 
@@ -34,6 +40,7 @@ module Legion
34
40
 
35
41
  { success: true, archived: archived, files_cleaned: detection[:orphan_files].size, dry_run: false }
36
42
  rescue StandardError => e
43
+ handle_exception(e, level: :warn, operation: 'knowledge.maintenance.cleanup_orphans', path: path)
37
44
  { success: false, error: e.message }
38
45
  end
39
46
 
@@ -43,11 +50,12 @@ module Legion
43
50
 
44
51
  Runners::Ingest.ingest_corpus(path: path, force: true)
45
52
  rescue StandardError => e
53
+ handle_exception(e, level: :warn, operation: 'knowledge.maintenance.reindex', path: path)
46
54
  { success: false, error: e.message }
47
55
  end
48
56
 
49
57
  def health(path:)
50
- resolved = path || (Legion::Settings.dig(:knowledge, :corpus_path) if defined?(Legion::Settings))
58
+ resolved = path || settings[:corpus_path]
51
59
  return { success: false, error: 'corpus_path is required' } if resolved.nil? || resolved.to_s.empty?
52
60
 
53
61
  scan_entries = Helpers::Manifest.scan(path: resolved)
@@ -62,11 +70,12 @@ module Legion
62
70
  sync: build_sync_stats(resolved, scan_entries)
63
71
  }
64
72
  rescue StandardError => e
73
+ handle_exception(e, level: :warn, operation: 'knowledge.maintenance.health', path: path)
65
74
  { success: false, error: e.message }
66
75
  end
67
76
 
68
77
  def quality_report(limit: nil)
69
- resolved_limit = limit || settings_quality_limit
78
+ resolved_limit = limit || settings[:maintenance][:quality_report_limit]
70
79
 
71
80
  {
72
81
  success: true,
@@ -77,6 +86,7 @@ module Legion
77
86
  summary: quality_summary
78
87
  }
79
88
  rescue StandardError => e
89
+ handle_exception(e, level: :warn, operation: 'knowledge.maintenance.quality_report')
80
90
  { success: false, error: e.message }
81
91
  end
82
92
 
@@ -92,17 +102,18 @@ module Legion
92
102
  private_class_method :build_local_stats
93
103
 
94
104
  def build_apollo_stats
95
- return apollo_defaults unless defined?(Legion::Data::Model::ApolloEntry)
105
+ return apollo_defaults unless Helpers::ApolloModels.entry_available?
96
106
 
97
- base = Legion::Data::Model::ApolloEntry
98
- .where(Sequel.pg_array_op(:tags).contains(Sequel.pg_array(['document_chunk'])))
99
- .exclude(status: 'archived')
107
+ base = Helpers::ApolloModels.entry
108
+ .where(Sequel.pg_array_op(:tags).contains(Sequel.pg_array(['document_chunk'])))
109
+ .exclude(status: 'archived')
100
110
  total = base.count
101
111
  return apollo_defaults if total.zero?
102
112
 
103
113
  rows = base.select(:confidence, :status, :access_count, :embedding, :created_at).all
104
114
  apollo_stats_from_rows(base, rows, total)
105
- rescue StandardError => _e
115
+ rescue StandardError => e
116
+ handle_exception(e, level: :warn, operation: 'knowledge.maintenance.build_apollo_stats')
106
117
  apollo_defaults
107
118
  end
108
119
  private_class_method :build_apollo_stats
@@ -110,7 +121,7 @@ module Legion
110
121
  def apollo_stats_from_rows(base, rows, total)
111
122
  confidences = rows.map { |r| r[:confidence].to_f }
112
123
  with_embeddings = rows.count { |r| !r[:embedding].nil? }
113
- stale_threshold = settings_stale_threshold
124
+ stale_threshold = settings[:maintenance][:stale_threshold]
114
125
  timestamps = rows.map { |r| r[:created_at] }
115
126
 
116
127
  {
@@ -163,95 +174,100 @@ module Legion
163
174
  private_class_method :load_manifest_files
164
175
 
165
176
  def load_apollo_source_files
166
- return [] unless defined?(Legion::Data::Model::ApolloEntry)
167
-
168
- Legion::Data::Model::ApolloEntry
169
- .where(Sequel.pg_array_op(:tags).contains(Sequel.pg_array(['document_chunk'])))
170
- .exclude(status: 'archived')
171
- .select_map(Sequel.lit("source_context->>'source_file'"))
172
- .compact
173
- .uniq
174
- rescue StandardError => _e
177
+ return [] unless Helpers::ApolloModels.entry_available?
178
+
179
+ Helpers::ApolloModels.entry
180
+ .where(Sequel.pg_array_op(:tags).contains(Sequel.pg_array(['document_chunk'])))
181
+ .exclude(status: 'archived')
182
+ .select_map(Sequel.lit("source_context->>'source_file'"))
183
+ .compact
184
+ .uniq
185
+ rescue StandardError => e
186
+ handle_exception(e, level: :warn, operation: 'knowledge.maintenance.load_apollo_source_files')
175
187
  []
176
188
  end
177
189
  private_class_method :load_apollo_source_files
178
190
 
179
191
  def count_apollo_chunks
180
- return 0 unless defined?(Legion::Data::Model::ApolloEntry)
192
+ return 0 unless Helpers::ApolloModels.entry_available?
181
193
 
182
- Legion::Data::Model::ApolloEntry
183
- .where(Sequel.pg_array_op(:tags).contains(Sequel.pg_array(['document_chunk'])))
184
- .exclude(status: 'archived')
185
- .count
186
- rescue StandardError => _e
194
+ Helpers::ApolloModels.entry
195
+ .where(Sequel.pg_array_op(:tags).contains(Sequel.pg_array(['document_chunk'])))
196
+ .exclude(status: 'archived')
197
+ .count
198
+ rescue StandardError => e
199
+ handle_exception(e, level: :warn, operation: 'knowledge.maintenance.count_apollo_chunks')
187
200
  0
188
201
  end
189
202
  private_class_method :count_apollo_chunks
190
203
 
191
204
  def archive_orphan_entries(orphan_files)
192
- return 0 unless defined?(Legion::Data::Model::ApolloEntry)
205
+ return 0 unless Helpers::ApolloModels.entry_available?
193
206
 
194
- Legion::Data::Model::ApolloEntry
195
- .where(Sequel.pg_array_op(:tags).contains(Sequel.pg_array(['document_chunk'])))
196
- .where(Sequel.lit("source_context->>'source_file' IN ?", orphan_files))
197
- .exclude(status: 'archived')
198
- .update(status: 'archived', updated_at: Time.now)
207
+ Helpers::ApolloModels.entry
208
+ .where(Sequel.pg_array_op(:tags).contains(Sequel.pg_array(['document_chunk'])))
209
+ .where(Sequel.lit("source_context->>'source_file' IN ?", orphan_files))
210
+ .exclude(status: 'archived')
211
+ .update(status: 'archived', updated_at: Time.now)
199
212
  end
200
213
  private_class_method :archive_orphan_entries
201
214
 
202
215
  def hot_chunks(limit)
203
- return [] unless defined?(Legion::Data::Model::ApolloEntry)
204
-
205
- Legion::Data::Model::ApolloEntry
206
- .where(Sequel.pg_array_op(:tags).contains(Sequel.pg_array(['document_chunk'])))
207
- .exclude(status: 'archived')
208
- .where { access_count.positive? }
209
- .order(Sequel.desc(:access_count))
210
- .limit(limit)
211
- .select_map([:id, :access_count, :confidence,
212
- Sequel.lit("source_context->>'source_file' AS source_file")])
213
- .map { |r| { id: r[0], access_count: r[1], confidence: r[2], source_file: r[3] } }
214
- rescue StandardError => _e
216
+ return [] unless Helpers::ApolloModels.entry_available?
217
+
218
+ Helpers::ApolloModels.entry
219
+ .where(Sequel.pg_array_op(:tags).contains(Sequel.pg_array(['document_chunk'])))
220
+ .exclude(status: 'archived')
221
+ .where { access_count.positive? }
222
+ .order(Sequel.desc(:access_count))
223
+ .limit(limit)
224
+ .select_map([:id, :access_count, :confidence,
225
+ Sequel.lit("source_context->>'source_file' AS source_file")])
226
+ .map { |r| { id: r[0], access_count: r[1], confidence: r[2], source_file: r[3] } }
227
+ rescue StandardError => e
228
+ handle_exception(e, level: :warn, operation: 'knowledge.maintenance.hot_chunks')
215
229
  []
216
230
  end
217
231
  private_class_method :hot_chunks
218
232
 
219
233
  def cold_chunks(limit)
220
- return [] unless defined?(Legion::Data::Model::ApolloEntry)
234
+ return [] unless Helpers::ApolloModels.entry_available?
221
235
 
222
- days = settings_cold_chunk_days
236
+ days = settings[:maintenance][:cold_chunk_days]
223
237
  cutoff = Time.now - (days * 86_400)
224
238
 
225
- Legion::Data::Model::ApolloEntry
226
- .where(Sequel.pg_array_op(:tags).contains(Sequel.pg_array(['document_chunk'])))
227
- .exclude(status: 'archived')
228
- .where(access_count: 0)
229
- .where { created_at < cutoff }
230
- .order(:created_at)
231
- .limit(limit)
232
- .select_map([:id, :confidence, :created_at,
233
- Sequel.lit("source_context->>'source_file' AS source_file")])
234
- .map { |r| { id: r[0], confidence: r[1], created_at: r[2]&.iso8601, source_file: r[3] } }
235
- rescue StandardError => _e
239
+ Helpers::ApolloModels.entry
240
+ .where(Sequel.pg_array_op(:tags).contains(Sequel.pg_array(['document_chunk'])))
241
+ .exclude(status: 'archived')
242
+ .where(access_count: 0)
243
+ .where { created_at < cutoff }
244
+ .order(:created_at)
245
+ .limit(limit)
246
+ .select_map([:id, :confidence, :created_at,
247
+ Sequel.lit("source_context->>'source_file' AS source_file")])
248
+ .map { |r| { id: r[0], confidence: r[1], created_at: r[2]&.iso8601, source_file: r[3] } }
249
+ rescue StandardError => e
250
+ handle_exception(e, level: :warn, operation: 'knowledge.maintenance.cold_chunks')
236
251
  []
237
252
  end
238
253
  private_class_method :cold_chunks
239
254
 
240
255
  def low_confidence_chunks(limit)
241
- return [] unless defined?(Legion::Data::Model::ApolloEntry)
242
-
243
- threshold = settings_stale_threshold
244
-
245
- Legion::Data::Model::ApolloEntry
246
- .where(Sequel.pg_array_op(:tags).contains(Sequel.pg_array(['document_chunk'])))
247
- .exclude(status: 'archived')
248
- .where { confidence < threshold }
249
- .order(:confidence)
250
- .limit(limit)
251
- .select_map([:id, :confidence, :access_count,
252
- Sequel.lit("source_context->>'source_file' AS source_file")])
253
- .map { |r| { id: r[0], confidence: r[1], access_count: r[2], source_file: r[3] } }
254
- rescue StandardError => _e
256
+ return [] unless Helpers::ApolloModels.entry_available?
257
+
258
+ threshold = settings[:maintenance][:stale_threshold]
259
+
260
+ Helpers::ApolloModels.entry
261
+ .where(Sequel.pg_array_op(:tags).contains(Sequel.pg_array(['document_chunk'])))
262
+ .exclude(status: 'archived')
263
+ .where { confidence < threshold }
264
+ .order(:confidence)
265
+ .limit(limit)
266
+ .select_map([:id, :confidence, :access_count,
267
+ Sequel.lit("source_context->>'source_file' AS source_file")])
268
+ .map { |r| { id: r[0], confidence: r[1], access_count: r[2], source_file: r[3] } }
269
+ rescue StandardError => e
270
+ handle_exception(e, level: :warn, operation: 'knowledge.maintenance.low_confidence_chunks')
255
271
  []
256
272
  end
257
273
  private_class_method :low_confidence_chunks
@@ -259,58 +275,33 @@ module Legion
259
275
  def quality_summary
260
276
  defaults = { total_queries: 0, avg_retrieval_score: nil, chunks_never_accessed: 0,
261
277
  chunks_below_threshold: 0 }
262
- return defaults unless defined?(Legion::Data::Model::ApolloEntry)
278
+ return defaults unless Helpers::ApolloModels.entry_available?
263
279
 
264
- base = Legion::Data::Model::ApolloEntry
265
- .where(Sequel.pg_array_op(:tags).contains(Sequel.pg_array(['document_chunk'])))
266
- .exclude(status: 'archived')
280
+ base = Helpers::ApolloModels.entry
281
+ .where(Sequel.pg_array_op(:tags).contains(Sequel.pg_array(['document_chunk'])))
282
+ .exclude(status: 'archived')
267
283
 
268
284
  {
269
285
  total_queries: query_count,
270
286
  avg_retrieval_score: nil,
271
287
  chunks_never_accessed: base.where(access_count: 0).count,
272
- chunks_below_threshold: base.where { confidence < settings_stale_threshold }.count
288
+ chunks_below_threshold: base.where { confidence < settings[:maintenance][:stale_threshold] }.count
273
289
  }
274
- rescue StandardError => _e
290
+ rescue StandardError => e
291
+ handle_exception(e, level: :warn, operation: 'knowledge.maintenance.quality_summary')
275
292
  defaults
276
293
  end
277
294
  private_class_method :quality_summary
278
295
 
279
296
  def query_count
280
- return 0 unless defined?(Legion::Data::Model::ApolloAccessLog)
297
+ return 0 unless Helpers::ApolloModels.access_log_available?
281
298
 
282
- Legion::Data::Model::ApolloAccessLog.where(action: 'knowledge_query').count
283
- rescue StandardError => _e
299
+ Helpers::ApolloModels.access_log.where(action: 'query').count
300
+ rescue StandardError => e
301
+ handle_exception(e, level: :warn, operation: 'knowledge.maintenance.query_count')
284
302
  0
285
303
  end
286
304
  private_class_method :query_count
287
-
288
- def settings_stale_threshold
289
- return 0.3 unless defined?(Legion::Settings)
290
-
291
- Legion::Settings.dig(:knowledge, :maintenance, :stale_threshold) || 0.3
292
- rescue StandardError => _e
293
- 0.3
294
- end
295
- private_class_method :settings_stale_threshold
296
-
297
- def settings_cold_chunk_days
298
- return 7 unless defined?(Legion::Settings)
299
-
300
- Legion::Settings.dig(:knowledge, :maintenance, :cold_chunk_days) || 7
301
- rescue StandardError => _e
302
- 7
303
- end
304
- private_class_method :settings_cold_chunk_days
305
-
306
- def settings_quality_limit
307
- return 10 unless defined?(Legion::Settings)
308
-
309
- Legion::Settings.dig(:knowledge, :maintenance, :quality_report_limit) || 10
310
- rescue StandardError => _e
311
- 10
312
- end
313
- private_class_method :settings_quality_limit
314
305
  end
315
306
  end
316
307
  end
@@ -5,6 +5,9 @@ module Legion
5
5
  module Knowledge
6
6
  module Runners
7
7
  module Monitor # rubocop:disable Legion/Extension/RunnerIncludeHelpers
8
+ extend Legion::Logging::Helper
9
+ extend Legion::Settings::Helper
10
+
8
11
  module_function
9
12
 
10
13
  DEFAULT_EXTENSIONS = %w[.md .txt].freeze
@@ -18,7 +21,8 @@ module Legion
18
21
  end
19
22
 
20
23
  monitors
21
- rescue StandardError => _e
24
+ rescue StandardError => e
25
+ handle_exception(e, level: :warn, operation: 'knowledge.monitor.resolve_monitors')
22
26
  []
23
27
  end
24
28
 
@@ -41,6 +45,7 @@ module Legion
41
45
 
42
46
  { success: true, monitor: entry }
43
47
  rescue StandardError => e
48
+ handle_exception(e, level: :warn, operation: 'knowledge.monitor.add_monitor', path: path)
44
49
  { success: false, error: e.message }
45
50
  end
46
51
 
@@ -54,12 +59,14 @@ module Legion
54
59
 
55
60
  { success: true, removed: found }
56
61
  rescue StandardError => e
62
+ handle_exception(e, level: :warn, operation: 'knowledge.monitor.remove_monitor', identifier: identifier)
57
63
  { success: false, error: e.message }
58
64
  end
59
65
 
60
66
  def list_monitors
61
67
  { success: true, monitors: resolve_monitors }
62
68
  rescue StandardError => e
69
+ handle_exception(e, level: :warn, operation: 'knowledge.monitor.list_monitors')
63
70
  { success: false, error: e.message }
64
71
  end
65
72
 
@@ -70,44 +77,40 @@ module Legion
70
77
  monitors.each do |m|
71
78
  scan = Helpers::Manifest.scan(path: m[:path], extensions: m[:extensions])
72
79
  total_files += scan.size
73
- rescue StandardError => _e
80
+ rescue StandardError => e
81
+ handle_exception(e, level: :warn, operation: 'knowledge.monitor.scan_monitor', path: m[:path])
74
82
  next
75
83
  end
76
84
 
77
85
  { success: true, total_monitors: monitors.size, total_files: total_files }
78
86
  rescue StandardError => e
87
+ handle_exception(e, level: :warn, operation: 'knowledge.monitor.monitor_status')
79
88
  { success: false, error: e.message }
80
89
  end
81
90
 
82
91
  # --- private helpers ---
83
92
 
84
93
  def read_monitors_setting
85
- return nil unless defined?(Legion::Settings) && !Legion::Settings[:knowledge].nil?
86
-
87
- Legion::Settings.dig(:knowledge, :monitors)
88
- rescue StandardError => _e
94
+ settings[:monitors]
95
+ rescue StandardError => e
96
+ handle_exception(e, level: :warn, operation: 'knowledge.monitor.read_monitors_setting')
89
97
  nil
90
98
  end
91
99
  private_class_method :read_monitors_setting
92
100
 
93
101
  def read_legacy_corpus_path
94
- return nil unless defined?(Legion::Settings) && !Legion::Settings[:knowledge].nil?
95
-
96
- Legion::Settings.dig(:knowledge, :corpus_path)
97
- rescue StandardError => _e
102
+ settings[:corpus_path]
103
+ rescue StandardError => e
104
+ handle_exception(e, level: :warn, operation: 'knowledge.monitor.read_legacy_corpus_path')
98
105
  nil
99
106
  end
100
107
  private_class_method :read_legacy_corpus_path
101
108
 
102
109
  def persist_monitors(monitors)
103
- return false unless defined?(Legion::Settings)
104
-
105
- loader = Legion::Settings.loader
106
- knowledge = loader.settings[:knowledge] || {}
107
- knowledge[:monitors] = monitors
108
- loader.settings[:knowledge] = knowledge
110
+ settings[:monitors] = monitors
109
111
  true
110
- rescue StandardError => _e
112
+ rescue StandardError => e
113
+ handle_exception(e, level: :warn, operation: 'knowledge.monitor.persist_monitors')
111
114
  false
112
115
  end
113
116
  private_class_method :persist_monitors