lex-knowledge 0.6.10 → 0.6.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/legion/extensions/knowledge/actors/corpus_ingest.rb +5 -1
- data/lib/legion/extensions/knowledge/actors/corpus_watcher.rb +7 -12
- data/lib/legion/extensions/knowledge/actors/maintenance_runner.rb +15 -18
- data/lib/legion/extensions/knowledge/helpers/apollo_models.rb +45 -0
- data/lib/legion/extensions/knowledge/helpers/chunker.rb +5 -20
- data/lib/legion/extensions/knowledge/helpers/manifest.rb +3 -6
- data/lib/legion/extensions/knowledge/helpers/manifest_store.rb +10 -5
- data/lib/legion/extensions/knowledge/helpers/parser.rb +3 -0
- data/lib/legion/extensions/knowledge/runners/corpus.rb +3 -0
- data/lib/legion/extensions/knowledge/runners/ingest.rb +115 -49
- data/lib/legion/extensions/knowledge/runners/maintenance.rb +95 -104
- data/lib/legion/extensions/knowledge/runners/monitor.rb +20 -17
- data/lib/legion/extensions/knowledge/runners/query.rb +161 -20
- data/lib/legion/extensions/knowledge/version.rb +1 -1
- data/lib/legion/extensions/knowledge.rb +34 -0
- metadata +2 -1
|
@@ -1,10 +1,15 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
require_relative '../helpers/apollo_models'
|
|
4
|
+
|
|
3
5
|
module Legion
|
|
4
6
|
module Extensions
|
|
5
7
|
module Knowledge
|
|
6
8
|
module Runners
|
|
7
9
|
module Maintenance # rubocop:disable Legion/Extension/RunnerIncludeHelpers
|
|
10
|
+
extend Legion::Logging::Helper
|
|
11
|
+
extend Legion::Settings::Helper
|
|
12
|
+
|
|
8
13
|
module_function
|
|
9
14
|
|
|
10
15
|
def detect_orphans(path:)
|
|
@@ -21,6 +26,7 @@ module Legion
|
|
|
21
26
|
total_manifest_files: manifest_files.size
|
|
22
27
|
}
|
|
23
28
|
rescue StandardError => e
|
|
29
|
+
handle_exception(e, level: :warn, operation: 'knowledge.maintenance.detect_orphans', path: path)
|
|
24
30
|
{ success: false, error: e.message }
|
|
25
31
|
end
|
|
26
32
|
|
|
@@ -34,6 +40,7 @@ module Legion
|
|
|
34
40
|
|
|
35
41
|
{ success: true, archived: archived, files_cleaned: detection[:orphan_files].size, dry_run: false }
|
|
36
42
|
rescue StandardError => e
|
|
43
|
+
handle_exception(e, level: :warn, operation: 'knowledge.maintenance.cleanup_orphans', path: path)
|
|
37
44
|
{ success: false, error: e.message }
|
|
38
45
|
end
|
|
39
46
|
|
|
@@ -43,11 +50,12 @@ module Legion
|
|
|
43
50
|
|
|
44
51
|
Runners::Ingest.ingest_corpus(path: path, force: true)
|
|
45
52
|
rescue StandardError => e
|
|
53
|
+
handle_exception(e, level: :warn, operation: 'knowledge.maintenance.reindex', path: path)
|
|
46
54
|
{ success: false, error: e.message }
|
|
47
55
|
end
|
|
48
56
|
|
|
49
57
|
def health(path:)
|
|
50
|
-
resolved = path ||
|
|
58
|
+
resolved = path || settings[:corpus_path]
|
|
51
59
|
return { success: false, error: 'corpus_path is required' } if resolved.nil? || resolved.to_s.empty?
|
|
52
60
|
|
|
53
61
|
scan_entries = Helpers::Manifest.scan(path: resolved)
|
|
@@ -62,11 +70,12 @@ module Legion
|
|
|
62
70
|
sync: build_sync_stats(resolved, scan_entries)
|
|
63
71
|
}
|
|
64
72
|
rescue StandardError => e
|
|
73
|
+
handle_exception(e, level: :warn, operation: 'knowledge.maintenance.health', path: path)
|
|
65
74
|
{ success: false, error: e.message }
|
|
66
75
|
end
|
|
67
76
|
|
|
68
77
|
def quality_report(limit: nil)
|
|
69
|
-
resolved_limit = limit ||
|
|
78
|
+
resolved_limit = limit || settings[:maintenance][:quality_report_limit]
|
|
70
79
|
|
|
71
80
|
{
|
|
72
81
|
success: true,
|
|
@@ -77,6 +86,7 @@ module Legion
|
|
|
77
86
|
summary: quality_summary
|
|
78
87
|
}
|
|
79
88
|
rescue StandardError => e
|
|
89
|
+
handle_exception(e, level: :warn, operation: 'knowledge.maintenance.quality_report')
|
|
80
90
|
{ success: false, error: e.message }
|
|
81
91
|
end
|
|
82
92
|
|
|
@@ -92,17 +102,18 @@ module Legion
|
|
|
92
102
|
private_class_method :build_local_stats
|
|
93
103
|
|
|
94
104
|
# Builds aggregate statistics for the entries returned by
# Helpers::ApolloModels.entry that are tagged 'document_chunk' and whose
# status is not 'archived'.
#
# Returns +apollo_defaults+ when the entry model is unavailable, when no
# matching entries exist, or when a StandardError is raised (the error is
# handed to +handle_exception+ at :warn level first). Otherwise returns the
# result of +apollo_stats_from_rows+.
def build_apollo_stats
  return apollo_defaults unless Helpers::ApolloModels.entry_available?

  entries = Helpers::ApolloModels.entry
  chunk_tagged = Sequel.pg_array_op(:tags).contains(Sequel.pg_array(['document_chunk']))
  live_chunks = entries.where(chunk_tagged).exclude(status: 'archived')

  chunk_total = live_chunks.count
  return apollo_defaults if chunk_total.zero?

  stat_columns = %i[confidence status access_count embedding created_at]
  chunk_rows = live_chunks.select(*stat_columns).all
  apollo_stats_from_rows(live_chunks, chunk_rows, chunk_total)
rescue StandardError => e
  handle_exception(e, level: :warn, operation: 'knowledge.maintenance.build_apollo_stats')
  apollo_defaults
end
|
|
108
119
|
private_class_method :build_apollo_stats
|
|
@@ -110,7 +121,7 @@ module Legion
|
|
|
110
121
|
def apollo_stats_from_rows(base, rows, total)
|
|
111
122
|
confidences = rows.map { |r| r[:confidence].to_f }
|
|
112
123
|
with_embeddings = rows.count { |r| !r[:embedding].nil? }
|
|
113
|
-
stale_threshold =
|
|
124
|
+
stale_threshold = settings[:maintenance][:stale_threshold]
|
|
114
125
|
timestamps = rows.map { |r| r[:created_at] }
|
|
115
126
|
|
|
116
127
|
{
|
|
@@ -163,95 +174,100 @@ module Legion
|
|
|
163
174
|
private_class_method :load_manifest_files
|
|
164
175
|
|
|
165
176
|
# Lists the distinct, non-nil values of source_context->>'source_file' for
# entries tagged 'document_chunk' whose status is not 'archived'.
#
# Returns an empty array when the entry model is unavailable or when a
# StandardError is raised (the error is handed to +handle_exception+ at
# :warn level first).
def load_apollo_source_files
  return [] unless Helpers::ApolloModels.entry_available?

  entries = Helpers::ApolloModels.entry
  chunk_tagged = Sequel.pg_array_op(:tags).contains(Sequel.pg_array(['document_chunk']))
  live_chunks = entries.where(chunk_tagged).exclude(status: 'archived')

  source_files = live_chunks.select_map(Sequel.lit("source_context->>'source_file'"))
  source_files.compact.uniq
rescue StandardError => e
  handle_exception(e, level: :warn, operation: 'knowledge.maintenance.load_apollo_source_files')
  []
end
|
|
177
189
|
private_class_method :load_apollo_source_files
|
|
178
190
|
|
|
179
191
|
# Counts the entries tagged 'document_chunk' whose status is not 'archived'.
#
# Returns 0 when the entry model is unavailable or when a StandardError is
# raised (the error is handed to +handle_exception+ at :warn level first).
def count_apollo_chunks
  return 0 unless Helpers::ApolloModels.entry_available?

  entries = Helpers::ApolloModels.entry
  chunk_tagged = Sequel.pg_array_op(:tags).contains(Sequel.pg_array(['document_chunk']))
  live_chunks = entries.where(chunk_tagged).exclude(status: 'archived')
  live_chunks.count
rescue StandardError => e
  handle_exception(e, level: :warn, operation: 'knowledge.maintenance.count_apollo_chunks')
  0
end
|
|
189
202
|
private_class_method :count_apollo_chunks
|
|
190
203
|
|
|
191
204
|
# Sets status to 'archived' (and updated_at to the current time) on every
# not-yet-archived entry tagged 'document_chunk' whose
# source_context->>'source_file' is one of +orphan_files+.
#
# orphan_files - values bound to the `IN ?` placeholder
#                (presumably an Array of path strings; confirm with caller).
#
# Returns 0 when the entry model is unavailable, otherwise whatever the
# dataset +update+ call returns. Errors are not rescued here.
def archive_orphan_entries(orphan_files)
  return 0 unless Helpers::ApolloModels.entry_available?

  entries = Helpers::ApolloModels.entry
  chunk_tagged = Sequel.pg_array_op(:tags).contains(Sequel.pg_array(['document_chunk']))
  from_orphan_file = Sequel.lit("source_context->>'source_file' IN ?", orphan_files)

  stale_entries = entries.where(chunk_tagged).where(from_orphan_file).exclude(status: 'archived')
  stale_entries.update(status: 'archived', updated_at: Time.now)
end
|
|
200
213
|
private_class_method :archive_orphan_entries
|
|
201
214
|
|
|
202
215
|
# Lists the most frequently accessed document chunks.
#
# Selects entries tagged 'document_chunk' whose status is not 'archived' and
# whose access_count is greater than zero, ordered by access_count
# descending and capped at +limit+ rows.
#
# limit - maximum number of rows to return.
#
# Returns an Array of Hashes with :id, :access_count, :confidence and
# :source_file keys. Returns an empty array when the entry model is
# unavailable or when a StandardError is raised (the error is handed to
# +handle_exception+ at :warn level first).
def hot_chunks(limit)
  return [] unless Helpers::ApolloModels.entry_available?

  chunks = Helpers::ApolloModels.entry
                                .where(Sequel.pg_array_op(:tags).contains(Sequel.pg_array(['document_chunk'])))
                                .exclude(status: 'archived')

  # Inside a Sequel virtual-row block `access_count` is an SQL identifier
  # expression, not an Integer, so `.positive?` is undefined there and
  # raised NoMethodError (silently turning every call into []). The `>`
  # operator is what builds the SQL comparison.
  accessed = chunks.where { access_count > 0 } # rubocop:disable Style/NumericPredicate

  accessed.order(Sequel.desc(:access_count))
          .limit(limit)
          .select_map([:id, :access_count, :confidence,
                       Sequel.lit("source_context->>'source_file' AS source_file")])
          .map { |r| { id: r[0], access_count: r[1], confidence: r[2], source_file: r[3] } }
rescue StandardError => e
  handle_exception(e, level: :warn, operation: 'knowledge.maintenance.hot_chunks')
  []
end
|
|
217
231
|
private_class_method :hot_chunks
|
|
218
232
|
|
|
219
233
|
# Lists document chunks that have never been accessed and are older than the
# configured number of days (settings[:maintenance][:cold_chunk_days]).
#
# Selects entries tagged 'document_chunk' whose status is not 'archived',
# whose access_count is 0 and whose created_at precedes the cutoff, oldest
# first, capped at +limit+ rows.
#
# limit - maximum number of rows to return.
#
# Returns an Array of Hashes with :id, :confidence, :created_at (ISO 8601
# string, or nil when the column is nil) and :source_file keys. Returns an
# empty array when the entry model is unavailable or when a StandardError is
# raised (the error is handed to +handle_exception+ at :warn level first).
def cold_chunks(limit)
  return [] unless Helpers::ApolloModels.entry_available?

  seconds_per_day = 86_400
  days = settings[:maintenance][:cold_chunk_days]
  # Kept as a local so it is visible inside the virtual-row block below.
  cutoff = Time.now - (days * seconds_per_day)

  entries = Helpers::ApolloModels.entry
  chunk_tagged = Sequel.pg_array_op(:tags).contains(Sequel.pg_array(['document_chunk']))
  untouched = entries.where(chunk_tagged)
                     .exclude(status: 'archived')
                     .where(access_count: 0)
                     .where { created_at < cutoff }

  rows = untouched.order(:created_at)
                  .limit(limit)
                  .select_map([:id, :confidence, :created_at,
                               Sequel.lit("source_context->>'source_file' AS source_file")])
  rows.map do |row|
    { id: row[0], confidence: row[1], created_at: row[2]&.iso8601, source_file: row[3] }
  end
rescue StandardError => e
  handle_exception(e, level: :warn, operation: 'knowledge.maintenance.cold_chunks')
  []
end
|
|
238
253
|
private_class_method :cold_chunks
|
|
239
254
|
|
|
240
255
|
# Lists document chunks whose confidence is below the configured stale
# threshold (settings[:maintenance][:stale_threshold]).
#
# Selects entries tagged 'document_chunk' whose status is not 'archived',
# lowest confidence first, capped at +limit+ rows.
#
# limit - maximum number of rows to return.
#
# Returns an Array of Hashes with :id, :confidence, :access_count and
# :source_file keys. Returns an empty array when the entry model is
# unavailable or when a StandardError is raised (the error is handed to
# +handle_exception+ at :warn level first).
def low_confidence_chunks(limit)
  return [] unless Helpers::ApolloModels.entry_available?

  # Kept as a local so it is visible inside the virtual-row block below.
  threshold = settings[:maintenance][:stale_threshold]

  entries = Helpers::ApolloModels.entry
  chunk_tagged = Sequel.pg_array_op(:tags).contains(Sequel.pg_array(['document_chunk']))
  weak_chunks = entries.where(chunk_tagged)
                       .exclude(status: 'archived')
                       .where { confidence < threshold }

  rows = weak_chunks.order(:confidence)
                    .limit(limit)
                    .select_map([:id, :confidence, :access_count,
                                 Sequel.lit("source_context->>'source_file' AS source_file")])
  rows.map do |row|
    { id: row[0], confidence: row[1], access_count: row[2], source_file: row[3] }
  end
rescue StandardError => e
  handle_exception(e, level: :warn, operation: 'knowledge.maintenance.low_confidence_chunks')
  []
end
|
|
257
273
|
private_class_method :low_confidence_chunks
|
|
@@ -259,58 +275,33 @@ module Legion
|
|
|
259
275
|
# Summarises retrieval quality for entries tagged 'document_chunk' whose
# status is not 'archived'.
#
# Returns a Hash with:
#   :total_queries          - result of +query_count+
#   :avg_retrieval_score    - always nil in this implementation
#   :chunks_never_accessed  - count of chunks with access_count 0
#   :chunks_below_threshold - count of chunks whose confidence is below
#                             settings[:maintenance][:stale_threshold]
#
# Returns the zeroed defaults when the entry model is unavailable or when a
# StandardError is raised (the error is handed to +handle_exception+ at
# :warn level first).
def quality_summary
  defaults = { total_queries: 0, avg_retrieval_score: nil, chunks_never_accessed: 0,
               chunks_below_threshold: 0 }
  return defaults unless Helpers::ApolloModels.entry_available?

  base = Helpers::ApolloModels.entry
                              .where(Sequel.pg_array_op(:tags).contains(Sequel.pg_array(['document_chunk'])))
                              .exclude(status: 'archived')

  # Resolve the threshold into a local BEFORE the virtual-row block. Sequel
  # instance_evals argument-less blocks, so a bare `settings` call inside the
  # block would be treated as an SQL identifier instead of invoking the
  # settings helper, making the query fail and this method always fall back
  # to the defaults.
  threshold = settings[:maintenance][:stale_threshold]

  {
    total_queries: query_count,
    avg_retrieval_score: nil,
    chunks_never_accessed: base.where(access_count: 0).count,
    chunks_below_threshold: base.where { confidence < threshold }.count
  }
rescue StandardError => e
  handle_exception(e, level: :warn, operation: 'knowledge.maintenance.quality_summary')
  defaults
end
|
|
277
294
|
private_class_method :quality_summary
|
|
278
295
|
|
|
279
296
|
# Counts the access-log rows whose action is 'query'.
#
# Returns 0 when the access-log model is unavailable or when a StandardError
# is raised (the error is handed to +handle_exception+ at :warn level first).
def query_count
  return 0 unless Helpers::ApolloModels.access_log_available?

  query_rows = Helpers::ApolloModels.access_log.where(action: 'query')
  query_rows.count
rescue StandardError => e
  handle_exception(e, level: :warn, operation: 'knowledge.maintenance.query_count')
  0
end
|
|
286
304
|
private_class_method :query_count
|
|
287
|
-
|
|
288
|
-
def settings_stale_threshold
|
|
289
|
-
return 0.3 unless defined?(Legion::Settings)
|
|
290
|
-
|
|
291
|
-
Legion::Settings.dig(:knowledge, :maintenance, :stale_threshold) || 0.3
|
|
292
|
-
rescue StandardError => _e
|
|
293
|
-
0.3
|
|
294
|
-
end
|
|
295
|
-
private_class_method :settings_stale_threshold
|
|
296
|
-
|
|
297
|
-
def settings_cold_chunk_days
|
|
298
|
-
return 7 unless defined?(Legion::Settings)
|
|
299
|
-
|
|
300
|
-
Legion::Settings.dig(:knowledge, :maintenance, :cold_chunk_days) || 7
|
|
301
|
-
rescue StandardError => _e
|
|
302
|
-
7
|
|
303
|
-
end
|
|
304
|
-
private_class_method :settings_cold_chunk_days
|
|
305
|
-
|
|
306
|
-
def settings_quality_limit
|
|
307
|
-
return 10 unless defined?(Legion::Settings)
|
|
308
|
-
|
|
309
|
-
Legion::Settings.dig(:knowledge, :maintenance, :quality_report_limit) || 10
|
|
310
|
-
rescue StandardError => _e
|
|
311
|
-
10
|
|
312
|
-
end
|
|
313
|
-
private_class_method :settings_quality_limit
|
|
314
305
|
end
|
|
315
306
|
end
|
|
316
307
|
end
|
|
@@ -5,6 +5,9 @@ module Legion
|
|
|
5
5
|
module Knowledge
|
|
6
6
|
module Runners
|
|
7
7
|
module Monitor # rubocop:disable Legion/Extension/RunnerIncludeHelpers
|
|
8
|
+
extend Legion::Logging::Helper
|
|
9
|
+
extend Legion::Settings::Helper
|
|
10
|
+
|
|
8
11
|
module_function
|
|
9
12
|
|
|
10
13
|
DEFAULT_EXTENSIONS = %w[.md .txt].freeze
|
|
@@ -18,7 +21,8 @@ module Legion
|
|
|
18
21
|
end
|
|
19
22
|
|
|
20
23
|
monitors
|
|
21
|
-
rescue StandardError =>
|
|
24
|
+
rescue StandardError => e
|
|
25
|
+
handle_exception(e, level: :warn, operation: 'knowledge.monitor.resolve_monitors')
|
|
22
26
|
[]
|
|
23
27
|
end
|
|
24
28
|
|
|
@@ -41,6 +45,7 @@ module Legion
|
|
|
41
45
|
|
|
42
46
|
{ success: true, monitor: entry }
|
|
43
47
|
rescue StandardError => e
|
|
48
|
+
handle_exception(e, level: :warn, operation: 'knowledge.monitor.add_monitor', path: path)
|
|
44
49
|
{ success: false, error: e.message }
|
|
45
50
|
end
|
|
46
51
|
|
|
@@ -54,12 +59,14 @@ module Legion
|
|
|
54
59
|
|
|
55
60
|
{ success: true, removed: found }
|
|
56
61
|
rescue StandardError => e
|
|
62
|
+
handle_exception(e, level: :warn, operation: 'knowledge.monitor.remove_monitor', identifier: identifier)
|
|
57
63
|
{ success: false, error: e.message }
|
|
58
64
|
end
|
|
59
65
|
|
|
60
66
|
# Returns the monitors produced by +resolve_monitors+.
#
# Returns { success: true, monitors: [...] } on success, or
# { success: false, error: <message> } when a StandardError is raised (the
# error is handed to +handle_exception+ at :warn level first).
def list_monitors
  resolved = resolve_monitors
  { success: true, monitors: resolved }
rescue StandardError => e
  handle_exception(e, level: :warn, operation: 'knowledge.monitor.list_monitors')
  { success: false, error: e.message }
end
|
|
65
72
|
|
|
@@ -70,44 +77,40 @@ module Legion
|
|
|
70
77
|
monitors.each do |m|
|
|
71
78
|
scan = Helpers::Manifest.scan(path: m[:path], extensions: m[:extensions])
|
|
72
79
|
total_files += scan.size
|
|
73
|
-
rescue StandardError =>
|
|
80
|
+
rescue StandardError => e
|
|
81
|
+
handle_exception(e, level: :warn, operation: 'knowledge.monitor.scan_monitor', path: m[:path])
|
|
74
82
|
next
|
|
75
83
|
end
|
|
76
84
|
|
|
77
85
|
{ success: true, total_monitors: monitors.size, total_files: total_files }
|
|
78
86
|
rescue StandardError => e
|
|
87
|
+
handle_exception(e, level: :warn, operation: 'knowledge.monitor.monitor_status')
|
|
79
88
|
{ success: false, error: e.message }
|
|
80
89
|
end
|
|
81
90
|
|
|
82
91
|
# --- private helpers ---
|
|
83
92
|
|
|
84
93
|
# Reads the :monitors value from the extension settings.
#
# Returns whatever settings[:monitors] holds, or nil when a StandardError is
# raised while reading it (the error is handed to +handle_exception+ at
# :warn level first).
def read_monitors_setting
  settings[:monitors]
rescue StandardError => e
  handle_exception(e, level: :warn, operation: 'knowledge.monitor.read_monitors_setting')
  nil
end
|
|
91
99
|
private_class_method :read_monitors_setting
|
|
92
100
|
|
|
93
101
|
# Reads the :corpus_path value from the extension settings.
#
# Returns whatever settings[:corpus_path] holds, or nil when a StandardError
# is raised while reading it (the error is handed to +handle_exception+ at
# :warn level first).
def read_legacy_corpus_path
  settings[:corpus_path]
rescue StandardError => e
  handle_exception(e, level: :warn, operation: 'knowledge.monitor.read_legacy_corpus_path')
  nil
end
|
|
100
107
|
private_class_method :read_legacy_corpus_path
|
|
101
108
|
|
|
102
109
|
# Stores +monitors+ under settings[:monitors].
#
# monitors - the monitor list to store.
#
# Returns true when the assignment succeeds, false when a StandardError is
# raised (the error is handed to +handle_exception+ at :warn level first).
# NOTE(review): whether this assignment outlives the current process depends
# on the settings helper, which is not visible here - confirm.
def persist_monitors(monitors)
  settings[:monitors] = monitors
  true
rescue StandardError => e
  handle_exception(e, level: :warn, operation: 'knowledge.monitor.persist_monitors')
  false
end
|
|
113
116
|
private_class_method :persist_monitors
|