lex-knowledge 0.4.0 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/legion/extensions/knowledge/actors/maintenance_runner.rb +48 -0
- data/lib/legion/extensions/knowledge/client.rb +1 -0
- data/lib/legion/extensions/knowledge/runners/maintenance.rb +315 -0
- data/lib/legion/extensions/knowledge/runners/query.rb +75 -8
- data/lib/legion/extensions/knowledge/version.rb +1 -1
- data/lib/legion/extensions/knowledge.rb +2 -0
- metadata +3 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 8d512db91b31e6d8a9747a3987aa90dd406d04f8987400787f6de27b13eb10c5
|
|
4
|
+
data.tar.gz: 2d9df5e1289bb80f603dbd882863d1802bf942e8ff12940d259ba44e25b728f3
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 2022e4654a3a815e8c5433daaad7c2e9767b039c51513e12d9cd6c71a2de47e3ee883c6e0624aa9ac8f6108439c19d4552260694482db4e2da9d58851fa093bd
|
|
7
|
+
data.tar.gz: d1afc8cb8fdcd0317ee9fa416c91827ca3536e40fed404791a355b4b4662360211e3c064dc09e8504d0ed5eb359f1dc906edaa57277f205b1aa9eafedcf261da
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Legion
  module Extensions
    module Knowledge
      module Actor
        # Interval actor that periodically invokes the knowledge maintenance
        # health check (Runners::Maintenance.health) for the configured corpus.
        class MaintenanceRunner < Legion::Extensions::Actors::Every
          # Fallback interval used whenever no setting can be read.
          # NOTE(review): presumably seconds (21_600 s = 6 h) — confirm against Actors::Every.
          DEFAULT_INTERVAL = 21_600

          def runner_class = 'Legion::Extensions::Knowledge::Runners::Maintenance'
          def runner_function = 'health'
          def check_subtask? = false
          def generate_task? = false

          # Interval between runs, read from
          # settings[:knowledge][:actors][:maintenance_interval].
          #
          # @return [Object] the configured value, or DEFAULT_INTERVAL when the
          #   setting is absent or reading it raises
          def every_interval
            return DEFAULT_INTERVAL unless knowledge_settings?

            Legion::Settings.dig(:knowledge, :actors, :maintenance_interval) || DEFAULT_INTERVAL
          rescue StandardError
            DEFAULT_INTERVAL
          end

          # The actor is enabled only when a non-empty corpus path is configured.
          #
          # @return [Boolean] false when the path is missing, empty, or any
          #   error occurs while checking it
          def enabled?
            path = corpus_path # read settings once instead of twice
            return false unless path

            !path.empty?
          rescue StandardError
            false
          end

          # Keyword arguments handed to the runner function.
          #
          # @return [Hash] `{ path: <configured corpus path or nil> }`
          def args
            { path: corpus_path }
          end

          private

          # @return [Object, nil] settings[:knowledge][:corpus_path], or nil when
          #   settings are unavailable or reading them raises
          def corpus_path
            return nil unless knowledge_settings?

            Legion::Settings.dig(:knowledge, :corpus_path)
          rescue StandardError
            nil
          end

          # True when Legion::Settings is loaded and has a :knowledge section.
          def knowledge_settings?
            defined?(Legion::Settings) && !Legion::Settings[:knowledge].nil?
          end
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,315 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Legion
|
|
4
|
+
module Extensions
|
|
5
|
+
module Knowledge
|
|
6
|
+
module Runners
|
|
7
|
+
module Maintenance
|
|
8
|
+
module_function
|
|
9
|
+
|
|
10
|
+
# Finds source files that still have live chunks in Apollo but are no longer
# listed in the corpus manifest.
#
# @param path [String] corpus path whose manifest is consulted
# @return [Hash] on success: :success, :orphan_count, :orphan_files,
#   :total_apollo_chunks, :total_manifest_files; on any StandardError:
#   `{ success: false, error: <message> }`
def detect_orphans(path:)
  tracked = load_manifest_files(path)
  indexed = load_apollo_source_files
  # Orphans are indexed in Apollo yet absent from the manifest.
  orphans = indexed - tracked

  result = { success: true, orphan_count: orphans.size, orphan_files: orphans }
  result[:total_apollo_chunks] = count_apollo_chunks
  result[:total_manifest_files] = tracked.size
  result
rescue StandardError => e
  { success: false, error: e.message }
end
|
|
26
|
+
|
|
27
|
+
# Archives Apollo chunks whose source file is no longer in the manifest.
#
# @param path [String] corpus path passed to detect_orphans
# @param dry_run [Boolean] when truthy (the default) nothing is modified and
#   the detection result is returned with projected counts merged in
# @return [Hash] the failed detection result unchanged; the detection result
#   merged with :archived/:files_cleaned/:dry_run when there is nothing to do
#   or on a dry run; otherwise `{ success:, archived:, files_cleaned:, dry_run: false }`.
#   Any StandardError yields `{ success: false, error: <message> }`.
def cleanup_orphans(path:, dry_run: true)
  detection = detect_orphans(path: path)
  return detection unless detection[:success]

  orphan_total = detection[:orphan_count]
  if orphan_total.zero?
    detection.merge(archived: 0, files_cleaned: 0, dry_run: dry_run)
  elsif dry_run
    # Dry run reports the orphan file count as the projected archive count.
    detection.merge(archived: orphan_total, files_cleaned: detection[:orphan_files].size, dry_run: true)
  else
    archived_rows = archive_orphan_entries(detection[:orphan_files])
    { success: true, archived: archived_rows, files_cleaned: detection[:orphan_files].size, dry_run: false }
  end
rescue StandardError => e
  { success: false, error: e.message }
end
|
|
39
|
+
|
|
40
|
+
# Forces a full re-ingest of the corpus: removes the stored manifest file and
# then runs the ingest runner with `force: true`.
#
# @param path [String] corpus path
# @return [Object] whatever Runners::Ingest.ingest_corpus returns, or
#   `{ success: false, error: <message> }` when any StandardError is raised
def reindex(path:)
  manifest_location = Helpers::ManifestStore.store_path(corpus_path: path)
  # rm_f is a no-op when the manifest does not exist.
  ::FileUtils.rm_f(manifest_location)

  Runners::Ingest.ingest_corpus(path: path, force: true)
rescue StandardError => e
  { success: false, error: e.message }
end
|
|
48
|
+
|
|
49
|
+
# Builds a health snapshot of the corpus: local file stats, Apollo chunk stats
# and the sync state between the two.
#
# @param path [String] corpus path
# @return [Hash] `{ success: true, local:, apollo:, sync: }`, or
#   `{ success: false, error: <message> }` when any StandardError is raised
def health(path:)
  entries = Helpers::Manifest.scan(path: path)
  manifest_location = Helpers::ManifestStore.store_path(corpus_path: path)
  has_manifest = ::File.exist?(manifest_location)
  # The manifest file's mtime stands in for the time of the last ingest.
  ingested_at = (::File.mtime(manifest_location).iso8601 if has_manifest)

  snapshot = { success: true }
  snapshot[:local] = build_local_stats(path, entries, has_manifest, ingested_at)
  snapshot[:apollo] = build_apollo_stats
  snapshot[:sync] = build_sync_stats(path, entries)
  snapshot
rescue StandardError => e
  { success: false, error: e.message }
end
|
|
64
|
+
|
|
65
|
+
# Assembles a chunk quality report: most-accessed, never-accessed-and-old and
# low-confidence chunks plus a summary.
#
# @param limit [Integer, nil] max rows per list; falls back to the configured
#   quality report limit when nil
# @return [Hash] :success, :hot_chunks, :cold_chunks, :low_confidence,
#   :poor_retrieval (always an empty array here) and :summary; or
#   `{ success: false, error: <message> }` when any StandardError is raised
def quality_report(limit: nil)
  cap = limit || settings_quality_limit

  report = { success: true }
  report[:hot_chunks] = hot_chunks(cap)
  report[:cold_chunks] = cold_chunks(cap)
  report[:low_confidence] = low_confidence_chunks(cap)
  report[:poor_retrieval] = []
  report[:summary] = quality_summary
  report
rescue StandardError => e
  { success: false, error: e.message }
end
|
|
79
|
+
|
|
80
|
+
# Summarises the on-disk corpus for the health report.
#
# @param path [String] corpus path, echoed back as :corpus_path
# @param scan_entries [Array<Hash>] scanned files; each entry's :size is summed
# @param manifest_file [Boolean] whether the manifest file exists
# @param last_ingest [String, nil] timestamp of the last ingest, if any
# @return [Hash] :corpus_path, :file_count, :total_bytes, :manifest_exists, :last_ingest
def build_local_stats(path, scan_entries, manifest_file, last_ingest)
  byte_total = scan_entries.sum { |entry| entry[:size] }

  stats = { corpus_path: path, file_count: scan_entries.size, total_bytes: byte_total }
  stats[:manifest_exists] = manifest_file
  stats[:last_ingest] = last_ingest
  stats
end
|
|
89
|
+
private_class_method :build_local_stats
|
|
90
|
+
|
|
91
|
+
# Collects statistics about non-archived Apollo entries tagged 'document_chunk'.
#
# @return [Hash] stats from apollo_stats_from_rows, or apollo_defaults when the
#   ApolloEntry model is not loaded, there are no matching chunks, or any
#   StandardError is raised
def build_apollo_stats
  return apollo_defaults unless defined?(Legion::Data::Model::ApolloEntry)

  live_chunks = Legion::Data::Model::ApolloEntry
                .where(Sequel.pg_array_op(:tags).contains(Sequel.pg_array(['document_chunk'])))
                .exclude(status: 'archived')
  chunk_total = live_chunks.count

  if chunk_total.zero?
    apollo_defaults
  else
    # Only the columns the statistics need are fetched.
    fetched = live_chunks.select(:confidence, :status, :access_count, :embedding, :created_at).all
    apollo_stats_from_rows(live_chunks, fetched, chunk_total)
  end
rescue StandardError
  apollo_defaults
end
|
|
105
|
+
private_class_method :build_apollo_stats
|
|
106
|
+
|
|
107
|
+
# Derives the Apollo section of the health report from already-fetched rows.
#
# @param base [Object] dataset of the same chunks; used for the per-status
#   counts via `group_and_count(:status)`
# @param rows [Array<Hash>] rows exposing :confidence, :access_count,
#   :embedding and :created_at
# @param total [Integer] chunk count reported as :total_chunks and used as the
#   denominator of :embedding_coverage
# @return [Hash] :total_chunks, :by_status, :embedding_coverage,
#   :avg_confidence, :confidence_range, :stale_count, :never_accessed,
#   :unique_source_files, :oldest_chunk, :newest_chunk
def apollo_stats_from_rows(base, rows, total)
  confidences     = rows.map { |r| r[:confidence].to_f }
  with_embeddings = rows.count { |r| !r[:embedding].nil? }
  stale_threshold = settings_stale_threshold
  # Drop missing timestamps: min/max raise when nil is compared with a Time,
  # which previously made the caller fall back to all-default stats.
  timestamps      = rows.filter_map { |r| r[:created_at] }

  {
    total_chunks: total,
    by_status: base.group_and_count(:status).as_hash(:status, :count).transform_keys(&:to_sym),
    embedding_coverage: total.zero? ? 0.0 : (with_embeddings.to_f / total).round(4),
    # With no rows (e.g. deleted between the count and the fetch) report the
    # same neutral values as the defaults instead of NaN / [nil, nil].
    avg_confidence: confidences.empty? ? 0.0 : confidences.sum / confidences.size.to_f,
    confidence_range: confidences.empty? ? [0.0, 0.0] : confidences.minmax,
    stale_count: confidences.count { |c| c < stale_threshold },
    never_accessed: rows.count { |r| r[:access_count].to_i.zero? },
    unique_source_files: load_apollo_source_files.size,
    oldest_chunk: timestamps.min&.iso8601,
    newest_chunk: timestamps.max&.iso8601
  }
end
|
|
126
|
+
private_class_method :apollo_stats_from_rows
|
|
127
|
+
|
|
128
|
+
# Neutral Apollo statistics returned when no chunk data is available.
#
# @return [Hash] a fresh hash on every call with zero counts, empty status
#   breakdown, a [0.0, 0.0] confidence range and nil timestamps
def apollo_defaults
  {
    # counts and breakdown
    total_chunks: 0, by_status: {},
    # ratios and confidence
    embedding_coverage: 0.0, avg_confidence: 0.0, confidence_range: [0.0, 0.0],
    # access / staleness tallies
    stale_count: 0, never_accessed: 0, unique_source_files: 0,
    # timestamps
    oldest_chunk: nil, newest_chunk: nil
  }
end
|
|
142
|
+
private_class_method :apollo_defaults
|
|
143
|
+
|
|
144
|
+
# Compares manifest, Apollo and on-disk file lists for the health report.
#
# @param path [String] corpus path used to load the manifest
# @param scan_entries [Array<Hash>] scanned files; each entry's :path is used
# @return [Hash] :orphan_count (in Apollo but not in the manifest) and
#   :missing_count (on disk but not in Apollo)
def build_sync_stats(path, scan_entries)
  in_manifest = load_manifest_files(path)
  in_apollo = load_apollo_source_files
  on_disk = scan_entries.map { |entry| entry[:path] }

  orphaned = in_apollo - in_manifest
  unindexed = on_disk - in_apollo
  { orphan_count: orphaned.size, missing_count: unindexed.size }
end
|
|
154
|
+
private_class_method :build_sync_stats
|
|
155
|
+
|
|
156
|
+
# Lists the distinct file paths recorded in the stored manifest.
#
# @param path [String] corpus path whose manifest is loaded
# @return [Array] unique, non-nil :path values of the manifest entries
def load_manifest_files(path)
  entries = Helpers::ManifestStore.load(corpus_path: path)
  recorded_paths = entries.map { |entry| entry[:path] }
  recorded_paths.compact.uniq
end
|
|
160
|
+
private_class_method :load_manifest_files
|
|
161
|
+
|
|
162
|
+
# Lists the distinct source files referenced by non-archived Apollo entries
# tagged 'document_chunk' (read from source_context->>'source_file').
#
# @return [Array] unique, non-nil source file values; an empty array when the
#   ApolloEntry model is not loaded or any StandardError is raised
def load_apollo_source_files
  return [] unless defined?(Legion::Data::Model::ApolloEntry)

  live_chunks = Legion::Data::Model::ApolloEntry
                .where(Sequel.pg_array_op(:tags).contains(Sequel.pg_array(['document_chunk'])))
                .exclude(status: 'archived')
  source_files = live_chunks.select_map(Sequel.lit("source_context->>'source_file'"))
  source_files.compact.uniq
rescue StandardError
  []
end
|
|
174
|
+
private_class_method :load_apollo_source_files
|
|
175
|
+
|
|
176
|
+
# Counts non-archived Apollo entries tagged 'document_chunk'.
#
# @return [Integer] the count; 0 when the ApolloEntry model is not loaded or
#   any StandardError is raised
def count_apollo_chunks
  return 0 unless defined?(Legion::Data::Model::ApolloEntry)

  tag_filter = Sequel.pg_array_op(:tags).contains(Sequel.pg_array(['document_chunk']))
  Legion::Data::Model::ApolloEntry.where(tag_filter).exclude(status: 'archived').count
rescue StandardError
  0
end
|
|
186
|
+
private_class_method :count_apollo_chunks
|
|
187
|
+
|
|
188
|
+
# Marks every non-archived 'document_chunk' entry whose source file is in
# +orphan_files+ as archived and stamps updated_at.
#
# Errors are not rescued here; they propagate to the caller.
#
# @param orphan_files [Array<String>] source file values to archive
# @return [Object] the result of the dataset `update` call, or 0 when the
#   ApolloEntry model is not loaded
def archive_orphan_entries(orphan_files)
  return 0 unless defined?(Legion::Data::Model::ApolloEntry)

  tag_filter = Sequel.pg_array_op(:tags).contains(Sequel.pg_array(['document_chunk']))
  file_filter = Sequel.lit("source_context->>'source_file' IN ?", orphan_files)

  targets = Legion::Data::Model::ApolloEntry.where(tag_filter).where(file_filter).exclude(status: 'archived')
  targets.update(status: 'archived', updated_at: Time.now)
end
|
|
197
|
+
private_class_method :archive_orphan_entries
|
|
198
|
+
|
|
199
|
+
# Returns the most frequently accessed non-archived 'document_chunk' entries.
#
# @param limit [Integer] maximum number of rows to return
# @return [Array<Hash>] rows as `{ id:, access_count:, confidence:, source_file: }`
#   ordered by access_count descending; an empty array when the ApolloEntry
#   model is not loaded or any StandardError is raised
def hot_chunks(limit)
  return [] unless defined?(Legion::Data::Model::ApolloEntry)

  Legion::Data::Model::ApolloEntry
    .where(Sequel.pg_array_op(:tags).contains(Sequel.pg_array(['document_chunk'])))
    .exclude(status: 'archived')
    # Inside a Sequel virtual-row block `access_count` is an SQL identifier,
    # not a Numeric, so `.positive?` raised NoMethodError and the rescue below
    # turned every call into []. The comparison operator builds the SQL filter.
    .where { access_count > 0 } # rubocop:disable Style/NumericPredicate
    .order(Sequel.desc(:access_count))
    .limit(limit)
    .select_map([:id, :access_count, :confidence,
                 Sequel.lit("source_context->>'source_file' AS source_file")])
    .map { |r| { id: r[0], access_count: r[1], confidence: r[2], source_file: r[3] } }
rescue StandardError
  []
end
|
|
214
|
+
private_class_method :hot_chunks
|
|
215
|
+
|
|
216
|
+
# Returns never-accessed, non-archived 'document_chunk' entries created more
# than the configured number of days ago, oldest first.
#
# @param limit [Integer] maximum number of rows to return
# @return [Array<Hash>] rows as `{ id:, confidence:, created_at:, source_file: }`
#   with created_at as an ISO 8601 string (or nil); an empty array when the
#   ApolloEntry model is not loaded or any StandardError is raised
def cold_chunks(limit)
  return [] unless defined?(Legion::Data::Model::ApolloEntry)

  age_seconds = settings_cold_chunk_days * 86_400
  cutoff = Time.now - age_seconds

  never_read = Legion::Data::Model::ApolloEntry
               .where(Sequel.pg_array_op(:tags).contains(Sequel.pg_array(['document_chunk'])))
               .exclude(status: 'archived')
               .where(access_count: 0)
               .where { created_at < cutoff }
  tuples = never_read
           .order(:created_at)
           .limit(limit)
           .select_map([:id, :confidence, :created_at,
                        Sequel.lit("source_context->>'source_file' AS source_file")])

  tuples.map do |id, score, created, source_file|
    { id: id, confidence: score, created_at: created&.iso8601, source_file: source_file }
  end
rescue StandardError
  []
end
|
|
235
|
+
private_class_method :cold_chunks
|
|
236
|
+
|
|
237
|
+
# Returns non-archived 'document_chunk' entries whose confidence is below the
# configured stale threshold, lowest confidence first.
#
# @param limit [Integer] maximum number of rows to return
# @return [Array<Hash>] rows as `{ id:, confidence:, access_count:, source_file: }`;
#   an empty array when the ApolloEntry model is not loaded or any
#   StandardError is raised
def low_confidence_chunks(limit)
  return [] unless defined?(Legion::Data::Model::ApolloEntry)

  # Kept in a local so the virtual-row block below sees a plain Ruby value.
  threshold = settings_stale_threshold

  weak = Legion::Data::Model::ApolloEntry
         .where(Sequel.pg_array_op(:tags).contains(Sequel.pg_array(['document_chunk'])))
         .exclude(status: 'archived')
         .where { confidence < threshold }
  tuples = weak
           .order(:confidence)
           .limit(limit)
           .select_map([:id, :confidence, :access_count,
                        Sequel.lit("source_context->>'source_file' AS source_file")])

  tuples.map do |id, score, hits, source_file|
    { id: id, confidence: score, access_count: hits, source_file: source_file }
  end
rescue StandardError
  []
end
|
|
254
|
+
private_class_method :low_confidence_chunks
|
|
255
|
+
|
|
256
|
+
# Builds the summary section of the quality report.
#
# @return [Hash] :total_queries, :avg_retrieval_score (always nil here),
#   :chunks_never_accessed and :chunks_below_threshold; all-zero defaults when
#   the ApolloEntry model is not loaded or any StandardError is raised
def quality_summary
  defaults = { total_queries: 0, avg_retrieval_score: nil, chunks_never_accessed: 0,
               chunks_below_threshold: 0 }
  return defaults unless defined?(Legion::Data::Model::ApolloEntry)

  # Resolve the threshold BEFORE entering the virtual-row block: inside
  # `where { ... }` a bare method name is turned into an SQL identifier, so
  # calling settings_stale_threshold there compared against a non-existent
  # column and the rescue below silently returned the defaults.
  threshold = settings_stale_threshold

  base = Legion::Data::Model::ApolloEntry
         .where(Sequel.pg_array_op(:tags).contains(Sequel.pg_array(['document_chunk'])))
         .exclude(status: 'archived')

  {
    total_queries: query_count,
    avg_retrieval_score: nil,
    chunks_never_accessed: base.where(access_count: 0).count,
    chunks_below_threshold: base.where { confidence < threshold }.count
  }
rescue StandardError
  defaults
end
|
|
274
|
+
private_class_method :quality_summary
|
|
275
|
+
|
|
276
|
+
# Counts access-log rows whose action is 'knowledge_query'.
#
# @return [Integer] the count; 0 when the ApolloAccessLog model is not loaded
#   or any StandardError is raised
def query_count
  if defined?(Legion::Data::Model::ApolloAccessLog)
    Legion::Data::Model::ApolloAccessLog.where(action: 'knowledge_query').count
  else
    0
  end
rescue StandardError
  0
end
|
|
283
|
+
private_class_method :query_count
|
|
284
|
+
|
|
285
|
+
# Confidence value below which a chunk is treated as stale, read from
# settings[:knowledge][:maintenance][:stale_threshold].
#
# @return [Object] the configured value, or 0.3 when settings are not loaded,
#   the key is unset, or reading it raises
def settings_stale_threshold
  fallback = 0.3
  return fallback unless defined?(Legion::Settings)

  configured = Legion::Settings.dig(:knowledge, :maintenance, :stale_threshold)
  configured || fallback
rescue StandardError
  0.3
end
|
|
292
|
+
private_class_method :settings_stale_threshold
|
|
293
|
+
|
|
294
|
+
# Age in days after which a never-accessed chunk counts as cold, read from
# settings[:knowledge][:maintenance][:cold_chunk_days].
#
# @return [Object] the configured value, or 7 when settings are not loaded,
#   the key is unset, or reading it raises
def settings_cold_chunk_days
  fallback = 7
  return fallback unless defined?(Legion::Settings)

  configured = Legion::Settings.dig(:knowledge, :maintenance, :cold_chunk_days)
  configured || fallback
rescue StandardError
  7
end
|
|
301
|
+
private_class_method :settings_cold_chunk_days
|
|
302
|
+
|
|
303
|
+
# Default row limit for each list in the quality report, read from
# settings[:knowledge][:maintenance][:quality_report_limit].
#
# @return [Object] the configured value, or 10 when settings are not loaded,
#   the key is unset, or reading it raises
def settings_quality_limit
  fallback = 10
  return fallback unless defined?(Legion::Settings)

  configured = Legion::Settings.dig(:knowledge, :maintenance, :quality_report_limit)
  configured || fallback
rescue StandardError
  10
end
|
|
310
|
+
private_class_method :settings_quality_limit
|
|
311
|
+
end
|
|
312
|
+
end
|
|
313
|
+
end
|
|
314
|
+
end
|
|
315
|
+
end
|
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
require 'digest'
|
|
4
|
+
|
|
3
5
|
module Legion
|
|
4
6
|
module Extensions
|
|
5
7
|
module Knowledge
|
|
@@ -17,15 +19,21 @@ module Legion
|
|
|
17
19
|
|
|
18
20
|
latency_ms = ((::Process.clock_gettime(::Process::CLOCK_MONOTONIC) - started) * 1000).round
|
|
19
21
|
|
|
22
|
+
score = average_score(chunks)
|
|
23
|
+
unless chunks.empty?
|
|
24
|
+
record_feedback(
|
|
25
|
+
question: question,
|
|
26
|
+
chunk_ids: chunks.filter_map { |c| c[:id] },
|
|
27
|
+
retrieval_score: score.to_f,
|
|
28
|
+
synthesized: synthesize && llm_available?
|
|
29
|
+
)
|
|
30
|
+
end
|
|
31
|
+
|
|
20
32
|
{
|
|
21
33
|
success: true,
|
|
22
34
|
answer: answer,
|
|
23
35
|
sources: chunks.map { |c| format_source(c) },
|
|
24
|
-
metadata:
|
|
25
|
-
retrieval_score: average_score(chunks),
|
|
26
|
-
chunk_count: chunks.size,
|
|
27
|
-
latency_ms: latency_ms
|
|
28
|
-
}
|
|
36
|
+
metadata: build_metadata(chunks, score, latency_ms)
|
|
29
37
|
}
|
|
30
38
|
rescue StandardError => e
|
|
31
39
|
{ success: false, error: e.message }
|
|
@@ -38,14 +46,26 @@ module Legion
|
|
|
38
46
|
{
|
|
39
47
|
success: true,
|
|
40
48
|
sources: chunks.map { |c| format_source(c) },
|
|
41
|
-
metadata:
|
|
42
|
-
chunk_count: chunks.size
|
|
43
|
-
}
|
|
49
|
+
metadata: build_metadata(chunks, average_score(chunks))
|
|
44
50
|
}
|
|
45
51
|
rescue StandardError => e
|
|
46
52
|
{ success: false, error: e.message }
|
|
47
53
|
end
|
|
48
54
|
|
|
55
|
+
# Emits a feedback event for a query without exposing the question text: only
# the first 16 hex characters of its SHA-256 digest are passed on.
#
# @param question [#to_s] the question that was asked
# @param chunk_ids [Array] ids of the chunks that were retrieved
# @param retrieval_score [Numeric] score to record for the retrieval
# @param synthesized [Boolean] whether an answer was synthesized (default true)
# @param rating [Object, nil] optional rating
# @return [Hash] `{ success: true, question_hash:, rating: }`, or
#   `{ success: false, error: <message> }` when any StandardError is raised
def record_feedback(question:, chunk_ids:, retrieval_score:, synthesized: true, rating: nil)
  full_digest = ::Digest::SHA256.hexdigest(question.to_s)
  question_hash = full_digest[0, 16]

  event = {
    question_hash: question_hash,
    chunk_ids: chunk_ids,
    retrieval_score: retrieval_score,
    synthesized: synthesized,
    rating: rating
  }
  emit_feedback_event(**event)

  { success: true, question_hash: question_hash, rating: rating }
rescue StandardError => e
  { success: false, error: e.message }
end
|
|
68
|
+
|
|
49
69
|
def retrieve_chunks(question, top_k)
|
|
50
70
|
return [] unless defined?(Legion::Extensions::Apollo)
|
|
51
71
|
|
|
@@ -97,6 +117,53 @@ module Legion
|
|
|
97
117
|
end
|
|
98
118
|
private_class_method :average_score
|
|
99
119
|
|
|
120
|
+
# Builds the :metadata section of a query/retrieve response from the chunks.
#
# @param chunks [Array<Hash>] retrieved chunks; :confidence, :distance,
#   :status, :source_file and metadata[:source_file] are read when present
# @param score [Object] value reported as :retrieval_score
# @param latency_ms [Integer, nil] included as :latency_ms only when not nil
# @return [Hash] :retrieval_score, :chunk_count, :confidence_avg,
#   :confidence_range, :distance_range, :source_files, :source_file_count,
#   :all_embedded, :statuses and optionally :latency_ms
def build_metadata(chunks, score, latency_ms = nil)
  confidence_values = chunks.filter_map { |chunk| chunk[:confidence] }
  distance_values = chunks.filter_map { |chunk| chunk[:distance] }
  # Prefer the nested metadata entry, fall back to the top-level key.
  files = chunks.filter_map { |chunk| chunk.dig(:metadata, :source_file) || chunk[:source_file] }.uniq
  status_counts = chunks.map { |chunk| chunk[:status] }.tally

  confidence_avg = nil
  confidence_range = nil
  unless confidence_values.empty?
    confidence_avg = (confidence_values.sum.to_f / confidence_values.size).round(4)
    confidence_range = confidence_values.minmax
  end

  summary = {
    retrieval_score: score,
    chunk_count: chunks.size,
    confidence_avg: confidence_avg,
    confidence_range: confidence_range,
    distance_range: (distance_values.minmax unless distance_values.empty?),
    source_files: files,
    source_file_count: files.size,
    all_embedded: chunks.none? { |chunk| zero_embedding?(chunk) },
    statuses: status_counts
  }
  latency_ms.nil? ? summary : summary.merge(latency_ms: latency_ms)
end
|
|
142
|
+
private_class_method :build_metadata
|
|
143
|
+
|
|
144
|
+
# Tells whether a chunk lacks a usable embedding.
#
# @param chunk [Hash] chunk whose :embedding is inspected
# @return [Boolean] true when the embedding is nil, an empty array, or an
#   array of all zeros; false for any other value (including non-arrays)
def zero_embedding?(chunk)
  vector = chunk[:embedding]

  case vector
  when nil then true
  when Array then vector.empty? || vector.all?(&:zero?)
  else false
  end
end
|
|
150
|
+
private_class_method :zero_embedding?
|
|
151
|
+
|
|
152
|
+
# Publishes a 'knowledge.query_feedback' event through Legion::Events when
# that module is loaded. Best effort: failures are swallowed.
#
# @param question_hash [String] truncated digest identifying the question
# @param chunk_ids [Array] ids of the retrieved chunks
# @param retrieval_score [Numeric] retrieval score to record
# @param synthesized [Boolean] whether an answer was synthesized
# @param rating [Object, nil] optional rating
# @return [Object, nil] the result of Legion::Events.emit; nil when
#   Legion::Events is not loaded or any StandardError is raised
def emit_feedback_event(question_hash:, chunk_ids:, retrieval_score:, synthesized:, rating:)
  return unless defined?(Legion::Events)

  payload = {
    question_hash: question_hash,
    chunk_ids: chunk_ids,
    retrieval_score: retrieval_score,
    synthesized: synthesized,
    rating: rating
  }
  Legion::Events.emit('knowledge.query_feedback', payload)
rescue StandardError
  nil
end
|
|
165
|
+
private_class_method :emit_feedback_event
|
|
166
|
+
|
|
100
167
|
# Whether the Legion::LLM constant is loaded.
#
# `defined?` yields a description String ("constant") or nil; it is coerced
# to a real boolean here so that values derived from this predicate (such as
# the `synthesized:` feedback field) are true/false.
#
# @return [Boolean]
def llm_available?
  !defined?(Legion::LLM).nil?
end
|
|
@@ -8,6 +8,7 @@ require_relative 'knowledge/helpers/chunker'
|
|
|
8
8
|
require_relative 'knowledge/runners/ingest'
|
|
9
9
|
require_relative 'knowledge/runners/query'
|
|
10
10
|
require_relative 'knowledge/runners/corpus'
|
|
11
|
+
require_relative 'knowledge/runners/maintenance'
|
|
11
12
|
require_relative 'knowledge/client'
|
|
12
13
|
|
|
13
14
|
if defined?(Legion::Transport)
|
|
@@ -17,6 +18,7 @@ if defined?(Legion::Transport)
|
|
|
17
18
|
end
|
|
18
19
|
|
|
19
20
|
require_relative 'knowledge/actors/corpus_watcher' if defined?(Legion::Extensions::Actors::Every)
|
|
21
|
+
require_relative 'knowledge/actors/maintenance_runner' if defined?(Legion::Extensions::Actors::Every)
|
|
20
22
|
|
|
21
23
|
require_relative 'knowledge/actors/corpus_ingest' if defined?(Legion::Extensions::Actors::Subscription)
|
|
22
24
|
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: lex-knowledge
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.4.0
|
|
4
|
+
version: 0.5.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Matthew Iverson
|
|
@@ -118,6 +118,7 @@ files:
|
|
|
118
118
|
- lib/legion/extensions/knowledge.rb
|
|
119
119
|
- lib/legion/extensions/knowledge/actors/corpus_ingest.rb
|
|
120
120
|
- lib/legion/extensions/knowledge/actors/corpus_watcher.rb
|
|
121
|
+
- lib/legion/extensions/knowledge/actors/maintenance_runner.rb
|
|
121
122
|
- lib/legion/extensions/knowledge/client.rb
|
|
122
123
|
- lib/legion/extensions/knowledge/helpers/chunker.rb
|
|
123
124
|
- lib/legion/extensions/knowledge/helpers/manifest.rb
|
|
@@ -125,6 +126,7 @@ files:
|
|
|
125
126
|
- lib/legion/extensions/knowledge/helpers/parser.rb
|
|
126
127
|
- lib/legion/extensions/knowledge/runners/corpus.rb
|
|
127
128
|
- lib/legion/extensions/knowledge/runners/ingest.rb
|
|
129
|
+
- lib/legion/extensions/knowledge/runners/maintenance.rb
|
|
128
130
|
- lib/legion/extensions/knowledge/runners/query.rb
|
|
129
131
|
- lib/legion/extensions/knowledge/transport/exchanges/knowledge.rb
|
|
130
132
|
- lib/legion/extensions/knowledge/transport/messages/ingest_message.rb
|