lex-knowledge 0.6.10 → 0.6.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require_relative '../helpers/apollo_models'
4
+
3
5
  require 'digest'
4
6
 
5
7
  module Legion
@@ -7,13 +9,23 @@ module Legion
7
9
  module Knowledge
8
10
  module Runners
9
11
  module Query # rubocop:disable Legion/Extension/RunnerIncludeHelpers
12
+ extend Legion::Logging::Helper
13
+ extend Legion::JSON::Helper
14
+ extend Legion::Settings::Helper
15
+
10
16
  module_function
11
17
 
12
- def query(question:, top_k: nil, synthesize: true)
18
+ def query(question:, top_k: nil, synthesize: true, expand_neighbors: false, neighbor_radius: nil)
13
19
  started = ::Process.clock_gettime(::Process::CLOCK_MONOTONIC)
14
- resolved_k = top_k || settings_top_k || 5
20
+ resolved_k = top_k || settings[:query][:top_k]
21
+ resolved_radius = resolve_neighbor_radius(neighbor_radius)
15
22
 
16
- chunks = retrieve_chunks(question, resolved_k)
23
+ chunks = retrieve_chunks(
24
+ question,
25
+ resolved_k,
26
+ expand_neighbors: expand_neighbors,
27
+ neighbor_radius: resolved_radius
28
+ )
17
29
 
18
30
  answer = (synthesize_answer(question, chunks) if synthesize && llm_available?)
19
31
 
@@ -36,12 +48,19 @@ module Legion
36
48
  metadata: build_metadata(chunks, score, latency_ms)
37
49
  }
38
50
  rescue StandardError => e
51
+ handle_exception(e, level: :warn, operation: 'knowledge.query.query')
39
52
  { success: false, error: e.message }
40
53
  end
41
54
 
42
- def retrieve(question:, top_k: nil)
43
- resolved_k = top_k || settings_top_k || 5
44
- chunks = retrieve_chunks(question, resolved_k)
55
+ def retrieve(question:, top_k: nil, expand_neighbors: false, neighbor_radius: nil)
56
+ resolved_k = top_k || settings[:query][:top_k]
57
+ resolved_radius = resolve_neighbor_radius(neighbor_radius)
58
+ chunks = retrieve_chunks(
59
+ question,
60
+ resolved_k,
61
+ expand_neighbors: expand_neighbors,
62
+ neighbor_radius: resolved_radius
63
+ )
45
64
 
46
65
  {
47
66
  success: true,
@@ -49,6 +68,7 @@ module Legion
49
68
  metadata: build_metadata(chunks, average_score(chunks))
50
69
  }
51
70
  rescue StandardError => e
71
+ handle_exception(e, level: :warn, operation: 'knowledge.query.retrieve')
52
72
  { success: false, error: e.message }
53
73
  end
54
74
 
@@ -63,10 +83,11 @@ module Legion
63
83
  )
64
84
  { success: true, question_hash: question_hash, rating: rating }
65
85
  rescue StandardError => e
86
+ handle_exception(e, level: :warn, operation: 'knowledge.query.record_feedback')
66
87
  { success: false, error: e.message }
67
88
  end
68
89
 
69
- def retrieve_chunks(question, top_k)
90
+ def retrieve_chunks(question, top_k, expand_neighbors: false, neighbor_radius: 1)
70
91
  return [] unless defined?(Legion::Extensions::Apollo)
71
92
 
72
93
  result = Legion::Extensions::Apollo::Runners::Knowledge.retrieve_relevant(
@@ -74,12 +95,81 @@ module Legion
74
95
  limit: top_k,
75
96
  tags: ['document_chunk']
76
97
  )
77
- result.is_a?(Hash) && result[:success] ? Array(result[:entries]) : []
78
- rescue StandardError => _e
98
+ chunks = result.is_a?(Hash) && result[:success] ? Array(result[:entries]) : []
99
+ expand_neighbors ? expand_neighbor_chunks(chunks, neighbor_radius) : chunks
100
+ rescue StandardError => e
101
+ handle_exception(e, level: :warn, operation: 'knowledge.query.retrieve_chunks')
79
102
  []
80
103
  end
81
104
  private_class_method :retrieve_chunks
82
105
 
106
# Widens a retrieved chunk list with the chunks stored next to each hit.
#
# @param chunks [Array<Hash>] chunks returned by retrieval
# @param neighbor_radius [#to_i] how many positions to include on each side
# @return [Array<Hash>] the de-duplicated, expanded list; the original
#   +chunks+ when the list is empty, the radius is not positive, the entry
#   model is unavailable, or an error is raised
def expand_neighbor_chunks(chunks, neighbor_radius)
  return chunks if chunks.empty?

  span = neighbor_radius.to_i
  # The availability check is only reached for a positive radius.
  return chunks if span <= 0 || !Helpers::ApolloModels.entry_available?

  windows = chunks.flat_map { |seed| neighbor_window_for(seed, span) }
  merge_neighbor_chunks(windows)
rescue StandardError => e
  handle_exception(e, level: :warn, operation: 'knowledge.query.expand_neighbor_chunks')
  chunks
end
117
+ private_class_method :expand_neighbor_chunks
118
+
119
# Collects the stored document chunks within +radius+ positions of +chunk+
# in the same source file, ordered by chunk index.
#
# @param chunk [Hash] a retrieved chunk; its source file and chunk index are
#   resolved through chunk_context
# @param radius [Integer] number of positions to include on each side
# @return [Array<Hash>] the window including the seed chunk; +[chunk]+ when
#   the chunk has no source file / chunk index or when an error is raised
def neighbor_window_for(chunk, radius)
  context = chunk_context(chunk)
  return [chunk] unless context[:source_file] && !context[:chunk_index].nil?

  center = context[:chunk_index].to_i
  # Loop-invariant: resolve the seed's key once instead of once per row.
  seed_key = chunk_dedupe_key(chunk)
  # Stored rows carry no retrieval score, so copy the seed's onto the row
  # that represents it; format_source reads :distance / :score from chunks.
  retrieval_scores = { distance: chunk[:distance], score: chunk[:score] }.compact
  seed_found = false

  rows = neighbor_dataset(context[:source_file], center - radius, center + radius).all.map do |entry|
    row = chunk_from_entry(entry)
    next row unless chunk_dedupe_key(row) == seed_key

    seed_found = true
    row.merge(retrieval_scores)
  end

  # Keep the seed even when the store did not return a row for it.
  rows << chunk unless seed_found
  rows.sort_by { |row| chunk_context(row)[:chunk_index].to_i }
rescue StandardError => e
  handle_exception(e, level: :warn, operation: 'knowledge.query.neighbor_window')
  [chunk]
end
135
+ private_class_method :neighbor_window_for
136
+
137
# Builds the dataset of document-chunk entries from one source file whose
# chunk index lies in the inclusive range +lower+..+upper+, ordered by
# ascending chunk index.
#
# @param source_file [String] value matched against source_context->>'source_file'
# @param lower [Integer] smallest chunk index to include
# @param upper [Integer] largest chunk index to include
# @return [Object] the chained dataset returned by Helpers::ApolloModels.entry
def neighbor_dataset(source_file, lower, upper)
  # Values are bound through placeholders rather than interpolated into SQL.
  same_file = Sequel.lit("source_context->>'source_file' = ?", source_file)
  in_window = Sequel.lit("(source_context->>'chunk_index')::integer BETWEEN ? AND ?", lower, upper)
  by_index = Sequel.lit("(source_context->>'chunk_index')::integer ASC")

  document_chunks = Helpers::ApolloModels.entry.where(content_type: 'document_chunk')
  document_chunks.where(same_file).where(in_window).order(by_index)
end
144
+ private_class_method :neighbor_dataset
145
+
146
# Converts a stored entry into the chunk hash shape used by this runner.
#
# @param entry [Hash, #values] a row hash, or an object whose #values returns
#   the row hash (presumably a Sequel model instance — confirm against
#   Helpers::ApolloModels.entry)
# @return [Hash] chunk attributes with nil fields removed; the entry's
#   context (source_context, metadata or context, first present) is exposed
#   under :metadata
def chunk_from_entry(entry)
  # A plain Hash also responds to #values, but Hash#values returns an Array
  # that cannot be indexed by symbol, so only unwrap non-Hash entries.
  unwrap = !entry.is_a?(Hash) && entry.respond_to?(:values)
  values = unwrap ? entry.values : entry
  context = normalize_context(values[:source_context] || values[:metadata] || values[:context])

  {
    id: values[:id],
    content: values[:content],
    content_type: values[:content_type],
    confidence: values[:confidence],
    tags: values[:tags],
    source_agent: values[:source_agent],
    knowledge_domain: values[:knowledge_domain],
    status: values[:status],
    content_hash: values[:content_hash],
    metadata: context
  }.compact
end
163
+ private_class_method :chunk_from_entry
164
+
165
# Removes duplicate chunks, keeping the first occurrence of each dedupe key
# and preserving the incoming order.
#
# @param chunks [Array<Hash>] chunks that may contain repeats
# @return [Array<Hash>] chunks with one entry per chunk_dedupe_key
def merge_neighbor_chunks(chunks)
  chunks.uniq { |candidate| chunk_dedupe_key(candidate) }
end
171
+ private_class_method :merge_neighbor_chunks
172
+
83
173
  def synthesize_answer(question, chunks)
84
174
  return nil unless llm_available?
85
175
 
@@ -91,10 +181,14 @@ module Legion
91
181
  "Context:\n#{context_text}\n\nQuestion: #{question}\n\nAnswer:"
92
182
  end
93
183
 
94
- result = llm_chat(message: prompt, caller: { extension: 'lex-knowledge' })
95
- result.is_a?(Hash) ? result[:content] : result
184
+ result = Legion::LLM.chat( # rubocop:disable Legion/HelperMigration/DirectLlm
185
+ message: prompt,
186
+ caller: { extension: 'lex-knowledge' }
187
+ )
188
+ result.is_a?(Hash) ? result[:content] : result.content
96
189
  rescue StandardError => e
97
- "Error generating answer: #{e.message}"
190
+ handle_exception(e, level: :warn, operation: 'knowledge.query.synthesize_answer')
191
+ nil
98
192
  end
99
193
  private_class_method :synthesize_answer
100
194
 
@@ -103,11 +197,61 @@ module Legion
103
197
  content: chunk[:content],
104
198
  source_file: chunk.dig(:metadata, :source_file) || chunk[:source_file],
105
199
  heading: chunk.dig(:metadata, :heading) || chunk[:heading],
200
+ chunk_index: chunk.dig(:metadata, :chunk_index) || chunk[:chunk_index],
106
201
  distance: chunk[:distance] || chunk[:score]
107
202
  }
108
203
  end
109
204
  private_class_method :format_source
110
205
 
206
# Resolves a chunk's location context (source file, chunk index, heading).
#
# The context is read from the chunk's :metadata, :source_context or
# :context (first present). When the source file or chunk index is still
# missing and the chunk has an :id, the stored entry is looked up and its
# context merged in. Top-level chunk keys fill any remaining gaps.
#
# @param chunk [Hash] a retrieved or stored chunk
# @return [Hash] symbol-keyed context; +{}+ when an error is raised
def chunk_context(chunk)
  raw = chunk[:metadata] || chunk[:source_context] || chunk[:context]
  resolved = normalize_context(raw)

  incomplete = resolved[:source_file].nil? || resolved[:chunk_index].nil?
  if incomplete && chunk[:id] && Helpers::ApolloModels.entry_available?
    stored = Helpers::ApolloModels.entry.where(id: chunk[:id]).first
    resolved = resolved.merge(normalize_context(row_context(stored))) if stored
  end

  # Fall back to the chunk's own top-level fields for anything still unset.
  %i[source_file chunk_index heading].each { |field| resolved[field] ||= chunk[field] }
  resolved
rescue StandardError => e
  handle_exception(e, level: :warn, operation: 'knowledge.query.chunk_context')
  {}
end
221
+ private_class_method :chunk_context
222
+
223
# Extracts the raw context payload from a stored row.
#
# @param row [Hash, #values] a row hash, or an object whose #values returns
#   the row hash (presumably a Sequel model instance — confirm)
# @return [Object, nil] the first present of :source_context, :metadata, :context
def row_context(row)
  # Hash#values returns an Array of values, which cannot be indexed by
  # symbol, so a plain Hash row must be used as-is rather than unwrapped.
  unwrap = !row.is_a?(Hash) && row.respond_to?(:values)
  values = unwrap ? row.values : row
  values[:source_context] || values[:metadata] || values[:context]
end
227
+ private_class_method :row_context
228
+
229
# Coerces a context payload into a hash with symbolized top-level keys.
#
# @param context [String, Hash, Object] a JSON string, a hash, or anything else
# @return [Hash] the parsed/copied hash; +{}+ for blank strings, unsupported
#   types, or when parsing or key conversion raises
def normalize_context(context)
  parsed =
    if context.is_a?(String)
      context.strip.empty? ? {} : json_parse(context)
    elsif context.is_a?(Hash)
      context
    else
      {}
    end

  # Keys that cannot be symbolized are kept unchanged.
  parsed.transform_keys do |name|
    next name unless name.respond_to?(:to_sym)

    name.to_sym
  end
rescue StandardError => e
  handle_exception(e, level: :warn, operation: 'knowledge.query.normalize_context')
  {}
end
244
+ private_class_method :normalize_context
245
+
246
# Returns the identity used to detect duplicate chunks.
#
# @param chunk [Hash] a retrieved or stored chunk
# @return [Object] the chunk's :id, else its :content_hash, else the triple
#   [source_file, chunk_index, content]
def chunk_dedupe_key(chunk)
  direct = chunk[:id] || chunk[:content_hash]
  return direct if direct

  # Resolve the context once: chunk_context may query the entry store, so
  # calling it per field would repeat that work for a single key.
  context = chunk_context(chunk)
  [context[:source_file], context[:chunk_index], chunk[:content]]
end
253
+ private_class_method :chunk_dedupe_key
254
+
111
255
  def average_score(chunks)
112
256
  return nil if chunks.empty?
113
257
 
@@ -160,7 +304,8 @@ module Legion
160
304
  synthesized: synthesized,
161
305
  rating: rating
162
306
  })
163
- rescue StandardError => _e
307
+ rescue StandardError => e
308
+ handle_exception(e, level: :warn, operation: 'knowledge.query.emit_feedback_event')
164
309
  nil
165
310
  end
166
311
  private_class_method :emit_feedback_event
@@ -170,14 +315,10 @@ module Legion
170
315
  end
171
316
  private_class_method :llm_available?
172
317
 
173
- def settings_top_k
174
- return nil unless defined?(Legion::Settings)
175
-
176
- Legion::Settings.dig(:knowledge, :query, :top_k)
177
- rescue StandardError => _e
178
- nil
318
# Picks the neighbor radius to use for a request.
#
# @param neighbor_radius [#to_i, nil] caller-supplied radius; when nil (or
#   false) the value under settings[:query][:neighbor_radius] is used
# @return [Integer] the chosen radius coerced with #to_i
def resolve_neighbor_radius(neighbor_radius)
  chosen = neighbor_radius || settings[:query][:neighbor_radius]
  chosen.to_i
end
180
- private_class_method :settings_top_k
321
+ private_class_method :resolve_neighbor_radius
181
322
  end
182
323
  end
183
324
  end
@@ -3,7 +3,7 @@
3
3
  module Legion
4
4
  module Extensions
5
5
  module Knowledge
6
- VERSION = '0.6.10'
6
+ VERSION = '0.6.15'
7
7
  end
8
8
  end
9
9
  end
@@ -1,10 +1,14 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require 'legion/logging'
4
+ require 'legion/settings'
5
+ require 'legion/json'
3
6
  require_relative 'knowledge/version'
4
7
  require_relative 'knowledge/helpers/manifest'
5
8
  require_relative 'knowledge/helpers/manifest_store'
6
9
  require_relative 'knowledge/helpers/parser'
7
10
  require_relative 'knowledge/helpers/chunker'
11
+ require_relative 'knowledge/helpers/apollo_models'
8
12
  require_relative 'knowledge/runners/ingest'
9
13
  require_relative 'knowledge/runners/query'
10
14
  require_relative 'knowledge/runners/corpus'
@@ -27,11 +31,41 @@ require_relative 'knowledge/actors/corpus_ingest'
27
31
  module Legion
28
32
  module Extensions
29
33
  module Knowledge
34
+ extend Legion::Logging::Helper
35
+ extend Legion::Settings::Helper
30
36
  extend Legion::Extensions::Core if defined?(Legion::Extensions::Core)
31
37
 
32
38
  def self.remote_invocable?
33
39
  false
34
40
  end
41
+
42
# Default configuration for the knowledge extension.
#
# A fresh hash is built on every call, so callers may mutate the result
# without affecting later calls.
#
# @return [Hash] defaults for corpus_path, monitors, chunker, query, ingest,
#   maintenance and actors
def self.default_settings
  chunker = { max_tokens: 512, overlap_tokens: 128 }
  query = { top_k: 5, neighbor_radius: 1 }
  ingest = { filter_prompt: nil, filter_threshold: 0.5 }
  maintenance = { stale_threshold: 0.3, cold_chunk_days: 7, quality_report_limit: 10 }
  actors = { watcher_interval: 300, maintenance_interval: 21_600 }

  {
    corpus_path: nil,
    monitors: [],
    chunker: chunker,
    query: query,
    ingest: ingest,
    maintenance: maintenance,
    actors: actors
  }
end
35
69
  end
36
70
  end
37
71
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: lex-knowledge
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.6.10
4
+ version: 0.6.15
5
5
  platform: ruby
6
6
  authors:
7
7
  - Matthew Iverson
@@ -120,6 +120,7 @@ files:
120
120
  - lib/legion/extensions/knowledge/actors/corpus_watcher.rb
121
121
  - lib/legion/extensions/knowledge/actors/maintenance_runner.rb
122
122
  - lib/legion/extensions/knowledge/client.rb
123
+ - lib/legion/extensions/knowledge/helpers/apollo_models.rb
123
124
  - lib/legion/extensions/knowledge/helpers/chunker.rb
124
125
  - lib/legion/extensions/knowledge/helpers/manifest.rb
125
126
  - lib/legion/extensions/knowledge/helpers/manifest_store.rb