lex-knowledge 0.6.9 → 0.6.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require_relative '../helpers/apollo_models'
4
+
3
5
  require 'digest'
4
6
 
5
7
  module Legion
@@ -7,13 +9,23 @@ module Legion
7
9
  module Knowledge
8
10
  module Runners
9
11
  module Query # rubocop:disable Legion/Extension/RunnerIncludeHelpers
12
+ extend Legion::Logging::Helper
13
+ extend Legion::JSON::Helper
14
+ extend Legion::Settings::Helper
15
+
10
16
  module_function
11
17
 
12
- def query(question:, top_k: nil, synthesize: true)
18
+ def query(question:, top_k: nil, synthesize: true, expand_neighbors: false, neighbor_radius: nil)
13
19
  started = ::Process.clock_gettime(::Process::CLOCK_MONOTONIC)
14
- resolved_k = top_k || settings_top_k || 5
20
+ resolved_k = top_k || settings[:query][:top_k]
21
+ resolved_radius = resolve_neighbor_radius(neighbor_radius)
15
22
 
16
- chunks = retrieve_chunks(question, resolved_k)
23
+ chunks = retrieve_chunks(
24
+ question,
25
+ resolved_k,
26
+ expand_neighbors: expand_neighbors,
27
+ neighbor_radius: resolved_radius
28
+ )
17
29
 
18
30
  answer = (synthesize_answer(question, chunks) if synthesize && llm_available?)
19
31
 
@@ -36,12 +48,19 @@ module Legion
36
48
  metadata: build_metadata(chunks, score, latency_ms)
37
49
  }
38
50
  rescue StandardError => e
51
+ handle_exception(e, level: :warn, operation: 'knowledge.query.query')
39
52
  { success: false, error: e.message }
40
53
  end
41
54
 
42
- def retrieve(question:, top_k: nil)
43
- resolved_k = top_k || settings_top_k || 5
44
- chunks = retrieve_chunks(question, resolved_k)
55
+ def retrieve(question:, top_k: nil, expand_neighbors: false, neighbor_radius: nil)
56
+ resolved_k = top_k || settings[:query][:top_k]
57
+ resolved_radius = resolve_neighbor_radius(neighbor_radius)
58
+ chunks = retrieve_chunks(
59
+ question,
60
+ resolved_k,
61
+ expand_neighbors: expand_neighbors,
62
+ neighbor_radius: resolved_radius
63
+ )
45
64
 
46
65
  {
47
66
  success: true,
@@ -49,6 +68,7 @@ module Legion
49
68
  metadata: build_metadata(chunks, average_score(chunks))
50
69
  }
51
70
  rescue StandardError => e
71
+ handle_exception(e, level: :warn, operation: 'knowledge.query.retrieve')
52
72
  { success: false, error: e.message }
53
73
  end
54
74
 
@@ -63,10 +83,11 @@ module Legion
63
83
  )
64
84
  { success: true, question_hash: question_hash, rating: rating }
65
85
  rescue StandardError => e
86
+ handle_exception(e, level: :warn, operation: 'knowledge.query.record_feedback')
66
87
  { success: false, error: e.message }
67
88
  end
68
89
 
69
- def retrieve_chunks(question, top_k)
90
+ def retrieve_chunks(question, top_k, expand_neighbors: false, neighbor_radius: 1)
70
91
  return [] unless defined?(Legion::Extensions::Apollo)
71
92
 
72
93
  result = Legion::Extensions::Apollo::Runners::Knowledge.retrieve_relevant(
@@ -74,12 +95,81 @@ module Legion
74
95
  limit: top_k,
75
96
  tags: ['document_chunk']
76
97
  )
77
- result.is_a?(Hash) && result[:success] ? Array(result[:entries]) : []
78
- rescue StandardError => _e
98
+ chunks = result.is_a?(Hash) && result[:success] ? Array(result[:entries]) : []
99
+ expand_neighbors ? expand_neighbor_chunks(chunks, neighbor_radius) : chunks
100
+ rescue StandardError => e
101
+ handle_exception(e, level: :warn, operation: 'knowledge.query.retrieve_chunks')
79
102
  []
80
103
  end
81
104
  private_class_method :retrieve_chunks
82
105
 
106
# Widens a list of retrieved chunks with the chunks stored around each of them.
#
# @param chunks [Array<Hash>] chunks produced by the initial retrieval
# @param neighbor_radius [#to_i] number of chunk indexes to include on each side
# @return [Array<Hash>] the de-duplicated neighbor windows; +chunks+ itself is
#   returned when it is empty, when the radius is not positive, when the Apollo
#   entry model is unavailable, or when expansion raises
def expand_neighbor_chunks(chunks, neighbor_radius)
  return chunks if chunks.empty?

  span = neighbor_radius.to_i
  if span.positive? && Helpers::ApolloModels.entry_available?
    windows = chunks.flat_map { |hit| neighbor_window_for(hit, span) }
    merge_neighbor_chunks(windows)
  else
    chunks
  end
rescue StandardError => e
  # Expansion is best effort: report the failure and fall back to the plain hits.
  handle_exception(e, level: :warn, operation: 'knowledge.query.expand_neighbor_chunks')
  chunks
end
117
+ private_class_method :expand_neighbor_chunks
118
+
119
# Builds the window of chunks surrounding one retrieved chunk.
#
# The window is read from the entry store for the chunk's source file, covering
# chunk indexes +chunk_index - radius+ through +chunk_index + radius+, and is
# returned ordered by chunk index.
#
# The stored row that corresponds to the hit itself carries no retrieval
# relevance, so the hit's +:distance+ / +:score+ (when present) are copied onto
# that row instead of being dropped.
#
# @param chunk [Hash] a retrieved chunk
# @param radius [Integer] number of chunk indexes to include on each side
# @return [Array<Hash>] the ordered window, or +[chunk]+ when the chunk has no
#   source file / chunk index or when the lookup raises
def neighbor_window_for(chunk, radius)
  context = chunk_context(chunk)
  return [chunk] unless context[:source_file] && !context[:chunk_index].nil?

  center = context[:chunk_index].to_i
  # Computed once: the key of the hit does not change while scanning the rows.
  hit_key = chunk_dedupe_key(chunk)
  relevance = { distance: chunk[:distance], score: chunk[:score] }.compact

  hit_in_rows = false
  window = neighbor_dataset(context[:source_file], center - radius, center + radius)
  rows = window.all.map do |entry|
    row = chunk_from_entry(entry)
    next row unless chunk_dedupe_key(row) == hit_key

    hit_in_rows = true
    row.merge(relevance)
  end

  # Keep the hit even when the store did not return a row for it.
  rows << chunk unless hit_in_rows
  rows.sort_by { |row| chunk_context(row)[:chunk_index].to_i }
rescue StandardError => e
  handle_exception(e, level: :warn, operation: 'knowledge.query.neighbor_window')
  [chunk]
end
135
+ private_class_method :neighbor_window_for
136
+
137
# Builds the dataset of stored document chunks of one source file whose chunk
# index lies within +lower..upper+ (inclusive), ordered by ascending chunk index.
#
# The file name and bounds are passed as bound placeholders of the raw SQL
# fragments, never interpolated into them.
# NOTE(review): the `->>` operator and `::integer` cast look PostgreSQL-specific;
# confirm the target database.
#
# @param source_file [String] value matched against source_context->>'source_file'
# @param lower [Integer] lowest chunk index to include
# @param upper [Integer] highest chunk index to include
# @return [Object] whatever the chained where/order calls on the Apollo entry model return
def neighbor_dataset(source_file, lower, upper)
  document_chunks = Helpers::ApolloModels.entry.where(content_type: 'document_chunk')
  same_file = Sequel.lit("source_context->>'source_file' = ?", source_file)
  in_window = Sequel.lit("(source_context->>'chunk_index')::integer BETWEEN ? AND ?", lower, upper)
  by_index = Sequel.lit("(source_context->>'chunk_index')::integer ASC")

  document_chunks.where(same_file).where(in_window).order(by_index)
end
144
+ private_class_method :neighbor_dataset
145
+
146
# Converts a stored entry into the chunk hash shape used by this runner.
#
# Accepts either a plain Hash row or an object exposing its columns through
# +#values+. A Hash must be used directly: Hash#values returns an Array of the
# values, which cannot be indexed by column name.
#
# @param entry [Hash, #values] stored entry
# @return [Hash] chunk hash with nil attributes removed; the normalized source
#   context is exposed under +:metadata+
def chunk_from_entry(entry)
  use_accessor = entry.respond_to?(:values) && !entry.is_a?(Hash)
  values = use_accessor ? entry.values : entry
  context = normalize_context(values[:source_context] || values[:metadata] || values[:context])

  {
    id: values[:id],
    content: values[:content],
    content_type: values[:content_type],
    confidence: values[:confidence],
    tags: values[:tags],
    source_agent: values[:source_agent],
    knowledge_domain: values[:knowledge_domain],
    status: values[:status],
    content_hash: values[:content_hash],
    metadata: context
  }.compact
end
163
+ private_class_method :chunk_from_entry
164
+
165
# Removes duplicate chunks, keeping the first occurrence of every dedupe key
# and preserving the order in which keys first appear.
#
# @param chunks [Array<Hash>] chunks that may contain repeats
# @return [Array<Hash>] chunks with one element per dedupe key
def merge_neighbor_chunks(chunks)
  chunks.uniq { |candidate| chunk_dedupe_key(candidate) }
end
171
+ private_class_method :merge_neighbor_chunks
172
+
83
173
  def synthesize_answer(question, chunks)
84
174
  return nil unless llm_available?
85
175
 
@@ -94,6 +184,7 @@ module Legion
94
184
  result = llm_chat(message: prompt, caller: { extension: 'lex-knowledge' })
95
185
  result.is_a?(Hash) ? result[:content] : result
96
186
  rescue StandardError => e
187
+ handle_exception(e, level: :warn, operation: 'knowledge.query.synthesize_answer')
97
188
  "Error generating answer: #{e.message}"
98
189
  end
99
190
  private_class_method :synthesize_answer
@@ -103,11 +194,61 @@ module Legion
103
194
  content: chunk[:content],
104
195
  source_file: chunk.dig(:metadata, :source_file) || chunk[:source_file],
105
196
  heading: chunk.dig(:metadata, :heading) || chunk[:heading],
197
+ chunk_index: chunk.dig(:metadata, :chunk_index) || chunk[:chunk_index],
106
198
  distance: chunk[:distance] || chunk[:score]
107
199
  }
108
200
  end
109
201
  private_class_method :format_source
110
202
 
203
# Resolves the source context (source file, chunk index, heading) of a chunk.
#
# The context is taken from the chunk's +:metadata+, +:source_context+ or
# +:context+ (first one that is set). When the source file or chunk index is
# still unknown and the chunk has an id, the stored entry with that id is
# looked up and its context merged in. Finally, top-level keys of the chunk
# fill any field that is still unset.
#
# @param chunk [Hash] a chunk hash
# @return [Hash] context with symbol keys; an empty hash when resolution raises
def chunk_context(chunk)
  raw = chunk[:metadata] || chunk[:source_context] || chunk[:context]
  context = normalize_context(raw)

  incomplete = context[:source_file].nil? || context[:chunk_index].nil?
  if incomplete && chunk[:id] && Helpers::ApolloModels.entry_available?
    row = Helpers::ApolloModels.entry.where(id: chunk[:id]).first
    context = context.merge(normalize_context(row_context(row))) if row
  end

  %i[source_file chunk_index heading].each { |field| context[field] ||= chunk[field] }
  context
rescue StandardError => e
  handle_exception(e, level: :warn, operation: 'knowledge.query.chunk_context')
  {}
end
218
+ private_class_method :chunk_context
219
+
220
# Extracts the raw source context from a stored row.
#
# Accepts either a plain Hash row or an object exposing its columns through
# +#values+. A Hash must be used directly: Hash#values returns an Array of the
# values, which cannot be indexed by column name.
#
# @param row [Hash, #values] stored row
# @return [Object, nil] the first of +:source_context+, +:metadata+ or
#   +:context+ that is set, otherwise nil
def row_context(row)
  use_accessor = row.respond_to?(:values) && !row.is_a?(Hash)
  values = use_accessor ? row.values : row
  values[:source_context] || values[:metadata] || values[:context]
end
224
+ private_class_method :row_context
225
+
226
# Normalizes a stored source context into a hash with symbol keys.
#
# A String is parsed with +json_parse+ (blank strings count as empty), a Hash
# is used as is, and anything else is treated as empty. Parsed JSON that is not
# an object (an array, a scalar, null) is also treated as empty rather than
# being routed through the exception handler.
#
# @param context [String, Hash, Object, nil] raw context
# @return [Hash] context whose keys are symbols where the key supports +to_sym+;
#   an empty hash when the input is unusable or parsing raises
def normalize_context(context)
  parsed = case context
           when String then context.strip.empty? ? {} : json_parse(context)
           when Hash then context
           end
  return {} unless parsed.respond_to?(:transform_keys)

  parsed.transform_keys { |key| key.respond_to?(:to_sym) ? key.to_sym : key }
rescue StandardError => e
  handle_exception(e, level: :warn, operation: 'knowledge.query.normalize_context')
  {}
end
241
+ private_class_method :normalize_context
242
+
243
# Returns the key used to recognize two chunk hashes as the same chunk.
#
# The chunk's +:id+ is preferred, then its +:content_hash+. Only when both are
# missing is the source context resolved (once) to build a composite key.
#
# @param chunk [Hash] a chunk hash
# @return [Object] the id, the content hash, or
#   +[source_file, chunk_index, content]+
def chunk_dedupe_key(chunk)
  explicit = chunk[:id] || chunk[:content_hash]
  return explicit if explicit

  context = chunk_context(chunk)
  [context[:source_file], context[:chunk_index], chunk[:content]]
end
250
+ private_class_method :chunk_dedupe_key
251
+
111
252
  def average_score(chunks)
112
253
  return nil if chunks.empty?
113
254
 
@@ -160,7 +301,8 @@ module Legion
160
301
  synthesized: synthesized,
161
302
  rating: rating
162
303
  })
163
- rescue StandardError => _e
304
+ rescue StandardError => e
305
+ handle_exception(e, level: :warn, operation: 'knowledge.query.emit_feedback_event')
164
306
  nil
165
307
  end
166
308
  private_class_method :emit_feedback_event
@@ -170,14 +312,10 @@ module Legion
170
312
  end
171
313
  private_class_method :llm_available?
172
314
 
173
- def settings_top_k
174
- return nil unless defined?(Legion::Settings)
175
-
176
- Legion::Settings.dig(:knowledge, :query, :top_k)
177
- rescue StandardError => _e
178
- nil
315
# Resolves the neighbor radius for a request.
#
# An explicit argument wins; otherwise the +:neighbor_radius+ value of the
# +:query+ settings section is used. When that section or value is missing the
# radius falls back to 1 instead of raising.
#
# @param neighbor_radius [#to_i, nil] radius supplied by the caller
# @return [Integer] the resolved radius
def resolve_neighbor_radius(neighbor_radius)
  return neighbor_radius.to_i if neighbor_radius

  current = settings
  query_settings = current.respond_to?(:[]) ? current[:query] : nil
  configured = query_settings.respond_to?(:[]) ? query_settings[:neighbor_radius] : nil
  (configured || 1).to_i
end
180
- private_class_method :settings_top_k
318
+ private_class_method :resolve_neighbor_radius
181
319
  end
182
320
  end
183
321
  end
@@ -3,7 +3,7 @@
3
3
  module Legion
4
4
  module Extensions
5
5
  module Knowledge
6
- VERSION = '0.6.9'
6
+ VERSION = '0.6.14'
7
7
  end
8
8
  end
9
9
  end
@@ -1,10 +1,14 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require 'legion/logging'
4
+ require 'legion/settings'
5
+ require 'legion/json'
3
6
  require_relative 'knowledge/version'
4
7
  require_relative 'knowledge/helpers/manifest'
5
8
  require_relative 'knowledge/helpers/manifest_store'
6
9
  require_relative 'knowledge/helpers/parser'
7
10
  require_relative 'knowledge/helpers/chunker'
11
+ require_relative 'knowledge/helpers/apollo_models'
8
12
  require_relative 'knowledge/runners/ingest'
9
13
  require_relative 'knowledge/runners/query'
10
14
  require_relative 'knowledge/runners/corpus'
@@ -27,11 +31,41 @@ require_relative 'knowledge/actors/corpus_ingest'
27
31
  module Legion
28
32
  module Extensions
29
33
  module Knowledge
34
+ extend Legion::Logging::Helper
35
+ extend Legion::Settings::Helper
30
36
  extend Legion::Extensions::Core if defined?(Legion::Extensions::Core)
31
37
 
32
38
  def self.remote_invocable?
33
39
  false
34
40
  end
41
+
42
+ def self.default_settings
43
+ {
44
+ corpus_path: nil,
45
+ monitors: [],
46
+ chunker: {
47
+ max_tokens: 512,
48
+ overlap_tokens: 128
49
+ },
50
+ query: {
51
+ top_k: 5,
52
+ neighbor_radius: 1
53
+ },
54
+ ingest: {
55
+ filter_prompt: nil,
56
+ filter_threshold: 0.5
57
+ },
58
+ maintenance: {
59
+ stale_threshold: 0.3,
60
+ cold_chunk_days: 7,
61
+ quality_report_limit: 10
62
+ },
63
+ actors: {
64
+ watcher_interval: 300,
65
+ maintenance_interval: 21_600
66
+ }
67
+ }
68
+ end
35
69
  end
36
70
  end
37
71
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: lex-knowledge
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.6.9
4
+ version: 0.6.14
5
5
  platform: ruby
6
6
  authors:
7
7
  - Matthew Iverson
@@ -120,6 +120,7 @@ files:
120
120
  - lib/legion/extensions/knowledge/actors/corpus_watcher.rb
121
121
  - lib/legion/extensions/knowledge/actors/maintenance_runner.rb
122
122
  - lib/legion/extensions/knowledge/client.rb
123
+ - lib/legion/extensions/knowledge/helpers/apollo_models.rb
123
124
  - lib/legion/extensions/knowledge/helpers/chunker.rb
124
125
  - lib/legion/extensions/knowledge/helpers/manifest.rb
125
126
  - lib/legion/extensions/knowledge/helpers/manifest_store.rb