lex-knowledge 0.6.0 → 0.6.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 93e36e27a559476697a65f659e5c5a21db2e061b40877db6ea467875642d3232
4
- data.tar.gz: dfae368bcc8db4eba0c6b41da9bb79537ed3a9b4fd898b189905fb1ca58a6bc7
3
+ metadata.gz: 02e8d24d0e0762a0845d902fbbf0b4b1b40ba181ca7656307f8a15ba31d5419e
4
+ data.tar.gz: bb5b158d9c815dade808201d81a228d144d2aa8e66dedf9df1366c3d7427df6c
5
5
  SHA512:
6
- metadata.gz: c9d11bdbcc27f14431c6e900c703a02cea7389bf2220c6e8c2fb1c920a97d7a86993d11e207453ef79ab928da68629a98aa711d51d009dd04d1f5215337dc4a6
7
- data.tar.gz: 9fe9dc96f91a19aca0d58b4ad49abbcf0876797bba5ddea6984cff0d51b7a577ce0f165fa8d54d9271b185da84d6a267d7c9883903e99937f7b084454b1affcc
6
+ metadata.gz: 7e3a4e56320e2b30f78ac3323e124cdcd0650fbd2dde431592d71a3bf55182ba1d8deafc1b89ce6bf3ef7828b8a75e210b0390f018791ab009e4a022fa58ea33
7
+ data.tar.gz: 7c39e020cfa3df824f388c9b3e5c1f5aa4168e08142a7f4b7ce3bd18a1af2ae96276c383dddf5e5c56ee71b320298db805908396fb1f4e566fc0aabdc248538c
@@ -10,10 +10,10 @@ module Legion
10
10
  def check_subtask? = false
11
11
  def generate_task? = false
12
12
 
13
- def enabled?
14
- defined?(Legion::Transport) &&
13
+ def enabled? # rubocop:disable Legion/Extension/ActorEnabledSideEffects
14
+ Legion.const_defined?(:Transport, false) &&
15
15
  defined?(Legion::Extensions::Knowledge::Runners::Ingest)
16
- rescue StandardError
16
+ rescue StandardError => _e
17
17
  false
18
18
  end
19
19
  end
@@ -4,25 +4,27 @@ module Legion
4
4
  module Extensions
5
5
  module Knowledge
6
6
  module Actor
7
- class CorpusWatcher < Legion::Extensions::Actors::Every
7
+ class CorpusWatcher < Legion::Extensions::Actors::Every # rubocop:disable Legion/Extension/EveryActorRequiresTime
8
8
  def runner_class = 'Legion::Extensions::Knowledge::Runners::Ingest'
9
9
  def runner_function = 'ingest_corpus'
10
10
  def check_subtask? = false
11
11
  def generate_task? = false
12
12
 
13
- def every_interval
13
+ def time
14
14
  if defined?(Legion::Settings) && !Legion::Settings[:knowledge].nil?
15
15
  Legion::Settings.dig(:knowledge, :actors, :watcher_interval) || 300
16
16
  else
17
17
  300
18
18
  end
19
- rescue StandardError
19
+ rescue StandardError => e
20
+ log.warn(e.message)
20
21
  300
21
22
  end
22
23
 
23
- def enabled?
24
+ def enabled? # rubocop:disable Legion/Extension/ActorEnabledSideEffects
24
25
  resolve_monitors.any?
25
- rescue StandardError
26
+ rescue StandardError => e
27
+ log.warn(e.message)
26
28
  false
27
29
  end
28
30
 
@@ -32,9 +34,14 @@ module Legion
32
34
 
33
35
  private
34
36
 
37
+ def log
38
+ Legion::Logging
39
+ end
40
+
35
41
  def resolve_monitors
36
42
  Runners::Monitor.resolve_monitors
37
- rescue StandardError
43
+ rescue StandardError => e
44
+ log.warn(e.message)
38
45
  []
39
46
  end
40
47
  end
@@ -4,27 +4,29 @@ module Legion
4
4
  module Extensions
5
5
  module Knowledge
6
6
  module Actor
7
- class MaintenanceRunner < Legion::Extensions::Actors::Every
7
+ class MaintenanceRunner < Legion::Extensions::Actors::Every # rubocop:disable Legion/Extension/EveryActorRequiresTime
8
8
  def runner_class = 'Legion::Extensions::Knowledge::Runners::Maintenance'
9
9
  def runner_function = 'health'
10
10
  def check_subtask? = false
11
11
  def generate_task? = false
12
12
 
13
- def every_interval
13
+ def time
14
14
  if defined?(Legion::Settings) && !Legion::Settings[:knowledge].nil?
15
15
  Legion::Settings.dig(:knowledge, :actors, :maintenance_interval) || 21_600
16
16
  else
17
17
  21_600
18
18
  end
19
- rescue StandardError
19
+ rescue StandardError => e
20
+ log.warn(e.message)
20
21
  21_600
21
22
  end
22
23
 
23
- def enabled?
24
+ def enabled? # rubocop:disable Legion/Extension/ActorEnabledSideEffects
24
25
  return false unless corpus_path && !corpus_path.empty?
25
26
 
26
27
  true
27
- rescue StandardError
28
+ rescue StandardError => e
29
+ log.warn(e.message)
28
30
  false
29
31
  end
30
32
 
@@ -34,11 +36,16 @@ module Legion
34
36
 
35
37
  private
36
38
 
39
+ def log
40
+ Legion::Logging
41
+ end
42
+
37
43
  def corpus_path
38
44
  return nil unless defined?(Legion::Settings) && !Legion::Settings[:knowledge].nil?
39
45
 
40
46
  Legion::Settings.dig(:knowledge, :corpus_path)
41
- rescue StandardError
47
+ rescue StandardError => e
48
+ log.warn(e.message)
42
49
  nil
43
50
  end
44
51
  end
@@ -79,7 +79,7 @@ module Legion
79
79
  return nil unless defined?(Legion::Settings)
80
80
 
81
81
  Legion::Settings.dig(:knowledge, :chunker, :max_tokens)
82
- rescue StandardError
82
+ rescue StandardError => _e
83
83
  nil
84
84
  end
85
85
  private_class_method :settings_max_tokens
@@ -88,7 +88,7 @@ module Legion
88
88
  return nil unless defined?(Legion::Settings)
89
89
 
90
90
  Legion::Settings.dig(:knowledge, :chunker, :overlap_tokens)
91
- rescue StandardError
91
+ rescue StandardError => _e
92
92
  nil
93
93
  end
94
94
  private_class_method :settings_overlap_tokens
@@ -20,7 +20,7 @@ module Legion
20
20
 
21
21
  raw = ::File.read(path, encoding: 'utf-8')
22
22
  ::JSON.parse(raw, symbolize_names: true)
23
- rescue StandardError
23
+ rescue StandardError => _e
24
24
  []
25
25
  end
26
26
 
@@ -31,7 +31,7 @@ module Legion
31
31
  ::File.write(tmp, ::JSON.generate(manifest.map { |e| serialize_entry(e) }))
32
32
  ::File.rename(tmp, path)
33
33
  true
34
- rescue StandardError
34
+ rescue StandardError => _e
35
35
  false
36
36
  end
37
37
 
@@ -4,7 +4,7 @@ module Legion
4
4
  module Extensions
5
5
  module Knowledge
6
6
  module Runners
7
- module Corpus
7
+ module Corpus # rubocop:disable Legion/Extension/RunnerIncludeHelpers
8
8
  module_function
9
9
 
10
10
  def manifest_path(path:)
@@ -1,12 +1,19 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require 'securerandom'
4
+
3
5
  module Legion
4
6
  module Extensions
5
7
  module Knowledge
6
8
  module Runners
7
- module Ingest
9
+ module Ingest # rubocop:disable Legion/Extension/RunnerIncludeHelpers
8
10
  module_function
9
11
 
12
+ def log
13
+ Legion::Logging
14
+ end
15
+ private_class_method :log
16
+
10
17
  def scan_corpus(path:, extensions: nil)
11
18
  opts = { path: path }
12
19
  opts[:extensions] = extensions if extensions
@@ -28,6 +35,7 @@ module Legion
28
35
 
29
36
  ingest_corpus_path(path: path, dry_run: dry_run, force: force)
30
37
  rescue ArgumentError => e
38
+ log.warn(e.message)
31
39
  { success: false, error: e.message }
32
40
  end
33
41
 
@@ -63,6 +71,7 @@ module Legion
63
71
  chunks_updated: chunks_updated
64
72
  }
65
73
  rescue StandardError => e
74
+ log.warn(e.message)
66
75
  { success: false, error: e.message }
67
76
  end
68
77
  private_class_method :ingest_corpus_path
@@ -71,6 +80,7 @@ module Legion
71
80
  results = monitors.map do |monitor|
72
81
  ingest_corpus(path: monitor[:path], dry_run: dry_run, force: force)
73
82
  rescue StandardError => e
83
+ log.warn(e.message)
74
84
  { success: false, path: monitor[:path], error: e.message }
75
85
  end
76
86
 
@@ -91,10 +101,28 @@ module Legion
91
101
 
92
102
  { success: true, monitors_processed: results.size, **total }
93
103
  rescue StandardError => e
104
+ log.warn(e.message)
94
105
  { success: false, error: e.message }
95
106
  end
96
107
  private_class_method :ingest_monitors
97
108
 
109
+ def ingest_content(content:, source_type: :text, metadata: {})
110
+ source_path = "content://#{source_type}/#{SecureRandom.uuid}"
111
+ section = {
112
+ content: content,
113
+ heading: source_type.to_s,
114
+ section_path: [source_type.to_s],
115
+ source_file: source_path
116
+ }
117
+ chunks = Helpers::Chunker.chunk(sections: [section])
118
+ paired = batch_embed_chunks(chunks, force: false)
119
+ paired.each { |p| upsert_chunk_with_embedding(p[:chunk], p[:embedding], force: false, exists: p[:exists] || false) }
120
+ { status: :ingested, chunks: chunks.size, source_type: source_type, metadata: metadata }
121
+ rescue StandardError => e
122
+ log.warn(e.message)
123
+ { status: :failed, error: e.message, source_type: source_type, metadata: metadata }
124
+ end
125
+
98
126
  def ingest_file(file_path:, force: false)
99
127
  result = process_file(file_path, dry_run: false, force: force)
100
128
 
@@ -106,6 +134,7 @@ module Legion
106
134
  chunks_updated: result[:updated]
107
135
  }
108
136
  rescue StandardError => e
137
+ log.warn(e.message)
109
138
  { success: false, error: e.message }
110
139
  end
111
140
 
@@ -145,7 +174,8 @@ module Legion
145
174
  embed_map = needs_embed.empty? ? {} : build_embed_map(needs_embed)
146
175
 
147
176
  chunks.map { |c| { chunk: c, embedding: embed_map[c[:content_hash]], exists: exists_map.fetch(c[:content_hash], false) } }
148
- rescue StandardError
177
+ rescue StandardError => e
178
+ log.warn(e.message)
149
179
  paired_without_embed(chunks, {})
150
180
  end
151
181
  private_class_method :batch_embed_chunks
@@ -166,11 +196,12 @@ module Legion
166
196
  private_class_method :paired_without_embed
167
197
 
168
198
  def build_embed_map(needs_embed)
169
- results = Legion::LLM.embed_batch(needs_embed.map { |c| c[:content] })
199
+ results = Legion::LLM.embed_batch(needs_embed.map { |c| c[:content] }) # rubocop:disable Legion/HelperMigration/DirectLlm
170
200
  results.each_with_object({}) do |r, h|
171
201
  h[needs_embed[r[:index]][:content_hash]] = r[:vector] unless r[:error]
172
202
  end
173
- rescue StandardError
203
+ rescue StandardError => e
204
+ log.warn(e.message)
174
205
  {}
175
206
  end
176
207
  private_class_method :build_embed_map
@@ -182,7 +213,8 @@ module Legion
182
213
 
183
214
  ingest_to_apollo(chunk, embedding)
184
215
  force ? :updated : :created
185
- rescue StandardError
216
+ rescue StandardError => e
217
+ log.warn(e.message)
186
218
  :skipped
187
219
  end
188
220
  private_class_method :upsert_chunk_with_embedding
@@ -194,7 +226,8 @@ module Legion
194
226
  .where(Sequel.pg_array_op(:tags).contains(Sequel.pg_array(['document_chunk'])))
195
227
  .where(Sequel.like(:content, "%#{content_hash}%"))
196
228
  .any?
197
- rescue StandardError
229
+ rescue StandardError => e
230
+ log.warn(e.message)
198
231
  false
199
232
  end
200
233
  private_class_method :chunk_exists?
@@ -225,13 +258,14 @@ module Legion
225
258
  return unless defined?(Legion::Apollo)
226
259
  return unless Legion::Apollo.respond_to?(:ingest) && Legion::Apollo.started?
227
260
 
228
- Legion::Apollo.ingest(
261
+ Legion::Apollo.ingest( # rubocop:disable Legion/HelperMigration/DirectKnowledge
229
262
  content: file_path,
230
263
  content_type: 'document_retired',
231
264
  tags: [file_path, 'retired', 'document_chunk'].uniq,
232
265
  metadata: { source_file: file_path, retired: true }
233
266
  )
234
- rescue StandardError
267
+ rescue StandardError => e
268
+ log.warn(e.message)
235
269
  nil
236
270
  end
237
271
  private_class_method :retire_file
@@ -4,7 +4,7 @@ module Legion
4
4
  module Extensions
5
5
  module Knowledge
6
6
  module Runners
7
- module Maintenance
7
+ module Maintenance # rubocop:disable Legion/Extension/RunnerIncludeHelpers
8
8
  module_function
9
9
 
10
10
  def detect_orphans(path:)
@@ -99,7 +99,7 @@ module Legion
99
99
 
100
100
  rows = base.select(:confidence, :status, :access_count, :embedding, :created_at).all
101
101
  apollo_stats_from_rows(base, rows, total)
102
- rescue StandardError
102
+ rescue StandardError => _e
103
103
  apollo_defaults
104
104
  end
105
105
  private_class_method :build_apollo_stats
@@ -155,7 +155,7 @@ module Legion
155
155
 
156
156
  def load_manifest_files(path)
157
157
  manifest = Helpers::ManifestStore.load(corpus_path: path)
158
- manifest.map { |e| e[:path] }.compact.uniq
158
+ manifest.filter_map { |e| e[:path] }.uniq
159
159
  end
160
160
  private_class_method :load_manifest_files
161
161
 
@@ -168,7 +168,7 @@ module Legion
168
168
  .select_map(Sequel.lit("source_context->>'source_file'"))
169
169
  .compact
170
170
  .uniq
171
- rescue StandardError
171
+ rescue StandardError => _e
172
172
  []
173
173
  end
174
174
  private_class_method :load_apollo_source_files
@@ -180,7 +180,7 @@ module Legion
180
180
  .where(Sequel.pg_array_op(:tags).contains(Sequel.pg_array(['document_chunk'])))
181
181
  .exclude(status: 'archived')
182
182
  .count
183
- rescue StandardError
183
+ rescue StandardError => _e
184
184
  0
185
185
  end
186
186
  private_class_method :count_apollo_chunks
@@ -208,7 +208,7 @@ module Legion
208
208
  .select_map([:id, :access_count, :confidence,
209
209
  Sequel.lit("source_context->>'source_file' AS source_file")])
210
210
  .map { |r| { id: r[0], access_count: r[1], confidence: r[2], source_file: r[3] } }
211
- rescue StandardError
211
+ rescue StandardError => _e
212
212
  []
213
213
  end
214
214
  private_class_method :hot_chunks
@@ -229,7 +229,7 @@ module Legion
229
229
  .select_map([:id, :confidence, :created_at,
230
230
  Sequel.lit("source_context->>'source_file' AS source_file")])
231
231
  .map { |r| { id: r[0], confidence: r[1], created_at: r[2]&.iso8601, source_file: r[3] } }
232
- rescue StandardError
232
+ rescue StandardError => _e
233
233
  []
234
234
  end
235
235
  private_class_method :cold_chunks
@@ -248,7 +248,7 @@ module Legion
248
248
  .select_map([:id, :confidence, :access_count,
249
249
  Sequel.lit("source_context->>'source_file' AS source_file")])
250
250
  .map { |r| { id: r[0], confidence: r[1], access_count: r[2], source_file: r[3] } }
251
- rescue StandardError
251
+ rescue StandardError => _e
252
252
  []
253
253
  end
254
254
  private_class_method :low_confidence_chunks
@@ -268,7 +268,7 @@ module Legion
268
268
  chunks_never_accessed: base.where(access_count: 0).count,
269
269
  chunks_below_threshold: base.where { confidence < settings_stale_threshold }.count
270
270
  }
271
- rescue StandardError
271
+ rescue StandardError => _e
272
272
  defaults
273
273
  end
274
274
  private_class_method :quality_summary
@@ -277,7 +277,7 @@ module Legion
277
277
  return 0 unless defined?(Legion::Data::Model::ApolloAccessLog)
278
278
 
279
279
  Legion::Data::Model::ApolloAccessLog.where(action: 'knowledge_query').count
280
- rescue StandardError
280
+ rescue StandardError => _e
281
281
  0
282
282
  end
283
283
  private_class_method :query_count
@@ -286,7 +286,7 @@ module Legion
286
286
  return 0.3 unless defined?(Legion::Settings)
287
287
 
288
288
  Legion::Settings.dig(:knowledge, :maintenance, :stale_threshold) || 0.3
289
- rescue StandardError
289
+ rescue StandardError => _e
290
290
  0.3
291
291
  end
292
292
  private_class_method :settings_stale_threshold
@@ -295,7 +295,7 @@ module Legion
295
295
  return 7 unless defined?(Legion::Settings)
296
296
 
297
297
  Legion::Settings.dig(:knowledge, :maintenance, :cold_chunk_days) || 7
298
- rescue StandardError
298
+ rescue StandardError => _e
299
299
  7
300
300
  end
301
301
  private_class_method :settings_cold_chunk_days
@@ -304,7 +304,7 @@ module Legion
304
304
  return 10 unless defined?(Legion::Settings)
305
305
 
306
306
  Legion::Settings.dig(:knowledge, :maintenance, :quality_report_limit) || 10
307
- rescue StandardError
307
+ rescue StandardError => _e
308
308
  10
309
309
  end
310
310
  private_class_method :settings_quality_limit
@@ -4,7 +4,7 @@ module Legion
4
4
  module Extensions
5
5
  module Knowledge
6
6
  module Runners
7
- module Monitor
7
+ module Monitor # rubocop:disable Legion/Extension/RunnerIncludeHelpers
8
8
  module_function
9
9
 
10
10
  DEFAULT_EXTENSIONS = %w[.md .txt].freeze
@@ -18,7 +18,7 @@ module Legion
18
18
  end
19
19
 
20
20
  monitors
21
- rescue StandardError
21
+ rescue StandardError => _e
22
22
  []
23
23
  end
24
24
 
@@ -70,7 +70,7 @@ module Legion
70
70
  monitors.each do |m|
71
71
  scan = Helpers::Manifest.scan(path: m[:path], extensions: m[:extensions])
72
72
  total_files += scan.size
73
- rescue StandardError
73
+ rescue StandardError => _e
74
74
  next
75
75
  end
76
76
 
@@ -85,7 +85,7 @@ module Legion
85
85
  return nil unless defined?(Legion::Settings) && !Legion::Settings[:knowledge].nil?
86
86
 
87
87
  Legion::Settings.dig(:knowledge, :monitors)
88
- rescue StandardError
88
+ rescue StandardError => _e
89
89
  nil
90
90
  end
91
91
  private_class_method :read_monitors_setting
@@ -94,7 +94,7 @@ module Legion
94
94
  return nil unless defined?(Legion::Settings) && !Legion::Settings[:knowledge].nil?
95
95
 
96
96
  Legion::Settings.dig(:knowledge, :corpus_path)
97
- rescue StandardError
97
+ rescue StandardError => _e
98
98
  nil
99
99
  end
100
100
  private_class_method :read_legacy_corpus_path
@@ -107,7 +107,7 @@ module Legion
107
107
  knowledge[:monitors] = monitors
108
108
  loader.settings[:knowledge] = knowledge
109
109
  true
110
- rescue StandardError
110
+ rescue StandardError => _e
111
111
  false
112
112
  end
113
113
  private_class_method :persist_monitors
@@ -6,7 +6,7 @@ module Legion
6
6
  module Extensions
7
7
  module Knowledge
8
8
  module Runners
9
- module Query
9
+ module Query # rubocop:disable Legion/Extension/RunnerIncludeHelpers
10
10
  module_function
11
11
 
12
12
  def query(question:, top_k: nil, synthesize: true)
@@ -74,7 +74,7 @@ module Legion
74
74
  limit: top_k,
75
75
  tags: ['document_chunk']
76
76
  )
77
- rescue StandardError
77
+ rescue StandardError => _e
78
78
  []
79
79
  end
80
80
  private_class_method :retrieve_chunks
@@ -90,7 +90,7 @@ module Legion
90
90
  "Context:\n#{context_text}\n\nQuestion: #{question}\n\nAnswer:"
91
91
  end
92
92
 
93
- result = Legion::LLM.chat(message: prompt, caller: { extension: 'lex-knowledge' })
93
+ result = llm_chat(message: prompt, caller: { extension: 'lex-knowledge' })
94
94
  result.is_a?(Hash) ? result[:content] : result
95
95
  rescue StandardError => e
96
96
  "Error generating answer: #{e.message}"
@@ -159,7 +159,7 @@ module Legion
159
159
  synthesized: synthesized,
160
160
  rating: rating
161
161
  })
162
- rescue StandardError
162
+ rescue StandardError => _e
163
163
  nil
164
164
  end
165
165
  private_class_method :emit_feedback_event
@@ -173,7 +173,7 @@ module Legion
173
173
  return nil unless defined?(Legion::Settings)
174
174
 
175
175
  Legion::Settings.dig(:knowledge, :query, :top_k)
176
- rescue StandardError
176
+ rescue StandardError => _e
177
177
  nil
178
178
  end
179
179
  private_class_method :settings_top_k
@@ -3,7 +3,7 @@
3
3
  module Legion
4
4
  module Extensions
5
5
  module Knowledge
6
- VERSION = '0.6.0'
6
+ VERSION = '0.6.4'
7
7
  end
8
8
  end
9
9
  end
@@ -12,22 +12,26 @@ require_relative 'knowledge/runners/maintenance'
12
12
  require_relative 'knowledge/runners/monitor'
13
13
  require_relative 'knowledge/client'
14
14
 
15
- if defined?(Legion::Transport)
15
+ if Legion.const_defined?(:Transport, false)
16
16
  require_relative 'knowledge/transport/exchanges/knowledge'
17
17
  require_relative 'knowledge/transport/queues/ingest'
18
18
  require_relative 'knowledge/transport/messages/ingest_message'
19
19
  require_relative 'knowledge/transport/messages/monitor_reload'
20
20
  end
21
21
 
22
- require_relative 'knowledge/actors/corpus_watcher' if defined?(Legion::Extensions::Actors::Every)
23
- require_relative 'knowledge/actors/maintenance_runner' if defined?(Legion::Extensions::Actors::Every)
22
+ require_relative 'knowledge/actors/corpus_watcher'
23
+ require_relative 'knowledge/actors/maintenance_runner'
24
24
 
25
- require_relative 'knowledge/actors/corpus_ingest' if defined?(Legion::Extensions::Actors::Subscription)
25
+ require_relative 'knowledge/actors/corpus_ingest'
26
26
 
27
27
  module Legion
28
28
  module Extensions
29
29
  module Knowledge
30
30
  extend Legion::Extensions::Core if defined?(Legion::Extensions::Core)
31
+
32
+ def self.remote_invocable?
33
+ false
34
+ end
31
35
  end
32
36
  end
33
37
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: lex-knowledge
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.6.0
4
+ version: 0.6.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Matthew Iverson