lex-knowledge 0.5.0 → 0.6.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 8d512db91b31e6d8a9747a3987aa90dd406d04f8987400787f6de27b13eb10c5
4
- data.tar.gz: 2d9df5e1289bb80f603dbd882863d1802bf942e8ff12940d259ba44e25b728f3
3
+ metadata.gz: 40412f676840f9927c0e66d7603678bbde1489dcb9a2faba958b1a684fd47b3d
4
+ data.tar.gz: f9247b163289a8e3e07deaf63efbab11703e153150e91d77745eca010cb49b21
5
5
  SHA512:
6
- metadata.gz: 2022e4654a3a815e8c5433daaad7c2e9767b039c51513e12d9cd6c71a2de47e3ee883c6e0624aa9ac8f6108439c19d4552260694482db4e2da9d58851fa093bd
7
- data.tar.gz: d1afc8cb8fdcd0317ee9fa416c91827ca3536e40fed404791a355b4b4662360211e3c064dc09e8504d0ed5eb359f1dc906edaa57277f205b1aa9eafedcf261da
6
+ metadata.gz: 83cdc62303978876ae5f04c50d3bcf5fcd08faddaa3ee49dc8ac092c49df9665419fb2c52452d7247357b5e424d6122111d82b9e772879bd2afd0068dc780918
7
+ data.tar.gz: 24f0a1208fc396058ae014c497ef5bc49aa96dfe185ad32d0628a19b8fee4ab1c8754048d77d06a73a4a3d42f7c2c1537e6653fecae2b2b280c4b38e6f169745
@@ -10,34 +10,39 @@ module Legion
10
10
  def check_subtask? = false
11
11
  def generate_task? = false
12
12
 
13
- def every_interval
13
# Returns the interval value for this actor.
#
# Uses `knowledge.actors.watcher_interval` from Legion::Settings when
# Legion::Settings is loaded and its `:knowledge` section is not nil; falls
# back to 300 when the setting is absent or reading it raises.
# NOTE(review): the unit is presumably seconds - confirm against the actor base class.
#
# @return [Object] the configured value, or 300
def time
  fallback = 300
  return fallback unless defined?(Legion::Settings)
  return fallback if Legion::Settings[:knowledge].nil?

  Legion::Settings.dig(:knowledge, :actors, :watcher_interval) || fallback
rescue StandardError => e
  # Settings access is best-effort: report the problem and use the default.
  log.warn(e.message)
  300
end
22
23
 
23
24
  def enabled?
24
- corpus_path && !corpus_path.empty?
25
- rescue StandardError
25
+ resolve_monitors.any?
26
+ rescue StandardError => e
27
+ log.warn(e.message)
26
28
  false
27
29
  end
28
30
 
29
31
  def args
30
- { path: corpus_path }
32
+ { monitors: resolve_monitors }
31
33
  end
32
34
 
33
35
  private
34
36
 
35
- def corpus_path
36
- return nil unless defined?(Legion::Settings) && !Legion::Settings[:knowledge].nil?
37
# Logger used by this actor's rescue handlers.
def log = Legion::Logging
37
40
 
38
- Legion::Settings.dig(:knowledge, :corpus_path)
39
- rescue StandardError
40
- nil
41
# Asks Runners::Monitor for the current monitor list.
#
# @return [Array] the resolved monitors, or [] (after logging a warning)
#   when the lookup raises a StandardError
def resolve_monitors
  runner = Runners::Monitor
  runner.resolve_monitors
rescue StandardError => e
  log.warn(e.message)
  []
end
42
47
  end
43
48
  end
@@ -10,13 +10,14 @@ module Legion
10
10
  def check_subtask? = false
11
11
  def generate_task? = false
12
12
 
13
- def every_interval
13
# Returns the interval value for this actor.
#
# Uses `knowledge.actors.maintenance_interval` from Legion::Settings when
# Legion::Settings is loaded and its `:knowledge` section is not nil; falls
# back to 21_600 when the setting is absent or reading it raises.
# NOTE(review): the unit is presumably seconds - confirm against the actor base class.
#
# @return [Object] the configured value, or 21_600
def time
  fallback = 21_600
  return fallback unless defined?(Legion::Settings)
  return fallback if Legion::Settings[:knowledge].nil?

  Legion::Settings.dig(:knowledge, :actors, :maintenance_interval) || fallback
rescue StandardError => e
  # Settings access is best-effort: report the problem and use the default.
  log.warn(e.message)
  21_600
end
22
23
 
@@ -24,7 +25,8 @@ module Legion
24
25
  return false unless corpus_path && !corpus_path.empty?
25
26
 
26
27
  true
27
- rescue StandardError
28
+ rescue StandardError => e
29
+ log.warn(e.message)
28
30
  false
29
31
  end
30
32
 
@@ -34,11 +36,16 @@ module Legion
34
36
 
35
37
  private
36
38
 
39
# Logger used by this actor's rescue handlers.
def log = Legion::Logging
42
+
37
43
  def corpus_path
38
44
  return nil unless defined?(Legion::Settings) && !Legion::Settings[:knowledge].nil?
39
45
 
40
46
  Legion::Settings.dig(:knowledge, :corpus_path)
41
- rescue StandardError
47
+ rescue StandardError => e
48
+ log.warn(e.message)
42
49
  nil
43
50
  end
44
51
  end
@@ -8,6 +8,7 @@ module Legion
8
8
  include Runners::Query
9
9
  include Runners::Corpus
10
10
  include Runners::Maintenance
11
+ include Runners::Monitor
11
12
  end
12
13
  end
13
14
  end
@@ -1,5 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require 'securerandom'
4
+
3
5
  module Legion
4
6
  module Extensions
5
7
  module Knowledge
@@ -7,6 +9,11 @@ module Legion
7
9
  module Ingest
8
10
  module_function
9
11
 
12
# Logger used by the rescue handlers in this runner.
def log = Legion::Logging
15
+ private_class_method :log
16
+
10
17
  def scan_corpus(path:, extensions: nil)
11
18
  opts = { path: path }
12
19
  opts[:extensions] = extensions if extensions
@@ -22,7 +29,17 @@ module Legion
22
29
  }
23
30
  end
24
31
 
25
- def ingest_corpus(path:, dry_run: false, force: false)
32
# Ingests either a list of monitors or a single corpus path.
#
# A non-empty `monitors` list takes precedence and is handed to
# `ingest_monitors`; otherwise `path` is handed to `ingest_corpus_path`.
#
# @param path [String, nil] corpus directory; required when `monitors` is nil or empty
# @param monitors [Array, nil] monitor entries
# @param dry_run [Boolean] forwarded to the delegate
# @param force [Boolean] forwarded to the delegate
# @return [Object] the delegate's result, or `{ success: false, error: }`
#   when an ArgumentError is raised (including the missing-path case)
def ingest_corpus(path: nil, monitors: nil, dry_run: false, force: false)
  options = { dry_run: dry_run, force: force }

  if monitors&.any?
    ingest_monitors(monitors: monitors, **options)
  elsif path.nil?
    raise ArgumentError, 'path is required when monitors is not provided'
  else
    ingest_corpus_path(path: path, **options)
  end
rescue ArgumentError => e
  log.warn(e.message)
  { success: false, error: e.message }
end
41
+
42
+ def ingest_corpus_path(path:, dry_run: false, force: false)
26
43
  current = Helpers::Manifest.scan(path: path)
27
44
  previous = force ? [] : Helpers::ManifestStore.load(corpus_path: path)
28
45
  delta = Helpers::Manifest.diff(current: current, previous: previous)
@@ -54,8 +71,57 @@ module Legion
54
71
  chunks_updated: chunks_updated
55
72
  }
56
73
  rescue StandardError => e
74
+ log.warn(e.message)
75
+ { success: false, error: e.message }
76
+ end
77
+ private_class_method :ingest_corpus_path
78
+
79
# Ingests every monitor and aggregates the per-monitor counters.
#
# Each monitor is ingested independently through `ingest_corpus`; a monitor
# whose ingest fails or raises does not abort the others. Counters are summed
# over successful monitors only. Failed monitors are reported through
# `monitors_failed` and `errors` so they are not lost from the aggregate.
#
# NOTE(review): only `monitor[:path]` is read here; a monitor's `:extensions`
# are not forwarded because `ingest_corpus` takes no extensions argument.
#
# @param monitors [Array<Hash>] monitor entries with a `:path` key
# @param dry_run [Boolean] forwarded to `ingest_corpus`
# @param force [Boolean] forwarded to `ingest_corpus`
# @return [Hash] `success: true`, `monitors_processed`, the summed counters,
#   `monitors_failed` and `errors` (`[{ path:, error: }]`); or
#   `{ success: false, error: }` when aggregation itself raises
def ingest_monitors(monitors:, dry_run: false, force: false)
  results = monitors.map do |monitor|
    ingest_corpus(path: monitor[:path], dry_run: dry_run, force: force)
  rescue StandardError => e
    log.warn(e.message)
    { success: false, path: monitor[:path], error: e.message }
  end

  # Pair each result with its monitor so failures can be reported by path.
  succeeded, failed = monitors.zip(results).partition { |(_monitor, result)| result[:success] }

  counters = %i[
    files_scanned files_added files_changed files_removed
    chunks_created chunks_skipped chunks_updated
  ]
  totals = counters.to_h do |counter|
    [counter, succeeded.sum { |(_monitor, result)| result[counter].to_i }]
  end
  errors = failed.map { |(monitor, result)| { path: monitor[:path], error: result[:error] } }

  {
    success: true,
    monitors_processed: results.size,
    **totals,
    monitors_failed: failed.size,
    errors: errors
  }
rescue StandardError => e
  log.warn(e.message)
  { success: false, error: e.message }
end
107
+ private_class_method :ingest_monitors
108
+
109
# Ingests an in-memory piece of content rather than a file.
#
# The content is wrapped in a single section whose heading and section path
# are the source type and whose source file is a generated
# `content://<source_type>/<uuid>` identifier. The section is chunked, the
# chunks are embedded in a batch, and each chunk is upserted.
#
# @param content [Object] the content placed in the section
# @param source_type [Symbol, String] used for the heading, section path and identifier
# @param metadata [Hash] echoed back in the result
# @return [Hash] `status: :ingested` with the chunk count, or `status: :failed`
#   with the error message when a StandardError is raised
def ingest_content(content:, source_type: :text, metadata: {})
  section = {
    content: content,
    heading: source_type.to_s,
    section_path: [source_type.to_s],
    source_file: "content://#{source_type}/#{SecureRandom.uuid}"
  }
  chunks = Helpers::Chunker.chunk(sections: [section])

  batch_embed_chunks(chunks, force: false).each do |pair|
    upsert_chunk_with_embedding(pair[:chunk], pair[:embedding], force: false, exists: pair[:exists] || false)
  end

  { status: :ingested, chunks: chunks.size, source_type: source_type, metadata: metadata }
rescue StandardError => e
  log.warn(e.message)
  { status: :failed, error: e.message, source_type: source_type, metadata: metadata }
end
59
125
 
60
126
  def ingest_file(file_path:, force: false)
61
127
  result = process_file(file_path, dry_run: false, force: force)
@@ -68,6 +134,7 @@ module Legion
68
134
  chunks_updated: result[:updated]
69
135
  }
70
136
  rescue StandardError => e
137
+ log.warn(e.message)
71
138
  { success: false, error: e.message }
72
139
  end
73
140
 
@@ -107,7 +174,8 @@ module Legion
107
174
  embed_map = needs_embed.empty? ? {} : build_embed_map(needs_embed)
108
175
 
109
176
  chunks.map { |c| { chunk: c, embedding: embed_map[c[:content_hash]], exists: exists_map.fetch(c[:content_hash], false) } }
110
- rescue StandardError
177
+ rescue StandardError => e
178
+ log.warn(e.message)
111
179
  paired_without_embed(chunks, {})
112
180
  end
113
181
  private_class_method :batch_embed_chunks
@@ -132,7 +200,8 @@ module Legion
132
200
  results.each_with_object({}) do |r, h|
133
201
  h[needs_embed[r[:index]][:content_hash]] = r[:vector] unless r[:error]
134
202
  end
135
- rescue StandardError
203
+ rescue StandardError => e
204
+ log.warn(e.message)
136
205
  {}
137
206
  end
138
207
  private_class_method :build_embed_map
@@ -144,7 +213,8 @@ module Legion
144
213
 
145
214
  ingest_to_apollo(chunk, embedding)
146
215
  force ? :updated : :created
147
- rescue StandardError
216
+ rescue StandardError => e
217
+ log.warn(e.message)
148
218
  :skipped
149
219
  end
150
220
  private_class_method :upsert_chunk_with_embedding
@@ -156,7 +226,8 @@ module Legion
156
226
  .where(Sequel.pg_array_op(:tags).contains(Sequel.pg_array(['document_chunk'])))
157
227
  .where(Sequel.like(:content, "%#{content_hash}%"))
158
228
  .any?
159
- rescue StandardError
229
+ rescue StandardError => e
230
+ log.warn(e.message)
160
231
  false
161
232
  end
162
233
  private_class_method :chunk_exists?
@@ -193,7 +264,8 @@ module Legion
193
264
  tags: [file_path, 'retired', 'document_chunk'].uniq,
194
265
  metadata: { source_file: file_path, retired: true }
195
266
  )
196
- rescue StandardError
267
+ rescue StandardError => e
268
+ log.warn(e.message)
197
269
  nil
198
270
  end
199
271
  private_class_method :retire_file
@@ -0,0 +1,118 @@
1
# frozen_string_literal: true

# Time#iso8601 comes from the stdlib `time` library on Rubies where it is not
# built in; load it here instead of relying on another gem having done so.
require 'time'

module Legion
  module Extensions
    module Knowledge
      module Runners
        # Runner functions that manage the list of monitored corpus
        # directories kept under the `knowledge.monitors` setting.
        #
        # Every public function rescues StandardError and returns plain data
        # (an Array, or a Hash with a `:success` flag) instead of raising.
        module Monitor
          module_function

          # Extensions given to a monitor added without an explicit list.
          DEFAULT_EXTENSIONS = %w[.md .txt].freeze

          # Extensions given to the monitor synthesized from the legacy
          # `knowledge.corpus_path` setting.
          LEGACY_EXTENSIONS = %w[.md .txt .docx .pdf].freeze

          # Returns the configured monitors, plus a 'legacy' monitor for
          # `knowledge.corpus_path` when that path is set and not already
          # monitored.
          #
          # The returned array is a copy: the legacy entry is never written
          # back into the settings object by this read-only lookup.
          #
          # @return [Array<Hash>] monitor entries; [] when anything raises
          def resolve_monitors
            monitors = configured_monitors
            legacy = read_legacy_corpus_path

            if legacy && !legacy.empty? && monitors.none? { |m| m[:path] == legacy }
              monitors << { path: legacy, extensions: LEGACY_EXTENSIONS.dup, label: 'legacy' }
            end

            monitors
          rescue StandardError
            []
          end

          # Registers a directory as a monitor.
          #
          # @param path [String] directory to monitor; expanded to an absolute path
          # @param extensions [Array<String>, nil] defaults to DEFAULT_EXTENSIONS
          # @param label [String, nil] defaults to the directory's basename
          # @return [Hash] `{ success: true, monitor: }` or `{ success: false, error: }`
          #   when the directory is missing, already registered, cannot be
          #   stored, or an error is raised
          def add_monitor(path:, extensions: nil, label: nil)
            abs_path = File.expand_path(path)
            unless File.directory?(abs_path)
              return { success: false, error: "Path #{abs_path} does not exist or is not a directory" }
            end

            existing = configured_monitors
            if existing.any? { |m| m[:path] == abs_path }
              return { success: false, error: "Path #{abs_path} is already registered" }
            end

            entry = {
              path: abs_path,
              extensions: extensions || DEFAULT_EXTENSIONS.dup,
              label: label || File.basename(abs_path),
              added_at: Time.now.utc.iso8601
            }

            # Do not claim success when the entry could not be stored.
            unless persist_monitors(existing + [entry])
              return { success: false, error: 'Unable to persist monitor settings' }
            end

            { success: true, monitor: entry }
          rescue StandardError => e
            { success: false, error: e.message }
          end

          # Removes a monitor identified by its path or its label.
          #
          # An exact path or label match is preferred; because add_monitor
          # stores expanded paths, a String identifier is also tried as an
          # expanded path.
          #
          # @param identifier [String] monitor path or label
          # @return [Hash] `{ success: true, removed: }` or `{ success: false, error: }`
          def remove_monitor(identifier:)
            existing = configured_monitors
            found = existing.find { |m| m[:path] == identifier || m[:label] == identifier }
            if found.nil? && identifier.is_a?(String)
              expanded = File.expand_path(identifier)
              found = existing.find { |m| m[:path] == expanded }
            end
            return { success: false, error: "Monitor '#{identifier}' not found" } unless found

            remaining = existing.reject { |m| m == found }
            unless persist_monitors(remaining)
              return { success: false, error: 'Unable to persist monitor settings' }
            end

            { success: true, removed: found }
          rescue StandardError => e
            { success: false, error: e.message }
          end

          # Lists the resolved monitors.
          #
          # @return [Hash] `{ success: true, monitors: }` or `{ success: false, error: }`
          def list_monitors
            { success: true, monitors: resolve_monitors }
          rescue StandardError => e
            { success: false, error: e.message }
          end

          # Counts monitors and the files found by scanning each of them.
          #
          # A monitor whose scan raises contributes 0 files and does not fail
          # the overall status.
          #
          # @return [Hash] `{ success: true, total_monitors:, total_files: }`
          #   or `{ success: false, error: }`
          def monitor_status
            monitors = resolve_monitors
            total_files = monitors.sum do |m|
              Helpers::Manifest.scan(path: m[:path], extensions: m[:extensions]).size
            rescue StandardError
              0
            end

            { success: true, total_monitors: monitors.size, total_files: total_files }
          rescue StandardError => e
            { success: false, error: e.message }
          end

          # --- private helpers ---

          # Fresh array of the monitors currently stored in settings. A single
          # Hash is treated as one monitor; nil becomes [].
          def configured_monitors
            raw = read_monitors_setting
            raw.is_a?(Hash) ? [raw] : Array(raw).dup
          end
          private_class_method :configured_monitors

          # Reads one key of the `:knowledge` settings section; nil when
          # Legion::Settings is not loaded, the section is nil, or reading raises.
          def read_knowledge_setting(key)
            return nil unless defined?(Legion::Settings) && !Legion::Settings[:knowledge].nil?

            Legion::Settings.dig(:knowledge, key)
          rescue StandardError
            nil
          end
          private_class_method :read_knowledge_setting

          # Raw value of `knowledge.monitors`, or nil.
          def read_monitors_setting
            read_knowledge_setting(:monitors)
          end
          private_class_method :read_monitors_setting

          # Raw value of the legacy `knowledge.corpus_path`, or nil.
          def read_legacy_corpus_path
            read_knowledge_setting(:corpus_path)
          end
          private_class_method :read_legacy_corpus_path

          # Stores the monitor list under `:knowledge` in the settings loader.
          # NOTE(review): this updates `loader.settings` in memory; whether it
          # is also written to disk is not visible from here.
          #
          # @return [Boolean] true when stored; false when Legion::Settings is
          #   not loaded or the update raises
          def persist_monitors(monitors)
            return false unless defined?(Legion::Settings)

            loader = Legion::Settings.loader
            knowledge = loader.settings[:knowledge] || {}
            knowledge[:monitors] = monitors
            loader.settings[:knowledge] = knowledge
            true
          rescue StandardError
            false
          end
          private_class_method :persist_monitors
        end
      end
    end
  end
end
@@ -0,0 +1,16 @@
1
+ # frozen_string_literal: true
2
+
3
module Legion
  module Extensions
    module Knowledge
      module Transport
        module Messages
          # Transport message that names the 'knowledge' exchange and the
          # 'knowledge.monitor.reload' routing key.
          class MonitorReload < Legion::Transport::Message
            # @return [String] name of the exchange for this message
            def exchange_name
              'knowledge'
            end

            # @return [String] routing key for this message
            def routing_key
              'knowledge.monitor.reload'
            end
          end
        end
      end
    end
  end
end
@@ -3,7 +3,7 @@
3
3
  module Legion
4
4
  module Extensions
5
5
  module Knowledge
6
- VERSION = '0.5.0'
6
+ VERSION = '0.6.3'
7
7
  end
8
8
  end
9
9
  end
@@ -9,12 +9,14 @@ require_relative 'knowledge/runners/ingest'
9
9
  require_relative 'knowledge/runners/query'
10
10
  require_relative 'knowledge/runners/corpus'
11
11
  require_relative 'knowledge/runners/maintenance'
12
+ require_relative 'knowledge/runners/monitor'
12
13
  require_relative 'knowledge/client'
13
14
 
14
15
  if defined?(Legion::Transport)
15
16
  require_relative 'knowledge/transport/exchanges/knowledge'
16
17
  require_relative 'knowledge/transport/queues/ingest'
17
18
  require_relative 'knowledge/transport/messages/ingest_message'
19
+ require_relative 'knowledge/transport/messages/monitor_reload'
18
20
  end
19
21
 
20
22
  require_relative 'knowledge/actors/corpus_watcher' if defined?(Legion::Extensions::Actors::Every)
@@ -26,6 +28,10 @@ module Legion
26
28
  module Extensions
27
29
  module Knowledge
28
30
  extend Legion::Extensions::Core if defined?(Legion::Extensions::Core)
31
+
32
# Always false for this extension.
def self.remote_invocable? = false
29
35
  end
30
36
  end
31
37
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: lex-knowledge
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.0
4
+ version: 0.6.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Matthew Iverson
@@ -127,9 +127,11 @@ files:
127
127
  - lib/legion/extensions/knowledge/runners/corpus.rb
128
128
  - lib/legion/extensions/knowledge/runners/ingest.rb
129
129
  - lib/legion/extensions/knowledge/runners/maintenance.rb
130
+ - lib/legion/extensions/knowledge/runners/monitor.rb
130
131
  - lib/legion/extensions/knowledge/runners/query.rb
131
132
  - lib/legion/extensions/knowledge/transport/exchanges/knowledge.rb
132
133
  - lib/legion/extensions/knowledge/transport/messages/ingest_message.rb
134
+ - lib/legion/extensions/knowledge/transport/messages/monitor_reload.rb
133
135
  - lib/legion/extensions/knowledge/transport/queues/ingest.rb
134
136
  - lib/legion/extensions/knowledge/version.rb
135
137
  homepage: https://github.com/LegionIO/lex-knowledge