lex-knowledge 0.1.2 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: ea1661f154fc5184b961caac74654775240dcc49fd7d8c6be469f6cd5520429c
4
- data.tar.gz: 3499bcb535edb1f56e896ed9c462a2cfabb9d7d1804dc55ecd559fc3b1941ae9
3
+ metadata.gz: 2a5307a5b8c19abaedd5f7d6be95d0fc3d068fa0b6d7366293c70da0edec7825
4
+ data.tar.gz: 756114f38b345f356a826e09c50714d8b14ff2a4baa6ef0087ddab9f2d75ff78
5
5
  SHA512:
6
- metadata.gz: 8b50acc322c82ae7629e5152e31c84ce9b348fdd1226e96aa7bc3c1f9f67cd330b809212f49c19636d93a45a199ce4e936a3d172cf71997ffad5aa69eb9712ef
7
- data.tar.gz: c1feac59942cf706beb303cbac9989e569937048d2f666af3e762ef50d69659c46de6bb350f69cc16a5103581d54682c3f68886f84a4a840b16b1f0e8cfb82e6
6
+ metadata.gz: 6d77ae8947c2ac53af380a35935c33cb075f4ae1a7b8c2b495389701fb15652a7baaefeeddd32606bc81d9e4c6f8f319562fe869906523b34ca4563ce0e245c4
7
+ data.tar.gz: a30c57db2c8cb0da0d54ac73afc178ef55892123e4a6878aa1bf1ca62014663c508c699857970f5582a0e0361a5fd3c29349dbe913a6d1ae8a19aed78515fcc0
@@ -0,0 +1,23 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Legion
4
+ module Extensions
5
+ module Knowledge
6
+ module Actor
7
# Actor built on Legion::Extensions::Actors::Subscription that names the
# Ingest runner's +ingest_file+ function as its target.
#
# NOTE(review): presumably the actor framework calls that runner function for
# every message the subscription receives — confirm against the base class.
class CorpusIngest < Legion::Extensions::Actors::Subscription
  # @return [String] fully-qualified name of the runner this actor targets
  def runner_class = 'Legion::Extensions::Knowledge::Runners::Ingest'

  # @return [String] name of the runner function this actor targets
  def runner_function = 'ingest_file'

  # @return [Boolean] always false
  def check_subtask? = false

  # @return [Boolean] always false
  def generate_task? = false

  # Reports whether both Legion::Transport and the Ingest runner are loaded.
  #
  # `defined?` evaluates to a description String (or nil), so the result is
  # converted explicitly to keep this predicate's return value a real boolean.
  #
  # @return [Boolean] true only when both constants are defined; false
  #   otherwise, including when the check itself raises
  def enabled?
    return false unless defined?(Legion::Transport)

    defined?(Legion::Extensions::Knowledge::Runners::Ingest) ? true : false
  rescue StandardError
    false
  end
end
20
+ end
21
+ end
22
+ end
23
+ end
@@ -0,0 +1,46 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Legion
4
+ module Extensions
5
+ module Knowledge
6
+ module Actor
7
# Actor built on Legion::Extensions::Actors::Every that names the Ingest
# runner's +ingest_corpus+ function as its target and supplies the configured
# corpus path as the runner argument.
#
# NOTE(review): presumably the base class re-runs the runner every
# +every_interval+ units of time — confirm the unit against Actors::Every.
class CorpusWatcher < Legion::Extensions::Actors::Every
  # Interval used when nothing usable is configured.
  DEFAULT_INTERVAL = 300

  # @return [String] fully-qualified name of the runner this actor targets
  def runner_class = 'Legion::Extensions::Knowledge::Runners::Ingest'

  # @return [String] name of the runner function this actor targets
  def runner_function = 'ingest_corpus'

  # @return [Boolean] always false
  def check_subtask? = false

  # @return [Boolean] always false
  def generate_task? = false

  # Interval read from settings at knowledge -> actors -> watcher_interval.
  #
  # Only positive numbers are accepted; a missing, non-numeric, zero or
  # negative setting falls back to DEFAULT_INTERVAL so a bad config value
  # cannot reach the scheduler.
  #
  # @return [Numeric] the configured interval or DEFAULT_INTERVAL
  def every_interval
    configured = knowledge_setting(:actors, :watcher_interval)
    configured.is_a?(Numeric) && configured.positive? ? configured : DEFAULT_INTERVAL
  rescue StandardError
    DEFAULT_INTERVAL
  end

  # Reports whether a non-empty corpus path is configured.
  #
  # @return [Boolean] strictly true or false; false when the path is missing,
  #   empty, or cannot be inspected
  def enabled?
    path = corpus_path
    !path.nil? && !path.empty?
  rescue StandardError
    false
  end

  # @return [Hash] arguments for the runner: the configured corpus path under
  #   the :path key (nil when none is configured)
  def args
    { path: corpus_path }
  end

  private

  # @return [Object, nil] the knowledge -> corpus_path setting, or nil when
  #   it is unavailable or the lookup raises
  def corpus_path
    knowledge_setting(:corpus_path)
  rescue StandardError
    nil
  end

  # Looks up a value nested under the :knowledge settings key.
  #
  # @param keys [Array<Symbol>] path below :knowledge
  # @return [Object, nil] nil when Legion::Settings is not loaded or has no
  #   :knowledge section
  def knowledge_setting(*keys)
    return nil unless defined?(Legion::Settings) && !Legion::Settings[:knowledge].nil?

    Legion::Settings.dig(:knowledge, *keys)
  end
end
43
+ end
44
+ end
45
+ end
46
+ end
@@ -7,8 +7,28 @@ module Legion
7
7
  module Corpus
8
8
  module_function
9
9
 
10
# Summarises the corpus found at +path+ without ingesting anything.
#
# Files are listed with Helpers::Manifest.scan, each one is parsed with
# Helpers::Parser.parse and chunked with Helpers::Chunker.chunk to estimate
# how many chunks the corpus would produce.
#
# @param path [String] location of the corpus on disk
# @param extensions [Object, nil] forwarded to Manifest.scan only when given
# @return [Hash] on success: :success (true), :path, :file_count,
#   :estimated_chunks and :total_bytes; on failure: :success (false) and
#   :error with a message
def corpus_stats(path:, extensions: nil)
  # A nil path is reported the same way as a missing one instead of leaking
  # the TypeError that File.exist?(nil) would raise.
  return { success: false, error: 'path does not exist' } unless path && ::File.exist?(path)

  scan_opts = { path: path }
  scan_opts[:extensions] = extensions if extensions
  entries = Helpers::Manifest.scan(**scan_opts)

  estimated_chunks = entries.sum do |entry|
    sections = Helpers::Parser.parse(file_path: entry[:path])
    # A first section carrying :error marks a file the parser rejected.
    next 0 if sections.first&.key?(:error)

    Helpers::Chunker.chunk(sections: sections).size
  end

  {
    success: true,
    path: path,
    file_count: entries.size,
    estimated_chunks: estimated_chunks,
    # Entries without a recorded size count as zero bytes rather than
    # failing the whole report.
    total_bytes: entries.sum { |entry| entry[:size] || 0 }
  }
rescue StandardError => e
  { success: false, error: e.message }
end
13
33
  end
14
34
  end
@@ -101,24 +101,22 @@ module Legion
101
101
  private_class_method :upsert_chunk
102
102
 
103
103
  def chunk_exists?(content_hash)
104
- return false unless defined?(Legion::Extensions::Apollo)
105
-
106
- Legion::Extensions::Apollo::Runners::Knowledge.retrieve_relevant(
107
- query: content_hash,
108
- limit: 1,
109
- tags: ['document_chunk'],
110
- filter: { content_hash: content_hash }
111
- ).any?
104
+ return false unless defined?(Legion::Data::Model::ApolloEntry)
105
+
106
+ Legion::Data::Model::ApolloEntry
107
+ .where(Sequel.pg_array_op(:tags).contains(Sequel.pg_array(['document_chunk'])))
108
+ .where(Sequel.like(:content, "%#{content_hash}%"))
109
+ .any?
112
110
  rescue StandardError
113
111
  false
114
112
  end
115
113
  private_class_method :chunk_exists?
116
114
 
117
115
  def generate_embedding(content)
118
- return nil unless defined?(Legion::Extensions::Apollo)
119
- return nil unless defined?(Legion::Extensions::Apollo::Helpers::Embedding)
116
+ return nil unless defined?(Legion::LLM) && Legion::LLM.respond_to?(:embed)
120
117
 
121
- Legion::Extensions::Apollo::Helpers::Embedding.generate(content)
118
+ result = Legion::LLM.embed(content)
119
+ result.is_a?(Hash) ? result[:vector] : nil
122
120
  rescue StandardError
123
121
  nil
124
122
  end
@@ -0,0 +1,17 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Legion
4
+ module Extensions
5
+ module Knowledge
6
+ module Transport
7
+ module Exchanges
8
# Exchange declaration for the knowledge extension, built on
# Legion::Transport::Exchange.
class Knowledge < Legion::Transport::Exchange
  # @return [String] name of the exchange
  def exchange_name
    'knowledge'
  end

  # @return [String] exchange type
  def type
    'topic'
  end

  # @return [Boolean] always true
  def durable
    true
  end
end
13
+ end
14
+ end
15
+ end
16
+ end
17
+ end
@@ -0,0 +1,16 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Legion
4
+ module Extensions
5
+ module Knowledge
6
+ module Transport
7
+ module Messages
8
# Message declaration for ingest requests, built on
# Legion::Transport::Message.
class IngestMessage < Legion::Transport::Message
  # @return [String] name of the exchange this message targets
  def exchange_name
    'knowledge'
  end

  # @return [String] routing key attached to the message
  def routing_key
    'knowledge.ingest'
  end
end
12
+ end
13
+ end
14
+ end
15
+ end
16
+ end
@@ -0,0 +1,18 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Legion
4
+ module Extensions
5
+ module Knowledge
6
+ module Transport
7
+ module Queues
8
# Queue declaration for ingest requests, built on Legion::Transport::Queue.
class Ingest < Legion::Transport::Queue
  # @return [String] name of the queue
  def queue_name
    'knowledge.ingest'
  end

  # @return [String] name of the exchange the queue is associated with
  def exchange_name
    'knowledge'
  end

  # @return [String] routing key associated with the queue
  def routing_key
    'knowledge.ingest'
  end

  # @return [Boolean] always true
  def durable
    true
  end
end
14
+ end
15
+ end
16
+ end
17
+ end
18
+ end
@@ -3,7 +3,7 @@
3
3
  module Legion
4
4
  module Extensions
5
5
  module Knowledge
6
- VERSION = '0.1.2'
6
+ VERSION = '0.2.0'
7
7
  end
8
8
  end
9
9
  end
@@ -9,6 +9,16 @@ require_relative 'knowledge/runners/query'
9
9
  require_relative 'knowledge/runners/corpus'
10
10
  require_relative 'knowledge/client'
11
11
 
12
+ if defined?(Legion::Transport)
13
+ require_relative 'knowledge/transport/exchanges/knowledge'
14
+ require_relative 'knowledge/transport/queues/ingest'
15
+ require_relative 'knowledge/transport/messages/ingest_message'
16
+ end
17
+
18
+ require_relative 'knowledge/actors/corpus_watcher' if defined?(Legion::Extensions::Actors::Every)
19
+
20
+ require_relative 'knowledge/actors/corpus_ingest' if defined?(Legion::Extensions::Actors::Subscription)
21
+
12
22
  module Legion
13
23
  module Extensions
14
24
  module Knowledge
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: lex-knowledge
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.2
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Matthew Iverson
@@ -116,6 +116,8 @@ extra_rdoc_files: []
116
116
  files:
117
117
  - README.md
118
118
  - lib/legion/extensions/knowledge.rb
119
+ - lib/legion/extensions/knowledge/actors/corpus_ingest.rb
120
+ - lib/legion/extensions/knowledge/actors/corpus_watcher.rb
119
121
  - lib/legion/extensions/knowledge/client.rb
120
122
  - lib/legion/extensions/knowledge/helpers/chunker.rb
121
123
  - lib/legion/extensions/knowledge/helpers/manifest.rb
@@ -123,6 +125,9 @@ files:
123
125
  - lib/legion/extensions/knowledge/runners/corpus.rb
124
126
  - lib/legion/extensions/knowledge/runners/ingest.rb
125
127
  - lib/legion/extensions/knowledge/runners/query.rb
128
+ - lib/legion/extensions/knowledge/transport/exchanges/knowledge.rb
129
+ - lib/legion/extensions/knowledge/transport/messages/ingest_message.rb
130
+ - lib/legion/extensions/knowledge/transport/queues/ingest.rb
126
131
  - lib/legion/extensions/knowledge/version.rb
127
132
  homepage: https://github.com/LegionIO/lex-knowledge
128
133
  licenses: