lex-knowledge 0.1.2 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/legion/extensions/knowledge/actors/corpus_ingest.rb +23 -0
- data/lib/legion/extensions/knowledge/actors/corpus_watcher.rb +46 -0
- data/lib/legion/extensions/knowledge/runners/corpus.rb +22 -2
- data/lib/legion/extensions/knowledge/runners/ingest.rb +9 -11
- data/lib/legion/extensions/knowledge/transport/exchanges/knowledge.rb +17 -0
- data/lib/legion/extensions/knowledge/transport/messages/ingest_message.rb +16 -0
- data/lib/legion/extensions/knowledge/transport/queues/ingest.rb +18 -0
- data/lib/legion/extensions/knowledge/version.rb +1 -1
- data/lib/legion/extensions/knowledge.rb +10 -0
- metadata +6 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 2a5307a5b8c19abaedd5f7d6be95d0fc3d068fa0b6d7366293c70da0edec7825
|
|
4
|
+
data.tar.gz: 756114f38b345f356a826e09c50714d8b14ff2a4baa6ef0087ddab9f2d75ff78
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 6d77ae8947c2ac53af380a35935c33cb075f4ae1a7b8c2b495389701fb15652a7baaefeeddd32606bc81d9e4c6f8f319562fe869906523b34ca4563ce0e245c4
|
|
7
|
+
data.tar.gz: a30c57db2c8cb0da0d54ac73afc178ef55892123e4a6878aa1bf1ca62014663c508c699857970f5582a0e0361a5fd3c29349dbe913a6d1ae8a19aed78515fcc0
|
|
data/lib/legion/extensions/knowledge/actors/corpus_ingest.rb
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Legion
|
|
4
|
+
module Extensions
|
|
5
|
+
module Knowledge
|
|
6
|
+
module Actor
|
|
7
|
+
class CorpusIngest < Legion::Extensions::Actors::Subscription
|
|
8
|
+
def runner_class = 'Legion::Extensions::Knowledge::Runners::Ingest'
|
|
9
|
+
def runner_function = 'ingest_file'
|
|
10
|
+
def check_subtask? = false
|
|
11
|
+
def generate_task? = false
|
|
12
|
+
|
|
13
|
+
def enabled?
|
|
14
|
+
defined?(Legion::Transport) &&
|
|
15
|
+
defined?(Legion::Extensions::Knowledge::Runners::Ingest)
|
|
16
|
+
rescue StandardError
|
|
17
|
+
false
|
|
18
|
+
end
|
|
19
|
+
end
|
|
20
|
+
end
|
|
21
|
+
end
|
|
22
|
+
end
|
|
23
|
+
end
|
|
data/lib/legion/extensions/knowledge/actors/corpus_watcher.rb
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Legion
|
|
4
|
+
module Extensions
|
|
5
|
+
module Knowledge
|
|
6
|
+
module Actor
|
|
7
|
+
class CorpusWatcher < Legion::Extensions::Actors::Every
|
|
8
|
+
def runner_class = 'Legion::Extensions::Knowledge::Runners::Ingest'
|
|
9
|
+
def runner_function = 'ingest_corpus'
|
|
10
|
+
def check_subtask? = false
|
|
11
|
+
def generate_task? = false
|
|
12
|
+
|
|
13
|
+
def every_interval
|
|
14
|
+
if defined?(Legion::Settings) && !Legion::Settings[:knowledge].nil?
|
|
15
|
+
Legion::Settings.dig(:knowledge, :actors, :watcher_interval) || 300
|
|
16
|
+
else
|
|
17
|
+
300
|
|
18
|
+
end
|
|
19
|
+
rescue StandardError
|
|
20
|
+
300
|
|
21
|
+
end
|
|
22
|
+
|
|
23
|
+
def enabled?
|
|
24
|
+
corpus_path && !corpus_path.empty?
|
|
25
|
+
rescue StandardError
|
|
26
|
+
false
|
|
27
|
+
end
|
|
28
|
+
|
|
29
|
+
def args
|
|
30
|
+
{ path: corpus_path }
|
|
31
|
+
end
|
|
32
|
+
|
|
33
|
+
private
|
|
34
|
+
|
|
35
|
+
def corpus_path
|
|
36
|
+
return nil unless defined?(Legion::Settings) && !Legion::Settings[:knowledge].nil?
|
|
37
|
+
|
|
38
|
+
Legion::Settings.dig(:knowledge, :corpus_path)
|
|
39
|
+
rescue StandardError
|
|
40
|
+
nil
|
|
41
|
+
end
|
|
42
|
+
end
|
|
43
|
+
end
|
|
44
|
+
end
|
|
45
|
+
end
|
|
46
|
+
end
|
|
data/lib/legion/extensions/knowledge/runners/corpus.rb
CHANGED
|
@@ -7,8 +7,28 @@ module Legion
|
|
|
7
7
|
module Corpus
|
|
8
8
|
module_function
|
|
9
9
|
|
|
10
|
-
def corpus_stats
|
|
11
|
-
{ success:
|
|
10
|
+
def corpus_stats(path:, extensions: nil)
|
|
11
|
+
return { success: false, error: 'path does not exist' } unless ::File.exist?(path)
|
|
12
|
+
|
|
13
|
+
opts = { path: path }
|
|
14
|
+
opts[:extensions] = extensions if extensions
|
|
15
|
+
entries = Helpers::Manifest.scan(**opts)
|
|
16
|
+
chunk_count = entries.sum do |entry|
|
|
17
|
+
sections = Helpers::Parser.parse(file_path: entry[:path])
|
|
18
|
+
next 0 if sections.first&.key?(:error)
|
|
19
|
+
|
|
20
|
+
Helpers::Chunker.chunk(sections: sections).size
|
|
21
|
+
end
|
|
22
|
+
|
|
23
|
+
{
|
|
24
|
+
success: true,
|
|
25
|
+
path: path,
|
|
26
|
+
file_count: entries.size,
|
|
27
|
+
estimated_chunks: chunk_count,
|
|
28
|
+
total_bytes: entries.sum { |e| e[:size] }
|
|
29
|
+
}
|
|
30
|
+
rescue StandardError => e
|
|
31
|
+
{ success: false, error: e.message }
|
|
12
32
|
end
|
|
13
33
|
end
|
|
14
34
|
end
|
|
data/lib/legion/extensions/knowledge/runners/ingest.rb
CHANGED
|
@@ -101,24 +101,22 @@ module Legion
|
|
|
101
101
|
private_class_method :upsert_chunk
|
|
102
102
|
|
|
103
103
|
def chunk_exists?(content_hash)
|
|
104
|
-
return false unless defined?(Legion::
|
|
105
|
-
|
|
106
|
-
Legion::
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
filter: { content_hash: content_hash }
|
|
111
|
-
).any?
|
|
104
|
+
return false unless defined?(Legion::Data::Model::ApolloEntry)
|
|
105
|
+
|
|
106
|
+
Legion::Data::Model::ApolloEntry
|
|
107
|
+
.where(Sequel.pg_array_op(:tags).contains(Sequel.pg_array(['document_chunk'])))
|
|
108
|
+
.where(Sequel.like(:content, "%#{content_hash}%"))
|
|
109
|
+
.any?
|
|
112
110
|
rescue StandardError
|
|
113
111
|
false
|
|
114
112
|
end
|
|
115
113
|
private_class_method :chunk_exists?
|
|
116
114
|
|
|
117
115
|
def generate_embedding(content)
|
|
118
|
-
return nil unless defined?(Legion::
|
|
119
|
-
return nil unless defined?(Legion::Extensions::Apollo::Helpers::Embedding)
|
|
116
|
+
return nil unless defined?(Legion::LLM) && Legion::LLM.respond_to?(:embed)
|
|
120
117
|
|
|
121
|
-
Legion::
|
|
118
|
+
result = Legion::LLM.embed(content)
|
|
119
|
+
result.is_a?(Hash) ? result[:vector] : nil
|
|
122
120
|
rescue StandardError
|
|
123
121
|
nil
|
|
124
122
|
end
|
|
data/lib/legion/extensions/knowledge/transport/exchanges/knowledge.rb
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Legion
|
|
4
|
+
module Extensions
|
|
5
|
+
module Knowledge
|
|
6
|
+
module Transport
|
|
7
|
+
module Exchanges
|
|
8
|
+
class Knowledge < Legion::Transport::Exchange
|
|
9
|
+
def exchange_name = 'knowledge'
|
|
10
|
+
def type = 'topic'
|
|
11
|
+
def durable = true
|
|
12
|
+
end
|
|
13
|
+
end
|
|
14
|
+
end
|
|
15
|
+
end
|
|
16
|
+
end
|
|
17
|
+
end
|
|
data/lib/legion/extensions/knowledge/transport/messages/ingest_message.rb
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Legion
|
|
4
|
+
module Extensions
|
|
5
|
+
module Knowledge
|
|
6
|
+
module Transport
|
|
7
|
+
module Messages
|
|
8
|
+
class IngestMessage < Legion::Transport::Message
|
|
9
|
+
def exchange_name = 'knowledge'
|
|
10
|
+
def routing_key = 'knowledge.ingest'
|
|
11
|
+
end
|
|
12
|
+
end
|
|
13
|
+
end
|
|
14
|
+
end
|
|
15
|
+
end
|
|
16
|
+
end
|
|
data/lib/legion/extensions/knowledge/transport/queues/ingest.rb
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Legion
|
|
4
|
+
module Extensions
|
|
5
|
+
module Knowledge
|
|
6
|
+
module Transport
|
|
7
|
+
module Queues
|
|
8
|
+
class Ingest < Legion::Transport::Queue
|
|
9
|
+
def queue_name = 'knowledge.ingest'
|
|
10
|
+
def exchange_name = 'knowledge'
|
|
11
|
+
def routing_key = 'knowledge.ingest'
|
|
12
|
+
def durable = true
|
|
13
|
+
end
|
|
14
|
+
end
|
|
15
|
+
end
|
|
16
|
+
end
|
|
17
|
+
end
|
|
18
|
+
end
|
|
data/lib/legion/extensions/knowledge.rb
CHANGED
|
@@ -9,6 +9,16 @@ require_relative 'knowledge/runners/query'
|
|
|
9
9
|
require_relative 'knowledge/runners/corpus'
|
|
10
10
|
require_relative 'knowledge/client'
|
|
11
11
|
|
|
12
|
+
if defined?(Legion::Transport)
|
|
13
|
+
require_relative 'knowledge/transport/exchanges/knowledge'
|
|
14
|
+
require_relative 'knowledge/transport/queues/ingest'
|
|
15
|
+
require_relative 'knowledge/transport/messages/ingest_message'
|
|
16
|
+
end
|
|
17
|
+
|
|
18
|
+
require_relative 'knowledge/actors/corpus_watcher' if defined?(Legion::Extensions::Actors::Every)
|
|
19
|
+
|
|
20
|
+
require_relative 'knowledge/actors/corpus_ingest' if defined?(Legion::Extensions::Actors::Subscription)
|
|
21
|
+
|
|
12
22
|
module Legion
|
|
13
23
|
module Extensions
|
|
14
24
|
module Knowledge
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: lex-knowledge
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.1.2
|
|
4
|
+
version: 0.2.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Matthew Iverson
|
|
@@ -116,6 +116,8 @@ extra_rdoc_files: []
|
|
|
116
116
|
files:
|
|
117
117
|
- README.md
|
|
118
118
|
- lib/legion/extensions/knowledge.rb
|
|
119
|
+
- lib/legion/extensions/knowledge/actors/corpus_ingest.rb
|
|
120
|
+
- lib/legion/extensions/knowledge/actors/corpus_watcher.rb
|
|
119
121
|
- lib/legion/extensions/knowledge/client.rb
|
|
120
122
|
- lib/legion/extensions/knowledge/helpers/chunker.rb
|
|
121
123
|
- lib/legion/extensions/knowledge/helpers/manifest.rb
|
|
@@ -123,6 +125,9 @@ files:
|
|
|
123
125
|
- lib/legion/extensions/knowledge/runners/corpus.rb
|
|
124
126
|
- lib/legion/extensions/knowledge/runners/ingest.rb
|
|
125
127
|
- lib/legion/extensions/knowledge/runners/query.rb
|
|
128
|
+
- lib/legion/extensions/knowledge/transport/exchanges/knowledge.rb
|
|
129
|
+
- lib/legion/extensions/knowledge/transport/messages/ingest_message.rb
|
|
130
|
+
- lib/legion/extensions/knowledge/transport/queues/ingest.rb
|
|
126
131
|
- lib/legion/extensions/knowledge/version.rb
|
|
127
132
|
homepage: https://github.com/LegionIO/lex-knowledge
|
|
128
133
|
licenses:
|