lex-knowledge 0.1.2 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/legion/extensions/knowledge/actors/corpus_ingest.rb +23 -0
- data/lib/legion/extensions/knowledge/actors/corpus_watcher.rb +46 -0
- data/lib/legion/extensions/knowledge/helpers/manifest_store.rb +51 -0
- data/lib/legion/extensions/knowledge/helpers/parser.rb +35 -12
- data/lib/legion/extensions/knowledge/runners/corpus.rb +26 -2
- data/lib/legion/extensions/knowledge/runners/ingest.rb +92 -40
- data/lib/legion/extensions/knowledge/transport/exchanges/knowledge.rb +17 -0
- data/lib/legion/extensions/knowledge/transport/messages/ingest_message.rb +16 -0
- data/lib/legion/extensions/knowledge/transport/queues/ingest.rb +18 -0
- data/lib/legion/extensions/knowledge/version.rb +1 -1
- data/lib/legion/extensions/knowledge.rb +11 -0
- metadata +7 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: da332d52d481d4e45e8f7ee771a60fb13cebb74144a18ac54ff7b7ec4cb26f27
|
|
4
|
+
data.tar.gz: 3a858fbdae4511ec6e34573fccd85f330b33c7be818a2a78271af33cb0dcbfbd
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: e7e15c2174b28ea518c3fee6ecc68723bc628ee3c6d9121a2292706ad32125d32f766da164aa581c56915cd178e54379014a67d8ad1d5cd765485a6eb88d0610
|
|
7
|
+
data.tar.gz: eb7f343423ea41ff46b6a478b3c32a22b0dd3ab3939ad858d0e8d540b5faaf8f01496a5839c79b5ccb1847529367ad9c55ffdc72bebf5959891ff4a7c06b72e4
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Legion
|
|
4
|
+
module Extensions
|
|
5
|
+
module Knowledge
|
|
6
|
+
module Actor
|
|
7
|
+
# Actor built on the Subscription base class. It names the +ingest_file+
# function of the knowledge Ingest runner as its runner target.
class CorpusIngest < Legion::Extensions::Actors::Subscription
  # @return [String] fully-qualified name of the runner module
  def runner_class
    'Legion::Extensions::Knowledge::Runners::Ingest'
  end

  # @return [String] name of the runner function to call
  def runner_function
    'ingest_file'
  end

  # @return [false]
  def check_subtask?
    false
  end

  # @return [false]
  def generate_task?
    false
  end

  # Truthy only when both Legion::Transport and the Ingest runner are defined.
  #
  # @return [String, nil, false] the result of +defined?+ for the runner
  #   constant, nil when either constant is missing, false on any error
  def enabled?
    return nil unless defined?(Legion::Transport)

    defined?(Legion::Extensions::Knowledge::Runners::Ingest)
  rescue StandardError
    false
  end
end
|
|
20
|
+
end
|
|
21
|
+
end
|
|
22
|
+
end
|
|
23
|
+
end
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Legion
|
|
4
|
+
module Extensions
|
|
5
|
+
module Knowledge
|
|
6
|
+
module Actor
|
|
7
|
+
# Actor built on the Every base class. It names the +ingest_corpus+ function
# of the knowledge Ingest runner as its runner target and passes it the
# corpus path read from Legion::Settings.
class CorpusWatcher < Legion::Extensions::Actors::Every
  # @return [String] fully-qualified name of the runner module
  def runner_class
    'Legion::Extensions::Knowledge::Runners::Ingest'
  end

  # @return [String] name of the runner function to call
  def runner_function
    'ingest_corpus'
  end

  # @return [false]
  def check_subtask?
    false
  end

  # @return [false]
  def generate_task?
    false
  end

  # Interval taken from settings key knowledge.actors.watcher_interval.
  #
  # @return [Object] the configured value, or 300 when settings are missing,
  #   the key is unset, or the lookup raises
  def every_interval
    knowledge_setting(:actors, :watcher_interval) || 300
  rescue StandardError
    300
  end

  # Truthy only when a non-empty corpus path is configured.
  #
  # @return [Boolean, nil] nil when no path is configured, false on any error
  def enabled?
    configured = corpus_path
    configured && !configured.empty?
  rescue StandardError
    false
  end

  # @return [Hash] arguments handed to the runner function: the corpus path
  def args
    { path: corpus_path }
  end

  private

  # Corpus path from settings key knowledge.corpus_path, or nil when it is
  # unavailable or the lookup raises.
  def corpus_path
    knowledge_setting(:corpus_path)
  rescue StandardError
    nil
  end

  # Digs +keys+ out of the :knowledge settings section; nil when
  # Legion::Settings is not loaded or has no :knowledge section.
  def knowledge_setting(*keys)
    return nil unless defined?(Legion::Settings)
    return nil if Legion::Settings[:knowledge].nil?

    Legion::Settings.dig(:knowledge, *keys)
  end
end
|
|
43
|
+
end
|
|
44
|
+
end
|
|
45
|
+
end
|
|
46
|
+
end
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'digest'
|
|
4
|
+
require 'fileutils'
|
|
5
|
+
require 'json'
|
|
6
|
+
require 'tempfile'
|
|
7
|
+
|
|
8
|
+
module Legion
|
|
9
|
+
module Extensions
|
|
10
|
+
module Knowledge
|
|
11
|
+
module Helpers
|
|
12
|
+
# Stores one JSON manifest file per corpus path under STORE_DIR and reads it
# back. File names are derived from a SHA-256 digest of the corpus path.
module ManifestStore
  module_function

  STORE_DIR = ::File.expand_path('~/.legionio/knowledge').freeze

  # Reads the stored manifest for +corpus_path+.
  #
  # @param corpus_path [#to_s] corpus root the manifest belongs to
  # @return [Array<Hash>] stored entries with symbol keys; [] when no manifest
  #   file exists, it cannot be read or parsed, or it does not hold a JSON array
  def load(corpus_path:)
    path = store_path(corpus_path: corpus_path)
    return [] unless ::File.exist?(path)

    parsed = ::JSON.parse(::File.read(path, encoding: 'utf-8'), symbolize_names: true)
    # Valid JSON that is not an array (e.g. an object) is treated as corrupt
    # so callers always receive an Array.
    parsed.is_a?(::Array) ? parsed : []
  rescue StandardError
    []
  end

  # Writes +manifest+ for +corpus_path+ by writing a temp file and renaming
  # it over the final path.
  #
  # @param corpus_path [#to_s] corpus root the manifest belongs to
  # @param manifest [Array<Hash>] entries to store; each entry's :mtime is
  #   stored as a String
  # @return [Boolean] true on success, false when anything raised
  def save(corpus_path:, manifest:)
    ::FileUtils.mkdir_p(STORE_DIR)
    path = store_path(corpus_path: corpus_path)
    # Per-process/thread temp name so concurrent writers never share a temp file.
    tmp = "#{path}.#{::Process.pid}.#{::Thread.current.object_id}.tmp"
    ::File.write(tmp, ::JSON.generate(manifest.map { |e| serialize_entry(e) }))
    ::File.rename(tmp, path)
    true
  rescue StandardError
    # Do not leave a stray temp file behind after a failed write or rename.
    ::FileUtils.rm_f(tmp) if tmp
    false
  end

  # @param corpus_path [#to_s] corpus root
  # @return [String] manifest file path: STORE_DIR joined with the first 16
  #   hex characters of the SHA-256 of +corpus_path+ plus ".manifest.json"
  def store_path(corpus_path:)
    hash = ::Digest::SHA256.hexdigest(corpus_path.to_s)[0, 16]
    ::File.join(STORE_DIR, "#{hash}.manifest.json")
  end

  # Copy of +entry+ with :mtime converted to a String.
  def serialize_entry(entry)
    entry.merge(mtime: entry[:mtime].to_s)
  end
  private_class_method :serialize_entry
end
|
|
48
|
+
end
|
|
49
|
+
end
|
|
50
|
+
end
|
|
51
|
+
end
|
|
@@ -15,6 +15,8 @@ module Legion
|
|
|
15
15
|
parse_markdown(file_path: file_path)
|
|
16
16
|
when '.txt'
|
|
17
17
|
parse_text(file_path: file_path)
|
|
18
|
+
when '.pdf', '.docx'
|
|
19
|
+
extract_via_data(file_path: file_path)
|
|
18
20
|
else
|
|
19
21
|
[{ error: 'unsupported format', source_file: file_path }]
|
|
20
22
|
end
|
|
@@ -22,32 +24,42 @@ module Legion
|
|
|
22
24
|
|
|
23
25
|
def parse_markdown(file_path:)
|
|
24
26
|
content = ::File.read(file_path, encoding: 'utf-8')
|
|
25
|
-
sections
|
|
27
|
+
sections = []
|
|
26
28
|
current_heading = ::File.basename(file_path, '.*')
|
|
27
29
|
current_lines = []
|
|
28
|
-
|
|
30
|
+
heading_stack = {}
|
|
29
31
|
|
|
30
32
|
content.each_line do |line|
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
current_heading
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
current_heading = line.sub(/^#+\s*/, '').chomp
|
|
39
|
-
section_path = section_path.first(1) + [current_heading]
|
|
33
|
+
level = heading_level(line)
|
|
34
|
+
if level
|
|
35
|
+
flush_section(sections, current_heading, build_section_path(heading_stack), current_lines, file_path)
|
|
36
|
+
title = line.sub(/^#+\s*/, '').chomp
|
|
37
|
+
heading_stack.delete_if { |d, _| d >= level }
|
|
38
|
+
heading_stack[level] = title
|
|
39
|
+
current_heading = title
|
|
40
40
|
current_lines = []
|
|
41
41
|
else
|
|
42
42
|
current_lines << line
|
|
43
43
|
end
|
|
44
44
|
end
|
|
45
45
|
|
|
46
|
-
flush_section(sections, current_heading,
|
|
46
|
+
flush_section(sections, current_heading, build_section_path(heading_stack), current_lines, file_path)
|
|
47
47
|
|
|
48
48
|
sections.empty? ? [{ heading: ::File.basename(file_path, '.*'), section_path: [], content: content.strip, source_file: file_path }] : sections
|
|
49
49
|
end
|
|
50
50
|
|
|
51
|
+
# Extracts the text of +file_path+ through Legion::Data::Extract and wraps
# it in a single section.
#
# @param file_path [String] path of the document to extract
# @return [Array<Hash>] one section hash on success; otherwise one error
#   hash: 'unsupported format' when Legion::Data::Extract is not defined,
#   'extraction_failed' when the result has no :text or anything raised
def extract_via_data(file_path:)
  if defined?(::Legion::Data::Extract)
    extraction = ::Legion::Data::Extract.extract(file_path, type: :auto)

    if extraction.is_a?(Hash) && extraction[:text]
      [{
        heading: ::File.basename(file_path, '.*'),
        section_path: [],
        content: extraction[:text].strip,
        source_file: file_path
      }]
    else
      [{ error: 'extraction_failed', source_file: file_path, detail: extraction }]
    end
  else
    [{ error: 'unsupported format', source_file: file_path }]
  end
rescue StandardError => e
  [{ error: 'extraction_failed', source_file: file_path, detail: e.message }]
end
|
|
62
|
+
|
|
51
63
|
def parse_text(file_path:)
|
|
52
64
|
content = ::File.read(file_path, encoding: 'utf-8')
|
|
53
65
|
heading = ::File.basename(file_path, '.*')
|
|
@@ -67,6 +79,17 @@ module Legion
|
|
|
67
79
|
}
|
|
68
80
|
end
|
|
69
81
|
private_class_method :flush_section
|
|
82
|
+
|
|
83
|
+
# Depth of a markdown heading line: the number of leading '#' characters
# (1 to 6) when they are followed by whitespace.
#
# @param line [String] one line of markdown
# @return [Integer, nil] heading depth, or nil when the line is not a heading
def heading_level(line)
  hashes = line[/^(\#{1,6})\s/, 1]
  hashes&.length
end
|
|
87
|
+
private_class_method :heading_level
|
|
88
|
+
|
|
89
|
+
# Titles from the heading stack, ordered by ascending stack key.
#
# @param stack [Hash] heading titles keyed by level
# @return [Array] the titles in key order
def build_section_path(stack)
  stack.sort.map(&:last)
end
|
|
92
|
+
private_class_method :build_section_path
|
|
70
93
|
end
|
|
71
94
|
end
|
|
72
95
|
end
|
|
@@ -7,8 +7,32 @@ module Legion
|
|
|
7
7
|
module Corpus
|
|
8
8
|
module_function
|
|
9
9
|
|
|
10
|
-
def
|
|
11
|
-
|
|
10
|
+
# Manifest file path that Helpers::ManifestStore reports for the corpus at +path+.
#
# @param path [String] corpus root
# @return [String] whatever ManifestStore.store_path returns for that corpus
def manifest_path(path:) = Helpers::ManifestStore.store_path(corpus_path: path)
|
|
13
|
+
|
|
14
|
+
# Scans the corpus at +path+ and reports file, chunk and byte totals. Every
# scanned file is parsed and chunked to obtain the chunk estimate; files
# whose first parsed section carries an :error key contribute no chunks.
#
# @param path [String] corpus root; must exist on disk
# @param extensions [Array, nil] forwarded to Helpers::Manifest.scan only when given
# @return [Hash] success: true with :path, :file_count, :estimated_chunks and
#   :total_bytes; or success: false with :error when the path is missing
#   or anything raised
def corpus_stats(path:, extensions: nil)
  return { success: false, error: 'path does not exist' } unless ::File.exist?(path)

  scan_args = extensions ? { path: path, extensions: extensions } : { path: path }
  files = Helpers::Manifest.scan(**scan_args)

  chunk_total = 0
  files.each do |file|
    parsed = Helpers::Parser.parse(file_path: file[:path])
    next if parsed.first&.key?(:error)

    chunk_total += Helpers::Chunker.chunk(sections: parsed).size
  end

  {
    success: true,
    path: path,
    file_count: files.size,
    estimated_chunks: chunk_total,
    total_bytes: files.sum { |file| file[:size] }
  }
rescue StandardError => e
  { success: false, error: e.message }
end
|
|
13
37
|
end
|
|
14
38
|
end
|
|
@@ -23,23 +23,32 @@ module Legion
|
|
|
23
23
|
end
|
|
24
24
|
|
|
25
25
|
def ingest_corpus(path:, dry_run: false, force: false)
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
26
|
+
current = Helpers::Manifest.scan(path: path)
|
|
27
|
+
previous = force ? [] : Helpers::ManifestStore.load(corpus_path: path)
|
|
28
|
+
delta = Helpers::Manifest.diff(current: current, previous: previous)
|
|
29
|
+
|
|
30
|
+
to_process = delta[:added] + delta[:changed]
|
|
31
|
+
chunks_created = 0
|
|
32
|
+
chunks_skipped = 0
|
|
33
|
+
chunks_updated = 0
|
|
34
|
+
|
|
35
|
+
to_process.each do |file_path|
|
|
36
|
+
result = process_file(file_path, dry_run: dry_run, force: force)
|
|
37
|
+
chunks_created += result[:created]
|
|
38
|
+
chunks_skipped += result[:skipped]
|
|
39
|
+
chunks_updated += result[:updated]
|
|
38
40
|
end
|
|
39
41
|
|
|
42
|
+
delta[:removed].each { |file_path| retire_file(file_path: file_path) } unless dry_run
|
|
43
|
+
|
|
44
|
+
Helpers::ManifestStore.save(corpus_path: path, manifest: current) unless dry_run
|
|
45
|
+
|
|
40
46
|
{
|
|
41
47
|
success: true,
|
|
42
|
-
files_scanned:
|
|
48
|
+
files_scanned: current.size,
|
|
49
|
+
files_added: delta[:added].size,
|
|
50
|
+
files_changed: delta[:changed].size,
|
|
51
|
+
files_removed: delta[:removed].size,
|
|
43
52
|
chunks_created: chunks_created,
|
|
44
53
|
chunks_skipped: chunks_skipped,
|
|
45
54
|
chunks_updated: chunks_updated
|
|
@@ -67,12 +76,18 @@ module Legion
|
|
|
67
76
|
return { created: 0, skipped: 0, updated: 0 } if sections.first&.key?(:error)
|
|
68
77
|
|
|
69
78
|
chunks = Helpers::Chunker.chunk(sections: sections)
|
|
79
|
+
paired = if dry_run
|
|
80
|
+
chunks.map { |c| { chunk: c, embedding: nil } }
|
|
81
|
+
else
|
|
82
|
+
batch_embed_chunks(chunks, force: force)
|
|
83
|
+
end
|
|
84
|
+
|
|
70
85
|
created = 0
|
|
71
86
|
skipped = 0
|
|
72
87
|
updated = 0
|
|
73
88
|
|
|
74
|
-
|
|
75
|
-
outcome =
|
|
89
|
+
paired.each do |p|
|
|
90
|
+
outcome = upsert_chunk_with_embedding(p[:chunk], p[:embedding], dry_run: dry_run, force: force, exists: p[:exists] || false)
|
|
76
91
|
case outcome
|
|
77
92
|
when :created then created += 1
|
|
78
93
|
when :skipped then skipped += 1
|
|
@@ -84,46 +99,68 @@ module Legion
|
|
|
84
99
|
end
|
|
85
100
|
private_class_method :process_file
|
|
86
101
|
|
|
87
|
-
def
|
|
88
|
-
|
|
102
|
+
# Pairs every chunk with an embedding (or nil) and an :exists flag.
#
# With +force+ no existence lookup is done and every chunk is sent for
# embedding; otherwise only chunks not already marked as existing are sent.
# When batch embedding is unavailable, or anything raises, the chunks are
# paired with nil embeddings instead.
#
# @param chunks [Array<Hash>] chunks carrying :content_hash
# @param force [Boolean] skip the existence lookup and embed everything
# @return [Array<Hash>] one { chunk:, embedding:, exists: } hash per chunk
def batch_embed_chunks(chunks, force:)
  known = force ? {} : build_exists_map(chunks)
  return paired_without_embed(chunks, known) unless llm_embed_available?

  pending = force ? chunks : chunks.reject { |chunk| known[chunk[:content_hash]] }
  vectors = pending.empty? ? {} : build_embed_map(pending)

  chunks.map do |chunk|
    digest = chunk[:content_hash]
    { chunk: chunk, embedding: vectors[digest], exists: known.fetch(digest, false) }
  end
rescue StandardError
  paired_without_embed(chunks, {})
end
|
|
113
|
+
private_class_method :batch_embed_chunks
|
|
93
114
|
|
|
94
|
-
|
|
95
|
-
|
|
115
|
+
# Maps each chunk's :content_hash to the result of chunk_exists? for it.
# One chunk_exists? call is made per chunk.
#
# @param chunks [Array<Hash>] chunks carrying :content_hash
# @return [Hash] content hash => existence flag
def build_exists_map(chunks)
  chunks.each_with_object({}) do |chunk, seen|
    digest = chunk[:content_hash]
    seen[digest] = chunk_exists?(digest)
  end
end
|
|
118
|
+
private_class_method :build_exists_map
|
|
119
|
+
|
|
120
|
+
# Whether Legion::LLM is defined and responds to +embed_batch+.
#
# @return [Boolean, nil] nil when Legion::LLM is not defined
def llm_embed_available?
  return nil unless defined?(Legion::LLM)

  Legion::LLM.respond_to?(:embed_batch)
end
|
|
123
|
+
private_class_method :llm_embed_available?
|
|
124
|
+
|
|
125
|
+
# Pairs every chunk with a nil embedding and its existence flag.
#
# @param chunks [Array<Hash>] chunks carrying :content_hash
# @param exists_map [Hash] content hash => existence flag; missing hashes count as false
# @return [Array<Hash>] one { chunk:, embedding: nil, exists: } hash per chunk
def paired_without_embed(chunks, exists_map)
  chunks.map do |chunk|
    already_stored = exists_map.fetch(chunk[:content_hash], false)
    { chunk: chunk, embedding: nil, exists: already_stored }
  end
end
|
|
128
|
+
private_class_method :paired_without_embed
|
|
129
|
+
|
|
130
|
+
# Sends the :content of every chunk to Legion::LLM.embed_batch and maps
# each chunk's :content_hash to the vector returned for it. Results are
# matched back to chunks through their :index; results carrying :error
# are left out.
#
# @param needs_embed [Array<Hash>] chunks carrying :content and :content_hash
# @return [Hash] content hash => vector; {} when anything raised
def build_embed_map(needs_embed)
  texts = needs_embed.map { |chunk| chunk[:content] }
  vectors_by_hash = {}

  Legion::LLM.embed_batch(texts).each do |result|
    next if result[:error]

    vectors_by_hash[needs_embed[result[:index]][:content_hash]] = result[:vector]
  end

  vectors_by_hash
rescue StandardError
  {}
end
|
|
138
|
+
private_class_method :build_embed_map
|
|
139
|
+
|
|
140
|
+
# Decides what happens to one chunk and reports the outcome.
#
# @param chunk [Hash] chunk to store
# @param embedding [Object, nil] embedding passed on to ingest_to_apollo
# @param dry_run [Boolean] report :created without storing anything
# @param force [Boolean] store even when the chunk already exists
# @param exists [Boolean] whether the chunk is already stored
# @return [Symbol] :created on a dry run or when Legion::Extensions::Apollo
#   is not defined (nothing is stored in either case); :skipped when the
#   chunk exists and +force+ is off, or when storing raised; otherwise
#   :updated with +force+ and :created without it
def upsert_chunk_with_embedding(chunk, embedding, dry_run: false, force: false, exists: false)
  if dry_run || !defined?(Legion::Extensions::Apollo)
    :created
  elsif exists && !force
    :skipped
  else
    ingest_to_apollo(chunk, embedding)
    force ? :updated : :created
  end
rescue StandardError
  :skipped
end
|
|
101
|
-
private_class_method :
|
|
150
|
+
private_class_method :upsert_chunk_with_embedding
|
|
102
151
|
|
|
103
152
|
def chunk_exists?(content_hash)
|
|
104
|
-
return false unless defined?(Legion::
|
|
105
|
-
|
|
106
|
-
Legion::
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
filter: { content_hash: content_hash }
|
|
111
|
-
).any?
|
|
153
|
+
return false unless defined?(Legion::Data::Model::ApolloEntry)
|
|
154
|
+
|
|
155
|
+
Legion::Data::Model::ApolloEntry
|
|
156
|
+
.where(Sequel.pg_array_op(:tags).contains(Sequel.pg_array(['document_chunk'])))
|
|
157
|
+
.where(Sequel.like(:content, "%#{content_hash}%"))
|
|
158
|
+
.any?
|
|
112
159
|
rescue StandardError
|
|
113
160
|
false
|
|
114
161
|
end
|
|
115
162
|
private_class_method :chunk_exists?
|
|
116
163
|
|
|
117
|
-
def generate_embedding(content)
|
|
118
|
-
return nil unless defined?(Legion::Extensions::Apollo)
|
|
119
|
-
return nil unless defined?(Legion::Extensions::Apollo::Helpers::Embedding)
|
|
120
|
-
|
|
121
|
-
Legion::Extensions::Apollo::Helpers::Embedding.generate(content)
|
|
122
|
-
rescue StandardError
|
|
123
|
-
nil
|
|
124
|
-
end
|
|
125
|
-
private_class_method :generate_embedding
|
|
126
|
-
|
|
127
164
|
def ingest_to_apollo(chunk, embedding)
|
|
128
165
|
return unless defined?(Legion::Extensions::Apollo)
|
|
129
166
|
|
|
@@ -145,6 +182,21 @@ module Legion
|
|
|
145
182
|
Legion::Extensions::Apollo::Runners::Knowledge.handle_ingest(**payload)
|
|
146
183
|
end
|
|
147
184
|
private_class_method :ingest_to_apollo
|
|
185
|
+
|
|
186
|
+
# Sends a 'document_retired' entry for +file_path+ to Legion::Apollo.ingest.
# Does nothing unless Legion::Apollo is defined, responds to +ingest+ and
# reports itself started.
#
# @param file_path [String] path of the file that left the corpus
# @return [Object, nil] the result of Legion::Apollo.ingest; nil when Apollo
#   is unavailable or anything raised
def retire_file(file_path:)
  apollo_ready = defined?(Legion::Apollo) &&
                 Legion::Apollo.respond_to?(:ingest) &&
                 Legion::Apollo.started?
  return unless apollo_ready

  payload = {
    content: file_path,
    content_type: 'document_retired',
    tags: [file_path, 'retired', 'document_chunk'].uniq,
    metadata: { source_file: file_path, retired: true }
  }
  Legion::Apollo.ingest(**payload)
rescue StandardError
  nil
end
|
|
199
|
+
private_class_method :retire_file
|
|
148
200
|
end
|
|
149
201
|
end
|
|
150
202
|
end
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Legion
|
|
4
|
+
module Extensions
|
|
5
|
+
module Knowledge
|
|
6
|
+
module Transport
|
|
7
|
+
module Exchanges
|
|
8
|
+
# Transport exchange definition: a durable topic exchange named 'knowledge'.
class Knowledge < Legion::Transport::Exchange
  # @return [String] exchange name
  def exchange_name
    'knowledge'
  end

  # @return [String] exchange type
  def type
    'topic'
  end

  # @return [true]
  def durable
    true
  end
end
|
|
13
|
+
end
|
|
14
|
+
end
|
|
15
|
+
end
|
|
16
|
+
end
|
|
17
|
+
end
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Legion
|
|
4
|
+
module Extensions
|
|
5
|
+
module Knowledge
|
|
6
|
+
module Transport
|
|
7
|
+
module Messages
|
|
8
|
+
# Transport message definition addressed to the 'knowledge' exchange with
# routing key 'knowledge.ingest'.
class IngestMessage < Legion::Transport::Message
  # @return [String] name of the target exchange
  def exchange_name
    'knowledge'
  end

  # @return [String] routing key used for the message
  def routing_key
    'knowledge.ingest'
  end
end
|
|
12
|
+
end
|
|
13
|
+
end
|
|
14
|
+
end
|
|
15
|
+
end
|
|
16
|
+
end
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Legion
|
|
4
|
+
module Extensions
|
|
5
|
+
module Knowledge
|
|
6
|
+
module Transport
|
|
7
|
+
module Queues
|
|
8
|
+
# Transport queue definition: durable queue 'knowledge.ingest' on the
# 'knowledge' exchange with routing key 'knowledge.ingest'.
class Ingest < Legion::Transport::Queue
  # @return [String] queue name
  def queue_name
    'knowledge.ingest'
  end

  # @return [String] name of the exchange the queue belongs to
  def exchange_name
    'knowledge'
  end

  # @return [String] routing key for the queue
  def routing_key
    'knowledge.ingest'
  end

  # @return [true]
  def durable
    true
  end
end
|
|
14
|
+
end
|
|
15
|
+
end
|
|
16
|
+
end
|
|
17
|
+
end
|
|
18
|
+
end
|
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
require_relative 'knowledge/version'
|
|
4
4
|
require_relative 'knowledge/helpers/manifest'
|
|
5
|
+
require_relative 'knowledge/helpers/manifest_store'
|
|
5
6
|
require_relative 'knowledge/helpers/parser'
|
|
6
7
|
require_relative 'knowledge/helpers/chunker'
|
|
7
8
|
require_relative 'knowledge/runners/ingest'
|
|
@@ -9,6 +10,16 @@ require_relative 'knowledge/runners/query'
|
|
|
9
10
|
require_relative 'knowledge/runners/corpus'
|
|
10
11
|
require_relative 'knowledge/client'
|
|
11
12
|
|
|
13
|
+
if defined?(Legion::Transport)
|
|
14
|
+
require_relative 'knowledge/transport/exchanges/knowledge'
|
|
15
|
+
require_relative 'knowledge/transport/queues/ingest'
|
|
16
|
+
require_relative 'knowledge/transport/messages/ingest_message'
|
|
17
|
+
end
|
|
18
|
+
|
|
19
|
+
require_relative 'knowledge/actors/corpus_watcher' if defined?(Legion::Extensions::Actors::Every)
|
|
20
|
+
|
|
21
|
+
require_relative 'knowledge/actors/corpus_ingest' if defined?(Legion::Extensions::Actors::Subscription)
|
|
22
|
+
|
|
12
23
|
module Legion
|
|
13
24
|
module Extensions
|
|
14
25
|
module Knowledge
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: lex-knowledge
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.
|
|
4
|
+
version: 0.4.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Matthew Iverson
|
|
@@ -116,13 +116,19 @@ extra_rdoc_files: []
|
|
|
116
116
|
files:
|
|
117
117
|
- README.md
|
|
118
118
|
- lib/legion/extensions/knowledge.rb
|
|
119
|
+
- lib/legion/extensions/knowledge/actors/corpus_ingest.rb
|
|
120
|
+
- lib/legion/extensions/knowledge/actors/corpus_watcher.rb
|
|
119
121
|
- lib/legion/extensions/knowledge/client.rb
|
|
120
122
|
- lib/legion/extensions/knowledge/helpers/chunker.rb
|
|
121
123
|
- lib/legion/extensions/knowledge/helpers/manifest.rb
|
|
124
|
+
- lib/legion/extensions/knowledge/helpers/manifest_store.rb
|
|
122
125
|
- lib/legion/extensions/knowledge/helpers/parser.rb
|
|
123
126
|
- lib/legion/extensions/knowledge/runners/corpus.rb
|
|
124
127
|
- lib/legion/extensions/knowledge/runners/ingest.rb
|
|
125
128
|
- lib/legion/extensions/knowledge/runners/query.rb
|
|
129
|
+
- lib/legion/extensions/knowledge/transport/exchanges/knowledge.rb
|
|
130
|
+
- lib/legion/extensions/knowledge/transport/messages/ingest_message.rb
|
|
131
|
+
- lib/legion/extensions/knowledge/transport/queues/ingest.rb
|
|
126
132
|
- lib/legion/extensions/knowledge/version.rb
|
|
127
133
|
homepage: https://github.com/LegionIO/lex-knowledge
|
|
128
134
|
licenses:
|