lex-apollo 0.4.6 → 0.4.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +22 -0
- data/lib/legion/extensions/apollo/actors/writeback_store.rb +26 -0
- data/lib/legion/extensions/apollo/actors/writeback_vectorize.rb +50 -0
- data/lib/legion/extensions/apollo/helpers/capability.rb +68 -0
- data/lib/legion/extensions/apollo/helpers/tag_normalizer.rb +36 -0
- data/lib/legion/extensions/apollo/helpers/writeback.rb +157 -0
- data/lib/legion/extensions/apollo/runners/knowledge.rb +29 -6
- data/lib/legion/extensions/apollo/runners/maintenance.rb +0 -1
- data/lib/legion/extensions/apollo/transport/messages/writeback.rb +48 -0
- data/lib/legion/extensions/apollo/transport/queues/writeback_store.rb +23 -0
- data/lib/legion/extensions/apollo/transport/queues/writeback_vectorize.rb +23 -0
- data/lib/legion/extensions/apollo/version.rb +1 -1
- data/lib/legion/extensions/apollo.rb +6 -0
- data/spec/legion/extensions/apollo/actors/writeback_store_spec.rb +42 -0
- data/spec/legion/extensions/apollo/actors/writeback_vectorize_spec.rb +102 -0
- data/spec/legion/extensions/apollo/helpers/capability_spec.rb +56 -0
- data/spec/legion/extensions/apollo/helpers/tag_normalizer_spec.rb +62 -0
- data/spec/legion/extensions/apollo/helpers/writeback_spec.rb +96 -0
- data/spec/legion/extensions/apollo/runners/knowledge_spec.rb +73 -11
- data/spec/legion/extensions/apollo/runners/maintenance_spec.rb +0 -1
- data/spec/legion/extensions/apollo/runners/request_spec.rb +14 -8
- data/spec/legion/extensions/apollo/transport/messages/writeback_spec.rb +87 -0
- metadata +15 -3
- data/lib/legion/extensions/apollo/helpers/embedding.rb +0 -104
- data/spec/legion/extensions/apollo/helpers/embedding_spec.rb +0 -127
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: ef4d63cf3e8b7a9ce86f11af1150b851a911fa7383ca2233d753bc328be31cb5
|
|
4
|
+
data.tar.gz: 8d4e3f73d3d7d04c1dff338eb226a3385d0977d57d77c2378d41723b3fff80e6
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: fae193470f3da5dedcdb5ee91310ac824064aa3ebd60de9f21e9d9a6c5eb770db23ad21603cb6c8d7337fb7fb84b1f85f9c3155f1ff38cac2bd426a3270eaabf
|
|
7
|
+
data.tar.gz: 6d6827a8defe1e41724a3033193f774034830cddf28bd34096622410f736d233a79853567f71b5e2f7547e48194ff3d23ce685633b55f832747ad37026c54df5
|
data/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,27 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## [0.4.8] - 2026-03-25
|
|
4
|
+
|
|
5
|
+
### Changed
|
|
6
|
+
- Removed `Helpers::Embedding` direct Ollama bypass; all embedding generation now routes through `Legion::LLM::Embeddings.generate`
|
|
7
|
+
- `Runners::Knowledge`, `Helpers::Writeback`, and `Actor::WritebackVectorize` all use `Legion::LLM::Embeddings.generate` and extract `[:vector]` from the result hash
|
|
8
|
+
- Zero-vector fallback (`Array.new(1024, 0.0)`) preserved when LLM is unavailable or returns nil vector
|
|
9
|
+
|
|
10
|
+
### Removed
|
|
11
|
+
- `lib/legion/extensions/apollo/helpers/embedding.rb` — direct Faraday/Ollama embedding bypass (superseded by `Legion::LLM` provider routing)
|
|
12
|
+
- `spec/legion/extensions/apollo/helpers/embedding_spec.rb` — tests for removed helper
|
|
13
|
+
|
|
14
|
+
## [0.4.7] - 2026-03-25
|
|
15
|
+
|
|
16
|
+
### Added
|
|
17
|
+
- Knowledge capture writeback system: `Helpers::Writeback` evaluates LLM responses for Apollo ingest with echo chamber prevention
|
|
18
|
+
- `Helpers::TagNormalizer` for mechanical tag normalization (aliases, special chars, max 5)
|
|
19
|
+
- `Helpers::Capability` for detecting embed/write capabilities per node
|
|
20
|
+
- Writeback transport layer: `Messages::Writeback`, `Queues::WritebackStore`, `Queues::WritebackVectorize`
|
|
21
|
+
- Writeback subscription actors: `Actor::WritebackStore` (pre-embedded), `Actor::WritebackVectorize` (needs embedding)
|
|
22
|
+
- Content hash dedup in `Runners::Knowledge#handle_ingest` — collision boosts confidence instead of inserting
|
|
23
|
+
- Identity tracking: `submitted_by`, `submitted_from`, `content_hash` fields on knowledge entries
|
|
24
|
+
|
|
3
25
|
## [0.4.6] - 2026-03-25
|
|
4
26
|
|
|
5
27
|
### Added
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'legion/extensions/actors/subscription' if defined?(Legion::Extensions::Actors::Subscription)
|
|
4
|
+
|
|
5
|
+
module Legion
  module Extensions
    module Apollo
      module Actor
        # Subscription actor for writeback messages that already carry an
        # embedding. It names Runners::Knowledge#handle_ingest as its runner.
        class WritebackStore < Legion::Extensions::Actors::Subscription
          # @return [String] fully-qualified name of the runner module
          def runner_class
            'Legion::Extensions::Apollo::Runners::Knowledge'
          end

          # @return [String] name of the runner method to invoke
          def runner_function
            'handle_ingest'
          end

          def check_subtask?
            false
          end

          def generate_task?
            false
          end

          # Truthy only when the Knowledge runner and Legion::Transport are
          # loaded and the apollo_write setting is switched on. Any error raised
          # while checking is treated as "disabled".
          def enabled?
            runner_loaded = defined?(Legion::Extensions::Apollo::Runners::Knowledge)
            transport_loaded = defined?(Legion::Transport)

            runner_loaded && transport_loaded && Helpers::Capability.apollo_write_enabled?
          rescue StandardError
            false
          end
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'legion/extensions/actors/subscription' if defined?(Legion::Extensions::Actors::Subscription)
|
|
4
|
+
|
|
5
|
+
module Legion
  module Extensions
    module Apollo
      module Actor
        # Subscription actor for writeback messages that still need an
        # embedding. It generates the vector itself, then either ingests the
        # entry locally or republishes it with the embedding attached.
        class WritebackVectorize < Legion::Extensions::Actors::Subscription
          # This actor is its own runner.
          def runner_class
            self.class
          end

          def runner_function
            'handle_vectorize'
          end

          def check_subtask?
            false
          end

          def generate_task?
            false
          end

          # Embeds the payload's :content and forwards the enriched payload.
          #
          # @param payload [Hash] writeback attributes; keys may be strings or symbols
          # @return [Hash] { success: true, action: :vectorized } on success,
          #   { success: false, error: String } when anything raises
          def handle_vectorize(payload)
            attrs = symbolize(payload)
            enriched = attrs.merge(embedding: embedding_for(attrs[:content]))
            forward(enriched)

            { success: true, action: :vectorized }
          rescue StandardError => e
            { success: false, error: e.message }
          end

          # Truthy when Legion::Transport is loaded and this node can embed.
          # Errors while checking count as "disabled".
          def enabled?
            transport_loaded = defined?(Legion::Transport)
            transport_loaded && Helpers::Capability.can_embed?
          rescue StandardError
            false
          end

          private

          # Asks Legion::LLM for an embedding; falls back to a 1024-dimension
          # zero vector when the result holds no usable array.
          def embedding_for(text)
            result = Legion::LLM::Embeddings.generate(text: text)
            vector = result.is_a?(Hash) ? result[:vector] : result
            return vector if vector.is_a?(Array) && vector.any?

            Array.new(1024, 0.0)
          end

          # Writes directly when this node may write, otherwise publishes the
          # enriched payload flagged as already embedded.
          def forward(enriched)
            if Helpers::Capability.can_write?
              Runners::Knowledge.handle_ingest(**enriched)
            else
              Transport::Messages::Writeback.new(**enriched, has_embedding: true).publish
            end
          end

          def symbolize(hash)
            hash.transform_keys(&:to_sym)
          end
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Legion
  module Extensions
    module Apollo
      module Helpers
        # Answers "what can this node do?" for the writeback pipeline:
        # generate embeddings and/or write Apollo entries to the database.
        # Every probe treats an error as "capability absent".
        module Capability
          # Model names looked up through Legion::LLM::Discovery::Ollama.
          EMBEDDING_MODELS = %w[mxbai-embed-large bge-large snowflake-arctic-embed].freeze

          module_function

          # @return [Boolean] true when Legion::LLM is started and either an
          #   Ollama embedding model or a configured provider/model is present
          def can_embed?
            if defined?(Legion::LLM) && Legion::LLM.started?
              ollama_embedding_available? || cloud_embedding_configured?
            else
              false
            end
          rescue StandardError
            false
          end

          # @return [Boolean] true when writes are enabled in settings, the data
          #   layer is connected, and the DB user holds INSERT on apollo_entries
          def can_write?
            if apollo_write_enabled? && defined?(Legion::Data) && Legion::Data.connected?
              check_db_write_privilege
            else
              false
            end
          rescue StandardError
            false
          end

          # @return [Boolean] true only when settings[:data][:apollo_write] is exactly true
          def apollo_write_enabled?
            flag = Legion::Settings.dig(:data, :apollo_write)
            flag == true
          rescue StandardError
            false
          end

          # @return [Boolean] whether any of EMBEDDING_MODELS is reported available
          def ollama_embedding_available?
            return false unless defined?(Legion::LLM::Discovery::Ollama)

            discovery = Legion::LLM::Discovery::Ollama
            EMBEDDING_MODELS.any? { |model_name| discovery.model_available?(model_name) }
          rescue StandardError
            false
          end

          # @return [Boolean] true when both an embedding provider and model are set
          def cloud_embedding_configured?
            provider = Legion::Settings.dig(:apollo, :embedding, :provider)
            model = Legion::Settings.dig(:apollo, :embedding, :model)
            !(provider.nil? || model.nil?)
          rescue StandardError
            false
          end

          # Queries the INSERT privilege once and memoizes the answer
          # (including a false caused by an error) until reset! is called.
          def check_db_write_privilege
            cached = @apollo_write_privilege
            return cached unless cached.nil?

            row = Legion::Data.connection
                              .fetch("SELECT has_table_privilege(current_user, 'apollo_entries', 'INSERT') AS can_insert")
                              .first
            @apollo_write_privilege = (row[:can_insert] == true)
          rescue StandardError
            @apollo_write_privilege = false
          end

          # Clears the memoized privilege so the next check queries again.
          def reset!
            @apollo_write_privilege = nil
          end
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Legion
  module Extensions
    module Apollo
      module Helpers
        # Mechanical tag clean-up: lowercases, applies a small alias table,
        # strips punctuation and turns whitespace into hyphens.
        module TagNormalizer
          # Exact-match replacements applied before punctuation is stripped,
          # so names made mostly of punctuation keep a readable form.
          ALIASES = {
            'c#' => 'csharp', '.net' => 'dotnet', 'c++' => 'cplusplus',
            'node.js' => 'nodejs', 'vue.js' => 'vuejs', 'react.js' => 'reactjs'
          }.freeze

          module_function

          # Normalizes a single tag.
          #
          # @param raw [#to_s] tag as supplied by the caller (nil is accepted)
          # @return [String, nil] the normalized tag, or nil when nothing is left
          def normalize(raw)
            tag = raw.to_s.strip.downcase
            tag = ALIASES.fetch(tag, tag)
            # Collapse every whitespace run (tabs and newlines included) to a
            # single space BEFORE filtering characters; otherwise the filter
            # deletes tabs/newlines and glues the neighbouring words together.
            tag = tag.gsub(/\s+/, ' ')
                     .gsub(/[^a-z0-9\- ]/, '')
                     .gsub(/\s+/, '-')
                     .gsub(/-+/, '-')
                     .sub(/\A-/, '')
                     .sub(/-\z/, '')
            tag.empty? ? nil : tag
          end

          # Normalizes a list of tags, dropping blanks and duplicates.
          #
          # @param tags [Array, Object, nil] one tag or a collection of tags
          # @param max [Integer] maximum number of tags to keep
          # @return [Array<String>] at most +max+ unique normalized tags, in input order
          def normalize_all(tags, max: 5)
            Array(tags)
              .filter_map { |t| normalize(t) }
              .uniq
              .first(max)
          end
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,157 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'digest'
|
|
4
|
+
require 'socket'
|
|
5
|
+
|
|
6
|
+
module Legion
  module Extensions
    module Apollo
      module Helpers
        # Knowledge-capture writeback: decides whether an LLM response should be
        # ingested into Apollo, builds the ingest payload and routes it either
        # to a direct write or onto the transport.
        module Writeback
          # Tool names whose use marks a response as the product of research.
          RESEARCH_TOOLS = %w[read_file search_files search_content run_command].freeze
          MAX_CONTENT_LENGTH = 4000
          MIN_CONTENT_LENGTH = 50
          # Words ignored when deriving tags from the user's query.
          STOP_WORDS = %w[a an the is are was were be been being have has had do does did will would shall
                          should may might can could of in to for on with at by from as into about between
                          how what when where why who which this that these those it its and or but not].freeze

          module_function

          # Entry point: captures the response when writeback is enabled and the
          # response qualifies. Never raises; failures are logged as warnings.
          #
          # @param request [Object] LLM request (may expose #messages and #caller)
          # @param response [Object] LLM response (may expose #message, #model, #tool_calls)
          # @param enrichments [Hash] string-keyed pipeline enrichments
          # @return [Object, nil] result of routing, or nil when skipped or on failure
          def evaluate_and_route(request:, response:, enrichments: {})
            return unless writeback_enabled?
            return unless should_capture?(request, response, enrichments)

            payload = build_payload(request: request, response: response)
            route_payload(payload)
          rescue StandardError => e
            Legion::Logging.warn("apollo writeback failed: #{e.message}") if defined?(Legion::Logging)
          end

          # A response is captured when it is long enough and at least one
          # research tool was called. That requirement already covers the
          # echo-chamber case: an answer built only from existing Apollo results
          # involves no research call and is therefore never written back.
          #
          # @return [Boolean]
          def should_capture?(_request, response, enrichments)
            content = response_content(response)
            return false if content.nil? || content.length < min_content_length

            extract_tool_calls(response, enrichments)
              .any? { |tc| RESEARCH_TOOLS.include?(tc[:name] || tc['name']) }
          end

          # Builds the keyword payload accepted by Runners::Knowledge#handle_ingest.
          #
          # The content hash is taken from the truncated content that is actually
          # stored, so it matches the hash handle_ingest derives from stored
          # content when no hash is supplied.
          #
          # @return [Hash] symbol-keyed ingest attributes
          def build_payload(request:, response:, source_channel: nil)
            content = response_content(response).to_s[0...MAX_CONTENT_LENGTH]
            caller_identity = extract_identity(request)
            tags = derive_tags(extract_user_query(request))

            {
              content: content,
              content_type: 'observation',
              tags: Helpers::TagNormalizer.normalize_all(tags),
              source_agent: response.respond_to?(:model) ? response.model : 'unknown',
              source_channel: "#{source_channel || 'pipeline'}_synthesis",
              submitted_by: caller_identity,
              submitted_from: Socket.gethostname,
              knowledge_domain: nil,
              content_hash: content_hash(content)
            }
          end

          # Embeds when this node can, writes directly when it can both embed and
          # write, and otherwise publishes for another node to finish the job.
          # Adds :embedding to the given payload hash when an embedding is made.
          def route_payload(payload)
            can_embed = Helpers::Capability.can_embed?
            can_write = Helpers::Capability.can_write?

            if can_embed
              result = Legion::LLM::Embeddings.generate(text: payload[:content])
              vector = result.is_a?(Hash) ? result[:vector] : result
              payload[:embedding] = vector.is_a?(Array) && vector.any? ? vector : Array.new(1024, 0.0)
            end

            if can_write && can_embed
              write_directly(payload)
            else
              publish_to_transport(payload, has_embedding: can_embed)
            end
          end

          # Ingests locally; falls back to the transport when the ingest raises
          # OR reports { success: false }, so a refused write is not silently lost.
          #
          # @return [Object] the ingest result on success, otherwise the publish result
          def write_directly(payload)
            result = Runners::Knowledge.handle_ingest(**payload)
            return result unless result.is_a?(Hash) && result[:success] == false

            if defined?(Legion::Logging)
              Legion::Logging.warn("apollo direct write failed, falling back to transport: #{result[:error]}")
            end
            publish_to_transport(payload, has_embedding: !payload[:embedding].nil?)
          rescue StandardError => e
            Legion::Logging.warn("apollo direct write failed, falling back to transport: #{e.message}") if defined?(Legion::Logging)
            publish_to_transport(payload, has_embedding: !payload[:embedding].nil?)
          end

          # Publishes the payload as a Writeback message; a no-op without
          # Legion::Transport. Publish errors are logged, not raised.
          def publish_to_transport(payload, has_embedding: false)
            return unless defined?(Legion::Transport)

            Transport::Messages::Writeback.new(
              **payload, has_embedding: has_embedding
            ).publish
          rescue StandardError => e
            Legion::Logging.warn("apollo writeback publish failed: #{e.message}") if defined?(Legion::Logging)
          end

          # Writeback is on unless settings explicitly set it to false.
          def writeback_enabled?
            Legion::Settings.dig(:apollo, :writeback, :enabled) != false
          rescue StandardError
            true
          end

          # @return [Integer] configured minimum length, defaulting to MIN_CONTENT_LENGTH
          def min_content_length
            Legion::Settings.dig(:apollo, :writeback, :min_content_length) || MIN_CONTENT_LENGTH
          rescue StandardError
            MIN_CONTENT_LENGTH
          end

          # Dedup key: MD5 of the content after trimming, lowercasing and
          # collapsing whitespace runs to a single space.
          #
          # @return [String] hex digest
          def content_hash(content)
            normalized = content.to_s.strip.downcase.gsub(/\s+/, ' ')
            Digest::MD5.hexdigest(normalized)
          end

          # @return [Object, nil] the :content / 'content' of response.message, or nil
          def response_content(response)
            msg = response.respond_to?(:message) ? response.message : nil
            return nil unless msg.is_a?(Hash)

            msg[:content] || msg['content']
          end

          # @return [String] requesting identity from request.caller, or 'unknown'
          def extract_identity(request)
            return 'unknown' unless request.respond_to?(:caller) && request.caller.is_a?(Hash)

            request.caller.dig(:requested_by, :identity) || 'unknown'
          rescue StandardError
            'unknown'
          end

          # Content of the last user message. Role and content are read with
          # either symbol or string keys, matching response_content.
          #
          # @return [String] the query text, or '' when none can be found
          def extract_user_query(request)
            return '' unless request.respond_to?(:messages)

            last_user = Array(request.messages).reverse_each.find do |m|
              (m[:role] || m['role']).to_s == 'user'
            end
            return '' unless last_user

            last_user[:content] || last_user['content'] || ''
          rescue StandardError
            ''
          end

          # Tool calls from the response and from enrichments['tool_calls'],
          # de-duplicated by tool name.
          def extract_tool_calls(response, enrichments)
            calls = []
            calls += Array(response.tool_calls) if response.respond_to?(:tool_calls)
            calls += Array(enrichments['tool_calls']) if enrichments['tool_calls']
            calls.uniq { |tc| tc[:name] || tc['name'] }
          end

          # Up to five distinct query words of three or more characters that are
          # not stop words. Punctuation is removed before splitting.
          def derive_tags(query)
            words = query.to_s.downcase.gsub(/[^a-z0-9\s]/, '').split
            words.reject { |w| STOP_WORDS.include?(w) || w.length < 3 }
                 .uniq
                 .first(5)
          end
        end
      end
    end
  end
end
|
|
@@ -2,7 +2,6 @@
|
|
|
2
2
|
|
|
3
3
|
require 'json'
|
|
4
4
|
require_relative '../helpers/confidence'
|
|
5
|
-
require_relative '../helpers/embedding'
|
|
6
5
|
|
|
7
6
|
module Legion
|
|
8
7
|
module Extensions
|
|
@@ -59,12 +58,25 @@ module Legion
|
|
|
59
58
|
}
|
|
60
59
|
end
|
|
61
60
|
|
|
62
|
-
def handle_ingest(content:, content_type:, tags: [], source_agent: 'unknown', source_provider: nil, source_channel: nil, knowledge_domain: nil, context: {}, **) # rubocop:disable Metrics/ParameterLists, Layout/LineLength
|
|
61
|
+
def handle_ingest(content:, content_type:, tags: [], source_agent: 'unknown', source_provider: nil, source_channel: nil, knowledge_domain: nil, submitted_by: nil, submitted_from: nil, content_hash: nil, context: {}, **) # rubocop:disable Metrics/ParameterLists, Layout/LineLength
|
|
63
62
|
return { success: false, error: 'apollo_data_not_available' } unless defined?(Legion::Data::Model::ApolloEntry)
|
|
64
63
|
|
|
65
|
-
|
|
64
|
+
# Content hash dedup
|
|
65
|
+
hash = content_hash || (defined?(Helpers::Writeback) ? Helpers::Writeback.content_hash(content) : nil)
|
|
66
|
+
if hash
|
|
67
|
+
existing = Legion::Data::Model::ApolloEntry
|
|
68
|
+
.where(content_hash: hash)
|
|
69
|
+
.exclude(status: 'archived')
|
|
70
|
+
.first
|
|
71
|
+
if existing
|
|
72
|
+
existing.update(confidence: [existing.confidence + Helpers::Confidence.retrieval_boost, 1.0].min)
|
|
73
|
+
return { success: true, entry_id: existing.id, deduped: true }
|
|
74
|
+
end
|
|
75
|
+
end
|
|
76
|
+
|
|
77
|
+
embedding = embed_text(content)
|
|
66
78
|
content_type_sym = content_type.to_s
|
|
67
|
-
tag_array = Array(tags)
|
|
79
|
+
tag_array = defined?(Helpers::TagNormalizer) ? Helpers::TagNormalizer.normalize_all(tags) : Array(tags)
|
|
68
80
|
domain = knowledge_domain || tag_array.first || 'general'
|
|
69
81
|
|
|
70
82
|
corroborated, existing_id = find_corroboration(embedding, content_type_sym, source_agent, source_channel)
|
|
@@ -81,6 +93,9 @@ module Legion
|
|
|
81
93
|
tags: Sequel.pg_array(tag_array),
|
|
82
94
|
status: 'candidate',
|
|
83
95
|
knowledge_domain: domain,
|
|
96
|
+
submitted_by: submitted_by,
|
|
97
|
+
submitted_from: submitted_from,
|
|
98
|
+
content_hash: hash,
|
|
84
99
|
embedding: Sequel.lit("'[#{embedding.join(',')}]'::vector")
|
|
85
100
|
)
|
|
86
101
|
existing_id = new_entry.id
|
|
@@ -103,7 +118,7 @@ module Legion
|
|
|
103
118
|
def handle_query(query:, limit: Helpers::GraphQuery.default_query_limit, min_confidence: Helpers::GraphQuery.default_query_min_confidence, status: [:confirmed], tags: nil, domain: nil, agent_id: 'unknown', **) # rubocop:disable Metrics/ParameterLists, Layout/LineLength
|
|
104
119
|
return { success: false, error: 'apollo_data_not_available' } unless defined?(Legion::Data::Model::ApolloEntry)
|
|
105
120
|
|
|
106
|
-
embedding =
|
|
121
|
+
embedding = embed_text(query)
|
|
107
122
|
sql = Helpers::GraphQuery.build_semantic_search_sql(
|
|
108
123
|
limit: limit, min_confidence: min_confidence,
|
|
109
124
|
statuses: Array(status).map(&:to_s), tags: tags, domain: domain
|
|
@@ -212,7 +227,7 @@ module Legion
|
|
|
212
227
|
|
|
213
228
|
return { success: true, entries: [], count: 0 } if query.nil? || query.to_s.strip.empty?
|
|
214
229
|
|
|
215
|
-
embedding =
|
|
230
|
+
embedding = embed_text(query.to_s)
|
|
216
231
|
sql = Helpers::GraphQuery.build_semantic_search_sql(
|
|
217
232
|
limit: limit, min_confidence: min_confidence,
|
|
218
233
|
statuses: ['confirmed'], tags: tags, domain: domain
|
|
@@ -294,6 +309,14 @@ module Legion
|
|
|
294
309
|
|
|
295
310
|
private
|
|
296
311
|
|
|
312
|
+
# Generates an embedding for +text+ through Legion::LLM::Embeddings.
# Accepts either a Hash result (vector under :vector) or a bare result.
# Returns a 1024-element zero vector when no usable array comes back or
# when generation raises.
def embed_text(text)
  response = Legion::LLM::Embeddings.generate(text: text)
  candidate = response.is_a?(Hash) ? response[:vector] : response
  return candidate if candidate.is_a?(Array) && candidate.any?

  Array.new(1024, 0.0)
rescue StandardError
  Array.new(1024, 0.0)
end
|
|
319
|
+
|
|
297
320
|
def allowed_domains_for(target_domain)
|
|
298
321
|
rules = if defined?(Legion::Settings) && Legion::Settings.dig(:apollo, :domain_isolation)
|
|
299
322
|
Legion::Settings.dig(:apollo, :domain_isolation)
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'legion/transport/message' if defined?(Legion::Transport)
|
|
4
|
+
|
|
5
|
+
module Legion
  module Extensions
    module Apollo
      module Transport
        module Messages
          # Transport message carrying a knowledge writeback payload. The
          # routing key sends it to the store queue when an embedding is
          # attached and to the vectorize queue otherwise.
          class Writeback < Legion::Transport::Message
            def exchange
              Exchanges::Apollo
            end

            # @return [String] 'apollo.writeback.store' when :has_embedding is
            #   truthy, otherwise 'apollo.writeback.vectorize'
            def routing_key
              @options[:has_embedding] ? 'apollo.writeback.store' : 'apollo.writeback.vectorize'
            end

            def type
              'apollo_writeback'
            end

            # Message body; nil-valued fields are omitted.
            #
            # :content_hash is forwarded so the hash computed by the producer is
            # the one used for dedup at ingest, instead of being recomputed
            # there. handle_ingest accepts it as an optional keyword.
            #
            # @return [Hash]
            def message
              {
                content: @options[:content],
                content_type: @options[:content_type],
                tags: @options[:tags],
                source_agent: @options[:source_agent],
                source_channel: @options[:source_channel],
                submitted_by: @options[:submitted_by],
                submitted_from: @options[:submitted_from],
                content_hash: @options[:content_hash],
                embedding: @options[:embedding],
                knowledge_domain: @options[:knowledge_domain],
                context: @options[:context] || {}
              }.compact
            end

            # @raise [TypeError] when :content is not a String
            def validate
              raise TypeError, 'content is required' unless @options[:content].is_a?(String)

              @valid = true
            end
          end
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'legion/transport/queue' if defined?(Legion::Transport)
|
|
4
|
+
|
|
5
|
+
module Legion
  module Extensions
    module Apollo
      module Transport
        module Queues
          # Queue for writeback messages that already include an embedding.
          class WritebackStore < Legion::Transport::Queue
            def queue_name = 'apollo.writeback.store'

            # Durable, manually acknowledged queue that names 'apollo.dlx' as
            # its dead-letter exchange.
            def queue_options
              dead_letter_args = { 'x-dead-letter-exchange': 'apollo.dlx' }
              { manual_ack: true, durable: true, arguments: dead_letter_args }
            end
          end
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'legion/transport/queue' if defined?(Legion::Transport)
|
|
4
|
+
|
|
5
|
+
module Legion
  module Extensions
    module Apollo
      module Transport
        module Queues
          # Queue for writeback messages that still need an embedding.
          class WritebackVectorize < Legion::Transport::Queue
            def queue_name = 'apollo.writeback.vectorize'

            # Durable, manually acknowledged queue that names 'apollo.dlx' as
            # its dead-letter exchange.
            def queue_options
              dead_letter_args = { 'x-dead-letter-exchange': 'apollo.dlx' }
              { manual_ack: true, durable: true, arguments: dead_letter_args }
            end
          end
        end
      end
    end
  end
end
|
|
@@ -4,6 +4,9 @@ require 'legion/extensions/apollo/version'
|
|
|
4
4
|
require 'legion/extensions/apollo/helpers/confidence'
|
|
5
5
|
require 'legion/extensions/apollo/helpers/similarity'
|
|
6
6
|
require 'legion/extensions/apollo/helpers/graph_query'
|
|
7
|
+
require 'legion/extensions/apollo/helpers/tag_normalizer'
|
|
8
|
+
require 'legion/extensions/apollo/helpers/capability'
|
|
9
|
+
require 'legion/extensions/apollo/helpers/writeback'
|
|
7
10
|
require 'legion/extensions/apollo/runners/knowledge'
|
|
8
11
|
require 'legion/extensions/apollo/runners/expertise'
|
|
9
12
|
require 'legion/extensions/apollo/runners/maintenance'
|
|
@@ -21,6 +24,9 @@ if defined?(Legion::Transport)
|
|
|
21
24
|
require 'legion/extensions/apollo/transport/queues/gas'
|
|
22
25
|
require 'legion/extensions/apollo/transport/messages/ingest'
|
|
23
26
|
require 'legion/extensions/apollo/transport/messages/query'
|
|
27
|
+
require 'legion/extensions/apollo/transport/messages/writeback'
|
|
28
|
+
require 'legion/extensions/apollo/transport/queues/writeback_store'
|
|
29
|
+
require 'legion/extensions/apollo/transport/queues/writeback_vectorize'
|
|
24
30
|
end
|
|
25
31
|
|
|
26
32
|
module Legion
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'spec_helper'
|
|
4
|
+
|
|
5
|
+
# Stand-in for the Subscription base class when the framework is not loaded;
# the actor under test subclasses it.
unless defined?(Legion::Extensions::Actors::Subscription)
  module Legion
    module Extensions
      module Actors
        Subscription = Class.new
      end
    end
  end
end

# Record the feature as loaded so a later require of it is a no-op.
subscription_feature = 'legion/extensions/actors/subscription'
$LOADED_FEATURES << subscription_feature unless $LOADED_FEATURES.include?(subscription_feature)

require 'legion/extensions/apollo/helpers/capability'
require 'legion/extensions/apollo/runners/knowledge'
require 'legion/extensions/apollo/actors/writeback_store'

RSpec.describe Legion::Extensions::Apollo::Actor::WritebackStore do
  subject(:actor) { described_class.new }

  describe '#runner_class' do
    it 'returns Knowledge runner string' do
      expected = 'Legion::Extensions::Apollo::Runners::Knowledge'
      expect(actor.runner_class).to eq(expected)
    end
  end

  describe '#runner_function' do
    it 'returns handle_ingest' do
      expect(actor.runner_function).to eq('handle_ingest')
    end
  end

  describe '#check_subtask?' do
    it do
      expect(actor.check_subtask?).to be(false)
    end
  end

  describe '#generate_task?' do
    it do
      expect(actor.generate_task?).to be(false)
    end
  end
end
|