lex-apollo 0.4.5 → 0.4.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +22 -0
- data/lib/legion/extensions/apollo/actors/writeback_store.rb +26 -0
- data/lib/legion/extensions/apollo/actors/writeback_vectorize.rb +48 -0
- data/lib/legion/extensions/apollo/helpers/capability.rb +68 -0
- data/lib/legion/extensions/apollo/helpers/embedding.rb +59 -3
- data/lib/legion/extensions/apollo/helpers/graph_query.rb +3 -3
- data/lib/legion/extensions/apollo/helpers/similarity.rb +13 -0
- data/lib/legion/extensions/apollo/helpers/tag_normalizer.rb +36 -0
- data/lib/legion/extensions/apollo/helpers/writeback.rb +156 -0
- data/lib/legion/extensions/apollo/runners/knowledge.rb +24 -5
- data/lib/legion/extensions/apollo/transport/messages/writeback.rb +48 -0
- data/lib/legion/extensions/apollo/transport/queues/writeback_store.rb +23 -0
- data/lib/legion/extensions/apollo/transport/queues/writeback_vectorize.rb +23 -0
- data/lib/legion/extensions/apollo/version.rb +1 -1
- data/lib/legion/extensions/apollo.rb +6 -0
- data/spec/legion/extensions/apollo/actors/writeback_store_spec.rb +42 -0
- data/spec/legion/extensions/apollo/actors/writeback_vectorize_spec.rb +93 -0
- data/spec/legion/extensions/apollo/contradiction_spec.rb +1 -1
- data/spec/legion/extensions/apollo/helpers/capability_spec.rb +56 -0
- data/spec/legion/extensions/apollo/helpers/embedding_spec.rb +6 -6
- data/spec/legion/extensions/apollo/helpers/graph_query_spec.rb +2 -2
- data/spec/legion/extensions/apollo/helpers/tag_normalizer_spec.rb +62 -0
- data/spec/legion/extensions/apollo/helpers/writeback_spec.rb +96 -0
- data/spec/legion/extensions/apollo/runners/knowledge_spec.rb +54 -0
- data/spec/legion/extensions/apollo/runners/request_spec.rb +1 -1
- data/spec/legion/extensions/apollo/transport/messages/writeback_spec.rb +87 -0
- metadata +15 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 4ed5f6ac36b031c41850cae2f4e4c6768666c4151c730b7557f7d5f21ac24df7
|
|
4
|
+
data.tar.gz: 870ffad7838f068a01aa2b03af51ba2ae7df1cfe30debda9a8d3275705ef775a
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: '0583ae3da3f19c3e852929e97947bf49b94e89d37f918d65fcc2507719049ac641ab6867c1e2b7eee899c923c1cb52ec75670c3cab345b159506d34a01099463'
|
|
7
|
+
data.tar.gz: 4f77f0e210baa01f3e77b06808ddfe06456d4f10358b8add5e4d602b8669be9b8223e6b69c30f2b56b2d18f20e5d0eabde504b05a8426812236d9a5918631bc4
|
data/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,27 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## [0.4.7] - 2026-03-25
|
|
4
|
+
|
|
5
|
+
### Added
|
|
6
|
+
- Knowledge capture writeback system: `Helpers::Writeback` evaluates LLM responses for Apollo ingest with echo chamber prevention
|
|
7
|
+
- `Helpers::TagNormalizer` for mechanical tag normalization (aliases, special chars, max 5)
|
|
8
|
+
- `Helpers::Capability` for detecting embed/write capabilities per node
|
|
9
|
+
- Writeback transport layer: `Messages::Writeback`, `Queues::WritebackStore`, `Queues::WritebackVectorize`
|
|
10
|
+
- Writeback subscription actors: `Actor::WritebackStore` (pre-embedded), `Actor::WritebackVectorize` (needs embedding)
|
|
11
|
+
- Content hash dedup in `Runners::Knowledge#handle_ingest` — collision boosts confidence instead of inserting
|
|
12
|
+
- Identity tracking: `submitted_by`, `submitted_from`, `content_hash` fields on knowledge entries
|
|
13
|
+
|
|
14
|
+
## [0.4.6] - 2026-03-25
|
|
15
|
+
|
|
16
|
+
### Added
|
|
17
|
+
- Apollo-specific embedding provider/model settings: `apollo.embedding.provider` and `apollo.embedding.model` override LLM defaults
|
|
18
|
+
- `embedding_opts` helper reads Apollo settings and passes `provider:`/`model:` to `Legion::LLM.embed`
|
|
19
|
+
- Local-first embedding: `detect_local_model` checks Ollama for pulled 1024-dim models (`mxbai-embed-large`, `bge-large`, `snowflake-arctic-embed`) before falling back to cloud provider
|
|
20
|
+
|
|
21
|
+
### Changed
|
|
22
|
+
- `DEFAULT_DIMENSION` changed from 1536 to 1024 for cross-provider compatibility (Bedrock Titan v2, OpenAI with dimensions:, Ollama models)
|
|
23
|
+
- `Helpers::Embedding.generate` now passes provider/model from Apollo settings, falling back to LLM defaults when not configured
|
|
24
|
+
|
|
3
25
|
## [0.4.5] - 2026-03-25
|
|
4
26
|
|
|
5
27
|
### Added
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'legion/extensions/actors/subscription' if defined?(Legion::Extensions::Actors::Subscription)
|
|
4
|
+
|
|
5
|
+
module Legion
|
|
6
|
+
module Extensions
|
|
7
|
+
module Apollo
|
|
8
|
+
module Actor
|
|
9
|
+
# Subscription actor for writeback messages that already carry an embedding.
# It names Runners::Knowledge#handle_ingest as the runner entry point.
class WritebackStore < Legion::Extensions::Actors::Subscription
  # Fully-qualified name of the runner class, as a string.
  def runner_class
    'Legion::Extensions::Apollo::Runners::Knowledge'
  end

  # Name of the runner method, as a string.
  def runner_function
    'handle_ingest'
  end

  def check_subtask?
    false
  end

  def generate_task?
    false
  end

  # Truthy only when the knowledge runner and the transport layer are both
  # loaded and the apollo_write setting is on. Returns nil when either
  # constant is missing and false when the capability check raises.
  def enabled?
    return unless defined?(Legion::Extensions::Apollo::Runners::Knowledge)
    return unless defined?(Legion::Transport)

    Helpers::Capability.apollo_write_enabled?
  rescue StandardError
    false
  end
end
|
|
23
|
+
end
|
|
24
|
+
end
|
|
25
|
+
end
|
|
26
|
+
end
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'legion/extensions/actors/subscription' if defined?(Legion::Extensions::Actors::Subscription)
|
|
4
|
+
|
|
5
|
+
module Legion
|
|
6
|
+
module Extensions
|
|
7
|
+
module Apollo
|
|
8
|
+
module Actor
|
|
9
|
+
# Subscription actor for writeback messages that still need an embedding.
# It generates the vector, then either ingests the entry locally or
# republishes it flagged as embedded.
class WritebackVectorize < Legion::Extensions::Actors::Subscription
  def runner_class
    self.class
  end

  def runner_function
    'handle_vectorize'
  end

  def check_subtask?
    false
  end

  def generate_task?
    false
  end

  # Embeds payload[:content] and hands the enriched entry on.
  #
  # @param payload [Hash] message body; keys may be strings or symbols
  # @return [Hash] { success: true, action: :vectorized } on success,
  #   { success: false, error: <message> } when anything raises
  def handle_vectorize(payload)
    attrs = symbolize(payload)
    vector = Helpers::Embedding.generate(text: attrs[:content])
    deliver(attrs.merge(embedding: vector))

    { success: true, action: :vectorized }
  rescue StandardError => e
    { success: false, error: e.message }
  end

  # Truthy when the transport layer is loaded and this node can embed.
  # Returns nil when Legion::Transport is missing, false on error.
  def enabled?
    Helpers::Capability.can_embed? if defined?(Legion::Transport)
  rescue StandardError
    false
  end

  private

  # Writes directly when this node has write capability; otherwise
  # republishes the entry with has_embedding: true.
  def deliver(entry)
    if Helpers::Capability.can_write?
      Runners::Knowledge.handle_ingest(**entry)
    else
      Transport::Messages::Writeback.new(**entry, has_embedding: true).publish
    end
  end

  def symbolize(hash)
    hash.transform_keys { |key| key.to_sym }
  end
end
|
|
45
|
+
end
|
|
46
|
+
end
|
|
47
|
+
end
|
|
48
|
+
end
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Legion
|
|
4
|
+
module Extensions
|
|
5
|
+
module Apollo
|
|
6
|
+
module Helpers
|
|
7
|
+
# Detects what this node can do for Apollo: generate embeddings (local Ollama
# model or configured cloud provider) and write entries to the database.
# Every probe is best-effort: any error is reported as "not capable".
module Capability
  # Local embedding models probed through Ollama discovery.
  EMBEDDING_MODELS = %w[mxbai-embed-large bge-large snowflake-arctic-embed].freeze

  module_function

  # @return [Boolean] true when Legion::LLM is started and either a local
  #   Ollama embedding model or a cloud embedding configuration is present
  def can_embed?
    return false unless defined?(Legion::LLM) && Legion::LLM.started?

    ollama_embedding_available? || cloud_embedding_configured?
  rescue StandardError
    false
  end

  # @return [Boolean] true when writes are enabled in settings, the data
  #   layer is connected, and the DB user holds INSERT on apollo_entries
  def can_write?
    return false unless apollo_write_enabled?
    return false unless defined?(Legion::Data) && Legion::Data.connected?

    check_db_write_privilege
  rescue StandardError
    false
  end

  # @return [Boolean] true only when settings data.apollo_write is exactly true
  def apollo_write_enabled?
    Legion::Settings.dig(:data, :apollo_write) == true
  rescue StandardError
    false
  end

  # Checks each known model under both its bare name and its ":latest" tag,
  # matching the lookup Helpers::Embedding performs when it picks a local
  # model, so the two helpers cannot disagree about local availability.
  #
  # @return [Boolean]
  def ollama_embedding_available?
    return false unless defined?(Legion::LLM::Discovery::Ollama)

    discovery = Legion::LLM::Discovery::Ollama
    EMBEDDING_MODELS.any? do |model|
      discovery.model_available?(model) || discovery.model_available?("#{model}:latest")
    end
  rescue StandardError
    false
  end

  # @return [Boolean] true when both apollo.embedding.provider and
  #   apollo.embedding.model are set
  def cloud_embedding_configured?
    provider = Legion::Settings.dig(:apollo, :embedding, :provider)
    model = Legion::Settings.dig(:apollo, :embedding, :model)
    !provider.nil? && !model.nil?
  rescue StandardError
    false
  end

  # Asks the database whether current_user may INSERT into apollo_entries.
  # The answer is memoized until reset! is called; a failed query is
  # memoized as false.
  #
  # @return [Boolean]
  def check_db_write_privilege
    return @apollo_write_privilege unless @apollo_write_privilege.nil?

    @apollo_write_privilege = Legion::Data.connection
                                          .fetch("SELECT has_table_privilege(current_user, 'apollo_entries', 'INSERT') AS can_insert")
                                          .first[:can_insert] == true
  rescue StandardError
    @apollo_write_privilege = false
  end

  # Clears the memoized write-privilege answer so the next check re-queries.
  def reset!
    @apollo_write_privilege = nil
  end
end
|
|
65
|
+
end
|
|
66
|
+
end
|
|
67
|
+
end
|
|
68
|
+
end
|
|
@@ -5,7 +5,9 @@ module Legion
|
|
|
5
5
|
module Apollo
|
|
6
6
|
module Helpers
|
|
7
7
|
module Embedding
|
|
8
|
-
DEFAULT_DIMENSION =
|
|
8
|
+
DEFAULT_DIMENSION = 1024
|
|
9
|
+
|
|
10
|
+
LOCAL_EMBEDDING_MODELS = %w[mxbai-embed-large bge-large snowflake-arctic-embed].freeze
|
|
9
11
|
|
|
10
12
|
module_function
|
|
11
13
|
|
|
@@ -15,8 +17,15 @@ module Legion
|
|
|
15
17
|
return zero_vector
|
|
16
18
|
end
|
|
17
19
|
|
|
18
|
-
|
|
19
|
-
vector =
|
|
20
|
+
local_model = detect_local_model
|
|
21
|
+
vector = if local_model
|
|
22
|
+
ollama_embed(text, local_model)
|
|
23
|
+
else
|
|
24
|
+
opts = cloud_embedding_opts
|
|
25
|
+
result = Legion::LLM.embed(text, **opts)
|
|
26
|
+
result.is_a?(Hash) ? result[:vector] : result
|
|
27
|
+
end
|
|
28
|
+
|
|
20
29
|
if vector.is_a?(Array) && vector.any?
|
|
21
30
|
@dimension = vector.size
|
|
22
31
|
vector
|
|
@@ -38,6 +47,53 @@ module Legion
|
|
|
38
47
|
DEFAULT_DIMENSION
|
|
39
48
|
end
|
|
40
49
|
|
|
50
|
+
# Requests an embedding for +text+ from the Ollama HTTP API.
#
# @param text [String] text to embed
# @param model [String] Ollama model name
# @return [Array, nil] the first entry of the response's "embeddings" list,
#   or nil when the request is unsuccessful or anything fails
def ollama_embed(text, model)
  # Loaded on first use rather than at file load. A missing gem raises
  # LoadError, which is a ScriptError and is NOT covered by StandardError,
  # so it is rescued explicitly below to keep this method best-effort.
  require 'faraday'
  base_url = ollama_base_url
  Legion::Logging.debug("[apollo] embedding via local Ollama: #{model}") if defined?(Legion::Logging)
  conn = Faraday.new(url: base_url) { |f| f.options.timeout = 10 }
  response = conn.post('/api/embed', { model: model, input: text }.to_json,
                       'Content-Type' => 'application/json')
  return nil unless response.success?

  parsed = ::JSON.parse(response.body)
  parsed['embeddings']&.first
rescue StandardError, LoadError => e
  Legion::Logging.warn("[apollo] local Ollama embed failed: #{e.message}") if defined?(Legion::Logging)
  nil
end
|
|
65
|
+
|
|
66
|
+
# Resolves the Ollama endpoint: the llm.providers.ollama.base_url setting
# when present, otherwise the local default. Any lookup error also yields
# the default.
#
# @return [String]
def ollama_base_url
  fallback = 'http://localhost:11434'
  return fallback unless defined?(Legion::Settings)

  configured = Legion::Settings[:llm].dig(:providers, :ollama, :base_url)
  configured || fallback
rescue StandardError
  'http://localhost:11434'
end
|
|
73
|
+
|
|
74
|
+
# Builds the keyword options for the cloud embedding call from the
# apollo.embedding settings. Only keys that are configured are included;
# the provider is converted to a symbol.
#
# @return [Hash] subset of { provider:, model: }; empty when settings are
#   unavailable or the lookup raises
def cloud_embedding_opts
  return {} unless defined?(Legion::Settings)

  apollo = Legion::Settings[:apollo]
  return {} if apollo.nil?

  embedding = apollo[:embedding] || {}
  provider = embedding[:provider]
  model = embedding[:model]

  {}.tap do |opts|
    opts[:provider] = provider.to_sym if provider
    opts[:model] = model if model
  end
rescue StandardError
  {}
end
|
|
85
|
+
|
|
86
|
+
# Finds the first entry of LOCAL_EMBEDDING_MODELS that Ollama discovery
# reports as available, under either its bare name or its ":latest" tag.
#
# @return [String, nil] the bare model name, or nil when discovery is not
#   loaded, nothing matches, or the lookup raises
def detect_local_model
  return nil unless defined?(Legion::LLM::Discovery::Ollama)

  discovery = Legion::LLM::Discovery::Ollama
  LOCAL_EMBEDDING_MODELS.find do |name|
    [name, "#{name}:latest"].any? { |candidate| discovery.model_available?(candidate) }
  end
rescue StandardError
  nil
end
|
|
96
|
+
|
|
41
97
|
def zero_vector
|
|
42
98
|
Array.new(dimension, 0.0)
|
|
43
99
|
end
|
|
@@ -32,7 +32,7 @@ module Legion
|
|
|
32
32
|
SELECT e.id, e.content, e.content_type, e.confidence, e.tags, e.source_agent,
|
|
33
33
|
0 AS depth, 1.0::float AS activation
|
|
34
34
|
FROM apollo_entries e
|
|
35
|
-
WHERE e.id =
|
|
35
|
+
WHERE e.id = :entry_id
|
|
36
36
|
|
|
37
37
|
UNION ALL
|
|
38
38
|
|
|
@@ -72,11 +72,11 @@ module Legion
|
|
|
72
72
|
<<~SQL
|
|
73
73
|
SELECT e.id, e.content, e.content_type, e.confidence, e.tags, e.source_agent,
|
|
74
74
|
e.access_count, e.created_at, e.knowledge_domain,
|
|
75
|
-
(e.embedding <=>
|
|
75
|
+
(e.embedding <=> :embedding) AS distance
|
|
76
76
|
FROM apollo_entries e
|
|
77
77
|
WHERE #{where_clause}
|
|
78
78
|
AND e.embedding IS NOT NULL
|
|
79
|
-
ORDER BY e.embedding <=>
|
|
79
|
+
ORDER BY e.embedding <=> :embedding
|
|
80
80
|
LIMIT #{limit}
|
|
81
81
|
SQL
|
|
82
82
|
end
|
|
@@ -10,6 +10,10 @@ module Legion
|
|
|
10
10
|
module_function
|
|
11
11
|
|
|
12
12
|
def cosine_similarity(vec_a:, vec_b:, **)
|
|
13
|
+
vec_a = parse_vector(vec_a)
|
|
14
|
+
vec_b = parse_vector(vec_b)
|
|
15
|
+
return 0.0 unless vec_a.is_a?(Array) && vec_b.is_a?(Array)
|
|
16
|
+
|
|
13
17
|
dot = vec_a.zip(vec_b).sum { |x, y| x * y }
|
|
14
18
|
mag_a = Math.sqrt(vec_a.sum { |x| x**2 })
|
|
15
19
|
mag_b = Math.sqrt(vec_b.sum { |x| x**2 })
|
|
@@ -18,6 +22,15 @@ module Legion
|
|
|
18
22
|
dot / (mag_a * mag_b)
|
|
19
23
|
end
|
|
20
24
|
|
|
25
|
+
# Coerces a vector argument into a Ruby value usable for similarity math.
# Arrays pass through untouched, strings are parsed as JSON, and anything
# else (or a string that fails to parse) becomes nil.
#
# @param vec [Array, String, Object]
# @return [Object, nil] the array itself, the parsed JSON value, or nil
def parse_vector(vec)
  case vec
  when Array then vec
  when String then ::JSON.parse(vec)
  end
rescue StandardError
  nil
end
|
|
33
|
+
|
|
21
34
|
def above_corroboration_threshold?(similarity:, **)
|
|
22
35
|
similarity >= Confidence::CORROBORATION_SIMILARITY_THRESHOLD
|
|
23
36
|
end
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Legion
|
|
4
|
+
module Extensions
|
|
5
|
+
module Apollo
|
|
6
|
+
module Helpers
|
|
7
|
+
# Mechanical tag clean-up: lower-cases, applies a small alias table, strips
# characters outside a-z / 0-9 / hyphen / space, and hyphenates whitespace.
module TagNormalizer
  # Whole-tag replacements applied before special characters are stripped.
  ALIASES = {
    'c#' => 'csharp', '.net' => 'dotnet', 'c++' => 'cplusplus',
    'node.js' => 'nodejs', 'vue.js' => 'vuejs', 'react.js' => 'reactjs'
  }.freeze

  module_function

  # Normalizes a single tag.
  #
  # @param raw [#to_s] tag as supplied
  # @return [String, nil] the normalized tag, or nil when nothing is left
  def normalize(raw)
    cleaned = raw.to_s.strip.downcase
    cleaned = ALIASES.fetch(cleaned, cleaned)

    # After the gsub only [a-z0-9], '-' and ' ' remain, so turning spaces
    # into hyphens and squeezing runs leaves at most one hyphen per edge.
    slug = cleaned.gsub(/[^a-z0-9\- ]/, '').tr(' ', '-').squeeze('-')
    slug = slug.delete_prefix('-').delete_suffix('-')
    slug unless slug.empty?
  end

  # Normalizes a list of tags, dropping empties and duplicates.
  #
  # @param tags [Array, Object, nil] anything Array() accepts
  # @param max [Integer] maximum number of tags to keep
  # @return [Array<String>]
  def normalize_all(tags, max: 5)
    normalized = Array(tags).map { |tag| normalize(tag) }.compact
    normalized.uniq.take(max)
  end
end
|
|
33
|
+
end
|
|
34
|
+
end
|
|
35
|
+
end
|
|
36
|
+
end
|
|
@@ -0,0 +1,156 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'digest'
|
|
4
|
+
require 'socket'
|
|
5
|
+
|
|
6
|
+
module Legion
|
|
7
|
+
module Extensions
|
|
8
|
+
module Apollo
|
|
9
|
+
module Helpers
|
|
10
|
+
# Decides whether an LLM response should be captured into Apollo, builds the
# ingest payload, and routes it either to a direct write or to the transport
# layer depending on this node's capabilities.
module Writeback
  # Tool names whose use marks a response as the product of actual research.
  RESEARCH_TOOLS = %w[read_file search_files search_content run_command].freeze
  # Stored content is cut to this many characters.
  MAX_CONTENT_LENGTH = 4000
  # Default minimum response length worth capturing.
  MIN_CONTENT_LENGTH = 50
  # Words ignored when deriving tags from the user's query.
  STOP_WORDS = %w[a an the is are was were be been being have has had do does did will would shall
                  should may might can could of in to for on with at by from as into about between
                  how what when where why who which this that these those it its and or but not].freeze

  module_function

  # Entry point: captures +response+ when writeback is enabled and the
  # response qualifies. Never raises; failures are logged when
  # Legion::Logging is available.
  #
  # @param request [Object] the LLM request (duck-typed)
  # @param response [Object] the LLM response (duck-typed)
  # @param enrichments [Hash] pipeline enrichments, string-keyed
  def evaluate_and_route(request:, response:, enrichments: {})
    return unless writeback_enabled?
    return unless should_capture?(request, response, enrichments)

    payload = build_payload(request: request, response: response)
    route_payload(payload)
  rescue StandardError => e
    Legion::Logging.warn("apollo writeback failed: #{e.message}") if defined?(Legion::Logging)
  end

  # A response is captured when it is long enough and at least one research
  # tool was called. Whether or not Apollo already supplied results, the
  # deciding factor is the same: the LLM must have done research of its own.
  #
  # @return [Boolean]
  def should_capture?(_request, response, enrichments)
    content = response_content(response)
    return false if content.nil? || content.length < min_content_length

    extract_tool_calls(response, enrichments).any? do |call|
      RESEARCH_TOOLS.include?(call[:name] || call['name'])
    end
  end

  # Builds the ingest payload for a response.
  #
  # The content hash is computed over the truncated content that is
  # actually stored, so it matches a hash recomputed later from the stored
  # content alone.
  #
  # @param source_channel [String, nil] channel prefix; defaults to "pipeline"
  # @return [Hash]
  def build_payload(request:, response:, source_channel: nil)
    stored = response_content(response)[0...MAX_CONTENT_LENGTH]
    tags = derive_tags(extract_user_query(request))

    {
      content: stored,
      content_type: 'observation',
      tags: Helpers::TagNormalizer.normalize_all(tags),
      source_agent: response.respond_to?(:model) ? response.model : 'unknown',
      source_channel: "#{source_channel || 'pipeline'}_synthesis",
      submitted_by: extract_identity(request),
      submitted_from: Socket.gethostname,
      knowledge_domain: nil,
      content_hash: content_hash(stored)
    }
  end

  # Embeds the payload when this node can, then writes directly when it can
  # both embed and write; otherwise publishes to the transport layer.
  def route_payload(payload)
    can_embed = Helpers::Capability.can_embed?
    can_write = Helpers::Capability.can_write?

    payload[:embedding] = Helpers::Embedding.generate(text: payload[:content]) if can_embed

    if can_write && can_embed
      write_directly(payload)
    else
      publish_to_transport(payload, has_embedding: can_embed)
    end
  end

  # Ingests locally; on failure falls back to publishing the payload.
  def write_directly(payload)
    Runners::Knowledge.handle_ingest(**payload)
  rescue StandardError => e
    Legion::Logging.warn("apollo direct write failed, falling back to transport: #{e.message}") if defined?(Legion::Logging)
    publish_to_transport(payload, has_embedding: !payload[:embedding].nil?)
  end

  # Publishes the payload as a writeback message. No-op when the transport
  # layer is not loaded; publish errors are logged, not raised.
  def publish_to_transport(payload, has_embedding: false)
    return unless defined?(Legion::Transport)

    Transport::Messages::Writeback.new(
      **payload, has_embedding: has_embedding
    ).publish
  rescue StandardError => e
    Legion::Logging.warn("apollo writeback publish failed: #{e.message}") if defined?(Legion::Logging)
  end

  # Writeback is on unless apollo.writeback.enabled is explicitly false.
  def writeback_enabled?
    Legion::Settings.dig(:apollo, :writeback, :enabled) != false
  rescue StandardError
    true
  end

  # Configured minimum content length, or MIN_CONTENT_LENGTH.
  def min_content_length
    Legion::Settings.dig(:apollo, :writeback, :min_content_length) || MIN_CONTENT_LENGTH
  rescue StandardError
    MIN_CONTENT_LENGTH
  end

  # MD5 hex digest of the content after trimming, lower-casing and
  # collapsing whitespace runs to single spaces.
  def content_hash(content)
    normalized = content.to_s.strip.downcase.gsub(/\s+/, ' ')
    Digest::MD5.hexdigest(normalized)
  end

  # Reads the content from response.message when that is a Hash (symbol or
  # string key); nil otherwise.
  def response_content(response)
    msg = response.respond_to?(:message) ? response.message : nil
    return nil unless msg.is_a?(Hash)

    msg[:content] || msg['content']
  end

  # Identity of the requester from request.caller, or 'unknown'.
  def extract_identity(request)
    return 'unknown' unless request.respond_to?(:caller) && request.caller.is_a?(Hash)

    request.caller.dig(:requested_by, :identity) || 'unknown'
  rescue StandardError
    'unknown'
  end

  # Content of the last user message, accepting symbol- or string-keyed
  # messages for both the role and the content; '' when there is none.
  def extract_user_query(request)
    return '' unless request.respond_to?(:messages)

    last_user = Array(request.messages).reverse_each.find do |msg|
      msg[:role] == 'user' || msg['role'] == 'user'
    end
    return '' unless last_user

    last_user[:content] || last_user['content'] || ''
  rescue StandardError
    ''
  end

  # Tool calls from the response and from enrichments['tool_calls'],
  # de-duplicated by tool name.
  def extract_tool_calls(response, enrichments)
    calls = []
    calls += Array(response.tool_calls) if response.respond_to?(:tool_calls)
    calls += Array(enrichments['tool_calls']) if enrichments['tool_calls']
    calls.uniq { |tc| tc[:name] || tc['name'] }
  end

  # Up to five distinct query words of three or more characters that are
  # not stop words, in order of appearance.
  def derive_tags(query)
    words = query.to_s.downcase.gsub(/[^a-z0-9\s]/, '').split
    words.reject { |w| STOP_WORDS.include?(w) || w.length < 3 }
         .uniq
         .first(5)
  end
end
|
|
153
|
+
end
|
|
154
|
+
end
|
|
155
|
+
end
|
|
156
|
+
end
|
|
@@ -59,12 +59,25 @@ module Legion
|
|
|
59
59
|
}
|
|
60
60
|
end
|
|
61
61
|
|
|
62
|
-
def handle_ingest(content:, content_type:, tags: [], source_agent: 'unknown', source_provider: nil, source_channel: nil, knowledge_domain: nil, context: {}, **) # rubocop:disable Metrics/ParameterLists, Layout/LineLength
|
|
62
|
+
def handle_ingest(content:, content_type:, tags: [], source_agent: 'unknown', source_provider: nil, source_channel: nil, knowledge_domain: nil, submitted_by: nil, submitted_from: nil, content_hash: nil, context: {}, **) # rubocop:disable Metrics/ParameterLists, Layout/LineLength
|
|
63
63
|
return { success: false, error: 'apollo_data_not_available' } unless defined?(Legion::Data::Model::ApolloEntry)
|
|
64
64
|
|
|
65
|
+
# Content hash dedup
|
|
66
|
+
hash = content_hash || (defined?(Helpers::Writeback) ? Helpers::Writeback.content_hash(content) : nil)
|
|
67
|
+
if hash
|
|
68
|
+
existing = Legion::Data::Model::ApolloEntry
|
|
69
|
+
.where(content_hash: hash)
|
|
70
|
+
.exclude(status: 'archived')
|
|
71
|
+
.first
|
|
72
|
+
if existing
|
|
73
|
+
existing.update(confidence: [existing.confidence + Helpers::Confidence.retrieval_boost, 1.0].min)
|
|
74
|
+
return { success: true, entry_id: existing.id, deduped: true }
|
|
75
|
+
end
|
|
76
|
+
end
|
|
77
|
+
|
|
65
78
|
embedding = Helpers::Embedding.generate(text: content)
|
|
66
79
|
content_type_sym = content_type.to_s
|
|
67
|
-
tag_array = Array(tags)
|
|
80
|
+
tag_array = defined?(Helpers::TagNormalizer) ? Helpers::TagNormalizer.normalize_all(tags) : Array(tags)
|
|
68
81
|
domain = knowledge_domain || tag_array.first || 'general'
|
|
69
82
|
|
|
70
83
|
corroborated, existing_id = find_corroboration(embedding, content_type_sym, source_agent, source_channel)
|
|
@@ -81,6 +94,9 @@ module Legion
|
|
|
81
94
|
tags: Sequel.pg_array(tag_array),
|
|
82
95
|
status: 'candidate',
|
|
83
96
|
knowledge_domain: domain,
|
|
97
|
+
submitted_by: submitted_by,
|
|
98
|
+
submitted_from: submitted_from,
|
|
99
|
+
content_hash: hash,
|
|
84
100
|
embedding: Sequel.lit("'[#{embedding.join(',')}]'::vector")
|
|
85
101
|
)
|
|
86
102
|
existing_id = new_entry.id
|
|
@@ -112,6 +128,8 @@ module Legion
|
|
|
112
128
|
db = Legion::Data::Model::ApolloEntry.db
|
|
113
129
|
entries = db.fetch(sql, embedding: Sequel.lit("'[#{embedding.join(',')}]'::vector")).all
|
|
114
130
|
|
|
131
|
+
entries = entries.reject { |e| e[:distance].respond_to?(:nan?) && e[:distance].nan? }
|
|
132
|
+
|
|
115
133
|
entries.each do |entry|
|
|
116
134
|
Legion::Data::Model::ApolloEntry.where(id: entry[:id]).update(
|
|
117
135
|
access_count: Sequel.expr(:access_count) + 1,
|
|
@@ -130,7 +148,7 @@ module Legion
|
|
|
130
148
|
|
|
131
149
|
formatted = entries.map do |entry|
|
|
132
150
|
{ id: entry[:id], content: entry[:content], content_type: entry[:content_type],
|
|
133
|
-
confidence: entry[:confidence], distance: entry[:distance],
|
|
151
|
+
confidence: entry[:confidence], distance: entry[:distance]&.to_f,
|
|
134
152
|
tags: entry[:tags], source_agent: entry[:source_agent],
|
|
135
153
|
knowledge_domain: entry[:knowledge_domain] }
|
|
136
154
|
end
|
|
@@ -218,6 +236,7 @@ module Legion
|
|
|
218
236
|
|
|
219
237
|
db = Legion::Data::Model::ApolloEntry.db
|
|
220
238
|
entries = db.fetch(sql, embedding: Sequel.lit("'[#{embedding.join(',')}]'::vector")).all
|
|
239
|
+
entries = entries.reject { |e| e[:distance].respond_to?(:nan?) && e[:distance].nan? }
|
|
221
240
|
|
|
222
241
|
entries.each do |entry|
|
|
223
242
|
Legion::Data::Model::ApolloEntry.where(id: entry[:id]).update(
|
|
@@ -228,7 +247,7 @@ module Legion
|
|
|
228
247
|
|
|
229
248
|
formatted = entries.map do |entry|
|
|
230
249
|
{ id: entry[:id], content: entry[:content], content_type: entry[:content_type],
|
|
231
|
-
confidence: entry[:confidence], distance: entry[:distance],
|
|
250
|
+
confidence: entry[:confidence], distance: entry[:distance]&.to_f,
|
|
232
251
|
tags: entry[:tags], source_agent: entry[:source_agent],
|
|
233
252
|
knowledge_domain: entry[:knowledge_domain] }
|
|
234
253
|
end
|
|
@@ -313,7 +332,7 @@ module Legion
|
|
|
313
332
|
|
|
314
333
|
db = Legion::Data::Model::ApolloEntry.db
|
|
315
334
|
similar = db.fetch(
|
|
316
|
-
"SELECT id, content, embedding FROM apollo_entries WHERE id !=
|
|
335
|
+
"SELECT id, content, embedding FROM apollo_entries WHERE id != :entry_id AND embedding IS NOT NULL ORDER BY embedding <=> :embedding LIMIT #{sim_limit}", # rubocop:disable Layout/LineLength
|
|
317
336
|
entry_id: entry_id,
|
|
318
337
|
embedding: Sequel.lit("'[#{embedding.join(',')}]'::vector")
|
|
319
338
|
).all
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'legion/transport/message' if defined?(Legion::Transport)
|
|
4
|
+
|
|
5
|
+
module Legion
|
|
6
|
+
module Extensions
|
|
7
|
+
module Apollo
|
|
8
|
+
module Transport
|
|
9
|
+
module Messages
|
|
10
|
+
# Transport message carrying a knowledge entry to the Apollo writeback
# queues. The routing key depends on whether the entry is already embedded.
class Writeback < Legion::Transport::Message
  def exchange
    Exchanges::Apollo
  end

  # Entries with an embedding go to the store queue; the rest go to the
  # vectorize queue.
  def routing_key
    @options[:has_embedding] ? 'apollo.writeback.store' : 'apollo.writeback.vectorize'
  end

  def type
    'apollo_writeback'
  end

  # Message body built from the constructor options; nil values are
  # omitted. content_hash is forwarded when the producer supplied one, so
  # the consumer receives the producer's hash with the entry.
  #
  # @return [Hash]
  def message
    {
      content: @options[:content],
      content_type: @options[:content_type],
      tags: @options[:tags],
      source_agent: @options[:source_agent],
      source_channel: @options[:source_channel],
      submitted_by: @options[:submitted_by],
      submitted_from: @options[:submitted_from],
      content_hash: @options[:content_hash],
      embedding: @options[:embedding],
      knowledge_domain: @options[:knowledge_domain],
      context: @options[:context] || {}
    }.compact
  end

  # @raise [TypeError] when content is not a String
  def validate
    raise TypeError, 'content is required' unless @options[:content].is_a?(String)

    @valid = true
  end
end
|
|
44
|
+
end
|
|
45
|
+
end
|
|
46
|
+
end
|
|
47
|
+
end
|
|
48
|
+
end
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'legion/transport/queue' if defined?(Legion::Transport)
|
|
4
|
+
|
|
5
|
+
module Legion
|
|
6
|
+
module Extensions
|
|
7
|
+
module Apollo
|
|
8
|
+
module Transport
|
|
9
|
+
module Queues
|
|
10
|
+
# Queue definition for writeback entries that already carry an embedding.
class WritebackStore < Legion::Transport::Queue
  # Name of the queue.
  def queue_name = 'apollo.writeback.store'

  # Durable, manually acknowledged queue with a dead-letter exchange argument.
  def queue_options
    dead_letter = { 'x-dead-letter-exchange': 'apollo.dlx' }
    { manual_ack: true, durable: true, arguments: dead_letter }
  end
end
|
|
19
|
+
end
|
|
20
|
+
end
|
|
21
|
+
end
|
|
22
|
+
end
|
|
23
|
+
end
|