lex-apollo 0.4.5 → 0.4.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +11 -0
- data/lib/legion/extensions/apollo/helpers/embedding.rb +59 -3
- data/lib/legion/extensions/apollo/helpers/graph_query.rb +3 -3
- data/lib/legion/extensions/apollo/helpers/similarity.rb +13 -0
- data/lib/legion/extensions/apollo/runners/knowledge.rb +6 -3
- data/lib/legion/extensions/apollo/version.rb +1 -1
- data/spec/legion/extensions/apollo/contradiction_spec.rb +1 -1
- data/spec/legion/extensions/apollo/helpers/embedding_spec.rb +6 -6
- data/spec/legion/extensions/apollo/helpers/graph_query_spec.rb +2 -2
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 9a1c9042626e5d9e09a783b8c703b11f261573fe1658498a4fa48ad023306baf
|
|
4
|
+
data.tar.gz: 2f3e561b6851825d9d5c67325007802eafea63c10c93a00e37a40170202a0bdc
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: d2dae8784e591deeb1608671dbabbd313813a6f94214b77de0539d2316d6ef3a34e21d5002eb28a262e98c4c4d57773793be4ba006d185e0fd3afcf6816902c5
|
|
7
|
+
data.tar.gz: 41e11ae62a001fab8e79b514d30c3c28374d38d1d4379926e00752df74e35ccfa79c61b3ac75e7e60341de0420287635413b265eca5fa0cd7fe4e7f02a5fad0f
|
data/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,16 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## [0.4.6] - 2026-03-25
|
|
4
|
+
|
|
5
|
+
### Added
|
|
6
|
+
- Apollo-specific embedding provider/model settings: `apollo.embedding.provider` and `apollo.embedding.model` override LLM defaults
|
|
7
|
+
- `cloud_embedding_opts` helper reads Apollo settings and passes `provider:`/`model:` to `Legion::LLM.embed`
|
|
8
|
+
- Local-first embedding: `detect_local_model` checks Ollama for pulled 1024-dim models (`mxbai-embed-large`, `bge-large`, `snowflake-arctic-embed`) before falling back to cloud provider
|
|
9
|
+
|
|
10
|
+
### Changed
|
|
11
|
+
- `DEFAULT_DIMENSION` changed from 1536 to 1024 for cross-provider compatibility (Bedrock Titan v2, OpenAI with dimensions:, Ollama models)
|
|
12
|
+
- `Helpers::Embedding.generate` now passes provider/model from Apollo settings, falling back to LLM defaults when not configured
|
|
13
|
+
|
|
3
14
|
## [0.4.5] - 2026-03-25
|
|
4
15
|
|
|
5
16
|
### Added
|
|
@@ -5,7 +5,9 @@ module Legion
|
|
|
5
5
|
module Apollo
|
|
6
6
|
module Helpers
|
|
7
7
|
module Embedding
|
|
8
|
-
DEFAULT_DIMENSION = 1536
|
|
8
|
+
DEFAULT_DIMENSION = 1024
|
|
9
|
+
|
|
10
|
+
LOCAL_EMBEDDING_MODELS = %w[mxbai-embed-large bge-large snowflake-arctic-embed].freeze
|
|
9
11
|
|
|
10
12
|
module_function
|
|
11
13
|
|
|
@@ -15,8 +17,15 @@ module Legion
|
|
|
15
17
|
return zero_vector
|
|
16
18
|
end
|
|
17
19
|
|
|
18
|
-
|
|
19
|
-
vector =
|
|
20
|
+
local_model = detect_local_model
|
|
21
|
+
vector = if local_model
|
|
22
|
+
ollama_embed(text, local_model)
|
|
23
|
+
else
|
|
24
|
+
opts = cloud_embedding_opts
|
|
25
|
+
result = Legion::LLM.embed(text, **opts)
|
|
26
|
+
result.is_a?(Hash) ? result[:vector] : result
|
|
27
|
+
end
|
|
28
|
+
|
|
20
29
|
if vector.is_a?(Array) && vector.any?
|
|
21
30
|
@dimension = vector.size
|
|
22
31
|
vector
|
|
@@ -38,6 +47,53 @@ module Legion
|
|
|
38
47
|
DEFAULT_DIMENSION
|
|
39
48
|
end
|
|
40
49
|
|
|
50
|
+
# Requests an embedding for +text+ from a local Ollama server.
#
# POSTs `{ model:, input: }` as JSON to `/api/embed` on the URL returned by
# `ollama_base_url` (10-second timeout) and returns the first element of the
# response's `embeddings` array.
#
# @param text [String] text to embed
# @param model [String] name of the Ollama model to use
# @return [Array, nil] the first embedding; nil on a non-success HTTP status,
#   a response without `embeddings`, or any error (including the faraday gem
#   not being installed)
def ollama_embed(text, model)
  require 'faraday' # lazy-loaded: the HTTP client is only needed on the local-Ollama path
  base_url = ollama_base_url
  Legion::Logging.debug("[apollo] embedding via local Ollama: #{model}") if defined?(Legion::Logging)
  conn = Faraday.new(url: base_url) { |f| f.options.timeout = 10 }
  response = conn.post('/api/embed', { model: model, input: text }.to_json,
                       'Content-Type' => 'application/json')
  unless response.success?
    # Surface the HTTP failure instead of returning nil without a trace.
    Legion::Logging.warn("[apollo] local Ollama embed failed: HTTP #{response.status}") if defined?(Legion::Logging)
    return nil
  end

  parsed = ::JSON.parse(response.body)
  parsed['embeddings']&.first
rescue StandardError, LoadError => e
  # LoadError descends from ScriptError, not StandardError, so it must be
  # named explicitly for a missing faraday gem to degrade to nil.
  Legion::Logging.warn("[apollo] local Ollama embed failed: #{e.message}") if defined?(Legion::Logging)
  nil
end
|
|
65
|
+
|
|
66
|
+
# Resolves the base URL of the local Ollama server.
#
# Looks up `:providers -> :ollama -> :base_url` under `Legion::Settings[:llm]`
# and falls back to 'http://localhost:11434' when Legion::Settings is not
# defined, the value is missing or blank, or the lookup raises.
#
# @return [String] the configured base URL, or the localhost default
def ollama_base_url
  fallback = 'http://localhost:11434'
  return fallback unless defined?(Legion::Settings)

  # Safe navigation covers a missing :llm section without raising.
  configured = Legion::Settings[:llm]&.dig(:providers, :ollama, :base_url)
  # A blank value cannot be used as a URL, so treat it as "not configured".
  configured.to_s.strip.empty? ? fallback : configured
rescue StandardError
  fallback
end
|
|
73
|
+
|
|
74
|
+
# Builds the keyword options passed to `Legion::LLM.embed` on the
# non-local path.
#
# Reads `:provider` and `:model` from `Legion::Settings[:apollo][:embedding]`.
# Keys that are unset or blank are left out of the result.
#
# @return [Hash] may contain :provider (Symbol) and/or :model; {} when
#   Legion::Settings is undefined, :apollo is nil, nothing is configured,
#   or the lookup raises
def cloud_embedding_opts
  return {} unless defined?(Legion::Settings) && !Legion::Settings[:apollo].nil?

  embedding = Legion::Settings[:apollo][:embedding] || {}
  provider = embedding[:provider]
  model = embedding[:model]

  opts = {}
  # Empty strings are truthy in Ruby; skipping them avoids forwarding
  # provider: :"" or model: "" as if they were real overrides.
  opts[:provider] = provider.to_sym if provider && !provider.to_s.strip.empty?
  opts[:model] = model if model && !model.to_s.strip.empty?
  opts
rescue StandardError
  {}
end
|
|
85
|
+
|
|
86
|
+
# Picks the first entry of LOCAL_EMBEDDING_MODELS that the Ollama discovery
# helper reports as available, checking both the bare name and its
# ":latest" tag.
#
# @return [String, nil] the matching model name (without tag); nil when
#   Legion::LLM::Discovery::Ollama is not defined, nothing matches, or the
#   availability check raises
def detect_local_model
  return unless defined?(Legion::LLM::Discovery::Ollama)

  discovery = Legion::LLM::Discovery::Ollama
  LOCAL_EMBEDDING_MODELS.find do |name|
    [name, "#{name}:latest"].any? { |candidate| discovery.model_available?(candidate) }
  end
rescue StandardError
  nil
end
|
|
96
|
+
|
|
41
97
|
# Builds an all-zero placeholder vector whose length is the value returned
# by `dimension`.
#
# @return [Array<Float>]
def zero_vector
  Array.new(dimension) { 0.0 }
end
|
|
@@ -32,7 +32,7 @@ module Legion
|
|
|
32
32
|
SELECT e.id, e.content, e.content_type, e.confidence, e.tags, e.source_agent,
|
|
33
33
|
0 AS depth, 1.0::float AS activation
|
|
34
34
|
FROM apollo_entries e
|
|
35
|
-
WHERE e.id =
|
|
35
|
+
WHERE e.id = :entry_id
|
|
36
36
|
|
|
37
37
|
UNION ALL
|
|
38
38
|
|
|
@@ -72,11 +72,11 @@ module Legion
|
|
|
72
72
|
<<~SQL
|
|
73
73
|
SELECT e.id, e.content, e.content_type, e.confidence, e.tags, e.source_agent,
|
|
74
74
|
e.access_count, e.created_at, e.knowledge_domain,
|
|
75
|
-
(e.embedding <=>
|
|
75
|
+
(e.embedding <=> :embedding) AS distance
|
|
76
76
|
FROM apollo_entries e
|
|
77
77
|
WHERE #{where_clause}
|
|
78
78
|
AND e.embedding IS NOT NULL
|
|
79
|
-
ORDER BY e.embedding <=>
|
|
79
|
+
ORDER BY e.embedding <=> :embedding
|
|
80
80
|
LIMIT #{limit}
|
|
81
81
|
SQL
|
|
82
82
|
end
|
|
@@ -10,6 +10,10 @@ module Legion
|
|
|
10
10
|
module_function
|
|
11
11
|
|
|
12
12
|
def cosine_similarity(vec_a:, vec_b:, **)
|
|
13
|
+
vec_a = parse_vector(vec_a)
|
|
14
|
+
vec_b = parse_vector(vec_b)
|
|
15
|
+
return 0.0 unless vec_a.is_a?(Array) && vec_b.is_a?(Array)
|
|
16
|
+
|
|
13
17
|
dot = vec_a.zip(vec_b).sum { |x, y| x * y }
|
|
14
18
|
mag_a = Math.sqrt(vec_a.sum { |x| x**2 })
|
|
15
19
|
mag_b = Math.sqrt(vec_b.sum { |x| x**2 })
|
|
@@ -18,6 +22,15 @@ module Legion
|
|
|
18
22
|
dot / (mag_a * mag_b)
|
|
19
23
|
end
|
|
20
24
|
|
|
25
|
+
# Normalizes a vector argument into a Ruby Array.
#
# Arrays are returned untouched. Strings are parsed as JSON (for example
# "[0.1,0.2]") and accepted only when the result is an Array made up
# entirely of numbers. Anything else yields nil.
#
# @param vec [Array, String, Object] candidate vector
# @return [Array, nil] the vector, or nil when +vec+ is neither an Array nor
#   a String holding a JSON array of numbers
def parse_vector(vec)
  return vec if vec.is_a?(Array)
  return nil unless vec.is_a?(String)

  parsed = ::JSON.parse(vec)
  # JSON text may also decode to a Hash, a scalar, or an Array of
  # non-numbers; the latter would raise once elements are multiplied.
  parsed if parsed.is_a?(Array) && parsed.all?(Numeric)
rescue StandardError
  nil
end
|
|
33
|
+
|
|
21
34
|
def above_corroboration_threshold?(similarity:, **)
|
|
22
35
|
similarity >= Confidence::CORROBORATION_SIMILARITY_THRESHOLD
|
|
23
36
|
end
|
|
@@ -112,6 +112,8 @@ module Legion
|
|
|
112
112
|
db = Legion::Data::Model::ApolloEntry.db
|
|
113
113
|
entries = db.fetch(sql, embedding: Sequel.lit("'[#{embedding.join(',')}]'::vector")).all
|
|
114
114
|
|
|
115
|
+
entries = entries.reject { |e| e[:distance].respond_to?(:nan?) && e[:distance].nan? }
|
|
116
|
+
|
|
115
117
|
entries.each do |entry|
|
|
116
118
|
Legion::Data::Model::ApolloEntry.where(id: entry[:id]).update(
|
|
117
119
|
access_count: Sequel.expr(:access_count) + 1,
|
|
@@ -130,7 +132,7 @@ module Legion
|
|
|
130
132
|
|
|
131
133
|
formatted = entries.map do |entry|
|
|
132
134
|
{ id: entry[:id], content: entry[:content], content_type: entry[:content_type],
|
|
133
|
-
confidence: entry[:confidence], distance: entry[:distance],
|
|
135
|
+
confidence: entry[:confidence], distance: entry[:distance]&.to_f,
|
|
134
136
|
tags: entry[:tags], source_agent: entry[:source_agent],
|
|
135
137
|
knowledge_domain: entry[:knowledge_domain] }
|
|
136
138
|
end
|
|
@@ -218,6 +220,7 @@ module Legion
|
|
|
218
220
|
|
|
219
221
|
db = Legion::Data::Model::ApolloEntry.db
|
|
220
222
|
entries = db.fetch(sql, embedding: Sequel.lit("'[#{embedding.join(',')}]'::vector")).all
|
|
223
|
+
entries = entries.reject { |e| e[:distance].respond_to?(:nan?) && e[:distance].nan? }
|
|
221
224
|
|
|
222
225
|
entries.each do |entry|
|
|
223
226
|
Legion::Data::Model::ApolloEntry.where(id: entry[:id]).update(
|
|
@@ -228,7 +231,7 @@ module Legion
|
|
|
228
231
|
|
|
229
232
|
formatted = entries.map do |entry|
|
|
230
233
|
{ id: entry[:id], content: entry[:content], content_type: entry[:content_type],
|
|
231
|
-
confidence: entry[:confidence], distance: entry[:distance],
|
|
234
|
+
confidence: entry[:confidence], distance: entry[:distance]&.to_f,
|
|
232
235
|
tags: entry[:tags], source_agent: entry[:source_agent],
|
|
233
236
|
knowledge_domain: entry[:knowledge_domain] }
|
|
234
237
|
end
|
|
@@ -313,7 +316,7 @@ module Legion
|
|
|
313
316
|
|
|
314
317
|
db = Legion::Data::Model::ApolloEntry.db
|
|
315
318
|
similar = db.fetch(
|
|
316
|
-
"SELECT id, content, embedding FROM apollo_entries WHERE id !=
|
|
319
|
+
"SELECT id, content, embedding FROM apollo_entries WHERE id != :entry_id AND embedding IS NOT NULL ORDER BY embedding <=> :embedding LIMIT #{sim_limit}", # rubocop:disable Layout/LineLength
|
|
317
320
|
entry_id: entry_id,
|
|
318
321
|
embedding: Sequel.lit("'[#{embedding.join(',')}]'::vector")
|
|
319
322
|
).all
|
|
@@ -36,7 +36,7 @@ RSpec.describe 'Apollo Contradiction Detection' do
|
|
|
36
36
|
allow(mock_db).to receive(:fetch).and_return(double(all: []))
|
|
37
37
|
knowledge.send(:detect_contradictions, 'uuid-1', embedding, 'test')
|
|
38
38
|
expect(mock_db).to have_received(:fetch).with(
|
|
39
|
-
a_string_including('ORDER BY embedding <=>
|
|
39
|
+
a_string_including('ORDER BY embedding <=> :embedding'),
|
|
40
40
|
hash_including(:entry_id, :embedding)
|
|
41
41
|
)
|
|
42
42
|
end
|
|
@@ -12,8 +12,8 @@ RSpec.describe Legion::Extensions::Apollo::Helpers::Embedding do
|
|
|
12
12
|
|
|
13
13
|
it 'returns a zero vector of the correct dimension' do
|
|
14
14
|
result = described_class.generate(text: 'hello world')
|
|
15
|
-
expect(result).to eq(Array.new(
|
|
16
|
-
expect(result.size).to eq(
|
|
15
|
+
expect(result).to eq(Array.new(1024, 0.0))
|
|
16
|
+
expect(result.size).to eq(1024)
|
|
17
17
|
end
|
|
18
18
|
end
|
|
19
19
|
|
|
@@ -24,12 +24,12 @@ RSpec.describe Legion::Extensions::Apollo::Helpers::Embedding do
|
|
|
24
24
|
|
|
25
25
|
it 'returns a zero vector' do
|
|
26
26
|
result = described_class.generate(text: 'hello world')
|
|
27
|
-
expect(result).to eq(Array.new(1536, 0.0))
|
|
27
|
+
expect(result).to eq(Array.new(1024, 0.0))
|
|
28
28
|
end
|
|
29
29
|
end
|
|
30
30
|
|
|
31
31
|
context 'when Legion::LLM is available and started' do
|
|
32
|
-
let(:mock_vector) { Array.new(1536) { rand(-1.0..1.0) } }
|
|
32
|
+
let(:mock_vector) { Array.new(1024) { rand(-1.0..1.0) } }
|
|
33
33
|
|
|
34
34
|
before do
|
|
35
35
|
llm = Module.new do
|
|
@@ -44,7 +44,7 @@ RSpec.describe Legion::Extensions::Apollo::Helpers::Embedding do
|
|
|
44
44
|
it 'returns the vector from the LLM response hash' do
|
|
45
45
|
result = described_class.generate(text: 'hello world')
|
|
46
46
|
expect(result).to eq(mock_vector)
|
|
47
|
-
expect(Legion::LLM).to have_received(:embed).with('hello world')
|
|
47
|
+
expect(Legion::LLM).to have_received(:embed).with('hello world', **{})
|
|
48
48
|
end
|
|
49
49
|
end
|
|
50
50
|
|
|
@@ -120,7 +120,7 @@ RSpec.describe Legion::Extensions::Apollo::Helpers::Embedding do
|
|
|
120
120
|
end
|
|
121
121
|
|
|
122
122
|
it 'returns the default dimension' do
|
|
123
|
-
expect(described_class.configured_dimension).to eq(1536)
|
|
123
|
+
expect(described_class.configured_dimension).to eq(1024)
|
|
124
124
|
end
|
|
125
125
|
end
|
|
126
126
|
end
|
|
@@ -24,7 +24,7 @@ RSpec.describe Legion::Extensions::Apollo::Helpers::GraphQuery do
|
|
|
24
24
|
expect(sql).to include('apollo_entries')
|
|
25
25
|
expect(sql).to include('apollo_relations')
|
|
26
26
|
expect(sql).to include('WITH RECURSIVE')
|
|
27
|
-
expect(sql).to include('
|
|
27
|
+
expect(sql).to include(':entry_id')
|
|
28
28
|
end
|
|
29
29
|
|
|
30
30
|
it 'includes relation type filter when specified' do
|
|
@@ -49,7 +49,7 @@ RSpec.describe Legion::Extensions::Apollo::Helpers::GraphQuery do
|
|
|
49
49
|
it 'returns SQL with vector placeholder' do
|
|
50
50
|
sql = described_class.build_semantic_search_sql(limit: 5, min_confidence: 0.3)
|
|
51
51
|
expect(sql).to include('apollo_entries')
|
|
52
|
-
expect(sql).to include('
|
|
52
|
+
expect(sql).to include(':embedding')
|
|
53
53
|
expect(sql).to include('<=>')
|
|
54
54
|
expect(sql).to include('LIMIT 5')
|
|
55
55
|
end
|