legionio 1.4.72 → 1.4.73
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +10 -0
- data/lib/legion/cli/observe_command.rb +21 -0
- data/lib/legion/mcp/context_compiler.rb +33 -2
- data/lib/legion/mcp/embedding_index.rb +113 -0
- data/lib/legion/mcp/server.rb +11 -0
- data/lib/legion/version.rb +1 -1
- metadata +2 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 7693068b5190222c508dd07a248424e8f9e1eff77e80059cbd8a4fc81e9e6325
|
|
4
|
+
data.tar.gz: 43cc0da2efe7d604101ab87426c21b5f8ce677a8c023aeec38e5b91018b515ae
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: a2d75a86b1646113be917a9f8ed17f8f7aa8ffb48ff371db78ee62fbfdba1929d85e675f2bf7d6a5469b077f10370f3d044eb6f5049db353d471f5be1c3cbab5
|
|
7
|
+
data.tar.gz: 2f00a29a6ddf7be18ee71839b97e1a8aeeb74f2cecd820932447bc574fcfeb595a6adce0d757909bf3e3d68909235fcbb32ad95182192b5471dea0a2f3d39a0e
|
data/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,15 @@
|
|
|
1
1
|
# Legion Changelog
|
|
2
2
|
|
|
3
|
+
## [1.4.73] - 2026-03-19
|
|
4
|
+
|
|
5
|
+
### Added
|
|
6
|
+
- TBI Phase 3: semantic tool retrieval via embedding vectors
|
|
7
|
+
- `Legion::MCP::EmbeddingIndex` module: in-memory embedding cache with pure-Ruby cosine similarity
|
|
8
|
+
- `ContextCompiler` semantic score blending: 60% semantic + 40% keyword when embeddings available, keyword-only fallback
|
|
9
|
+
- `Server.populate_embedding_index`: auto-populates tool embeddings on MCP server build (no-op if LLM unavailable)
|
|
10
|
+
- `legion observe embeddings` subcommand: index size, coverage, and populated status
|
|
11
|
+
- 61 new specs (1666 total): EmbeddingIndex unit, ContextCompiler semantic blending, integration wiring, CLI
|
|
12
|
+
|
|
3
13
|
## [1.4.72] - 2026-03-19
|
|
4
14
|
|
|
5
15
|
### Added
|
|
@@ -72,6 +72,27 @@ module Legion
|
|
|
72
72
|
puts 'Observation data cleared.'
|
|
73
73
|
end
|
|
74
74
|
|
|
75
|
+
desc 'embeddings', 'Show MCP tool embedding index status'
|
|
76
|
+
# Prints the state of the MCP tool embedding index: number of entries, coverage ratio and
# whether the index holds anything. With the `json` option set the same three values are
# emitted as pretty-printed JSON with string keys; otherwise a short text summary is printed.
def embeddings
  # Required here, at call time, rather than at file load.
  require 'legion/mcp/embedding_index'

  embedding_index = Legion::MCP::EmbeddingIndex
  data = {
    index_size: embedding_index.size,
    coverage: embedding_index.coverage,
    populated: embedding_index.populated?
  }

  if options['json']
    puts ::JSON.pretty_generate(data.transform_keys(&:to_s))
  else
    puts 'MCP Embedding Index'
    puts '=' * 40
    puts "Index Size: #{data[:index_size]}"
    # coverage is a 0.0..1.0 ratio; shown as a percentage with one decimal place.
    puts "Coverage: #{(data[:coverage] * 100).round(1)}%"
    puts "Populated: #{data[:populated]}"
  end
end
|
|
95
|
+
|
|
75
96
|
private
|
|
76
97
|
|
|
77
98
|
def serialize_stats(data)
|
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
require_relative 'embedding_index'
|
|
4
|
+
|
|
3
5
|
module Legion
|
|
4
6
|
module MCP
|
|
5
7
|
module ContextCompiler
|
|
@@ -112,6 +114,7 @@ module Legion
|
|
|
112
114
|
# Drops the memoized tool_index and, when the embedding index module is loaded,
# clears that index as well so both caches are rebuilt together.
def reset!
  @tool_index = nil
  return unless defined?(Legion::MCP::EmbeddingIndex)

  Legion::MCP::EmbeddingIndex.reset!
end
|
|
116
119
|
|
|
117
120
|
def build_tool_index
|
|
@@ -131,10 +134,38 @@ module Legion
|
|
|
131
134
|
keywords = intent_string.downcase.split
|
|
132
135
|
return [] if keywords.empty?
|
|
133
136
|
|
|
137
|
+
kw_scores = keyword_score_map(keywords)
|
|
138
|
+
sem_scores = semantic_score_map(intent_string)
|
|
139
|
+
use_semantic = !sem_scores.empty?
|
|
140
|
+
|
|
134
141
|
tool_index.values.map do |entry|
|
|
142
|
+
kw_raw = kw_scores[entry[:name]] || 0
|
|
143
|
+
if use_semantic
|
|
144
|
+
max_kw = kw_scores.values.max || 1
|
|
145
|
+
normalized_kw = max_kw.positive? ? kw_raw.to_f / max_kw : 0.0
|
|
146
|
+
sem = sem_scores[entry[:name]] || 0.0
|
|
147
|
+
blended = (normalized_kw * 0.4) + (sem * 0.6)
|
|
148
|
+
else
|
|
149
|
+
blended = kw_raw.to_f
|
|
150
|
+
end
|
|
151
|
+
|
|
152
|
+
{ name: entry[:name], description: entry[:description], score: blended }
|
|
153
|
+
end
|
|
154
|
+
end
|
|
155
|
+
|
|
156
|
+
# Builds a { tool name => keyword hit count } map over every entry in tool_index.
# A keyword counts once per tool when it appears as a substring of the tool's
# lower-cased "name description" text; keywords are matched as given.
def keyword_score_map(keywords)
  tool_index.values.each_with_object({}) do |tool, hits|
    searchable = "#{tool[:name].downcase} #{tool[:description].downcase}"
    hits[tool[:name]] = keywords.count { |word| searchable.include?(word) }
  end
end
|
|
163
|
+
|
|
164
|
+
# Builds a { tool name => semantic score } map for the given intent by asking the
# embedding index for a match against every indexed tool. Returns an empty hash when
# the embedding index is not loaded or holds no entries, which callers use as the
# signal to fall back to keyword-only scoring.
def semantic_score_map(intent_string)
  index_ready = defined?(Legion::MCP::EmbeddingIndex) && Legion::MCP::EmbeddingIndex.populated?
  return {} unless index_ready

  # limit is the full tool count so no tool is cut off from the result list.
  matches = Legion::MCP::EmbeddingIndex.semantic_match(intent_string, limit: tool_index.size)
  matches.each_with_object({}) { |match, scores| scores[match[:name]] = match[:score] }
end
|
|
140
171
|
end
|
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Legion
  module MCP
    # In-memory cache of embedding vectors for MCP tools, with a pure-Ruby cosine
    # similarity search over them.
    #
    # Entries are keyed by tool name and hold :name, :composite_text, :vector and
    # :built_at. All access to the shared hash goes through a single mutex. The
    # embedder is any callable that takes a String and returns an Array of numbers;
    # it is invoked outside the mutex so a slow embedding call never blocks readers.
    module EmbeddingIndex
      # Created when the module body is evaluated so that concurrent first callers
      # cannot race on lazy initialisation and end up with different mutexes.
      @index ||= {}
      @mutex ||= Mutex.new

      module_function

      # Embeds every tool in +tool_data+ and stores the result under the tool's name,
      # replacing any existing entry with the same name. Tools whose embedding fails
      # or is not a non-empty numeric Array are skipped. The embedder is remembered
      # and becomes the default for later #semantic_match calls.
      #
      # @param tool_data [Enumerable<Hash>] hashes with :name, :description and optional :params
      # @param embedder [#call, nil] callable mapping text to a vector; nil stores nothing
      # @return [Enumerable<Hash>] +tool_data+, unchanged
      def build_from_tool_data(tool_data, embedder: default_embedder)
        # Embedding may be slow (e.g. a remote model), so do it before taking the lock.
        built = tool_data.filter_map do |tool|
          composite = build_composite(tool[:name], tool[:description], tool[:params])
          vector = safe_embed(composite, embedder)
          next unless vector

          { name: tool[:name], composite_text: composite, vector: vector, built_at: Time.now }
        end

        mutex.synchronize do
          @embedder = embedder
          built.each { |item| index[item[:name]] = item }
        end

        tool_data
      end

      # Ranks indexed tools by cosine similarity between their vector and the
      # embedding of +intent+.
      #
      # @param intent [String] text to embed and compare
      # @param embedder [#call, nil] defaults to the embedder of the last build, else #default_embedder
      # @param limit [Integer] maximum number of results
      # @return [Array<Hash>] { name:, score: } hashes, highest score first; [] when the
      #   index is empty or the intent cannot be embedded
      def semantic_match(intent, embedder: @embedder || default_embedder, limit: 5)
        return [] unless populated?

        intent_vec = safe_embed(intent, embedder)
        return [] unless intent_vec

        # Snapshot the entries under the lock; the arithmetic runs on the copy.
        candidates = mutex.synchronize { index.values }
        scores = candidates.filter_map do |item|
          next unless item[:vector]

          { name: item[:name], score: cosine_similarity(intent_vec, item[:vector]) }
        end

        scores.sort_by { |s| -s[:score] }.first(limit)
      end

      # Cosine similarity of two numeric vectors.
      #
      # @return [Float] 0.0 when either vector has zero magnitude or when the vectors
      #   differ in length (zip would otherwise pad with nil and raise)
      def cosine_similarity(vec_a, vec_b)
        return 0.0 unless vec_a.size == vec_b.size

        dot = vec_a.zip(vec_b).sum { |a, b| a * b }
        mag_a = Math.sqrt(vec_a.sum { |x| x**2 })
        mag_b = Math.sqrt(vec_b.sum { |x| x**2 })
        return 0.0 if mag_a.zero? || mag_b.zero?

        dot / (mag_a * mag_b)
      end

      # @return [Hash, nil] the stored entry for +tool_name+, or nil when absent
      def entry(tool_name)
        mutex.synchronize { index[tool_name] }
      end

      # @return [Integer] number of indexed tools
      def size
        mutex.synchronize { index.size }
      end

      # @return [Boolean] true when at least one tool is indexed
      def populated?
        mutex.synchronize { !index.empty? }
      end

      # Fraction of stored entries that carry a vector.
      #
      # #build_from_tool_data only stores entries that have a vector, so an index
      # populated through it reports 1.0 when non-empty; an empty index reports 0.0.
      #
      # @return [Float]
      def coverage
        mutex.synchronize do
          next 0.0 if index.empty?

          index.values.count { |item| item[:vector] }.to_f / index.size
        end
      end

      # Forgets the remembered embedder and removes every entry.
      #
      # @return [Hash] the now-empty index
      def reset!
        mutex.synchronize do
          @embedder = nil
          index.clear
        end
      end

      # @return [Hash] the backing store; callers should hold #mutex while touching it
      def index
        @index ||= {}
      end

      # @return [Mutex] the lock guarding #index
      def mutex
        @mutex ||= Mutex.new
      end

      # Builds the text that is embedded for a tool: "name -- description", followed
      # by "Params: a, b" when any parameters are given. A nil +params+ is treated
      # as no parameters.
      def build_composite(name, description, params)
        param_list = Array(params)
        parts = [name, '--', description]
        parts << "Params: #{param_list.join(', ')}" unless param_list.empty?
        parts.join(' ')
      end

      # Calls the embedder and returns its result only when it is a non-empty Array
      # of numbers. A missing embedder, any StandardError raised by it, or any other
      # result shape yields nil, so embedding stays best-effort.
      def safe_embed(text, embedder)
        return nil unless embedder

        result = embedder.call(text)
        return nil unless result.is_a?(Array) && !result.empty? && result.all?(Numeric)

        result
      rescue StandardError
        nil
      end

      # @return [Proc, nil] a lambda returning Legion::LLM.embed(text)[:vector] when
      #   Legion::LLM is defined and reports started?; nil otherwise
      def default_embedder
        return nil unless defined?(Legion::LLM) && Legion::LLM.respond_to?(:started?) && Legion::LLM.started?

        ->(text) { Legion::LLM.embed(text)[:vector] }
      rescue StandardError
        nil
      end
    end
  end
end
|
data/lib/legion/mcp/server.rb
CHANGED
|
@@ -36,6 +36,7 @@ require_relative 'tools/rbac_check'
|
|
|
36
36
|
require_relative 'tools/rbac_assignments'
|
|
37
37
|
require_relative 'tools/rbac_grants'
|
|
38
38
|
require_relative 'context_compiler'
|
|
39
|
+
require_relative 'embedding_index'
|
|
39
40
|
require_relative 'tools/do_action'
|
|
40
41
|
require_relative 'tools/discover_tools'
|
|
41
42
|
require_relative 'resources/runner_catalog'
|
|
@@ -109,12 +110,22 @@ module Legion
|
|
|
109
110
|
build_filtered_tool_list.map(&:to_h)
|
|
110
111
|
end
|
|
111
112
|
|
|
113
|
+
# Populate embedding index for semantic tool matching (lazy — no-op if LLM unavailable)
|
|
114
|
+
populate_embedding_index
|
|
115
|
+
|
|
112
116
|
Resources::RunnerCatalog.register(server)
|
|
113
117
|
Resources::ExtensionInfo.register_read_handler(server)
|
|
114
118
|
|
|
115
119
|
server
|
|
116
120
|
end
|
|
117
121
|
|
|
122
|
+
# Feeds every entry of ContextCompiler.tool_index into the embedding index using the
# given embedder. Does nothing and returns nil when no embedder is available.
def populate_embedding_index(embedder: EmbeddingIndex.default_embedder)
  if embedder
    EmbeddingIndex.build_from_tool_data(ContextCompiler.tool_index.values, embedder: embedder)
  end
end
|
|
128
|
+
|
|
118
129
|
def wire_observer(data)
|
|
119
130
|
return unless data[:method] == 'tools/call' && data[:tool_name]
|
|
120
131
|
|
data/lib/legion/version.rb
CHANGED
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: legionio
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 1.4.72
|
|
4
|
+
version: 1.4.73
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Esity
|
|
@@ -560,6 +560,7 @@ files:
|
|
|
560
560
|
- lib/legion/mcp.rb
|
|
561
561
|
- lib/legion/mcp/auth.rb
|
|
562
562
|
- lib/legion/mcp/context_compiler.rb
|
|
563
|
+
- lib/legion/mcp/embedding_index.rb
|
|
563
564
|
- lib/legion/mcp/observer.rb
|
|
564
565
|
- lib/legion/mcp/resources/extension_info.rb
|
|
565
566
|
- lib/legion/mcp/resources/runner_catalog.rb
|