legionio 1.4.72 → 1.4.73

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: d9c2f9048a25ea9cff5f0b8db43aaaf49ab40bb6c388b2f892c3f952fb7233f4
4
- data.tar.gz: 02225b7029518b45f1d78f632cce4fdbaee631efb9c03cade5e575ac3c96e3cf
3
+ metadata.gz: 7693068b5190222c508dd07a248424e8f9e1eff77e80059cbd8a4fc81e9e6325
4
+ data.tar.gz: 43cc0da2efe7d604101ab87426c21b5f8ce677a8c023aeec38e5b91018b515ae
5
5
  SHA512:
6
- metadata.gz: 758ee671a15875e26a60fe87b10b3499c5ef47f766d5cd3d43498b7c362be4a791defe79b99bda56ccbf263558fa7a4e8450b307f3bc29734c3c3d279bba4dd4
7
- data.tar.gz: 2c04803613fe2a2ba39ef4aa29865f66b96b237ba7c3f74920cd784bedfebc9bc90df91042714a016f06587a6d234ed722cf29a5727c039e8f09fbddfb322184
6
+ metadata.gz: a2d75a86b1646113be917a9f8ed17f8f7aa8ffb48ff371db78ee62fbfdba1929d85e675f2bf7d6a5469b077f10370f3d044eb6f5049db353d471f5be1c3cbab5
7
+ data.tar.gz: 2f00a29a6ddf7be18ee71839b97e1a8aeeb74f2cecd820932447bc574fcfeb595a6adce0d757909bf3e3d68909235fcbb32ad95182192b5471dea0a2f3d39a0e
data/CHANGELOG.md CHANGED
@@ -1,5 +1,15 @@
1
1
  # Legion Changelog
2
2
 
3
+ ## [1.4.73] - 2026-03-19
4
+
5
+ ### Added
6
+ - TBI Phase 3: semantic tool retrieval via embedding vectors
7
+ - `Legion::MCP::EmbeddingIndex` module: in-memory embedding cache with pure-Ruby cosine similarity
8
+ - `ContextCompiler` semantic score blending: 60% semantic + 40% keyword when embeddings available, keyword-only fallback
9
+ - `Server.populate_embedding_index`: auto-populates tool embeddings on MCP server build (no-op if LLM unavailable)
10
+ - `legion observe embeddings` subcommand: index size, coverage, and populated status
11
+ - 61 new specs (1666 total): EmbeddingIndex unit, ContextCompiler semantic blending, integration wiring, CLI
12
+
3
13
  ## [1.4.72] - 2026-03-19
4
14
 
5
15
  ### Added
@@ -72,6 +72,27 @@ module Legion
72
72
  puts 'Observation data cleared.'
73
73
  end
74
74
 
75
+ desc 'embeddings', 'Show MCP tool embedding index status'
76
# Prints the current state of the MCP tool embedding index: entry count,
# coverage ratio and whether anything has been indexed. Emits pretty-printed
# JSON with string keys when the `json` option is set, otherwise a short
# human-readable report.
def embeddings
  # Loaded on demand so the CLI only pulls in the index for this subcommand.
  require 'legion/mcp/embedding_index'

  embedding_index = Legion::MCP::EmbeddingIndex
  data = {}
  data[:index_size] = embedding_index.size
  data[:coverage] = embedding_index.coverage
  data[:populated] = embedding_index.populated?

  if options['json']
    puts ::JSON.pretty_generate(data.transform_keys(&:to_s))
  else
    # Coverage is a 0.0..1.0 ratio; show it as a percentage with one decimal.
    percent = (data[:coverage] * 100).round(1)
    puts 'MCP Embedding Index',
         '=' * 40,
         "Index Size: #{data[:index_size]}",
         "Coverage: #{percent}%",
         "Populated: #{data[:populated]}"
  end
end
95
+
75
96
  private
76
97
 
77
98
  def serialize_stats(data)
@@ -1,5 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require_relative 'embedding_index'
4
+
3
5
  module Legion
4
6
  module MCP
5
7
  module ContextCompiler
@@ -112,6 +114,7 @@ module Legion
112
114
  # Clears the memoized tool_index.
113
115
  def reset!
114
116
  @tool_index = nil
117
+ Legion::MCP::EmbeddingIndex.reset! if defined?(Legion::MCP::EmbeddingIndex)
115
118
  end
116
119
 
117
120
  def build_tool_index
@@ -131,10 +134,38 @@ module Legion
131
134
  keywords = intent_string.downcase.split
132
135
  return [] if keywords.empty?
133
136
 
137
+ kw_scores = keyword_score_map(keywords)
138
+ sem_scores = semantic_score_map(intent_string)
139
+ use_semantic = !sem_scores.empty?
140
+
134
141
  tool_index.values.map do |entry|
142
+ kw_raw = kw_scores[entry[:name]] || 0
143
+ if use_semantic
144
+ max_kw = kw_scores.values.max || 1
145
+ normalized_kw = max_kw.positive? ? kw_raw.to_f / max_kw : 0.0
146
+ sem = sem_scores[entry[:name]] || 0.0
147
+ blended = (normalized_kw * 0.4) + (sem * 0.6)
148
+ else
149
+ blended = kw_raw.to_f
150
+ end
151
+
152
+ { name: entry[:name], description: entry[:description], score: blended }
153
+ end
154
+ end
155
+
156
# Builds a { tool name => keyword hit count } map.
#
# A keyword counts as a hit when it occurs as a substring of the tool's
# lower-cased "name description" text. Keywords are used as given (the
# caller is expected to have lower-cased them).
#
# @param keywords [Array<String>] search terms
# @return [Hash] number of matching keywords per tool name
def keyword_score_map(keywords)
  tool_index.each_value.with_object({}) do |tool, scores|
    searchable = [tool[:name], tool[:description]].map(&:downcase).join(' ')
    scores[tool[:name]] = keywords.count { |term| searchable.include?(term) }
  end
end
163
+
164
# Builds a { tool name => semantic score } map from the embedding index.
#
# Returns an empty hash when the embedding index is not loaded or holds no
# entries, which callers use as the signal to fall back to keyword scoring.
# The match limit is set to the size of tool_index so no tool is cut off.
#
# @param intent_string [String] the raw intent text to embed and compare
# @return [Hash] semantic score per tool name (possibly empty)
def semantic_score_map(intent_string)
  embeddings = defined?(Legion::MCP::EmbeddingIndex) ? Legion::MCP::EmbeddingIndex : nil
  return {} unless embeddings&.populated?

  matches = embeddings.semantic_match(intent_string, limit: tool_index.size)
  matches.each_with_object({}) do |match, scores|
    scores[match[:name]] = match[:score]
  end
end
140
171
  end
@@ -0,0 +1,113 @@
1
+ # frozen_string_literal: true
2
+
3
module Legion
  module MCP
    # In-memory cache of tool embedding vectors with a pure-Ruby cosine
    # similarity lookup. State lives in module-level instance variables and
    # is guarded by a single mutex.
    module EmbeddingIndex
      # Create the shared state eagerly at load time so that concurrent first
      # callers cannot each memoize their own lock or hash. `||=` keeps any
      # existing state if the file is evaluated more than once.
      @mutex ||= Mutex.new
      @index ||= {}

      module_function

      # Embeds every tool and stores the resulting vectors in the index.
      #
      # Tools whose embedding fails (embedder is nil, raises, or returns
      # something other than a non-empty Array) are skipped. Embeddings are
      # computed before the lock is taken so readers such as `size` and
      # `populated?` are not blocked while the embedder runs.
      #
      # @param tool_data [Enumerable<Hash>] hashes with :name, :description and optional :params
      # @param embedder [#call, nil] callable taking text and returning a vector
      # @return [Enumerable<Hash>] the tool_data that was passed in
      def build_from_tool_data(tool_data, embedder: default_embedder)
        fresh_entries = tool_data.each_with_object({}) do |tool, acc|
          composite = build_composite(tool[:name], tool[:description], tool[:params])
          vector = safe_embed(composite, embedder)
          next unless vector

          acc[tool[:name]] = {
            name: tool[:name],
            composite_text: composite,
            vector: vector,
            built_at: Time.now
          }
        end

        mutex.synchronize do
          @embedder = embedder
          index.merge!(fresh_entries)
        end

        tool_data
      end

      # Ranks indexed tools by cosine similarity to the embedded intent.
      #
      # @param intent [String] text to embed and compare against the index
      # @param embedder [#call, nil] defaults to the embedder used for the last build
      # @param limit [Integer] maximum number of results
      # @return [Array<Hash>] `{ name:, score: }` hashes, best match first; empty
      #   when the index is empty or the intent cannot be embedded
      def semantic_match(intent, embedder: @embedder || default_embedder, limit: 5)
        return [] unless populated?

        intent_vec = safe_embed(intent, embedder)
        return [] unless intent_vec

        scores = mutex.synchronize do
          index.values.filter_map do |entry|
            next unless entry[:vector]

            { name: entry[:name], score: cosine_similarity(intent_vec, entry[:vector]) }
          end
        end

        scores.sort_by { |s| -s[:score] }.first(limit)
      end

      # Cosine similarity of two numeric vectors.
      #
      # Returns 0.0 when either vector has zero magnitude or when the vectors
      # differ in length. Vectors of different dimensionality are not
      # comparable, and zipping them would either raise on the nil padding or
      # silently score a truncated dot product.
      #
      # @param vec_a [Array<Numeric>]
      # @param vec_b [Array<Numeric>]
      # @return [Float]
      def cosine_similarity(vec_a, vec_b)
        return 0.0 unless vec_a.size == vec_b.size

        dot = vec_a.zip(vec_b).sum { |a, b| a * b }
        mag_a = Math.sqrt(vec_a.sum { |x| x**2 })
        mag_b = Math.sqrt(vec_b.sum { |x| x**2 })
        return 0.0 if mag_a.zero? || mag_b.zero?

        dot / (mag_a * mag_b)
      end

      # @param tool_name [Object] key the tool was indexed under (its :name)
      # @return [Hash, nil] the stored entry, or nil when the tool is not indexed
      def entry(tool_name)
        mutex.synchronize { index[tool_name] }
      end

      # @return [Integer] number of indexed tools
      def size
        mutex.synchronize { index.size }
      end

      # @return [Boolean] true when at least one tool is indexed
      def populated?
        mutex.synchronize { !index.empty? }
      end

      # Fraction of index entries that carry a vector; 0.0 for an empty index.
      #
      # NOTE(review): build_from_tool_data only stores entries that have a
      # vector, so unless entries are written some other way this is 1.0 for
      # any non-empty index. It does not count tools whose embedding failed.
      #
      # @return [Float]
      def coverage
        mutex.synchronize do
          return 0.0 if index.empty?

          with_vectors = index.values.count { |e| e[:vector] }
          with_vectors.to_f / index.size
        end
      end

      # Forgets the remembered embedder and removes every indexed entry.
      def reset!
        mutex.synchronize do
          @embedder = nil
          index.clear
        end
      end

      # @return [Hash] the backing store, keyed by tool name
      def index
        @index ||= {}
      end

      # @return [Mutex] the lock guarding the index (not reentrant)
      def mutex
        @mutex ||= Mutex.new
      end

      # Builds the text that gets embedded for a tool:
      # "<name> -- <description>[ Params: a, b]".
      #
      # @param name [String]
      # @param description [String, nil]
      # @param params [Array, nil] parameter names; nil is treated as none
      # @return [String]
      def build_composite(name, description, params)
        param_names = Array(params)
        parts = [name, '--', description]
        parts << "Params: #{param_names.join(', ')}" unless param_names.empty?
        parts.join(' ')
      end

      # Calls the embedder and returns its result only when it is a non-empty
      # Array. Embedding is best-effort: any StandardError yields nil.
      #
      # @param text [String]
      # @param embedder [#call, nil]
      # @return [Array, nil]
      def safe_embed(text, embedder)
        return nil unless embedder

        result = embedder.call(text)
        return nil unless result.is_a?(Array) && !result.empty?

        result
      rescue StandardError
        nil
      end

      # @return [Proc, nil] a lambda backed by Legion::LLM.embed, or nil when
      #   Legion::LLM is not defined, not started, or the check itself raises
      def default_embedder
        return nil unless defined?(Legion::LLM) && Legion::LLM.respond_to?(:started?) && Legion::LLM.started?

        ->(text) { Legion::LLM.embed(text)[:vector] }
      rescue StandardError
        nil
      end
    end
  end
end
@@ -36,6 +36,7 @@ require_relative 'tools/rbac_check'
36
36
  require_relative 'tools/rbac_assignments'
37
37
  require_relative 'tools/rbac_grants'
38
38
  require_relative 'context_compiler'
39
+ require_relative 'embedding_index'
39
40
  require_relative 'tools/do_action'
40
41
  require_relative 'tools/discover_tools'
41
42
  require_relative 'resources/runner_catalog'
@@ -109,12 +110,22 @@ module Legion
109
110
  build_filtered_tool_list.map(&:to_h)
110
111
  end
111
112
 
113
+ # Populate embedding index for semantic tool matching (lazy — no-op if LLM unavailable)
114
+ populate_embedding_index
115
+
112
116
  Resources::RunnerCatalog.register(server)
113
117
  Resources::ExtensionInfo.register_read_handler(server)
114
118
 
115
119
  server
116
120
  end
117
121
 
122
# Feeds every entry of the ContextCompiler tool index into the embedding
# index so tools can be matched semantically. Does nothing (returns nil)
# when no embedder is available.
#
# @param embedder [#call, nil] defaults to EmbeddingIndex.default_embedder
def populate_embedding_index(embedder: EmbeddingIndex.default_embedder)
  if embedder
    EmbeddingIndex.build_from_tool_data(ContextCompiler.tool_index.values, embedder: embedder)
  end
end
128
+
118
129
  def wire_observer(data)
119
130
  return unless data[:method] == 'tools/call' && data[:tool_name]
120
131
 
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Legion
4
- VERSION = '1.4.72'
4
+ VERSION = '1.4.73'
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: legionio
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.4.72
4
+ version: 1.4.73
5
5
  platform: ruby
6
6
  authors:
7
7
  - Esity
@@ -560,6 +560,7 @@ files:
560
560
  - lib/legion/mcp.rb
561
561
  - lib/legion/mcp/auth.rb
562
562
  - lib/legion/mcp/context_compiler.rb
563
+ - lib/legion/mcp/embedding_index.rb
563
564
  - lib/legion/mcp/observer.rb
564
565
  - lib/legion/mcp/resources/extension_info.rb
565
566
  - lib/legion/mcp/resources/runner_catalog.rb