legion-llm 0.9.51 → 0.9.53

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: e0cae7c608acb8fe3f09852e7833639c08998bb944b2df026d9c1999c03ff017
4
- data.tar.gz: bbea922035cf6f38eb43139ea5d33deaca70cbbea8693edccb3811bdc5f43608
3
+ metadata.gz: 9594126ab3b86d52c27c53288196bafe9951a31953e027db66834e9d3b97baba
4
+ data.tar.gz: e0b98b99f06be07d64394d55ef86cc906c784178f1b67cb30cac0778c2afcf97
5
5
  SHA512:
6
- metadata.gz: cc620102bcfdbd73387ba3da2e31e80e4fd9c9b9fd3ceeb85b00417972deeda55bbc427702df3a86ec7a2d3f07be34f99383bf9141b79679e394e66c45eda7c1
7
- data.tar.gz: 4f5b8e4739873d147be2ddfed81c6a04297a016f9c7c4c143b6ad0409f61a9f75c39a14c6de4b30f37119b4e5aa577e4faba813917da0a9c771c016500e079a2
6
+ metadata.gz: 4b467420153c061faffba784a1578b2041d281f94189bc82e0d0555d8cf3fb6132ae22390a46e81598c81ff9d7df0c938ae7b514f54579266636101c158c4334
7
+ data.tar.gz: b6087830e389876412a55464548cb19b55b15b2a74ecdc9cb6e7f638454c8a8001bca26efce94aed0285f747a368b8fdc19195e0525da2305bdfd1655cfbf30f
data/CHANGELOG.md CHANGED
@@ -1,5 +1,17 @@
1
1
  # Legion LLM Changelog
2
2
 
3
+ ## [0.9.53] - 2026-05-29
4
+
5
+ ### Added
6
+ - API: OpenAI-compatible endpoints (`/v1/chat/completions`, `/v1/models`, `/v1/embeddings`, `/v1/responses`) are now also available under the `/api/llm/inference/v1/` prefix, allowing Mastra `openai-compatible` providers to use `http://127.0.0.1:4567/api/llm/inference` as the base URL — consistent with the Claude and Codex client routing patches in legion-interlink
7
+ - API: auth `before` filter extended to cover `/api/llm/inference/v1/*` in addition to `/v1/*`
8
+
9
+ ## [0.9.52] - 2026-05-27
10
+
11
+ ### Fixed
12
+ - Discovery: `verify_embedding` now checks `model_available?` for Ollama instead of blindly returning true — prevents `can_embed?` from reporting true when the embedding model (e.g. `mxbai-embed-large`) hasn't been pulled on the local node
13
+ - Discovery: `detect_embedding_from_registry` now calls `verify_embedding` before setting `@can_embed = true`, closing a gap where registry-declared capability metadata was trusted without verifying the model exists locally
14
+
3
15
  ## [0.9.51] - 2026-05-23
4
16
 
5
17
  ### Changed
@@ -9,9 +9,9 @@ module Legion
9
9
  extend Legion::Logging::Helper
10
10
 
11
11
  def self.registered(app)
12
- log.debug('[llm][api][auth] registering /v1/* before filter')
12
+ log.debug('[llm][api][auth] registering /v1/* and /api/llm/inference/v1/* before filters')
13
13
 
14
- app.before '/v1/*' do
14
+ auth_check = proc do
15
15
  log.debug("[llm][api][auth] before filter action=check path=#{request.path_info}")
16
16
  next unless auth_enabled?
17
17
 
@@ -27,6 +27,10 @@ module Legion
27
27
  log.debug("[llm][api][auth] action=authorized path=#{request.path_info}")
28
28
  end
29
29
 
30
+ app.before('/api/llm/inference/v1/*', &auth_check)
31
+
32
+ app.before('/v1/*', &auth_check)
33
+
30
34
  app.helpers do
31
35
  define_method(:auth_enabled?) do
32
36
  Legion::LLM::Settings.value(:api, :auth, :enabled) == true
@@ -53,7 +57,7 @@ module Legion
53
57
  end
54
58
  end
55
59
 
56
- log.debug('[llm][api][auth] /v1/* before filter registered')
60
+ log.debug('[llm][api][auth] /v1/* and /api/llm/inference/v1/* before filters registered')
57
61
  rescue StandardError => e
58
62
  handle_exception(e, level: :error, handled: false, operation: 'llm.api.auth.register')
59
63
  end
@@ -11,10 +11,19 @@ module Legion
11
11
  module ChatCompletions
12
12
  extend Legion::Logging::Helper
13
13
 
14
- def self.registered(app) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
15
- log.debug('[llm][api][openai][chat_completions] registering POST /v1/chat/completions')
14
+ def self.registered(app)
15
+ log.debug('[llm][api][openai][chat_completions] registering POST /v1/chat/completions + /api/llm/inference/v1/chat/completions')
16
16
 
17
- app.post '/v1/chat/completions' do # rubocop:disable Metrics/BlockLength
17
+ handler = build_handler
18
+
19
+ app.post('/v1/chat/completions') { instance_exec(&handler) }
20
+ app.post('/api/llm/inference/v1/chat/completions') { instance_exec(&handler) }
21
+
22
+ log.debug('[llm][api][openai][chat_completions] routes registered')
23
+ end
24
+
25
+ def self.build_handler # rubocop:disable Metrics/MethodLength,Metrics/AbcSize
26
+ proc do # rubocop:disable Metrics/BlockLength
18
27
  require_llm!
19
28
  body = parse_request_body
20
29
 
@@ -121,8 +130,6 @@ module Legion
121
130
  halt 500, { 'Content-Type' => 'application/json' },
122
131
  Legion::JSON.dump({ error: { message: e.message, type: 'server_error' } })
123
132
  end
124
-
125
- log.debug('[llm][api][openai][chat_completions] POST /v1/chat/completions registered')
126
133
  end
127
134
 
128
135
  def self.build_openai_tool_classes(tools)
@@ -10,9 +10,18 @@ module Legion
10
10
  extend Legion::Logging::Helper
11
11
 
12
12
  def self.registered(app)
13
- log.debug('[llm][api][openai][embeddings] registering POST /v1/embeddings')
13
+ log.debug('[llm][api][openai][embeddings] registering POST /v1/embeddings + /api/llm/inference/v1/embeddings')
14
14
 
15
- app.post '/v1/embeddings' do
15
+ handler = build_handler
16
+
17
+ app.post('/v1/embeddings') { instance_exec(&handler) }
18
+ app.post('/api/llm/inference/v1/embeddings') { instance_exec(&handler) }
19
+
20
+ log.debug('[llm][api][openai][embeddings] routes registered')
21
+ end
22
+
23
+ def self.build_handler
24
+ proc do
16
25
  require_llm!
17
26
  body = parse_request_body
18
27
 
@@ -57,8 +66,6 @@ module Legion
57
66
  halt 500, { 'Content-Type' => 'application/json' },
58
67
  Legion::JSON.dump({ error: { message: e.message, type: 'server_error' } })
59
68
  end
60
-
61
- log.debug('[llm][api][openai][embeddings] POST /v1/embeddings registered')
62
69
  end
63
70
  end
64
71
  end
@@ -11,9 +11,9 @@ module Legion
11
11
  extend Legion::Logging::Helper
12
12
 
13
13
  def self.registered(app)
14
- log.debug('[llm][api][openai][models] registering GET /v1/models and GET /v1/models/:id')
14
+ log.debug('[llm][api][openai][models] registering GET /v1/models + /api/llm/inference/v1/models routes')
15
15
 
16
- app.get '/v1/models' do
16
+ list_handler = proc do
17
17
  log.debug('[llm][api][openai][models] action=list')
18
18
  require_llm!
19
19
 
@@ -28,7 +28,7 @@ module Legion
28
28
  Legion::JSON.dump({ error: { message: e.message, type: 'server_error' } })
29
29
  end
30
30
 
31
- app.get '/v1/models/:id' do
31
+ get_handler = proc do
32
32
  model_id = params[:id]
33
33
  log.debug("[llm][api][openai][models] action=get id=#{model_id}")
34
34
  require_llm!
@@ -52,7 +52,12 @@ module Legion
52
52
  Legion::JSON.dump({ error: { message: e.message, type: 'server_error' } })
53
53
  end
54
54
 
55
- log.debug('[llm][api][openai][models] GET /v1/models routes registered')
55
+ app.get('/v1/models') { instance_exec(&list_handler) }
56
+ app.get('/api/llm/inference/v1/models') { instance_exec(&list_handler) }
57
+ app.get('/v1/models/:id') { instance_exec(&get_handler) }
58
+ app.get('/api/llm/inference/v1/models/:id') { instance_exec(&get_handler) }
59
+
60
+ log.debug('[llm][api][openai][models] routes registered')
56
61
  end
57
62
 
58
63
  def self.build_model_list
@@ -266,6 +266,12 @@ module Legion
266
266
  return false
267
267
  end
268
268
 
269
+ unless verify_embedding(provider, resolved)
270
+ log.debug '[llm][discovery] action=detect_embedding_from_registry verify_failed ' \
271
+ "provider=#{provider} model=#{resolved} — falling through to legacy probe"
272
+ return false
273
+ end
274
+
269
275
  @embedding_provider = provider
270
276
  @embedding_model = resolved
271
277
  @embedding_instance = instance
@@ -324,16 +330,9 @@ module Legion
324
330
 
325
331
  def verify_embedding(provider, model)
326
332
  log.debug "[llm][discovery] verify_embedding provider=#{provider} model=#{model}"
327
- return true if provider == :ollama
328
- return true if provider == :azure
329
- return false unless provider_supports_embeddings?(provider)
330
333
  return true unless model
331
334
 
332
- start_time = Time.now
333
- Call::Dispatch.call(provider: provider, capability: :embed, model: model, text: 'health check')
334
- elapsed = ((Time.now - start_time) * 1000).round
335
- log.info "[llm][discovery] embedding health check ok provider=#{provider} model=#{model} elapsed_ms=#{elapsed}"
336
- true
335
+ model_available?(model, provider: provider)
337
336
  rescue StandardError => e
338
337
  handle_exception(e, level: :warn, operation: 'llm.discovery.verify_embedding', provider: provider, model: model)
339
338
  false
@@ -2,6 +2,6 @@
2
2
 
3
3
  module Legion
4
4
  module LLM
5
- VERSION = '0.9.51'
5
+ VERSION = '0.9.53'
6
6
  end
7
7
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: legion-llm
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.9.51
4
+ version: 0.9.53
5
5
  platform: ruby
6
6
  authors:
7
7
  - Esity