lex-llm 0.5.4 → 0.6.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: a67345a318fe016e8b7c302f08cc335bf25f1e4605a2b16fd9d95a9c9d6ccd04
4
- data.tar.gz: a0d2f7b5998b3a70754cb538515e581cb9a17ae7bc38b72de305159cc486edd5
3
+ metadata.gz: 92e15ead2801ddcfdb692c3135b0a8774bf6ffbc137e70562afaf107dcd182da
4
+ data.tar.gz: 79ac6bb87c7c57f22857982096fe7818f1d07de3c62b7dbd114588451ed494cb
5
5
  SHA512:
6
- metadata.gz: 0e1d43f8bfc296cc15e1389f153adc134baf7dfa051d5604617126bfbfc558ce02416caf24509521d43448fc05bc43b278c8cf4fb6a197465de10af36489ada5
7
- data.tar.gz: 0163f3ab169203405a2081c79712343ad5e36ae76bafaa1c703ac20e30ec46324d584bb69da0ebed064a85f9eafcb054ed8f4cb4395789b516607993e3fee53e
6
+ metadata.gz: c46aaae88b383e6ffd8e24076d64287b7fd04bc0c3108b7f7b760807b5ce38d3b49b88294c5cf183d289255ccfecdf2c6513b63646ac3178cf79136f837f67d2
7
+ data.tar.gz: 4ca741e7ea3dcd3bfee9ad59a30a0379a130a3a885f344ad9b0e60509b0d4c18e5ef1e2e2bdf8c0c818e313a493357f475c60d7c27219df1c9302a77b2cebfbf
data/CHANGELOG.md CHANGED
@@ -1,5 +1,29 @@
1
1
  # Changelog
2
2
 
3
+ ## 0.6.2 - 2026-06-20
4
+
5
+ ### Fixed
6
+ - Publish one `llm.registry` model-availability event per discovered model from the shared discovery/filter loop before whitelist/blacklist removes blocked models from routable offerings, preserving shadow-model visibility without polluting inventory.
7
+
8
+ ## 0.6.1 - 2026-06-20
9
+
10
+ ### Fixed
11
+ - Canonicalize routing capabilities in `lex-llm` itself: `embedding` is now the standard singular capability, `reasoning` aliases to `thinking`, and image/audio generation aliases collapse to the router vocabulary used by `Model::Info`, `ModelOffering`, and `CapabilityPolicy`.
12
+ - Standardize `enable_*` / `*_flag` capability overrides in the base provider contract, including provider-level, instance-level, and model-level extraction from shared settings handling.
13
+
14
+ ## 0.6.0 - 2026-06-19
15
+
16
+ ### Added
17
+ - **`Inventory::ScopedRefresher` mixin** — uniform `::Every` actor pattern for catalog writers.
18
+ Each `lex-llm-*` gem includes this and supplies `scope_key` + `compute_lanes_for_scope`. The mixin
19
+ handles write-then-delete-orphans, auth-failure cooldown circuit, and idempotent re-tick semantics.
20
+ Requires legion-llm `>= 0.14.0` (`Inventory.write_lane` / `.delete_lane`).
21
+ - Standard `weight: 100` default in provider settings schema (feeds RANKING v2 `lane_weight`).
22
+ - `ScopedRefresher.compose_id(lane_fields)` — takes a Hash (or keyword splat) with `tier`, `provider_family`, `instance_id`, `type`, and `model`, and is the canonical 5-part
23
+ lane id composer. All lane id composition must go through this method; never constructed inline.
24
+ - `:fleet` first-class tier in `Taxonomies::TIERS` enum.
25
+ - `Capabilities.normalize` normalization helper (PR #152 I1).
26
+
3
27
  ## 0.5.4 - 2026-06-17
4
28
 
5
29
  ### Fixed
@@ -0,0 +1,69 @@
# frozen_string_literal: true

module Legion
  module Extensions
    module Llm
      # Capability vocabulary normalization. Collapses aliases so provider-specific
      # capability names (:function_calling from Gemini, :tool_use from Anthropic, :tools
      # from OpenAI) compare as equal. Used on BOTH sides of request_lane capability
      # filtering (lane declaration and router request payload) — without this, vocabulary
      # differences silently mismatch and the router returns no lane.
      module Capabilities
        # Canonical capability names. Every value in ALIASES is a member of this list.
        CANONICAL = %i[
          completion embedding streaming tools vision thinking structured_output
          moderation image audio_transcription audio_speech responses
        ].freeze

        # Alias => canonical name. Keys are already in cleaned form
        # (lowercase, underscores), which is the form #canonical looks up.
        ALIASES = {
          function_calling: :tools,
          tool_use: :tools,
          tool_calls: :tools,
          tool: :tools,
          functions: :tools,
          stream: :streaming,
          stream_chat: :streaming,
          responses_api: :responses,
          embeddings: :embedding,
          embed: :embedding,
          reasoning: :thinking,
          image_generation: :image,
          images: :image,
          audio_generation: :audio_speech,
          speech_generation: :audio_speech,
          transcription: :audio_transcription
        }.freeze

        module_function

        # Normalize a capability list — collapse aliases, downcase, dedup.
        #
        # @param caps [Array, Object, nil] a list of capability names, a single
        #   name, or nil (coerced with Array())
        # @return [Array<Symbol>] frozen, de-duplicated names in first-seen order;
        #   nil entries and entries that are blank after cleanup are dropped
        def normalize(caps, **)
          Array(caps).compact.filter_map do |cap|
            next unless cap.respond_to?(:to_s)

            # #canonical owns the token cleanup, so there is a single definition
            # of what a "clean" capability name is.
            name = canonical(cap)
            name unless name.empty?
          end.uniq.freeze
        end

        # Normalize several capability lists and combine them into one.
        #
        # @param sets [Array] any number of values accepted by #normalize
        # @return [Array<Symbol>] frozen, de-duplicated union in first-seen order
        def merge(*sets, **)
          sets.flat_map { |set| normalize(set) }.uniq.freeze
        end

        # Whether every required capability is present among the available ones,
        # comparing both sides after normalization.
        #
        # @param available [Array, Object, nil] capabilities on offer
        # @param required [Array, Object, nil] capabilities being asked for
        # @return [Boolean] true when nothing is required or nothing is missing
        def include_all?(available, required, **)
          required = normalize(required)
          return true if required.empty?

          (required - normalize(available)).empty?
        end

        # Clean a single capability name (downcase, strip, hyphens to underscores)
        # and map it through ALIASES.
        #
        # @param capability [#to_s] one capability name
        # @return [Symbol] the canonical name, or the cleaned name itself when no
        #   alias matches (an empty Symbol for blank input)
        def canonical(capability)
          sym = capability.to_s.downcase.strip.tr('-', '_').to_sym
          ALIASES.fetch(sym, sym)
        end
      end
    end
  end
end
@@ -7,26 +7,30 @@ module Legion
7
7
  # Returns both a flat capability list and per-capability source metadata.
8
8
  module CapabilityPolicy
9
9
  OPTIONAL_CAPABILITIES = %i[
10
- streaming tools vision embeddings thinking structured_output image audio_transcription audio_speech
10
+ completion embedding streaming tools vision thinking structured_output
11
+ moderation image audio_transcription audio_speech
11
12
  ].freeze
12
13
 
13
- BOOLEAN_ALIASES = {
14
- enable_streaming: :streaming,
15
- enable_tools: :tools,
16
- enable_thinking: :thinking,
17
- enable_vision: :vision,
18
- enable_embeddings: :embeddings,
19
- enable_images: :image,
20
- streaming_flag: :streaming,
14
+ BOOLEAN_ALIASES = OPTIONAL_CAPABILITIES.each_with_object({}) do |capability, result|
15
+ result[:"enable_#{capability}"] = capability
16
+ result[:"#{capability}_flag"] = capability
17
+ end.merge(
18
+ enable_embeddings: :embedding,
19
+ embeddings_flag: :embedding,
20
+ enable_functions: :tools,
21
+ functions_flag: :tools,
21
22
  tool_flag: :tools,
22
- tools_flag: :tools,
23
- thinking_flag: :thinking,
24
- vision_flag: :vision,
25
- embedding_flag: :embeddings,
26
- embeddings_flag: :embeddings,
27
- image_flag: :image,
28
- images_flag: :image
29
- }.freeze
23
+ enable_function_calling: :tools,
24
+ function_calling_flag: :tools,
25
+ enable_reasoning: :thinking,
26
+ reasoning_flag: :thinking,
27
+ enable_images: :image,
28
+ images_flag: :image,
29
+ enable_image_generation: :image,
30
+ image_generation_flag: :image,
31
+ enable_audio_generation: :audio_speech,
32
+ audio_generation_flag: :audio_speech
33
+ ).freeze
30
34
 
31
35
  module_function
32
36
 
@@ -88,7 +92,8 @@ module Legion
88
92
 
89
93
  def normalized_booleans(value)
90
94
  normalize_hash(value).each_with_object({}) do |(key, raw), result|
91
- capability = key.to_s.downcase.tr('-', '_').to_sym
95
+ capability = canonical_capability(key)
96
+ next if capability.nil?
92
97
  next unless OPTIONAL_CAPABILITIES.include?(capability)
93
98
  next unless [true, false].include?(raw)
94
99
 
@@ -96,6 +101,10 @@ module Legion
96
101
  end
97
102
  end
98
103
 
104
# Resolve one settings key to a capability symbol by running it through the
# shared Capabilities.normalize helper as a one-element list.
#
# @param key [Object] the raw key
# @return [Symbol, nil] the first normalized entry, or nil when normalize
#   returns an empty list for this key
def canonical_capability(key)
  normalized = Legion::Extensions::Llm::Capabilities.normalize([key])
  normalized.first
end
107
+
99
108
  def normalize_hash(value)
100
109
  return {} unless value.respond_to?(:to_h)
101
110
 
@@ -90,7 +90,7 @@ module Legion
90
90
 
91
91
  ::Legion::Settings.dig(*path)
92
92
  rescue StandardError => e
93
- handle_exception(e, level: :debug, handled: true, operation: 'llm.credential_sources.setting',
93
+ handle_exception(e, level: :warn, handled: true, operation: 'llm.credential_sources.setting',
94
94
  path: path.map(&:to_s))
95
95
  nil
96
96
  end
@@ -117,7 +117,7 @@ module Legion
117
117
  end
118
118
  true
119
119
  rescue StandardError => e
120
- handle_exception(e, level: :debug, handled: true, operation: 'llm.credential_sources.socket_open',
120
+ handle_exception(e, level: :warn, handled: true, operation: 'llm.credential_sources.socket_open',
121
121
  host:, port:)
122
122
  false
123
123
  ensure
@@ -135,7 +135,7 @@ module Legion
135
135
  response = conn.get(path)
136
136
  response.status >= 200 && response.status < 300
137
137
  rescue StandardError => e
138
- handle_exception(e, level: :debug, handled: true, operation: 'llm.credential_sources.http_ok',
138
+ handle_exception(e, level: :warn, handled: true, operation: 'llm.credential_sources.http_ok',
139
139
  path:)
140
140
  false
141
141
  ensure
@@ -209,7 +209,7 @@ module Legion
209
209
  normalized = host.delete_prefix('[').delete_suffix(']')
210
210
  %w[localhost 127.0.0.1 ::1].include?(normalized)
211
211
  rescue URI::InvalidURIError => e
212
- handle_exception(e, level: :debug, handled: true, operation: 'llm.credential_sources.localhost')
212
+ handle_exception(e, level: :warn, handled: true, operation: 'llm.credential_sources.localhost')
213
213
  false
214
214
  end
215
215
 
@@ -244,7 +244,7 @@ module Legion
244
244
  ::JSON.parse(raw, symbolize_names: true)
245
245
  end
246
246
  rescue StandardError => e
247
- handle_exception(e, level: :debug, handled: true, operation: 'llm.credential_sources.read_json',
247
+ handle_exception(e, level: :warn, handled: true, operation: 'llm.credential_sources.read_json',
248
248
  path:)
249
249
  {}
250
250
  end
@@ -267,7 +267,7 @@ module Legion
267
267
 
268
268
  exp.to_i > Time.now.to_i
269
269
  rescue StandardError => e
270
- handle_exception(e, level: :debug, handled: true, operation: 'llm.credential_sources.token_valid')
270
+ handle_exception(e, level: :warn, handled: true, operation: 'llm.credential_sources.token_valid')
271
271
  true
272
272
  end
273
273
 
@@ -169,7 +169,7 @@ module Legion
169
169
  def safe_publish_error(envelope, error)
170
170
  publish_error(envelope, error)
171
171
  rescue StandardError => e
172
- handle_exception(e, level: :debug, handled: true,
172
+ handle_exception(e, level: :warn, handled: true,
173
173
  operation: 'llm.fleet.provider_responder.safe_publish_error',
174
174
  error_class: error.class.name)
175
175
  nil
@@ -33,7 +33,7 @@ module Legion
33
33
  configured << llm if llm.respond_to?(:key?)
34
34
  configured
35
35
  rescue StandardError => e
36
- handle_exception(e, level: :debug, handled: true, operation: 'llm.fleet.settings.configured')
36
+ handle_exception(e, level: :warn, handled: true, operation: 'llm.fleet.settings.configured')
37
37
  []
38
38
  end
39
39
 
@@ -54,7 +54,7 @@ module Legion
54
54
  def safe_fetch(source, key)
55
55
  source[key] || source[key.to_s]
56
56
  rescue StandardError => e
57
- handle_exception(e, level: :debug, handled: true, operation: 'llm.fleet.settings.safe_fetch',
57
+ handle_exception(e, level: :warn, handled: true, operation: 'llm.fleet.settings.safe_fetch',
58
58
  key: key.to_s)
59
59
  nil
60
60
  end
@@ -0,0 +1,40 @@
# frozen_string_literal: true

require_relative '../capabilities'

module Legion
  module Extensions
    module Llm
      module Inventory
        # Inventory-facing entry point for capability normalization.
        #
        # Per-gem DiscoveryRefresh actors call this from `lanes_from_instance` to
        # turn offering capabilities into a canonical symbol list before the lane
        # is written to Inventory. The actors guard on this constant with
        # `return [] unless defined?(...)`, so when it is absent the lane is
        # written with `capabilities: []` — even if the operator set
        # `enable_tools: true` / `enable_thinking: true` on the instance and the
        # provider's CapabilityPolicy resolved them correctly in the offering.
        #
        # Every method forwards to Legion::Extensions::Llm::Capabilities, which
        # keeps the alias table (function_calling/tool_use/tool_calls/tool/functions
        # → tools, etc.) in a single place.
        module Capabilities
          # Same object as the shared alias table, re-exported under this namespace.
          ALIASES = Legion::Extensions::Llm::Capabilities::ALIASES

          module_function

          # Forwards to Legion::Extensions::Llm::Capabilities.normalize.
          def normalize(caps, **options)
            Legion::Extensions::Llm::Capabilities.normalize(caps, **options)
          end

          # Forwards to Legion::Extensions::Llm::Capabilities.merge.
          def merge(*sets, **options)
            Legion::Extensions::Llm::Capabilities.merge(*sets, **options)
          end

          # Forwards to Legion::Extensions::Llm::Capabilities.include_all?.
          def include_all?(available, required, **options)
            Legion::Extensions::Llm::Capabilities.include_all?(available, required, **options)
          end
        end
      end
    end
  end
end
@@ -0,0 +1,105 @@
# frozen_string_literal: true

module Legion
  module Extensions
    module Llm
      module Inventory
        # Mix into a Legion::Extensions::Llm::*::Actors::DiscoveryRefresh class.
        # The host class must include Legion::Extensions::Helpers::Lex (auto-injects
        # log / settings / handle_exception / cache_*) and define:
        #   - #scope_key — Hash like { provider: :vllm, instance: instance_id }
        #   - #compute_lanes_for_scope — Array<Hash> lane fact-sheets (no health, no
        #                                lane_weight — added by Inventory.write_lane).
        #                                Each lane MUST set :id via compose_id.
        #   - #credential_hash — String identifying the auth credential for this scope
        #                        (used by the auth-failure cooldown circuit).
        module ScopedRefresher
          # Auth-failure cooldown TTL (5 minutes). Operator can fix the credential
          # and lanes auto-recover on the next tick after expiry.
          AUTH_COOLDOWN_TTL = 300

          # G22: 5-part lane id composed here and ONLY here. All gem writers MUST call
          # this helper; Inventory.write_lane rejects any lane with a missing or malformed :id.
          #
          # Accepts a Hash (or keyword splat) with keys: tier, provider_family,
          # instance_id, type, model. The shorter `provider` / `instance` keys are
          # accepted as fallbacks when `provider_family` / `instance_id` are absent,
          # so a hash built from #scope_key composes a complete id instead of one
          # with empty segments.
          #
          # @param lane_fields [Hash] lane attributes
          # @return [String] "tier:provider_family:instance_id:type:model"
          def self.compose_id(lane_fields)
            tier     = lane_fields[:tier]
            provider = lane_fields[:provider_family] || lane_fields[:provider]
            instance = lane_fields[:instance_id] || lane_fields[:instance]
            "#{tier}:#{provider}:#{instance}:#{lane_fields[:type]}:#{lane_fields[:model]}"
          end

          # G7 write-then-delete-orphans: write new lanes FIRST (eliminates zero-results
          # race window), then delete orphans from the previous scope snapshot.
          #
          # Does nothing while the auth cooldown is active, or when the compute step
          # fails or yields no lanes.
          # NOTE(review): an empty (but successful) lane list also returns early, so
          # lanes written by an earlier tick are never purged in that case — confirm
          # this is intended.
          #
          # @return [Array, nil] the lane ids remembered for the next tick, or nil
          #   when the tick was skipped
          def tick(**)
            return if auth_cooldown_active?

            new_lanes = safe_compute
            log.info("[llm][scoped_refresher] action=tick provider=#{scope_key[:provider]} lanes_computed=#{new_lanes ? new_lanes.size : 0}")
            return unless new_lanes&.any?

            written = new_lanes.count { |lane_fact| Legion::LLM::Inventory.write_lane(lane: lane_fact) }
            log.info("[llm][scoped_refresher] action=tick_complete provider=#{scope_key[:provider]} lanes_computed=#{new_lanes.size} lanes_written=#{written}")

            # Ids are collected once and reused for both the orphan diff and the
            # snapshot kept for the next tick.
            current_ids = new_lanes.map { |lane_fact| lane_fact[:id] }
            ((@prev_scope_keys || []) - current_ids).each { |id| Legion::LLM::Inventory.delete_lane(id: id) }

            @prev_scope_keys = current_ids
          end

          private

          # Wraps compute_lanes_for_scope with auth-failure cooldown logic.
          # If a cooldown key is present from a previous auth failure, skips the
          # compute entirely (no real call burned). On a new auth failure, writes the
          # cooldown key with AUTH_COOLDOWN_TTL so subsequent ticks also skip.
          #
          # NotImplementedError (a ScriptError, not a StandardError) is not caught
          # here, so a host that forgot to implement compute_lanes_for_scope still
          # fails loudly.
          #
          # @return [Array<Hash>, nil] the computed lanes, or nil when skipped or failed
          def safe_compute
            if auth_cooldown_active?
              log.warn("[llm][scoped_refresher] action=skip reason=auth_cooldown scope=#{scope_key}")
              return nil
            end
            compute_lanes_for_scope
          rescue StandardError => e
            auth = auth_failure?(error: e)
            start_auth_cooldown if auth
            handle_exception(e, level: :warn, handled: true,
                                operation: auth ? 'inventory.scoped_refresher.auth_failure' : 'inventory.scoped_refresher.compute',
                                scope: scope_key)
            nil
          end

          # Records the auth-failure cooldown marker. A cache error here is reported
          # and swallowed so it cannot escape the rescue handler in safe_compute and
          # hide the original compute error.
          def start_auth_cooldown
            Legion::Cache::Local.set(auth_cooldown_key, 1, ttl: AUTH_COOLDOWN_TTL)
          rescue StandardError => e
            handle_exception(e, level: :warn, handled: true,
                                operation: 'inventory.scoped_refresher.auth_cooldown',
                                scope: scope_key)
            nil
          end

          # Whether a cooldown marker is currently stored for this credential.
          # Fails open: any error while reading the cache counts as "no cooldown".
          def auth_cooldown_active?
            !Legion::Cache::Local.get(auth_cooldown_key).nil?
          rescue StandardError
            false
          end

          # Cache key of the cooldown marker, derived from the host's credential_hash.
          def auth_cooldown_key
            "llm_auth_failed:#{credential_hash}"
          end

          # Default auth-failure predicate. Matches HTTP 401/403 status codes and
          # common auth-error message patterns. Provider gems may override this if
          # their error shapes differ (e.g. Bedrock's AccessDeniedException).
          #
          # @param error [Exception] the error raised by compute_lanes_for_scope
          # @return [Boolean]
          def auth_failure?(error:, **)
            return true if error.respond_to?(:status_code) && [401, 403].include?(error.status_code)
            return true if error.respond_to?(:http_status) && [401, 403].include?(error.http_status)

            # to_s keeps the result a strict boolean even when #message is nil.
            error.message.to_s.match?(/unauthorized|invalid[_ ]api[_ ]key|invalid[_ ]credentials|forbidden/i)
          end
        end
      end
    end
  end
end
@@ -35,7 +35,7 @@ module Legion
35
35
  provider: provider.to_s.downcase.to_sym,
36
36
  instance: (instance || :default).to_s.downcase.to_sym,
37
37
  family: normalized_family,
38
- capabilities: normalize_symbols(capabilities),
38
+ capabilities: normalize_capabilities(capabilities),
39
39
  context_length: to_int(context_length),
40
40
  parameter_count: to_int(parameter_count),
41
41
  parameter_size: parameter_size&.to_s&.strip,
@@ -185,6 +185,17 @@ module Legion
185
185
 
186
186
  private
187
187
 
188
# Build the capability list from +value+: each declared name in cleaned form
# (downcased, stripped, hyphens to underscores), followed by any canonical
# name from Capabilities.normalize that was not already declared.
#
# @param value [Array, Object, nil] declared capabilities (coerced with Array())
# @return [Array<Symbol>] frozen, de-duplicated list; nil and blank entries
#   are dropped
def normalize_capabilities(value)
  declared = Array(value).compact.each_with_object([]) do |capability, cleaned|
    next unless capability.respond_to?(:to_s)

    token = capability.to_s.downcase.strip.tr('-', '_')
    cleaned << token.to_sym unless token.empty?
  end

  canonical = Legion::Extensions::Llm::Capabilities.normalize(declared)
  # Array#| keeps first-seen order and drops duplicates, declared names first.
  (declared | canonical).freeze
end
198
+
188
199
  def normalize_symbols(value)
189
200
  Array(value).compact.each_with_object([]) do |item, normalized|
190
201
  symbol = item.to_s.downcase.strip.to_sym