lex-llm 0.3.1 → 0.4.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +49 -0
- data/README.md +18 -2
- data/lex-llm.gemspec +3 -0
- data/lib/legion/extensions/llm/auto_registration.rb +7 -36
- data/lib/legion/extensions/llm/embedding.rb +1 -1
- data/lib/legion/extensions/llm/error.rb +14 -0
- data/lib/legion/extensions/llm/errors/unsupported_capability.rb +21 -0
- data/lib/legion/extensions/llm/fleet/default_exchange_reply.rb +81 -0
- data/lib/legion/extensions/llm/fleet/envelope_validation.rb +39 -0
- data/lib/legion/extensions/llm/fleet/protocol.rb +16 -0
- data/lib/legion/extensions/llm/fleet/provider_responder.rb +304 -0
- data/lib/legion/extensions/llm/fleet/publish_safety.rb +123 -0
- data/lib/legion/extensions/llm/fleet/settings.rb +66 -0
- data/lib/legion/extensions/llm/fleet/token_error.rb +11 -0
- data/lib/legion/extensions/llm/fleet/token_validator.rb +205 -0
- data/lib/legion/extensions/llm/fleet/worker_execution.rb +165 -0
- data/lib/legion/extensions/llm/message.rb +9 -3
- data/lib/legion/extensions/llm/provider/open_ai_compatible.rb +37 -36
- data/lib/legion/extensions/llm/provider.rb +198 -4
- data/lib/legion/extensions/llm/provider_contract.rb +21 -0
- data/lib/legion/extensions/llm/provider_settings.rb +18 -1
- data/lib/legion/extensions/llm/responses/chat_response.rb +43 -0
- data/lib/legion/extensions/llm/responses/embedding_response.rb +38 -0
- data/lib/legion/extensions/llm/responses/stream_chunk.rb +43 -0
- data/lib/legion/extensions/llm/responses/thinking_extractor.rb +155 -0
- data/lib/legion/extensions/llm/stream_accumulator.rb +12 -1
- data/lib/legion/extensions/llm/transport/exchanges/fleet.rb +24 -0
- data/lib/legion/extensions/llm/transport/messages/fleet_error.rb +64 -0
- data/lib/legion/extensions/llm/transport/messages/fleet_request.rb +155 -0
- data/lib/legion/extensions/llm/transport/messages/fleet_response.rb +63 -0
- data/lib/legion/extensions/llm/version.rb +1 -1
- data/lib/legion/extensions/llm.rb +38 -11
- metadata +62 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: b266813f29f9a144b2a57408f39fe98bc27b7a53e59b13871ca22c0dc8cf6127
|
|
4
|
+
data.tar.gz: 80cb7a8866d4cd2b9c150dd4567f99aa09d12208f8acfa38df3dd578c7c93831
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: d43d28ab982b938f012a66000f73ee7cb4b9cae34ae31cbb6c11794d87845280ae919e2b91b81b594f76f6e11b95e9c57ff796c46d1ce595a74962b6d4a91800
|
|
7
|
+
data.tar.gz: 1976f2adfd60d698e547e92b00f7d779ab28b5c75c975a7245bc58ecc94dbb0d81b767c76552b3d1cee24a53fdd8d2bb98d1e3cb204816e27963491326daee50
|
data/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,54 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## 0.4.3 - 2026-05-06
|
|
4
|
+
|
|
5
|
+
- Move provider-owned fleet responder execution into `lex-llm` so provider gems no longer depend on `legion-llm`.
|
|
6
|
+
- Add shared responder-side fleet token validation, idempotency protection, provider dispatch, and response/error publishing helpers.
|
|
7
|
+
- Reserve fleet replay tokens before provider dispatch, split replay TTL into auth settings, and raise explicit responder transport configuration errors.
|
|
8
|
+
|
|
9
|
+
## 0.4.2 - 2026-05-06
|
|
10
|
+
|
|
11
|
+
- Remove the temporary settings logger wrapper and lazy-load fleet transport envelopes so `lex-llm` boot does not force `legion-transport` loading.
|
|
12
|
+
|
|
13
|
+
## 0.4.1 - 2026-05-06
|
|
14
|
+
|
|
15
|
+
- Make `AutoRegistration` a pure provider discovery mixin and remove upward `Legion::LLM::Call::Registry` mutation hooks.
|
|
16
|
+
- Add provider alias metadata so `legion-llm` can register compatibility provider families without provider require-time side effects.
|
|
17
|
+
- Pass live discovery flags and filters through from `Provider#discover_offerings` to `#list_models`.
|
|
18
|
+
- Merge provider-specific embedding params into canonical `Provider#embed` request payloads.
|
|
19
|
+
|
|
20
|
+
## 0.4.0 - 2026-05-06
|
|
21
|
+
|
|
22
|
+
- Set the coordinated sweep dependency floor for provider-owned fleet responders.
|
|
23
|
+
- Make `Provider#discover_offerings(live: false)` serve only cached live discovery results so inventory reads do not probe provider endpoints.
|
|
24
|
+
|
|
25
|
+
## 0.3.6 - 2026-05-06
|
|
26
|
+
|
|
27
|
+
- Replace shared fleet request, response, and error envelopes with strict fleet protocol v2 fields.
|
|
28
|
+
- Reject legacy fleet envelope fields and publish provider replies through the AMQP default exchange reply queue with optional mandatory routing and publisher confirms.
|
|
29
|
+
|
|
30
|
+
## 0.3.5 - 2026-05-06
|
|
31
|
+
|
|
32
|
+
- Add shared response normalization value objects for chat, stream, embedding, and thinking extraction.
|
|
33
|
+
- Strip provider thinking from caller-visible OpenAI-compatible completion content, including malformed trailing close-tag output.
|
|
34
|
+
- Preserve provider reasoning metadata while tolerating streaming tool-call deltas without optional function names.
|
|
35
|
+
|
|
36
|
+
## 0.3.4 - 2026-05-06
|
|
37
|
+
|
|
38
|
+
- Add shared provider contract and unsupported capability error namespace for lex-llm provider gems.
|
|
39
|
+
- Require keyword provider embed/count token calls and validate provider settings instance nesting.
|
|
40
|
+
- Move shared fleet defaults under nested consumer/auth settings.
|
|
41
|
+
|
|
42
|
+
## 0.3.3 - 2026-05-03
|
|
43
|
+
|
|
44
|
+
- Fix OpenAI-compatible streaming to keep split `<think>` tag content out of streamed assistant content.
|
|
45
|
+
- Strip leaked assistant thinking from outbound OpenAI-compatible history, including dangling close-tag content from prior responses.
|
|
46
|
+
- Tolerate incomplete streaming tool-call deltas that omit `function.name`.
|
|
47
|
+
|
|
48
|
+
## 0.3.2 - 2026-05-03
|
|
49
|
+
|
|
50
|
+
- Fix AutoRegistration to pass the discovered instance id into provider adapter config for instance-aware model offerings.
|
|
51
|
+
|
|
3
52
|
## 0.3.1 - 2026-05-02
|
|
4
53
|
|
|
5
54
|
- Fix AutoRegistration to pass tier and capabilities metadata to Call::Registry on registration.
|
data/README.md
CHANGED
|
@@ -37,7 +37,7 @@ Expected provider gems include:
|
|
|
37
37
|
- `lex-llm-mlx`
|
|
38
38
|
- `lex-llm-bedrock`
|
|
39
39
|
- `lex-llm-vertex`
|
|
40
|
-
- `lex-llm-azure`
|
|
40
|
+
- `lex-llm-azure-foundry`
|
|
41
41
|
|
|
42
42
|
## Install
|
|
43
43
|
|
|
@@ -48,7 +48,7 @@ gem 'lex-llm'
|
|
|
48
48
|
Provider extensions should declare `lex-llm` as a gemspec dependency:
|
|
49
49
|
|
|
50
50
|
```ruby
|
|
51
|
-
spec.add_dependency 'lex-llm', '>= 0.
|
|
51
|
+
spec.add_dependency 'lex-llm', '>= 0.4.3'
|
|
52
52
|
```
|
|
53
53
|
|
|
54
54
|
For local development across LegionIO repos, prefer a local path override in the app or test `Gemfile`, not a permanent git dependency in the gemspec.
|
|
@@ -297,6 +297,22 @@ At minimum, a provider extension should define:
|
|
|
297
297
|
|
|
298
298
|
Provider extensions should avoid duplicating shared classes, schema logic, fleet lane construction, JSON handling, or common request/response objects.
|
|
299
299
|
|
|
300
|
+
Canonical provider calls are keyword-based:
|
|
301
|
+
|
|
302
|
+
```ruby
|
|
303
|
+
provider.chat(messages:, model:, tools: [], temperature: nil, params: {}, headers: {}, schema: nil, thinking: nil)
|
|
304
|
+
provider.stream_chat(messages:, model:, tools: [], temperature: nil, params: {}, headers: {}, schema: nil, thinking: nil) { |chunk| ... }
|
|
305
|
+
provider.embed(text:, model:, dimensions: nil, params: {}, headers: {})
|
|
306
|
+
provider.image(prompt:, model:, size:, with: nil, mask: nil, params: {})
|
|
307
|
+
provider.count_tokens(messages:, model:, params: {})
|
|
308
|
+
provider.health(live: false)
|
|
309
|
+
provider.discover_offerings(live: false, **filters)
|
|
310
|
+
```
|
|
311
|
+
|
|
312
|
+
Provider responses should normalize through the shared response objects before they reach callers. Visible assistant text and provider reasoning are separate values: provider-specific thinking fields, OpenAI-compatible `reasoning_content`, and literal `<think>...</think>` text are removed from caller-visible content and preserved as thinking metadata when present.
|
|
313
|
+
|
|
314
|
+
Fleet envelopes also live here. `FleetRequest`, `FleetResponse`, and `FleetError` are protocol-v2 transport messages with `operation`, `request_id`, `correlation_id`, `idempotency_key`, `message_context`, and signed-token fields. Provider gems should consume and publish these shared envelopes instead of defining local fleet message shapes.
|
|
315
|
+
|
|
300
316
|
All providers inherit `#readiness(live: false)`, which returns configured state, provider locality, API base, endpoint helpers, and non-live health metadata without probing remote services. Providers with a cheap health endpoint can pass `live: true` to include that endpoint response. OpenAI-compatible providers also inherit shared model-list parsing that maps discovered models into normalized capabilities and modalities for Legion routing.
|
|
301
317
|
|
|
302
318
|
## Schema Status
|
data/lex-llm.gemspec
CHANGED
|
@@ -29,14 +29,17 @@ Gem::Specification.new do |spec|
|
|
|
29
29
|
|
|
30
30
|
# Runtime dependencies
|
|
31
31
|
spec.add_dependency 'base64'
|
|
32
|
+
spec.add_dependency 'concurrent-ruby', '>= 1.2'
|
|
32
33
|
spec.add_dependency 'event_stream_parser', '~> 1'
|
|
33
34
|
spec.add_dependency 'faraday', ENV['FARADAY_VERSION'] || '>= 1.10.0'
|
|
34
35
|
spec.add_dependency 'faraday-multipart', '>= 1'
|
|
35
36
|
spec.add_dependency 'faraday-net_http', '>= 1'
|
|
36
37
|
spec.add_dependency 'faraday-retry', '>= 1'
|
|
38
|
+
spec.add_dependency 'legion-crypt', '>= 1.5.1'
|
|
37
39
|
spec.add_dependency 'legion-json', '>= 1.2.1'
|
|
38
40
|
spec.add_dependency 'legion-logging', '>= 1.3.2'
|
|
39
41
|
spec.add_dependency 'legion-settings', '>= 1.3.14'
|
|
42
|
+
spec.add_dependency 'legion-transport', '>= 1.4.14'
|
|
40
43
|
spec.add_dependency 'marcel', '~> 1'
|
|
41
44
|
spec.add_dependency 'ruby_llm-schema', '~> 0'
|
|
42
45
|
spec.add_dependency 'zeitwerk', '~> 2'
|
|
@@ -3,9 +3,9 @@
|
|
|
3
3
|
module Legion
|
|
4
4
|
module Extensions
|
|
5
5
|
module Llm
|
|
6
|
-
# Mixin that lex-llm-* provider modules `extend` to
|
|
7
|
-
#
|
|
8
|
-
#
|
|
6
|
+
# Mixin that lex-llm-* provider modules `extend` to expose shared
|
|
7
|
+
# discovery metadata. Registration into Legion::LLM is owned by
|
|
8
|
+
# legion-llm so loaded providers can be rediscovered after reloads.
|
|
9
9
|
#
|
|
10
10
|
# Prerequisites on the extending module:
|
|
11
11
|
# - `PROVIDER_FAMILY` constant (Symbol, e.g. :ollama)
|
|
@@ -16,39 +16,10 @@ module Legion
|
|
|
16
16
|
{}
|
|
17
17
|
end
|
|
18
18
|
|
|
19
|
-
#
|
|
20
|
-
#
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
# the adapter (these are metadata, not connection config).
|
|
24
|
-
#
|
|
25
|
-
# Guarded: no-op when Legion::LLM::Call::Registry is not loaded.
|
|
26
|
-
def register_discovered_instances
|
|
27
|
-
return unless defined?(Legion::LLM::Call::Registry)
|
|
28
|
-
|
|
29
|
-
instances = discover_instances
|
|
30
|
-
instances.each do |instance_id, config|
|
|
31
|
-
registry_config = config.except(:tier, :capabilities)
|
|
32
|
-
adapter = Legion::LLM::Call::LexLLMAdapter.new(
|
|
33
|
-
self::PROVIDER_FAMILY, provider_class, instance_config: registry_config
|
|
34
|
-
)
|
|
35
|
-
meta = { tier: config[:tier], capabilities: config[:capabilities] || [] }
|
|
36
|
-
Legion::LLM::Call::Registry.register(
|
|
37
|
-
self::PROVIDER_FAMILY, adapter, instance: instance_id, metadata: meta
|
|
38
|
-
)
|
|
39
|
-
end
|
|
40
|
-
rescue StandardError => e
|
|
41
|
-
log.warn "[#{self::PROVIDER_FAMILY}] self-registration failed: #{e.message}" if respond_to?(:log)
|
|
42
|
-
end
|
|
43
|
-
|
|
44
|
-
# Deregisters all instances for this provider and re-runs discovery.
|
|
45
|
-
#
|
|
46
|
-
# Guarded: no-op when Legion::LLM::Call::Registry is not loaded.
|
|
47
|
-
def rediscover!
|
|
48
|
-
return unless defined?(Legion::LLM::Call::Registry)
|
|
49
|
-
|
|
50
|
-
Legion::LLM::Call::Registry.deregister_provider(self::PROVIDER_FAMILY)
|
|
51
|
-
register_discovered_instances
|
|
19
|
+
# Extra provider-family names that legion-llm should register against the
# same discovered provider instances. The shared default is no aliases.
#
# @return [Array] a new empty array on every call
def provider_aliases = []
|
|
53
24
|
end
|
|
54
25
|
end
|
|
@@ -27,6 +27,20 @@ module Legion
|
|
|
27
27
|
class ModelNotFoundError < StandardError; end
|
|
28
28
|
class UnsupportedAttachmentError < StandardError; end
|
|
29
29
|
|
|
30
|
+
# Backward-compatible unsupported-capability error alias.
#
# Accepts either the structured keywords of Errors::UnsupportedCapability or
# a plain positional message.
class UnsupportedCapabilityError < Errors::UnsupportedCapability
  # @param message [String, nil] free-form message; only used when +provider+
  #   and +capability+ are not both given
  # @param provider [Object, nil] provider identifier
  # @param capability [Object, nil] capability that was requested
  # @param model [Object, nil] model the call targeted
  def initialize(message = nil, provider: nil, capability: nil, model: nil)
    # Structured form: the parent builds the message from the keywords.
    return super(provider:, capability:, model:) if provider && capability

    @provider = provider
    @capability = capability
    @model = model
    # Skip the parent initializer (it requires both keywords) and set the
    # message through StandardError's own initializer.
    StandardError.instance_method(:initialize).bind_call(self, message)
  end
end
|
|
43
|
+
|
|
30
44
|
# Error classes for different HTTP status codes
|
|
31
45
|
class BadRequestError < Error; end
|
|
32
46
|
class ForbiddenError < Error; end
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Legion
|
|
4
|
+
module Extensions
|
|
5
|
+
module Llm
|
|
6
|
+
module Errors
|
|
7
|
+
# Raised when a provider receives a canonical call for an unsupported capability.
class UnsupportedCapability < StandardError
  attr_reader :provider, :capability, :model

  # @param provider [Object] provider identifier, interpolated into the message
  # @param capability [Object] capability that was requested
  # @param model [Object, nil] model the call targeted; appended to the message when given
  def initialize(provider:, capability:, model: nil)
    @provider = provider
    @capability = capability
    @model = model

    model_suffix = model ? " for #{model}" : ''
    super("Provider #{provider} does not support #{capability}#{model_suffix}")
  end
end
|
|
18
|
+
end
|
|
19
|
+
end
|
|
20
|
+
end
|
|
21
|
+
end
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative 'publish_safety'
|
|
4
|
+
|
|
5
|
+
module Legion
|
|
6
|
+
module Extensions
|
|
7
|
+
module Llm
|
|
8
|
+
module Fleet
|
|
9
|
+
# Publishes correlated fleet replies directly to the caller's reply queue.
|
|
10
|
+
module DefaultExchangeReply
|
|
11
|
+
include PublishSafety
|
|
12
|
+
|
|
13
|
+
DEFAULT_REPLY_PUBLISH_OPTIONS = {
|
|
14
|
+
mandatory: false,
|
|
15
|
+
publisher_confirm: false,
|
|
16
|
+
spool: false,
|
|
17
|
+
return_result: true
|
|
18
|
+
}.freeze
|
|
19
|
+
|
|
20
|
+
# Publishes this reply on the channel's default exchange.
#
# Per-call +options+ override the instance options, which in turn override
# DEFAULT_REPLY_PUBLISH_OPTIONS.
#
# @param options [Hash, nil] per-call publish overrides
# @return [Hash, nil] whatever +publish_result+ returns, nil when
#   +return_publish_result?+ is false for the merged options, or a
#   +status: :failed+ hash when one of the rescued transport errors occurs
# @raise [RuntimeError] when the message has not been marked valid
def publish(options = nil)
  # A bare `raise` re-raises `$!` when this method runs inside another
  # rescue block (e.g. while reporting an earlier failure), which would
  # surface an unrelated error. Raise an explicit RuntimeError instead.
  raise 'fleet reply message is not valid and cannot be published' unless @valid

  requested_options = DEFAULT_REPLY_PUBLISH_OPTIONS.merge(@options).merge(options || {})
  return_result = return_publish_result?(requested_options)
  publish_options = reply_publish_options(requested_options)
  validate_payload_size
  default_exchange = channel.default_exchange
  return_state = {}
  install_return_listener(default_exchange, requested_options, return_state)
  prepare_publisher_confirms(default_exchange, requested_options)
  default_exchange.publish(encode_message, **publish_options)
  return nil unless return_result

  publish_result(default_exchange, requested_options.merge(publish_options), return_state)
rescue Bunny::ConnectionClosedError, Bunny::ChannelAlreadyClosed, Bunny::ChannelError,
       Bunny::NetworkErrorWrapper, IOError, Timeout::Error => e
  handle_exception(e, level: :warn, handled: true, operation: 'llm.fleet.reply.publish')
  # publish_options is still nil when the failure happened before it was built.
  reply_publish_failure_result(e, publish_options || @options)
end
|
|
40
|
+
|
|
41
|
+
private
|
|
42
|
+
|
|
43
|
+
# Builds the result hash returned when a reply publish fails.
#
# @param error [Exception] the rescued publish error
# @param options [Hash] publish options; +:routing_key+ wins over the message's own routing key
# @return [Hash] failure description with nil entries removed
def reply_publish_failure_result(error, options)
  failure = { status: :failed, accepted: false }
  failure[:error_class] = error.class.name
  failure[:error] = error.message
  failure[:routing_key] = options[:routing_key] || routing_key
  failure[:message_id] = message_id
  failure[:correlation_id] = correlation_id
  failure.compact
end
|
|
54
|
+
|
|
55
|
+
# Assembles the keyword options handed to the default exchange publish call.
#
# Values in +options+ take precedence over the message's own accessors for
# content type, encoding, type, priority, expiration, persistence and app id.
# +mandatory+ is only true when +options[:mandatory]+ is exactly +true+.
#
# @param options [Hash] merged publish options
# @return [Hash] publish properties with nil entries removed
def reply_publish_options(options)
  properties = { routing_key: routing_key }
  properties[:content_type] = options[:content_type] || content_type
  properties[:content_encoding] = options[:content_encoding] || content_encoding
  properties[:type] = options[:type] || type
  properties[:priority] = options[:priority] || priority
  properties[:expiration] = options[:expiration] || expiration
  properties[:headers] = reply_headers(options)
  # An explicit false must win here, so presence is checked with key?.
  properties[:persistent] = options.key?(:persistent) ? options[:persistent] : persistent
  properties[:message_id] = message_id
  properties[:correlation_id] = correlation_id
  properties[:reply_to] = reply_to
  properties[:app_id] = options[:app_id] || app_id
  properties[:timestamp] = timestamp
  properties[:mandatory] = options[:mandatory] == true
  properties.compact
end
|
|
73
|
+
|
|
74
|
+
# Merges per-publish header overrides on top of the message headers.
#
# @param options [Hash] publish options; +:headers+ is optional
# @return [Hash] the message headers, with overrides applied when present
def reply_headers(options)
  overrides = options[:headers]
  return headers unless overrides

  headers.merge(overrides)
end
|
|
77
|
+
end
|
|
78
|
+
end
|
|
79
|
+
end
|
|
80
|
+
end
|
|
81
|
+
end
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative 'protocol'
|
|
4
|
+
|
|
5
|
+
module Legion
|
|
6
|
+
module Extensions
|
|
7
|
+
module Llm
|
|
8
|
+
module Fleet
|
|
9
|
+
# Shared validation helpers for strict fleet protocol v2 envelopes.
#
# All helpers read the including object's +@options+ hash.
module EnvelopeValidation
  LEGACY_OPTIONS = %i[schema_version request_type fleet_correlation_id].freeze

  private

  # Raises when any legacy option is present under a symbol or string key.
  #
  # @raise [ArgumentError] naming the first legacy option found
  def reject_legacy_options!
    LEGACY_OPTIONS.each do |legacy_key|
      next unless @options.key?(legacy_key) || @options.key?(legacy_key.to_s)

      raise ArgumentError, "#{legacy_key} is not supported by fleet protocol v2"
    end
  end

  # Raises unless +key+ is present in +@options+ with a non-nil value.
  #
  # @param key [Object] option key, looked up exactly as given
  # @raise [ArgumentError] when the option is missing or nil
  def require_option!(key)
    present = @options.key?(key) && !@options[key].nil?
    raise ArgumentError, "#{key} is required" unless present
  end

  # Raises unless +:protocol_version+ equals Fleet::Protocol::VERSION.
  # A missing +:protocol_version+ is treated as the current version.
  #
  # @raise [ArgumentError] when a different version is given
  def require_protocol_version!
    expected = Fleet::Protocol::VERSION
    raise ArgumentError, "protocol_version must be #{Fleet::Protocol::VERSION}" unless
      @options.fetch(:protocol_version, expected) == expected
  end
end
|
|
36
|
+
end
|
|
37
|
+
end
|
|
38
|
+
end
|
|
39
|
+
end
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Legion
|
|
4
|
+
module Extensions
|
|
5
|
+
module Llm
|
|
6
|
+
module Fleet
|
|
7
|
+
# Constants shared by fleet envelopes.
module Protocol
  # Envelope protocol version.
  VERSION = 2

  # Message type identifiers for fleet traffic.
  REQUEST_TYPE = 'llm.fleet.request'
  RESPONSE_TYPE = 'llm.fleet.response'
  ERROR_TYPE = 'llm.fleet.error'
end
|
|
13
|
+
end
|
|
14
|
+
end
|
|
15
|
+
end
|
|
16
|
+
end
|
|
@@ -0,0 +1,304 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'json'
|
|
4
|
+
|
|
5
|
+
require_relative 'protocol'
|
|
6
|
+
require_relative 'settings'
|
|
7
|
+
require_relative 'worker_execution'
|
|
8
|
+
|
|
9
|
+
module Legion
|
|
10
|
+
module Extensions
|
|
11
|
+
module Llm
|
|
12
|
+
module Transport
|
|
13
|
+
# Autoloads responder publish envelopes without booting legion-transport during lex-llm load.
|
|
14
|
+
module Messages
|
|
15
|
+
autoload :FleetError, File.expand_path('../transport/messages/fleet_error', __dir__) unless
|
|
16
|
+
autoload?(:FleetError) || const_defined?(:FleetError, false)
|
|
17
|
+
autoload :FleetResponse, File.expand_path('../transport/messages/fleet_response', __dir__) unless
|
|
18
|
+
autoload?(:FleetResponse) || const_defined?(:FleetResponse, false)
|
|
19
|
+
end
|
|
20
|
+
end
|
|
21
|
+
|
|
22
|
+
module Fleet
|
|
23
|
+
# Shared implementation for provider-owned fleet responder runners.
|
|
24
|
+
module ProviderResponder
|
|
25
|
+
class ConfigurationError < StandardError; end
|
|
26
|
+
|
|
27
|
+
REQUIRED_FIELDS = %i[
|
|
28
|
+
request_id correlation_id idempotency_key operation provider provider_instance model params reply_to
|
|
29
|
+
message_context caller trace_context signed_token timeout_seconds expires_at protocol_version
|
|
30
|
+
].freeze
|
|
31
|
+
LEGACY_FIELDS = %i[schema_version request_type fleet_correlation_id].freeze
|
|
32
|
+
|
|
33
|
+
# Wrapper around a parsed fleet request hash that accepts symbol or string
# keys and exposes the envelope fields as reader methods.
FleetEnvelope = Struct.new(:data, keyword_init: true) do
  # Reads +key+, trying the symbol form first and the string form second.
  #
  # Only a nil symbol-key value falls through to the string key, so a
  # stored +false+ is returned as +false+ rather than being lost.
  #
  # @param key [#to_sym, #to_s]
  # @return [Object, nil]
  def [](key)
    value = data[key.to_sym]
    value.nil? ? data[key.to_s] : value
  end

  # @return [Boolean] whether +key+ exists in either symbol or string form
  def key?(key)
    data.key?(key.to_sym) || data.key?(key.to_s)
  end

  # @return [Object] the stored value when the key exists, otherwise +default+
  def fetch(key, default = nil)
    key?(key) ? self[key] : default
  end

  # @return [Object] the wrapped data object itself (not a copy)
  def to_h = data

  def protocol_version = self[:protocol_version]
  def request_id = self[:request_id]
  def correlation_id = self[:correlation_id]
  def idempotency_key = self[:idempotency_key]
  def operation = self[:operation]
  def provider = self[:provider]
  def provider_instance = self[:provider_instance]
  def model = self[:model]

  # These three default to an empty hash when the field is absent or nil.
  def params = self[:params] || {}
  def reply_to = self[:reply_to]
  def message_context = self[:message_context] || {}
  def trace_context = self[:trace_context] || {}
end
|
|
60
|
+
|
|
61
|
+
module_function
|
|
62
|
+
|
|
63
|
+
# Public runner entry point mirrors AMQP delivery callbacks, which carry both delivery and property metadata.
|
|
64
|
+
# rubocop:disable Metrics/ParameterLists
|
|
65
|
+
# Handles one fleet request delivery: parses and validates the envelope,
# builds the provider, runs the operation, publishes the response and
# acknowledges the delivery.
#
# On any StandardError it publishes an error reply when an envelope was
# parsed, rejects the delivery (requeue decided by +requeue_error?+) and
# re-raises the error.
#
# @param payload [Object] raw payload accepted by +parse_payload+
# @param provider_family [#to_s] family this responder serves
# @param provider_class [Class] provider class instantiated with the instance settings
# @param provider_instances [Hash, #call] instance settings or a callable returning them
# @param delivery [Object, nil] delivery handle used for ack/reject
# @param properties [Object, nil] used for ack/reject when +delivery+ is nil
# @return [Object] the response returned by WorkerExecution.call
def call(payload:, provider_family:, provider_class:, provider_instances:, delivery: nil, properties: nil)
  receipt = delivery || properties
  envelope = parse_payload(payload)
  check_envelope!(envelope, provider_family:)
  provider = build_provider(envelope:, provider_class:, provider_instances:)

  WorkerExecution.call(envelope: envelope, provider: provider).tap do |response|
    publish_response(envelope, response)
    ack(receipt)
  end
rescue StandardError => error
  # envelope is nil when parsing itself failed, so there is nothing to reply to.
  safe_publish_error(envelope, error) if envelope
  reject(receipt, requeue: requeue_error?(error))
  raise
end
|
|
78
|
+
# rubocop:enable Metrics/ParameterLists
|
|
79
|
+
|
|
80
|
+
# Reports whether any configured provider instance has
# +fleet.respond_to_requests+ switched on.
#
# @param provider_instances [Hash, #call] instance settings or a callable returning them
# @return [Boolean]
def enabled_for?(provider_instances)
  responder_flags = resolve_provider_instances(provider_instances).map do |_instance_id, settings|
    dig(settings, :fleet, :respond_to_requests)
  end
  responder_flags.any? { |flag| truthy?(flag) }
end
|
|
86
|
+
|
|
87
|
+
# Converts an incoming payload into a FleetEnvelope with symbolized keys.
#
# Accepts an existing FleetEnvelope, a JSON string, or any object that
# responds to +to_h+; anything else becomes an empty envelope.
#
# @param payload [FleetEnvelope, String, #to_h, Object]
# @return [FleetEnvelope]
# @raise [ArgumentError] when the decoded payload is not hash-like (for
#   example a JSON array, scalar or null), since every envelope accessor
#   relies on +key?+ lookups and would otherwise fail later with an
#   unrelated NoMethodError or TypeError
def parse_payload(payload)
  hash = case payload
         when FleetEnvelope
           payload.to_h
         when String
           parse_json(payload)
         else
           payload.respond_to?(:to_h) ? payload.to_h : {}
         end
  raise ArgumentError, 'fleet payload must decode to a hash' unless hash.respond_to?(:key?)

  FleetEnvelope.new(data: deep_symbolize(hash))
end
|
|
98
|
+
|
|
99
|
+
# Validates an envelope before dispatch: no legacy fields, every required
# field present and non-nil, matching protocol version and provider family.
#
# @param envelope [#key?, #[]] parsed fleet envelope
# @param provider_family [#to_s] family this responder serves
# @raise [ArgumentError] naming the first required field that is missing or
#   nil; the helper validations raise their own errors
def check_envelope!(envelope, provider_family:)
  reject_legacy_fields!(envelope)

  missing = REQUIRED_FIELDS.find { |name| !envelope.key?(name) || envelope[name].nil? }
  raise ArgumentError, "#{missing} is required" if missing

  validate_protocol_version!(envelope)
  validate_provider_family!(envelope, provider_family)
end
|
|
108
|
+
|
|
109
|
+
# Instantiates the provider for the instance named in the envelope.
#
# @param envelope [#provider_instance] parsed fleet envelope
# @param provider_class [Class] class instantiated with the symbolized instance settings
# @param provider_instances [Hash, #call] instance settings or a callable returning them
# @return [Object] a new +provider_class+ instance
# @raise [ConfigurationError] when the instance is not configured or does
#   not have +fleet.respond_to_requests+ enabled
def build_provider(envelope:, provider_class:, provider_instances:)
  configured = resolve_provider_instances(provider_instances)
  name = envelope.provider_instance.to_s
  settings = configured[name.to_sym] || configured[name]
  raise ConfigurationError, "fleet provider instance is not configured: #{name}" unless settings

  responder_enabled = truthy?(dig(settings, :fleet, :respond_to_requests))
  raise ConfigurationError, "fleet responses are disabled for provider instance: #{name}" unless responder_enabled

  provider_class.new(deep_symbolize(settings))
end
|
|
123
|
+
|
|
124
|
+
# Publishes a FleetResponse that echoes the request's correlation fields
# and carries the normalized provider response.
#
# @param envelope [FleetEnvelope] the request being answered
# @param response [Object] provider response (hash-like or object with readers)
# @return [Object] whatever the message's +publish+ returns
def publish_response(envelope, response)
  message_class = transport_message_class(:FleetResponse)

  echoed = %i[
    protocol_version request_id correlation_id idempotency_key operation provider
    provider_instance model reply_to message_context trace_context
  ].to_h { |field| [field, envelope.public_send(field)] }

  body = {
    content: response_content(response),
    tool_calls: response_field(response, :tool_calls) || [],
    usage: response_usage(response),
    finish_reason: response_field(response, :finish_reason),
    metadata: response_metadata(response)
  }

  message_class.new(**echoed, **body).publish
end
|
|
144
|
+
|
|
145
|
+
# Publishes a FleetError that echoes the request's correlation fields and
# describes the failure.
#
# @param envelope [FleetEnvelope] the request that failed
# @param error [Exception] the failure being reported
# @return [Object] whatever the message's +publish+ returns
def publish_error(envelope, error)
  message_class = transport_message_class(:FleetError)

  echoed = %i[
    protocol_version request_id correlation_id idempotency_key operation provider
    provider_instance model reply_to message_context trace_context
  ].to_h { |field| [field, envelope.public_send(field)] }

  failure = {
    code: error_code(error),
    message: error.message,
    error_class: error.class.name,
    retryable: retryable_error?(error),
    metadata: {}
  }

  message_class.new(**echoed, **failure).publish
end
|
|
165
|
+
|
|
166
|
+
# Best-effort variant of +publish_error+ used while a failure is already
# being handled.
#
# @return [Object, nil] the publish result, or nil when publishing raised
def safe_publish_error(envelope, error)
  publish_error(envelope, error)
rescue StandardError => _publish_failure
  # Deliberately swallowed so a reply failure does not replace the error
  # that is being reported.
  nil
end
|
|
171
|
+
|
|
172
|
+
# Resolves a fleet transport message class by constant name.
#
# @param name [Symbol] constant under Legion::Extensions::Llm::Transport::Messages
# @return [Class]
# @raise [ConfigurationError] when the constant cannot be loaded or resolved
def transport_message_class(name)
  messages = ::Legion::Extensions::Llm::Transport::Messages
  messages.const_get(name)
rescue LoadError, NameError => e
  raise ConfigurationError, "fleet responder transport unavailable for #{name}: #{e.message}"
end
|
|
177
|
+
|
|
178
|
+
# Acknowledges a delivery, either directly or through its channel and tag.
# Does nothing when the handle is nil or supports neither form.
#
# @param delivery [Object, nil]
# @return [Object, nil] the result of the ack call, if one was made
def ack(delivery)
  return unless delivery
  return delivery.ack if delivery.respond_to?(:ack)
  return unless delivery.respond_to?(:channel) && delivery.respond_to?(:delivery_tag)

  delivery.channel.ack(delivery.delivery_tag)
end
|
|
187
|
+
|
|
188
|
+
# Rejects a delivery, either directly or through its channel and tag.
# Does nothing when the handle is nil or supports neither form.
#
# @param delivery [Object, nil]
# @param requeue [Boolean] forwarded as the requeue argument
# @return [Object, nil] the result of the reject call, if one was made
def reject(delivery, requeue:)
  return unless delivery
  return delivery.reject(requeue) if delivery.respond_to?(:reject)
  return unless delivery.respond_to?(:channel) && delivery.respond_to?(:delivery_tag)

  delivery.channel.reject(delivery.delivery_tag, requeue)
end
|
|
197
|
+
|
|
198
|
+
# Parses a JSON string with Legion::JSON when that constant is defined,
# otherwise with the standard library JSON module.
#
# @param payload [String]
# @return [Object] the decoded value
def parse_json(payload)
  parser = defined?(::Legion::JSON) ? ::Legion::JSON : ::JSON
  parser.parse(payload)
end
|
|
205
|
+
|
|
206
|
+
# Raises when the envelope still carries any legacy field.
#
# @param envelope [#key?]
# @raise [ArgumentError] naming the first legacy field found
def reject_legacy_fields!(envelope)
  LEGACY_FIELDS.each do |legacy_field|
    next unless envelope.key?(legacy_field)

    raise ArgumentError, "#{legacy_field} is not supported by fleet protocol v2"
  end
end
|
|
211
|
+
|
|
212
|
+
# Raises unless the envelope declares the supported protocol version.
#
# @param envelope [#protocol_version]
# @raise [ArgumentError] when the version differs from Protocol::VERSION
def validate_protocol_version!(envelope)
  supported = envelope.protocol_version == Protocol::VERSION
  raise ArgumentError, "protocol_version must be #{Protocol::VERSION}" unless supported
end
|
|
217
|
+
|
|
218
|
+
# Raises unless the envelope targets this responder's provider family.
# Both sides are compared as strings.
#
# @param envelope [#provider]
# @param provider_family [#to_s]
# @raise [ArgumentError] when the families differ
def validate_provider_family!(envelope, provider_family)
  same_family = envelope.provider.to_s == provider_family.to_s
  raise ArgumentError, "fleet request provider #{envelope.provider} does not match #{provider_family}" unless same_family
end
|
|
223
|
+
|
|
224
|
+
# Returns the provider instance settings with symbolized keys.
#
# @param provider_instances [Hash, #call, nil] settings, or a callable that
#   is invoked to obtain them; nil is treated as an empty hash
# @return [Object] the result of +deep_symbolize+
def resolve_provider_instances(provider_instances)
  resolved = provider_instances
  resolved = resolved.call if resolved.respond_to?(:call)
  deep_symbolize(resolved || {})
end
|
|
228
|
+
|
|
229
|
+
# Decides whether a failed delivery should be requeued: the error must be
# retryable and the +fleet.consumer.requeue_transient+ setting must not be
# +false+ (it defaults to +true+).
#
# @param error [Exception]
# @return [Boolean]
def requeue_error?(error)
  retryable = retryable_error?(error)
  return retryable unless retryable

  requeue_setting = Settings.value(:fleet, :consumer, :requeue_transient, default: true)
  requeue_setting != false
end
|
|
233
|
+
|
|
234
|
+
# Classifies whether retrying the same request could plausibly succeed.
#
# Configuration and policy errors are permanent. Request-shape failures are
# permanent too: the envelope checks in this module raise ArgumentError for
# missing, legacy or mismatched fields, and an undecodable payload raises
# JSON::ParserError. Redelivering the identical message cannot fix either,
# so treating them as retryable would requeue a malformed message
# repeatedly and advertise the error to the caller as retryable.
#
# NOTE(review): when Legion::JSON is in use its parse failures may be a
# different error class — confirm and add it here if so.
#
# @param error [Exception]
# @return [Boolean]
def retryable_error?(error)
  return false if error.is_a?(ConfigurationError)
  return false if error.is_a?(WorkerExecution::PolicyError)
  return false if error.is_a?(ArgumentError) || error.is_a?(::JSON::ParserError)

  true
end
|
|
240
|
+
|
|
241
|
+
# Maps an error to the code string placed in fleet error replies.
#
# @param error [Exception]
# @return [String] 'configuration_error', 'policy_error' or 'provider_error'
def error_code(error)
  case error
  when ConfigurationError then 'configuration_error'
  when WorkerExecution::PolicyError then 'policy_error'
  else 'provider_error'
  end
end
|
|
247
|
+
|
|
248
|
+
# Extracts the reply content: the first truthy value among the response's
# +content+ and +result+ fields, otherwise the response rendered as a string.
#
# @param response [Object]
# @return [Object]
def response_content(response)
  %i[content result].each do |field|
    value = response_field(response, field)
    return value if value
  end

  response.to_s
end
|
|
251
|
+
|
|
252
|
+
# Extracts token usage from a provider response as a symbol-keyed hash.
#
# Uses the response's +usage+ field, or +tokens+ when that is absent. A
# Hash is symbolized directly; another object that converts with +to_h+
# (arrays excluded) is converted first. When no usable usage value exists
# the individual +input_tokens+, +output_tokens+ and +thinking_tokens+
# fields are collected from the response itself.
#
# nil is checked explicitly because +nil.respond_to?(:to_h)+ is true, which
# would otherwise short-circuit to nil and skip the per-field fallback.
#
# @param response [Object] hash-like response or object with readers
# @return [Hash] usage counts with nil entries removed
def response_usage(response)
  usage = response_field(response, :usage) || response_field(response, :tokens)
  return deep_symbolize(usage) if usage.is_a?(Hash)
  return deep_symbolize(usage.to_h) if usage && !usage.is_a?(Array) && usage.respond_to?(:to_h)

  {
    input_tokens: response_field(response, :input_tokens),
    output_tokens: response_field(response, :output_tokens),
    thinking_tokens: response_field(response, :thinking_tokens)
  }.compact
end
|
|
262
|
+
|
|
263
|
+
# Extracts response metadata as a symbol-keyed hash.
#
# A Hash is symbolized directly; another object that converts with +to_h+
# is converted first. Missing metadata, arrays and values without +to_h+
# yield an empty hash. nil is checked explicitly because
# +nil.respond_to?(:to_h)+ is true and would otherwise return nil.
#
# @param response [Object] hash-like response or object with readers
# @return [Hash]
def response_metadata(response)
  metadata = response_field(response, :metadata)
  return deep_symbolize(metadata) if metadata.is_a?(Hash)
  return {} if metadata.nil? || metadata.is_a?(Array) || !metadata.respond_to?(:to_h)

  deep_symbolize(metadata.to_h)
end
|
|
267
|
+
|
|
268
|
+
# Reads a named field from a provider response.
#
# Hash-like responses are checked under the given key and then its string
# form; otherwise a public reader of the same name is called.
#
# @param response [Object]
# @param field [Symbol]
# @return [Object, nil] nil when the field cannot be found
def response_field(response, field)
  if response.respond_to?(:key?)
    [field, field.to_s].each do |candidate|
      return response[candidate] if response.key?(candidate)
    end
  end

  response.respond_to?(field) ? response.public_send(field) : nil
end
|
|
275
|
+
|
|
276
|
+
# Walks nested hash-like values, trying each key as a symbol and then as a
# string.
#
# Only a nil symbol-key value falls through to the string key, so a stored
# +false+ is returned as +false+ instead of being turned into nil.
#
# @param hash [Object] starting value
# @param keys [Array<#to_sym, #to_s>] path to follow
# @return [Object, nil] the value at the path, or nil as soon as a step is
#   not hash-like
def dig(hash, *keys)
  keys.reduce(hash) do |current, key|
    break nil unless current.respond_to?(:key?)

    value = current[key.to_sym]
    value.nil? ? current[key.to_s] : value
  end
end
|
|
283
|
+
|
|
284
|
+
# Interprets a setting as a boolean flag: +true+, or anything whose string
# form is exactly 'true'.
#
# @param value [Object]
# @return [Boolean]
def truthy?(value)
  return true if value == true

  value.to_s == 'true'
end
|
|
287
|
+
|
|
288
|
+
# Recursively converts hash keys to symbols inside nested hashes and arrays.
# Keys that do not respond to +to_sym+ are kept; other values pass through
# unchanged.
#
# @param value [Object]
# @return [Object] a new Hash or Array for those inputs, otherwise +value+ itself
def deep_symbolize(value)
  if value.is_a?(Hash)
    value.to_h do |key, child|
      [key.respond_to?(:to_sym) ? key.to_sym : key, deep_symbolize(child)]
    end
  elsif value.is_a?(Array)
    value.map { |child| deep_symbolize(child) }
  else
    value
  end
end
|
|
300
|
+
end
|
|
301
|
+
end
|
|
302
|
+
end
|
|
303
|
+
end
|
|
304
|
+
end
|