lex-llm-gateway 0.2.5 → 0.2.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +12 -0
- data/lib/legion/extensions/llm/gateway/runners/fleet.rb +7 -8
- data/lib/legion/extensions/llm/gateway/runners/inference.rb +30 -16
- data/lib/legion/extensions/llm/gateway/transport/messages/inference_request.rb +6 -8
- data/lib/legion/extensions/llm/gateway/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 6598c8eb88e900cf46910dcb33c93a51e9013e4a56795ea7a94d6aa892f8a550
|
|
4
|
+
data.tar.gz: 7267fd499d23a63afcd45bd8780080bac45a045f917f8b60ec04181d804e5e94
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 0f35e042cd9c333fecae0ba9896bb79e131f9ece92af6a68e03920a6eaad652ee3a4fe1a38f1906d1378186290c66591125bf610f707896cc0c09b80ccb1dfaf
|
|
7
|
+
data.tar.gz: bb641bd9d3efcd68dfbe0c5d818a72935c6c3c89e76a68dee56c33dc8a5a2039270c0eed875245cadf93dbe1b1f1c39b6e366637c35b19f20dfe5a411e0aa1b6
|
data/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,17 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## [0.2.6] - 2026-03-23
|
|
4
|
+
|
|
5
|
+
### Added
|
|
6
|
+
- Fleet dispatch for structured and embed request types from Inference runner
|
|
7
|
+
- Multi-message chat support in fleet dispatch (passes messages array directly)
|
|
8
|
+
- InferenceRequest message now includes request_type, schema, and text fields
|
|
9
|
+
|
|
10
|
+
### Changed
|
|
11
|
+
- Fleet.dispatch uses `**opts` for extensible parameter forwarding
|
|
12
|
+
- Fleet.publish_request uses anonymous keyword forwarding
|
|
13
|
+
- Inference extract helpers compacted to single-line ternary style
|
|
14
|
+
|
|
3
15
|
## [0.2.5] - 2026-03-23
|
|
4
16
|
|
|
5
17
|
### Added
|
|
@@ -10,17 +10,19 @@ module Legion
|
|
|
10
10
|
|
|
11
11
|
module_function
|
|
12
12
|
|
|
13
|
-
def dispatch(model:, messages:,
|
|
13
|
+
def dispatch(model:, messages:, **opts)
|
|
14
14
|
return error_result('fleet_unavailable') unless fleet_available?
|
|
15
15
|
|
|
16
|
+
intent = opts[:intent]
|
|
16
17
|
token = Helpers::Auth.sign_request({ model: model, intent: intent })
|
|
17
18
|
return error_result('fleet_auth_failed') if token.nil? && require_auth?
|
|
18
19
|
|
|
19
20
|
correlation_id = Helpers::Rpc.generate_correlation_id
|
|
20
21
|
publish_request(model: model, messages: messages, intent: intent,
|
|
21
|
-
correlation_id: correlation_id, signed_token: token
|
|
22
|
+
correlation_id: correlation_id, signed_token: token,
|
|
23
|
+
**opts.except(:intent, :timeout))
|
|
22
24
|
|
|
23
|
-
wait_for_response(correlation_id, timeout: resolve_timeout(timeout))
|
|
25
|
+
wait_for_response(correlation_id, timeout: resolve_timeout(opts[:timeout]))
|
|
24
26
|
end
|
|
25
27
|
|
|
26
28
|
def fleet_available?
|
|
@@ -68,12 +70,9 @@ module Legion
|
|
|
68
70
|
settings.dig(:routing, :fleet, :timeout_seconds) || DEFAULT_TIMEOUT
|
|
69
71
|
end
|
|
70
72
|
|
|
71
|
-
def publish_request(
|
|
72
|
-
reply_to = Helpers::Rpc.agent_queue_name
|
|
73
|
+
def publish_request(**)
|
|
73
74
|
Transport::Messages::InferenceRequest.new(
|
|
74
|
-
|
|
75
|
-
reply_to: reply_to, correlation_id: correlation_id,
|
|
76
|
-
signed_token: signed_token
|
|
75
|
+
reply_to: Helpers::Rpc.agent_queue_name, **
|
|
77
76
|
).publish
|
|
78
77
|
end
|
|
79
78
|
|
|
@@ -19,7 +19,7 @@ module Legion
|
|
|
19
19
|
|
|
20
20
|
def embed(text: nil, model: nil, provider: nil, **)
|
|
21
21
|
start_ms = ::Process.clock_gettime(::Process::CLOCK_MONOTONIC, :millisecond)
|
|
22
|
-
response =
|
|
22
|
+
response = dispatch_embed(text: text, model: model, provider: provider, **)
|
|
23
23
|
elapsed_ms = ::Process.clock_gettime(::Process::CLOCK_MONOTONIC, :millisecond) - start_ms
|
|
24
24
|
meter_response(response, request_type: 'embed', provider: provider, model_id: model,
|
|
25
25
|
latency_ms: elapsed_ms)
|
|
@@ -28,22 +28,42 @@ module Legion
|
|
|
28
28
|
|
|
29
29
|
def structured(messages: nil, schema: nil, model: nil, provider: nil, **)
|
|
30
30
|
start_ms = ::Process.clock_gettime(::Process::CLOCK_MONOTONIC, :millisecond)
|
|
31
|
-
response =
|
|
32
|
-
|
|
31
|
+
response = dispatch_structured(messages: messages, schema: schema, model: model,
|
|
32
|
+
provider: provider, **)
|
|
33
33
|
elapsed_ms = ::Process.clock_gettime(::Process::CLOCK_MONOTONIC, :millisecond) - start_ms
|
|
34
34
|
meter_response(response, request_type: 'structured', provider: provider, model_id: model,
|
|
35
35
|
latency_ms: elapsed_ms)
|
|
36
36
|
response
|
|
37
37
|
end
|
|
38
38
|
|
|
39
|
-
def dispatch_chat(message: nil, model: nil, provider: nil, **opts)
|
|
39
|
+
def dispatch_chat(message: nil, messages: nil, model: nil, provider: nil, **opts)
|
|
40
40
|
tier = opts[:tier]
|
|
41
41
|
Legion::Logging.debug "[Gateway::Inference] dispatch_chat tier=#{tier}" if defined?(Legion::Logging)
|
|
42
42
|
if tier == 'fleet' && fleet_available?
|
|
43
|
-
|
|
44
|
-
|
|
43
|
+
fleet_messages = messages || [{ role: 'user', content: message }]
|
|
44
|
+
Fleet.dispatch(model: model, messages: fleet_messages, intent: opts[:intent])
|
|
45
45
|
else
|
|
46
|
-
call_llm(:chat, message: message,
|
|
46
|
+
call_llm(:chat, message: message, messages: messages, model: model,
|
|
47
|
+
provider: provider, **opts)
|
|
48
|
+
end
|
|
49
|
+
end
|
|
50
|
+
|
|
51
|
+
def dispatch_embed(text: nil, model: nil, provider: nil, **opts)
|
|
52
|
+
if opts[:tier] == 'fleet' && fleet_available?
|
|
53
|
+
Fleet.dispatch(model: model, messages: [{ role: 'user', content: text }],
|
|
54
|
+
intent: opts[:intent], request_type: 'embed', text: text)
|
|
55
|
+
else
|
|
56
|
+
call_llm(:embed, text: text, model: model, provider: provider, **opts)
|
|
57
|
+
end
|
|
58
|
+
end
|
|
59
|
+
|
|
60
|
+
def dispatch_structured(messages: nil, schema: nil, model: nil, provider: nil, **opts)
|
|
61
|
+
if opts[:tier] == 'fleet' && fleet_available?
|
|
62
|
+
Fleet.dispatch(model: model, messages: messages, intent: opts[:intent],
|
|
63
|
+
request_type: 'structured', schema: schema)
|
|
64
|
+
else
|
|
65
|
+
call_llm(:structured, messages: messages, schema: schema, model: model,
|
|
66
|
+
provider: provider, **opts)
|
|
47
67
|
end
|
|
48
68
|
end
|
|
49
69
|
|
|
@@ -91,21 +111,15 @@ module Legion
|
|
|
91
111
|
end
|
|
92
112
|
|
|
93
113
|
def extract_tokens(response, field)
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
response.public_send(field).to_i
|
|
114
|
+
response.respond_to?(field) ? response.public_send(field).to_i : 0
|
|
97
115
|
end
|
|
98
116
|
|
|
99
117
|
def extract_provider(response, fallback)
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
fallback
|
|
118
|
+
response.respond_to?(:provider) ? response.provider : fallback
|
|
103
119
|
end
|
|
104
120
|
|
|
105
121
|
def extract_model(response, fallback)
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
fallback
|
|
122
|
+
response.respond_to?(:model) ? response.model : fallback
|
|
109
123
|
end
|
|
110
124
|
end
|
|
111
125
|
end
|
|
@@ -29,14 +29,12 @@ module Legion
|
|
|
29
29
|
|
|
30
30
|
def message
|
|
31
31
|
{
|
|
32
|
-
model: @options[:model],
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
provider: @options[:provider],
|
|
39
|
-
tier: @options[:tier]
|
|
32
|
+
model: @options[:model], messages: @options[:messages] || [],
|
|
33
|
+
intent: @options[:intent], reply_to: @options[:reply_to],
|
|
34
|
+
correlation_id: @options[:correlation_id], signed_token: @options[:signed_token],
|
|
35
|
+
provider: @options[:provider], tier: @options[:tier],
|
|
36
|
+
request_type: @options[:request_type], schema: @options[:schema],
|
|
37
|
+
text: @options[:text]
|
|
40
38
|
}
|
|
41
39
|
end
|
|
42
40
|
end
|