lex-llm-gateway 0.2.5 → 0.2.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 44562169a54b0e789ff4656a30599b2c43314c59fd5b5bb3056bf790da5a7a01
4
- data.tar.gz: 399266aaa074b1eeb0c69044987bf36d13a11668defcda7066133f5c25ed5a93
3
+ metadata.gz: 6598c8eb88e900cf46910dcb33c93a51e9013e4a56795ea7a94d6aa892f8a550
4
+ data.tar.gz: 7267fd499d23a63afcd45bd8780080bac45a045f917f8b60ec04181d804e5e94
5
5
  SHA512:
6
- metadata.gz: 9d122cadcffddfa7fa848a2d5cd992682e40a2e9cf84e5868af845d90572a7965f9fd462e6f23bc1d275410f48332ce7111a4d823dbbf17de3d16583903f407c
7
- data.tar.gz: a188786600832ed973a988da109d053a87e05b7cdea82ee08914f1cad7931d4db2f8f4f1f2ea37fe442f9382d731f4980e6a099c6e6db84af1bca10b49b95512
6
+ metadata.gz: 0f35e042cd9c333fecae0ba9896bb79e131f9ece92af6a68e03920a6eaad652ee3a4fe1a38f1906d1378186290c66591125bf610f707896cc0c09b80ccb1dfaf
7
+ data.tar.gz: bb641bd9d3efcd68dfbe0c5d818a72935c6c3c89e76a68dee56c33dc8a5a2039270c0eed875245cadf93dbe1b1f1c39b6e366637c35b19f20dfe5a411e0aa1b6
data/CHANGELOG.md CHANGED
@@ -1,5 +1,17 @@
1
1
  # Changelog
2
2
 
3
+ ## [0.2.6] - 2026-03-23
4
+
5
+ ### Added
6
+ - Fleet dispatch for structured and embed request types from Inference runner
7
+ - Multi-message chat support in fleet dispatch (passes messages array directly)
8
+ - InferenceRequest message now includes request_type, schema, and text fields
9
+
10
+ ### Changed
11
+ - Fleet.dispatch uses `**opts` for extensible parameter forwarding
12
+ - Fleet.publish_request uses anonymous keyword forwarding
13
+ - Inference extract helpers compacted to single-line ternary style
14
+
3
15
  ## [0.2.5] - 2026-03-23
4
16
 
5
17
  ### Added
@@ -10,17 +10,19 @@ module Legion
10
10
 
11
11
  module_function
12
12
 
13
- def dispatch(model:, messages:, intent: nil, timeout: nil)
13
+ def dispatch(model:, messages:, **opts)
14
14
  return error_result('fleet_unavailable') unless fleet_available?
15
15
 
16
+ intent = opts[:intent]
16
17
  token = Helpers::Auth.sign_request({ model: model, intent: intent })
17
18
  return error_result('fleet_auth_failed') if token.nil? && require_auth?
18
19
 
19
20
  correlation_id = Helpers::Rpc.generate_correlation_id
20
21
  publish_request(model: model, messages: messages, intent: intent,
21
- correlation_id: correlation_id, signed_token: token)
22
+ correlation_id: correlation_id, signed_token: token,
23
+ **opts.except(:intent, :timeout))
22
24
 
23
- wait_for_response(correlation_id, timeout: resolve_timeout(timeout))
25
+ wait_for_response(correlation_id, timeout: resolve_timeout(opts[:timeout]))
24
26
  end
25
27
 
26
28
  def fleet_available?
@@ -68,12 +70,9 @@ module Legion
68
70
  settings.dig(:routing, :fleet, :timeout_seconds) || DEFAULT_TIMEOUT
69
71
  end
70
72
 
71
- def publish_request(model:, messages:, intent:, correlation_id:, signed_token:)
72
- reply_to = Helpers::Rpc.agent_queue_name
73
+ def publish_request(**)
73
74
  Transport::Messages::InferenceRequest.new(
74
- model: model, messages: messages, intent: intent,
75
- reply_to: reply_to, correlation_id: correlation_id,
76
- signed_token: signed_token
75
+ reply_to: Helpers::Rpc.agent_queue_name, **
77
76
  ).publish
78
77
  end
79
78
 
@@ -19,7 +19,7 @@ module Legion
19
19
 
20
20
  def embed(text: nil, model: nil, provider: nil, **)
21
21
  start_ms = ::Process.clock_gettime(::Process::CLOCK_MONOTONIC, :millisecond)
22
- response = call_llm(:embed, text: text, model: model, provider: provider, **)
22
+ response = dispatch_embed(text: text, model: model, provider: provider, **)
23
23
  elapsed_ms = ::Process.clock_gettime(::Process::CLOCK_MONOTONIC, :millisecond) - start_ms
24
24
  meter_response(response, request_type: 'embed', provider: provider, model_id: model,
25
25
  latency_ms: elapsed_ms)
@@ -28,22 +28,42 @@ module Legion
28
28
 
29
29
  def structured(messages: nil, schema: nil, model: nil, provider: nil, **)
30
30
  start_ms = ::Process.clock_gettime(::Process::CLOCK_MONOTONIC, :millisecond)
31
- response = call_llm(:structured, messages: messages, schema: schema, model: model,
32
- provider: provider, **)
31
+ response = dispatch_structured(messages: messages, schema: schema, model: model,
32
+ provider: provider, **)
33
33
  elapsed_ms = ::Process.clock_gettime(::Process::CLOCK_MONOTONIC, :millisecond) - start_ms
34
34
  meter_response(response, request_type: 'structured', provider: provider, model_id: model,
35
35
  latency_ms: elapsed_ms)
36
36
  response
37
37
  end
38
38
 
39
- def dispatch_chat(message: nil, model: nil, provider: nil, **opts)
39
+ def dispatch_chat(message: nil, messages: nil, model: nil, provider: nil, **opts)
40
40
  tier = opts[:tier]
41
41
  Legion::Logging.debug "[Gateway::Inference] dispatch_chat tier=#{tier}" if defined?(Legion::Logging)
42
42
  if tier == 'fleet' && fleet_available?
43
- Fleet.dispatch(model: model, messages: [{ role: 'user', content: message }],
44
- intent: opts[:intent])
43
+ fleet_messages = messages || [{ role: 'user', content: message }]
44
+ Fleet.dispatch(model: model, messages: fleet_messages, intent: opts[:intent])
45
45
  else
46
- call_llm(:chat, message: message, model: model, provider: provider, **opts)
46
+ call_llm(:chat, message: message, messages: messages, model: model,
47
+ provider: provider, **opts)
48
+ end
49
+ end
50
+
51
+ def dispatch_embed(text: nil, model: nil, provider: nil, **opts)
52
+ if opts[:tier] == 'fleet' && fleet_available?
53
+ Fleet.dispatch(model: model, messages: [{ role: 'user', content: text }],
54
+ intent: opts[:intent], request_type: 'embed', text: text)
55
+ else
56
+ call_llm(:embed, text: text, model: model, provider: provider, **opts)
57
+ end
58
+ end
59
+
60
+ def dispatch_structured(messages: nil, schema: nil, model: nil, provider: nil, **opts)
61
+ if opts[:tier] == 'fleet' && fleet_available?
62
+ Fleet.dispatch(model: model, messages: messages, intent: opts[:intent],
63
+ request_type: 'structured', schema: schema)
64
+ else
65
+ call_llm(:structured, messages: messages, schema: schema, model: model,
66
+ provider: provider, **opts)
47
67
  end
48
68
  end
49
69
 
@@ -91,21 +111,15 @@ module Legion
91
111
  end
92
112
 
93
113
  def extract_tokens(response, field)
94
- return 0 unless response.respond_to?(field)
95
-
96
- response.public_send(field).to_i
114
+ response.respond_to?(field) ? response.public_send(field).to_i : 0
97
115
  end
98
116
 
99
117
  def extract_provider(response, fallback)
100
- return response.provider if response.respond_to?(:provider)
101
-
102
- fallback
118
+ response.respond_to?(:provider) ? response.provider : fallback
103
119
  end
104
120
 
105
121
  def extract_model(response, fallback)
106
- return response.model if response.respond_to?(:model)
107
-
108
- fallback
122
+ response.respond_to?(:model) ? response.model : fallback
109
123
  end
110
124
  end
111
125
  end
@@ -29,14 +29,12 @@ module Legion
29
29
 
30
30
  def message
31
31
  {
32
- model: @options[:model],
33
- messages: @options[:messages] || [],
34
- intent: @options[:intent],
35
- reply_to: @options[:reply_to],
36
- correlation_id: @options[:correlation_id],
37
- signed_token: @options[:signed_token],
38
- provider: @options[:provider],
39
- tier: @options[:tier]
32
+ model: @options[:model], messages: @options[:messages] || [],
33
+ intent: @options[:intent], reply_to: @options[:reply_to],
34
+ correlation_id: @options[:correlation_id], signed_token: @options[:signed_token],
35
+ provider: @options[:provider], tier: @options[:tier],
36
+ request_type: @options[:request_type], schema: @options[:schema],
37
+ text: @options[:text]
40
38
  }
41
39
  end
42
40
  end
@@ -4,7 +4,7 @@ module Legion
4
4
  module Extensions
5
5
  module LLM
6
6
  module Gateway
7
- VERSION = '0.2.5'
7
+ VERSION = '0.2.6'
8
8
  end
9
9
  end
10
10
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: lex-llm-gateway
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.5
4
+ version: 0.2.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - Esity