lex-llm-gateway 0.2.3 → 0.2.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 113ab7ef4818f7904351d1c4ebf08bdfc8beabc3310c219d909443399d051e41
4
- data.tar.gz: '0285316fbedb4b01afed93c25d9897b7723de482d06dd38bb85b2c6e59bcf2d8'
3
+ metadata.gz: 44562169a54b0e789ff4656a30599b2c43314c59fd5b5bb3056bf790da5a7a01
4
+ data.tar.gz: 399266aaa074b1eeb0c69044987bf36d13a11668defcda7066133f5c25ed5a93
5
5
  SHA512:
6
- metadata.gz: be41efa7a0e59477137225023730e271b04be3343cb178fd7110e863320f95d5ffeaafa4fc8d912056c78fd739e9ac0d6d2ec77b6d90c0d991b0df1622943e21
7
- data.tar.gz: 985e1a3bc30d68657dacdc8f0df39bb5cd33d0cbc7192eb3336e99c7161d99e74af6909b99814b92ebd6084dc989ae5e0bb257c285482509be308cc43a5f9519
6
+ metadata.gz: 9d122cadcffddfa7fa848a2d5cd992682e40a2e9cf84e5868af845d90572a7965f9fd462e6f23bc1d275410f48332ce7111a4d823dbbf17de3d16583903f407c
7
+ data.tar.gz: a188786600832ed973a988da109d053a87e05b7cdea82ee08914f1cad7931d4db2f8f4f1f2ea37fe442f9382d731f4980e6a099c6e6db84af1bca10b49b95512
data/CHANGELOG.md CHANGED
@@ -1,5 +1,24 @@
1
1
  # Changelog
2
2
 
3
+ ## [0.2.5] - 2026-03-23
4
+
5
+ ### Added
6
+ - FleetHandler multi-request-type dispatch: structured, embed, and multi-message chat
7
+ - `call_chat` supports single and multi-message payloads
8
+ - `call_structured` dispatches to `Legion::LLM.structured` with schema
9
+ - `call_embed` dispatches to `Legion::LLM.embed` with text fallback from messages
10
+
11
+ ## [0.2.4] - 2026-03-23
12
+
13
+ ### Added
14
+ - Implement fleet RPC `wait_for_response` with `Concurrent::Promises` future and correlation ID matching
15
+ - Add `Helpers::ReplyDispatcher` process-singleton for managing reply queue consumer and pending futures
16
+ - Add `FleetHandler.publish_reply` to send `InferenceResponse` back to requester via AMQP default exchange
17
+
18
+ ### Fixed
19
+ - Fix `Actor::InferenceWorker` runner_class mismatch: now points to `FleetHandler` instead of `Inference`
20
+ - Add `use_runner? false` to InferenceWorker so it dispatches directly to the runner module
21
+
3
22
  ## [0.2.3] - 2026-03-22
4
23
 
5
24
  ### Changed
@@ -7,12 +7,16 @@ module Legion
7
7
  module Actor
8
8
  class InferenceWorker < Legion::Extensions::Actors::Subscription
9
9
  def runner_class
10
- 'Legion::Extensions::LLM::Gateway::Runners::Inference'
10
+ 'Legion::Extensions::LLM::Gateway::Runners::FleetHandler'
11
11
  end
12
12
 
13
13
  def runner_function
14
14
  'handle_fleet_request'
15
15
  end
16
+
17
+ def use_runner?
18
+ false
19
+ end
16
20
  end
17
21
  end
18
22
  end
@@ -0,0 +1,107 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'concurrent'
4
+
5
module Legion
  module Extensions
    module LLM
      module Gateway
        module Helpers
          # Process-level registry that matches fleet RPC replies to the callers
          # waiting for them.
          #
          # Callers obtain a resolvable future via {register}; a single consumer
          # on the agent reply queue feeds {handle_delivery}, which fulfils the
          # future whose correlation ID matches the reply.
          module ReplyDispatcher
            @pending = Concurrent::Map.new
            @mutex = Mutex.new
            @consumer = nil
            @channel = nil

            module_function

            # Creates a future for +correlation_id+ and makes sure the reply
            # consumer has been started (best effort).
            #
            # @param correlation_id [Object] key the reply will be matched on
            # @return [Concurrent::Promises::ResolvableFuture]
            def register(correlation_id)
              future = Concurrent::Promises.resolvable_future
              @pending[correlation_id] = future
              ensure_consumer
              future
            end

            # Forgets the pending future for +correlation_id+, if any.
            #
            # @param correlation_id [Object]
            # @return [Object, nil] the removed future, or nil when none was pending
            def deregister(correlation_id)
              @pending.delete(correlation_id)
            end

            # Routes one reply to its waiting future.
            #
            # The correlation ID is taken from +properties+ first and from the
            # parsed payload second. Replies with no ID, or with an ID nobody is
            # waiting on, are dropped. Failures are logged, never raised.
            #
            # @param raw_payload [String, Hash] reply body
            # @param properties [Hash] may carry :correlation_id
            # @return [void]
            def handle_delivery(raw_payload, properties = {})
              payload = parse_payload(raw_payload)
              cid = properties[:correlation_id] || payload[:correlation_id]
              return unless cid

              future = @pending.delete(cid)
              return unless future

              future.fulfill(with_success_flag(payload))
            rescue StandardError => e
              log_warn("ReplyDispatcher: handle_delivery failed: #{e.message}")
            end

            # @return [Integer] number of futures still awaiting a reply
            def pending_count
              @pending.size
            end

            # Cancels the consumer and discards every pending future.
            #
            # @return [void]
            def reset!
              @mutex.synchronize do
                cancel_consumer
                @pending = Concurrent::Map.new
              end
            end

            # private

            # Starts the reply-queue consumer once. Does nothing when a consumer
            # already exists, the transport is unavailable, or no queue name is
            # known. Setup errors are logged so that {register} still returns.
            def ensure_consumer
              @mutex.synchronize do
                return if @consumer
                return unless transport_available?

                queue_name = Rpc.agent_queue_name
                return unless queue_name

                subscribe_to(queue_name)
              end
            rescue StandardError => e
              log_warn("ReplyDispatcher: consumer setup failed: #{e.message}")
            end

            # Opens a channel, declares +queue_name+ and subscribes to it.
            # The channel is closed again if any step fails, so repeated failed
            # attempts do not accumulate open channels.
            def subscribe_to(queue_name)
              channel = Legion::Transport.connection.create_channel
              queue = channel.queue(queue_name, auto_delete: true, durable: false)
              @consumer = queue.subscribe(manual_ack: false) do |_delivery, properties, body|
                handle_delivery(body, { correlation_id: properties.correlation_id })
              end
              @channel = channel
            rescue StandardError
              close_channel(channel)
              raise
            end

            # Cancels the consumer and closes its channel. State is cleared even
            # when cancelling raises, so a later {register} can subscribe again.
            def cancel_consumer
              @consumer&.cancel
            rescue StandardError => e
              log_warn("ReplyDispatcher: cancel failed: #{e.message}")
            ensure
              @consumer = nil
              close_channel(@channel)
              @channel = nil
            end

            # Closes +channel+ if present; close errors are logged, not raised.
            def close_channel(channel)
              channel&.close
            rescue StandardError => e
              log_warn("ReplyDispatcher: channel close failed: #{e.message}")
            end

            # @return [Object] truthy when Legion::Transport exposes a connection
            def transport_available?
              defined?(Legion::Transport) &&
                Legion::Transport.respond_to?(:connection) &&
                Legion::Transport.connection
            end

            # Decodes a reply body. Hashes pass through untouched; anything that
            # cannot be parsed yields an empty hash.
            def parse_payload(raw)
              return raw if raw.is_a?(Hash)

              if defined?(Legion::JSON)
                Legion::JSON.load(raw)
              else
                require 'json'
                JSON.parse(raw, symbolize_names: true)
              end
            rescue StandardError
              {}
            end

            # Adds +success: true+ only when the responder did not state an
            # outcome itself, so an error reply such as
            # +{ success: false, error: 'invalid_token' }+ is not reported to
            # the waiter as a success.
            def with_success_flag(payload)
              return payload if payload.key?(:success) || payload.key?('success')

              payload.merge(success: true)
            end

            # Emits a warning when Legion::Logging is loaded; otherwise a no-op.
            def log_warn(msg)
              Legion::Logging.warn(msg) if defined?(Legion::Logging)
            end
          end
        end
      end
    end
  end
end
@@ -78,6 +78,16 @@ module Legion
78
78
  end
79
79
 
80
80
  def wait_for_response(correlation_id, timeout:)
81
+ future = Helpers::ReplyDispatcher.register(correlation_id)
82
+ result = future.value!(timeout)
83
+ result || timeout_result(correlation_id, timeout)
84
+ rescue Concurrent::CancelledOperationError
85
+ timeout_result(correlation_id, timeout)
86
+ ensure
87
+ Helpers::ReplyDispatcher.deregister(correlation_id)
88
+ end
89
+
90
# Builds the failure hash returned when no fleet reply was obtained.
#
# @param correlation_id [Object] ID of the request that went unanswered
# @param timeout [Numeric] the wait limit that was applied
# @return [Hash]
def timeout_result(correlation_id, timeout)
  {
    success: false,
    error: 'fleet_timeout',
    correlation_id: correlation_id,
    timeout: timeout
  }
end
83
93
 
@@ -10,10 +10,16 @@ module Legion
10
10
 
11
11
  def handle_fleet_request(payload)
12
12
  token = payload[:signed_token]
13
- return { success: false, error: 'invalid_token' } if require_auth? && !valid_token?(token)
13
+ if require_auth? && !valid_token?(token)
14
+ error_response = { success: false, error: 'invalid_token' }
15
+ publish_reply(payload[:reply_to], payload[:correlation_id], error_response) if payload[:reply_to]
16
+ return error_response
17
+ end
14
18
 
15
19
  response = call_local_llm(payload)
16
- build_response(payload[:correlation_id], response)
20
+ response_hash = build_response(payload[:correlation_id], response)
21
+ publish_reply(payload[:reply_to], payload[:correlation_id], response_hash) if payload[:reply_to]
22
+ response_hash
17
23
  end
18
24
 
19
25
  def require_auth?
@@ -29,12 +35,38 @@ module Legion
29
35
  def call_local_llm(payload)
30
36
  return { error: 'llm_not_available' } unless defined?(Legion::LLM)
31
37
 
32
- Legion::LLM.chat(
38
+ case payload[:request_type]&.to_s
39
+ when 'structured'
40
+ call_structured(payload)
41
+ when 'embed'
42
+ call_embed(payload)
43
+ else
44
+ call_chat(payload)
45
+ end
46
+ end
47
+
48
# Sends a chat request to Legion::LLM.
#
# An Array of two or more messages is forwarded whole as +messages:+.
# Otherwise only the first message's :content is sent, as +message:+.
#
# @param payload [Hash] reads :model and :messages
# @return [Object] whatever Legion::LLM.chat returns
def call_chat(payload)
  history = payload[:messages]
  multi_turn = history.is_a?(Array) && history.size > 1
  prompt_args = multi_turn ? { messages: history } : { message: history&.dig(0, :content) }

  Legion::LLM.chat(model: payload[:model], **prompt_args)
end
56
+
57
# Sends a structured-output request to Legion::LLM.
#
# @param payload [Hash] reads :model, :messages and :schema
# @return [Object] whatever Legion::LLM.structured returns
def call_structured(payload)
  model, messages, schema = payload.values_at(:model, :messages, :schema)
  Legion::LLM.structured(model: model, messages: messages, schema: schema)
end
37
64
 
65
# Sends an embedding request to Legion::LLM.
#
# Uses :text when it is truthy, otherwise the first message's :content.
#
# @param payload [Hash] reads :model, :text and :messages
# @return [Object] whatever Legion::LLM.embed returns
def call_embed(payload)
  input = payload[:text]
  input ||= payload.dig(:messages, 0, :content)

  Legion::LLM.embed(model: payload[:model], text: input)
end
69
+
38
70
  def build_response(correlation_id, response)
39
71
  {
40
72
  correlation_id: correlation_id,
@@ -47,6 +79,32 @@ module Legion
47
79
  }
48
80
  end
49
81
 
82
# Publishes +response_hash+ as JSON to the +reply_to+ queue through the
# default exchange, tagged with +correlation_id+.
#
# Best effort: does nothing when Legion::Transport is not loaded, and any
# failure is logged rather than raised. The channel opened for the publish
# is closed on every path, including when the publish raises.
#
# @param reply_to [String] routing key (name of the requester's reply queue)
# @param correlation_id [Object] ID the requester matches the reply on
# @param response_hash [Hash] body to serialise
# @return [Object, nil] not meaningful to callers
def publish_reply(reply_to, correlation_id, response_hash) # rubocop:disable Metrics/MethodLength
  return unless defined?(Legion::Transport)

  payload = if defined?(Legion::JSON)
              Legion::JSON.dump(response_hash)
            else
              require 'json'
              JSON.generate(response_hash)
            end

  channel = Legion::Transport.connection.create_channel
  begin
    channel.default_exchange.publish(
      payload,
      routing_key: reply_to,
      correlation_id: correlation_id,
      content_type: 'application/json'
    )
  ensure
    # Close even when the publish fails, so failed replies do not leak channels.
    channel.close
  end
rescue StandardError => e
  log_warn("FleetHandler: publish_reply failed: #{e.message}")
end
103
+
104
# Forwards +msg+ to Legion::Logging.warn when that logger is loaded;
# otherwise does nothing and returns nil.
#
# @param msg [String]
def log_warn(msg)
  return unless defined?(Legion::Logging)

  Legion::Logging.warn(msg)
end
107
+
50
108
  def extract_token(response, field)
51
109
  return 0 unless response.respond_to?(field)
52
110
 
@@ -4,7 +4,7 @@ module Legion
4
4
  module Extensions
5
5
  module LLM
6
6
  module Gateway
7
- VERSION = '0.2.3'
7
+ VERSION = '0.2.5'
8
8
  end
9
9
  end
10
10
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: lex-llm-gateway
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.3
4
+ version: 0.2.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Esity
@@ -182,6 +182,7 @@ files:
182
182
  - lib/legion/extensions/llm/gateway/actors/spool_flush.rb
183
183
  - lib/legion/extensions/llm/gateway/client.rb
184
184
  - lib/legion/extensions/llm/gateway/helpers/auth.rb
185
+ - lib/legion/extensions/llm/gateway/helpers/reply_dispatcher.rb
185
186
  - lib/legion/extensions/llm/gateway/helpers/rpc.rb
186
187
  - lib/legion/extensions/llm/gateway/runners/fleet.rb
187
188
  - lib/legion/extensions/llm/gateway/runners/fleet_handler.rb