lex-llm-gateway 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (27) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +20 -0
  3. data/Gemfile +4 -0
  4. data/LICENSE +21 -0
  5. data/README.md +15 -0
  6. data/lex-llm-gateway.gemspec +35 -0
  7. data/lib/legion/extensions/llm/gateway/actors/inference_worker.rb +21 -0
  8. data/lib/legion/extensions/llm/gateway/actors/metering_writer.rb +21 -0
  9. data/lib/legion/extensions/llm/gateway/actors/spool_flush.rb +41 -0
  10. data/lib/legion/extensions/llm/gateway/client.rb +55 -0
  11. data/lib/legion/extensions/llm/gateway/helpers/auth.rb +31 -0
  12. data/lib/legion/extensions/llm/gateway/helpers/rpc.rb +36 -0
  13. data/lib/legion/extensions/llm/gateway/runners/fleet.rb +92 -0
  14. data/lib/legion/extensions/llm/gateway/runners/fleet_handler.rb +66 -0
  15. data/lib/legion/extensions/llm/gateway/runners/inference.rb +115 -0
  16. data/lib/legion/extensions/llm/gateway/runners/metering.rb +86 -0
  17. data/lib/legion/extensions/llm/gateway/runners/metering_writer.rb +55 -0
  18. data/lib/legion/extensions/llm/gateway/transport/exchanges/inference.rb +23 -0
  19. data/lib/legion/extensions/llm/gateway/transport/exchanges/metering.rb +23 -0
  20. data/lib/legion/extensions/llm/gateway/transport/messages/inference_request.rb +48 -0
  21. data/lib/legion/extensions/llm/gateway/transport/messages/inference_response.rb +54 -0
  22. data/lib/legion/extensions/llm/gateway/transport/messages/metering_event.rb +75 -0
  23. data/lib/legion/extensions/llm/gateway/transport/queues/inference_process.rb +26 -0
  24. data/lib/legion/extensions/llm/gateway/transport/queues/metering_write.rb +26 -0
  25. data/lib/legion/extensions/llm/gateway/version.rb +11 -0
  26. data/lib/legion/extensions/llm/gateway.rb +13 -0
  27. metadata +128 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: ae55215612840dc5c59339698ffd9b9ff3ab9cde5db07a38f0e537dd201e2f1d
4
+ data.tar.gz: ffa28291a888a470e761135d13ed1b289a5acca80d769e061b4ea589a94a3e48
5
+ SHA512:
6
+ metadata.gz: 813cb8b76c470236fe69a494f03107233fbd92a58937f8b9c61dffa877fc6873d14848a1913051982f81d0c72882e288c9da0dd2596e47fc92f1cdf2be928284
7
+ data.tar.gz: 642c7ccb73faa53e80b7474cfa510a963d73521d96234fd434dfa292766dccc97c5246a607a437715f2791e78c4d28c2a0d9918bc33dd441eaefadfb3cf91192
data/CHANGELOG.md ADDED
@@ -0,0 +1,20 @@
1
+ # Changelog
2
+
3
+ ## [0.2.0] - 2026-03-18
4
+
5
+ ### Added
6
+ - Transport topology: metering (topic) and inference (direct) exchanges, queues, messages
7
+ - RPC correlation and JWT auth helpers
8
+ - Metering event builder with publish-or-spool fallback
9
+ - Inference runner: chat, embed, structured with auto-metering
10
+ - Fleet RPC dispatch runner with JWT auth and timeout
11
+ - Fleet handler for incoming inference requests
12
+ - Metering writer: consumes RMQ events, writes to DB
13
+ - Spool flush interval actor (every 60s)
14
+ - Standalone Client class with all runners
15
+
16
+ ## [0.1.0] - 2026-03-18
17
+
18
+ ### Added
19
+ - Initial gem scaffold
20
+ - Extension entry point and version
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ # frozen_string_literal: true
2
+
3
+ source 'https://rubygems.org'
4
+ gemspec
data/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Esity
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,15 @@
1
+ # lex-llm-gateway
2
+
3
+ LLM inference gateway for LegionIO. Provides centralized metering over RabbitMQ, fleet RPC dispatch to GPU workers, and local disk spooling for offline resilience.
4
+
5
+ ## Installation
6
+
7
+ Add to your Gemfile:
8
+
9
+ ```ruby
10
+ gem 'lex-llm-gateway'
11
+ ```
12
+
13
+ ## License
14
+
15
+ MIT
@@ -0,0 +1,35 @@
1
+ # frozen_string_literal: true
2
+
3
+ lib = File.expand_path('lib', __dir__)
4
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
5
+ require 'legion/extensions/llm/gateway/version'
6
+
7
# Gem specification for lex-llm-gateway. The version is read from
# Legion::Extensions::LLM::Gateway::VERSION, which is required before this block runs.
Gem::Specification.new do |spec|
  spec.name                  = 'lex-llm-gateway'
  spec.version               = Legion::Extensions::LLM::Gateway::VERSION
  spec.authors               = ['Esity']
  spec.email                 = ['matthewdiverson@gmail.com']

  spec.summary               = 'Legion::Extensions::LLM::Gateway'
  spec.description           = 'LLM inference gateway: metering over RabbitMQ, fleet RPC dispatch, local disk spool'
  spec.homepage              = 'https://github.com/LegionIO/lex-llm-gateway'
  spec.license               = 'MIT'
  spec.required_ruby_version = '>= 3.4'

  # Project links plus the MFA requirement flag for publishing.
  spec.metadata.merge!(
    'homepage_uri' => spec.homepage,
    'source_code_uri' => 'https://github.com/LegionIO/lex-llm-gateway',
    'changelog_uri' => 'https://github.com/LegionIO/lex-llm-gateway/blob/main/CHANGELOG.md',
    'documentation_uri' => 'https://github.com/LegionIO/lex-llm-gateway',
    'bug_tracker_uri' => 'https://github.com/LegionIO/lex-llm-gateway/issues',
    'rubygems_mfa_required' => 'true'
  )

  # Everything under lib/ and exe/ plus the fixed set of top-level project files,
  # resolved relative to the directory that holds this gemspec.
  spec.files = Dir.chdir(File.expand_path(__dir__)) do
    top_level = %w[lex-llm-gateway.gemspec Gemfile README.md CHANGELOG.md LICENSE]
    Dir.glob('{lib,exe}/**/*') + top_level
  end
  spec.require_paths = ['lib']

  # Development-only tooling; no runtime dependencies are declared here.
  %w[rake rspec rubocop rubocop-rspec].each do |dev_gem|
    spec.add_development_dependency dev_gem
  end
end
@@ -0,0 +1,21 @@
1
+ # frozen_string_literal: true
2
+
3
module Legion
  module Extensions
    module LLM
      module Gateway
        module Actor
          # Subscription actor that names the runner used for incoming fleet
          # inference requests.
          #
          # NOTE(review): how the base class consumes these two strings is defined
          # in Legion::Extensions::Actors::Subscription (not shown) — presumably it
          # resolves the module and calls the named function per message; confirm.
          class InferenceWorker < Legion::Extensions::Actors::Subscription
            # @return [String] fully qualified name of the runner module.
            #   `handle_fleet_request` is defined on Runners::FleetHandler, not on
            #   Runners::Inference, so the handler module is the one named here.
            def runner_class
              'Legion::Extensions::LLM::Gateway::Runners::FleetHandler'
            end

            # @return [String] name of the function invoked on the runner module.
            def runner_function
              'handle_fleet_request'
            end
          end
        end
      end
    end
  end
end
@@ -0,0 +1,21 @@
1
+ # frozen_string_literal: true
2
+
3
module Legion
  module Extensions
    module LLM
      module Gateway
        module Actor
          # Subscription actor that names Runners::MeteringWriter#write_metering_record
          # as its runner function.
          class MeteringWriter < Legion::Extensions::Actors::Subscription
            # @return [String] fully qualified name of the runner module.
            def runner_class = 'Legion::Extensions::LLM::Gateway::Runners::MeteringWriter'

            # @return [String] name of the function invoked on the runner module.
            def runner_function = 'write_metering_record'
          end
        end
      end
    end
  end
end
@@ -0,0 +1,41 @@
1
+ # frozen_string_literal: true
2
+
3
module Legion
  module Extensions
    module LLM
      module Gateway
        module Actor
          # Interval actor that names Runners::Metering.flush_spool as its work.
          # The meaning of each flag below is defined by
          # Legion::Extensions::Actors::Every (not shown in this file).
          class SpoolFlush < Legion::Extensions::Actors::Every
            # @return [String] fully qualified name of the runner module.
            def runner_class = 'Legion::Extensions::LLM::Gateway::Runners::Metering'

            # @return [String] name of the function invoked on the runner module.
            def runner_function = 'flush_spool'

            # @return [Integer] interval value; the changelog describes it as "every 60s".
            def time = 60

            # All four framework switches are turned off for this actor.
            def run_now? = false
            def use_runner? = false
            def check_subtask? = false
            def generate_task? = false
          end
        end
      end
    end
  end
end
@@ -0,0 +1,55 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'runners/inference'
4
+ require_relative 'runners/metering'
5
+ require_relative 'runners/fleet'
6
+
7
module Legion
  module Extensions
    module LLM
      module Gateway
        # Standalone facade over the gateway runners. Every public method forwards
        # its arguments unchanged to the matching module function on
        # Runners::Inference, Runners::Metering or Runners::Fleet.
        class Client
          # @param opts [Hash] arbitrary options, stored and exposed through #settings.
          def initialize(**opts)
            @opts = opts
          end

          # @return [Hash] the constructor options wrapped as `{ options: ... }`.
          def settings
            { options: @opts }
          end

          # --- Inference -------------------------------------------------------

          def chat(**kwargs) = Runners::Inference.chat(**kwargs)

          def embed(**kwargs) = Runners::Inference.embed(**kwargs)

          def structured(**kwargs) = Runners::Inference.structured(**kwargs)

          # --- Metering --------------------------------------------------------

          def build_event(**kwargs) = Runners::Metering.build_event(**kwargs)

          def publish_or_spool(event) = Runners::Metering.publish_or_spool(event)

          def flush_spool = Runners::Metering.flush_spool

          # --- Fleet -----------------------------------------------------------

          def dispatch(**kwargs) = Runners::Fleet.dispatch(**kwargs)

          def fleet_available? = Runners::Fleet.fleet_available?
        end
      end
    end
  end
end
@@ -0,0 +1,31 @@
1
+ # frozen_string_literal: true
2
+
3
module Legion
  module Extensions
    module LLM
      module Gateway
        module Helpers
          # Best-effort JWT helpers. Both functions return nil when
          # Legion::Crypt::JWT is not loaded or when the JWT call raises.
          module Auth
            module_function

            # Encodes +payload+ with a 60 second TTL.
            #
            # @param payload [Object] passed through as the `payload:` argument.
            # @return [Object, nil] whatever JWT.encode returns, or nil on
            #   unavailability / any StandardError.
            def sign_request(payload)
              Legion::Crypt::JWT.encode(payload:, ttl: 60) if defined?(Legion::Crypt::JWT)
            rescue StandardError
              nil
            end

            # Decodes +token+.
            #
            # @param token [Object] passed through as the `token:` argument.
            # @return [Object, nil] whatever JWT.decode returns, or nil on
            #   unavailability / any StandardError.
            def validate_token(token)
              Legion::Crypt::JWT.decode(token:) if defined?(Legion::Crypt::JWT)
            rescue StandardError
              nil
            end
          end
        end
      end
    end
  end
end
@@ -0,0 +1,36 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'securerandom'
4
+
5
module Legion
  module Extensions
    module LLM
      module Gateway
        module Helpers
          # Small helpers for request/reply correlation.
          module Rpc
            module_function

            # @return [String] a fresh random UUID.
            def generate_correlation_id = SecureRandom.uuid

            # @return [Object, nil] Legion::Transport.agent_queue_name when the
            #   transport is loaded and exposes that method; nil otherwise.
            def agent_queue_name
              return unless defined?(Legion::Transport)
              return unless Legion::Transport.respond_to?(:agent_queue_name)

              Legion::Transport.agent_queue_name
            end

            # @param correlation_id [Object] identifier to echo back in the reply.
            # @return [Hash] `reply_to` (may be nil) and `correlation_id`.
            def build_reply_headers(correlation_id:)
              { reply_to: agent_queue_name, correlation_id: }
            end
          end
        end
      end
    end
  end
end
@@ -0,0 +1,92 @@
1
+ # frozen_string_literal: true
2
+
3
module Legion
  module Extensions
    module LLM
      module Gateway
        module Runners
          # Client side of fleet RPC dispatch: signs a request, publishes it and
          # returns the result of #wait_for_response.
          #
          # Settings are read from `Legion::Settings[:llm][:routing]` when present;
          # any missing or malformed level falls back to the defaults documented on
          # each predicate.
          module Fleet
            # Timeout (in the unit of `timeout_seconds`) used when neither the
            # caller nor the settings supply one.
            DEFAULT_TIMEOUT = 30

            module_function

            # Publishes an inference request to the fleet.
            #
            # @param model [Object] model identifier placed in the request.
            # @param messages [Array] chat messages placed in the request.
            # @param intent [Object, nil] optional routing intent.
            # @param timeout [Numeric, nil] overrides the configured timeout.
            # @return [Hash] `{ success: false, error: ... }` on failure paths, or
            #   the value of #wait_for_response.
            def dispatch(model:, messages:, intent: nil, timeout: nil)
              return error_result('fleet_unavailable') unless fleet_available?

              token = Helpers::Auth.sign_request({ model: model, intent: intent })
              return error_result('fleet_auth_failed') if token.nil? && require_auth?

              correlation_id = Helpers::Rpc.generate_correlation_id
              publish_request(model: model, messages: messages, intent: intent,
                              correlation_id: correlation_id, signed_token: token)

              wait_for_response(correlation_id, timeout: resolve_timeout(timeout))
            end

            # @return [Boolean] true when the transport is connected and the fleet
            #   is not disabled in settings.
            def fleet_available?
              transport_ready? && fleet_enabled?
            end

            # @return [Boolean] true only when Legion::Transport is loaded, responds
            #   to `connected?` and reports a connection.
            def transport_ready?
              !!(defined?(Legion::Transport) &&
                Legion::Transport.respond_to?(:connected?) &&
                Legion::Transport.connected?)
            end

            # @return [Object] the `routing.use_fleet` setting; true when settings
            #   or the routing section are absent or not hashes.
            def fleet_enabled?
              routing = routing_settings
              return true unless routing

              routing.fetch(:use_fleet, true)
            end

            # @return [Object] the `routing.fleet.require_auth` setting; false when
            #   settings or the fleet section are absent or not hashes.
            def require_auth?
              fleet = fleet_settings
              return false unless fleet

              fleet.fetch(:require_auth, false)
            end

            # @param override [Numeric, nil] caller-supplied timeout.
            # @return [Object] the override, else `routing.fleet.timeout_seconds`,
            #   else DEFAULT_TIMEOUT.
            def resolve_timeout(override)
              return override if override

              fleet = fleet_settings
              (fleet && fleet[:timeout_seconds]) || DEFAULT_TIMEOUT
            end

            # Builds and publishes the InferenceRequest message. The reply queue
            # is taken from Helpers::Rpc.agent_queue_name (which may be nil).
            def publish_request(model:, messages:, intent:, correlation_id:, signed_token:)
              reply_to = Helpers::Rpc.agent_queue_name
              # Fully qualified so the lookup cannot fall through to
              # Legion::Transport when Gateway::Transport is not yet loaded.
              Legion::Extensions::LLM::Gateway::Transport::Messages::InferenceRequest.new(
                model: model, messages: messages, intent: intent,
                reply_to: reply_to, correlation_id: correlation_id,
                signed_token: signed_token
              ).publish
            end

            # NOTE(review): this does not wait on anything yet — it immediately
            # returns a `fleet_timeout` failure carrying the correlation id and the
            # resolved timeout. Callers therefore never see a successful result.
            def wait_for_response(correlation_id, timeout:)
              { success: false, error: 'fleet_timeout', correlation_id: correlation_id, timeout: timeout }
            end

            # @param reason [String] machine-readable failure reason.
            # @return [Hash] uniform failure result.
            def error_result(reason)
              { success: false, error: reason }
            end

            # @return [Hash, nil] `Legion::Settings[:llm]` when settings are loaded
            #   and the value is a Hash; nil otherwise, including when the lookup raises.
            def llm_settings
              return nil unless defined?(Legion::Settings)

              settings = Legion::Settings[:llm]
              settings if settings.is_a?(Hash)
            rescue StandardError
              nil
            end

            # @return [Hash, nil] the `routing` section when it is a Hash.
            def routing_settings
              settings = llm_settings
              routing = settings[:routing] if settings
              routing if routing.is_a?(Hash)
            end

            # @return [Hash, nil] the `routing.fleet` section when it is a Hash.
            #   Type-checking each level avoids the TypeError that `dig` raises
            #   when an intermediate value is not diggable.
            def fleet_settings
              routing = routing_settings
              fleet = routing[:fleet] if routing
              fleet if fleet.is_a?(Hash)
            end
          end
        end
      end
    end
  end
end
@@ -0,0 +1,66 @@
1
+ # frozen_string_literal: true
2
+
3
module Legion
  module Extensions
    module LLM
      module Gateway
        module Runners
          # Worker side of fleet RPC: validates the request token when auth is
          # required, runs the request against the local Legion::LLM and shapes
          # a reply hash.
          module FleetHandler
            module_function

            # @param payload [Hash] request with symbol keys; reads `:signed_token`,
            #   `:correlation_id`, `:model` and `:messages`.
            # @return [Hash] the reply built by #build_response, or an
            #   `invalid_token` failure. Both shapes carry `:correlation_id` so the
            #   requester can match the reply to its request.
            def handle_fleet_request(payload)
              token = payload[:signed_token]
              if require_auth? && !valid_token?(token)
                return { success: false, error: 'invalid_token', correlation_id: payload[:correlation_id] }
              end

              response = call_local_llm(payload)
              build_response(payload[:correlation_id], response)
            end

            # @return [Object] the fleet `require_auth` setting (see Fleet.require_auth?).
            def require_auth?
              Fleet.require_auth?
            end

            # @param token [Object, nil] signed token from the request.
            # @return [Boolean] true when no token is supplied and auth is not
            #   required, otherwise whether Helpers::Auth.validate_token accepted it.
            def valid_token?(token)
              return true if token.nil? && !require_auth?

              !Helpers::Auth.validate_token(token).nil?
            end

            # Calls Legion::LLM.chat with the model and the content of the FIRST
            # message only; any further messages in the payload are not forwarded.
            #
            # @return [Object] the LLM response, or `{ error: 'llm_not_available' }`
            #   when Legion::LLM is not loaded.
            def call_local_llm(payload)
              return { error: 'llm_not_available' } unless defined?(Legion::LLM)

              Legion::LLM.chat(
                model: payload[:model],
                message: payload.dig(:messages, 0, :content)
              )
            end

            # @param correlation_id [Object] identifier echoed back to the requester.
            # @param response [Object] LLM response object or an error Hash.
            # @return [Hash] reply fields; token counts default to 0 and
            #   provider/model to nil when the response does not expose them.
            #   `:error` carries the `:error` entry of a Hash response (nil otherwise),
            #   matching the `error` field of the InferenceResponse message.
            def build_response(correlation_id, response)
              {
                correlation_id: correlation_id,
                response: response,
                input_tokens: extract_token(response, :input_tokens),
                output_tokens: extract_token(response, :output_tokens),
                thinking_tokens: extract_token(response, :thinking_tokens),
                provider: extract_field(response, :provider),
                model_id: extract_field(response, :model),
                error: response.is_a?(Hash) ? response[:error] : nil
              }
            end

            # @return [Integer] `response.field` coerced with `to_i`, or 0 when the
            #   response does not respond to +field+.
            def extract_token(response, field)
              return 0 unless response.respond_to?(field)

              response.public_send(field).to_i
            end

            # @return [Object, nil] `response.field`, or nil when the response does
            #   not respond to +field+.
            def extract_field(response, field)
              return nil unless response.respond_to?(field)

              response.public_send(field)
            end
          end
        end
      end
    end
  end
end
@@ -0,0 +1,115 @@
1
+ # frozen_string_literal: true
2
+
3
module Legion
  module Extensions
    module LLM
      module Gateway
        module Runners
          # Inference entry points (chat, embed, structured). Each call is timed
          # with a monotonic clock and followed by a metering event built from
          # the response.
          #
          # Metering is best-effort: a failure while building or publishing the
          # event is reported with `warn` and never replaces the inference result.
          module Inference
            module_function

            # Runs a chat request, via the fleet when `tier: 'fleet'` is given and
            # the fleet is available, otherwise via Legion::LLM.
            #
            # @param model [Object, nil] requested model; also the metering fallback.
            # @param provider [Object, nil] requested provider; also the metering fallback.
            # @param opts [Hash] remaining options (`:message`, `:tier`, `:intent`, ...).
            # @return [Object] the response from the fleet or the LLM.
            def chat(model: nil, provider: nil, **opts)
              response, elapsed_ms = with_latency do
                dispatch_chat(model: model, provider: provider, **opts)
              end
              meter_response(response, request_type: 'chat', provider: provider,
                                       model_id: model, latency_ms: elapsed_ms, **opts.slice(:tier, :intent))
              response
            end

            # Runs an embedding request through Legion::LLM.
            #
            # @return [Object] the LLM response.
            def embed(text: nil, model: nil, provider: nil, **opts)
              response, elapsed_ms = with_latency do
                call_llm(:embed, text: text, model: model, provider: provider, **opts)
              end
              meter_response(response, request_type: 'embed', provider: provider, model_id: model,
                                       latency_ms: elapsed_ms)
              response
            end

            # Runs a structured-output request through Legion::LLM.
            #
            # @return [Object] the LLM response.
            def structured(messages: nil, schema: nil, model: nil, provider: nil, **opts)
              response, elapsed_ms = with_latency do
                call_llm(:structured, messages: messages, schema: schema, model: model,
                                      provider: provider, **opts)
              end
              meter_response(response, request_type: 'structured', provider: provider, model_id: model,
                                       latency_ms: elapsed_ms)
              response
            end

            # Chooses between fleet dispatch and a local LLM call. On the fleet
            # path the single +message+ is wrapped as one `user` message.
            def dispatch_chat(message: nil, model: nil, provider: nil, **opts)
              tier = opts[:tier]
              intent = opts[:intent]
              if tier == 'fleet' && fleet_available?
                Fleet.dispatch(model: model, messages: [{ role: 'user', content: message }],
                               intent: intent)
              else
                call_llm(:chat, message: message, model: model, provider: provider, **opts)
              end
            end

            # @return [Object] truthy when the Fleet runner is loaded and reports
            #   itself available.
            def fleet_available?
              defined?(Legion::Extensions::LLM::Gateway::Runners::Fleet) &&
                Fleet.respond_to?(:fleet_available?) && Fleet.fleet_available?
            end

            # Calls `Legion::LLM.<method_name>_direct` when it exists, otherwise
            # `Legion::LLM.<method_name>`.
            #
            # @return [Object] the LLM response, or `{ error: 'llm_not_available' }`
            #   when Legion::LLM is not loaded.
            def call_llm(method_name, **)
              return { error: 'llm_not_available' } unless defined?(Legion::LLM)

              direct = :"#{method_name}_direct"
              if Legion::LLM.respond_to?(direct)
                Legion::LLM.public_send(direct, **)
              else
                Legion::LLM.public_send(method_name, **)
              end
            end

            # Builds and hands off the metering event for +response+.
            #
            # @return [Object] the result of Metering.publish_or_spool, or :dropped
            #   when building or publishing the event raised.
            def meter_response(response, **)
              Metering.publish_or_spool(build_meter_event(response, **))
            rescue StandardError => e
              # The inference result already exists; losing it to a metering
              # error (e.g. a message validation failure) would be worse than
              # losing the metering record.
              warn "[lex-llm-gateway] metering failed: #{e.class}: #{e.message}"
              :dropped
            end

            def build_meter_event(response, **opts)
              Metering.build_event(**base_meter_fields(response, opts), **token_fields(response))
            end

            # Request-level fields; provider and model prefer what the response
            # reports and fall back to what the caller asked for.
            def base_meter_fields(response, opts)
              {
                request_type: opts[:request_type],
                provider: extract_provider(response, opts[:provider]),
                model_id: extract_model(response, opts[:model_id]),
                latency_ms: opts[:latency_ms],
                tier: opts[:tier],
                routing_reason: opts[:intent]
              }
            end

            def token_fields(response)
              {
                input_tokens: extract_tokens(response, :input_tokens),
                output_tokens: extract_tokens(response, :output_tokens),
                thinking_tokens: extract_tokens(response, :thinking_tokens)
              }
            end

            # @return [Integer] `response.field.to_i`, or 0 when not exposed.
            def extract_tokens(response, field)
              return 0 unless response.respond_to?(field)

              response.public_send(field).to_i
            end

            def extract_provider(response, fallback)
              return response.provider if response.respond_to?(:provider)

              fallback
            end

            def extract_model(response, fallback)
              return response.model if response.respond_to?(:model)

              fallback
            end

            # Runs the block and measures it with the monotonic clock.
            #
            # @return [Array(Object, Integer)] the block's value and the elapsed
            #   time in milliseconds.
            def with_latency
              started_ms = Process.clock_gettime(Process::CLOCK_MONOTONIC, :millisecond)
              result = yield
              [result, Process.clock_gettime(Process::CLOCK_MONOTONIC, :millisecond) - started_ms]
            end
          end
        end
      end
    end
  end
end
@@ -0,0 +1,86 @@
1
+ # frozen_string_literal: true
2
+
3
module Legion
  module Extensions
    module LLM
      module Gateway
        module Runners
          # Builds metering events and delivers them: published over the transport
          # when connected, otherwise written to the local spool when available,
          # otherwise dropped.
          module Metering
            module_function

            # Assembles a metering event hash from keyword options.
            #
            # @param opts [Hash] any of the identity, token, timing and context
            #   keys read by the helpers below; unknown keys are ignored.
            # @return [Hash] the event, including `total_tokens` and `recorded_at`.
            def build_event(**opts)
              identity_fields(opts).merge(token_fields(opts)).merge(timing_and_context(opts))
            end

            def identity_fields(opts)
              {
                node_id: opts[:node_id],
                worker_id: opts[:worker_id],
                agent_id: opts[:agent_id],
                request_type: opts[:request_type],
                tier: opts[:tier],
                provider: opts[:provider],
                model_id: opts[:model_id]
              }
            end

            # Token counts plus their sum. Each count is coerced with `to_i`, so a
            # key that is absent OR present with nil contributes 0 instead of
            # raising when the total is added up.
            def token_fields(opts)
              input = opts[:input_tokens].to_i
              output = opts[:output_tokens].to_i
              thinking = opts[:thinking_tokens].to_i
              { input_tokens: input, output_tokens: output, thinking_tokens: thinking,
                total_tokens: input + output + thinking }
            end

            # Latency figures default to 0 when the key is absent; `recorded_at`
            # is the current UTC time as an ISO 8601 string.
            def timing_and_context(opts)
              {
                latency_ms: opts.fetch(:latency_ms, 0),
                wall_clock_ms: opts.fetch(:wall_clock_ms, 0),
                routing_reason: opts[:routing_reason],
                recorded_at: Time.now.utc.iso8601
              }
            end

            # @param event [Hash] event as produced by #build_event.
            # @return [Symbol] :published, :spooled or :dropped.
            def publish_or_spool(event)
              if transport_connected?
                publish_event(event)
                :published
              elsif spool_available?
                spool_event(event)
                :spooled
              else
                :dropped
              end
            end

            # Re-publishes spooled metering events.
            #
            # @return [Object] 0 when the spool or the transport is unavailable,
            #   otherwise whatever the spool's `flush` returns.
            def flush_spool
              return 0 unless spool_available? && transport_connected?

              spool = Legion::Data::Spool.for(Legion::Extensions::LLM::Gateway)
              spool.flush(:metering) { |event| publish_event(event) }
            end

            # @return [Boolean] true only when Legion::Transport is loaded, responds
            #   to `connected?` and reports a connection.
            def transport_connected?
              !!(defined?(Legion::Transport) &&
                Legion::Transport.respond_to?(:connected?) &&
                Legion::Transport.connected?)
            end

            # @return [Boolean] whether Legion::Data::Spool is loaded.
            def spool_available?
              !!defined?(Legion::Data::Spool)
            end

            def publish_event(event)
              Legion::Extensions::LLM::Gateway::Transport::Messages::MeteringEvent.new(**event).publish
            end

            def spool_event(event)
              spool = Legion::Data::Spool.for(Legion::Extensions::LLM::Gateway)
              spool.write(:metering, event)
            end
          end
        end
      end
    end
  end
end
@@ -0,0 +1,55 @@
1
+ # frozen_string_literal: true
2
+
3
module Legion
  module Extensions
    module LLM
      module Gateway
        module Runners
          # Persists metering payloads into the `metering_records` table through
          # Legion::Data.connection.
          module MeteringWriter
            # Payload keys copied verbatim into the record.
            # Payload keys coerced to integers with `to_i`.
            module_function

            # @param payload [Hash] metering event with symbol keys.
            # @return [Hash] `{ success: true, recorded: record }` after the insert,
            #   or a `data_not_connected` failure when no connection exists.
            def write_metering_record(payload)
              if data_connected?
                record = normalize_record(payload)
                Legion::Data.connection[:metering_records].insert(record)
                { success: true, recorded: record }
              else
                { success: false, error: 'data_not_connected' }
              end
            end

            # @return [Boolean] true only when Legion::Data is loaded, responds to
            #   `connection` and that connection is not nil.
            def data_connected?
              return false unless defined?(Legion::Data)
              return false unless Legion::Data.respond_to?(:connection)

              !Legion::Data.connection.nil?
            end

            # @return [Hash] identity fields followed by metric fields.
            def normalize_record(payload)
              { **identity_fields(payload), **metric_fields(payload) }
            end

            # Copies the identity keys as-is; `recorded_at` falls back to the
            # current UTC time when the payload has none.
            def identity_fields(payload)
              copied = %i[worker_id task_id provider model_id routing_reason].to_h do |key|
                [key, payload[key]]
              end
              copied[:recorded_at] = payload[:recorded_at] || Time.now.utc
              copied
            end

            # Coerces every metric with `to_i`, so missing values become 0.
            def metric_fields(payload)
              metric_keys = %i[input_tokens output_tokens thinking_tokens total_tokens latency_ms wall_clock_ms]
              metric_keys.to_h { |key| [key, payload[key].to_i] }
            end
          end
        end
      end
    end
  end
end
@@ -0,0 +1,23 @@
1
+ # frozen_string_literal: true
2
+
3
module Legion
  module Extensions
    module LLM
      module Gateway
        module Transport
          module Exchanges
            # Exchange definition for inference traffic: name `llm.inference`,
            # type `:direct`.
            class Inference < Legion::Transport::Exchange
              # @return [String] exchange name.
              def exchange_name = 'llm.inference'

              # @return [Symbol] exchange type.
              def exchange_type = :direct
            end
          end
        end
      end
    end
  end
end
@@ -0,0 +1,23 @@
1
+ # frozen_string_literal: true
2
+
3
module Legion
  module Extensions
    module LLM
      module Gateway
        module Transport
          module Exchanges
            # Exchange definition for metering traffic: name `llm.metering`,
            # type `:topic`.
            class Metering < Legion::Transport::Exchange
              # @return [String] exchange name.
              def exchange_name = 'llm.metering'

              # @return [Symbol] exchange type.
              def exchange_type = :topic
            end
          end
        end
      end
    end
  end
end
@@ -0,0 +1,48 @@
1
+ # frozen_string_literal: true
2
+
3
module Legion
  module Extensions
    module LLM
      module Gateway
        module Transport
          module Messages
            # Message carrying one inference request. All values are read from
            # `@options`, which is set up by the base class (not shown).
            class InferenceRequest < Legion::Transport::Message
              # @return [String] routing key used for every request.
              def routing_key = 'inference.request'

              # @return [String] message type tag.
              def type = 'inference_request'

              # @return [Boolean] always false for this message.
              def encrypt? = false

              # Checks, in order, that model, reply_to and correlation_id are set.
              #
              # @raise [RuntimeError] "<field> is required" for the first missing one.
              # @return [Boolean] true, after marking the message valid.
              def validate
                %i[model reply_to correlation_id].each do |field|
                  raise "#{field} is required" unless @options[field]
                end

                @valid = true
              end

              # @return [Hash] request body; `messages` defaults to an empty array.
              def message
                body = %i[model messages intent reply_to correlation_id signed_token provider tier]
                       .to_h { |key| [key, @options[key]] }
                body[:messages] ||= []
                body
              end
            end
          end
        end
      end
    end
  end
end
@@ -0,0 +1,54 @@
1
+ # frozen_string_literal: true
2
+
3
module Legion
  module Extensions
    module LLM
      module Gateway
        module Transport
          module Messages
            # Message carrying the reply to one inference request. All values are
            # read from `@options`, which is set up by the base class (not shown).
            class InferenceResponse < Legion::Transport::Message
              # @return [String] routing key used for every response.
              def routing_key = 'inference.response'

              # @return [String] message type tag.
              def type = 'inference_response'

              # @return [Boolean] always false for this message.
              def encrypt? = false

              # @raise [RuntimeError] when no correlation_id was supplied.
              # @return [Boolean] true, after marking the message valid.
              def validate
                raise 'correlation_id is required' unless @options[:correlation_id]

                @valid = true
              end

              # @return [Hash] usage counters followed by the reply fields.
              def message
                {
                  **token_fields,
                  correlation_id: @options[:correlation_id],
                  response: @options[:response],
                  provider: @options[:provider],
                  model_id: @options[:model_id],
                  error: @options[:error]
                }
              end

              private

              # Usage counters and latency, each defaulting to 0 when unset.
              def token_fields
                %i[input_tokens output_tokens thinking_tokens latency_ms].to_h do |key|
                  [key, @options[key] || 0]
                end
              end
            end
          end
        end
      end
    end
  end
end
@@ -0,0 +1,75 @@
1
+ # frozen_string_literal: true
2
+
3
module Legion
  module Extensions
    module LLM
      module Gateway
        module Transport
          module Messages
            # Message carrying one metering event. All values are read from
            # `@options`, which is set up by the base class (not shown).
            class MeteringEvent < Legion::Transport::Message
              # @return [String] `metering.<request_type>`, or `metering.unknown`
              #   when no request type was supplied.
              def routing_key
                suffix = @options[:request_type] || 'unknown'
                "metering.#{suffix}"
              end

              # @return [String] message type tag.
              def type = 'metering_event'

              # @return [Boolean] always false for this message.
              def encrypt? = false

              # Checks, in order, that request_type and provider are set.
              #
              # @raise [RuntimeError] "<field> is required" for the first missing one.
              # @return [Boolean] true, after marking the message valid.
              def validate
                %i[request_type provider].each do |field|
                  raise "#{field} is required" unless @options[field]
                end

                @valid = true
              end

              # @return [Hash] identity, token, timing and context fields, in that order.
              def message
                { **identity_fields, **token_fields, **timing_fields, **context_fields }
              end

              private

              # Who produced the event and for which provider/model; copied as-is.
              def identity_fields
                %i[node_id worker_id agent_id request_type tier provider model_id].to_h do |key|
                  [key, @options[key]]
                end
              end

              # `recorded_at` falls back to the current UTC time (ISO 8601 string).
              def context_fields
                {
                  routing_reason: @options[:routing_reason],
                  recorded_at: @options[:recorded_at] || Time.now.utc.iso8601
                }
              end

              # Token counters, each defaulting to 0 when unset.
              def token_fields
                %i[input_tokens output_tokens thinking_tokens total_tokens].to_h do |key|
                  [key, @options[key] || 0]
                end
              end

              # Timing figures, each defaulting to 0 when unset.
              def timing_fields
                %i[latency_ms wall_clock_ms].to_h { |key| [key, @options[key] || 0] }
              end
            end
          end
        end
      end
    end
  end
end
@@ -0,0 +1,26 @@
1
+ # frozen_string_literal: true
2
+
3
module Legion
  module Extensions
    module LLM
      module Gateway
        module Transport
          module Queues
            # Queue definition `llm.inference.process`, declared durable and not
            # auto-deleted.
            class InferenceProcess < Legion::Transport::Queue
              # @return [String] queue name.
              def queue_name = 'llm.inference.process'

              # @return [Hash] queue declaration options.
              def queue_options
                { durable: true, auto_delete: false }
              end
            end
          end
        end
      end
    end
  end
end
@@ -0,0 +1,26 @@
1
+ # frozen_string_literal: true
2
+
3
module Legion
  module Extensions
    module LLM
      module Gateway
        module Transport
          module Queues
            # Queue definition `llm.metering.write`, declared durable and not
            # auto-deleted.
            class MeteringWrite < Legion::Transport::Queue
              # @return [String] queue name.
              def queue_name = 'llm.metering.write'

              # @return [Hash] queue declaration options.
              def queue_options
                { durable: true, auto_delete: false }
              end
            end
          end
        end
      end
    end
  end
end
@@ -0,0 +1,11 @@
1
+ # frozen_string_literal: true
2
+
3
module Legion
  module Extensions
    module LLM
      module Gateway
        # Gem version; read by the gemspec as `spec.version`.
        VERSION = '0.2.0'
      end
    end
  end
end
@@ -0,0 +1,13 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'legion/extensions/llm/gateway/version'
4
+
5
module Legion
  module Extensions
    module LLM
      # Extension entry point. Extends itself with Legion::Extensions::Core only
      # when that constant is already defined, so the module can still be loaded
      # without it.
      module Gateway
        core_loaded = Legion::Extensions.const_defined?(:Core)
        extend Legion::Extensions::Core if core_loaded
      end
    end
  end
end
metadata ADDED
@@ -0,0 +1,128 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: lex-llm-gateway
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.2.0
5
+ platform: ruby
6
+ authors:
7
+ - Esity
8
+ bindir: bin
9
+ cert_chain: []
10
+ date: 1980-01-02 00:00:00.000000000 Z
11
+ dependencies:
12
+ - !ruby/object:Gem::Dependency
13
+ name: rake
14
+ requirement: !ruby/object:Gem::Requirement
15
+ requirements:
16
+ - - ">="
17
+ - !ruby/object:Gem::Version
18
+ version: '0'
19
+ type: :development
20
+ prerelease: false
21
+ version_requirements: !ruby/object:Gem::Requirement
22
+ requirements:
23
+ - - ">="
24
+ - !ruby/object:Gem::Version
25
+ version: '0'
26
+ - !ruby/object:Gem::Dependency
27
+ name: rspec
28
+ requirement: !ruby/object:Gem::Requirement
29
+ requirements:
30
+ - - ">="
31
+ - !ruby/object:Gem::Version
32
+ version: '0'
33
+ type: :development
34
+ prerelease: false
35
+ version_requirements: !ruby/object:Gem::Requirement
36
+ requirements:
37
+ - - ">="
38
+ - !ruby/object:Gem::Version
39
+ version: '0'
40
+ - !ruby/object:Gem::Dependency
41
+ name: rubocop
42
+ requirement: !ruby/object:Gem::Requirement
43
+ requirements:
44
+ - - ">="
45
+ - !ruby/object:Gem::Version
46
+ version: '0'
47
+ type: :development
48
+ prerelease: false
49
+ version_requirements: !ruby/object:Gem::Requirement
50
+ requirements:
51
+ - - ">="
52
+ - !ruby/object:Gem::Version
53
+ version: '0'
54
+ - !ruby/object:Gem::Dependency
55
+ name: rubocop-rspec
56
+ requirement: !ruby/object:Gem::Requirement
57
+ requirements:
58
+ - - ">="
59
+ - !ruby/object:Gem::Version
60
+ version: '0'
61
+ type: :development
62
+ prerelease: false
63
+ version_requirements: !ruby/object:Gem::Requirement
64
+ requirements:
65
+ - - ">="
66
+ - !ruby/object:Gem::Version
67
+ version: '0'
68
+ description: 'LLM inference gateway: metering over RabbitMQ, fleet RPC dispatch, local
69
+ disk spool'
70
+ email:
71
+ - matthewdiverson@gmail.com
72
+ executables: []
73
+ extensions: []
74
+ extra_rdoc_files: []
75
+ files:
76
+ - CHANGELOG.md
77
+ - Gemfile
78
+ - LICENSE
79
+ - README.md
80
+ - lex-llm-gateway.gemspec
81
+ - lib/legion/extensions/llm/gateway.rb
82
+ - lib/legion/extensions/llm/gateway/actors/inference_worker.rb
83
+ - lib/legion/extensions/llm/gateway/actors/metering_writer.rb
84
+ - lib/legion/extensions/llm/gateway/actors/spool_flush.rb
85
+ - lib/legion/extensions/llm/gateway/client.rb
86
+ - lib/legion/extensions/llm/gateway/helpers/auth.rb
87
+ - lib/legion/extensions/llm/gateway/helpers/rpc.rb
88
+ - lib/legion/extensions/llm/gateway/runners/fleet.rb
89
+ - lib/legion/extensions/llm/gateway/runners/fleet_handler.rb
90
+ - lib/legion/extensions/llm/gateway/runners/inference.rb
91
+ - lib/legion/extensions/llm/gateway/runners/metering.rb
92
+ - lib/legion/extensions/llm/gateway/runners/metering_writer.rb
93
+ - lib/legion/extensions/llm/gateway/transport/exchanges/inference.rb
94
+ - lib/legion/extensions/llm/gateway/transport/exchanges/metering.rb
95
+ - lib/legion/extensions/llm/gateway/transport/messages/inference_request.rb
96
+ - lib/legion/extensions/llm/gateway/transport/messages/inference_response.rb
97
+ - lib/legion/extensions/llm/gateway/transport/messages/metering_event.rb
98
+ - lib/legion/extensions/llm/gateway/transport/queues/inference_process.rb
99
+ - lib/legion/extensions/llm/gateway/transport/queues/metering_write.rb
100
+ - lib/legion/extensions/llm/gateway/version.rb
101
+ homepage: https://github.com/LegionIO/lex-llm-gateway
102
+ licenses:
103
+ - MIT
104
+ metadata:
105
+ homepage_uri: https://github.com/LegionIO/lex-llm-gateway
106
+ source_code_uri: https://github.com/LegionIO/lex-llm-gateway
107
+ changelog_uri: https://github.com/LegionIO/lex-llm-gateway/blob/main/CHANGELOG.md
108
+ documentation_uri: https://github.com/LegionIO/lex-llm-gateway
109
+ bug_tracker_uri: https://github.com/LegionIO/lex-llm-gateway/issues
110
+ rubygems_mfa_required: 'true'
111
+ rdoc_options: []
112
+ require_paths:
113
+ - lib
114
+ required_ruby_version: !ruby/object:Gem::Requirement
115
+ requirements:
116
+ - - ">="
117
+ - !ruby/object:Gem::Version
118
+ version: '3.4'
119
+ required_rubygems_version: !ruby/object:Gem::Requirement
120
+ requirements:
121
+ - - ">="
122
+ - !ruby/object:Gem::Version
123
+ version: '0'
124
+ requirements: []
125
+ rubygems_version: 3.6.9
126
+ specification_version: 4
127
+ summary: Legion::Extensions::LLM::Gateway
128
+ test_files: []