lex-llm-vllm 0.1.7 → 0.1.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +5 -0
- data/CHANGELOG.md +19 -0
- data/README.md +56 -10
- data/lex-llm-vllm.gemspec +1 -1
- data/lib/legion/extensions/llm/vllm/provider.rb +26 -5
- data/lib/legion/extensions/llm/vllm/version.rb +1 -1
- data/lib/legion/extensions/llm/vllm.rb +18 -15
- metadata +3 -7
- data/lib/legion/extensions/llm/vllm/registry_event_builder.rb +0 -122
- data/lib/legion/extensions/llm/vllm/registry_publisher.rb +0 -100
- data/lib/legion/extensions/llm/vllm/transport/exchanges/llm_registry.rb +0 -24
- data/lib/legion/extensions/llm/vllm/transport/messages/registry_event.rb +0 -42
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: c7c1de4a067bd42d4675c0485f5e13c4d6fe3a1a17c29a2e23c46d266588dd20
|
|
4
|
+
data.tar.gz: fe503a3a436ef92bcc88015b1d608180d4f98a226cdac4d825c7433c812ee67c
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 00bdc87460cf051250b56def2c2a910efe5ff058451a3eff26a7ad1254c5ec9441d3ddb592fa80fd600a83e8479d3593cd1771042e0bacadf0613bb33735ba26
|
|
7
|
+
data.tar.gz: 7d0df28f8edc25b269f64a987e63de90fe89d098e409bbd2dada20ac8f8b981caec3c046a3eeb591c7edb5c025efc80cd480837fc03b81348ab69e160bef9d2b
|
data/.rubocop.yml
CHANGED
|
@@ -12,7 +12,12 @@ Metrics/BlockLength:
|
|
|
12
12
|
Exclude:
|
|
13
13
|
- "*.gemspec"
|
|
14
14
|
- spec/**/*
|
|
15
|
+
Metrics/ClassLength:
|
|
16
|
+
Exclude:
|
|
17
|
+
- lib/legion/extensions/llm/vllm/provider.rb
|
|
15
18
|
Metrics/MethodLength:
|
|
16
19
|
Enabled: false
|
|
20
|
+
RSpec/ExampleLength:
|
|
21
|
+
Max: 8
|
|
17
22
|
RSpec/MultipleExpectations:
|
|
18
23
|
Enabled: false
|
data/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,24 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## 0.1.9 - 2026-04-30
|
|
4
|
+
|
|
5
|
+
- Adopt base provider contract from lex-llm 0.1.9
|
|
6
|
+
- Replace local `RegistryEventBuilder` and `RegistryPublisher` with parameterized base versions
|
|
7
|
+
- Delete local `transport/` directory; base gem now ships shared exchange and message classes
|
|
8
|
+
- Remove deprecated `Provider.register` call; provider options registered via `Configuration.register_provider_options`
|
|
9
|
+
- Simplify `default_settings` to a flat hash (no longer delegates to `ProviderSettings.build`)
|
|
10
|
+
- Override `parse_list_models_response` to populate `context_length` from vLLM `max_model_len` field
|
|
11
|
+
- Require `lex-llm >= 0.1.9`
|
|
12
|
+
|
|
13
|
+
## 0.1.8 - 2026-04-30
|
|
14
|
+
|
|
15
|
+
- Add `Legion::Logging::Helper` to all modules and classes for structured logging
|
|
16
|
+
- Replace all bare rescue blocks with `handle_exception` calls for full observability
|
|
17
|
+
- Add info-level action logging to Provider key actions (health, readiness, list_models, version)
|
|
18
|
+
- Add info-level logging to RegistryPublisher publish methods
|
|
19
|
+
- Remove custom `log_publish_failure` method in favor of standard `handle_exception`
|
|
20
|
+
- Update README to reflect registry publishing, thinking mode, and management endpoints
|
|
21
|
+
|
|
3
22
|
## 0.1.7 - 2026-04-30
|
|
4
23
|
|
|
5
24
|
- Enable stream_usage_supported? for streaming token usage reporting
|
data/README.md
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# lex-llm-vllm
|
|
2
2
|
|
|
3
|
-
LegionIO LLM provider extension for vLLM.
|
|
3
|
+
LegionIO LLM provider extension for [vLLM](https://docs.vllm.ai/).
|
|
4
4
|
|
|
5
5
|
This gem lives under `Legion::Extensions::Llm::Vllm` and depends on `lex-llm` for shared provider-neutral routing, fleet, and schema primitives.
|
|
6
6
|
|
|
@@ -9,14 +9,17 @@ Load it with `require 'legion/extensions/llm/vllm'`.
|
|
|
9
9
|
## What It Provides
|
|
10
10
|
|
|
11
11
|
- `Legion::Extensions::Llm::Provider` registration as `:vllm`
|
|
12
|
-
-
|
|
13
|
-
-
|
|
14
|
-
-
|
|
15
|
-
-
|
|
16
|
-
-
|
|
17
|
-
- vLLM
|
|
18
|
-
-
|
|
19
|
-
-
|
|
12
|
+
- Shared `Legion::Extensions::Llm::Provider::OpenAICompatible` request and response handling
|
|
13
|
+
- Chat requests through `POST /v1/chat/completions`
|
|
14
|
+
- Streaming chat with `stream_usage_supported?` for token usage reporting
|
|
15
|
+
- Model discovery through `GET /v1/models`
|
|
16
|
+
- Embeddings through `POST /v1/embeddings`
|
|
17
|
+
- vLLM thinking mode via `chat_template_kwargs` (configurable through `Legion::Settings`)
|
|
18
|
+
- Best-effort `llm.registry` readiness and model availability event publishing when transport is loaded
|
|
19
|
+
- vLLM management helpers: `/health`, `/version`, `/reset_prefix_cache`, `/reset_mm_cache`, `/sleep`, `/wake_up`
|
|
20
|
+
- Normalized OpenAI-compatible capability and modality metadata for discovered models
|
|
21
|
+
- Shared fleet/default settings via `Legion::Extensions::Llm.provider_settings`
|
|
22
|
+
- Full `Legion::Logging::Helper` integration with structured `handle_exception` across all classes
|
|
20
23
|
|
|
21
24
|
## Defaults
|
|
22
25
|
|
|
@@ -47,4 +50,47 @@ Legion::Extensions::Llm.configure do |config|
|
|
|
47
50
|
end
|
|
48
51
|
```
|
|
49
52
|
|
|
50
|
-
|
|
53
|
+
### Thinking Mode
|
|
54
|
+
|
|
55
|
+
Enable vLLM thinking mode globally via settings:
|
|
56
|
+
|
|
57
|
+
```ruby
|
|
58
|
+
# In Legion::Settings or settings JSON
|
|
59
|
+
{ llm: { providers: { vllm: { enable_thinking: true } } } }
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
Or pass `thinking: { enabled: true }` per-request. When enabled, the provider adds `chat_template_kwargs: { enable_thinking: true }` to the payload and strips `reasoning_effort`.
|
|
63
|
+
|
|
64
|
+
## Management Endpoints
|
|
65
|
+
|
|
66
|
+
The provider exposes helpers for vLLM server management:
|
|
67
|
+
|
|
68
|
+
| Method | Endpoint | Description |
|
|
69
|
+
|--------|----------|-------------|
|
|
70
|
+
| `health` | `GET /health` | Server health check |
|
|
71
|
+
| `version` | `GET /version` | Server version info |
|
|
72
|
+
| `reset_prefix_cache` | `POST /reset_prefix_cache` | Clear prefix cache |
|
|
73
|
+
| `reset_mm_cache` | `POST /reset_mm_cache` | Clear multimodal cache |
|
|
74
|
+
| `sleep(level:)` | `POST /sleep` | Put server to sleep |
|
|
75
|
+
| `wake_up(tags:)` | `POST /wake_up` | Wake server up |
|
|
76
|
+
|
|
77
|
+
## Registry Publishing
|
|
78
|
+
|
|
79
|
+
When `lex-llm` routing and Legion transport are available, the provider publishes best-effort availability events to the `llm.registry` exchange:
|
|
80
|
+
|
|
81
|
+
- **Readiness events** on `readiness(live: true)` calls
|
|
82
|
+
- **Model availability events** on `list_models` discovery
|
|
83
|
+
|
|
84
|
+
Publishing is async (background threads) and never blocks the caller. All failures are handled gracefully via `handle_exception`.
|
|
85
|
+
|
|
86
|
+
## Development
|
|
87
|
+
|
|
88
|
+
```bash
|
|
89
|
+
bundle install
|
|
90
|
+
bundle exec rspec
|
|
91
|
+
bundle exec rubocop
|
|
92
|
+
```
|
|
93
|
+
|
|
94
|
+
## License
|
|
95
|
+
|
|
96
|
+
MIT
|
data/lex-llm-vllm.gemspec
CHANGED
|
@@ -26,5 +26,5 @@ Gem::Specification.new do |spec|
|
|
|
26
26
|
spec.add_dependency 'legion-json', '>= 1.2.1'
|
|
27
27
|
spec.add_dependency 'legion-logging', '>= 1.3.2'
|
|
28
28
|
spec.add_dependency 'legion-settings', '>= 1.3.14'
|
|
29
|
-
spec.add_dependency 'lex-llm', '>= 0.1.
|
|
29
|
+
spec.add_dependency 'lex-llm', '>= 0.1.9'
|
|
30
30
|
end
|
|
@@ -10,18 +10,17 @@ module Legion
|
|
|
10
10
|
# vLLM provider implementation for the Legion::Extensions::Llm base provider contract.
|
|
11
11
|
class Provider < Legion::Extensions::Llm::Provider
|
|
12
12
|
include Legion::Extensions::Llm::Provider::OpenAICompatible
|
|
13
|
+
include Legion::Logging::Helper
|
|
13
14
|
|
|
14
15
|
class << self
|
|
15
|
-
attr_writer :registry_publisher
|
|
16
|
-
|
|
17
16
|
def slug = 'vllm'
|
|
18
|
-
def local? =
|
|
17
|
+
def local? = false
|
|
19
18
|
def configuration_options = %i[vllm_api_base vllm_api_key]
|
|
20
19
|
def configuration_requirements = []
|
|
21
20
|
def capabilities = Capabilities
|
|
22
21
|
|
|
23
22
|
def registry_publisher
|
|
24
|
-
|
|
23
|
+
Vllm.registry_publisher
|
|
25
24
|
end
|
|
26
25
|
end
|
|
27
26
|
|
|
@@ -66,22 +65,27 @@ module Legion
|
|
|
66
65
|
def wake_up_url = '/wake_up'
|
|
67
66
|
|
|
68
67
|
def health
|
|
68
|
+
log.info { "checking health at #{api_base}#{health_url}" }
|
|
69
69
|
connection.get(health_url).body
|
|
70
70
|
end
|
|
71
71
|
|
|
72
72
|
def readiness(live: false)
|
|
73
|
+
log.info { "checking readiness live=#{live} at #{api_base}" }
|
|
73
74
|
super.tap do |metadata|
|
|
74
75
|
self.class.registry_publisher.publish_readiness_async(metadata) if live
|
|
75
76
|
end
|
|
76
77
|
end
|
|
77
78
|
|
|
78
79
|
def list_models
|
|
80
|
+
log.info { "discovering models from #{api_base}#{models_url}" }
|
|
79
81
|
super.tap do |models|
|
|
82
|
+
log.info { "discovered #{models.size} model(s) from vLLM" }
|
|
80
83
|
self.class.registry_publisher.publish_models_async(models, readiness: readiness(live: false))
|
|
81
84
|
end
|
|
82
85
|
end
|
|
83
86
|
|
|
84
87
|
def version
|
|
88
|
+
log.info { "fetching version from #{api_base}#{version_url}" }
|
|
85
89
|
connection.get(version_url).body
|
|
86
90
|
end
|
|
87
91
|
|
|
@@ -124,10 +128,27 @@ module Legion
|
|
|
124
128
|
|
|
125
129
|
vllm = Legion::Settings.dig(:llm, :providers, :vllm)
|
|
126
130
|
vllm.is_a?(Hash) && (vllm[:enable_thinking] == true || vllm['enable_thinking'] == true)
|
|
127
|
-
rescue StandardError
|
|
131
|
+
rescue StandardError => e
|
|
132
|
+
handle_exception(e, level: :debug, handled: true, operation: 'vllm.thinking_setting')
|
|
128
133
|
false
|
|
129
134
|
end
|
|
130
135
|
|
|
136
|
+
def parse_list_models_response(response, provider, capabilities)
|
|
137
|
+
response.body.fetch('data', []).map do |model|
|
|
138
|
+
critical_capabilities = critical_capabilities_for(capabilities, model)
|
|
139
|
+
Legion::Extensions::Llm::Model::Info.from_hash(
|
|
140
|
+
id: model.fetch('id'),
|
|
141
|
+
name: model['id'],
|
|
142
|
+
provider: provider,
|
|
143
|
+
created_at: model_created_at(model['created']),
|
|
144
|
+
context_length: model['max_model_len'],
|
|
145
|
+
capabilities: critical_capabilities,
|
|
146
|
+
modalities: modalities_for_capabilities(critical_capabilities),
|
|
147
|
+
metadata: model
|
|
148
|
+
)
|
|
149
|
+
end
|
|
150
|
+
end
|
|
151
|
+
|
|
131
152
|
def with_query(path, positional = [], **params)
|
|
132
153
|
pairs = positional + params.compact.map { |key, value| [key.to_s, value] }
|
|
133
154
|
return path if pairs.empty?
|
|
@@ -1,9 +1,7 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
require 'legion/extensions/llm'
|
|
4
|
-
require 'legion/extensions/llm/vllm/registry_event_builder'
|
|
5
4
|
require 'legion/extensions/llm/vllm/provider'
|
|
6
|
-
require 'legion/extensions/llm/vllm/registry_publisher'
|
|
7
5
|
require 'legion/extensions/llm/vllm/version'
|
|
8
6
|
|
|
9
7
|
module Legion
|
|
@@ -12,29 +10,34 @@ module Legion
|
|
|
12
10
|
# Vllm provider extension namespace.
|
|
13
11
|
module Vllm
|
|
14
12
|
extend ::Legion::Extensions::Core if ::Legion::Extensions.const_defined?(:Core, false)
|
|
13
|
+
extend Legion::Logging::Helper
|
|
15
14
|
|
|
16
15
|
PROVIDER_FAMILY = :vllm
|
|
17
16
|
|
|
18
17
|
def self.default_settings
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
}
|
|
28
|
-
|
|
18
|
+
{
|
|
19
|
+
enabled: false,
|
|
20
|
+
base_url: 'localhost:8000/v1',
|
|
21
|
+
default_model: nil,
|
|
22
|
+
enable_thinking: true,
|
|
23
|
+
model_whitelist: [],
|
|
24
|
+
model_blacklist: [],
|
|
25
|
+
model_cache_ttl: 300,
|
|
26
|
+
tls: { enabled: false, verify: :peer },
|
|
27
|
+
instances: {}
|
|
28
|
+
}
|
|
29
29
|
end
|
|
30
30
|
|
|
31
31
|
def self.provider_class
|
|
32
32
|
Provider
|
|
33
33
|
end
|
|
34
|
+
|
|
35
|
+
def self.registry_publisher
|
|
36
|
+
@registry_publisher ||= Legion::Extensions::Llm::RegistryPublisher.new(provider_family: PROVIDER_FAMILY)
|
|
37
|
+
end
|
|
38
|
+
|
|
39
|
+
Legion::Extensions::Llm::Configuration.register_provider_options(Provider.configuration_options)
|
|
34
40
|
end
|
|
35
41
|
end
|
|
36
42
|
end
|
|
37
43
|
end
|
|
38
|
-
|
|
39
|
-
Legion::Extensions::Llm::Provider.register(Legion::Extensions::Llm::Vllm::PROVIDER_FAMILY,
|
|
40
|
-
Legion::Extensions::Llm::Vllm::Provider)
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: lex-llm-vllm
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.1.7
|
|
4
|
+
version: 0.1.9
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- LegionIO
|
|
@@ -57,14 +57,14 @@ dependencies:
|
|
|
57
57
|
requirements:
|
|
58
58
|
- - ">="
|
|
59
59
|
- !ruby/object:Gem::Version
|
|
60
|
-
version: 0.1.
|
|
60
|
+
version: 0.1.9
|
|
61
61
|
type: :runtime
|
|
62
62
|
prerelease: false
|
|
63
63
|
version_requirements: !ruby/object:Gem::Requirement
|
|
64
64
|
requirements:
|
|
65
65
|
- - ">="
|
|
66
66
|
- !ruby/object:Gem::Version
|
|
67
|
-
version: 0.1.
|
|
67
|
+
version: 0.1.9
|
|
68
68
|
description: vLLM provider integration for the LegionIO LLM routing framework.
|
|
69
69
|
email:
|
|
70
70
|
- matthewdiverson@gmail.com
|
|
@@ -84,10 +84,6 @@ files:
|
|
|
84
84
|
- lex-llm-vllm.gemspec
|
|
85
85
|
- lib/legion/extensions/llm/vllm.rb
|
|
86
86
|
- lib/legion/extensions/llm/vllm/provider.rb
|
|
87
|
-
- lib/legion/extensions/llm/vllm/registry_event_builder.rb
|
|
88
|
-
- lib/legion/extensions/llm/vllm/registry_publisher.rb
|
|
89
|
-
- lib/legion/extensions/llm/vllm/transport/exchanges/llm_registry.rb
|
|
90
|
-
- lib/legion/extensions/llm/vllm/transport/messages/registry_event.rb
|
|
91
87
|
- lib/legion/extensions/llm/vllm/version.rb
|
|
92
88
|
homepage: https://github.com/LegionIO/lex-llm-vllm
|
|
93
89
|
licenses:
|
|
@@ -1,122 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
module Legion
|
|
4
|
-
module Extensions
|
|
5
|
-
module Llm
|
|
6
|
-
module Vllm
|
|
7
|
-
# Builds sanitized lex-llm registry envelopes for vLLM provider state.
|
|
8
|
-
class RegistryEventBuilder
|
|
9
|
-
def readiness(readiness)
|
|
10
|
-
registry_event_class.public_send(
|
|
11
|
-
readiness[:ready] ? :available : :unavailable,
|
|
12
|
-
provider_offering(readiness),
|
|
13
|
-
runtime: runtime_metadata,
|
|
14
|
-
health: readiness_health(readiness),
|
|
15
|
-
metadata: readiness_metadata(readiness)
|
|
16
|
-
)
|
|
17
|
-
end
|
|
18
|
-
|
|
19
|
-
def model_available(model, readiness:)
|
|
20
|
-
registry_event_class.available(
|
|
21
|
-
model_offering(model),
|
|
22
|
-
runtime: runtime_metadata,
|
|
23
|
-
health: model_health(readiness),
|
|
24
|
-
metadata: model_metadata(model)
|
|
25
|
-
)
|
|
26
|
-
end
|
|
27
|
-
|
|
28
|
-
private
|
|
29
|
-
|
|
30
|
-
def provider_offering(readiness)
|
|
31
|
-
{
|
|
32
|
-
provider_family: :vllm,
|
|
33
|
-
provider_instance: provider_instance,
|
|
34
|
-
transport: :http,
|
|
35
|
-
model: 'provider-readiness',
|
|
36
|
-
usage_type: :inference,
|
|
37
|
-
capabilities: [],
|
|
38
|
-
health: readiness_health(readiness),
|
|
39
|
-
metadata: { lex: :llm_vllm, provider_readiness: true }
|
|
40
|
-
}
|
|
41
|
-
end
|
|
42
|
-
|
|
43
|
-
def model_offering(model)
|
|
44
|
-
{
|
|
45
|
-
provider_family: :vllm,
|
|
46
|
-
provider_instance: provider_instance,
|
|
47
|
-
transport: :http,
|
|
48
|
-
model: model.id,
|
|
49
|
-
usage_type: usage_type_for(model),
|
|
50
|
-
capabilities: Array(model.capabilities).map(&:to_sym),
|
|
51
|
-
limits: model_limits(model),
|
|
52
|
-
metadata: { lex: :llm_vllm, model_name: model.name }.compact
|
|
53
|
-
}
|
|
54
|
-
end
|
|
55
|
-
|
|
56
|
-
def readiness_health(readiness)
|
|
57
|
-
health = {
|
|
58
|
-
ready: readiness[:ready] == true,
|
|
59
|
-
status: readiness[:ready] ? :available : :unavailable,
|
|
60
|
-
checked: readiness.dig(:health, :checked) != false
|
|
61
|
-
}
|
|
62
|
-
add_readiness_error(health, readiness[:health])
|
|
63
|
-
end
|
|
64
|
-
|
|
65
|
-
def add_readiness_error(health, source)
|
|
66
|
-
error = source.is_a?(Hash) ? source : {}
|
|
67
|
-
error_class = error[:error] || error['error']
|
|
68
|
-
error_message = error[:message] || error['message']
|
|
69
|
-
health[:error_class] = error_class if error_class
|
|
70
|
-
health[:error] = error_message if error_message
|
|
71
|
-
health
|
|
72
|
-
end
|
|
73
|
-
|
|
74
|
-
def model_health(readiness)
|
|
75
|
-
ready = readiness.fetch(:ready, true) == true
|
|
76
|
-
{ ready:, status: ready ? :available : :degraded }
|
|
77
|
-
end
|
|
78
|
-
|
|
79
|
-
def readiness_metadata(readiness)
|
|
80
|
-
{
|
|
81
|
-
extension: :lex_llm_vllm,
|
|
82
|
-
provider: :vllm,
|
|
83
|
-
configured: readiness[:configured] == true,
|
|
84
|
-
live: readiness[:live] == true
|
|
85
|
-
}
|
|
86
|
-
end
|
|
87
|
-
|
|
88
|
-
def model_metadata(model)
|
|
89
|
-
{ extension: :lex_llm_vllm, provider: :vllm, model_type: model.type }
|
|
90
|
-
end
|
|
91
|
-
|
|
92
|
-
def runtime_metadata
|
|
93
|
-
{ node: provider_instance }
|
|
94
|
-
end
|
|
95
|
-
|
|
96
|
-
def model_limits(model)
|
|
97
|
-
{
|
|
98
|
-
context_window: model.context_window,
|
|
99
|
-
max_output_tokens: model.max_output_tokens
|
|
100
|
-
}.compact
|
|
101
|
-
end
|
|
102
|
-
|
|
103
|
-
def usage_type_for(model)
|
|
104
|
-
model.type == 'embedding' ? :embedding : :inference
|
|
105
|
-
end
|
|
106
|
-
|
|
107
|
-
def provider_instance
|
|
108
|
-
configured_node = (::Legion::Settings.dig(:node, :canonical_name) if defined?(::Legion::Settings))
|
|
109
|
-
value = configured_node.to_s.strip
|
|
110
|
-
value.empty? ? :vllm : value.to_sym
|
|
111
|
-
rescue StandardError
|
|
112
|
-
:vllm
|
|
113
|
-
end
|
|
114
|
-
|
|
115
|
-
def registry_event_class
|
|
116
|
-
::Legion::Extensions::Llm::Routing::RegistryEvent
|
|
117
|
-
end
|
|
118
|
-
end
|
|
119
|
-
end
|
|
120
|
-
end
|
|
121
|
-
end
|
|
122
|
-
end
|
|
@@ -1,100 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
module Legion
|
|
4
|
-
module Extensions
|
|
5
|
-
module Llm
|
|
6
|
-
module Vllm
|
|
7
|
-
# Best-effort publisher for vLLM provider availability events.
|
|
8
|
-
class RegistryPublisher
|
|
9
|
-
APP_ID = 'lex-llm-vllm'
|
|
10
|
-
|
|
11
|
-
def initialize(builder: RegistryEventBuilder.new)
|
|
12
|
-
@builder = builder
|
|
13
|
-
end
|
|
14
|
-
|
|
15
|
-
def publish_readiness_async(readiness)
|
|
16
|
-
schedule { publish_event(@builder.readiness(readiness)) }
|
|
17
|
-
end
|
|
18
|
-
|
|
19
|
-
def publish_models_async(models, readiness:)
|
|
20
|
-
schedule do
|
|
21
|
-
Array(models).each do |model|
|
|
22
|
-
publish_event(@builder.model_available(model, readiness:))
|
|
23
|
-
end
|
|
24
|
-
end
|
|
25
|
-
end
|
|
26
|
-
|
|
27
|
-
private
|
|
28
|
-
|
|
29
|
-
def schedule(&)
|
|
30
|
-
return false unless publishing_available?
|
|
31
|
-
|
|
32
|
-
Thread.new do
|
|
33
|
-
Thread.current.abort_on_exception = false
|
|
34
|
-
yield
|
|
35
|
-
rescue StandardError => e
|
|
36
|
-
log_publish_failure(e, level: :debug)
|
|
37
|
-
end
|
|
38
|
-
rescue StandardError => e
|
|
39
|
-
log_publish_failure(e, level: :debug)
|
|
40
|
-
false
|
|
41
|
-
end
|
|
42
|
-
|
|
43
|
-
def publish_event(event)
|
|
44
|
-
return false unless publishing_available?
|
|
45
|
-
|
|
46
|
-
message_class.new(event:, app_id: APP_ID).publish(spool: false)
|
|
47
|
-
rescue StandardError => e
|
|
48
|
-
log_publish_failure(e)
|
|
49
|
-
false
|
|
50
|
-
end
|
|
51
|
-
|
|
52
|
-
def publishing_available?
|
|
53
|
-
return false unless registry_event_available?
|
|
54
|
-
return false unless transport_message_available?
|
|
55
|
-
return true unless defined?(::Legion::Transport::Connection)
|
|
56
|
-
return true unless ::Legion::Transport::Connection.respond_to?(:session_open?)
|
|
57
|
-
|
|
58
|
-
::Legion::Transport::Connection.session_open?
|
|
59
|
-
rescue StandardError
|
|
60
|
-
false
|
|
61
|
-
end
|
|
62
|
-
|
|
63
|
-
def registry_event_available?
|
|
64
|
-
defined?(::Legion::Extensions::Llm::Routing::RegistryEvent)
|
|
65
|
-
end
|
|
66
|
-
|
|
67
|
-
def transport_message_available?
|
|
68
|
-
return true if message_class_defined?
|
|
69
|
-
return false unless defined?(::Legion::Transport::Message) && defined?(::Legion::Transport::Exchange)
|
|
70
|
-
|
|
71
|
-
require 'legion/extensions/llm/vllm/transport/messages/registry_event'
|
|
72
|
-
message_class_defined?
|
|
73
|
-
rescue LoadError
|
|
74
|
-
false
|
|
75
|
-
end
|
|
76
|
-
|
|
77
|
-
def message_class_defined?
|
|
78
|
-
defined?(::Legion::Extensions::Llm::Vllm::Transport::Messages::RegistryEvent)
|
|
79
|
-
end
|
|
80
|
-
|
|
81
|
-
def message_class
|
|
82
|
-
::Legion::Extensions::Llm::Vllm::Transport::Messages::RegistryEvent
|
|
83
|
-
end
|
|
84
|
-
|
|
85
|
-
def log_publish_failure(error, level: :warn)
|
|
86
|
-
message = "[lex-llm-vllm] llm.registry publish failed: #{error.class}: #{error.message}"
|
|
87
|
-
logger = ::Legion::Extensions::Llm.logger if defined?(::Legion::Extensions::Llm)
|
|
88
|
-
if logger.respond_to?(level)
|
|
89
|
-
logger.public_send(level, message)
|
|
90
|
-
elsif logger.respond_to?(:debug)
|
|
91
|
-
logger.debug(message)
|
|
92
|
-
end
|
|
93
|
-
rescue StandardError
|
|
94
|
-
nil
|
|
95
|
-
end
|
|
96
|
-
end
|
|
97
|
-
end
|
|
98
|
-
end
|
|
99
|
-
end
|
|
100
|
-
end
|
|
@@ -1,24 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
module Legion
|
|
4
|
-
module Extensions
|
|
5
|
-
module Llm
|
|
6
|
-
module Vllm
|
|
7
|
-
module Transport
|
|
8
|
-
module Exchanges
|
|
9
|
-
# Topic exchange for vLLM provider availability events.
|
|
10
|
-
class LlmRegistry < ::Legion::Transport::Exchange
|
|
11
|
-
def exchange_name
|
|
12
|
-
'llm.registry'
|
|
13
|
-
end
|
|
14
|
-
|
|
15
|
-
def default_type
|
|
16
|
-
'topic'
|
|
17
|
-
end
|
|
18
|
-
end
|
|
19
|
-
end
|
|
20
|
-
end
|
|
21
|
-
end
|
|
22
|
-
end
|
|
23
|
-
end
|
|
24
|
-
end
|
|
@@ -1,42 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
require 'legion/extensions/llm/vllm/transport/exchanges/llm_registry'
|
|
4
|
-
|
|
5
|
-
module Legion
|
|
6
|
-
module Extensions
|
|
7
|
-
module Llm
|
|
8
|
-
module Vllm
|
|
9
|
-
module Transport
|
|
10
|
-
module Messages
|
|
11
|
-
# Publishes lex-llm RegistryEvent envelopes to the llm.registry exchange.
|
|
12
|
-
class RegistryEvent < ::Legion::Transport::Message
|
|
13
|
-
def initialize(event:, **options)
|
|
14
|
-
super(**event.to_h.merge(options))
|
|
15
|
-
end
|
|
16
|
-
|
|
17
|
-
def exchange
|
|
18
|
-
Transport::Exchanges::LlmRegistry
|
|
19
|
-
end
|
|
20
|
-
|
|
21
|
-
def routing_key
|
|
22
|
-
@options[:routing_key] || "llm.registry.#{@options.fetch(:event_type)}"
|
|
23
|
-
end
|
|
24
|
-
|
|
25
|
-
def type
|
|
26
|
-
'llm.registry.event'
|
|
27
|
-
end
|
|
28
|
-
|
|
29
|
-
def app_id
|
|
30
|
-
@options[:app_id] || RegistryPublisher::APP_ID
|
|
31
|
-
end
|
|
32
|
-
|
|
33
|
-
def persistent # rubocop:disable Naming/PredicateMethod
|
|
34
|
-
false
|
|
35
|
-
end
|
|
36
|
-
end
|
|
37
|
-
end
|
|
38
|
-
end
|
|
39
|
-
end
|
|
40
|
-
end
|
|
41
|
-
end
|
|
42
|
-
end
|