lex-llm 0.1.5 → 0.1.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +12 -0
- data/README.md +31 -1
- data/lib/legion/extensions/llm/provider/open_ai_compatible.rb +54 -3
- data/lib/legion/extensions/llm/routing/registry_event.rb +167 -0
- data/lib/legion/extensions/llm/stream_accumulator.rb +29 -2
- data/lib/legion/extensions/llm/streaming.rb +11 -8
- data/lib/legion/extensions/llm/version.rb +1 -1
- data/lib/legion/extensions/llm.rb +2 -0
- metadata +2 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: c5b58678c0d7021662b2ef38d80932bd373e3c462b9d05e1004dfc880b1e6d6f
|
|
4
|
+
data.tar.gz: 49a88cc742e128df1bd93882585df89e595c9761194da354af6be93bd4bd4c2e
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 273c724d3b7b2945dea092184c8df80952d6ec0c8c38aefbba966a28de91c43c2862be91ac9e918943a7e2e42b5dde4c7b98826852598c7e82c4dd10bbca26e8
|
|
7
|
+
data.tar.gz: 68b38d28e88ad07c333ca0f7e94885a3006b1f3fc727f3c2b7206cba5497b42f848927b2d555db5382abac05123b76074572c7ca6834da0de312b2f30fdd3a03
|
data/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,17 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## 0.1.7 - 2026-04-30
|
|
4
|
+
|
|
5
|
+
- Add thinking extraction from OpenAI-compatible streaming chunks (reasoning_content, reasoning, think tags)
|
|
6
|
+
- Add stream_usage_supported? opt-in for streaming token usage reporting
|
|
7
|
+
- Add filtered_chunk method to StreamAccumulator for clean thinking/content separation
|
|
8
|
+
- Wrap streaming callback through accumulator filter for proper SSE event routing
|
|
9
|
+
|
|
10
|
+
## 0.1.6 - 2026-04-28
|
|
11
|
+
|
|
12
|
+
- Add provider-neutral registry event envelopes for future `llm.registry` offering availability, unavailability, degraded, and heartbeat publishing without persistence.
|
|
13
|
+
- Sanitize registry offering payloads and reject sensitive runtime, capacity, health, lane, and metadata keys before publication.
|
|
14
|
+
|
|
3
15
|
## 0.1.5 - 2026-04-28
|
|
4
16
|
|
|
5
17
|
- Add the expanded provider-neutral model offering contract with offering IDs, provider instances, canonical model aliases, model families, and routing metadata.
|
data/README.md
CHANGED
|
@@ -48,7 +48,7 @@ gem 'lex-llm'
|
|
|
48
48
|
Provider extensions should declare `lex-llm` as a gemspec dependency:
|
|
49
49
|
|
|
50
50
|
```ruby
|
|
51
|
-
spec.add_dependency 'lex-llm', '>= 0.1.
|
|
51
|
+
spec.add_dependency 'lex-llm', '>= 0.1.6'
|
|
52
52
|
```
|
|
53
53
|
|
|
54
54
|
For local development across LegionIO repos, prefer a local path override in the app or test `Gemfile`, not a permanent git dependency in the gemspec.
|
|
@@ -171,6 +171,36 @@ registry.filter(
|
|
|
171
171
|
)
|
|
172
172
|
```
|
|
173
173
|
|
|
174
|
+
## Registry Events
|
|
175
|
+
|
|
176
|
+
`Legion::Extensions::Llm::Routing::RegistryEvent` builds dependency-light envelopes for future `llm.registry` publishing. It does not persist registry state or publish messages by itself.
|
|
177
|
+
|
|
178
|
+
```ruby
|
|
179
|
+
event = Legion::Extensions::Llm::Routing::RegistryEvent.available(
|
|
180
|
+
offering,
|
|
181
|
+
runtime: { host_id: 'macbook-m4-max', process: { pid: 12_345 } },
|
|
182
|
+
capacity: { concurrency: 4, queued: 0 },
|
|
183
|
+
health: { ready: true, latency_ms: 180 },
|
|
184
|
+
lane: offering.lane_key,
|
|
185
|
+
metadata: { observed_by: :lex_llm_ollama }
|
|
186
|
+
)
|
|
187
|
+
|
|
188
|
+
event.to_h
|
|
189
|
+
# => {
|
|
190
|
+
# event_id: "...",
|
|
191
|
+
# event_type: :offering_available,
|
|
192
|
+
# occurred_at: "2026-04-28T14:30:15.123456Z",
|
|
193
|
+
# offering: { ... },
|
|
194
|
+
# runtime: { host_id: "macbook-m4-max", process: { pid: 12345 } },
|
|
195
|
+
# capacity: { concurrency: 4, queued: 0 },
|
|
196
|
+
# health: { ready: true, latency_ms: 180 },
|
|
197
|
+
# lane: "llm.fleet.inference.qwen3-6-27b-q4-k-m.ctx32768",
|
|
198
|
+
# metadata: { observed_by: :lex_llm_ollama }
|
|
199
|
+
# }
|
|
200
|
+
```
|
|
201
|
+
|
|
202
|
+
Supported event types are `:offering_available`, `:offering_unavailable`, `:offering_degraded`, and `:offering_heartbeat`. Event offerings are derived from `ModelOffering#to_h`, with sensitive offering fields removed. Optional `runtime`, `capacity`, `health`, `lane`, and `metadata` values are intended for non-secret operational context and reject sensitive keys such as credentials, tokens, secrets, URLs, endpoint paths, prompts, and reply queues.
|
|
203
|
+
|
|
174
204
|
## Fleet Lanes
|
|
175
205
|
|
|
176
206
|
Fleet routing uses shared work lanes derived from model offerings. A lane describes the work required, not the worker that happens to do it.
|
data/lib/legion/extensions/llm/provider/open_ai_compatible.rb
CHANGED
|
@@ -6,6 +6,7 @@ module Legion
|
|
|
6
6
|
class Provider
|
|
7
7
|
# Shared OpenAI-compatible HTTP payload and response adapter.
|
|
8
8
|
module OpenAICompatible
|
|
9
|
+
def stream_usage_supported? = false
|
|
9
10
|
def completion_url = '/v1/chat/completions'
|
|
10
11
|
def stream_url = completion_url
|
|
11
12
|
def models_url = '/v1/models'
|
|
@@ -20,7 +21,7 @@ module Legion
|
|
|
20
21
|
private
|
|
21
22
|
|
|
22
23
|
def render_payload(messages, tools:, temperature:, model:, stream:, schema:, thinking:, tool_prefs:) # rubocop:disable Metrics/ParameterLists
|
|
23
|
-
{
|
|
24
|
+
payload = {
|
|
24
25
|
model: model.id,
|
|
25
26
|
messages: format_openai_messages(messages),
|
|
26
27
|
temperature: temperature,
|
|
@@ -30,6 +31,8 @@ module Legion
|
|
|
30
31
|
response_format: openai_response_format(schema),
|
|
31
32
|
reasoning_effort: openai_reasoning_effort(thinking)
|
|
32
33
|
}.compact
|
|
34
|
+
payload[:stream_options] = { include_usage: true } if stream && stream_usage_supported?
|
|
35
|
+
payload
|
|
33
36
|
end
|
|
34
37
|
|
|
35
38
|
def format_openai_messages(messages)
|
|
@@ -116,12 +119,14 @@ module Legion
|
|
|
116
119
|
choice = Array(body['choices']).first || {}
|
|
117
120
|
message = choice['message'] || {}
|
|
118
121
|
usage = body['usage'] || {}
|
|
122
|
+
content, thinking = extract_thinking_from_completion(message)
|
|
119
123
|
|
|
120
124
|
Legion::Extensions::Llm::Message.new(
|
|
121
125
|
role: :assistant,
|
|
122
|
-
content:
|
|
126
|
+
content: content,
|
|
123
127
|
model_id: body['model'],
|
|
124
128
|
tool_calls: parse_tool_calls(message['tool_calls']),
|
|
129
|
+
thinking: thinking,
|
|
125
130
|
input_tokens: usage['prompt_tokens'],
|
|
126
131
|
output_tokens: usage['completion_tokens'],
|
|
127
132
|
reasoning_tokens: usage.dig('completion_tokens_details', 'reasoning_tokens'),
|
|
@@ -129,22 +134,68 @@ module Legion
|
|
|
129
134
|
)
|
|
130
135
|
end
|
|
131
136
|
|
|
137
|
+
def extract_thinking_from_completion(message)
|
|
138
|
+
reasoning = message['reasoning_content'] || message['reasoning']
|
|
139
|
+
content = message['content']
|
|
140
|
+
|
|
141
|
+
if reasoning
|
|
142
|
+
[content, Thinking.build(text: reasoning)]
|
|
143
|
+
elsif content.is_a?(String) && content.include?('<think>')
|
|
144
|
+
think_text = content[%r{<think>(.*?)</think>}m, 1]
|
|
145
|
+
clean = content.gsub(%r{<think>.*?</think>}m, '').strip
|
|
146
|
+
[clean, Thinking.build(text: think_text)]
|
|
147
|
+
else
|
|
148
|
+
[content, nil]
|
|
149
|
+
end
|
|
150
|
+
end
|
|
151
|
+
|
|
132
152
|
def build_chunk(data)
|
|
133
153
|
choice = Array(data['choices']).first || {}
|
|
134
154
|
delta = choice['delta'] || {}
|
|
135
155
|
usage = data['usage'] || {}
|
|
156
|
+
content, thinking = extract_thinking_from_chunk(delta)
|
|
136
157
|
|
|
137
158
|
Legion::Extensions::Llm::Chunk.new(
|
|
138
159
|
role: :assistant,
|
|
139
|
-
content:
|
|
160
|
+
content: content,
|
|
140
161
|
model_id: data['model'],
|
|
141
162
|
tool_calls: parse_tool_calls(delta['tool_calls']),
|
|
163
|
+
thinking: thinking,
|
|
142
164
|
input_tokens: usage['prompt_tokens'],
|
|
143
165
|
output_tokens: usage['completion_tokens'],
|
|
144
166
|
raw: data
|
|
145
167
|
)
|
|
146
168
|
end
|
|
147
169
|
|
|
170
|
+
def extract_thinking_from_chunk(delta)
|
|
171
|
+
reasoning = delta['reasoning_content'] || delta['reasoning']
|
|
172
|
+
content = delta['content']
|
|
173
|
+
|
|
174
|
+
if reasoning
|
|
175
|
+
[content, Thinking.build(text: reasoning)]
|
|
176
|
+
elsif content.is_a?(String) && content.include?('<think>')
|
|
177
|
+
clean, think_text = split_think_tags(content)
|
|
178
|
+
[clean, Thinking.build(text: think_text)]
|
|
179
|
+
else
|
|
180
|
+
[content, nil]
|
|
181
|
+
end
|
|
182
|
+
end
|
|
183
|
+
|
|
184
|
+
def split_think_tags(text) # rubocop:disable Metrics/PerceivedComplexity
|
|
185
|
+
if text.match?(%r{<think>.*</think>}m)
|
|
186
|
+
thinking = text[%r{<think>(.*?)</think>}m, 1]
|
|
187
|
+
clean = text.gsub(%r{<think>.*?</think>}m, '').strip
|
|
188
|
+
[clean.empty? ? nil : clean, thinking]
|
|
189
|
+
elsif text.start_with?('<think>')
|
|
190
|
+
[nil, text.delete_prefix('<think>')]
|
|
191
|
+
elsif text.include?('</think>')
|
|
192
|
+
parts = text.split('</think>', 2)
|
|
193
|
+
[parts[1]&.strip.then { |s| s&.empty? ? nil : s }, parts[0]]
|
|
194
|
+
else
|
|
195
|
+
[text, nil]
|
|
196
|
+
end
|
|
197
|
+
end
|
|
198
|
+
|
|
148
199
|
def parse_tool_calls(tool_calls)
|
|
149
200
|
return nil unless tool_calls&.any?
|
|
150
201
|
|
data/lib/legion/extensions/llm/routing/registry_event.rb
ADDED
|
@@ -0,0 +1,167 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Legion
|
|
4
|
+
module Extensions
|
|
5
|
+
module Llm
|
|
6
|
+
module Routing
|
|
7
|
+
# Serializable provider-neutral envelope for future llm.registry publishing.
|
|
8
|
+
class RegistryEvent
|
|
9
|
+
EVENT_TYPES = %i[
|
|
10
|
+
offering_available
|
|
11
|
+
offering_unavailable
|
|
12
|
+
offering_degraded
|
|
13
|
+
offering_heartbeat
|
|
14
|
+
].freeze
|
|
15
|
+
SENSITIVE_KEYS = %i[
|
|
16
|
+
access_key
|
|
17
|
+
api_key
|
|
18
|
+
authorization
|
|
19
|
+
bearer
|
|
20
|
+
client_secret
|
|
21
|
+
credential
|
|
22
|
+
credentials
|
|
23
|
+
endpoint
|
|
24
|
+
endpoint_url
|
|
25
|
+
password
|
|
26
|
+
path
|
|
27
|
+
private_key
|
|
28
|
+
prompt
|
|
29
|
+
reply_to
|
|
30
|
+
secret
|
|
31
|
+
secrets
|
|
32
|
+
token
|
|
33
|
+
url
|
|
34
|
+
].freeze
|
|
35
|
+
|
|
36
|
+
attr_reader :event_id, :event_type, :occurred_at, :offering, :runtime, :capacity, :health, :lane, :metadata
|
|
37
|
+
|
|
38
|
+
class << self
|
|
39
|
+
def available(offering, **attributes)
|
|
40
|
+
new(event_type: :offering_available, offering:, **attributes)
|
|
41
|
+
end
|
|
42
|
+
|
|
43
|
+
def unavailable(offering, **attributes)
|
|
44
|
+
new(event_type: :offering_unavailable, offering:, **attributes)
|
|
45
|
+
end
|
|
46
|
+
|
|
47
|
+
def degraded(offering, **attributes)
|
|
48
|
+
new(event_type: :offering_degraded, offering:, **attributes)
|
|
49
|
+
end
|
|
50
|
+
|
|
51
|
+
def heartbeat(offering, **attributes)
|
|
52
|
+
new(event_type: :offering_heartbeat, offering:, **attributes)
|
|
53
|
+
end
|
|
54
|
+
end
|
|
55
|
+
|
|
56
|
+
def initialize(event_type:, offering:, **attributes)
|
|
57
|
+
@event_id = normalize_event_id(attributes.fetch(:event_id, SecureRandom.uuid))
|
|
58
|
+
@event_type = normalize_event_type(event_type)
|
|
59
|
+
@occurred_at = normalize_time(attributes.fetch(:occurred_at, Time.now.utc))
|
|
60
|
+
@offering = normalize_offering(offering)
|
|
61
|
+
@runtime = sanitize_optional_hash(attributes[:runtime], :runtime)
|
|
62
|
+
@capacity = sanitize_optional_hash(attributes[:capacity], :capacity)
|
|
63
|
+
@health = sanitize_optional_hash(attributes[:health], :health)
|
|
64
|
+
@lane = sanitize_optional_value(attributes[:lane], :lane)
|
|
65
|
+
@metadata = sanitize_optional_hash(attributes[:metadata], :metadata)
|
|
66
|
+
end
|
|
67
|
+
|
|
68
|
+
def to_h
|
|
69
|
+
{
|
|
70
|
+
event_id: event_id,
|
|
71
|
+
event_type: event_type,
|
|
72
|
+
occurred_at: occurred_at.utc.iso8601(6),
|
|
73
|
+
offering: sanitized_offering_hash,
|
|
74
|
+
runtime: runtime,
|
|
75
|
+
capacity: capacity,
|
|
76
|
+
health: health,
|
|
77
|
+
lane: lane,
|
|
78
|
+
metadata: metadata
|
|
79
|
+
}.compact
|
|
80
|
+
end
|
|
81
|
+
|
|
82
|
+
private
|
|
83
|
+
|
|
84
|
+
def normalize_event_id(value)
|
|
85
|
+
normalized = value.to_s.strip
|
|
86
|
+
raise ArgumentError, 'event_id is required' if normalized.empty?
|
|
87
|
+
|
|
88
|
+
normalized
|
|
89
|
+
end
|
|
90
|
+
|
|
91
|
+
def normalize_event_type(value)
|
|
92
|
+
normalized = value.to_sym
|
|
93
|
+
raise ArgumentError, "unsupported registry event type: #{value}" unless EVENT_TYPES.include?(normalized)
|
|
94
|
+
|
|
95
|
+
normalized
|
|
96
|
+
end
|
|
97
|
+
|
|
98
|
+
def normalize_time(value)
|
|
99
|
+
return value.utc if value.respond_to?(:utc)
|
|
100
|
+
|
|
101
|
+
Time.parse(value.to_s).utc
|
|
102
|
+
end
|
|
103
|
+
|
|
104
|
+
def normalize_offering(value)
|
|
105
|
+
return value if value.is_a?(ModelOffering)
|
|
106
|
+
|
|
107
|
+
ModelOffering.new(value)
|
|
108
|
+
end
|
|
109
|
+
|
|
110
|
+
def sanitized_offering_hash
|
|
111
|
+
sanitize_hash(offering.to_h, on_sensitive: :drop)
|
|
112
|
+
end
|
|
113
|
+
|
|
114
|
+
def sanitize_optional_hash(value, label)
|
|
115
|
+
return nil if value.nil?
|
|
116
|
+
|
|
117
|
+
sanitize_hash(value.to_h, label:)
|
|
118
|
+
end
|
|
119
|
+
|
|
120
|
+
def sanitize_optional_value(value, label)
|
|
121
|
+
return nil if value.nil?
|
|
122
|
+
return sanitize_hash(value.to_h, label:) if value.respond_to?(:to_h)
|
|
123
|
+
return value unless value.is_a?(Array)
|
|
124
|
+
|
|
125
|
+
sanitize_array(value, label:, path: [])
|
|
126
|
+
end
|
|
127
|
+
|
|
128
|
+
def sanitize_hash(hash, label: nil, path: [], on_sensitive: :raise)
|
|
129
|
+
hash.each_with_object({}) do |(key, value), sanitized|
|
|
130
|
+
normalized_key = key.to_sym
|
|
131
|
+
key_path = path + [normalized_key]
|
|
132
|
+
if sensitive_key?(normalized_key)
|
|
133
|
+
raise_sensitive_key!(label, key_path) if on_sensitive == :raise
|
|
134
|
+
|
|
135
|
+
next
|
|
136
|
+
end
|
|
137
|
+
|
|
138
|
+
sanitized[normalized_key] = sanitize_value(value, label:, path: key_path, on_sensitive:)
|
|
139
|
+
end
|
|
140
|
+
end
|
|
141
|
+
|
|
142
|
+
def sanitize_array(array, label:, path:, on_sensitive: :raise)
|
|
143
|
+
array.map { |value| sanitize_value(value, label:, path:, on_sensitive:) }
|
|
144
|
+
end
|
|
145
|
+
|
|
146
|
+
def sanitize_value(value, label:, path:, on_sensitive:)
|
|
147
|
+
return sanitize_hash(value, label:, path:, on_sensitive:) if value.is_a?(Hash)
|
|
148
|
+
return sanitize_array(value, label:, path:, on_sensitive:) if value.is_a?(Array)
|
|
149
|
+
|
|
150
|
+
value
|
|
151
|
+
end
|
|
152
|
+
|
|
153
|
+
def sensitive_key?(key)
|
|
154
|
+
normalized = key.to_s.downcase.gsub(/[^a-z0-9]+/, '_').to_sym
|
|
155
|
+
SENSITIVE_KEYS.include?(normalized) ||
|
|
156
|
+
normalized.to_s.end_with?('_key', '_secret', '_token', '_password')
|
|
157
|
+
end
|
|
158
|
+
|
|
159
|
+
def raise_sensitive_key!(label, path)
|
|
160
|
+
prefix = label ? "#{label} contains" : 'registry event contains'
|
|
161
|
+
raise ArgumentError, "#{prefix} sensitive key: #{path.join('.')}"
|
|
162
|
+
end
|
|
163
|
+
end
|
|
164
|
+
end
|
|
165
|
+
end
|
|
166
|
+
end
|
|
167
|
+
end
|
data/lib/legion/extensions/llm/stream_accumulator.rb
CHANGED
|
@@ -26,12 +26,32 @@ module Legion
|
|
|
26
26
|
Legion::Extensions::Llm.logger.debug { chunk.inspect } if Legion::Extensions::Llm.config.log_stream_debug
|
|
27
27
|
@model_id ||= chunk.model_id
|
|
28
28
|
|
|
29
|
+
@last_content_delta = +''
|
|
30
|
+
@last_thinking_delta = +''
|
|
29
31
|
handle_chunk_content(chunk)
|
|
30
32
|
append_thinking_from_chunk(chunk)
|
|
31
33
|
count_tokens chunk
|
|
32
34
|
Legion::Extensions::Llm.logger.debug { inspect } if Legion::Extensions::Llm.config.log_stream_debug
|
|
33
35
|
end
|
|
34
36
|
|
|
37
|
+
def filtered_chunk(chunk) # rubocop:disable Metrics/PerceivedComplexity
|
|
38
|
+
has_content = !@last_content_delta.empty?
|
|
39
|
+
has_thinking = !@last_thinking_delta.empty?
|
|
40
|
+
has_tokens = chunk.input_tokens&.positive? || chunk.output_tokens&.positive?
|
|
41
|
+
return nil unless has_content || has_thinking || chunk.tool_call? || has_tokens
|
|
42
|
+
|
|
43
|
+
Chunk.new(
|
|
44
|
+
role: :assistant,
|
|
45
|
+
content: has_content ? @last_content_delta : nil,
|
|
46
|
+
thinking: has_thinking ? Thinking.build(text: @last_thinking_delta) : chunk.thinking,
|
|
47
|
+
model_id: chunk.model_id,
|
|
48
|
+
tool_calls: chunk.tool_calls,
|
|
49
|
+
input_tokens: chunk.input_tokens,
|
|
50
|
+
output_tokens: chunk.output_tokens,
|
|
51
|
+
raw: chunk.raw
|
|
52
|
+
)
|
|
53
|
+
end
|
|
54
|
+
|
|
35
55
|
def to_message(response)
|
|
36
56
|
Message.new(
|
|
37
57
|
role: :assistant,
|
|
@@ -137,14 +157,21 @@ module Legion
|
|
|
137
157
|
def append_text_with_thinking(text)
|
|
138
158
|
content_chunk, thinking_chunk = extract_think_tags(text)
|
|
139
159
|
@content << content_chunk
|
|
140
|
-
@
|
|
160
|
+
@last_content_delta << content_chunk
|
|
161
|
+
return unless thinking_chunk
|
|
162
|
+
|
|
163
|
+
@thinking_text << thinking_chunk
|
|
164
|
+
@last_thinking_delta << thinking_chunk
|
|
141
165
|
end
|
|
142
166
|
|
|
143
167
|
def append_thinking_from_chunk(chunk)
|
|
144
168
|
thinking = chunk.thinking
|
|
145
169
|
return unless thinking
|
|
146
170
|
|
|
147
|
-
|
|
171
|
+
if thinking.text
|
|
172
|
+
@thinking_text << thinking.text.to_s
|
|
173
|
+
@last_thinking_delta << thinking.text.to_s
|
|
174
|
+
end
|
|
148
175
|
@thinking_signature ||= thinking.signature # rubocop:disable Naming/MemoizedInstanceVariableName
|
|
149
176
|
end
|
|
150
177
|
|
data/lib/legion/extensions/llm/streaming.rb
CHANGED
|
@@ -12,16 +12,11 @@ module Legion
|
|
|
12
12
|
|
|
13
13
|
response = connection.post stream_url, payload do |req|
|
|
14
14
|
req.headers = additional_headers.merge(req.headers) unless additional_headers.empty?
|
|
15
|
+
on_chunk = build_stream_callback(accumulator, block)
|
|
15
16
|
if faraday_1?
|
|
16
|
-
req.options[:on_data] = handle_stream
|
|
17
|
-
accumulator.add chunk
|
|
18
|
-
block.call chunk
|
|
19
|
-
end
|
|
17
|
+
req.options[:on_data] = handle_stream(&on_chunk)
|
|
20
18
|
else
|
|
21
|
-
req.options.on_data = handle_stream
|
|
22
|
-
accumulator.add chunk
|
|
23
|
-
block.call chunk
|
|
24
|
-
end
|
|
19
|
+
req.options.on_data = handle_stream(&on_chunk)
|
|
25
20
|
end
|
|
26
21
|
end
|
|
27
22
|
|
|
@@ -30,6 +25,14 @@ module Legion
|
|
|
30
25
|
message
|
|
31
26
|
end
|
|
32
27
|
|
|
28
|
+
def build_stream_callback(accumulator, block)
|
|
29
|
+
proc do |chunk|
|
|
30
|
+
accumulator.add chunk
|
|
31
|
+
filtered = accumulator.filtered_chunk(chunk)
|
|
32
|
+
block.call(filtered) if filtered
|
|
33
|
+
end
|
|
34
|
+
end
|
|
35
|
+
|
|
33
36
|
def handle_stream(&block)
|
|
34
37
|
build_on_data_handler do |data|
|
|
35
38
|
block.call(build_chunk(data)) if data.is_a?(Hash)
|
data/lib/legion/extensions/llm.rb
CHANGED
|
@@ -39,6 +39,7 @@ module Legion
|
|
|
39
39
|
module Types
|
|
40
40
|
ModelOffering = Routing::ModelOffering unless const_defined?(:ModelOffering, false)
|
|
41
41
|
OfferingRegistry = Routing::OfferingRegistry unless const_defined?(:OfferingRegistry, false)
|
|
42
|
+
RegistryEvent = Routing::RegistryEvent unless const_defined?(:RegistryEvent, false)
|
|
42
43
|
end
|
|
43
44
|
|
|
44
45
|
# Shared routing helpers exposed under the Legion extension namespace.
|
|
@@ -46,6 +47,7 @@ module Legion
|
|
|
46
47
|
LaneKey = ::Legion::Extensions::Llm::Routing::LaneKey unless const_defined?(:LaneKey, false)
|
|
47
48
|
OfferingRegistry = ::Legion::Extensions::Llm::Routing::OfferingRegistry unless const_defined?(:OfferingRegistry,
|
|
48
49
|
false)
|
|
50
|
+
RegistryEvent = ::Legion::Extensions::Llm::Routing::RegistryEvent unless const_defined?(:RegistryEvent, false)
|
|
49
51
|
end
|
|
50
52
|
|
|
51
53
|
class << self
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: lex-llm
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.1.5
|
|
4
|
+
version: 0.1.7
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- LegionIO
|
|
@@ -229,6 +229,7 @@ files:
|
|
|
229
229
|
- lib/legion/extensions/llm/routing/lane_key.rb
|
|
230
230
|
- lib/legion/extensions/llm/routing/model_offering.rb
|
|
231
231
|
- lib/legion/extensions/llm/routing/offering_registry.rb
|
|
232
|
+
- lib/legion/extensions/llm/routing/registry_event.rb
|
|
232
233
|
- lib/legion/extensions/llm/stream_accumulator.rb
|
|
233
234
|
- lib/legion/extensions/llm/streaming.rb
|
|
234
235
|
- lib/legion/extensions/llm/thinking.rb
|