lex-llm 0.1.6 → 0.1.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: c5b58678c0d7021662b2ef38d80932bd373e3c462b9d05e1004dfc880b1e6d6f
|
|
4
|
+
data.tar.gz: 49a88cc742e128df1bd93882585df89e595c9761194da354af6be93bd4bd4c2e
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 273c724d3b7b2945dea092184c8df80952d6ec0c8c38aefbba966a28de91c43c2862be91ac9e918943a7e2e42b5dde4c7b98826852598c7e82c4dd10bbca26e8
|
|
7
|
+
data.tar.gz: 68b38d28e88ad07c333ca0f7e94885a3006b1f3fc727f3c2b7206cba5497b42f848927b2d555db5382abac05123b76074572c7ca6834da0de312b2f30fdd3a03
|
data/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,12 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## 0.1.7 - 2026-04-30
|
|
4
|
+
|
|
5
|
+
- Add thinking extraction from OpenAI-compatible streaming chunks (reasoning_content, reasoning, think tags)
|
|
6
|
+
- Add stream_usage_supported? opt-in for streaming token usage reporting
|
|
7
|
+
- Add filtered_chunk method to StreamAccumulator for clean thinking/content separation
|
|
8
|
+
- Wrap streaming callback through accumulator filter for proper SSE event routing
|
|
9
|
+
|
|
3
10
|
## 0.1.6 - 2026-04-28
|
|
4
11
|
|
|
5
12
|
- Add provider-neutral registry event envelopes for future `llm.registry` offering availability, unavailability, degraded, and heartbeat publishing without persistence.
|
|
@@ -6,6 +6,7 @@ module Legion
|
|
|
6
6
|
class Provider
|
|
7
7
|
# Shared OpenAI-compatible HTTP payload and response adapter.
|
|
8
8
|
module OpenAICompatible
|
|
9
|
+
def stream_usage_supported? = false
|
|
9
10
|
def completion_url = '/v1/chat/completions'
|
|
10
11
|
def stream_url = completion_url
|
|
11
12
|
def models_url = '/v1/models'
|
|
@@ -20,7 +21,7 @@ module Legion
|
|
|
20
21
|
private
|
|
21
22
|
|
|
22
23
|
def render_payload(messages, tools:, temperature:, model:, stream:, schema:, thinking:, tool_prefs:) # rubocop:disable Metrics/ParameterLists
|
|
23
|
-
{
|
|
24
|
+
payload = {
|
|
24
25
|
model: model.id,
|
|
25
26
|
messages: format_openai_messages(messages),
|
|
26
27
|
temperature: temperature,
|
|
@@ -30,6 +31,8 @@ module Legion
|
|
|
30
31
|
response_format: openai_response_format(schema),
|
|
31
32
|
reasoning_effort: openai_reasoning_effort(thinking)
|
|
32
33
|
}.compact
|
|
34
|
+
payload[:stream_options] = { include_usage: true } if stream && stream_usage_supported?
|
|
35
|
+
payload
|
|
33
36
|
end
|
|
34
37
|
|
|
35
38
|
def format_openai_messages(messages)
|
|
@@ -116,12 +119,14 @@ module Legion
|
|
|
116
119
|
choice = Array(body['choices']).first || {}
|
|
117
120
|
message = choice['message'] || {}
|
|
118
121
|
usage = body['usage'] || {}
|
|
122
|
+
content, thinking = extract_thinking_from_completion(message)
|
|
119
123
|
|
|
120
124
|
Legion::Extensions::Llm::Message.new(
|
|
121
125
|
role: :assistant,
|
|
122
|
-
content:
|
|
126
|
+
content: content,
|
|
123
127
|
model_id: body['model'],
|
|
124
128
|
tool_calls: parse_tool_calls(message['tool_calls']),
|
|
129
|
+
thinking: thinking,
|
|
125
130
|
input_tokens: usage['prompt_tokens'],
|
|
126
131
|
output_tokens: usage['completion_tokens'],
|
|
127
132
|
reasoning_tokens: usage.dig('completion_tokens_details', 'reasoning_tokens'),
|
|
@@ -129,22 +134,68 @@ module Legion
|
|
|
129
134
|
)
|
|
130
135
|
end
|
|
131
136
|
|
|
137
|
+
def extract_thinking_from_completion(message)
|
|
138
|
+
reasoning = message['reasoning_content'] || message['reasoning']
|
|
139
|
+
content = message['content']
|
|
140
|
+
|
|
141
|
+
if reasoning
|
|
142
|
+
[content, Thinking.build(text: reasoning)]
|
|
143
|
+
elsif content.is_a?(String) && content.include?('<think>')
|
|
144
|
+
think_text = content[%r{<think>(.*?)</think>}m, 1]
|
|
145
|
+
clean = content.gsub(%r{<think>.*?</think>}m, '').strip
|
|
146
|
+
[clean, Thinking.build(text: think_text)]
|
|
147
|
+
else
|
|
148
|
+
[content, nil]
|
|
149
|
+
end
|
|
150
|
+
end
|
|
151
|
+
|
|
132
152
|
def build_chunk(data)
|
|
133
153
|
choice = Array(data['choices']).first || {}
|
|
134
154
|
delta = choice['delta'] || {}
|
|
135
155
|
usage = data['usage'] || {}
|
|
156
|
+
content, thinking = extract_thinking_from_chunk(delta)
|
|
136
157
|
|
|
137
158
|
Legion::Extensions::Llm::Chunk.new(
|
|
138
159
|
role: :assistant,
|
|
139
|
-
content:
|
|
160
|
+
content: content,
|
|
140
161
|
model_id: data['model'],
|
|
141
162
|
tool_calls: parse_tool_calls(delta['tool_calls']),
|
|
163
|
+
thinking: thinking,
|
|
142
164
|
input_tokens: usage['prompt_tokens'],
|
|
143
165
|
output_tokens: usage['completion_tokens'],
|
|
144
166
|
raw: data
|
|
145
167
|
)
|
|
146
168
|
end
|
|
147
169
|
|
|
170
|
+
def extract_thinking_from_chunk(delta)
|
|
171
|
+
reasoning = delta['reasoning_content'] || delta['reasoning']
|
|
172
|
+
content = delta['content']
|
|
173
|
+
|
|
174
|
+
if reasoning
|
|
175
|
+
[content, Thinking.build(text: reasoning)]
|
|
176
|
+
elsif content.is_a?(String) && content.include?('<think>')
|
|
177
|
+
clean, think_text = split_think_tags(content)
|
|
178
|
+
[clean, Thinking.build(text: think_text)]
|
|
179
|
+
else
|
|
180
|
+
[content, nil]
|
|
181
|
+
end
|
|
182
|
+
end
|
|
183
|
+
|
|
184
|
+
def split_think_tags(text) # rubocop:disable Metrics/PerceivedComplexity
|
|
185
|
+
if text.match?(%r{<think>.*</think>}m)
|
|
186
|
+
thinking = text[%r{<think>(.*?)</think>}m, 1]
|
|
187
|
+
clean = text.gsub(%r{<think>.*?</think>}m, '').strip
|
|
188
|
+
[clean.empty? ? nil : clean, thinking]
|
|
189
|
+
elsif text.start_with?('<think>')
|
|
190
|
+
[nil, text.delete_prefix('<think>')]
|
|
191
|
+
elsif text.include?('</think>')
|
|
192
|
+
parts = text.split('</think>', 2)
|
|
193
|
+
[parts[1]&.strip.then { |s| s&.empty? ? nil : s }, parts[0]]
|
|
194
|
+
else
|
|
195
|
+
[text, nil]
|
|
196
|
+
end
|
|
197
|
+
end
|
|
198
|
+
|
|
148
199
|
def parse_tool_calls(tool_calls)
|
|
149
200
|
return nil unless tool_calls&.any?
|
|
150
201
|
|
|
@@ -26,12 +26,32 @@ module Legion
|
|
|
26
26
|
Legion::Extensions::Llm.logger.debug { chunk.inspect } if Legion::Extensions::Llm.config.log_stream_debug
|
|
27
27
|
@model_id ||= chunk.model_id
|
|
28
28
|
|
|
29
|
+
@last_content_delta = +''
|
|
30
|
+
@last_thinking_delta = +''
|
|
29
31
|
handle_chunk_content(chunk)
|
|
30
32
|
append_thinking_from_chunk(chunk)
|
|
31
33
|
count_tokens chunk
|
|
32
34
|
Legion::Extensions::Llm.logger.debug { inspect } if Legion::Extensions::Llm.config.log_stream_debug
|
|
33
35
|
end
|
|
34
36
|
|
|
37
|
+
def filtered_chunk(chunk) # rubocop:disable Metrics/PerceivedComplexity
|
|
38
|
+
has_content = !@last_content_delta.empty?
|
|
39
|
+
has_thinking = !@last_thinking_delta.empty?
|
|
40
|
+
has_tokens = chunk.input_tokens&.positive? || chunk.output_tokens&.positive?
|
|
41
|
+
return nil unless has_content || has_thinking || chunk.tool_call? || has_tokens
|
|
42
|
+
|
|
43
|
+
Chunk.new(
|
|
44
|
+
role: :assistant,
|
|
45
|
+
content: has_content ? @last_content_delta : nil,
|
|
46
|
+
thinking: has_thinking ? Thinking.build(text: @last_thinking_delta) : chunk.thinking,
|
|
47
|
+
model_id: chunk.model_id,
|
|
48
|
+
tool_calls: chunk.tool_calls,
|
|
49
|
+
input_tokens: chunk.input_tokens,
|
|
50
|
+
output_tokens: chunk.output_tokens,
|
|
51
|
+
raw: chunk.raw
|
|
52
|
+
)
|
|
53
|
+
end
|
|
54
|
+
|
|
35
55
|
def to_message(response)
|
|
36
56
|
Message.new(
|
|
37
57
|
role: :assistant,
|
|
@@ -137,14 +157,21 @@ module Legion
|
|
|
137
157
|
def append_text_with_thinking(text)
|
|
138
158
|
content_chunk, thinking_chunk = extract_think_tags(text)
|
|
139
159
|
@content << content_chunk
|
|
140
|
-
@
|
|
160
|
+
@last_content_delta << content_chunk
|
|
161
|
+
return unless thinking_chunk
|
|
162
|
+
|
|
163
|
+
@thinking_text << thinking_chunk
|
|
164
|
+
@last_thinking_delta << thinking_chunk
|
|
141
165
|
end
|
|
142
166
|
|
|
143
167
|
def append_thinking_from_chunk(chunk)
|
|
144
168
|
thinking = chunk.thinking
|
|
145
169
|
return unless thinking
|
|
146
170
|
|
|
147
|
-
|
|
171
|
+
if thinking.text
|
|
172
|
+
@thinking_text << thinking.text.to_s
|
|
173
|
+
@last_thinking_delta << thinking.text.to_s
|
|
174
|
+
end
|
|
148
175
|
@thinking_signature ||= thinking.signature # rubocop:disable Naming/MemoizedInstanceVariableName
|
|
149
176
|
end
|
|
150
177
|
|
|
@@ -12,16 +12,11 @@ module Legion
|
|
|
12
12
|
|
|
13
13
|
response = connection.post stream_url, payload do |req|
|
|
14
14
|
req.headers = additional_headers.merge(req.headers) unless additional_headers.empty?
|
|
15
|
+
on_chunk = build_stream_callback(accumulator, block)
|
|
15
16
|
if faraday_1?
|
|
16
|
-
req.options[:on_data] = handle_stream
|
|
17
|
-
accumulator.add chunk
|
|
18
|
-
block.call chunk
|
|
19
|
-
end
|
|
17
|
+
req.options[:on_data] = handle_stream(&on_chunk)
|
|
20
18
|
else
|
|
21
|
-
req.options.on_data = handle_stream
|
|
22
|
-
accumulator.add chunk
|
|
23
|
-
block.call chunk
|
|
24
|
-
end
|
|
19
|
+
req.options.on_data = handle_stream(&on_chunk)
|
|
25
20
|
end
|
|
26
21
|
end
|
|
27
22
|
|
|
@@ -30,6 +25,14 @@ module Legion
|
|
|
30
25
|
message
|
|
31
26
|
end
|
|
32
27
|
|
|
28
|
+
def build_stream_callback(accumulator, block)
|
|
29
|
+
proc do |chunk|
|
|
30
|
+
accumulator.add chunk
|
|
31
|
+
filtered = accumulator.filtered_chunk(chunk)
|
|
32
|
+
block.call(filtered) if filtered
|
|
33
|
+
end
|
|
34
|
+
end
|
|
35
|
+
|
|
33
36
|
def handle_stream(&block)
|
|
34
37
|
build_on_data_handler do |data|
|
|
35
38
|
block.call(build_chunk(data)) if data.is_a?(Hash)
|