lex-llm 0.1.6 → 0.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: d7d400d2739542ca417b189fba9d20f468d32ca6b4c1d4864fcd884a21d31577
-  data.tar.gz: 1c0ffee1ed602d77d2a295d2f4e7904abcef1ac284754553c3c5f883a78fa023
+  metadata.gz: c5b58678c0d7021662b2ef38d80932bd373e3c462b9d05e1004dfc880b1e6d6f
+  data.tar.gz: 49a88cc742e128df1bd93882585df89e595c9761194da354af6be93bd4bd4c2e
 SHA512:
-  metadata.gz: 07ea1df46e8469e493b89855d983ef1416d38e6907404eae1502340f37f271a43c2de442825b48dbca538907042e520d85f95316803f8a87b08633edf849685a
-  data.tar.gz: 8ee001e548224a71f050c3224d140d33e652603a9308d6301e539a8f984d8aed922e7c8f8ff3313df4a42b5a693acd2dc896914a71c23e47c42eae90f4a62c9d
+  metadata.gz: 273c724d3b7b2945dea092184c8df80952d6ec0c8c38aefbba966a28de91c43c2862be91ac9e918943a7e2e42b5dde4c7b98826852598c7e82c4dd10bbca26e8
+  data.tar.gz: 68b38d28e88ad07c333ca0f7e94885a3006b1f3fc727f3c2b7206cba5497b42f848927b2d555db5382abac05123b76074572c7ca6834da0de312b2f30fdd3a03
data/CHANGELOG.md CHANGED
@@ -1,5 +1,12 @@
 # Changelog
 
+## 0.1.7 - 2026-04-30
+
+- Add thinking extraction from OpenAI-compatible streaming chunks (reasoning_content, reasoning, think tags)
+- Add stream_usage_supported? opt-in for streaming token usage reporting
+- Add filtered_chunk method to StreamAccumulator for clean thinking/content separation
+- Wrap streaming callback through accumulator filter for proper SSE event routing
+
 ## 0.1.6 - 2026-04-28
 
 - Add provider-neutral registry event envelopes for future `llm.registry` offering availability, unavailability, degraded, and heartbeat publishing without persistence.
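The 0.1.7 changelog entries above introduce a `stream_usage_supported?` opt-in. Below is a minimal standalone sketch of how that opt-in behaves; the module and class names and the simplified payload are illustrative only and are not the gem's real classes (the actual change is in the OpenAI-compatible provider hunks further down). When a provider overrides the hook, streaming requests gain `stream_options: { include_usage: true }`, which asks an OpenAI-compatible API to append token usage to the stream.

```ruby
# Standalone sketch, not gem code: mirrors the opt-in check added to render_payload.
module OpenAICompatiblePayloadSketch
  # Default matches the gem: streaming usage reporting is off unless a provider opts in.
  def stream_usage_supported? = false

  def render_payload(model_id, stream:)
    payload = { model: model_id, stream: stream }
    # Only opted-in providers request a final usage chunk in the stream.
    payload[:stream_options] = { include_usage: true } if stream && stream_usage_supported?
    payload
  end
end

class DefaultProvider
  include OpenAICompatiblePayloadSketch
end

class UsageReportingProvider
  include OpenAICompatiblePayloadSketch
  def stream_usage_supported? = true # opt in to streaming token usage reporting
end

p DefaultProvider.new.render_payload('example-model', stream: true)
# => {:model=>"example-model", :stream=>true}
p UsageReportingProvider.new.render_payload('example-model', stream: true)
# => {:model=>"example-model", :stream=>true, :stream_options=>{:include_usage=>true}}
```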
@@ -6,6 +6,7 @@ module Legion
   class Provider
     # Shared OpenAI-compatible HTTP payload and response adapter.
    module OpenAICompatible
+      def stream_usage_supported? = false
       def completion_url = '/v1/chat/completions'
       def stream_url = completion_url
       def models_url = '/v1/models'
@@ -20,7 +21,7 @@ module Legion
       private

       def render_payload(messages, tools:, temperature:, model:, stream:, schema:, thinking:, tool_prefs:) # rubocop:disable Metrics/ParameterLists
-        {
+        payload = {
           model: model.id,
           messages: format_openai_messages(messages),
           temperature: temperature,
@@ -30,6 +31,8 @@ module Legion
           response_format: openai_response_format(schema),
           reasoning_effort: openai_reasoning_effort(thinking)
         }.compact
+        payload[:stream_options] = { include_usage: true } if stream && stream_usage_supported?
+        payload
       end

       def format_openai_messages(messages)
@@ -116,12 +119,14 @@ module Legion
         choice = Array(body['choices']).first || {}
         message = choice['message'] || {}
         usage = body['usage'] || {}
+        content, thinking = extract_thinking_from_completion(message)

         Legion::Extensions::Llm::Message.new(
           role: :assistant,
-          content: message['content'],
+          content: content,
           model_id: body['model'],
           tool_calls: parse_tool_calls(message['tool_calls']),
+          thinking: thinking,
           input_tokens: usage['prompt_tokens'],
           output_tokens: usage['completion_tokens'],
           reasoning_tokens: usage.dig('completion_tokens_details', 'reasoning_tokens'),
@@ -129,22 +134,68 @@ module Legion
         )
       end

+      def extract_thinking_from_completion(message)
+        reasoning = message['reasoning_content'] || message['reasoning']
+        content = message['content']
+
+        if reasoning
+          [content, Thinking.build(text: reasoning)]
+        elsif content.is_a?(String) && content.include?('<think>')
+          think_text = content[%r{<think>(.*?)</think>}m, 1]
+          clean = content.gsub(%r{<think>.*?</think>}m, '').strip
+          [clean, Thinking.build(text: think_text)]
+        else
+          [content, nil]
+        end
+      end
+
       def build_chunk(data)
         choice = Array(data['choices']).first || {}
         delta = choice['delta'] || {}
         usage = data['usage'] || {}
+        content, thinking = extract_thinking_from_chunk(delta)

         Legion::Extensions::Llm::Chunk.new(
           role: :assistant,
-          content: delta['content'],
+          content: content,
           model_id: data['model'],
           tool_calls: parse_tool_calls(delta['tool_calls']),
+          thinking: thinking,
           input_tokens: usage['prompt_tokens'],
           output_tokens: usage['completion_tokens'],
           raw: data
         )
       end

+      def extract_thinking_from_chunk(delta)
+        reasoning = delta['reasoning_content'] || delta['reasoning']
+        content = delta['content']
+
+        if reasoning
+          [content, Thinking.build(text: reasoning)]
+        elsif content.is_a?(String) && content.include?('<think>')
+          clean, think_text = split_think_tags(content)
+          [clean, Thinking.build(text: think_text)]
+        else
+          [content, nil]
+        end
+      end
+
+      def split_think_tags(text) # rubocop:disable Metrics/PerceivedComplexity
+        if text.match?(%r{<think>.*</think>}m)
+          thinking = text[%r{<think>(.*?)</think>}m, 1]
+          clean = text.gsub(%r{<think>.*?</think>}m, '').strip
+          [clean.empty? ? nil : clean, thinking]
+        elsif text.start_with?('<think>')
+          [nil, text.delete_prefix('<think>')]
+        elsif text.include?('</think>')
+          parts = text.split('</think>', 2)
+          [parts[1]&.strip.then { |s| s&.empty? ? nil : s }, parts[0]]
+        else
+          [text, nil]
+        end
+      end
+
       def parse_tool_calls(tool_calls)
         return nil unless tool_calls&.any?

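The think-tag handling added above covers three streaming cases: a complete `<think>…</think>` pair inside one delta, an opening tag whose close has not arrived yet, and a closing tag that finishes thinking begun in an earlier delta. A standalone sketch mirroring that logic (not gem code; the method name below is illustrative):

```ruby
# Standalone mirror of the split_think_tags branches above, runnable without the gem.
def demo_split_think_tags(text)
  if text.match?(%r{<think>.*</think>}m)
    # Complete pair in one delta: strip the tags out of the visible content.
    thinking = text[%r{<think>(.*?)</think>}m, 1]
    clean = text.gsub(%r{<think>.*?</think>}m, '').strip
    [clean.empty? ? nil : clean, thinking]
  elsif text.start_with?('<think>')
    # Opening tag only: everything after it is thinking, nothing is visible yet.
    [nil, text.delete_prefix('<think>')]
  elsif text.include?('</think>')
    # Closing tag only: text before it finishes the thinking, text after it is visible.
    parts = text.split('</think>', 2)
    [parts[1]&.strip.then { |s| s&.empty? ? nil : s }, parts[0]]
  else
    [text, nil]
  end
end

p demo_split_think_tags('<think>outline the steps</think>Here is the answer.')
# => ["Here is the answer.", "outline the steps"]
p demo_split_think_tags('<think>still reasoning')
# => [nil, "still reasoning"]
p demo_split_think_tags('done reasoning</think> Final text')
# => ["Final text", "done reasoning"]
p demo_split_think_tags('plain visible delta')
# => ["plain visible delta", nil]
```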
@@ -26,12 +26,32 @@ module Legion
     Legion::Extensions::Llm.logger.debug { chunk.inspect } if Legion::Extensions::Llm.config.log_stream_debug
     @model_id ||= chunk.model_id

+    @last_content_delta = +''
+    @last_thinking_delta = +''
     handle_chunk_content(chunk)
     append_thinking_from_chunk(chunk)
     count_tokens chunk
     Legion::Extensions::Llm.logger.debug { inspect } if Legion::Extensions::Llm.config.log_stream_debug
   end

+  def filtered_chunk(chunk) # rubocop:disable Metrics/PerceivedComplexity
+    has_content = !@last_content_delta.empty?
+    has_thinking = !@last_thinking_delta.empty?
+    has_tokens = chunk.input_tokens&.positive? || chunk.output_tokens&.positive?
+    return nil unless has_content || has_thinking || chunk.tool_call? || has_tokens
+
+    Chunk.new(
+      role: :assistant,
+      content: has_content ? @last_content_delta : nil,
+      thinking: has_thinking ? Thinking.build(text: @last_thinking_delta) : chunk.thinking,
+      model_id: chunk.model_id,
+      tool_calls: chunk.tool_calls,
+      input_tokens: chunk.input_tokens,
+      output_tokens: chunk.output_tokens,
+      raw: chunk.raw
+    )
+  end
+
   def to_message(response)
     Message.new(
       role: :assistant,
@@ -137,14 +157,21 @@ module Legion
   def append_text_with_thinking(text)
     content_chunk, thinking_chunk = extract_think_tags(text)
     @content << content_chunk
-    @thinking_text << thinking_chunk if thinking_chunk
+    @last_content_delta << content_chunk
+    return unless thinking_chunk
+
+    @thinking_text << thinking_chunk
+    @last_thinking_delta << thinking_chunk
   end

   def append_thinking_from_chunk(chunk)
     thinking = chunk.thinking
     return unless thinking

-    @thinking_text << thinking.text.to_s if thinking.text
+    if thinking.text
+      @thinking_text << thinking.text.to_s
+      @last_thinking_delta << thinking.text.to_s
+    end
     @thinking_signature ||= thinking.signature # rubocop:disable Naming/MemoizedInstanceVariableName
   end
@@ -12,16 +12,11 @@ module Legion

     response = connection.post stream_url, payload do |req|
       req.headers = additional_headers.merge(req.headers) unless additional_headers.empty?
+      on_chunk = build_stream_callback(accumulator, block)
       if faraday_1?
-        req.options[:on_data] = handle_stream do |chunk|
-          accumulator.add chunk
-          block.call chunk
-        end
+        req.options[:on_data] = handle_stream(&on_chunk)
       else
-        req.options.on_data = handle_stream do |chunk|
-          accumulator.add chunk
-          block.call chunk
-        end
+        req.options.on_data = handle_stream(&on_chunk)
       end
     end

@@ -30,6 +25,14 @@
     message
   end

+  def build_stream_callback(accumulator, block)
+    proc do |chunk|
+      accumulator.add chunk
+      filtered = accumulator.filtered_chunk(chunk)
+      block.call(filtered) if filtered
+    end
+  end
+
   def handle_stream(&block)
     build_on_data_handler do |data|
       block.call(build_chunk(data)) if data.is_a?(Hash)
@@ -3,7 +3,7 @@
 module Legion
   module Extensions
     module Llm
-      VERSION = '0.1.6'
+      VERSION = '0.1.7'
     end
   end
 end
metadata CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: lex-llm
 version: !ruby/object:Gem::Version
-  version: 0.1.6
+  version: 0.1.7
 platform: ruby
 authors:
 - LegionIO