lex-llm 0.4.10 → 0.4.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +12 -0
- data/lib/legion/extensions/llm/error.rb +42 -0
- data/lib/legion/extensions/llm/provider.rb +2 -0
- data/lib/legion/extensions/llm/responses/thinking_extractor.rb +89 -17
- data/lib/legion/extensions/llm/stream_accumulator.rb +88 -18
- data/lib/legion/extensions/llm/streaming.rb +40 -2
- data/lib/legion/extensions/llm/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 6d60f78c459fb43344897e6fdba10730b881f698229058a50a1c1be2564539cf
|
|
4
|
+
data.tar.gz: d7fcedadb69266af972caf1a51d1153bd5270f1fd5e9b45f65d51076fafa07aa
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: c60726bfac3eff11cf37d8035ad78c7437b627f465bad31efdac1be3061fe410dc176d805bd168389859fa94773cd994578d265a6534a8e3feed1d37db517988
|
|
7
|
+
data.tar.gz: 40439ec46e06530b9e5d287fe8d5980d57b87c2700343b3282c30deb9cd1b241862812e4264a9842d6a1fea20aa9bcb4f580cf08ed31cc61b73c00c2c753c9ce
|
data/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,17 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## 0.4.13 - 2026-05-15
|
|
4
|
+
|
|
5
|
+
- Strip provider thinking from OpenAI-compatible responses when local models emit `<thinking>` tags or untagged initial reasoning preambles, and keep that reasoning out of live streaming content deltas.
|
|
6
|
+
|
|
7
|
+
## 0.4.12 - 2026-05-15
|
|
8
|
+
|
|
9
|
+
- Preserve streamed provider error bodies in a custom Faraday env key so Faraday Net::HTTP finalization cannot replace the buffered body with an empty string before `ErrorMiddleware` parses it.
|
|
10
|
+
|
|
11
|
+
## 0.4.11 - 2026-05-15
|
|
12
|
+
|
|
13
|
+
- Fix `handle_failed_response` to preserve non-200 streaming error bodies across chunks instead of swallowing `ParseError` and falling through to a generic "An unknown error occurred" error. Complete JSON error bodies still raise typed provider errors immediately; incomplete bodies are buffered onto the Faraday response env for final middleware parsing, with regex fallback extraction for vLLM-style partial `message` fields when the env cannot carry the buffered body.
|
|
14
|
+
|
|
3
15
|
## 0.4.10 - 2026-05-13
|
|
4
16
|
|
|
5
17
|
- Add cache-backed `model_detail` lookup with 24-hour TTL; nil results are not cached; `fetch_model_detail` hook for subclasses to override with live API calls.
|
|
@@ -54,6 +54,8 @@ module Legion
|
|
|
54
54
|
|
|
55
55
|
# Faraday middleware that maps provider-specific API errors to Legion::Extensions::Llm errors.
|
|
56
56
|
class ErrorMiddleware < Faraday::Middleware
|
|
57
|
+
STREAM_ERROR_BODY_KEY = :legion_llm_stream_error_body
|
|
58
|
+
|
|
57
59
|
def initialize(app, options = {})
|
|
58
60
|
super(app)
|
|
59
61
|
@provider = options[:provider]
|
|
@@ -79,6 +81,7 @@ module Legion
|
|
|
79
81
|
].freeze
|
|
80
82
|
|
|
81
83
|
def parse_error(provider:, response:) # rubocop:disable Metrics/PerceivedComplexity
|
|
84
|
+
response = response_with_stream_error_body(response)
|
|
82
85
|
message = provider&.parse_error(response)
|
|
83
86
|
|
|
84
87
|
case response.status
|
|
@@ -116,12 +119,51 @@ module Legion
|
|
|
116
119
|
|
|
117
120
|
private
|
|
118
121
|
|
|
122
|
+
def response_with_stream_error_body(response)
|
|
123
|
+
return response unless empty_body?(response)
|
|
124
|
+
|
|
125
|
+
stream_body = preserved_stream_error_body(response)
|
|
126
|
+
return response if stream_body.to_s.empty?
|
|
127
|
+
|
|
128
|
+
ResponseWithBody.new(response, stream_body)
|
|
129
|
+
end
|
|
130
|
+
|
|
131
|
+
def empty_body?(response)
|
|
132
|
+
!response.respond_to?(:body) || response.body.to_s.empty?
|
|
133
|
+
end
|
|
134
|
+
|
|
135
|
+
def preserved_stream_error_body(response)
|
|
136
|
+
return unless response.respond_to?(:[])
|
|
137
|
+
|
|
138
|
+
response[STREAM_ERROR_BODY_KEY]
|
|
139
|
+
rescue StandardError
|
|
140
|
+
nil
|
|
141
|
+
end
|
|
142
|
+
|
|
119
143
|
def context_length_exceeded?(message)
|
|
120
144
|
return false if message.to_s.empty?
|
|
121
145
|
|
|
122
146
|
CONTEXT_LENGTH_PATTERNS.any? { |pattern| message.match?(pattern) }
|
|
123
147
|
end
|
|
124
148
|
end
|
|
149
|
+
|
|
150
|
+
ResponseWithBody = Struct.new(:response, :body) do
|
|
151
|
+
def status = response.status
|
|
152
|
+
|
|
153
|
+
def [](key)
|
|
154
|
+
response[key] if response.respond_to?(:[])
|
|
155
|
+
end
|
|
156
|
+
|
|
157
|
+
def method_missing(method_name, ...)
|
|
158
|
+
return response.public_send(method_name, ...) if response.respond_to?(method_name)
|
|
159
|
+
|
|
160
|
+
super
|
|
161
|
+
end
|
|
162
|
+
|
|
163
|
+
def respond_to_missing?(method_name, include_private = false)
|
|
164
|
+
response.respond_to?(method_name, include_private) || super
|
|
165
|
+
end
|
|
166
|
+
end
|
|
125
167
|
end
|
|
126
168
|
end
|
|
127
169
|
end
|
|
@@ -8,9 +8,39 @@ module Legion
|
|
|
8
8
|
module ThinkingExtractor
|
|
9
9
|
Extraction = Struct.new(:content, :thinking, :signature, :metadata, keyword_init: true)
|
|
10
10
|
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
11
|
+
THINK_TAG_PAIRS = [
|
|
12
|
+
['<thinking>', '</thinking>'],
|
|
13
|
+
['<think>', '</think>']
|
|
14
|
+
].freeze
|
|
15
|
+
UNTAGGED_PREAMBLE_MAX_LENGTH = 4_000
|
|
16
|
+
UNTAGGED_PREAMBLE_STARTS = [
|
|
17
|
+
'the user',
|
|
18
|
+
'the request',
|
|
19
|
+
'the prompt',
|
|
20
|
+
'the question',
|
|
21
|
+
'i need',
|
|
22
|
+
'i should',
|
|
23
|
+
'i will',
|
|
24
|
+
"i'll",
|
|
25
|
+
'i can',
|
|
26
|
+
'we need',
|
|
27
|
+
'we should',
|
|
28
|
+
'we will',
|
|
29
|
+
"we'll",
|
|
30
|
+
'we can',
|
|
31
|
+
'let me'
|
|
32
|
+
].freeze
|
|
33
|
+
UNTAGGED_PREAMBLE_PATTERNS = [
|
|
34
|
+
/
|
|
35
|
+
\AThe\s+(?:user|request|prompt|question)\b.*\b
|
|
36
|
+
(?:let\s+me|i'll|i\s+will|i\s+should|i\s+need|i\s+can|respond|answer|reply)\b
|
|
37
|
+
/imx,
|
|
38
|
+
/
|
|
39
|
+
\A(?:I|We)\s+(?:need|should|will|can)\s+(?:to\s+)?
|
|
40
|
+
(?:answer|respond|reply|confirm|provide|explain|help)\b
|
|
41
|
+
/imx,
|
|
42
|
+
/\ALet me\s+(?:answer|respond|reply|confirm|provide|explain|help)\b/im
|
|
43
|
+
].freeze
|
|
14
44
|
THINKING_METADATA_KEYS = %i[
|
|
15
45
|
reasoning_content reasoning thinking thinking_text thinking_signature reasoning_signature thought_signature
|
|
16
46
|
].freeze
|
|
@@ -42,20 +72,45 @@ module Legion
|
|
|
42
72
|
remaining = content.dup
|
|
43
73
|
|
|
44
74
|
remaining = consume_next_segment(remaining, clean, thinking_parts) until remaining.empty?
|
|
75
|
+
clean, untagged_thinking = extract_untagged_preamble(clean.strip)
|
|
76
|
+
thinking_parts << untagged_thinking
|
|
45
77
|
|
|
46
|
-
[clean
|
|
78
|
+
[clean, compact_thinking(thinking_parts)]
|
|
47
79
|
end
|
|
48
80
|
private_class_method :extract_from_content
|
|
49
81
|
|
|
82
|
+
def extract_untagged_preamble(content)
|
|
83
|
+
return [content, nil] unless content.is_a?(String)
|
|
84
|
+
|
|
85
|
+
match = content.match(/\A(?<preamble>.+?)\n{2,}(?<visible>.+)\z/m)
|
|
86
|
+
return [content, nil] unless match
|
|
87
|
+
|
|
88
|
+
preamble = match[:preamble].strip
|
|
89
|
+
return [content, nil] unless untagged_reasoning_preamble?(preamble)
|
|
90
|
+
|
|
91
|
+
[match[:visible].sub(/\A[[:space:]]+/, '').strip, preamble]
|
|
92
|
+
end
|
|
93
|
+
|
|
94
|
+
def untagged_reasoning_preamble_candidate?(content)
|
|
95
|
+
return false unless content.is_a?(String)
|
|
96
|
+
|
|
97
|
+
text = content.lstrip.downcase
|
|
98
|
+
return false if text.empty?
|
|
99
|
+
|
|
100
|
+
UNTAGGED_PREAMBLE_STARTS.any? do |start|
|
|
101
|
+
start.start_with?(text) || text.start_with?(start)
|
|
102
|
+
end
|
|
103
|
+
end
|
|
104
|
+
|
|
50
105
|
def consume_next_segment(remaining, clean, thinking_parts)
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
if
|
|
55
|
-
thinking_parts << remaining.slice(0,
|
|
56
|
-
remaining.slice((
|
|
57
|
-
elsif
|
|
58
|
-
consume_open_think_segment(remaining,
|
|
106
|
+
close_match = next_tag_match(remaining, :close)
|
|
107
|
+
open_match = next_tag_match(remaining, :open)
|
|
108
|
+
|
|
109
|
+
if close_match && (open_match.nil? || close_match[:index] < open_match[:index])
|
|
110
|
+
thinking_parts << remaining.slice(0, close_match[:index])
|
|
111
|
+
remaining.slice((close_match[:index] + close_match[:tag].length)..).to_s.sub(/\A[[:space:]]+/, '')
|
|
112
|
+
elsif open_match
|
|
113
|
+
consume_open_think_segment(remaining, open_match, clean, thinking_parts)
|
|
59
114
|
else
|
|
60
115
|
clean << remaining
|
|
61
116
|
+''
|
|
@@ -63,20 +118,37 @@ module Legion
|
|
|
63
118
|
end
|
|
64
119
|
private_class_method :consume_next_segment
|
|
65
120
|
|
|
66
|
-
def consume_open_think_segment(remaining,
|
|
67
|
-
clean << remaining.slice(0,
|
|
68
|
-
after_open = remaining.slice((
|
|
69
|
-
close_index = after_open.index(
|
|
121
|
+
def consume_open_think_segment(remaining, open_match, clean, thinking_parts)
|
|
122
|
+
clean << remaining.slice(0, open_match[:index])
|
|
123
|
+
after_open = remaining.slice((open_match[:index] + open_match[:tag].length)..).to_s
|
|
124
|
+
close_index = after_open.index(open_match[:close_tag])
|
|
70
125
|
unless close_index
|
|
71
126
|
thinking_parts << after_open
|
|
72
127
|
return +''
|
|
73
128
|
end
|
|
74
129
|
|
|
75
130
|
thinking_parts << after_open.slice(0, close_index)
|
|
76
|
-
after_open.slice((close_index +
|
|
131
|
+
after_open.slice((close_index + open_match[:close_tag].length)..).to_s
|
|
77
132
|
end
|
|
78
133
|
private_class_method :consume_open_think_segment
|
|
79
134
|
|
|
135
|
+
def next_tag_match(text, type)
|
|
136
|
+
matches = THINK_TAG_PAIRS.filter_map do |open_tag, close_tag|
|
|
137
|
+
tag = type == :open ? open_tag : close_tag
|
|
138
|
+
index = text.index(tag)
|
|
139
|
+
{ index: index, tag: tag, close_tag: close_tag } if index
|
|
140
|
+
end
|
|
141
|
+
matches.min_by { |match| match[:index] }
|
|
142
|
+
end
|
|
143
|
+
private_class_method :next_tag_match
|
|
144
|
+
|
|
145
|
+
def untagged_reasoning_preamble?(preamble)
|
|
146
|
+
return false if preamble.length > UNTAGGED_PREAMBLE_MAX_LENGTH
|
|
147
|
+
|
|
148
|
+
UNTAGGED_PREAMBLE_PATTERNS.any? { |pattern| preamble.match?(pattern) }
|
|
149
|
+
end
|
|
150
|
+
private_class_method :untagged_reasoning_preamble?
|
|
151
|
+
|
|
80
152
|
def extract_metadata_thinking(metadata)
|
|
81
153
|
compact_thinking(
|
|
82
154
|
[
|
|
@@ -21,6 +21,9 @@ module Legion
|
|
|
21
21
|
@thinking_tokens = nil
|
|
22
22
|
@inside_think_tag = false
|
|
23
23
|
@pending_think_tag = +''
|
|
24
|
+
@active_think_close_tag = nil
|
|
25
|
+
@untagged_preamble_pending = true
|
|
26
|
+
@untagged_preamble_buffer = +''
|
|
24
27
|
@latest_tool_call_id = nil
|
|
25
28
|
end
|
|
26
29
|
|
|
@@ -55,6 +58,8 @@ module Legion
|
|
|
55
58
|
end
|
|
56
59
|
|
|
57
60
|
def to_message(response)
|
|
61
|
+
flush_pending_untagged_preamble
|
|
62
|
+
|
|
58
63
|
Message.new(
|
|
59
64
|
role: :assistant,
|
|
60
65
|
content: content.empty? ? nil : content,
|
|
@@ -171,14 +176,63 @@ module Legion
|
|
|
171
176
|
|
|
172
177
|
def append_text_with_thinking(text)
|
|
173
178
|
content_chunk, thinking_chunk = extract_think_tags(text)
|
|
179
|
+
content_chunk, untagged_thinking = extract_untagged_preamble(content_chunk)
|
|
174
180
|
@content << content_chunk
|
|
175
181
|
@last_content_delta << content_chunk
|
|
182
|
+
if untagged_thinking
|
|
183
|
+
@thinking_text << untagged_thinking
|
|
184
|
+
@last_thinking_delta << untagged_thinking
|
|
185
|
+
end
|
|
176
186
|
return unless thinking_chunk
|
|
177
187
|
|
|
178
188
|
@thinking_text << thinking_chunk
|
|
179
189
|
@last_thinking_delta << thinking_chunk
|
|
180
190
|
end
|
|
181
191
|
|
|
192
|
+
def extract_untagged_preamble(content_chunk)
|
|
193
|
+
return [content_chunk, nil] unless @untagged_preamble_pending
|
|
194
|
+
return [content_chunk, nil] unless @content.empty? && @thinking_text.empty?
|
|
195
|
+
return [content_chunk, nil] if content_chunk.empty?
|
|
196
|
+
|
|
197
|
+
candidate = @untagged_preamble_buffer + content_chunk
|
|
198
|
+
return release_untagged_preamble(candidate) unless candidate_untagged_preamble?(candidate)
|
|
199
|
+
|
|
200
|
+
content, thinking = Responses::ThinkingExtractor.extract_untagged_preamble(candidate)
|
|
201
|
+
return release_untagged_preamble(content, thinking) if thinking
|
|
202
|
+
return release_untagged_preamble(candidate) if complete_untagged_preamble_candidate?(candidate)
|
|
203
|
+
|
|
204
|
+
@untagged_preamble_buffer = candidate
|
|
205
|
+
['', nil]
|
|
206
|
+
end
|
|
207
|
+
|
|
208
|
+
def candidate_untagged_preamble?(candidate)
|
|
209
|
+
Responses::ThinkingExtractor.untagged_reasoning_preamble_candidate?(candidate)
|
|
210
|
+
end
|
|
211
|
+
|
|
212
|
+
def complete_untagged_preamble_candidate?(candidate)
|
|
213
|
+
candidate.match?(/\n{2,}/) || candidate.length > Responses::ThinkingExtractor::UNTAGGED_PREAMBLE_MAX_LENGTH
|
|
214
|
+
end
|
|
215
|
+
|
|
216
|
+
def release_untagged_preamble(content, thinking = nil)
|
|
217
|
+
@untagged_preamble_pending = false
|
|
218
|
+
@untagged_preamble_buffer = +''
|
|
219
|
+
[content, thinking]
|
|
220
|
+
end
|
|
221
|
+
|
|
222
|
+
def flush_pending_untagged_preamble
|
|
223
|
+
return if @untagged_preamble_buffer.empty?
|
|
224
|
+
|
|
225
|
+
content, thinking = Responses::ThinkingExtractor.extract_untagged_preamble(@untagged_preamble_buffer)
|
|
226
|
+
if thinking
|
|
227
|
+
@content << content
|
|
228
|
+
@thinking_text << thinking
|
|
229
|
+
else
|
|
230
|
+
@content << @untagged_preamble_buffer
|
|
231
|
+
end
|
|
232
|
+
@untagged_preamble_buffer = +''
|
|
233
|
+
@untagged_preamble_pending = false
|
|
234
|
+
end
|
|
235
|
+
|
|
182
236
|
def append_thinking_from_chunk(chunk)
|
|
183
237
|
thinking = chunk.thinking
|
|
184
238
|
return unless thinking
|
|
@@ -191,8 +245,6 @@ module Legion
|
|
|
191
245
|
end
|
|
192
246
|
|
|
193
247
|
def extract_think_tags(text)
|
|
194
|
-
start_tag = '<think>'
|
|
195
|
-
end_tag = '</think>'
|
|
196
248
|
remaining = @pending_think_tag + text
|
|
197
249
|
@pending_think_tag = +''
|
|
198
250
|
|
|
@@ -201,9 +253,9 @@ module Legion
|
|
|
201
253
|
|
|
202
254
|
until remaining.empty?
|
|
203
255
|
remaining = if @inside_think_tag
|
|
204
|
-
consume_think_content(remaining,
|
|
256
|
+
consume_think_content(remaining, @active_think_close_tag, thinking)
|
|
205
257
|
else
|
|
206
|
-
consume_non_think_content(remaining,
|
|
258
|
+
consume_non_think_content(remaining, output)
|
|
207
259
|
end
|
|
208
260
|
end
|
|
209
261
|
|
|
@@ -215,41 +267,59 @@ module Legion
|
|
|
215
267
|
if end_index
|
|
216
268
|
thinking << remaining.slice(0, end_index)
|
|
217
269
|
@inside_think_tag = false
|
|
270
|
+
@active_think_close_tag = nil
|
|
218
271
|
remaining.slice((end_index + end_tag.length)..) || +''
|
|
219
272
|
else
|
|
220
|
-
suffix_len = longest_suffix_prefix(remaining, end_tag)
|
|
273
|
+
suffix_len = longest_suffix_prefix(remaining, [end_tag])
|
|
221
274
|
thinking << remaining.slice(0, remaining.length - suffix_len)
|
|
222
275
|
@pending_think_tag = remaining.slice(-suffix_len, suffix_len)
|
|
223
276
|
+''
|
|
224
277
|
end
|
|
225
278
|
end
|
|
226
279
|
|
|
227
|
-
def consume_non_think_content(remaining,
|
|
228
|
-
unmatched_close = remaining
|
|
229
|
-
|
|
230
|
-
if unmatched_close && (
|
|
280
|
+
def consume_non_think_content(remaining, output)
|
|
281
|
+
unmatched_close = next_stream_tag_match(remaining, :close)
|
|
282
|
+
start_match = next_stream_tag_match(remaining, :open)
|
|
283
|
+
if unmatched_close && (start_match.nil? || unmatched_close[:index] < start_match[:index])
|
|
231
284
|
consume_unmatched_think_close(remaining, unmatched_close)
|
|
232
|
-
elsif
|
|
233
|
-
output << remaining.slice(0,
|
|
285
|
+
elsif start_match
|
|
286
|
+
output << remaining.slice(0, start_match[:index])
|
|
234
287
|
@inside_think_tag = true
|
|
235
|
-
|
|
288
|
+
@active_think_close_tag = start_match[:close_tag]
|
|
289
|
+
remaining.slice((start_match[:index] + start_match[:tag].length)..) || +''
|
|
236
290
|
else
|
|
237
|
-
suffix_len = longest_suffix_prefix(remaining,
|
|
291
|
+
suffix_len = longest_suffix_prefix(remaining, stream_tag_tokens)
|
|
238
292
|
output << remaining.slice(0, remaining.length - suffix_len)
|
|
239
293
|
@pending_think_tag = remaining.slice(-suffix_len, suffix_len)
|
|
240
294
|
+''
|
|
241
295
|
end
|
|
242
296
|
end
|
|
243
297
|
|
|
244
|
-
def consume_unmatched_think_close(remaining,
|
|
245
|
-
|
|
246
|
-
thinking = remaining.slice(0, close_index)
|
|
298
|
+
def consume_unmatched_think_close(remaining, close_match)
|
|
299
|
+
thinking = remaining.slice(0, close_match[:index])
|
|
247
300
|
@thinking_text << thinking
|
|
248
301
|
@last_thinking_delta << thinking
|
|
249
|
-
remaining.slice((
|
|
302
|
+
remaining.slice((close_match[:index] + close_match[:tag].length)..).to_s.sub(/\A[[:space:]]+/, '')
|
|
303
|
+
end
|
|
304
|
+
|
|
305
|
+
def next_stream_tag_match(text, type)
|
|
306
|
+
matches = Responses::ThinkingExtractor::THINK_TAG_PAIRS.filter_map do |open_tag, close_tag|
|
|
307
|
+
tag = type == :open ? open_tag : close_tag
|
|
308
|
+
index = text.index(tag)
|
|
309
|
+
{ index: index, tag: tag, close_tag: close_tag } if index
|
|
310
|
+
end
|
|
311
|
+
matches.min_by { |match| match[:index] }
|
|
312
|
+
end
|
|
313
|
+
|
|
314
|
+
def stream_tag_tokens
|
|
315
|
+
Responses::ThinkingExtractor::THINK_TAG_PAIRS.flat_map { |open_tag, close_tag| [open_tag, close_tag] }
|
|
316
|
+
end
|
|
317
|
+
|
|
318
|
+
def longest_suffix_prefix(text, tags)
|
|
319
|
+
tags.map { |tag| longest_suffix_prefix_for_tag(text, tag) }.max || 0
|
|
250
320
|
end
|
|
251
321
|
|
|
252
|
-
def
|
|
322
|
+
def longest_suffix_prefix_for_tag(text, tag)
|
|
253
323
|
max = [text.length, tag.length - 1].min
|
|
254
324
|
max.downto(1) do |len|
|
|
255
325
|
return len if text.end_with?(tag[0, len])
|
|
@@ -93,10 +93,48 @@ module Legion
|
|
|
93
93
|
|
|
94
94
|
def handle_failed_response(chunk, buffer, env)
|
|
95
95
|
buffer << chunk
|
|
96
|
+
body_persisted = persist_failed_response_body(buffer, env)
|
|
96
97
|
error_data = Legion::JSON.parse(buffer, symbolize_names: false)
|
|
97
98
|
handle_parsed_error(error_data, env)
|
|
98
|
-
rescue Legion::JSON::ParseError
|
|
99
|
-
|
|
99
|
+
rescue Legion::JSON::ParseError
|
|
100
|
+
return if body_persisted
|
|
101
|
+
|
|
102
|
+
raise_partial_streaming_error(buffer, env)
|
|
103
|
+
end
|
|
104
|
+
|
|
105
|
+
def persist_failed_response_body(buffer, env)
|
|
106
|
+
custom_persisted = persist_failed_response_custom_body?(buffer, env)
|
|
107
|
+
body_persisted = persist_failed_response_env_body?(buffer, env)
|
|
108
|
+
custom_persisted || body_persisted
|
|
109
|
+
end
|
|
110
|
+
|
|
111
|
+
def persist_failed_response_env_body?(buffer, env)
|
|
112
|
+
return false unless env.respond_to?(:body=)
|
|
113
|
+
|
|
114
|
+
env.body = buffer.dup
|
|
115
|
+
true
|
|
116
|
+
end
|
|
117
|
+
|
|
118
|
+
def persist_failed_response_custom_body?(buffer, env)
|
|
119
|
+
return false unless env.respond_to?(:[]=)
|
|
120
|
+
|
|
121
|
+
env[ErrorMiddleware::STREAM_ERROR_BODY_KEY] = buffer.dup
|
|
122
|
+
true
|
|
123
|
+
rescue StandardError
|
|
124
|
+
false
|
|
125
|
+
end
|
|
126
|
+
|
|
127
|
+
def raise_partial_streaming_error(buffer, env)
|
|
128
|
+
partial = buffer[/"message"\s*:\s*"([^"]{1,200})/, 1]
|
|
129
|
+
status = env&.status || 0
|
|
130
|
+
msg = if partial
|
|
131
|
+
"Provider error (status #{status}): #{partial}"
|
|
132
|
+
else
|
|
133
|
+
"Provider error (status #{status}) - response body incomplete"
|
|
134
|
+
end
|
|
135
|
+
log.warn "[llm][streaming] action=handle_failed_response status=#{status} " \
|
|
136
|
+
"partial_body=#{buffer.length}b msg=#{partial.inspect}"
|
|
137
|
+
raise Legion::Extensions::Llm::ServerError, msg
|
|
100
138
|
end
|
|
101
139
|
|
|
102
140
|
def handle_sse(chunk, parser, env, &)
|