lex-llm 0.4.9 → 0.4.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +24 -0
- data/lex-llm.gemspec +1 -0
- data/lib/legion/extensions/llm/connection.rb +1 -1
- data/lib/legion/extensions/llm/credential_sources.rb +27 -1
- data/lib/legion/extensions/llm/error.rb +42 -0
- data/lib/legion/extensions/llm/provider.rb +45 -13
- data/lib/legion/extensions/llm/responses/thinking_extractor.rb +89 -17
- data/lib/legion/extensions/llm/stream_accumulator.rb +88 -18
- data/lib/legion/extensions/llm/streaming.rb +40 -2
- data/lib/legion/extensions/llm/version.rb +1 -1
- data/lib/legion/extensions/llm.rb +9 -0
- metadata +15 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 6d60f78c459fb43344897e6fdba10730b881f698229058a50a1c1be2564539cf
|
|
4
|
+
data.tar.gz: d7fcedadb69266af972caf1a51d1153bd5270f1fd5e9b45f65d51076fafa07aa
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: c60726bfac3eff11cf37d8035ad78c7437b627f465bad31efdac1be3061fe410dc176d805bd168389859fa94773cd994578d265a6534a8e3feed1d37db517988
|
|
7
|
+
data.tar.gz: 40439ec46e06530b9e5d287fe8d5980d57b87c2700343b3282c30deb9cd1b241862812e4264a9842d6a1fea20aa9bcb4f580cf08ed31cc61b73c00c2c753c9ce
|
data/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,29 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## 0.4.13 - 2026-05-15
|
|
4
|
+
|
|
5
|
+
- Strip provider thinking from OpenAI-compatible responses when local models emit `<thinking>` tags or untagged initial reasoning preambles, and keep those hidden from live streaming content deltas.
|
|
6
|
+
|
|
7
|
+
## 0.4.12 - 2026-05-15
|
|
8
|
+
|
|
9
|
+
- Preserve streamed provider error bodies in a custom Faraday env key so Faraday Net::HTTP finalization cannot replace the buffered body with an empty string before `ErrorMiddleware` parses it.
|
|
10
|
+
|
|
11
|
+
## 0.4.11 - 2026-05-15
|
|
12
|
+
|
|
13
|
+
- Fix `handle_failed_response` to preserve non-200 streaming error bodies across chunks instead of swallowing `ParseError` and falling through to a generic "An unknown error occurred". Complete JSON error bodies still raise typed provider errors immediately; incomplete bodies are buffered onto the Faraday response env for final middleware parsing, with regex fallback extraction for vLLM-style partial `message` fields when the env cannot carry the buffered body.
|
|
14
|
+
|
|
15
|
+
## 0.4.10 - 2026-05-13
|
|
16
|
+
|
|
17
|
+
- Add cache-backed `model_detail` lookup with 24-hour TTL; nil results are not cached; `fetch_model_detail` hook for subclasses to override with live API calls.
|
|
18
|
+
- Build `model_detail_cache_key` from tier, slug, instance, and credential fingerprint so remote providers never share model detail entries across credentials.
|
|
19
|
+
- Add `credential_cache_fragment` — includes an 8-char SHA-256 credential fingerprint in cache keys for non-local providers.
|
|
20
|
+
- Add `source_tag`, `credential_fingerprint`, and `config_fingerprint` to `CredentialSources` for provenance tracking across discovered instances.
|
|
21
|
+
- Suppress Faraday raw stacktrace dumps on connection failures by setting `errors: false` on the response logger middleware.
|
|
22
|
+
- Rescue `Faraday::ConnectionFailed` in `discover_offerings` and return an empty list with a concise warning instead of propagating the exception.
|
|
23
|
+
- Wire `model_allowed?` filtering into `discover_offerings` so whitelist/blacklist settings are enforced during live discovery (was dead code before).
|
|
24
|
+
- Check instance config first for `model_whitelist`/`model_blacklist` before falling back to provider settings, enabling per-instance override.
|
|
25
|
+
- Add `legion-cache >= 1.3.0` as a runtime dependency and include `Legion::Cache::Helper` in the base `Provider` class.
|
|
26
|
+
|
|
3
27
|
## 0.4.9 - 2026-05-13
|
|
4
28
|
|
|
5
29
|
- Route provider, tool, streaming, model, attachment, connection, credential, and fleet diagnostics through `Legion::Logging::Helper`.
|
data/lex-llm.gemspec
CHANGED
|
@@ -35,6 +35,7 @@ Gem::Specification.new do |spec|
|
|
|
35
35
|
spec.add_dependency 'faraday-multipart', '>= 1'
|
|
36
36
|
spec.add_dependency 'faraday-net_http', '>= 1'
|
|
37
37
|
spec.add_dependency 'faraday-retry', '>= 1'
|
|
38
|
+
spec.add_dependency 'legion-cache', '>= 1.3.0'
|
|
38
39
|
spec.add_dependency 'legion-crypt', '>= 1.5.1'
|
|
39
40
|
spec.add_dependency 'legion-json', '>= 1.2.1'
|
|
40
41
|
spec.add_dependency 'legion-logging', '>= 1.3.2'
|
data/lib/legion/extensions/llm/credential_sources.rb
CHANGED
|
@@ -167,6 +167,30 @@ module Legion
|
|
|
167
167
|
Digest::SHA256.hexdigest(val.to_s)
|
|
168
168
|
end
|
|
169
169
|
|
|
170
|
+
# Build a human-readable source tag describing where a credential was found.
|
|
171
|
+
# Format: "type:location:key" e.g. "env:ANTHROPIC_API_KEY", "file:~/.claude/settings.json:anthropicApiKey"
|
|
172
|
+
def source_tag(type, location, key = nil)
|
|
173
|
+
parts = [type.to_s, location.to_s]
|
|
174
|
+
parts << key.to_s if key && !key.to_s.empty?
|
|
175
|
+
parts.join(':')
|
|
176
|
+
end
|
|
177
|
+
|
|
178
|
+
# Generate a short fingerprint (first 8 chars of SHA-256) for a credential value.
|
|
179
|
+
# Stable for the lifetime of the credential; safe to log and include in audit events.
|
|
180
|
+
def credential_fingerprint(value)
|
|
181
|
+
return nil if value.nil? || value.to_s.strip.empty?
|
|
182
|
+
|
|
183
|
+
Digest::SHA256.hexdigest(value.to_s)[0, 8]
|
|
184
|
+
end
|
|
185
|
+
|
|
186
|
+
# Extract fingerprint from a config hash by finding the first credential field.
|
|
187
|
+
def config_fingerprint(config)
|
|
188
|
+
val = config[:api_key] || config['api_key'] ||
|
|
189
|
+
config[:bearer_token] || config['bearer_token'] ||
|
|
190
|
+
config[:access_token] || config['access_token']
|
|
191
|
+
credential_fingerprint(val)
|
|
192
|
+
end
|
|
193
|
+
|
|
170
194
|
# Returns true when the URL points to localhost / 127.0.0.1 / ::1.
|
|
171
195
|
def localhost?(url)
|
|
172
196
|
return false if url.nil?
|
|
@@ -185,7 +209,9 @@ module Legion
|
|
|
185
209
|
module_function :env, :claude_config, :claude_config_value,
|
|
186
210
|
:claude_env_value, :codex_token, :codex_openai_key,
|
|
187
211
|
:setting, :socket_open?, :http_ok?,
|
|
188
|
-
:dedup_credentials, :credential_hash,
|
|
212
|
+
:dedup_credentials, :credential_hash,
|
|
213
|
+
:source_tag, :credential_fingerprint, :config_fingerprint,
|
|
214
|
+
:localhost?
|
|
189
215
|
|
|
190
216
|
# --- private helpers -----------------------------------------------
|
|
191
217
|
|
data/lib/legion/extensions/llm/error.rb
CHANGED
|
@@ -54,6 +54,8 @@ module Legion
|
|
|
54
54
|
|
|
55
55
|
# Faraday middleware that maps provider-specific API errors to Legion::Extensions::Llm errors.
|
|
56
56
|
class ErrorMiddleware < Faraday::Middleware
|
|
57
|
+
STREAM_ERROR_BODY_KEY = :legion_llm_stream_error_body
|
|
58
|
+
|
|
57
59
|
def initialize(app, options = {})
|
|
58
60
|
super(app)
|
|
59
61
|
@provider = options[:provider]
|
|
@@ -79,6 +81,7 @@ module Legion
|
|
|
79
81
|
].freeze
|
|
80
82
|
|
|
81
83
|
def parse_error(provider:, response:) # rubocop:disable Metrics/PerceivedComplexity
|
|
84
|
+
response = response_with_stream_error_body(response)
|
|
82
85
|
message = provider&.parse_error(response)
|
|
83
86
|
|
|
84
87
|
case response.status
|
|
@@ -116,12 +119,51 @@ module Legion
|
|
|
116
119
|
|
|
117
120
|
private
|
|
118
121
|
|
|
122
|
+
def response_with_stream_error_body(response)
|
|
123
|
+
return response unless empty_body?(response)
|
|
124
|
+
|
|
125
|
+
stream_body = preserved_stream_error_body(response)
|
|
126
|
+
return response if stream_body.to_s.empty?
|
|
127
|
+
|
|
128
|
+
ResponseWithBody.new(response, stream_body)
|
|
129
|
+
end
|
|
130
|
+
|
|
131
|
+
def empty_body?(response)
|
|
132
|
+
!response.respond_to?(:body) || response.body.to_s.empty?
|
|
133
|
+
end
|
|
134
|
+
|
|
135
|
+
def preserved_stream_error_body(response)
|
|
136
|
+
return unless response.respond_to?(:[])
|
|
137
|
+
|
|
138
|
+
response[STREAM_ERROR_BODY_KEY]
|
|
139
|
+
rescue StandardError
|
|
140
|
+
nil
|
|
141
|
+
end
|
|
142
|
+
|
|
119
143
|
def context_length_exceeded?(message)
|
|
120
144
|
return false if message.to_s.empty?
|
|
121
145
|
|
|
122
146
|
CONTEXT_LENGTH_PATTERNS.any? { |pattern| message.match?(pattern) }
|
|
123
147
|
end
|
|
124
148
|
end
|
|
149
|
+
|
|
150
|
+
ResponseWithBody = Struct.new(:response, :body) do
|
|
151
|
+
def status = response.status
|
|
152
|
+
|
|
153
|
+
def [](key)
|
|
154
|
+
response[key] if response.respond_to?(:[])
|
|
155
|
+
end
|
|
156
|
+
|
|
157
|
+
def method_missing(method_name, ...)
|
|
158
|
+
return response.public_send(method_name, ...) if response.respond_to?(method_name)
|
|
159
|
+
|
|
160
|
+
super
|
|
161
|
+
end
|
|
162
|
+
|
|
163
|
+
def respond_to_missing?(method_name, include_private = false)
|
|
164
|
+
response.respond_to?(method_name, include_private) || super
|
|
165
|
+
end
|
|
166
|
+
end
|
|
125
167
|
end
|
|
126
168
|
end
|
|
127
169
|
end
|
data/lib/legion/extensions/llm/provider.rb
CHANGED
|
@@ -28,6 +28,7 @@ module Legion
|
|
|
28
28
|
class Provider
|
|
29
29
|
include Streaming
|
|
30
30
|
include Legion::Logging::Helper
|
|
31
|
+
include Legion::Cache::Helper
|
|
31
32
|
|
|
32
33
|
attr_reader :config, :connection
|
|
33
34
|
|
|
@@ -123,10 +124,14 @@ module Legion
|
|
|
123
124
|
provider_health = health(live:)
|
|
124
125
|
@cached_offerings = Array(list_models(live:, **filters)).filter_map do |model|
|
|
125
126
|
next unless model_matches_filters?(model, filters)
|
|
127
|
+
next unless model_allowed?(model.id)
|
|
126
128
|
|
|
127
129
|
offering_from_model(model, health: provider_health)
|
|
128
130
|
end
|
|
129
131
|
@cached_offerings
|
|
132
|
+
rescue Faraday::ConnectionFailed => e
|
|
133
|
+
log.warn("[#{slug}] instance=#{provider_instance_id} unreachable: #{e.message}")
|
|
134
|
+
[]
|
|
130
135
|
end
|
|
131
136
|
|
|
132
137
|
def health(live: false)
|
|
@@ -259,6 +264,8 @@ module Legion
|
|
|
259
264
|
error = part['error']
|
|
260
265
|
error.is_a?(String) ? error : part.dig('error', 'message')
|
|
261
266
|
end.join('. ')
|
|
267
|
+
when String
|
|
268
|
+
body[/"message"\s*:\s*"([^"]{1,500})/, 1] || body
|
|
262
269
|
else
|
|
263
270
|
body
|
|
264
271
|
end
|
|
@@ -284,12 +291,14 @@ module Legion
|
|
|
284
291
|
# ── Model allow-list / deny-list filtering ────────────────────────
|
|
285
292
|
|
|
286
293
|
def model_whitelist
|
|
287
|
-
wl =
|
|
294
|
+
wl = config.model_whitelist if config.respond_to?(:model_whitelist)
|
|
295
|
+
wl ||= settings[:model_whitelist] if respond_to?(:settings)
|
|
288
296
|
Array(wl).map { |p| p.to_s.downcase }
|
|
289
297
|
end
|
|
290
298
|
|
|
291
299
|
def model_blacklist
|
|
292
|
-
bl =
|
|
300
|
+
bl = config.model_blacklist if config.respond_to?(:model_blacklist)
|
|
301
|
+
bl ||= settings[:model_blacklist] if respond_to?(:settings)
|
|
293
302
|
Array(bl).map { |p| p.to_s.downcase }
|
|
294
303
|
end
|
|
295
304
|
|
|
@@ -371,21 +380,24 @@ module Legion
|
|
|
371
380
|
nil
|
|
372
381
|
end
|
|
373
382
|
|
|
374
|
-
def
|
|
375
|
-
|
|
383
|
+
def model_detail(model_name)
|
|
384
|
+
key = model_detail_cache_key(model_name)
|
|
385
|
+
cached = cache_get(key)
|
|
386
|
+
return cached if cached
|
|
376
387
|
|
|
377
|
-
|
|
388
|
+
result = fetch_model_detail(model_name)
|
|
389
|
+
cache_set(key, result, ttl: 86_400) if result
|
|
390
|
+
result
|
|
378
391
|
rescue StandardError => e
|
|
379
|
-
handle_exception(e, level: :
|
|
392
|
+
handle_exception(e, level: :warn, handled: true, operation: 'llm.provider.model_detail',
|
|
393
|
+
model: model_name)
|
|
394
|
+
nil
|
|
380
395
|
end
|
|
381
396
|
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
rescue StandardError => e
|
|
387
|
-
handle_exception(e, level: :debug, handled: true, operation: 'llm.provider.model_cache_fetch', key:)
|
|
388
|
-
yield
|
|
397
|
+
# Override in subclasses to make a live API call for model detail.
|
|
398
|
+
# Must return a Hash with symbol keys (e.g. { context_window: 128000 }).
|
|
399
|
+
def fetch_model_detail(_model_name)
|
|
400
|
+
nil
|
|
389
401
|
end
|
|
390
402
|
|
|
391
403
|
def cache_instance_key
|
|
@@ -448,6 +460,26 @@ module Legion
|
|
|
448
460
|
|
|
449
461
|
private
|
|
450
462
|
|
|
463
|
+
def model_detail_cache_key(model_name)
|
|
464
|
+
tier = offering_tier
|
|
465
|
+
instance_key = cache_instance_key
|
|
466
|
+
cred_fp = credential_cache_fragment
|
|
467
|
+
key_parts = ['model_info', tier, slug, instance_key, cred_fp, model_name].compact
|
|
468
|
+
key_parts.join('.')
|
|
469
|
+
end
|
|
470
|
+
|
|
471
|
+
def credential_cache_fragment
|
|
472
|
+
return nil if cache_local_instance?
|
|
473
|
+
|
|
474
|
+
cred = config.respond_to?(:bearer_token) && config.bearer_token
|
|
475
|
+
cred ||= config.respond_to?(:api_key) && config.api_key
|
|
476
|
+
cred ||= config.respond_to?(:bedrock_access_key_id) && config.bedrock_access_key_id
|
|
477
|
+
return nil unless cred
|
|
478
|
+
|
|
479
|
+
require 'digest'
|
|
480
|
+
Digest::SHA256.hexdigest(cred.to_s)[0, 8]
|
|
481
|
+
end
|
|
482
|
+
|
|
451
483
|
def validate_paint_inputs!(with:, mask:)
|
|
452
484
|
return if with.nil? && mask.nil?
|
|
453
485
|
|
data/lib/legion/extensions/llm/responses/thinking_extractor.rb
CHANGED
|
@@ -8,9 +8,39 @@ module Legion
|
|
|
8
8
|
module ThinkingExtractor
|
|
9
9
|
Extraction = Struct.new(:content, :thinking, :signature, :metadata, keyword_init: true)
|
|
10
10
|
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
11
|
+
THINK_TAG_PAIRS = [
|
|
12
|
+
['<thinking>', '</thinking>'],
|
|
13
|
+
['<think>', '</think>']
|
|
14
|
+
].freeze
|
|
15
|
+
UNTAGGED_PREAMBLE_MAX_LENGTH = 4_000
|
|
16
|
+
UNTAGGED_PREAMBLE_STARTS = [
|
|
17
|
+
'the user',
|
|
18
|
+
'the request',
|
|
19
|
+
'the prompt',
|
|
20
|
+
'the question',
|
|
21
|
+
'i need',
|
|
22
|
+
'i should',
|
|
23
|
+
'i will',
|
|
24
|
+
"i'll",
|
|
25
|
+
'i can',
|
|
26
|
+
'we need',
|
|
27
|
+
'we should',
|
|
28
|
+
'we will',
|
|
29
|
+
"we'll",
|
|
30
|
+
'we can',
|
|
31
|
+
'let me'
|
|
32
|
+
].freeze
|
|
33
|
+
UNTAGGED_PREAMBLE_PATTERNS = [
|
|
34
|
+
/
|
|
35
|
+
\AThe\s+(?:user|request|prompt|question)\b.*\b
|
|
36
|
+
(?:let\s+me|i'll|i\s+will|i\s+should|i\s+need|i\s+can|respond|answer|reply)\b
|
|
37
|
+
/imx,
|
|
38
|
+
/
|
|
39
|
+
\A(?:I|We)\s+(?:need|should|will|can)\s+(?:to\s+)?
|
|
40
|
+
(?:answer|respond|reply|confirm|provide|explain|help)\b
|
|
41
|
+
/imx,
|
|
42
|
+
/\ALet me\s+(?:answer|respond|reply|confirm|provide|explain|help)\b/im
|
|
43
|
+
].freeze
|
|
14
44
|
THINKING_METADATA_KEYS = %i[
|
|
15
45
|
reasoning_content reasoning thinking thinking_text thinking_signature reasoning_signature thought_signature
|
|
16
46
|
].freeze
|
|
@@ -42,20 +72,45 @@ module Legion
|
|
|
42
72
|
remaining = content.dup
|
|
43
73
|
|
|
44
74
|
remaining = consume_next_segment(remaining, clean, thinking_parts) until remaining.empty?
|
|
75
|
+
clean, untagged_thinking = extract_untagged_preamble(clean.strip)
|
|
76
|
+
thinking_parts << untagged_thinking
|
|
45
77
|
|
|
46
|
-
[clean
|
|
78
|
+
[clean, compact_thinking(thinking_parts)]
|
|
47
79
|
end
|
|
48
80
|
private_class_method :extract_from_content
|
|
49
81
|
|
|
82
|
+
def extract_untagged_preamble(content)
|
|
83
|
+
return [content, nil] unless content.is_a?(String)
|
|
84
|
+
|
|
85
|
+
match = content.match(/\A(?<preamble>.+?)\n{2,}(?<visible>.+)\z/m)
|
|
86
|
+
return [content, nil] unless match
|
|
87
|
+
|
|
88
|
+
preamble = match[:preamble].strip
|
|
89
|
+
return [content, nil] unless untagged_reasoning_preamble?(preamble)
|
|
90
|
+
|
|
91
|
+
[match[:visible].sub(/\A[[:space:]]+/, '').strip, preamble]
|
|
92
|
+
end
|
|
93
|
+
|
|
94
|
+
def untagged_reasoning_preamble_candidate?(content)
|
|
95
|
+
return false unless content.is_a?(String)
|
|
96
|
+
|
|
97
|
+
text = content.lstrip.downcase
|
|
98
|
+
return false if text.empty?
|
|
99
|
+
|
|
100
|
+
UNTAGGED_PREAMBLE_STARTS.any? do |start|
|
|
101
|
+
start.start_with?(text) || text.start_with?(start)
|
|
102
|
+
end
|
|
103
|
+
end
|
|
104
|
+
|
|
50
105
|
def consume_next_segment(remaining, clean, thinking_parts)
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
if
|
|
55
|
-
thinking_parts << remaining.slice(0,
|
|
56
|
-
remaining.slice((
|
|
57
|
-
elsif
|
|
58
|
-
consume_open_think_segment(remaining,
|
|
106
|
+
close_match = next_tag_match(remaining, :close)
|
|
107
|
+
open_match = next_tag_match(remaining, :open)
|
|
108
|
+
|
|
109
|
+
if close_match && (open_match.nil? || close_match[:index] < open_match[:index])
|
|
110
|
+
thinking_parts << remaining.slice(0, close_match[:index])
|
|
111
|
+
remaining.slice((close_match[:index] + close_match[:tag].length)..).to_s.sub(/\A[[:space:]]+/, '')
|
|
112
|
+
elsif open_match
|
|
113
|
+
consume_open_think_segment(remaining, open_match, clean, thinking_parts)
|
|
59
114
|
else
|
|
60
115
|
clean << remaining
|
|
61
116
|
+''
|
|
@@ -63,20 +118,37 @@ module Legion
|
|
|
63
118
|
end
|
|
64
119
|
private_class_method :consume_next_segment
|
|
65
120
|
|
|
66
|
-
def consume_open_think_segment(remaining,
|
|
67
|
-
clean << remaining.slice(0,
|
|
68
|
-
after_open = remaining.slice((
|
|
69
|
-
close_index = after_open.index(
|
|
121
|
+
def consume_open_think_segment(remaining, open_match, clean, thinking_parts)
|
|
122
|
+
clean << remaining.slice(0, open_match[:index])
|
|
123
|
+
after_open = remaining.slice((open_match[:index] + open_match[:tag].length)..).to_s
|
|
124
|
+
close_index = after_open.index(open_match[:close_tag])
|
|
70
125
|
unless close_index
|
|
71
126
|
thinking_parts << after_open
|
|
72
127
|
return +''
|
|
73
128
|
end
|
|
74
129
|
|
|
75
130
|
thinking_parts << after_open.slice(0, close_index)
|
|
76
|
-
after_open.slice((close_index +
|
|
131
|
+
after_open.slice((close_index + open_match[:close_tag].length)..).to_s
|
|
77
132
|
end
|
|
78
133
|
private_class_method :consume_open_think_segment
|
|
79
134
|
|
|
135
|
+
def next_tag_match(text, type)
|
|
136
|
+
matches = THINK_TAG_PAIRS.filter_map do |open_tag, close_tag|
|
|
137
|
+
tag = type == :open ? open_tag : close_tag
|
|
138
|
+
index = text.index(tag)
|
|
139
|
+
{ index: index, tag: tag, close_tag: close_tag } if index
|
|
140
|
+
end
|
|
141
|
+
matches.min_by { |match| match[:index] }
|
|
142
|
+
end
|
|
143
|
+
private_class_method :next_tag_match
|
|
144
|
+
|
|
145
|
+
def untagged_reasoning_preamble?(preamble)
|
|
146
|
+
return false if preamble.length > UNTAGGED_PREAMBLE_MAX_LENGTH
|
|
147
|
+
|
|
148
|
+
UNTAGGED_PREAMBLE_PATTERNS.any? { |pattern| preamble.match?(pattern) }
|
|
149
|
+
end
|
|
150
|
+
private_class_method :untagged_reasoning_preamble?
|
|
151
|
+
|
|
80
152
|
def extract_metadata_thinking(metadata)
|
|
81
153
|
compact_thinking(
|
|
82
154
|
[
|
data/lib/legion/extensions/llm/stream_accumulator.rb
CHANGED
|
@@ -21,6 +21,9 @@ module Legion
|
|
|
21
21
|
@thinking_tokens = nil
|
|
22
22
|
@inside_think_tag = false
|
|
23
23
|
@pending_think_tag = +''
|
|
24
|
+
@active_think_close_tag = nil
|
|
25
|
+
@untagged_preamble_pending = true
|
|
26
|
+
@untagged_preamble_buffer = +''
|
|
24
27
|
@latest_tool_call_id = nil
|
|
25
28
|
end
|
|
26
29
|
|
|
@@ -55,6 +58,8 @@ module Legion
|
|
|
55
58
|
end
|
|
56
59
|
|
|
57
60
|
def to_message(response)
|
|
61
|
+
flush_pending_untagged_preamble
|
|
62
|
+
|
|
58
63
|
Message.new(
|
|
59
64
|
role: :assistant,
|
|
60
65
|
content: content.empty? ? nil : content,
|
|
@@ -171,14 +176,63 @@ module Legion
|
|
|
171
176
|
|
|
172
177
|
def append_text_with_thinking(text)
|
|
173
178
|
content_chunk, thinking_chunk = extract_think_tags(text)
|
|
179
|
+
content_chunk, untagged_thinking = extract_untagged_preamble(content_chunk)
|
|
174
180
|
@content << content_chunk
|
|
175
181
|
@last_content_delta << content_chunk
|
|
182
|
+
if untagged_thinking
|
|
183
|
+
@thinking_text << untagged_thinking
|
|
184
|
+
@last_thinking_delta << untagged_thinking
|
|
185
|
+
end
|
|
176
186
|
return unless thinking_chunk
|
|
177
187
|
|
|
178
188
|
@thinking_text << thinking_chunk
|
|
179
189
|
@last_thinking_delta << thinking_chunk
|
|
180
190
|
end
|
|
181
191
|
|
|
192
|
+
def extract_untagged_preamble(content_chunk)
|
|
193
|
+
return [content_chunk, nil] unless @untagged_preamble_pending
|
|
194
|
+
return [content_chunk, nil] unless @content.empty? && @thinking_text.empty?
|
|
195
|
+
return [content_chunk, nil] if content_chunk.empty?
|
|
196
|
+
|
|
197
|
+
candidate = @untagged_preamble_buffer + content_chunk
|
|
198
|
+
return release_untagged_preamble(candidate) unless candidate_untagged_preamble?(candidate)
|
|
199
|
+
|
|
200
|
+
content, thinking = Responses::ThinkingExtractor.extract_untagged_preamble(candidate)
|
|
201
|
+
return release_untagged_preamble(content, thinking) if thinking
|
|
202
|
+
return release_untagged_preamble(candidate) if complete_untagged_preamble_candidate?(candidate)
|
|
203
|
+
|
|
204
|
+
@untagged_preamble_buffer = candidate
|
|
205
|
+
['', nil]
|
|
206
|
+
end
|
|
207
|
+
|
|
208
|
+
def candidate_untagged_preamble?(candidate)
|
|
209
|
+
Responses::ThinkingExtractor.untagged_reasoning_preamble_candidate?(candidate)
|
|
210
|
+
end
|
|
211
|
+
|
|
212
|
+
def complete_untagged_preamble_candidate?(candidate)
|
|
213
|
+
candidate.match?(/\n{2,}/) || candidate.length > Responses::ThinkingExtractor::UNTAGGED_PREAMBLE_MAX_LENGTH
|
|
214
|
+
end
|
|
215
|
+
|
|
216
|
+
def release_untagged_preamble(content, thinking = nil)
|
|
217
|
+
@untagged_preamble_pending = false
|
|
218
|
+
@untagged_preamble_buffer = +''
|
|
219
|
+
[content, thinking]
|
|
220
|
+
end
|
|
221
|
+
|
|
222
|
+
def flush_pending_untagged_preamble
|
|
223
|
+
return if @untagged_preamble_buffer.empty?
|
|
224
|
+
|
|
225
|
+
content, thinking = Responses::ThinkingExtractor.extract_untagged_preamble(@untagged_preamble_buffer)
|
|
226
|
+
if thinking
|
|
227
|
+
@content << content
|
|
228
|
+
@thinking_text << thinking
|
|
229
|
+
else
|
|
230
|
+
@content << @untagged_preamble_buffer
|
|
231
|
+
end
|
|
232
|
+
@untagged_preamble_buffer = +''
|
|
233
|
+
@untagged_preamble_pending = false
|
|
234
|
+
end
|
|
235
|
+
|
|
182
236
|
def append_thinking_from_chunk(chunk)
|
|
183
237
|
thinking = chunk.thinking
|
|
184
238
|
return unless thinking
|
|
@@ -191,8 +245,6 @@ module Legion
|
|
|
191
245
|
end
|
|
192
246
|
|
|
193
247
|
def extract_think_tags(text)
|
|
194
|
-
start_tag = '<think>'
|
|
195
|
-
end_tag = '</think>'
|
|
196
248
|
remaining = @pending_think_tag + text
|
|
197
249
|
@pending_think_tag = +''
|
|
198
250
|
|
|
@@ -201,9 +253,9 @@ module Legion
|
|
|
201
253
|
|
|
202
254
|
until remaining.empty?
|
|
203
255
|
remaining = if @inside_think_tag
|
|
204
|
-
consume_think_content(remaining,
|
|
256
|
+
consume_think_content(remaining, @active_think_close_tag, thinking)
|
|
205
257
|
else
|
|
206
|
-
consume_non_think_content(remaining,
|
|
258
|
+
consume_non_think_content(remaining, output)
|
|
207
259
|
end
|
|
208
260
|
end
|
|
209
261
|
|
|
@@ -215,41 +267,59 @@ module Legion
|
|
|
215
267
|
if end_index
|
|
216
268
|
thinking << remaining.slice(0, end_index)
|
|
217
269
|
@inside_think_tag = false
|
|
270
|
+
@active_think_close_tag = nil
|
|
218
271
|
remaining.slice((end_index + end_tag.length)..) || +''
|
|
219
272
|
else
|
|
220
|
-
suffix_len = longest_suffix_prefix(remaining, end_tag)
|
|
273
|
+
suffix_len = longest_suffix_prefix(remaining, [end_tag])
|
|
221
274
|
thinking << remaining.slice(0, remaining.length - suffix_len)
|
|
222
275
|
@pending_think_tag = remaining.slice(-suffix_len, suffix_len)
|
|
223
276
|
+''
|
|
224
277
|
end
|
|
225
278
|
end
|
|
226
279
|
|
|
227
|
-
def consume_non_think_content(remaining,
|
|
228
|
-
unmatched_close = remaining
|
|
229
|
-
|
|
230
|
-
if unmatched_close && (
|
|
280
|
+
def consume_non_think_content(remaining, output)
|
|
281
|
+
unmatched_close = next_stream_tag_match(remaining, :close)
|
|
282
|
+
start_match = next_stream_tag_match(remaining, :open)
|
|
283
|
+
if unmatched_close && (start_match.nil? || unmatched_close[:index] < start_match[:index])
|
|
231
284
|
consume_unmatched_think_close(remaining, unmatched_close)
|
|
232
|
-
elsif
|
|
233
|
-
output << remaining.slice(0,
|
|
285
|
+
elsif start_match
|
|
286
|
+
output << remaining.slice(0, start_match[:index])
|
|
234
287
|
@inside_think_tag = true
|
|
235
|
-
|
|
288
|
+
@active_think_close_tag = start_match[:close_tag]
|
|
289
|
+
remaining.slice((start_match[:index] + start_match[:tag].length)..) || +''
|
|
236
290
|
else
|
|
237
|
-
suffix_len = longest_suffix_prefix(remaining,
|
|
291
|
+
suffix_len = longest_suffix_prefix(remaining, stream_tag_tokens)
|
|
238
292
|
output << remaining.slice(0, remaining.length - suffix_len)
|
|
239
293
|
@pending_think_tag = remaining.slice(-suffix_len, suffix_len)
|
|
240
294
|
+''
|
|
241
295
|
end
|
|
242
296
|
end
|
|
243
297
|
|
|
244
|
-
def consume_unmatched_think_close(remaining,
|
|
245
|
-
|
|
246
|
-
thinking = remaining.slice(0, close_index)
|
|
298
|
+
def consume_unmatched_think_close(remaining, close_match)
|
|
299
|
+
thinking = remaining.slice(0, close_match[:index])
|
|
247
300
|
@thinking_text << thinking
|
|
248
301
|
@last_thinking_delta << thinking
|
|
249
|
-
remaining.slice((
|
|
302
|
+
remaining.slice((close_match[:index] + close_match[:tag].length)..).to_s.sub(/\A[[:space:]]+/, '')
|
|
303
|
+
end
|
|
304
|
+
|
|
305
|
+
def next_stream_tag_match(text, type)
|
|
306
|
+
matches = Responses::ThinkingExtractor::THINK_TAG_PAIRS.filter_map do |open_tag, close_tag|
|
|
307
|
+
tag = type == :open ? open_tag : close_tag
|
|
308
|
+
index = text.index(tag)
|
|
309
|
+
{ index: index, tag: tag, close_tag: close_tag } if index
|
|
310
|
+
end
|
|
311
|
+
matches.min_by { |match| match[:index] }
|
|
312
|
+
end
|
|
313
|
+
|
|
314
|
+
def stream_tag_tokens
|
|
315
|
+
Responses::ThinkingExtractor::THINK_TAG_PAIRS.flat_map { |open_tag, close_tag| [open_tag, close_tag] }
|
|
316
|
+
end
|
|
317
|
+
|
|
318
|
+
def longest_suffix_prefix(text, tags)
|
|
319
|
+
tags.map { |tag| longest_suffix_prefix_for_tag(text, tag) }.max || 0
|
|
250
320
|
end
|
|
251
321
|
|
|
252
|
-
def
|
|
322
|
+
def longest_suffix_prefix_for_tag(text, tag)
|
|
253
323
|
max = [text.length, tag.length - 1].min
|
|
254
324
|
max.downto(1) do |len|
|
|
255
325
|
return len if text.end_with?(tag[0, len])
|
data/lib/legion/extensions/llm/streaming.rb
CHANGED
|
@@ -93,10 +93,48 @@ module Legion
|
|
|
93
93
|
|
|
94
94
|
def handle_failed_response(chunk, buffer, env)
|
|
95
95
|
buffer << chunk
|
|
96
|
+
body_persisted = persist_failed_response_body(buffer, env)
|
|
96
97
|
error_data = Legion::JSON.parse(buffer, symbolize_names: false)
|
|
97
98
|
handle_parsed_error(error_data, env)
|
|
98
|
-
rescue Legion::JSON::ParseError
|
|
99
|
-
|
|
99
|
+
rescue Legion::JSON::ParseError
|
|
100
|
+
return if body_persisted
|
|
101
|
+
|
|
102
|
+
raise_partial_streaming_error(buffer, env)
|
|
103
|
+
end
|
|
104
|
+
|
|
105
|
+
def persist_failed_response_body(buffer, env)
|
|
106
|
+
custom_persisted = persist_failed_response_custom_body?(buffer, env)
|
|
107
|
+
body_persisted = persist_failed_response_env_body?(buffer, env)
|
|
108
|
+
custom_persisted || body_persisted
|
|
109
|
+
end
|
|
110
|
+
|
|
111
|
+
def persist_failed_response_env_body?(buffer, env)
|
|
112
|
+
return false unless env.respond_to?(:body=)
|
|
113
|
+
|
|
114
|
+
env.body = buffer.dup
|
|
115
|
+
true
|
|
116
|
+
end
|
|
117
|
+
|
|
118
|
+
def persist_failed_response_custom_body?(buffer, env)
|
|
119
|
+
return false unless env.respond_to?(:[]=)
|
|
120
|
+
|
|
121
|
+
env[ErrorMiddleware::STREAM_ERROR_BODY_KEY] = buffer.dup
|
|
122
|
+
true
|
|
123
|
+
rescue StandardError
|
|
124
|
+
false
|
|
125
|
+
end
|
|
126
|
+
|
|
127
|
+
def raise_partial_streaming_error(buffer, env)
|
|
128
|
+
partial = buffer[/"message"\s*:\s*"([^"]{1,200})/, 1]
|
|
129
|
+
status = env&.status || 0
|
|
130
|
+
msg = if partial
|
|
131
|
+
"Provider error (status #{status}): #{partial}"
|
|
132
|
+
else
|
|
133
|
+
"Provider error (status #{status}) - response body incomplete"
|
|
134
|
+
end
|
|
135
|
+
log.warn "[llm][streaming] action=handle_failed_response status=#{status} " \
|
|
136
|
+
"partial_body=#{buffer.length}b msg=#{partial.inspect}"
|
|
137
|
+
raise Legion::Extensions::Llm::ServerError, msg
|
|
100
138
|
end
|
|
101
139
|
|
|
102
140
|
def handle_sse(chunk, parser, env, &)
|
data/lib/legion/extensions/llm.rb
CHANGED
|
@@ -9,6 +9,15 @@ require 'faraday/multipart'
|
|
|
9
9
|
require 'faraday/retry'
|
|
10
10
|
require 'legion/json'
|
|
11
11
|
require 'legion/logging'
|
|
12
|
+
# legion/cache writes DEBUG lines to $stdout on first load; suppress them here
|
|
13
|
+
# so callers that capture our stdout (e.g. Open3-based integration tests) are unaffected.
|
|
14
|
+
begin
|
|
15
|
+
old_stdout = $stdout
|
|
16
|
+
$stdout = File.open(File::NULL, 'w')
|
|
17
|
+
require 'legion/cache'
|
|
18
|
+
ensure
|
|
19
|
+
$stdout = old_stdout
|
|
20
|
+
end
|
|
12
21
|
require 'logger'
|
|
13
22
|
require 'marcel'
|
|
14
23
|
require 'ruby_llm/schema'
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: lex-llm
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.4.9
|
|
4
|
+
version: 0.4.13
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- LegionIO
|
|
@@ -108,6 +108,20 @@ dependencies:
|
|
|
108
108
|
- - ">="
|
|
109
109
|
- !ruby/object:Gem::Version
|
|
110
110
|
version: '1'
|
|
111
|
+
- !ruby/object:Gem::Dependency
|
|
112
|
+
name: legion-cache
|
|
113
|
+
requirement: !ruby/object:Gem::Requirement
|
|
114
|
+
requirements:
|
|
115
|
+
- - ">="
|
|
116
|
+
- !ruby/object:Gem::Version
|
|
117
|
+
version: 1.3.0
|
|
118
|
+
type: :runtime
|
|
119
|
+
prerelease: false
|
|
120
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
121
|
+
requirements:
|
|
122
|
+
- - ">="
|
|
123
|
+
- !ruby/object:Gem::Version
|
|
124
|
+
version: 1.3.0
|
|
111
125
|
- !ruby/object:Gem::Dependency
|
|
112
126
|
name: legion-crypt
|
|
113
127
|
requirement: !ruby/object:Gem::Requirement
|