lex-llm 0.1.6 → 0.1.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +14 -0
- data/lib/legion/extensions/llm/chat.rb +3 -2
- data/lib/legion/extensions/llm/models.rb +7 -4
- data/lib/legion/extensions/llm/provider/open_ai_compatible.rb +56 -4
- data/lib/legion/extensions/llm/provider.rb +2 -0
- data/lib/legion/extensions/llm/stream_accumulator.rb +29 -2
- data/lib/legion/extensions/llm/streaming.rb +17 -14
- data/lib/legion/extensions/llm/version.rb +1 -1
- data/lib/legion/extensions/llm.rb +1 -0
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: d41cf2984b04621d4e0c2a7fa84a7236361a561570b9944358631a76e6699ac9
|
|
4
|
+
data.tar.gz: c84f98866b4f313d240f964d691b6c170d8635f6275ca8c0150b54d6e2d286cf
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: a4516de1ebcab041beeabaf718ea11ed3445a943a3c4bbe388936622690001dec8606a8c26d5850f123950ac1d4a09361c4513a8387615e86cc5ef99af133860
|
|
7
|
+
data.tar.gz: 3d10adbbb6684df81ac7090a4a7c0ebd179803069e74b80e13a626b1cd93d67e9ab955bb5259a2ab638373aaf786217b738536478893fe8983366a2f29ee6e99
|
data/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,19 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## 0.1.8 - 2026-04-30
|
|
4
|
+
|
|
5
|
+
- Audit all rescue blocks for handle_exception compliance
|
|
6
|
+
- Add Legion::Logging::Helper to Provider, Chat, and Models for structured exception reporting
|
|
7
|
+
- Replace ad-hoc logger.debug/warn calls in rescue blocks with handle_exception across streaming, chat, models, and provider modules
|
|
8
|
+
- Add require for legion/logging in the main entrypoint
|
|
9
|
+
|
|
10
|
+
## 0.1.7 - 2026-04-30
|
|
11
|
+
|
|
12
|
+
- Add thinking extraction from OpenAI-compatible streaming chunks (reasoning_content, reasoning, think tags)
|
|
13
|
+
- Add stream_usage_supported? opt-in for streaming token usage reporting
|
|
14
|
+
- Add filtered_chunk method to StreamAccumulator for clean thinking/content separation
|
|
15
|
+
- Wrap streaming callback through accumulator filter for proper SSE event routing
|
|
16
|
+
|
|
3
17
|
## 0.1.6 - 2026-04-28
|
|
4
18
|
|
|
5
19
|
- Add provider-neutral registry event envelopes for future `llm.registry` offering availability, unavailability, degraded, and heartbeat publishing without persistence.
|
|
@@ -6,6 +6,7 @@ module Legion
|
|
|
6
6
|
# Represents a conversation with an AI model
|
|
7
7
|
class Chat
|
|
8
8
|
include Enumerable
|
|
9
|
+
include Legion::Logging::Helper
|
|
9
10
|
|
|
10
11
|
attr_reader :model, :messages, :tools, :tool_prefs, :params, :headers, :schema
|
|
11
12
|
|
|
@@ -157,8 +158,8 @@ module Legion
|
|
|
157
158
|
if @schema && response.content.is_a?(String) && !response.tool_call?
|
|
158
159
|
begin
|
|
159
160
|
response.content = Legion::JSON.parse(response.content, symbolize_names: false)
|
|
160
|
-
rescue Legion::JSON::ParseError
|
|
161
|
-
|
|
161
|
+
rescue Legion::JSON::ParseError => e
|
|
162
|
+
handle_exception(e, level: :warn, handled: true, operation: 'llm.chat.complete')
|
|
162
163
|
end
|
|
163
164
|
end
|
|
164
165
|
|
|
@@ -35,6 +35,8 @@ module Legion
|
|
|
35
35
|
].freeze
|
|
36
36
|
|
|
37
37
|
class << self
|
|
38
|
+
include Legion::Logging::Helper
|
|
39
|
+
|
|
38
40
|
def instance
|
|
39
41
|
@instance ||= new
|
|
40
42
|
end
|
|
@@ -51,7 +53,8 @@ module Legion
|
|
|
51
53
|
data = File.exist?(file) ? File.read(file) : '[]'
|
|
52
54
|
models = Legion::JSON.parse(data, symbolize_names: true).map { |model| Model::Info.new(model) }
|
|
53
55
|
filter_models(models)
|
|
54
|
-
rescue Legion::JSON::ParseError
|
|
56
|
+
rescue Legion::JSON::ParseError => e
|
|
57
|
+
handle_exception(e, level: :warn, handled: true, operation: 'llm.models.read_from_json')
|
|
55
58
|
[]
|
|
56
59
|
end
|
|
57
60
|
|
|
@@ -92,6 +95,8 @@ module Legion
|
|
|
92
95
|
result[:models].concat(provider_class.new(config).list_models)
|
|
93
96
|
result[:fetched_providers] << provider_class.slug
|
|
94
97
|
rescue StandardError => e
|
|
98
|
+
handle_exception(e, level: :warn, handled: true,
|
|
99
|
+
operation: 'llm.models.fetch_provider_models')
|
|
95
100
|
result[:failed] << { name: provider_class.name, slug: provider_class.slug, error: e }
|
|
96
101
|
end
|
|
97
102
|
end
|
|
@@ -170,9 +175,7 @@ module Legion
|
|
|
170
175
|
end
|
|
171
176
|
{ models: models.reject { |model| model.provider.nil? || model.id.nil? }, fetched: true }
|
|
172
177
|
rescue StandardError => e
|
|
173
|
-
|
|
174
|
-
"Failed to fetch models.dev (#{e.class}: #{e.message}). Keeping existing."
|
|
175
|
-
)
|
|
178
|
+
handle_exception(e, level: :warn, handled: true, operation: 'llm.models.fetch_models_dev')
|
|
176
179
|
{
|
|
177
180
|
models: existing_models.select { |model| model.metadata[:source] == 'models.dev' },
|
|
178
181
|
fetched: false
|
|
@@ -6,6 +6,7 @@ module Legion
|
|
|
6
6
|
class Provider
|
|
7
7
|
# Shared OpenAI-compatible HTTP payload and response adapter.
|
|
8
8
|
module OpenAICompatible
|
|
9
|
+
def stream_usage_supported? = false
|
|
9
10
|
def completion_url = '/v1/chat/completions'
|
|
10
11
|
def stream_url = completion_url
|
|
11
12
|
def models_url = '/v1/models'
|
|
@@ -20,7 +21,7 @@ module Legion
|
|
|
20
21
|
private
|
|
21
22
|
|
|
22
23
|
def render_payload(messages, tools:, temperature:, model:, stream:, schema:, thinking:, tool_prefs:) # rubocop:disable Metrics/ParameterLists
|
|
23
|
-
{
|
|
24
|
+
payload = {
|
|
24
25
|
model: model.id,
|
|
25
26
|
messages: format_openai_messages(messages),
|
|
26
27
|
temperature: temperature,
|
|
@@ -30,6 +31,8 @@ module Legion
|
|
|
30
31
|
response_format: openai_response_format(schema),
|
|
31
32
|
reasoning_effort: openai_reasoning_effort(thinking)
|
|
32
33
|
}.compact
|
|
34
|
+
payload[:stream_options] = { include_usage: true } if stream && stream_usage_supported?
|
|
35
|
+
payload
|
|
33
36
|
end
|
|
34
37
|
|
|
35
38
|
def format_openai_messages(messages)
|
|
@@ -116,12 +119,14 @@ module Legion
|
|
|
116
119
|
choice = Array(body['choices']).first || {}
|
|
117
120
|
message = choice['message'] || {}
|
|
118
121
|
usage = body['usage'] || {}
|
|
122
|
+
content, thinking = extract_thinking_from_completion(message)
|
|
119
123
|
|
|
120
124
|
Legion::Extensions::Llm::Message.new(
|
|
121
125
|
role: :assistant,
|
|
122
|
-
content:
|
|
126
|
+
content: content,
|
|
123
127
|
model_id: body['model'],
|
|
124
128
|
tool_calls: parse_tool_calls(message['tool_calls']),
|
|
129
|
+
thinking: thinking,
|
|
125
130
|
input_tokens: usage['prompt_tokens'],
|
|
126
131
|
output_tokens: usage['completion_tokens'],
|
|
127
132
|
reasoning_tokens: usage.dig('completion_tokens_details', 'reasoning_tokens'),
|
|
@@ -129,22 +134,68 @@ module Legion
|
|
|
129
134
|
)
|
|
130
135
|
end
|
|
131
136
|
|
|
137
|
+
def extract_thinking_from_completion(message)
|
|
138
|
+
reasoning = message['reasoning_content'] || message['reasoning']
|
|
139
|
+
content = message['content']
|
|
140
|
+
|
|
141
|
+
if reasoning
|
|
142
|
+
[content, Thinking.build(text: reasoning)]
|
|
143
|
+
elsif content.is_a?(String) && content.include?('<think>')
|
|
144
|
+
think_text = content[%r{<think>(.*?)</think>}m, 1]
|
|
145
|
+
clean = content.gsub(%r{<think>.*?</think>}m, '').strip
|
|
146
|
+
[clean, Thinking.build(text: think_text)]
|
|
147
|
+
else
|
|
148
|
+
[content, nil]
|
|
149
|
+
end
|
|
150
|
+
end
|
|
151
|
+
|
|
132
152
|
def build_chunk(data)
|
|
133
153
|
choice = Array(data['choices']).first || {}
|
|
134
154
|
delta = choice['delta'] || {}
|
|
135
155
|
usage = data['usage'] || {}
|
|
156
|
+
content, thinking = extract_thinking_from_chunk(delta)
|
|
136
157
|
|
|
137
158
|
Legion::Extensions::Llm::Chunk.new(
|
|
138
159
|
role: :assistant,
|
|
139
|
-
content:
|
|
160
|
+
content: content,
|
|
140
161
|
model_id: data['model'],
|
|
141
162
|
tool_calls: parse_tool_calls(delta['tool_calls']),
|
|
163
|
+
thinking: thinking,
|
|
142
164
|
input_tokens: usage['prompt_tokens'],
|
|
143
165
|
output_tokens: usage['completion_tokens'],
|
|
144
166
|
raw: data
|
|
145
167
|
)
|
|
146
168
|
end
|
|
147
169
|
|
|
170
|
+
def extract_thinking_from_chunk(delta)
|
|
171
|
+
reasoning = delta['reasoning_content'] || delta['reasoning']
|
|
172
|
+
content = delta['content']
|
|
173
|
+
|
|
174
|
+
if reasoning
|
|
175
|
+
[content, Thinking.build(text: reasoning)]
|
|
176
|
+
elsif content.is_a?(String) && content.include?('<think>')
|
|
177
|
+
clean, think_text = split_think_tags(content)
|
|
178
|
+
[clean, Thinking.build(text: think_text)]
|
|
179
|
+
else
|
|
180
|
+
[content, nil]
|
|
181
|
+
end
|
|
182
|
+
end
|
|
183
|
+
|
|
184
|
+
def split_think_tags(text) # rubocop:disable Metrics/PerceivedComplexity
|
|
185
|
+
if text.match?(%r{<think>.*</think>}m)
|
|
186
|
+
thinking = text[%r{<think>(.*?)</think>}m, 1]
|
|
187
|
+
clean = text.gsub(%r{<think>.*?</think>}m, '').strip
|
|
188
|
+
[clean.empty? ? nil : clean, thinking]
|
|
189
|
+
elsif text.start_with?('<think>')
|
|
190
|
+
[nil, text.delete_prefix('<think>')]
|
|
191
|
+
elsif text.include?('</think>')
|
|
192
|
+
parts = text.split('</think>', 2)
|
|
193
|
+
[parts[1]&.strip.then { |s| s&.empty? ? nil : s }, parts[0]]
|
|
194
|
+
else
|
|
195
|
+
[text, nil]
|
|
196
|
+
end
|
|
197
|
+
end
|
|
198
|
+
|
|
148
199
|
def parse_tool_calls(tool_calls)
|
|
149
200
|
return nil unless tool_calls&.any?
|
|
150
201
|
|
|
@@ -167,7 +218,8 @@ module Legion
|
|
|
167
218
|
return arguments if arguments.is_a?(Hash)
|
|
168
219
|
|
|
169
220
|
Legion::JSON.parse(arguments, symbolize_names: false)
|
|
170
|
-
rescue Legion::JSON::ParseError
|
|
221
|
+
rescue Legion::JSON::ParseError => e
|
|
222
|
+
handle_exception(e, level: :warn, handled: true, operation: 'llm.provider.parse_tool_arguments')
|
|
171
223
|
{}
|
|
172
224
|
end
|
|
173
225
|
|
|
@@ -6,6 +6,7 @@ module Legion
|
|
|
6
6
|
# Base class for LLM providers.
|
|
7
7
|
class Provider
|
|
8
8
|
include Streaming
|
|
9
|
+
include Legion::Logging::Helper
|
|
9
10
|
|
|
10
11
|
attr_reader :config, :connection
|
|
11
12
|
|
|
@@ -131,6 +132,7 @@ module Legion
|
|
|
131
132
|
response = @connection.get(metadata[:endpoints][:health])
|
|
132
133
|
metadata.merge(ready: configured? && health_ready?(response.body), health: response.body)
|
|
133
134
|
rescue StandardError => e
|
|
135
|
+
handle_exception(e, level: :warn, handled: true, operation: 'llm.provider.readiness')
|
|
134
136
|
metadata.merge(ready: false, health: { error: e.class.name, message: e.message })
|
|
135
137
|
end
|
|
136
138
|
|
|
@@ -26,12 +26,32 @@ module Legion
|
|
|
26
26
|
Legion::Extensions::Llm.logger.debug { chunk.inspect } if Legion::Extensions::Llm.config.log_stream_debug
|
|
27
27
|
@model_id ||= chunk.model_id
|
|
28
28
|
|
|
29
|
+
@last_content_delta = +''
|
|
30
|
+
@last_thinking_delta = +''
|
|
29
31
|
handle_chunk_content(chunk)
|
|
30
32
|
append_thinking_from_chunk(chunk)
|
|
31
33
|
count_tokens chunk
|
|
32
34
|
Legion::Extensions::Llm.logger.debug { inspect } if Legion::Extensions::Llm.config.log_stream_debug
|
|
33
35
|
end
|
|
34
36
|
|
|
37
|
+
def filtered_chunk(chunk) # rubocop:disable Metrics/PerceivedComplexity
|
|
38
|
+
has_content = !@last_content_delta.empty?
|
|
39
|
+
has_thinking = !@last_thinking_delta.empty?
|
|
40
|
+
has_tokens = chunk.input_tokens&.positive? || chunk.output_tokens&.positive?
|
|
41
|
+
return nil unless has_content || has_thinking || chunk.tool_call? || has_tokens
|
|
42
|
+
|
|
43
|
+
Chunk.new(
|
|
44
|
+
role: :assistant,
|
|
45
|
+
content: has_content ? @last_content_delta : nil,
|
|
46
|
+
thinking: has_thinking ? Thinking.build(text: @last_thinking_delta) : chunk.thinking,
|
|
47
|
+
model_id: chunk.model_id,
|
|
48
|
+
tool_calls: chunk.tool_calls,
|
|
49
|
+
input_tokens: chunk.input_tokens,
|
|
50
|
+
output_tokens: chunk.output_tokens,
|
|
51
|
+
raw: chunk.raw
|
|
52
|
+
)
|
|
53
|
+
end
|
|
54
|
+
|
|
35
55
|
def to_message(response)
|
|
36
56
|
Message.new(
|
|
37
57
|
role: :assistant,
|
|
@@ -137,14 +157,21 @@ module Legion
|
|
|
137
157
|
def append_text_with_thinking(text)
|
|
138
158
|
content_chunk, thinking_chunk = extract_think_tags(text)
|
|
139
159
|
@content << content_chunk
|
|
140
|
-
@
|
|
160
|
+
@last_content_delta << content_chunk
|
|
161
|
+
return unless thinking_chunk
|
|
162
|
+
|
|
163
|
+
@thinking_text << thinking_chunk
|
|
164
|
+
@last_thinking_delta << thinking_chunk
|
|
141
165
|
end
|
|
142
166
|
|
|
143
167
|
def append_thinking_from_chunk(chunk)
|
|
144
168
|
thinking = chunk.thinking
|
|
145
169
|
return unless thinking
|
|
146
170
|
|
|
147
|
-
|
|
171
|
+
if thinking.text
|
|
172
|
+
@thinking_text << thinking.text.to_s
|
|
173
|
+
@last_thinking_delta << thinking.text.to_s
|
|
174
|
+
end
|
|
148
175
|
@thinking_signature ||= thinking.signature # rubocop:disable Naming/MemoizedInstanceVariableName
|
|
149
176
|
end
|
|
150
177
|
|
|
@@ -12,16 +12,11 @@ module Legion
|
|
|
12
12
|
|
|
13
13
|
response = connection.post stream_url, payload do |req|
|
|
14
14
|
req.headers = additional_headers.merge(req.headers) unless additional_headers.empty?
|
|
15
|
+
on_chunk = build_stream_callback(accumulator, block)
|
|
15
16
|
if faraday_1?
|
|
16
|
-
req.options[:on_data] = handle_stream
|
|
17
|
-
accumulator.add chunk
|
|
18
|
-
block.call chunk
|
|
19
|
-
end
|
|
17
|
+
req.options[:on_data] = handle_stream(&on_chunk)
|
|
20
18
|
else
|
|
21
|
-
req.options.on_data = handle_stream
|
|
22
|
-
accumulator.add chunk
|
|
23
|
-
block.call chunk
|
|
24
|
-
end
|
|
19
|
+
req.options.on_data = handle_stream(&on_chunk)
|
|
25
20
|
end
|
|
26
21
|
end
|
|
27
22
|
|
|
@@ -30,6 +25,14 @@ module Legion
|
|
|
30
25
|
message
|
|
31
26
|
end
|
|
32
27
|
|
|
28
|
+
def build_stream_callback(accumulator, block)
|
|
29
|
+
proc do |chunk|
|
|
30
|
+
accumulator.add chunk
|
|
31
|
+
filtered = accumulator.filtered_chunk(chunk)
|
|
32
|
+
block.call(filtered) if filtered
|
|
33
|
+
end
|
|
34
|
+
end
|
|
35
|
+
|
|
33
36
|
def handle_stream(&block)
|
|
34
37
|
build_on_data_handler do |data|
|
|
35
38
|
block.call(build_chunk(data)) if data.is_a?(Hash)
|
|
@@ -88,8 +91,8 @@ module Legion
|
|
|
88
91
|
buffer << chunk
|
|
89
92
|
error_data = Legion::JSON.parse(buffer, symbolize_names: false)
|
|
90
93
|
handle_parsed_error(error_data, env)
|
|
91
|
-
rescue Legion::JSON::ParseError
|
|
92
|
-
|
|
94
|
+
rescue Legion::JSON::ParseError => e
|
|
95
|
+
handle_exception(e, level: :warn, handled: true, operation: 'llm.streaming.handle_failed_response')
|
|
93
96
|
end
|
|
94
97
|
|
|
95
98
|
def handle_sse(chunk, parser, env, &)
|
|
@@ -109,7 +112,7 @@ module Legion
|
|
|
109
112
|
|
|
110
113
|
handle_parsed_error(parsed, env)
|
|
111
114
|
rescue Legion::JSON::ParseError => e
|
|
112
|
-
|
|
115
|
+
handle_exception(e, level: :warn, handled: true, operation: 'llm.streaming.handle_data')
|
|
113
116
|
end
|
|
114
117
|
|
|
115
118
|
def handle_error_event(data, env)
|
|
@@ -120,7 +123,7 @@ module Legion
|
|
|
120
123
|
error_data = Legion::JSON.parse(data, symbolize_names: false)
|
|
121
124
|
[500, error_data['message'] || 'Unknown streaming error']
|
|
122
125
|
rescue Legion::JSON::ParseError => e
|
|
123
|
-
|
|
126
|
+
handle_exception(e, level: :warn, handled: true, operation: 'llm.streaming.parse_streaming_error')
|
|
124
127
|
[500, "Failed to parse error: #{data}"]
|
|
125
128
|
end
|
|
126
129
|
|
|
@@ -130,11 +133,11 @@ module Legion
|
|
|
130
133
|
ErrorMiddleware.parse_error(provider: self, response: error_response)
|
|
131
134
|
end
|
|
132
135
|
|
|
133
|
-
def parse_error_from_json(data, env,
|
|
136
|
+
def parse_error_from_json(data, env, _error_message)
|
|
134
137
|
parsed_data = Legion::JSON.parse(data, symbolize_names: false)
|
|
135
138
|
handle_parsed_error(parsed_data, env)
|
|
136
139
|
rescue Legion::JSON::ParseError => e
|
|
137
|
-
|
|
140
|
+
handle_exception(e, level: :warn, handled: true, operation: 'llm.streaming.parse_error_from_json')
|
|
138
141
|
end
|
|
139
142
|
|
|
140
143
|
def build_stream_error_response(parsed_data, env, status)
|