lex-llm-ollama 0.2.10 → 0.2.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 98c1040c20dfe6ead193a404a6e34fda71e98188859f1e9c828a9778fff1b262
|
|
4
|
+
data.tar.gz: e5ce31cff8c62f5cab9ce6a59d33e90847bfbbe89e2d9445d8f51ce8540fe67c
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: cf3f73f8533cf2bd1145e3beffd12f602150eae3b4ad0031df24eaecac36bef6536d04e18a15b20cb2d6f529812a5cb63f2dfad28ed0638820a582f6e0ff37c9
|
|
7
|
+
data.tar.gz: da96da090f714b1d0811d475d091f637d4124931168039a2d0de2190b4022860e40a0c099819fb1d359c2ee055daba984d87a9748d9481e635fbe9673ea6eab6
|
|
@@ -249,7 +249,7 @@ module Legion
|
|
|
249
249
|
model: model_id,
|
|
250
250
|
messages: format_messages(messages),
|
|
251
251
|
stream: stream,
|
|
252
|
-
think: thinking
|
|
252
|
+
think: thinking == true,
|
|
253
253
|
keep_alive: ollama_keep_alive,
|
|
254
254
|
format: schema_format(schema),
|
|
255
255
|
options: { temperature: temperature }.compact,
|
|
@@ -258,6 +258,77 @@ module Legion
|
|
|
258
258
|
}.compact
|
|
259
259
|
end
|
|
260
260
|
|
|
261
|
+
# Posts +payload+ to the streaming endpoint and assembles the NDJSON reply
# into a single result.
#
# Response data is accumulated in a local buffer by the on_data callback and
# parsed one line at a time; every parsed chunk is collected and, when a block
# is given, passed to it as well.
#
# @param connection [#post] HTTP client whose request object exposes
#   +headers+, +headers=+ and +options.on_data=+ (presumably a Faraday
#   connection -- confirm against the caller)
# @param payload [Object] request body handed to +connection.post+ unchanged
# @param additional_headers [Hash] extra request headers; on key conflicts the
#   headers already present on the request take precedence
# @yield [chunk] each chunk as soon as its line has been parsed
# @return [Object] whatever +finalize_stream+ builds from the collected chunks
def stream_response(connection, payload, additional_headers = {}, &block)
  buffer = +''
  chunks = []

  connection.post(stream_url, payload) do |req|
    req.headers = additional_headers.merge(req.headers) unless additional_headers.empty?
    req.options.on_data = ndjson_handler(buffer, chunks, block)
  end

  # The handler only consumes newline-terminated lines, so a final record that
  # arrives without a trailing "\n" would otherwise be dropped silently.
  trailing = buffer.strip
  parse_ndjson_line(trailing, chunks, block) unless trailing.empty?

  finalize_stream(chunks)
end
|
|
272
|
+
|
|
273
|
+
# Builds the callback that is assigned to the request's +on_data+ option.
#
# Each invocation appends the received data to +buffer+ and then drains every
# complete line from it. Data delivered while +env+ reports a status other
# than 200 is ignored. A proc (not a lambda) is returned, so the callback
# tolerates being invoked without the third +env+ argument.
#
# @param buffer [String] mutable accumulator shared across invocations
# @param chunks [Array] collector handed on to +drain_ndjson_buffer+
# @param block [#call, nil] optional per-chunk callback, handed on as well
# @return [Proc] callback taking (chunk_data, bytes, env)
def ndjson_handler(buffer, chunks, block)
  proc do |chunk_data, _bytes, env|
    status = env.status if env.respond_to?(:status)
    next if status && status != 200

    buffer << chunk_data.to_s
    drain_ndjson_buffer(buffer, chunks, block)
  end
end
|
|
281
|
+
|
|
282
|
+
# Removes every newline-terminated line from the front of +buffer+ and hands
# each non-blank one (whitespace stripped) to +parse_ndjson_line+. Text after
# the last newline stays in +buffer+ for a later call.
#
# @param buffer [String] mutable accumulator; consumed lines are cut out of it
# @param chunks [Array] collector passed through to +parse_ndjson_line+
# @param block [#call, nil] optional per-chunk callback, passed through
# @return [nil]
def drain_ndjson_buffer(buffer, chunks, block)
  until (newline_at = buffer.index("\n")).nil?
    record = buffer.slice!(0, newline_at + 1).strip
    parse_ndjson_line(record, chunks, block) unless record.empty?
  end
end
|
|
290
|
+
|
|
291
|
+
# Decodes one NDJSON line and records the resulting chunk.
#
# A line that decodes to a Hash is converted with +build_chunk+, appended to
# +chunks+ and, when +block+ is present, passed to it. Any other JSON value is
# ignored. A parse error is reported through +handle_exception+ at debug level
# instead of being raised.
#
# @param line [String] a single JSON document
# @param chunks [Array] collector the built chunk is appended to
# @param block [#call, nil] optional per-chunk callback
def parse_ndjson_line(line, chunks, block)
  payload = Legion::JSON.parse(line, symbolize_names: false)
  if payload.is_a?(Hash)
    chunk = build_chunk(payload)
    chunks.push(chunk)
    block.call(chunk) if block
  end
rescue Legion::JSON::ParseError => e
  handle_exception(e, level: :debug, handled: true, operation: 'ollama.stream_parse')
end
|
|
301
|
+
|
|
302
|
+
# Collapses the collected stream chunks into one assistant message.
#
# With no chunks, an assistant message with nil content is returned. Otherwise
# content, thinking and tool calls are combined across all chunks, while the
# model id, token counts and raw payload are taken from the last chunk.
#
# @param chunks [Array] chunks gathered while streaming
# @return [Legion::Extensions::Llm::Message]
def finalize_stream(chunks)
  message_class = Legion::Extensions::Llm::Message
  return message_class.new(role: :assistant, content: nil) if chunks.empty?

  attributes = {
    role: :assistant,
    content: join_stream_content(chunks),
    thinking: join_stream_thinking(chunks),
    tool_calls: merge_stream_tool_calls(chunks)
  }
  final = chunks.last
  attributes[:model_id] = final.model_id
  attributes[:input_tokens] = final.input_tokens
  attributes[:output_tokens] = final.output_tokens
  attributes[:raw] = final.raw

  message_class.new(**attributes)
end
|
|
316
|
+
|
|
317
|
+
# Concatenates the content of every chunk, in order, into one string.
# Chunks whose content is nil contribute nothing; other values are converted
# with +to_s+.
#
# @param chunks [Array<#content>] chunks gathered while streaming
# @return [String, nil] the combined text, or nil when it would be empty
def join_stream_content(chunks)
  pieces = chunks.map(&:content).compact.map(&:to_s)
  combined = pieces.join
  return nil if combined.empty?

  combined
end
|
|
321
|
+
|
|
322
|
+
# Combines the thinking text carried by the chunks and wraps it with
# +Thinking.build+. Chunks without a thinking object, or whose thinking text
# is nil, are skipped; if nothing remains, +Thinking.build+ receives a nil
# text.
#
# @param chunks [Array<#thinking>] chunks gathered while streaming
# @return [Object] whatever +Thinking.build+ returns for the combined text
def join_stream_thinking(chunks)
  fragments = chunks.map { |chunk| chunk.thinking&.text }.select(&:itself)
  combined = fragments.join unless fragments.empty?
  Thinking.build(text: combined)
end
|
|
326
|
+
|
|
327
|
+
# Merges the tool-call collections of all chunks into a single hash. Chunks
# with nil or empty tool calls are skipped; for duplicate keys the value from
# the later chunk wins.
#
# @param chunks [Array<#tool_calls>] chunks gathered while streaming
# @return [Hash, nil] the merged tool calls, or nil when there are none
def merge_stream_tool_calls(chunks)
  combined = chunks.each_with_object({}) do |chunk, acc|
    calls = chunk.tool_calls
    next if !calls || calls.empty?

    acc.merge!(calls)
  end
  combined.empty? ? nil : combined
end
|
|
331
|
+
|
|
261
332
|
def format_messages(messages)
|
|
262
333
|
messages.map do |message|
|
|
263
334
|
content = message.content
|
|
@@ -312,11 +383,13 @@ module Legion
|
|
|
312
383
|
def parse_completion_response(response)
|
|
313
384
|
body = response.body
|
|
314
385
|
message = body.fetch('message', {})
|
|
386
|
+
content, thinking = extract_thinking_from_completion(message)
|
|
315
387
|
Legion::Extensions::Llm::Message.new(
|
|
316
388
|
role: :assistant,
|
|
317
|
-
content:
|
|
389
|
+
content: content,
|
|
318
390
|
model_id: body['model'],
|
|
319
391
|
tool_calls: parse_tool_calls(message['tool_calls']),
|
|
392
|
+
thinking: thinking,
|
|
320
393
|
input_tokens: body['prompt_eval_count'],
|
|
321
394
|
output_tokens: body['eval_count'],
|
|
322
395
|
raw: body
|
|
@@ -325,9 +398,12 @@ module Legion
|
|
|
325
398
|
|
|
326
399
|
def build_chunk(data)
|
|
327
400
|
message = data.fetch('message', {})
|
|
401
|
+
thinking = message['thinking']
|
|
328
402
|
Legion::Extensions::Llm::Chunk.new(
|
|
329
403
|
role: :assistant,
|
|
330
404
|
content: message['content'],
|
|
405
|
+
thinking: thinking ? Thinking.build(text: thinking) : nil,
|
|
406
|
+
tool_calls: parse_tool_calls(message['tool_calls']),
|
|
331
407
|
model_id: data['model'],
|
|
332
408
|
input_tokens: data['prompt_eval_count'],
|
|
333
409
|
output_tokens: data['eval_count'],
|
|
@@ -335,6 +411,22 @@ module Legion
|
|
|
335
411
|
)
|
|
336
412
|
end
|
|
337
413
|
|
|
414
|
+
# Splits a completion message into its visible content and its thinking part.
#
# The message content is run through +Responses::ThinkingExtractor.extract+
# together with the metadata from +thinking_metadata+; the extracted thinking
# text and signature are wrapped with +Thinking.build+.
#
# @param message [Hash] the 'message' object of a completion response
# @return [Array] two elements: the extracted content and the built thinking
def extract_thinking_from_completion(message)
  metadata = thinking_metadata(message)
  result = Responses::ThinkingExtractor.extract(message['content'], metadata: metadata)

  visible = result.content
  reasoning = Thinking.build(text: result.thinking, signature: result.signature)
  [visible, reasoning]
end
|
|
425
|
+
|
|
426
|
+
# Builds the metadata hash passed to the thinking extractor.
#
# @param message [Hash] the 'message' object of a completion response
# @return [Hash] +{ thinking: value }+ when the message has a non-nil
#   'thinking' entry, otherwise an empty hash
def thinking_metadata(message)
  value = message['thinking']
  value.nil? ? {} : { thinking: value }
end
|
|
429
|
+
|
|
338
430
|
def parse_tool_calls(tool_calls)
|
|
339
431
|
return nil unless tool_calls
|
|
340
432
|
|