ollama-client 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,775 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "net/http"
4
+ require "uri"
5
+ require "json"
6
+ require_relative "errors"
7
+ require_relative "schema_validator"
8
+ require_relative "config"
9
+
10
+ module Ollama
11
+ # Main client class for interacting with Ollama API
12
+ # rubocop:disable Metrics/ClassLength
13
+ class Client
14
# Creates a client bound to a single Ollama server.
#
# @param config [Config, nil] Client settings; when nil, #default_config supplies them
def initialize(config: nil)
  @config = config || default_config
  # A base_url written with a trailing slash ("http://host:11434/") would
  # otherwise produce "//api/generate", so trailing slashes are dropped
  # before the endpoint paths are appended.
  base_url = @config.base_url.to_s.sub(%r{/+\z}, "")
  @uri = URI("#{base_url}/api/generate")
  @chat_uri = URI("#{base_url}/api/chat")
  @base_uri = URI(@config.base_url)
end
20
+
21
# Chat API method matching JavaScript ollama.chat() interface.
# Supports structured outputs via the format parameter.
#
# ⚠️ WARNING: chat() is NOT recommended for agent planning or tool routing.
# Use generate() instead for stateless, explicit state injection.
#
# @param messages [Array<Hash>] Array of message hashes with :role and :content
# @param model [String, nil] Model name (overrides config.model)
# @param format [Hash, nil] JSON Schema for structured outputs
# @param options [Hash, nil] Additional options (temperature, top_p, etc.); nil is treated as {}
# @param strict [Boolean] If true, opts in to chat() and re-raises JSON/schema errors without retrying
# @param allow_chat [Boolean] Explicit opt-in required to call chat() unless strict is true
# @param return_meta [Boolean] If true, returns a hash with "data" and "meta" string keys
# @return [Object] Parsed JSON content (validated against format when given),
#   or the { "data" => ..., "meta" => ... } wrapper when return_meta is true
# @raise [Error] when neither allow_chat nor strict is set
# @raise [RetryExhaustedError] when a retryable failure persists past config.retries
# rubocop:disable Metrics/MethodLength
# rubocop:disable Metrics/ParameterLists
def chat(messages:, model: nil, format: nil, options: {}, strict: false, allow_chat: false, return_meta: false)
  unless allow_chat || strict
    raise Error,
          "chat() is intentionally gated because it is easy to misuse inside agents. " \
          "Prefer generate(). If you really want chat(), pass allow_chat: true (or strict: true)."
  end

  # The documented contract allows nil; the request helper indexes into this hash.
  options ||= {}
  attempts = 0
  @current_schema = format # Also read by the /api/generate request helper
  started_at = monotonic_time

  begin
    attempts += 1
    attempt_started_at = monotonic_time
    raw = call_chat_api(model: model, messages: messages, format: format, tools: nil, options: options)
    attempt_latency_ms = elapsed_ms(attempt_started_at)

    emit_response_hook(
      raw,
      {
        endpoint: "/api/chat",
        model: model || @config.model,
        attempt: attempts,
        attempt_latency_ms: attempt_latency_ms
      }
    )

    parsed = parse_json_response(raw)

    # CRITICAL: If format is provided, free-text output is forbidden
    if format
      # JSON numbers and booleans do not respond to #empty?; they are passed
      # on to the validator instead of blowing up with NoMethodError here.
      if parsed.nil? || (parsed.respond_to?(:empty?) && parsed.empty?)
        raise SchemaViolationError,
              "Empty or nil response when format schema is required"
      end

      SchemaValidator.validate!(parsed, format)
    end

    return parsed unless return_meta

    {
      "data" => parsed,
      "meta" => {
        "endpoint" => "/api/chat",
        "model" => model || @config.model,
        "attempts" => attempts,
        "latency_ms" => elapsed_ms(started_at)
      }
    }
  rescue NotFoundError => e
    # 404s are not retried; the error is re-raised after enhancement.
    enhanced_error = enhance_not_found_error(e)
    raise enhanced_error
  rescue HTTPError => e
    raise e unless e.retryable?
    raise RetryExhaustedError, "Failed after #{attempts} attempts: #{e.message}" if attempts > @config.retries

    retry
  rescue InvalidJSONError, SchemaViolationError => e
    raise e if strict
    raise RetryExhaustedError, "Failed after #{attempts} attempts: #{e.message}" if attempts > @config.retries

    retry
  rescue TimeoutError, Error => e
    raise RetryExhaustedError, "Failed after #{attempts} attempts: #{e.message}" if attempts > @config.retries

    retry
  end
end
105
+ # rubocop:enable Metrics/ParameterLists
106
+
107
# Raw Chat API method that returns the full parsed response body.
#
# This is intended for advanced use cases such as tool-calling loops where
# callers need access to fields like `message.tool_calls`.
#
# @param messages [Array<Hash>] Array of message hashes with :role and :content
# @param model [String, nil] Model name (overrides config.model)
# @param format [Hash, nil] JSON Schema for structured outputs (validates message.content JSON)
# @param tools [Array<Hash>, nil] Tool definitions (OpenAI-style schema) sent to Ollama
# @param options [Hash, nil] Additional options (temperature, top_p, etc.); nil is treated as {}
# @param strict [Boolean] If true, opts in to chat_raw() and raises JSON/schema errors without retrying
# @param allow_chat [Boolean] Explicit opt-in required to call chat_raw() unless strict is true
# @param return_meta [Boolean] If true, returns a hash with "data" and "meta" string keys
# @param stream [Boolean] If true, uses the streaming request helper
# @yield [chunk] Forwarded to the streaming helper when stream is true
# @return [Hash] Full parsed JSON response body, or the { "data", "meta" } wrapper
# @raise [Error] when neither allow_chat nor strict is set
# @raise [RetryExhaustedError] when a retryable failure persists past config.retries
# rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity, Metrics/ParameterLists
def chat_raw(messages:, model: nil, format: nil, tools: nil, options: {}, strict: false, allow_chat: false,
             return_meta: false, stream: false, &on_chunk)
  unless allow_chat || strict
    raise Error,
          "chat_raw() is intentionally gated because it is easy to misuse inside agents. " \
          "Prefer generate(). If you really want chat_raw(), pass allow_chat: true (or strict: true)."
  end

  # The documented contract allows nil; the request helpers index into this hash.
  options ||= {}
  attempts = 0
  @current_schema = format # Also read by the /api/generate request helper
  started_at = monotonic_time

  begin
    attempts += 1
    attempt_started_at = monotonic_time
    raw_body =
      if stream
        call_chat_api_raw_stream(
          model: model,
          messages: messages,
          format: format,
          tools: tools,
          options: options,
          &on_chunk
        )
      else
        call_chat_api_raw(model: model, messages: messages, format: format, tools: tools, options: options)
      end
    attempt_latency_ms = elapsed_ms(attempt_started_at)

    emit_response_hook(
      raw_body.is_a?(Hash) ? raw_body.to_json : raw_body,
      {
        endpoint: "/api/chat",
        model: model || @config.model,
        attempt: attempts,
        attempt_latency_ms: attempt_latency_ms
      }
    )

    # `raw_body` is either a JSON string (non-stream) or a Hash (stream).
    parsed_body = raw_body.is_a?(Hash) ? raw_body : JSON.parse(raw_body)

    # If a format schema is provided, validate the assistant content JSON.
    if format
      content = parsed_body.dig("message", "content")
      if content.nil? || content.empty?
        raise SchemaViolationError,
              "Empty or nil response when format schema is required"
      end

      parsed_content = parse_json_response(content)
      # JSON numbers and booleans do not respond to #empty?; they are passed
      # on to the validator instead of blowing up with NoMethodError here.
      if parsed_content.nil? || (parsed_content.respond_to?(:empty?) && parsed_content.empty?)
        raise SchemaViolationError,
              "Empty or nil response when format schema is required"
      end

      SchemaValidator.validate!(parsed_content, format)
    end

    return parsed_body unless return_meta

    {
      "data" => parsed_body,
      "meta" => {
        "endpoint" => "/api/chat",
        "model" => model || @config.model,
        "attempts" => attempts,
        "latency_ms" => elapsed_ms(started_at)
      }
    }
  rescue NotFoundError => e
    # 404s are not retried; the error is re-raised after enhancement.
    enhanced_error = enhance_not_found_error(e)
    raise enhanced_error
  rescue HTTPError => e
    raise e unless e.retryable?
    raise RetryExhaustedError, "Failed after #{attempts} attempts: #{e.message}" if attempts > @config.retries

    retry
  rescue JSON::ParserError => e
    raise InvalidJSONError, "Failed to parse API response: #{e.message}" if strict
    raise RetryExhaustedError, "Failed after #{attempts} attempts: #{e.message}" if attempts > @config.retries

    retry
  rescue InvalidJSONError, SchemaViolationError => e
    raise e if strict
    raise RetryExhaustedError, "Failed after #{attempts} attempts: #{e.message}" if attempts > @config.retries

    retry
  rescue TimeoutError, Error => e
    raise RetryExhaustedError, "Failed after #{attempts} attempts: #{e.message}" if attempts > @config.retries

    retry
  end
end
214
+ # rubocop:enable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity, Metrics/ParameterLists
215
+
216
# Calls /api/generate and returns the reply parsed as JSON and validated
# against `schema`.
#
# @param prompt [String] Prompt text handed to the request helper
# @param schema [Hash] JSON Schema the parsed reply must satisfy
# @param strict [Boolean] If true, JSON/schema errors are re-raised immediately instead of retried
# @param return_meta [Boolean] If true, returns a hash with "data" and "meta" string keys
# @return [Object] Parsed, validated JSON, or the { "data", "meta" } wrapper when return_meta is true
# @raise [RetryExhaustedError] when a retryable failure persists past config.retries
def generate(prompt:, schema:, strict: false, return_meta: false)
  attempts = 0
  @current_schema = schema # Read by the request helper to set `format` and enhance the prompt
  started_at = monotonic_time

  begin
    attempts += 1
    attempt_started_at = monotonic_time
    raw = call_api(prompt)
    attempt_latency_ms = elapsed_ms(attempt_started_at)

    emit_response_hook(
      raw,
      {
        endpoint: "/api/generate",
        model: @config.model,
        attempt: attempts,
        attempt_latency_ms: attempt_latency_ms
      }
    )

    parsed = parse_json_response(raw)

    # CRITICAL: free-text or empty output is forbidden. JSON numbers and
    # booleans do not respond to #empty?, so they are handed to the validator
    # rather than raising NoMethodError (which would bypass the retry logic).
    if parsed.nil? || (parsed.respond_to?(:empty?) && parsed.empty?)
      raise SchemaViolationError, "Empty or nil response when schema is required"
    end

    SchemaValidator.validate!(parsed, schema)
    return parsed unless return_meta

    {
      "data" => parsed,
      "meta" => {
        "endpoint" => "/api/generate",
        "model" => @config.model,
        "attempts" => attempts,
        "latency_ms" => elapsed_ms(started_at)
      }
    }
  rescue NotFoundError => e
    # 404 errors are never retried, but we can suggest models
    enhanced_error = enhance_not_found_error(e)
    raise enhanced_error
  rescue HTTPError => e
    # Don't retry non-retryable HTTP errors (400, etc.)
    raise e unless e.retryable?
    raise RetryExhaustedError, "Failed after #{attempts} attempts: #{e.message}" if attempts > @config.retries

    retry
  rescue InvalidJSONError, SchemaViolationError => e
    raise e if strict
    raise RetryExhaustedError, "Failed after #{attempts} attempts: #{e.message}" if attempts > @config.retries

    retry
  rescue TimeoutError, Error => e
    raise RetryExhaustedError, "Failed after #{attempts} attempts: #{e.message}" if attempts > @config.retries

    retry
  end
end
275
+ # rubocop:enable Metrics/MethodLength
276
+
277
# Strict variant of #generate: forwards its arguments and always passes
# strict: true.
#
# @param prompt [String] Forwarded to #generate
# @param schema [Hash] Forwarded to #generate
# @param return_meta [Boolean] Forwarded to #generate
# @return [Object] Whatever #generate returns
def generate_strict!(prompt:, schema:, return_meta: false)
  forwarded = { prompt: prompt, schema: schema, strict: true, return_meta: return_meta }
  generate(**forwarded)
end
280
+
281
# Lightweight server health check.
# Returns true/false by default; pass return_meta: true for details.
#
# NOTE(review): the probe targets "api/ping" under the base URL; any non-2xx
# reply is reported as unhealthy, so confirm the server serves that path.
#
# @param return_meta [Boolean] If true, returns { "ok" => Boolean, "meta" => Hash } instead of a Boolean
# @return [Boolean, Hash] Health flag, or the flag plus endpoint/status-or-error/latency details
# rubocop:disable Metrics/MethodLength
def health(return_meta: false)
  base = @base_uri.to_s
  ping_uri = URI.join(base.end_with?("/") ? base : "#{base}/", "api/ping")
  started_at = monotonic_time

  # Shapes the result identically for the success, timeout and connection-failure paths.
  report = lambda do |ok, detail|
    next ok unless return_meta

    {
      "ok" => ok,
      "meta" => { "endpoint" => "/api/ping" }.merge(detail).merge("latency_ms" => elapsed_ms(started_at))
    }
  end

  req = Net::HTTP::Get.new(ping_uri)
  res = Net::HTTP.start(
    ping_uri.hostname,
    ping_uri.port,
    # Without this flag an https:// base URL is spoken to in plain HTTP.
    use_ssl: ping_uri.scheme == "https",
    read_timeout: @config.timeout,
    open_timeout: @config.timeout
  ) { |http| http.request(req) }

  report.call(res.is_a?(Net::HTTPSuccess), { "status_code" => res.code.to_i })
rescue Net::ReadTimeout, Net::OpenTimeout
  report.call(false, { "error" => "timeout" })
rescue Errno::ECONNREFUSED, Errno::EHOSTUNREACH, SocketError => e
  report.call(false, { "error" => e.message })
end
330
+ # rubocop:enable Metrics/MethodLength
331
+
332
# Lists the model names reported by the server's /api/tags endpoint.
#
# @return [Array] The "name" of every entry under "models"; [] when that key is absent
# @raise [Error] on a non-2xx response or a connection failure
# @raise [InvalidJSONError] when the response body is not valid JSON
# @raise [TimeoutError] when the request times out
def list_models
  # Trailing slashes on base_url would otherwise produce "//api/tags".
  base_url = @config.base_url.to_s.sub(%r{/+\z}, "")
  tags_uri = URI("#{base_url}/api/tags")
  req = Net::HTTP::Get.new(tags_uri)

  res = Net::HTTP.start(
    tags_uri.hostname,
    tags_uri.port,
    # Without this flag an https:// base URL is spoken to in plain HTTP.
    use_ssl: tags_uri.scheme == "https",
    read_timeout: @config.timeout,
    open_timeout: @config.timeout
  ) { |http| http.request(req) }

  raise Error, "Failed to fetch models: HTTP #{res.code}" unless res.is_a?(Net::HTTPSuccess)

  body = JSON.parse(res.body)
  body["models"]&.map { |m| m["name"] } || []
rescue JSON::ParserError => e
  raise InvalidJSONError, "Failed to parse models response: #{e.message}"
rescue Net::ReadTimeout, Net::OpenTimeout
  raise TimeoutError, "Request timed out after #{@config.timeout}s"
rescue Errno::ECONNREFUSED, Errno::EHOSTUNREACH, SocketError => e
  raise Error, "Connection failed: #{e.message}"
end
355
+
356
+ private
357
+
358
# Converts a non-success HTTP response into the matching client exception.
# Always raises.
#
# @param res [#code, #message] HTTP response object
# @param requested_model [String, nil] Model named in the request; defaults to config.model
# @raise [NotFoundError] for status 404, carrying the requested model
# @raise [HTTPError] for every other status, carrying the numeric status code
def handle_http_error(res, requested_model: nil)
  code = res.code.to_i
  model_name = requested_model || @config.model

  case code
  when 404
    raise NotFoundError.new(res.message, requested_model: model_name)
  else
    # Whether the failure is retryable is decided by HTTPError#retryable?.
    raise HTTPError.new("HTTP #{res.code}: #{res.message}", code)
  end
end
369
+
370
# Supplies the configuration used when the caller passes none.
#
# @return [Config] a fresh Config when OllamaClient is not defined,
#   otherwise a duplicate of OllamaClient.config
def default_config
  return Config.new unless defined?(OllamaClient)

  # Hand out a copy so a client does not share the mutable global
  # OllamaClient.config object with other clients/threads.
  OllamaClient.config.dup
end
380
+
381
# Builds a NotFoundError enriched with similar model names.
#
# @param error [NotFoundError] Original 404 error
# @return [NotFoundError] A new error carrying suggestions, or the original
#   error when it names no model or the model list cannot be fetched
def enhance_not_found_error(error)
  wanted = error.requested_model
  return error if wanted.nil?

  begin
    candidates = find_similar_models(wanted, list_models)
    NotFoundError.new(error.message, requested_model: wanted, suggestions: candidates)
  rescue Error
    # Listing models failed; fall back to the error we already have.
    error
  end
end
393
+
394
# Appends a JSON-only instruction (with a schema summary) to the prompt.
#
# @param prompt [String] Original prompt
# @return [String] The prompt unchanged when no schema is stored or the prompt
#   already mentions JSON (case-insensitive); otherwise prompt + instruction
def enhance_prompt_for_json(prompt)
  schema = @current_schema
  return prompt if !schema || prompt.match?(/json|JSON/i)

  instruction = "CRITICAL: Respond with ONLY valid JSON (no markdown code blocks, no explanations). " \
                "The JSON must include these exact required fields: #{summarize_schema(schema)}"
  [prompt, instruction].join("\n\n")
end
405
+
406
# Renders a short, human-readable summary of a JSON Schema for use in prompts:
# the list of required fields plus an example object with placeholder values.
#
# @param schema [Hash, Object] JSON Schema with string keys ("required", "properties")
# @return [String] "object" when the schema is not a Hash or declares neither
#   required fields nor properties; otherwise the required list and example JSON
def summarize_schema(schema)
  return "object" unless schema.is_a?(Hash)

  required = schema["required"] || []
  properties = schema["properties"] || {}
  return "object" if required.empty? && properties.empty?

  # Create example JSON structure with one placeholder per required field.
  example = required.each_with_object({}) do |key, acc|
    prop = properties[key] || {}
    acc[key] = case prop["type"]
               when "string" then "string_value"
               # "integer" previously fell through to {}, showing the model an
               # object where a number is required.
               when "number", "integer" then 0
               when "boolean" then true
               when "array" then []
               else {}
               end
  end

  required_list = required.map { |k| "\"#{k}\"" }.join(", ")
  example_json = JSON.pretty_generate(example)
  "Required fields: [#{required_list}]. Example structure:\n#{example_json}"
end
430
+
431
# Extracts the JSON fragment from a raw model reply and parses it.
#
# @param raw [String, nil] Raw text returned by the model
# @return [Object] The parsed JSON value
# @raise [InvalidJSONError] when the extracted fragment cannot be parsed
def parse_json_response(raw)
  fragment = extract_json_fragment(raw)
  JSON.parse(fragment)
rescue JSON::ParserError => e
  # Only the first 200 characters of the fragment are echoed back.
  preview = fragment&.slice(0, 200)
  raise InvalidJSONError, "Failed to parse extracted JSON: #{e.message}. Extracted: #{preview}..."
end
437
+
438
+ # rubocop:disable Metrics/CyclomaticComplexity, Metrics/MethodLength, Metrics/PerceivedComplexity
439
# Returns the first complete JSON object/array found in `text`, tolerating
# prefix/suffix noise around it.
#
# The scan works on byte offsets throughout. The previous version located the
# start with a character index but walked the text with #getbyte, so any
# non-ASCII character shifted the scan and produced the wrong fragment.
# Byte scanning is safe for UTF-8 because the structural characters are ASCII
# and bytes of multi-byte characters are never in the ASCII range.
#
# @param text [String, nil] Raw model output
# @return [String] The whole stripped text when it parses as JSON, otherwise
#   the balanced {...} or [...] fragment
# @raise [InvalidJSONError] when the text is empty, contains no `{`/`[`, or
#   the brackets are mismatched or never closed
def extract_json_fragment(text)
  raise InvalidJSONError, "Empty response body" if text.nil? || text.empty?

  stripped = text.lstrip

  # Fast path: the whole (trimmed) body is valid JSON (including primitives).
  if stripped.start_with?("{", "[", "\"", "-", "t", "f", "n") || stripped.match?(/\A\d/)
    begin
      JSON.parse(stripped)
      return stripped
    rescue JSON::ParserError
      # Fall back to extraction below (common with prefix/suffix noise).
    end
  end

  total_bytes = text.bytesize
  start_idx = (0...total_bytes).find do |idx|
    byte = text.getbyte(idx)
    byte == 123 || byte == 91 # { or [
  end
  raise InvalidJSONError, "No JSON found in response. Response: #{text[0..200]}..." unless start_idx

  # A byte-based preview may end inside a multi-byte character; scrub repairs it.
  preview = -> { text.byteslice(start_idx, 200).scrub }

  stack = [] # expected closing bytes, innermost last
  in_string = false
  escape = false

  (start_idx...total_bytes).each do |i|
    ch = text.getbyte(i)

    if in_string
      if escape
        escape = false
      elsif ch == 92 # backslash
        escape = true
      elsif ch == 34 # double-quote
        in_string = false
      end
      next
    end

    case ch
    when 34 # double-quote
      in_string = true
    when 123 # {
      stack << 125 # }
    when 91 # [
      stack << 93 # ]
    when 125, 93 # }, ]
      raise InvalidJSONError, "Malformed JSON in response. Response: #{preview.call}..." if stack.pop != ch
      return text.byteslice(start_idx, i - start_idx + 1) if stack.empty?
    end
  end

  raise InvalidJSONError, "Incomplete JSON in response. Response: #{preview.call}..."
end
493
+ # rubocop:enable Metrics/CyclomaticComplexity, Metrics/MethodLength, Metrics/PerceivedComplexity
494
+
495
# Invokes the optional config.on_response observer with the raw reply and
# its metadata.
#
# @param raw [Object] Raw response passed to the hook
# @param meta [Hash] Metadata passed to the hook
# @return [Object, nil] The hook's return value; nil when no callable hook is
#   configured or the hook raised a StandardError
def emit_response_hook(raw, meta)
  observer = @config.on_response
  observer.call(raw, meta) if observer.respond_to?(:call)
rescue StandardError
  # Observability hooks must never break the main flow.
  nil
end
504
+
505
# Reads the monotonic clock, used for measuring durations.
#
# @return [Float] Seconds as reported by Process::CLOCK_MONOTONIC
def monotonic_time
  # :float_second is the default unit; it is spelled out here for clarity.
  Process.clock_gettime(Process::CLOCK_MONOTONIC, :float_second)
end
508
+
509
# Milliseconds elapsed since `started_at`, rounded to one decimal place.
#
# @param started_at [Float] Earlier reading from #monotonic_time (seconds)
# @return [Float] Elapsed time in milliseconds
def elapsed_ms(started_at)
  delta_seconds = monotonic_time - started_at
  (delta_seconds * 1000.0).round(1)
end
512
+
513
# Suggests available models that resemble the requested name.
#
# First looks for substring containment in either direction; if that finds
# nothing, compares the name segments obtained by splitting on ":", ".", "_"
# and "-".
#
# @param requested [String] Model name that was asked for
# @param available [Array<String>] Model names to pick from
# @param limit [Integer] Maximum number of suggestions
# @return [Array<String>] Up to `limit` matches, in the order of `available`
def find_similar_models(requested, available, limit: 5)
  return [] if available.empty?

  # Simple similarity: models containing the requested name or vice versa
  wanted = requested.downcase
  matches = available.select do |model|
    candidate = model.downcase
    candidate.include?(wanted) || wanted.include?(candidate)
  end

  # Also try fuzzy matching on model name parts
  if matches.empty?
    # Empty segments (from names such as ":latest" or "a::b") are dropped:
    # every string includes "", so they would match every model.
    segments = ->(name) { name.split(/[:._-]/).reject(&:empty?) }
    wanted_parts = segments.call(wanted)

    matches = available.select do |model|
      model_parts = segments.call(model.downcase)
      wanted_parts.any? { |part| model_parts.any? { |mp| mp.include?(part) || part.include?(mp) } }
    end
  end

  matches.first(limit)
end
534
+
535
# Sends one non-streaming request to the chat endpoint and returns the
# assistant's message content.
#
# @param model [String, nil] Model name; config.model when nil
# @param messages [Array<Hash>] Conversation messages
# @param format [Hash, nil] JSON Schema sent as `format` when present
# @param tools [Array<Hash>, nil] Tool definitions sent as `tools` when present
# @param options [Hash, nil] Per-call options; override the config defaults for
#   temperature/top_p/num_ctx, and any other keys are forwarded as well
# @return [String, nil] The `message.content` field of the response
# @raise [InvalidJSONError] when the body is not JSON or has no `message` object
# @raise [TimeoutError] when the request times out
# @raise [Error] when the connection fails
def call_chat_api(model:, messages:, format:, tools:, options:)
  req = Net::HTTP::Post.new(@chat_uri)
  req["Content-Type"] = "application/json"

  # nil options and nil values fall back to the config defaults. Keys are
  # normalised to symbols so a string key cannot duplicate a default.
  overrides = (options || {}).each_with_object({}) do |(key, value), acc|
    acc[key.to_s.to_sym] = value unless value.nil?
  end

  body = {
    model: model || @config.model,
    messages: messages,
    stream: false,
    options: {
      temperature: @config.temperature,
      top_p: @config.top_p,
      num_ctx: @config.num_ctx
    }.merge(overrides)
  }

  # Use Ollama's native format parameter for structured outputs
  body[:format] = format if format
  body[:tools] = tools if tools

  req.body = body.to_json

  res = Net::HTTP.start(
    @chat_uri.hostname,
    @chat_uri.port,
    # Without this flag an https:// base URL is spoken to in plain HTTP.
    use_ssl: @chat_uri.scheme == "https",
    read_timeout: @config.timeout,
    open_timeout: @config.timeout
  ) { |http| http.request(req) }

  handle_http_error(res, requested_model: model || @config.model) unless res.is_a?(Net::HTTPSuccess)

  response_body = JSON.parse(res.body)
  # Chat API returns message.content, not response. Report a missing message
  # as InvalidJSONError rather than letting NoMethodError escape.
  message = response_body.is_a?(Hash) ? response_body["message"] : nil
  raise InvalidJSONError, "Chat response has no message object" unless message.is_a?(Hash)

  message["content"]
rescue JSON::ParserError => e
  raise InvalidJSONError, "Failed to parse API response: #{e.message}"
rescue Net::ReadTimeout, Net::OpenTimeout
  raise TimeoutError, "Request timed out after #{@config.timeout}s"
rescue Errno::ECONNREFUSED, Errno::EHOSTUNREACH, SocketError => e
  raise Error, "Connection failed: #{e.message}"
end
579
+
580
# Sends one non-streaming request to the generate endpoint and returns the
# text of the `response` field.
#
# @param prompt [String] Prompt text; enhanced with a JSON instruction when a schema is stored
# @return [String, nil] The `response` field of the JSON body
# @raise [InvalidJSONError] when the body is not valid JSON
# @raise [TimeoutError] when the request times out
# @raise [Error] when the connection fails
def call_api(prompt)
  req = Net::HTTP::Post.new(@uri)
  req["Content-Type"] = "application/json"

  # Sampling parameters are nested under `options`, matching the chat request
  # helpers in this class; they were previously sent at the top level of the
  # body, which is not where the server looks for model parameters.
  payload = {
    model: @config.model,
    prompt: prompt,
    stream: false,
    options: {
      temperature: @config.temperature,
      top_p: @config.top_p,
      num_ctx: @config.num_ctx
    }
  }

  # Use Ollama's native format parameter for structured outputs
  if @current_schema
    payload[:format] = @current_schema
    # Also enhance prompt as fallback (some models work better with both)
    payload[:prompt] = enhance_prompt_for_json(prompt)
  end

  req.body = payload.to_json

  res = Net::HTTP.start(
    @uri.hostname,
    @uri.port,
    # Without this flag an https:// base URL is spoken to in plain HTTP.
    use_ssl: @uri.scheme == "https",
    read_timeout: @config.timeout,
    open_timeout: @config.timeout
  ) { |http| http.request(req) }

  handle_http_error(res) unless res.is_a?(Net::HTTPSuccess)

  parsed = JSON.parse(res.body)
  # A non-object body (e.g. `null`) has no response field to read.
  parsed.is_a?(Hash) ? parsed["response"] : nil
rescue JSON::ParserError => e
  raise InvalidJSONError, "Failed to parse API response: #{e.message}"
rescue Net::ReadTimeout, Net::OpenTimeout
  raise TimeoutError, "Request timed out after #{@config.timeout}s"
rescue Errno::ECONNREFUSED, Errno::EHOSTUNREACH, SocketError => e
  raise Error, "Connection failed: #{e.message}"
end
621
+
622
# Sends one non-streaming request to the chat endpoint and returns the
# unparsed response body.
#
# @param model [String, nil] Model name; config.model when nil
# @param messages [Array<Hash>] Conversation messages
# @param format [Hash, nil] JSON Schema sent as `format` when present
# @param tools [Array<Hash>, nil] Tool definitions sent as `tools` when present
# @param options [Hash, nil] Per-call options; override the config defaults for
#   temperature/top_p/num_ctx, and any other keys are forwarded as well
# @return [String] Raw response body
# @raise [TimeoutError] when the request times out
# @raise [Error] when the connection fails
def call_chat_api_raw(model:, messages:, format:, tools:, options:)
  req = Net::HTTP::Post.new(@chat_uri)
  req["Content-Type"] = "application/json"

  # nil options and nil values fall back to the config defaults. Keys are
  # normalised to symbols so a string key cannot duplicate a default.
  overrides = (options || {}).each_with_object({}) do |(key, value), acc|
    acc[key.to_s.to_sym] = value unless value.nil?
  end

  body = {
    model: model || @config.model,
    messages: messages,
    stream: false,
    options: {
      temperature: @config.temperature,
      top_p: @config.top_p,
      num_ctx: @config.num_ctx
    }.merge(overrides)
  }

  body[:format] = format if format
  body[:tools] = tools if tools

  req.body = body.to_json

  res = Net::HTTP.start(
    @chat_uri.hostname,
    @chat_uri.port,
    # Without this flag an https:// base URL is spoken to in plain HTTP.
    use_ssl: @chat_uri.scheme == "https",
    read_timeout: @config.timeout,
    open_timeout: @config.timeout
  ) { |http| http.request(req) }

  handle_http_error(res, requested_model: model || @config.model) unless res.is_a?(Net::HTTPSuccess)

  res.body
rescue Net::ReadTimeout, Net::OpenTimeout
  raise TimeoutError, "Request timed out after #{@config.timeout}s"
rescue Errno::ECONNREFUSED, Errno::EHOSTUNREACH, SocketError => e
  raise Error, "Connection failed: #{e.message}"
end
659
+
660
+ # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/MethodLength, Metrics/PerceivedComplexity, Metrics/BlockLength
661
# Sends a streaming request to the chat endpoint, forwards each parsed frame
# to the caller's block, and returns one aggregated response Hash.
#
# @param model [String, nil] Model name; config.model when nil
# @param messages [Array<Hash>] Conversation messages
# @param format [Hash, nil] JSON Schema sent as `format` when present
# @param tools [Array<Hash>, nil] Tool definitions sent as `tools` when present
# @param options [Hash, nil] Per-call options; override the config defaults for
#   temperature/top_p/num_ctx, and any other keys are forwarded as well
# @yield [obj] Each parsed JSON frame, in arrival order
# @return [Hash] The final `done: true` frame with the concatenated message
#   content merged in, or just the aggregated message when no such frame arrived
# @raise [InvalidJSONError] when a frame is not valid JSON
# @raise [TimeoutError] when the request times out
# @raise [Error] when the connection fails
def call_chat_api_raw_stream(model:, messages:, format:, tools:, options:)
  req = Net::HTTP::Post.new(@chat_uri)
  req["Content-Type"] = "application/json"

  # nil options and nil values fall back to the config defaults. Keys are
  # normalised to symbols so a string key cannot duplicate a default.
  overrides = (options || {}).each_with_object({}) do |(key, value), acc|
    acc[key.to_s.to_sym] = value unless value.nil?
  end

  body = {
    model: model || @config.model,
    messages: messages,
    stream: true,
    options: {
      temperature: @config.temperature,
      top_p: @config.top_p,
      num_ctx: @config.num_ctx
    }.merge(overrides)
  }

  body[:format] = format if format
  body[:tools] = tools if tools

  req.body = body.to_json

  final_obj = nil
  aggregated = {
    "message" => {
      "role" => "assistant",
      "content" => +""
    }
  }

  # Parses one line of the stream, hands it to the caller's block and folds
  # it into `aggregated`.
  consume_line = lambda do |raw_line|
    line = raw_line.strip

    # Tolerate SSE framing (e.g. "data: {...}") and ignore non-data lines.
    if line.start_with?("data:")
      line = line.sub(/\Adata:\s*/, "").strip
    elsif line.start_with?("event:") || line.start_with?(":")
      next
    end

    next if line.empty? || line == "[DONE]"

    obj = JSON.parse(line)

    # Expose the raw chunk to callers (presentation only).
    yield(obj) if block_given?

    msg = obj["message"]
    if msg.is_a?(Hash)
      delta_content = msg["content"]
      aggregated["message"]["content"] << delta_content.to_s if delta_content

      aggregated["message"]["tool_calls"] = msg["tool_calls"] if msg["tool_calls"]

      aggregated["message"]["role"] = msg["role"] if msg["role"]
    end

    # Many Ollama stream payloads include `done: true` on the last line.
    final_obj = obj if obj["done"] == true
  end

  buffer = +""

  Net::HTTP.start(
    @chat_uri.hostname,
    @chat_uri.port,
    # Without this flag an https:// base URL is spoken to in plain HTTP.
    use_ssl: @chat_uri.scheme == "https",
    read_timeout: @config.timeout,
    open_timeout: @config.timeout
  ) do |http|
    http.request(req) do |res|
      handle_http_error(res, requested_model: model || @config.model) unless res.is_a?(Net::HTTPSuccess)

      res.read_body do |chunk|
        buffer << chunk

        # A chunk may hold several lines or end mid-line; only complete
        # lines are consumed here.
        while (newline_idx = buffer.index("\n"))
          consume_line.call(buffer.slice!(0, newline_idx + 1))
        end
      end

      # The last frame may arrive without a trailing newline. It used to be
      # dropped, which could lose the `done: true` frame.
      consume_line.call(buffer.slice!(0, buffer.length)) unless buffer.strip.empty?
    end
  end

  # Prefer returning the final "done: true" frame (it typically contains
  # useful metadata like durations), but always use our aggregated message
  # content/tool_calls since streaming payloads often send deltas.
  if final_obj.is_a?(Hash)
    combined = final_obj.dup
    combined_message =
      if combined["message"].is_a?(Hash)
        combined["message"].dup
      else
        {}
      end

    agg_message = aggregated["message"] || {}

    agg_content = agg_message["content"].to_s
    combined_message["content"] = agg_content unless agg_content.empty?

    combined_message["tool_calls"] = agg_message["tool_calls"] if agg_message.key?("tool_calls")
    combined_message["role"] ||= agg_message["role"] if agg_message["role"]

    combined["message"] = combined_message unless combined_message.empty?
    return combined
  end

  aggregated
rescue JSON::ParserError => e
  raise InvalidJSONError, "Failed to parse streaming response: #{e.message}"
rescue Net::ReadTimeout, Net::OpenTimeout
  raise TimeoutError, "Request timed out after #{@config.timeout}s"
rescue Errno::ECONNREFUSED, Errno::EHOSTUNREACH, SocketError => e
  raise Error, "Connection failed: #{e.message}"
end
772
+ # rubocop:enable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/MethodLength, Metrics/PerceivedComplexity, Metrics/BlockLength
773
+ end
774
+ # rubocop:enable Metrics/ClassLength
775
+ end