lex-ollama 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: c524d3518516ca7731280a6e84a2b354b12b47b516545a23bec97f6eda373a90
4
- data.tar.gz: fbfcff5614ac931e74219661eb792f295dfb65d125c93ddb190b40eae74c6f81
3
+ metadata.gz: 7f82aeecea946b03e08e2dc80a8ec66504276a2bb28aaaca5528d02105328166
4
+ data.tar.gz: 6b7b392634ec069693a0b0b030b1619a0a5ae1d3cbb34c2440124c1c52d15e4a
5
5
  SHA512:
6
- metadata.gz: 8975e31624faf65d869fcbe9c512fbf9d35b468bae3b5267726126312ccbe4520f8c5ff339550ce1d2ea44b2561c90d383ae847c67d826e538dd04a2f8a5c2ff
7
- data.tar.gz: 4a01f66393c2a78bbd9cccb707ed19156338501f1de46e2afa78508b4be7d93354e90a01a7eef0acc14c18b817d39189d5fc50152da7905ce311d9d537bf0bfe
6
+ metadata.gz: 25b18ed44dbad71930004a3384dc897e37b245d3cdafa98122e0210a22ac5d5e6be343ab0133e519f8228f026d254e4993ee597f13368590c2fa81971329c6a8
7
+ data.tar.gz: 39b9f4ed1e8a7ccd447b03770a9757c7e2cdbcea03341f07c2dda561db72e3556029152c76256e6f8b85b6d0cb858773e2d7ee9989d5559ffdc9daccfcf6b966
data/CHANGELOG.md CHANGED
@@ -1,5 +1,20 @@
1
1
  # Changelog
2
2
 
3
+ ## [0.2.0] - 2026-03-31
4
+
5
+ ### Added
6
+ - `Helpers::Errors` — Faraday exception classification (TimeoutError, ConnectionFailed) with exponential backoff retry (`with_retry`, 3 retries, 0.5s base delay, capped at 16s)
7
+ - `Helpers::Usage` — standardized usage hash normalization from Ollama response fields (`prompt_eval_count` -> `input_tokens`, `eval_count` -> `output_tokens`, plus duration fields)
8
+ - `Helpers::Client#streaming_client` — Faraday connection without JSON response middleware for streaming endpoints
9
+ - `Runners::Completions#generate_stream` — streaming generate with per-chunk block callback and full text accumulation
10
+ - `Runners::Chat#chat_stream` — streaming chat with per-chunk block callback and full text accumulation
11
+
12
+ ### Changed
13
+ - All runner methods wrapped in `Helpers::Errors.with_retry` for production reliability
14
+ - `Runners::Completions#generate` now returns a `usage:` key with standardized token/duration counts
15
+ - `Runners::Chat#chat` now returns a `usage:` key with standardized token/duration counts
16
+ - `Client` class now overrides `streaming_client` so stored client options (including host) are passed through to streaming connections
17
+
3
18
  ## [0.1.0] - 2026-03-31
4
19
 
5
20
  ### Added
data/README.md CHANGED
@@ -12,9 +12,11 @@ gem install lex-ollama
12
12
 
13
13
  ### Completions
14
14
  - `generate` - Generate a text completion (POST /api/generate)
15
+ - `generate_stream` - Stream a text completion with per-chunk callbacks
15
16
 
16
17
  ### Chat
17
18
  - `chat` - Generate a chat completion with message history and tool support (POST /api/chat)
19
+ - `chat_stream` - Stream a chat completion with per-chunk callbacks
18
20
 
19
21
  ### Models
20
22
  - `create_model` - Create a model from another model, GGUF, or safetensors (POST /api/create)
@@ -54,6 +56,27 @@ result = client.embed(model: 'all-minilm', input: 'Some text to embed')
54
56
 
55
57
  # List models
56
58
  result = client.list_models
59
+
60
+ # Streaming generate
61
+ client.generate_stream(model: 'llama3.2', prompt: 'Tell me a story') do |event|
62
+ case event[:type]
63
+ when :delta then print event[:text]
64
+ when :done then puts "\nDone!"
65
+ end
66
+ end
67
+
68
+ # Streaming chat
69
+ client.chat_stream(model: 'llama3.2', messages: [{ role: 'user', content: 'Hello!' }]) do |event|
70
+ print event[:text] if event[:type] == :delta
71
+ end
72
+ ```
73
+
74
+ All API calls include automatic retry with exponential backoff on connection failures and timeouts.
75
+
76
+ Generate and chat responses include standardized `usage:` data:
77
+ ```ruby
78
+ result = client.generate(model: 'llama3.2', prompt: 'Hello')
79
+ result[:usage] # => { input_tokens: 1, output_tokens: 5, total_duration: ..., ... }
57
80
  ```
58
81
 
59
82
  ## Requirements
@@ -29,6 +29,10 @@ module Legion
29
29
  def client(**override)
30
30
  super(**@opts, **override)
31
31
  end
32
+
33
+ def streaming_client(**override)
34
+ super(**@opts, **override)
35
+ end
32
36
  end
33
37
  end
34
38
  end
@@ -18,6 +18,15 @@ module Legion
18
18
  conn.options.open_timeout = 10
19
19
  end
20
20
  end
21
+
22
+ def streaming_client(host: DEFAULT_HOST, **)
23
+ Faraday.new(url: host) do |conn|
24
+ conn.request :json
25
+ conn.headers['Content-Type'] = 'application/json'
26
+ conn.options.timeout = 300
27
+ conn.options.open_timeout = 10
28
+ end
29
+ end
21
30
  end
22
31
  end
23
32
  end
@@ -0,0 +1,40 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Legion
4
+ module Extensions
5
+ module Ollama
6
+ module Helpers
7
+ module Errors
8
+ MAX_RETRIES = 3
9
+ BASE_DELAY = 0.5
10
+ MAX_DELAY = 16
11
+
12
+ RETRYABLE_EXCEPTIONS = [
13
+ Faraday::TimeoutError,
14
+ Faraday::ConnectionFailed
15
+ ].freeze
16
+
17
+ module_function
18
+
19
+ def retryable?(exception)
20
+ RETRYABLE_EXCEPTIONS.any? { |klass| exception.is_a?(klass) }
21
+ end
22
+
23
+ def with_retry(max_retries: MAX_RETRIES)
24
+ retries = 0
25
+ begin
26
+ yield
27
+ rescue *RETRYABLE_EXCEPTIONS
28
+ retries += 1
29
+ raise if retries > max_retries
30
+
31
+ delay = [BASE_DELAY * (2**(retries - 1)), MAX_DELAY].min
32
+ sleep(delay)
33
+ retry
34
+ end
35
+ end
36
+ end
37
+ end
38
+ end
39
+ end
40
+ end
@@ -0,0 +1,35 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Legion
4
+ module Extensions
5
+ module Ollama
6
+ module Helpers
7
+ module Usage
8
+ EMPTY_USAGE = {
9
+ input_tokens: 0,
10
+ output_tokens: 0,
11
+ total_duration: 0,
12
+ load_duration: 0,
13
+ prompt_eval_duration: 0,
14
+ eval_duration: 0
15
+ }.freeze
16
+
17
+ module_function
18
+
19
+ def from_response(body)
20
+ return EMPTY_USAGE.dup unless body.is_a?(Hash)
21
+
22
+ {
23
+ input_tokens: body['prompt_eval_count'] || 0,
24
+ output_tokens: body['eval_count'] || 0,
25
+ total_duration: body['total_duration'] || 0,
26
+ load_duration: body['load_duration'] || 0,
27
+ prompt_eval_duration: body['prompt_eval_duration'] || 0,
28
+ eval_duration: body['eval_duration'] || 0
29
+ }
30
+ end
31
+ end
32
+ end
33
+ end
34
+ end
35
+ end
@@ -1,6 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require 'legion/extensions/ollama/helpers/client'
4
+ require 'legion/extensions/ollama/helpers/errors'
4
5
 
5
6
  module Legion
6
7
  module Extensions
@@ -10,14 +11,16 @@ module Legion
10
11
  extend Legion::Extensions::Ollama::Helpers::Client
11
12
 
12
13
  def check_blob(digest:, **)
13
- response = client(**).head("/api/blobs/#{digest}")
14
+ response = Helpers::Errors.with_retry { client(**).head("/api/blobs/#{digest}") }
14
15
  { result: response.status == 200, status: response.status }
15
16
  end
16
17
 
17
18
  def push_blob(digest:, body:, **)
18
- response = client(**).post("/api/blobs/#{digest}") do |req|
19
- req.headers['Content-Type'] = 'application/octet-stream'
20
- req.body = body
19
+ response = Helpers::Errors.with_retry do
20
+ client(**).post("/api/blobs/#{digest}") do |req|
21
+ req.headers['Content-Type'] = 'application/octet-stream'
22
+ req.body = body
23
+ end
21
24
  end
22
25
  { result: response.status == 201, status: response.status }
23
26
  end
@@ -1,6 +1,9 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require 'json'
3
4
  require 'legion/extensions/ollama/helpers/client'
5
+ require 'legion/extensions/ollama/helpers/errors'
6
+ require 'legion/extensions/ollama/helpers/usage'
4
7
 
5
8
  module Legion
6
9
  module Extensions
@@ -12,8 +15,40 @@ module Legion
12
15
  def chat(model:, messages:, tools: nil, format: nil, options: nil, stream: false, keep_alive: nil, think: nil, **)
13
16
  body = { model: model, messages: messages, tools: tools, format: format, options: options,
14
17
  stream: stream, keep_alive: keep_alive, think: think }.compact
15
- response = client(**).post('/api/chat', body)
16
- { result: response.body, status: response.status }
18
+ response = Helpers::Errors.with_retry { client(**).post('/api/chat', body) }
19
+ { result: response.body, usage: Helpers::Usage.from_response(response.body), status: response.status }
20
+ end
21
+
22
+ def chat_stream(model:, messages:, tools: nil, format: nil, options: nil, keep_alive: nil, think: nil, **, &block)
23
+ body = { model: model, messages: messages, tools: tools, format: format, options: options,
24
+ stream: true, keep_alive: keep_alive, think: think }.compact
25
+ accumulated = +''
26
+ final_response = nil
27
+ buffer = +''
28
+
29
+ Helpers::Errors.with_retry do
30
+ streaming_client(**).post('/api/chat', body) do |req|
31
+ req.options.on_data = proc do |chunk, _size|
32
+ buffer << chunk
33
+ while (idx = buffer.index("\n"))
34
+ line = buffer.slice!(0, idx + 1).strip
35
+ next if line.empty?
36
+
37
+ parsed = ::JSON.parse(line)
38
+ if parsed['done']
39
+ final_response = parsed
40
+ block&.call({ type: :done, data: parsed })
41
+ else
42
+ text = parsed.dig('message', 'content') || ''
43
+ accumulated << text
44
+ block&.call({ type: :delta, text: text })
45
+ end
46
+ end
47
+ end
48
+ end
49
+ end
50
+
51
+ { result: accumulated, usage: Helpers::Usage.from_response(final_response), status: 200 }
17
52
  end
18
53
 
19
54
  include Legion::Extensions::Helpers::Lex if Legion::Extensions.const_defined?(:Helpers) &&
@@ -1,6 +1,9 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require 'json'
3
4
  require 'legion/extensions/ollama/helpers/client'
5
+ require 'legion/extensions/ollama/helpers/errors'
6
+ require 'legion/extensions/ollama/helpers/usage'
4
7
 
5
8
  module Legion
6
9
  module Extensions
@@ -12,8 +15,40 @@ module Legion
12
15
  def generate(model:, prompt: nil, images: nil, format: nil, options: nil, system: nil, stream: false, keep_alive: nil, **)
13
16
  body = { model: model, prompt: prompt, images: images, format: format, options: options,
14
17
  system: system, stream: stream, keep_alive: keep_alive }.compact
15
- response = client(**).post('/api/generate', body)
16
- { result: response.body, status: response.status }
18
+ response = Helpers::Errors.with_retry { client(**).post('/api/generate', body) }
19
+ { result: response.body, usage: Helpers::Usage.from_response(response.body), status: response.status }
20
+ end
21
+
22
+ def generate_stream(model:, prompt: nil, images: nil, format: nil, options: nil, system: nil, keep_alive: nil, **, &block)
23
+ body = { model: model, prompt: prompt, images: images, format: format, options: options,
24
+ system: system, stream: true, keep_alive: keep_alive }.compact
25
+ accumulated = +''
26
+ final_response = nil
27
+ buffer = +''
28
+
29
+ Helpers::Errors.with_retry do
30
+ streaming_client(**).post('/api/generate', body) do |req|
31
+ req.options.on_data = proc do |chunk, _size|
32
+ buffer << chunk
33
+ while (idx = buffer.index("\n"))
34
+ line = buffer.slice!(0, idx + 1).strip
35
+ next if line.empty?
36
+
37
+ parsed = ::JSON.parse(line)
38
+ if parsed['done']
39
+ final_response = parsed
40
+ block&.call({ type: :done, data: parsed })
41
+ else
42
+ text = parsed['response'] || ''
43
+ accumulated << text
44
+ block&.call({ type: :delta, text: text })
45
+ end
46
+ end
47
+ end
48
+ end
49
+ end
50
+
51
+ { result: accumulated, usage: Helpers::Usage.from_response(final_response), status: 200 }
17
52
  end
18
53
 
19
54
  include Legion::Extensions::Helpers::Lex if Legion::Extensions.const_defined?(:Helpers) &&
@@ -1,6 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require 'legion/extensions/ollama/helpers/client'
4
+ require 'legion/extensions/ollama/helpers/errors'
4
5
 
5
6
  module Legion
6
7
  module Extensions
@@ -12,7 +13,7 @@ module Legion
12
13
  def embed(model:, input:, truncate: nil, options: nil, keep_alive: nil, dimensions: nil, **)
13
14
  body = { model: model, input: input, truncate: truncate, options: options,
14
15
  keep_alive: keep_alive, dimensions: dimensions }.compact
15
- response = client(**).post('/api/embed', body)
16
+ response = Helpers::Errors.with_retry { client(**).post('/api/embed', body) }
16
17
  { result: response.body, status: response.status }
17
18
  end
18
19
 
@@ -1,6 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require 'legion/extensions/ollama/helpers/client'
4
+ require 'legion/extensions/ollama/helpers/errors'
4
5
 
5
6
  module Legion
6
7
  module Extensions
@@ -12,49 +13,51 @@ module Legion
12
13
  def create_model(model:, from: nil, files: nil, system: nil, stream: false, quantize: nil, **)
13
14
  body = { model: model, from: from, files: files, system: system,
14
15
  stream: stream, quantize: quantize }.compact
15
- response = client(**).post('/api/create', body)
16
+ response = Helpers::Errors.with_retry { client(**).post('/api/create', body) }
16
17
  { result: response.body, status: response.status }
17
18
  end
18
19
 
19
20
  def list_models(**)
20
- response = client(**).get('/api/tags')
21
+ response = Helpers::Errors.with_retry { client(**).get('/api/tags') }
21
22
  { result: response.body, status: response.status }
22
23
  end
23
24
 
24
25
  def show_model(model:, verbose: nil, **)
25
26
  body = { model: model, verbose: verbose }.compact
26
- response = client(**).post('/api/show', body)
27
+ response = Helpers::Errors.with_retry { client(**).post('/api/show', body) }
27
28
  { result: response.body, status: response.status }
28
29
  end
29
30
 
30
31
  def copy_model(source:, destination:, **)
31
32
  body = { source: source, destination: destination }
32
- response = client(**).post('/api/copy', body)
33
+ response = Helpers::Errors.with_retry { client(**).post('/api/copy', body) }
33
34
  { result: response.status == 200, status: response.status }
34
35
  end
35
36
 
36
37
  def delete_model(model:, **)
37
38
  body = { model: model }
38
- response = client(**).delete('/api/delete') do |req|
39
- req.body = body
39
+ response = Helpers::Errors.with_retry do
40
+ client(**).delete('/api/delete') do |req|
41
+ req.body = body
42
+ end
40
43
  end
41
44
  { result: response.status == 200, status: response.status }
42
45
  end
43
46
 
44
47
  def pull_model(model:, insecure: nil, stream: false, **)
45
48
  body = { model: model, insecure: insecure, stream: stream }.compact
46
- response = client(**).post('/api/pull', body)
49
+ response = Helpers::Errors.with_retry { client(**).post('/api/pull', body) }
47
50
  { result: response.body, status: response.status }
48
51
  end
49
52
 
50
53
  def push_model(model:, insecure: nil, stream: false, **)
51
54
  body = { model: model, insecure: insecure, stream: stream }.compact
52
- response = client(**).post('/api/push', body)
55
+ response = Helpers::Errors.with_retry { client(**).post('/api/push', body) }
53
56
  { result: response.body, status: response.status }
54
57
  end
55
58
 
56
59
  def list_running(**)
57
- response = client(**).get('/api/ps')
60
+ response = Helpers::Errors.with_retry { client(**).get('/api/ps') }
58
61
  { result: response.body, status: response.status }
59
62
  end
60
63
 
@@ -1,6 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require 'legion/extensions/ollama/helpers/client'
4
+ require 'legion/extensions/ollama/helpers/errors'
4
5
 
5
6
  module Legion
6
7
  module Extensions
@@ -10,7 +11,7 @@ module Legion
10
11
  extend Legion::Extensions::Ollama::Helpers::Client
11
12
 
12
13
  def server_version(**)
13
- response = client(**).get('/api/version')
14
+ response = Helpers::Errors.with_retry { client(**).get('/api/version') }
14
15
  { result: response.body, status: response.status }
15
16
  end
16
17
 
@@ -3,7 +3,7 @@
3
3
  module Legion
4
4
  module Extensions
5
5
  module Ollama
6
- VERSION = '0.1.0'
6
+ VERSION = '0.2.0'
7
7
  end
8
8
  end
9
9
  end
@@ -2,6 +2,8 @@
2
2
 
3
3
  require 'legion/extensions/ollama/version'
4
4
  require 'legion/extensions/ollama/helpers/client'
5
+ require 'legion/extensions/ollama/helpers/errors'
6
+ require 'legion/extensions/ollama/helpers/usage'
5
7
  require 'legion/extensions/ollama/runners/completions'
6
8
  require 'legion/extensions/ollama/runners/chat'
7
9
  require 'legion/extensions/ollama/runners/models'
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: lex-ollama
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Esity
@@ -44,6 +44,8 @@ files:
44
44
  - lib/legion/extensions/ollama.rb
45
45
  - lib/legion/extensions/ollama/client.rb
46
46
  - lib/legion/extensions/ollama/helpers/client.rb
47
+ - lib/legion/extensions/ollama/helpers/errors.rb
48
+ - lib/legion/extensions/ollama/helpers/usage.rb
47
49
  - lib/legion/extensions/ollama/runners/blobs.rb
48
50
  - lib/legion/extensions/ollama/runners/chat.rb
49
51
  - lib/legion/extensions/ollama/runners/completions.rb