lex-llm-ollama 0.2.13 → 0.2.17
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +8 -0
- data/CHANGELOG.md +19 -0
- data/Gemfile +0 -7
- data/README.md +185 -24
- data/lex-llm-ollama.gemspec +1 -1
- data/lib/legion/extensions/llm/ollama/provider.rb +124 -59
- data/lib/legion/extensions/llm/ollama/translator.rb +497 -0
- data/lib/legion/extensions/llm/ollama/version.rb +1 -1
- data/lib/legion/extensions/llm/ollama.rb +7 -6
- metadata +4 -3
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 7850eb1a4f0fcf50d9d0a86de7b9c2e60fa47154e1c6b330a492abeb00c25803
|
|
4
|
+
data.tar.gz: 24e040db015065dd7e508a995aa0f2b072910da41552029a1ff561993961331a
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 762912cf8067d8b1c9019ea2d1d10261e234abac127ad1eeeecb5d2b7e41219c09f6294f68f022f0ad33b1f2eb95332db8b1ed3521eeef84aecfbeb11c3f186e
|
|
7
|
+
data.tar.gz: 9f99c4bc9f342d1061077d9dd8f663b35a0a9c962515cddbcf76d19a55fa734040ab8f4b7f7fd6767395d3dd9582913c6b2dbd3ac3010d10c787e932f218fd6a
|
data/.rubocop.yml
CHANGED
|
@@ -22,6 +22,12 @@ Metrics/CyclomaticComplexity:
|
|
|
22
22
|
Enabled: false
|
|
23
23
|
Metrics/PerceivedComplexity:
|
|
24
24
|
Enabled: false
|
|
25
|
+
Metrics/ClassLength:
|
|
26
|
+
Enabled: false
|
|
27
|
+
Lint/DuplicateBranch:
|
|
28
|
+
Enabled: false
|
|
29
|
+
Lint/UselessConstantScoping:
|
|
30
|
+
Enabled: false
|
|
25
31
|
RSpec/MultipleExpectations:
|
|
26
32
|
Enabled: false
|
|
27
33
|
RSpec/ExampleLength:
|
|
@@ -32,3 +38,5 @@ RSpec/InstanceVariable:
|
|
|
32
38
|
Enabled: false
|
|
33
39
|
Style/Documentation:
|
|
34
40
|
Enabled: false
|
|
41
|
+
Style/AsciiComments:
|
|
42
|
+
Enabled: false
|
data/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,24 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## 0.2.17 - 2026-06-16
|
|
4
|
+
|
|
5
|
+
- dependency updates, code quality improvements
|
|
6
|
+
|
|
7
|
+
## 0.2.16 - 2026-06-15
|
|
8
|
+
|
|
9
|
+
- **CapabilityPolicy integration** — Optional capabilities default false; API-provided capabilities tagged as `:model_metadata`. Settings overrides at provider/instance/model level supported.
|
|
10
|
+
|
|
11
|
+
## 0.2.15 - 2026-06-13
|
|
12
|
+
|
|
13
|
+
- **Gemfile cleanup** — Remove local path overrides; dependencies resolve from gemspec via rubygems.
|
|
14
|
+
- **Canonical tool support** — Use `ToolSchema.extract`, add `:tools` capability, canonical normalization for tool parameter schemas.
|
|
15
|
+
- 147 examples, 0 failures; 17 files, 0 rubocop offenses.
|
|
16
|
+
|
|
17
|
+
## 0.2.14 - 2026-06-05
|
|
18
|
+
|
|
19
|
+
- Verified specs and RuboCop compliance (52 examples, 0 failures; 15 files, 0 offenses)
|
|
20
|
+
- Updated README with comprehensive extension index covering architecture, classes, configuration, and usage
|
|
21
|
+
|
|
3
22
|
## 0.2.13 - 2026-06-02
|
|
4
23
|
|
|
5
24
|
- **Scope discovery refresh to Ollama only** — `DiscoveryRefresh#manual` now calls `Discovery.refresh_discovered_models!(provider: :ollama)` instead of `Discovery.run`, which previously triggered model discovery for all registered providers (anthropic, bedrock, etc.) and caused cross-provider coupling
|
data/Gemfile
CHANGED
|
@@ -2,13 +2,6 @@
|
|
|
2
2
|
|
|
3
3
|
source 'https://rubygems.org'
|
|
4
4
|
|
|
5
|
-
group :test do
|
|
6
|
-
llm_base_path = ENV.fetch('LEX_LLM_PATH', File.expand_path('../lex-llm', __dir__))
|
|
7
|
-
transport_path = ENV.fetch('LEGION_TRANSPORT_PATH', File.expand_path('../../legion-transport', __dir__))
|
|
8
|
-
gem 'legion-transport', path: transport_path if File.directory?(transport_path)
|
|
9
|
-
gem 'lex-llm', path: llm_base_path if File.directory?(llm_base_path)
|
|
10
|
-
end
|
|
11
|
-
|
|
12
5
|
gemspec
|
|
13
6
|
|
|
14
7
|
group :development do
|
data/README.md
CHANGED
|
@@ -2,38 +2,134 @@
|
|
|
2
2
|
|
|
3
3
|
LegionIO LLM provider extension for [Ollama](https://ollama.ai).
|
|
4
4
|
|
|
5
|
-
This gem lives under `Legion::Extensions::Llm::Ollama` and depends on `lex-llm >= 0.4.3` for shared provider-neutral routing, response normalization, fleet envelopes, responder execution, transport, and registry primitives. It does not carry a runtime `legion-llm` dependency; `legion-llm` owns higher-level routing and
|
|
5
|
+
This gem lives under `Legion::Extensions::Llm::Ollama` and depends on `lex-llm >= 0.5.0` for shared provider-neutral routing, response normalization, fleet envelopes, responder execution, transport, and registry primitives. It does not carry a runtime `legion-llm` dependency; `legion-llm` owns higher-level routing and discovers this provider through normal extension loading.
|
|
6
6
|
|
|
7
7
|
Load it with `require 'legion/extensions/llm/ollama'`.
|
|
8
8
|
|
|
9
9
|
## What It Provides
|
|
10
10
|
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
11
|
+
| Feature | Endpoint | Provider Method |
|
|
12
|
+
|---------|----------|----------------|
|
|
13
|
+
| Chat completion | `POST /api/chat` | Inherited from `lex-llm` base provider |
|
|
14
|
+
| Streaming chat | `POST /api/chat` | `stream_response` |
|
|
15
|
+
| List models | `GET /api/tags` | `list_models` |
|
|
16
|
+
| Running models | `GET /api/ps` | `list_running_models` |
|
|
17
|
+
| Model details | `POST /api/show` | `show_model`, `fetch_model_detail` |
|
|
18
|
+
| Pull models | `POST /api/pull` | `pull_model` |
|
|
19
|
+
| Embeddings | `POST /api/embed` | Inherited from `lex-llm` base provider |
|
|
20
|
+
| Readiness check | `GET /api/version` | `readiness(live: false)` |
|
|
21
|
+
|
|
22
|
+
All responses pass through the shared `lex-llm` normalization layer: `Message`, `Chunk`, `Embedding`, and `Model::Info`.
|
|
23
|
+
|
|
24
|
+
## File Index
|
|
25
|
+
|
|
26
|
+
```
|
|
27
|
+
lib/
|
|
28
|
+
legion/extensions/llm/ollama.rb # Extension entry point, instance discovery, default settings
|
|
29
|
+
legion/extensions/llm/ollama/provider.rb # Provider — chat, stream, embed, models, offerings
|
|
30
|
+
legion/extensions/llm/ollama/version.rb # VERSION constant
|
|
31
|
+
legion/extensions/llm/ollama/actors/
|
|
32
|
+
discovery_refresh.rb # Periodic model discovery actor (Every, 30min default)
|
|
33
|
+
fleet_worker.rb # Fleet request subscription actor (Subscription)
|
|
34
|
+
legion/extensions/llm/ollama/runners/
|
|
35
|
+
fleet_worker.rb # Fleet request execution runner (delegates to lex-llm)
|
|
36
|
+
```
|
|
22
37
|
|
|
23
38
|
## Architecture
|
|
24
39
|
|
|
25
40
|
```
|
|
26
41
|
Legion::Extensions::Llm::Ollama
|
|
27
|
-
├── Provider
|
|
28
|
-
├──
|
|
29
|
-
├──
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
42
|
+
├── Provider # Ollama provider implementation
|
|
43
|
+
│ ├── Capabilities # Capability predicates (chat, streaming, vision, functions, embeddings)
|
|
44
|
+
│ ├── #render_payload # Build Ollama chat payload from messages, tools, schema
|
|
45
|
+
│ ├── #stream_response # NDJSON streaming via Faraday on_data
|
|
46
|
+
│ ├── #discover_offerings # Build ModelOffering array from live/cached models
|
|
47
|
+
│ ├── #fetch_model_detail # Call /api/show, extract context_window + capabilities
|
|
48
|
+
│ ├── #render_embedding_payload # Build Ollama embedding payload
|
|
49
|
+
│ └── (inherited from lex-llm) # Chat, embedding, connection, registry helpers
|
|
50
|
+
├── Actor::DiscoveryRefresh # Every actor; refreshes model list, repopulates auto rules
|
|
51
|
+
├── Actor::FleetWorker # Subscription actor; gates on respond_to_requests
|
|
52
|
+
└── Runners::FleetWorker # Module function; delegates to ProviderResponder.call
|
|
53
|
+
|
|
54
|
+
Shared from lex-llm:
|
|
55
|
+
├── Fleet::ProviderResponder # Fleet request execution harness
|
|
56
|
+
├── RegistryPublisher # Publishes readiness + model events to llm.registry
|
|
57
|
+
├── RegistryEventBuilder # Builds registry event payloads
|
|
58
|
+
├── AutoRegistration # Self-registers discovered instances
|
|
59
|
+
└── CredentialSources # Socket probing + setting lookup for instance discovery
|
|
35
60
|
```
|
|
36
61
|
|
|
62
|
+
## Key Classes
|
|
63
|
+
|
|
64
|
+
### `Legion::Extensions::Llm::Ollama` (module)
|
|
65
|
+
|
|
66
|
+
- **`default_settings`** — Returns the full settings schema via `Lex-llm.provider_settings`.
|
|
67
|
+
- **`provider_class`** — Returns `Provider`.
|
|
68
|
+
- **`discover_instances`** — Probes `127.0.0.1:11434` socket + reads configured instances from settings.
|
|
69
|
+
- **`normalize_instance_config(config)`** — Normalizes `endpoint`/`api_base`/`ollama_api_base` aliases to `base_url`.
|
|
70
|
+
- **`registry_publisher`** — Lazily instantiated `RegistryPublisher` for the `:ollama` family.
|
|
71
|
+
|
|
72
|
+
### `Provider`
|
|
73
|
+
|
|
74
|
+
Extends `Legion::Extensions::Llm::Provider`. Implements the Ollama-specific contract:
|
|
75
|
+
|
|
76
|
+
| Method | Purpose |
|
|
77
|
+
|--------|---------|
|
|
78
|
+
| `api_base` | Resolves base URL from `resolve_base_url`, settings, or default `127.0.0.1:11434` |
|
|
79
|
+
| `completion_url` | `/api/chat` |
|
|
80
|
+
| `stream_url` | `/api/chat` |
|
|
81
|
+
| `models_url` | `/api/tags` |
|
|
82
|
+
| `running_models_url` | `/api/ps` |
|
|
83
|
+
| `show_model_url` | `/api/show` |
|
|
84
|
+
| `embedding_url` | `/api/embed` |
|
|
85
|
+
| `pull_url` | `/api/pull` |
|
|
86
|
+
| `version_url` | `/api/version` |
|
|
87
|
+
| `list_running_models` | GET `/api/ps`, returns array of running model hashes |
|
|
88
|
+
| `readiness(live:)` | Checks Ollama version endpoint; publishes readiness event when `live: true` |
|
|
89
|
+
| `list_models` | GET `/api/tags`, parses and publishes model events via registry |
|
|
90
|
+
| `show_model(model)` | POST `/api/show`, returns raw model detail hash |
|
|
91
|
+
| `fetch_model_detail(model)` | Wraps `show_model`; extracts `context_window` and `capabilities` |
|
|
92
|
+
| `pull_model(model, stream:)` | POST `/api/pull` to download a model |
|
|
93
|
+
| `discover_offerings(live:)` | Builds `ModelOffering` array from live or cached models |
|
|
94
|
+
| `render_payload(...)` | Converts Legion messages/tools to an Ollama `/api/chat` JSON request payload |
|
|
95
|
+
| `stream_response(conn, payload)` | Posts with Faraday `on_data` handler for NDJSON streaming |
|
|
96
|
+
| `parse_completion_response(resp)` | Normalizes Ollama chat response to `Legion::Extensions::Llm::Message` |
|
|
97
|
+
| `build_chunk(data)` | Normalizes a stream NDJSON line to `Legion::Extensions::Llm::Chunk` |
|
|
98
|
+
| `render_embedding_payload(text, model:, dimensions:)` | Builds embedding request body |
|
|
99
|
+
| `parse_embedding_response(resp, ...)` | Normalizes embedding response to `Legion::Extensions::Llm::Embedding` |
|
|
100
|
+
|
|
101
|
+
### `Capabilities` (module inside Provider)
|
|
102
|
+
|
|
103
|
+
Module functions providing capability predicates used during offering construction:
|
|
104
|
+
|
|
105
|
+
| Method | Always Returns |
|
|
106
|
+
|--------|---------------|
|
|
107
|
+
| `chat?(model)` | `true` |
|
|
108
|
+
| `streaming?(model)` | `true` |
|
|
109
|
+
| `vision?(model)` | `true` |
|
|
110
|
+
| `functions?(model)` | `true` |
|
|
111
|
+
| `embeddings?(model)` | `true` |
|
|
112
|
+
|
|
113
|
+
### `CONTEXT_WINDOWS` (constant)
|
|
114
|
+
|
|
115
|
+
Static fallback map keyed by model name prefix (e.g., `'qwen3' => 128_000`). Used when `/api/show` is unavailable to infer context window. Covers qwen, llama, gemma, mistral, deepseek, phi, command-r, codellama, and embedding families.
|
|
116
|
+
|
|
117
|
+
### `Actor::DiscoveryRefresh`
|
|
118
|
+
|
|
119
|
+
An `Every` actor that runs every 30 minutes (configurable via `settings[:extensions][:llm][:ollama][:discovery_interval]`). On each tick:
|
|
120
|
+
|
|
121
|
+
1. Calls `Legion::LLM::Discovery.refresh_discovered_models!(provider: :ollama)`
|
|
122
|
+
2. Repopulates auto routing rules if `Legion::LLM::Router` is available
|
|
123
|
+
3. Invalidates the offerings cache if `Legion::LLM::Inventory` is available
|
|
124
|
+
|
|
125
|
+
### `Actor::FleetWorker`
|
|
126
|
+
|
|
127
|
+
A `Subscription` actor that starts only when at least one instance has `fleet.respond_to_requests: true`. Routes messages to the fleet worker runner.
|
|
128
|
+
|
|
129
|
+
### `Runners::FleetWorker`
|
|
130
|
+
|
|
131
|
+
A module with `handle_fleet_request(payload, delivery:, properties:)`. Delegates to `Legion::Extensions::Llm::Fleet::ProviderResponder.call` with the Ollama provider family, provider class, and instance discovery lambda.
|
|
132
|
+
|
|
37
133
|
## Defaults
|
|
38
134
|
|
|
39
135
|
```ruby
|
|
@@ -65,21 +161,24 @@ Legion::Extensions::Llm::Ollama.default_settings
|
|
|
65
161
|
|
|
66
162
|
## Configuration
|
|
67
163
|
|
|
68
|
-
|
|
164
|
+
### Instance Discovery
|
|
165
|
+
|
|
166
|
+
`discover_instances` auto-detects a local instance when the socket at `127.0.0.1:11434` is reachable. Additional instances can be defined in settings using any of the recognized endpoint aliases (`base_url`, `endpoint`, `api_base`, `ollama_api_base`); the extension normalizes all to `base_url`.
|
|
69
167
|
|
|
70
168
|
```yaml
|
|
71
169
|
extensions:
|
|
72
170
|
llm:
|
|
73
171
|
ollama:
|
|
172
|
+
discovery_interval: 1800 # DiscoveryRefresh actor interval (seconds)
|
|
74
173
|
instances:
|
|
75
174
|
lab:
|
|
76
175
|
base_url: http://ollama-lab:11434
|
|
77
176
|
default_model: qwen3.5:latest
|
|
78
177
|
```
|
|
79
178
|
|
|
80
|
-
|
|
179
|
+
### Fleet Responder
|
|
81
180
|
|
|
82
|
-
Provider instances can opt in to consuming Legion LLM fleet requests. The
|
|
181
|
+
Provider instances can opt in to consuming Legion LLM fleet requests. The fleet actor only starts when at least one instance enables `respond_to_requests`, and the runner delegates execution to the shared `lex-llm` responder helper.
|
|
83
182
|
|
|
84
183
|
```yaml
|
|
85
184
|
extensions:
|
|
@@ -96,14 +195,76 @@ extensions:
|
|
|
96
195
|
- embed
|
|
97
196
|
```
|
|
98
197
|
|
|
198
|
+
## Ollama API Surface
|
|
199
|
+
|
|
200
|
+
| Legion Method | Ollama Route | HTTP Verb |
|
|
201
|
+
|---------------|-------------|-----------|
|
|
202
|
+
| Chat | `/api/chat` | POST |
|
|
203
|
+
| Stream chat | `/api/chat` | POST |
|
|
204
|
+
| List models | `/api/tags` | GET |
|
|
205
|
+
| Running models | `/api/ps` | GET |
|
|
206
|
+
| Model details | `/api/show` | POST |
|
|
207
|
+
| Pull model | `/api/pull` | POST |
|
|
208
|
+
| Embeddings | `/api/embed` | POST |
|
|
209
|
+
| Readiness | `/api/version` | GET |
|
|
210
|
+
|
|
211
|
+
## Error Handling
|
|
212
|
+
|
|
213
|
+
Every rescue block uses `handle_exception` from `Legion::Logging::Helper` with explicit `level`, `handled:`, and `operation:` parameters. Connection failures during `discover_offerings` produce a warn-level log and return an empty array (never raise).
|
|
214
|
+
|
|
215
|
+
## Usage
|
|
216
|
+
|
|
217
|
+
```ruby
|
|
218
|
+
require 'legion/extensions/llm/ollama'
|
|
219
|
+
|
|
220
|
+
# Access the module
|
|
221
|
+
Legion::Extensions::Llm::Ollama.discover_instances
|
|
222
|
+
Legion::Extensions::Llm::Ollama.default_settings
|
|
223
|
+
|
|
224
|
+
# Create a provider instance (usually done by lex-llm routing)
|
|
225
|
+
provider = Legion::Extensions::Llm::Ollama::Provider.new(config:)
|
|
226
|
+
|
|
227
|
+
# Discover offerings
|
|
228
|
+
provider.discover_offerings(live: true)
|
|
229
|
+
|
|
230
|
+
# Chat
|
|
231
|
+
result = provider.chat(messages: [...], model: 'llama3', temperature: 0.7)
|
|
232
|
+
|
|
233
|
+
# Stream chat
|
|
234
|
+
provider.stream_chat(messages: [...], model: 'llama3') do |chunk|
|
|
235
|
+
print chunk.content
|
|
236
|
+
end
|
|
237
|
+
|
|
238
|
+
# Embeddings
|
|
239
|
+
embeddings = provider.embed(text: "Hello world", model: 'nomic-embed-text')
|
|
240
|
+
```
|
|
241
|
+
|
|
242
|
+
## Dependencies
|
|
243
|
+
|
|
244
|
+
| Gem | Minimum Version | Purpose |
|
|
245
|
+
|-----|----------------|---------|
|
|
246
|
+
| `lex-llm` | `>= 0.5.0` | Base provider contract, routing, fleet responder, registry, credential sources |
|
|
247
|
+
| `legion-transport` | `>= 1.4.14` | Faraday connection management |
|
|
248
|
+
| `legion-json` | — | JSON serialization (`Legion::JSON`) |
|
|
249
|
+
| `legion-logging` | — | Structured logging (`Legion::Logging::Helper`) |
|
|
250
|
+
| `legion-settings` | — | Configuration access |
|
|
251
|
+
| `legion-extensions` | — | Extension framework (`Core`, `Actors::Every`, `Actors::Subscription`) |
|
|
252
|
+
|
|
99
253
|
## Development
|
|
100
254
|
|
|
101
255
|
```bash
|
|
256
|
+
cd lex-llm-ollama
|
|
102
257
|
bundle install
|
|
103
|
-
|
|
258
|
+
|
|
259
|
+
# Run specs
|
|
260
|
+
bundle exec rspec
|
|
261
|
+
|
|
262
|
+
# Lint (auto-correct)
|
|
104
263
|
bundle exec rubocop -A
|
|
105
264
|
```
|
|
106
265
|
|
|
266
|
+
Spec count: 52 examples across 7 spec files.
|
|
267
|
+
|
|
107
268
|
## License
|
|
108
269
|
|
|
109
270
|
MIT
|
data/lex-llm-ollama.gemspec
CHANGED
|
@@ -27,5 +27,5 @@ Gem::Specification.new do |spec|
|
|
|
27
27
|
spec.add_dependency 'legion-logging', '>= 1.3.2'
|
|
28
28
|
spec.add_dependency 'legion-settings', '>= 1.3.14'
|
|
29
29
|
spec.add_dependency 'legion-transport', '>= 1.4.14'
|
|
30
|
-
spec.add_dependency 'lex-llm', '>= 0.
|
|
30
|
+
spec.add_dependency 'lex-llm', '>= 0.5.0'
|
|
31
31
|
end
|
|
@@ -8,7 +8,7 @@ module Legion
|
|
|
8
8
|
module Llm
|
|
9
9
|
module Ollama
|
|
10
10
|
# Ollama provider implementation for the Legion::Extensions::Llm base provider contract.
|
|
11
|
-
class Provider < Legion::Extensions::Llm::Provider
|
|
11
|
+
class Provider < Legion::Extensions::Llm::Provider
|
|
12
12
|
include Legion::Logging::Helper
|
|
13
13
|
|
|
14
14
|
class << self
|
|
@@ -41,6 +41,10 @@ module Legion
|
|
|
41
41
|
Ollama.default_settings
|
|
42
42
|
end
|
|
43
43
|
|
|
44
|
+
def translator
|
|
45
|
+
@translator ||= Translator.new(config: config)
|
|
46
|
+
end
|
|
47
|
+
|
|
44
48
|
def api_base
|
|
45
49
|
resolve_base_url || normalize_url(settings[:base_url] || settings[:endpoint] || 'http://127.0.0.1:11434')
|
|
46
50
|
end
|
|
@@ -112,10 +116,11 @@ module Legion
|
|
|
112
116
|
log.debug do
|
|
113
117
|
"ollama provider discovering offerings live=#{live} cached_model_count=#{Array(@cached_models).size}"
|
|
114
118
|
end
|
|
119
|
+
running_ids = live ? running_model_ids : []
|
|
115
120
|
offerings = resolve_models(live).filter_map do |model_info|
|
|
116
121
|
next unless model_allowed?(model_info.id)
|
|
117
122
|
|
|
118
|
-
offering_from_model(model_info)
|
|
123
|
+
offering_from_model(model_info, loaded: running_ids.include?(model_info.id.to_s))
|
|
119
124
|
end
|
|
120
125
|
log.debug { "ollama provider built offering_count=#{offerings.size} live=#{live}" }
|
|
121
126
|
offerings
|
|
@@ -159,7 +164,14 @@ module Legion
|
|
|
159
164
|
end
|
|
160
165
|
end
|
|
161
166
|
|
|
162
|
-
def
|
|
167
|
+
def running_model_ids
|
|
168
|
+
Array(list_running_models).filter_map do |m|
|
|
169
|
+
m['name'] || m[:name] || m['model'] || m[:model]
|
|
170
|
+
end.map(&:to_s)
|
|
171
|
+
end
|
|
172
|
+
|
|
173
|
+
def offering_from_model(model_info, loaded: false)
|
|
174
|
+
policy = resolve_capability_policy(model_info)
|
|
163
175
|
Legion::Extensions::Llm::Routing::ModelOffering.new(
|
|
164
176
|
provider_family: :ollama,
|
|
165
177
|
instance_id: config.respond_to?(:instance_id) ? config.instance_id : :default,
|
|
@@ -167,18 +179,64 @@ module Legion
|
|
|
167
179
|
tier: offering_tier,
|
|
168
180
|
model: model_info.id,
|
|
169
181
|
usage_type: offering_usage_type(model_info),
|
|
170
|
-
capabilities:
|
|
182
|
+
capabilities: policy[:capabilities],
|
|
183
|
+
capability_sources: policy[:sources],
|
|
171
184
|
limits: offering_limits(model_info),
|
|
172
|
-
metadata: offering_metadata(model_info)
|
|
185
|
+
metadata: offering_metadata(model_info).merge(loaded: loaded)
|
|
173
186
|
)
|
|
174
187
|
end
|
|
175
188
|
|
|
176
|
-
def
|
|
177
|
-
model_info.
|
|
189
|
+
def resolve_capability_policy(model_info)
|
|
190
|
+
model_id = model_info.id.to_s
|
|
191
|
+
Legion::Extensions::Llm::CapabilityPolicy.resolve(
|
|
192
|
+
real: capabilities_from_api(model_info),
|
|
193
|
+
provider_catalog: {},
|
|
194
|
+
probe: {},
|
|
195
|
+
provider_envelope: { streaming: true },
|
|
196
|
+
provider_config: provider_level_config,
|
|
197
|
+
instance_config: instance_level_config,
|
|
198
|
+
model_config: model_level_config(model_id)
|
|
199
|
+
)
|
|
178
200
|
end
|
|
179
201
|
|
|
180
|
-
def
|
|
181
|
-
model_info.capabilities.
|
|
202
|
+
def capabilities_from_api(model_info)
|
|
203
|
+
Array(model_info.capabilities).each_with_object({}) do |cap, hash|
|
|
204
|
+
sym = cap.to_s.downcase.to_sym
|
|
205
|
+
hash[sym] = true
|
|
206
|
+
end
|
|
207
|
+
end
|
|
208
|
+
|
|
209
|
+
def provider_level_config
|
|
210
|
+
raw = CredentialSources.setting(:extensions, :llm, :ollama)
|
|
211
|
+
return {} unless raw.is_a?(Hash)
|
|
212
|
+
|
|
213
|
+
raw.reject { |k, _| k.to_sym == :instances }
|
|
214
|
+
end
|
|
215
|
+
|
|
216
|
+
def instance_level_config
|
|
217
|
+
extract_config_hash
|
|
218
|
+
end
|
|
219
|
+
|
|
220
|
+
def model_level_config(model_id)
|
|
221
|
+
data = extract_config_hash
|
|
222
|
+
models = data[:models]
|
|
223
|
+
return {} unless models.is_a?(Hash)
|
|
224
|
+
|
|
225
|
+
models[model_id.to_sym] || models[model_id.to_s] || models[model_id] || {}
|
|
226
|
+
end
|
|
227
|
+
|
|
228
|
+
def extract_config_hash
|
|
229
|
+
return config.to_h if config.respond_to?(:to_h) && !config.is_a?(Legion::Extensions::Llm::HashConfig)
|
|
230
|
+
|
|
231
|
+
if config.is_a?(Legion::Extensions::Llm::HashConfig)
|
|
232
|
+
config.instance_variable_get(:@data) || {}
|
|
233
|
+
else
|
|
234
|
+
{}
|
|
235
|
+
end
|
|
236
|
+
end
|
|
237
|
+
|
|
238
|
+
def offering_usage_type(model_info)
|
|
239
|
+
model_info.embedding? ? :embedding : :inference
|
|
182
240
|
end
|
|
183
241
|
|
|
184
242
|
def offering_limits(model_info)
|
|
@@ -357,16 +415,16 @@ module Legion
|
|
|
357
415
|
def format_tools(tools)
|
|
358
416
|
return nil if tools.empty?
|
|
359
417
|
|
|
360
|
-
tool_names = tools.values.filter_map { |tool|
|
|
418
|
+
tool_names = tools.values.filter_map { |tool| Legion::Extensions::Llm::Canonical::ToolSchema.tool_name(tool) }
|
|
361
419
|
log.debug { "ollama provider formatting tools count=#{tools.size} names=#{tool_names.join(',')}" }
|
|
362
420
|
|
|
363
421
|
tools.values.map do |tool|
|
|
364
422
|
{
|
|
365
423
|
type: 'function',
|
|
366
424
|
function: {
|
|
367
|
-
name: tool
|
|
368
|
-
description: tool
|
|
369
|
-
parameters: tool
|
|
425
|
+
name: Legion::Extensions::Llm::Canonical::ToolSchema.tool_name(tool),
|
|
426
|
+
description: Legion::Extensions::Llm::Canonical::ToolSchema.tool_description(tool),
|
|
427
|
+
parameters: Legion::Extensions::Llm::Canonical::ToolSchema.extract(tool)
|
|
370
428
|
}
|
|
371
429
|
}
|
|
372
430
|
end
|
|
@@ -380,67 +438,74 @@ module Legion
|
|
|
380
438
|
|
|
381
439
|
def parse_completion_response(response)
|
|
382
440
|
body = response.body
|
|
383
|
-
|
|
384
|
-
|
|
441
|
+
canonical = translator.parse_response(body)
|
|
442
|
+
to_legacy_message(canonical, body)
|
|
443
|
+
end
|
|
444
|
+
|
|
445
|
+
def build_chunk(data)
|
|
446
|
+
canonical_chunk = translator.parse_chunk(data)
|
|
447
|
+
return nil if canonical_chunk.nil?
|
|
448
|
+
|
|
449
|
+
to_legacy_chunk(canonical_chunk, data)
|
|
450
|
+
end
|
|
451
|
+
|
|
452
|
+
def to_legacy_message(canonical, raw_body)
|
|
453
|
+
usage = canonical.usage
|
|
385
454
|
Legion::Extensions::Llm::Message.new(
|
|
386
455
|
role: :assistant,
|
|
387
|
-
content:
|
|
388
|
-
model_id:
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
456
|
+
content: canonical.text,
|
|
457
|
+
model_id: canonical.model,
|
|
458
|
+
thinking: if canonical.thinking
|
|
459
|
+
Legion::Extensions::Llm::Thinking.build(
|
|
460
|
+
text: canonical.thinking.content, signature: canonical.thinking.signature
|
|
461
|
+
)
|
|
462
|
+
end,
|
|
463
|
+
tool_calls: legacy_tool_calls(canonical.tool_calls),
|
|
464
|
+
input_tokens: usage&.input_tokens,
|
|
465
|
+
output_tokens: usage&.output_tokens,
|
|
466
|
+
raw: raw_body
|
|
394
467
|
)
|
|
395
468
|
end
|
|
396
469
|
|
|
397
|
-
def
|
|
398
|
-
message = data.fetch('message', {})
|
|
399
|
-
thinking = message['thinking']
|
|
470
|
+
def to_legacy_chunk(canonical_chunk, raw_data)
|
|
400
471
|
Legion::Extensions::Llm::Chunk.new(
|
|
401
472
|
role: :assistant,
|
|
402
|
-
content:
|
|
403
|
-
thinking:
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
473
|
+
content: canonical_chunk.text_delta? ? canonical_chunk.delta : nil,
|
|
474
|
+
thinking: if canonical_chunk.thinking_delta?
|
|
475
|
+
Legion::Extensions::Llm::Thinking.build(
|
|
476
|
+
text: canonical_chunk.delta
|
|
477
|
+
)
|
|
478
|
+
end,
|
|
479
|
+
tool_calls: legacy_streaming_tool_calls(canonical_chunk),
|
|
480
|
+
model_id: raw_data['model'] || raw_data[:model],
|
|
481
|
+
input_tokens: canonical_chunk.usage&.input_tokens ||
|
|
482
|
+
raw_data['prompt_eval_count'] || raw_data[:prompt_eval_count],
|
|
483
|
+
output_tokens: canonical_chunk.usage&.output_tokens ||
|
|
484
|
+
raw_data['eval_count'] || raw_data[:eval_count],
|
|
485
|
+
raw: raw_data
|
|
409
486
|
)
|
|
410
487
|
end
|
|
411
488
|
|
|
412
|
-
def
|
|
413
|
-
|
|
414
|
-
message['content'],
|
|
415
|
-
metadata: thinking_metadata(message)
|
|
416
|
-
)
|
|
489
|
+
def legacy_tool_calls(canonical_tool_calls)
|
|
490
|
+
return nil if canonical_tool_calls.nil? || canonical_tool_calls.empty?
|
|
417
491
|
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
def thinking_metadata(message)
|
|
425
|
-
{ thinking: message['thinking'] }.compact
|
|
492
|
+
canonical_tool_calls.to_h do |tc|
|
|
493
|
+
[
|
|
494
|
+
(tc.name || tc.id).to_s.to_sym,
|
|
495
|
+
Legion::Extensions::Llm::ToolCall.new(id: tc.id, name: tc.name, arguments: tc.arguments || {})
|
|
496
|
+
]
|
|
497
|
+
end
|
|
426
498
|
end
|
|
427
499
|
|
|
428
|
-
def
|
|
429
|
-
return nil unless
|
|
500
|
+
def legacy_streaming_tool_calls(canonical_chunk)
|
|
501
|
+
return nil unless canonical_chunk.tool_call_delta?
|
|
430
502
|
|
|
431
|
-
|
|
503
|
+
tc = canonical_chunk.tool_call
|
|
504
|
+
return nil unless tc
|
|
432
505
|
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
function.fetch('name').to_sym,
|
|
437
|
-
Legion::Extensions::Llm::ToolCall.new(
|
|
438
|
-
id: call['id'] || function['name'],
|
|
439
|
-
name: function['name'],
|
|
440
|
-
arguments: function['arguments'] || {}
|
|
441
|
-
)
|
|
442
|
-
]
|
|
443
|
-
end
|
|
506
|
+
{ (tc.name || tc.id).to_s.to_sym => Legion::Extensions::Llm::ToolCall.new(
|
|
507
|
+
id: tc.id, name: tc.name, arguments: tc.arguments || ''
|
|
508
|
+
) }
|
|
444
509
|
end
|
|
445
510
|
|
|
446
511
|
def parse_list_models_response(response, provider, _capabilities)
|
|
@@ -0,0 +1,497 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'legion/extensions/llm/canonical'
|
|
4
|
+
require 'legion/extensions/llm/responses/thinking_extractor'
|
|
5
|
+
require 'legion/json'
|
|
6
|
+
require 'legion/logging'
|
|
7
|
+
|
|
8
|
+
module Legion
|
|
9
|
+
module Extensions
|
|
10
|
+
module Llm
|
|
11
|
+
module Ollama
|
|
12
|
+
# Canonical provider translator for Ollama (/api/chat NDJSON wire format).
|
|
13
|
+
#
|
|
14
|
+
# Implements render_request, parse_response, parse_chunk, and capabilities.
|
|
15
|
+
# Ollama uses NDJSON streaming (not SSE), native tool calling, and the `think`
|
|
16
|
+
# flag for extended thinking support.
|
|
17
|
+
#
|
|
18
|
+
# Ollama quirks (declared in capabilities):
|
|
19
|
+
# - tool_calls_as_text: false — Ollama returns structured tool_calls natively.
|
|
20
|
+
# - forced_tool_choice: false — Ollama does not support forced tool selection.
|
|
21
|
+
# - assistant_prefill: false — Ollama does not support assistant prefill.
|
|
22
|
+
class Translator
|
|
23
|
+
include Legion::Logging::Helper
|
|
24
|
+
|
|
25
|
+
# Ollama-specific stop_reason mapping (done_reason field).
|
|
26
|
+
OLLAMA_STOP_REASON_MAP = {
|
|
27
|
+
'stop' => :end_turn,
|
|
28
|
+
'tool_use' => :tool_use,
|
|
29
|
+
'length' => :max_tokens
|
|
30
|
+
}.freeze
|
|
31
|
+
FALLBACK_STOP_REASON = :end_turn
|
|
32
|
+
|
|
33
|
+
# G18 parameter mapping: canonical params -> Ollama options keys.
|
|
34
|
+
PARAM_OPTIONS_KEYS = {
|
|
35
|
+
max_tokens: :num_predict,
|
|
36
|
+
temperature: :temperature,
|
|
37
|
+
top_p: :top_p,
|
|
38
|
+
top_k: :top_k,
|
|
39
|
+
stop_sequences: :stop,
|
|
40
|
+
seed: :seed,
|
|
41
|
+
frequency_penalty: :frequency_penalty,
|
|
42
|
+
presence_penalty: :presence_penalty
|
|
43
|
+
}.freeze
|
|
44
|
+
|
|
45
|
+
SUPPORTED_PARAMS = %i[
|
|
46
|
+
max_tokens temperature top_p top_k stop_sequences
|
|
47
|
+
seed frequency_penalty presence_penalty
|
|
48
|
+
].freeze
|
|
49
|
+
|
|
50
|
+
def initialize(config: nil)
|
|
51
|
+
@config = config
|
|
52
|
+
end
|
|
53
|
+
|
|
54
|
+
# Render a canonical request into Ollama /api/chat wire payload.
|
|
55
|
+
def render_request(request)
|
|
56
|
+
model = request.metadata&.dig(:model) || 'default'
|
|
57
|
+
messages = format_messages(request)
|
|
58
|
+
payload = {
|
|
59
|
+
model: model,
|
|
60
|
+
messages: messages,
|
|
61
|
+
stream: request.stream
|
|
62
|
+
}
|
|
63
|
+
|
|
64
|
+
payload[:tools] = format_tools(request.tools) unless request.tools.to_h.empty?
|
|
65
|
+
apply_options(payload, request.params)
|
|
66
|
+
apply_thinking_config(payload, request)
|
|
67
|
+
apply_response_format(payload, request.params)
|
|
68
|
+
|
|
69
|
+
log.debug do
|
|
70
|
+
"[llm][ollama-translator] action=render_request model=#{model} stream=#{request.stream} " \
|
|
71
|
+
"message_count=#{messages.size} tools=#{request.tools&.size || 0}"
|
|
72
|
+
end
|
|
73
|
+
|
|
74
|
+
payload.compact
|
|
75
|
+
end
|
|
76
|
+
|
|
77
|
+
# Parse an Ollama /api/chat completion response into a Canonical::Response.
|
|
78
|
+
def parse_response(wire)
|
|
79
|
+
return canonical_error_response(wire) unless wire.is_a?(Hash)
|
|
80
|
+
return Canonical::Response.from_hash(wire) if canonical_response?(wire)
|
|
81
|
+
|
|
82
|
+
message = wire[:message] || wire['message'] || {}
|
|
83
|
+
content = message[:content] || message['content'] || ''
|
|
84
|
+
tool_calls_raw = message[:tool_calls] || message['tool_calls']
|
|
85
|
+
model = wire[:model] || wire['model']
|
|
86
|
+
done_reason = wire[:done_reason] || wire['done_reason']
|
|
87
|
+
done = wire[:done] || wire['done']
|
|
88
|
+
|
|
89
|
+
extraction = Responses::ThinkingExtractor.extract(
|
|
90
|
+
content,
|
|
91
|
+
metadata: thinking_metadata(message)
|
|
92
|
+
)
|
|
93
|
+
|
|
94
|
+
text = extraction.content || ''
|
|
95
|
+
thinking = build_canonical_thinking(extraction)
|
|
96
|
+
tool_calls = parse_tool_calls(tool_calls_raw)
|
|
97
|
+
stop_reason = map_stop_reason(done_reason, done)
|
|
98
|
+
|
|
99
|
+
usage = Canonical::Usage.from_hash({
|
|
100
|
+
input_tokens: wire[:prompt_eval_count] || wire['prompt_eval_count'],
|
|
101
|
+
output_tokens: wire[:eval_count] || wire['eval_count']
|
|
102
|
+
})
|
|
103
|
+
|
|
104
|
+
Canonical::Response.build(
|
|
105
|
+
text: text.to_s,
|
|
106
|
+
thinking: thinking,
|
|
107
|
+
tool_calls: tool_calls,
|
|
108
|
+
usage: usage,
|
|
109
|
+
stop_reason: stop_reason,
|
|
110
|
+
model: model,
|
|
111
|
+
metadata: {}
|
|
112
|
+
)
|
|
113
|
+
rescue StandardError => e
|
|
114
|
+
handle_exception(e, level: :error, handled: false, operation: 'ollama.translator.parse_response')
|
|
115
|
+
raise
|
|
116
|
+
end
|
|
117
|
+
|
|
118
|
+
# Parse a single NDJSON chunk into a Canonical::Chunk or nil.
#
# raw may be nil, a String (one JSON line) or an already-decoded Hash.
# Returns nil when there is nothing to emit: nil/blank input, input that
# normalize_chunk_input could not turn into data, or decoded data that is
# not a Hash (e.g. a JSON array or scalar), which carries no chunk fields.
#
# Any StandardError is reported through handle_exception and re-raised.
def parse_chunk(raw)
  return nil if raw.nil?

  data = normalize_chunk_input(raw)
  # Guard against valid-but-unexpected JSON (arrays, numbers, strings):
  # indexing those with 'type'/:type would raise instead of being skipped.
  return nil unless data.is_a?(Hash)

  # Handle canonical-form chunks (from conformance fixtures)
  return handle_canonical_chunk(data) if data['type'] || data[:type]

  parse_ollama_chunk(data)
rescue StandardError => e
  handle_exception(e, level: :error, handled: false, operation: 'ollama.translator.parse_chunk')
  raise
end
|
|
133
|
+
|
|
134
|
+
# Declared capabilities for the Ollama provider.
#
# Returns a frozen Hash: the provider name plus one boolean flag per feature.
def capabilities
  enabled = %i[streaming tool_calls thinking vision embeddings].to_h { |flag| [flag, true] }
  disabled = %i[tool_calls_as_text forced_tool_choice assistant_prefill].to_h { |flag| [flag, false] }

  { provider: 'ollama' }.merge(enabled, disabled).freeze
end
|
|
148
|
+
|
|
149
|
+
private
|
|
150
|
+
|
|
151
|
+
attr_reader :config
|
|
152
|
+
|
|
153
|
+
# -- Message formatting --
|
|
154
|
+
|
|
155
|
+
# Build the wire message list for a request.
#
# The request's messages are formatted via format_request_messages. When the
# request carries a non-blank system prompt, a leading
# { role: 'system', content: ... } entry is prepended.
#
# The system prompt is coerced with to_s before stripping, so the emitted
# content uses the same value as the blank check; a non-String prompt no
# longer raises NoMethodError on #strip.
def format_messages(request)
  messages = format_request_messages(request.messages)

  system_prompt = request.system.to_s.strip
  return messages if system_prompt.empty?

  [{ role: 'system', content: system_prompt }] + messages
end
|
|
164
|
+
|
|
165
|
+
# Format every message with format_message.
# Returns an empty Array when messages is nil or empty.
def format_request_messages(messages)
  if messages.nil? || messages.empty?
    []
  else
    messages.map { |entry| format_message(entry) }
  end
end
|
|
170
|
+
|
|
171
|
+
# Convert one message into a wire Hash.
#
# Always carries :role (stringified) and :content (from
# format_message_content); adds :images when extract_images finds any and
# :tool_call_id when the message has one. Keys with nil values are removed
# from the result.
def format_message(msg)
  formatted = { role: msg.role.to_s, content: format_message_content(msg) }

  attachments = extract_images(msg.content)
  formatted[:images] = attachments unless attachments.empty?

  call_id = msg.tool_call_id
  formatted[:tool_call_id] = call_id if call_id

  formatted.compact
end
|
|
182
|
+
|
|
183
|
+
# Reduce a message's content to the text sent on the wire.
#
# String content is returned as-is, an Array is flattened through
# extract_text_from_blocks, a single Canonical::ContentBlock yields its text
# (or its to_s when it is not a text block), and anything else is
# stringified with to_s.
def format_message_content(msg)
  body = msg.content

  case body
  when String
    body
  when Array
    extract_text_from_blocks(body)
  when Canonical::ContentBlock
    body.text? ? body.text.to_s : body.to_s
  else
    body.to_s
  end
end
|
|
196
|
+
|
|
197
|
+
# Concatenate the textual parts of a list of content blocks.
#
# Canonical::ContentBlock entries go through format_content_block_text, Hash
# entries contribute their :text / 'text' value (when present), and any
# other entry contributes its to_s. Entries that yield nothing are skipped.
def extract_text_from_blocks(blocks)
  fragments = blocks.filter_map do |entry|
    if entry.is_a?(Canonical::ContentBlock)
      format_content_block_text(entry)
    elsif entry.is_a?(Hash)
      entry.transform_keys(&:to_sym)[:text]&.to_s
    else
      entry.to_s
    end
  end

  fragments.join
end
|
|
211
|
+
|
|
212
|
+
# Textual rendering of a single content block, keyed on block.type.
#
# :text, :thinking and :tool_result blocks yield their text (stringified);
# :tool_use blocks yield a JSON document with the tool name and arguments;
# every other type yields nil.
def format_content_block_text(block)
  kind = block.type
  return block.text.to_s if %i[text thinking tool_result].include?(kind)
  return nil unless kind == :tool_use

  Legion::JSON.dump({ name: block.name, arguments: block.input || {} })
end
|
|
222
|
+
|
|
223
|
+
# Collect the data payloads of all :image content blocks.
#
# Returns [] unless content is an Array. Only Canonical::ContentBlock
# entries of type :image are considered, and blocks without data are dropped.
def extract_images(content)
  return [] unless content.is_a?(Array)

  image_blocks = content.select do |entry|
    entry.is_a?(Canonical::ContentBlock) && entry.type == :image
  end
  image_blocks.filter_map(&:data)
end
|
|
232
|
+
|
|
233
|
+
# -- Tool formatting --
|
|
234
|
+
|
|
235
|
+
# Convert the request's tool registry into a list of function-tool entries.
#
# tools is coerced with to_h and its values are used; returns nil when that
# Hash is empty. Each value may be a Canonical::ToolDefinition, a Hash
# (keys are symbolized) or another object that is indexed as-is. The
# parameter schema is taken from :parameters or :input_schema and passed
# through ToolDefinition.normalize_parameters.
def format_tools(tools)
  registry = tools.to_h
  return nil if registry.empty?

  registry.values.map do |tool|
    spec = case tool
           when Canonical::ToolDefinition
             { name: tool.name, description: tool.description, parameters: tool.parameters }
           when Hash
             tool.transform_keys(&:to_sym)
           else
             tool
           end

    tool_name = spec[:name] || spec['name']
    summary = (spec[:description] || spec['description'] || '').to_s
    schema = spec[:parameters] || spec[:input_schema]
    # Schema objects that are not plain Hashes are converted when possible.
    schema = schema.to_h if schema.respond_to?(:to_h) && !schema.is_a?(Hash)

    {
      type: 'function',
      function: {
        name: tool_name.to_s,
        description: summary,
        parameters: Legion::Extensions::Llm::Canonical::ToolDefinition.normalize_parameters(schema)
      }
    }
  end
end
|
|
263
|
+
|
|
264
|
+
# -- Parameter mapping (G18) --
|
|
265
|
+
|
|
266
|
+
# Copy supported sampling parameters from params into payload[:options].
#
# Does nothing unless params is a Canonical::Params. Each key in
# SUPPORTED_PARAMS with a non-nil value is stored under its wire name from
# PARAM_OPTIONS_KEYS; :stop_sequences is always wrapped into an Array.
# payload[:options] is only set when at least one option was collected.
# A max_thinking_tokens value is not forwarded; it is only logged at debug
# level as dropped.
def apply_options(payload, params)
  return unless params.is_a?(Canonical::Params)

  wire_options = SUPPORTED_PARAMS.each_with_object({}) do |param_key, collected|
    setting = params.public_send(param_key)
    next if setting.nil?

    wire_key = PARAM_OPTIONS_KEYS[param_key]
    collected[wire_key] = param_key == :stop_sequences ? Array(setting) : setting
  end
  payload[:options] = wire_options unless wire_options.empty?

  return unless params.max_thinking_tokens

  log.debug do
    '[llm][ollama-translator] action=drop_unsupported_param param=max_thinking_tokens ' \
      "value=#{params.max_thinking_tokens} reason=ollama_not_supported"
  end
end
|
|
292
|
+
|
|
293
|
+
# -- Thinking configuration --
|
|
294
|
+
|
|
295
|
+
# Set payload[:think] to true when the request asks for thinking
# (as decided by enable_thinking?); otherwise leave payload untouched.
def apply_thinking_config(payload, request)
  payload[:think] = true if enable_thinking?(request)
end
|
|
300
|
+
|
|
301
|
+
# Decide whether thinking should be requested for this request.
#
# - Canonical::Thinking::Config: true only when it reports enabled?.
# - Hash: true unless its enabled flag is explicitly false. The flag is
#   read from :enabled or, when that key is absent, from 'enabled', so a
#   string-keyed { 'enabled' => false } is honoured as well.
# - Anything else (including nil): false.
def enable_thinking?(request)
  setting = request.thinking
  return true if setting.is_a?(Canonical::Thinking::Config) && setting.enabled?
  return false unless setting.is_a?(Hash)

  flag = setting.key?(:enabled) ? setting[:enabled] : setting['enabled']
  flag != false
end
|
|
307
|
+
|
|
308
|
+
# -- Response format --
|
|
309
|
+
|
|
310
|
+
# Set payload[:format] from params.response_format.
#
# Does nothing unless params is a Canonical::Params with a response format.
# For a Hash, the nested schema (:schema / 'schema' / :json_schema /
# 'json_schema', in that order) is used when present, otherwise the Hash
# itself; any other value is passed through unchanged.
def apply_response_format(payload, params)
  return unless params.is_a?(Canonical::Params) && params.response_format

  requested = params.response_format
  nested_schema = nil
  if requested.is_a?(Hash)
    nested_schema = requested[:schema] || requested['schema'] ||
                    requested[:json_schema] || requested['json_schema']
  end

  payload[:format] = nested_schema || requested
end
|
|
322
|
+
|
|
323
|
+
# -- Response parsing --
|
|
324
|
+
|
|
325
|
+
# True when the hash has a text or stop_reason key (symbol or string),
# which parse_response treats as an already-canonical response.
def canonical_response?(wire)
  [:text, 'text', :stop_reason, 'stop_reason'].any? { |marker| wire.key?(marker) }
end
|
|
328
|
+
|
|
329
|
+
# Build a Canonical::Response with stop_reason :error.
#
# When wire is a Hash its error, usage and model entries are reused (string
# keys are tried before symbol keys); otherwise a generic parse_error
# description is placed in metadata[:error].
def canonical_error_response(wire)
  body = wire.is_a?(Hash) ? wire : {}
  fallback_error = { type: 'parse_error', message: 'Failed to parse response' }
  error_info = body['error'] || body[:error] || fallback_error
  usage_hash = body['usage'] || body[:usage] || {}

  Canonical::Response.build(
    text: '',
    tool_calls: [],
    usage: Canonical::Usage.from_hash(usage_hash),
    stop_reason: :error,
    model: body['model'] || body[:model],
    metadata: { error: error_info }
  )
end
|
|
343
|
+
|
|
344
|
+
# Metadata hash handed to the thinking extractor: { thinking: ... } when the
# message carries a thinking value (symbol or string key), otherwise {}.
def thinking_metadata(message)
  reasoning = message[:thinking] || message['thinking']
  reasoning ? { thinking: reasoning } : {}
end
|
|
350
|
+
|
|
351
|
+
# Wrap an extraction's thinking text and signature in a Canonical::Thinking.
# Returns nil when the extraction has neither.
def build_canonical_thinking(extraction)
  has_payload = extraction.thinking || extraction.signature
  return nil unless has_payload

  Canonical::Thinking.new(content: extraction.thinking, signature: extraction.signature)
end
|
|
359
|
+
|
|
360
|
+
# Convert the wire tool_calls list into Canonical::ToolCall objects.
#
# Returns [] unless given a non-empty Array. Each entry's id falls back to
# the function name when no id is present. An entry that raises while being
# converted is reported through handle_exception (as handled) and skipped.
def parse_tool_calls(tool_calls_raw)
  return [] if !tool_calls_raw.is_a?(Array) || tool_calls_raw.empty?

  tool_calls_raw.filter_map do |entry|
    entry = entry.transform_keys(&:to_sym) if entry.is_a?(Hash)
    fn = entry[:function] || entry['function'] || {}
    fn = fn.transform_keys(&:to_sym) if fn.is_a?(Hash)

    fn_name = fn[:name] || fn['name']
    call_id = entry[:id] || entry['id'] || fn_name
    call_args = parse_tool_arguments(fn[:arguments] || fn['arguments'])

    Canonical::ToolCall.build(id: call_id.to_s, name: fn_name.to_s, arguments: call_args, source: :client)
  rescue StandardError => e
    handle_exception(e, level: :warn, handled: true, operation: 'ollama.translator.parse_tool_call')
    nil
  end
end
|
|
383
|
+
|
|
384
|
+
# Normalize tool-call arguments.
#
# A Hash is returned unchanged, nil or an empty string becomes {}, and any
# other value is decoded with Legion::JSON.load; a
# Legion::JSON::ParseError yields {}.
def parse_tool_arguments(arguments)
  return arguments if arguments.is_a?(Hash)
  return {} if arguments.nil? || arguments == ''

  Legion::JSON.load(arguments)
rescue Legion::JSON::ParseError
  {}
end
|
|
392
|
+
|
|
393
|
+
# Map the wire done_reason to a canonical stop reason.
#
# A present done_reason is looked up in OLLAMA_STOP_REASON_MAP (unknown
# values give FALLBACK_STOP_REASON). Without a done_reason the fallback is
# returned when done is truthy, and nil otherwise.
def map_stop_reason(done_reason, done = nil)
  return OLLAMA_STOP_REASON_MAP.fetch(done_reason.to_s, FALLBACK_STOP_REASON) if done_reason

  FALLBACK_STOP_REASON if done
end
|
|
400
|
+
|
|
401
|
+
# -- Chunk parsing --
|
|
402
|
+
|
|
403
|
+
# Turn raw chunk input into decoded data.
#
# A Hash is returned as-is, a blank String yields nil, and everything else
# is handed to parse_json_safely.
def normalize_chunk_input(raw)
  return raw if raw.is_a?(Hash)
  return nil if raw.is_a?(String) && raw.strip.empty?

  parse_json_safely(raw)
end
|
|
408
|
+
|
|
409
|
+
# Build a Canonical::Chunk from a chunk that is already in canonical form.
#
# All top-level keys are symbolized before the hash is handed to
# Canonical::Chunk.from_hash. Every key is converted, not just inspected by
# way of the first one, so hashes mixing symbol and string keys are
# normalized too. Nested hashes are passed through unchanged.
#
# Returns nil (after a debug log entry) when the conversion raises.
def handle_canonical_chunk(data)
  Canonical::Chunk.from_hash(data.transform_keys(&:to_sym))
rescue StandardError => e
  log.debug { "[llm][ollama-translator] action=canonical_chunk_parse_error error=#{e.message}" }
  nil
end
|
|
416
|
+
|
|
417
|
+
# Translate one decoded Ollama stream chunk into at most one Canonical::Chunk.
#
# Exactly one kind is emitted per chunk, chosen in this priority order:
#   1. tool calls  -> build_tool_call_chunk
#   2. thinking    -> Canonical::Chunk.thinking_delta
#   3. content     -> Canonical::Chunk.text_delta
#   4. done flag   -> build_done_chunk
# Returns nil when the chunk carries none of these. Because only one chunk
# is returned, a lower-priority field on the same wire chunk is not emitted.
def parse_ollama_chunk(data)
  # Wire hashes may be keyed by symbols or by strings; try the symbol first.
  read = ->(hash, key) { hash[key] || hash[key.to_s] }

  message = read.call(data, :message) || {}
  finished = read.call(data, :done)
  done_reason = read.call(data, :done_reason)
  request_id = read.call(data, :request_id) || read.call(data, :id)

  # Tool call delta
  pending_calls = read.call(message, :tool_calls)
  return build_tool_call_chunk(pending_calls, request_id) if Array(pending_calls).size.positive?

  # Thinking delta
  reasoning = read.call(message, :thinking).to_s
  return Canonical::Chunk.thinking_delta(delta: reasoning, request_id: request_id) unless reasoning.empty?

  # Text delta: content is emitted even when the chunk is also flagged done
  text = read.call(message, :content).to_s
  return Canonical::Chunk.text_delta(delta: text, request_id: request_id) unless text.empty?

  # Done chunk (only when no content/thinking/tool_calls to emit)
  finished ? build_done_chunk(data, done_reason, request_id) : nil
end
|
|
450
|
+
|
|
451
|
+
# Build the terminal Canonical::Chunk for a finished stream.
#
# Token usage is read from prompt_eval_count / eval_count (symbol or string
# keys) and the stop reason comes from map_stop_reason with done forced to
# true.
def build_done_chunk(data, done_reason, request_id)
  token_counts = {
    input_tokens: data[:prompt_eval_count] || data['prompt_eval_count'],
    output_tokens: data[:eval_count] || data['eval_count']
  }
  usage = Canonical::Usage.from_hash(token_counts)
  stop_reason = map_stop_reason(done_reason, true)

  Canonical::Chunk.done(request_id: request_id, usage: usage, stop_reason: stop_reason)
end
|
|
463
|
+
|
|
464
|
+
# Build a tool_call_delta chunk from a wire tool_calls list.
#
# Only the first entry of tool_calls is converted. Its id falls back to the
# function name and then to the literal 'synthesized'; arguments go through
# parse_tool_arguments.
def build_tool_call_chunk(tool_calls, request_id)
  head = tool_calls.first
  head = head.transform_keys(&:to_sym) if head.is_a?(Hash)
  fn = head[:function] || head['function'] || {}
  fn = fn.transform_keys(&:to_sym) if fn.is_a?(Hash)

  call_id = head[:id] || head['id'] || fn[:name] || 'synthesized'
  call_name = fn[:name] || fn['name']
  call_args = parse_tool_arguments(fn[:arguments] || fn['arguments'])

  tool_call = Canonical::ToolCall.build(
    id: call_id.to_s,
    name: call_name.to_s,
    arguments: call_args,
    source: :client
  )

  Canonical::Chunk.tool_call_delta(tool_call: tool_call, request_id: request_id)
end
|
|
482
|
+
|
|
483
|
+
# -- JSON helpers --
|
|
484
|
+
|
|
485
|
+
# Decode a JSON string with Legion::JSON.load.
#
# Returns nil for non-String input, and nil (after a debug log entry) when
# decoding raises Legion::JSON::ParseError.
def parse_json_safely(raw)
  raw.is_a?(String) ? Legion::JSON.load(raw) : nil
rescue Legion::JSON::ParseError => e
  log.debug { "[llm][ollama-translator] action=json_parse_error error=#{e.message}" }
  nil
end
|
|
493
|
+
end
|
|
494
|
+
end
|
|
495
|
+
end
|
|
496
|
+
end
|
|
497
|
+
end
|
|
@@ -2,8 +2,10 @@
|
|
|
2
2
|
|
|
3
3
|
require 'legion/extensions/llm'
|
|
4
4
|
require 'legion/extensions/llm/ollama/provider'
|
|
5
|
+
require 'legion/extensions/llm/ollama/translator'
|
|
5
6
|
require 'legion/extensions/llm/ollama/version'
|
|
6
7
|
require 'legion/logging/helper'
|
|
8
|
+
require_relative 'ollama/actors/discovery_refresh'
|
|
7
9
|
|
|
8
10
|
module Legion
|
|
9
11
|
module Extensions
|
|
@@ -30,10 +32,7 @@ module Legion
|
|
|
30
32
|
fleet: {
|
|
31
33
|
enabled: false,
|
|
32
34
|
respond_to_requests: false,
|
|
33
|
-
capabilities: %i[chat stream_chat embed]
|
|
34
|
-
lanes: [],
|
|
35
|
-
concurrency: 1,
|
|
36
|
-
queue_suffix: nil
|
|
35
|
+
capabilities: %i[chat stream_chat embed tools]
|
|
37
36
|
}
|
|
38
37
|
}
|
|
39
38
|
)
|
|
@@ -73,7 +72,8 @@ module Legion
|
|
|
73
72
|
instances[:local] = {
|
|
74
73
|
base_url: 'http://127.0.0.1:11434',
|
|
75
74
|
tier: :local,
|
|
76
|
-
capabilities:
|
|
75
|
+
capabilities: {},
|
|
76
|
+
provider_capabilities: { streaming: true }
|
|
77
77
|
}
|
|
78
78
|
end
|
|
79
79
|
|
|
@@ -85,7 +85,8 @@ module Legion
|
|
|
85
85
|
configured.each do |name, config|
|
|
86
86
|
instances[name.to_sym] = normalize_instance_config(config).merge(
|
|
87
87
|
tier: :direct,
|
|
88
|
-
capabilities:
|
|
88
|
+
capabilities: {},
|
|
89
|
+
provider_capabilities: { streaming: true }
|
|
89
90
|
)
|
|
90
91
|
end
|
|
91
92
|
end
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: lex-llm-ollama
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.2.
|
|
4
|
+
version: 0.2.17
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- LegionIO
|
|
@@ -71,14 +71,14 @@ dependencies:
|
|
|
71
71
|
requirements:
|
|
72
72
|
- - ">="
|
|
73
73
|
- !ruby/object:Gem::Version
|
|
74
|
-
version: 0.
|
|
74
|
+
version: 0.5.0
|
|
75
75
|
type: :runtime
|
|
76
76
|
prerelease: false
|
|
77
77
|
version_requirements: !ruby/object:Gem::Requirement
|
|
78
78
|
requirements:
|
|
79
79
|
- - ">="
|
|
80
80
|
- !ruby/object:Gem::Version
|
|
81
|
-
version: 0.
|
|
81
|
+
version: 0.5.0
|
|
82
82
|
description: Ollama provider integration for the LegionIO LLM routing framework.
|
|
83
83
|
email:
|
|
84
84
|
- matthewdiverson@gmail.com
|
|
@@ -101,6 +101,7 @@ files:
|
|
|
101
101
|
- lib/legion/extensions/llm/ollama/actors/fleet_worker.rb
|
|
102
102
|
- lib/legion/extensions/llm/ollama/provider.rb
|
|
103
103
|
- lib/legion/extensions/llm/ollama/runners/fleet_worker.rb
|
|
104
|
+
- lib/legion/extensions/llm/ollama/translator.rb
|
|
104
105
|
- lib/legion/extensions/llm/ollama/version.rb
|
|
105
106
|
homepage: https://github.com/LegionIO/lex-llm-ollama
|
|
106
107
|
licenses:
|