lex-llm-ollama 0.2.14 → 0.2.22
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +8 -0
- data/CHANGELOG.md +42 -0
- data/Gemfile +0 -7
- data/lex-llm-ollama.gemspec +1 -1
- data/lib/legion/extensions/llm/ollama/actors/discovery_refresh.rb +142 -12
- data/lib/legion/extensions/llm/ollama/provider.rb +155 -79
- data/lib/legion/extensions/llm/ollama/translator.rb +497 -0
- data/lib/legion/extensions/llm/ollama/version.rb +1 -1
- data/lib/legion/extensions/llm/ollama.rb +7 -6
- metadata +4 -3
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 9105e1f109fd5c83078224391ceb669ea321943a2075d85d5a77cf48b73d16e7
|
|
4
|
+
data.tar.gz: 1bd18adb284f8b8fa5c12e0049f310973989efb5a20070dcbb172754f64f941b
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 175570e4fdf0574998741b731718675d643f03c1163183bd41a28a7c90151c71919150060c9d87ff27840f0bcf5203b90dba85e81680f1ec74d3f494b22c85f0
|
|
7
|
+
data.tar.gz: 9ebb03d6cdf2078303f8ac674cfe939a73770a0ce7b721e86c6daaeb63169c0039dfca9e7063138e83896322820939b203f68265da9a2125066ee4e05ecefcd4
|
data/.rubocop.yml
CHANGED
|
@@ -22,6 +22,12 @@ Metrics/CyclomaticComplexity:
|
|
|
22
22
|
Enabled: false
|
|
23
23
|
Metrics/PerceivedComplexity:
|
|
24
24
|
Enabled: false
|
|
25
|
+
Metrics/ClassLength:
|
|
26
|
+
Enabled: false
|
|
27
|
+
Lint/DuplicateBranch:
|
|
28
|
+
Enabled: false
|
|
29
|
+
Lint/UselessConstantScoping:
|
|
30
|
+
Enabled: false
|
|
25
31
|
RSpec/MultipleExpectations:
|
|
26
32
|
Enabled: false
|
|
27
33
|
RSpec/ExampleLength:
|
|
@@ -32,3 +38,5 @@ RSpec/InstanceVariable:
|
|
|
32
38
|
Enabled: false
|
|
33
39
|
Style/Documentation:
|
|
34
40
|
Enabled: false
|
|
41
|
+
Style/AsciiComments:
|
|
42
|
+
Enabled: false
|
data/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,47 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## [0.2.22] - 2026-06-20
|
|
4
|
+
|
|
5
|
+
### Fixed
|
|
6
|
+
- Stub shared registry publishing through `RegistryPublisher#schedule` in specs so async availability-event coverage stays stable after the shared publisher moved off raw `Thread.new`.
|
|
7
|
+
|
|
8
|
+
## [0.2.21] - 2026-06-20
|
|
9
|
+
|
|
10
|
+
### Fixed
|
|
11
|
+
- Stop bulk-publishing Ollama model availability from `list_models`; discovery now emits one registry event per seen model from the shared `lex-llm` policy-filter path so blocked models stay observable without duplicate publishes.
|
|
12
|
+
|
|
13
|
+
## [0.2.20] - 2026-06-20
|
|
14
|
+
|
|
15
|
+
### Changed
|
|
16
|
+
- Slow the live discovery refresh cadence from 60 seconds to 300 seconds for Ollama instances; `extensions.llm.ollama.discovery_interval` still overrides the default.
|
|
17
|
+
|
|
18
|
+
## [0.2.19] - 2026-06-20
|
|
19
|
+
|
|
20
|
+
### Fixed
|
|
21
|
+
- Route Ollama capability overrides through the shared `lex-llm` provider contract and preserve the canonical singular `:embedding` capability on embedding offerings.
|
|
22
|
+
|
|
23
|
+
## [0.2.18] - 2026-06-19
|
|
24
|
+
|
|
25
|
+
### Changed
|
|
26
|
+
- Adopt `Legion::Extensions::Llm::Inventory::ScopedRefresher` mixin (lex-llm 0.6.0). Discovery
|
|
27
|
+
refresh actors now write directly to the live `Inventory` catalog via `Inventory.write_lane`.
|
|
28
|
+
- Pin `lex-llm >= 0.6.0` and `legion-llm >= 0.14.0` in gemspec.
|
|
29
|
+
- Standard `weight: 100` default added to provider instance settings schema.
|
|
30
|
+
|
|
31
|
+
## 0.2.17 - 2026-06-16
|
|
32
|
+
|
|
33
|
+
- dependency updates, code quality improvements
|
|
34
|
+
|
|
35
|
+
## 0.2.16 - 2026-06-15
|
|
36
|
+
|
|
37
|
+
- **CapabilityPolicy integration** — Optional capabilities default false; API-provided capabilities tagged as `:model_metadata`. Settings overrides at provider/instance/model level supported.
|
|
38
|
+
|
|
39
|
+
## 0.2.15 - 2026-06-13
|
|
40
|
+
|
|
41
|
+
- **Gemfile cleanup** — Remove local path overrides; dependencies resolve from gemspec via rubygems.
|
|
42
|
+
- **Canonical tool support** — Use `ToolSchema.extract`, add `:tools` capability, canonical normalization for tool parameter schemas.
|
|
43
|
+
- 147 examples, 0 failures; 17 files, 0 rubocop offenses.
|
|
44
|
+
|
|
3
45
|
## 0.2.14 - 2026-06-05
|
|
4
46
|
|
|
5
47
|
- Verified specs and RuboCop compliance (52 examples, 0 failures; 15 files, 0 offenses)
|
data/Gemfile
CHANGED
|
@@ -2,13 +2,6 @@
|
|
|
2
2
|
|
|
3
3
|
source 'https://rubygems.org'
|
|
4
4
|
|
|
5
|
-
group :test do
|
|
6
|
-
llm_base_path = ENV.fetch('LEX_LLM_PATH', File.expand_path('../lex-llm', __dir__))
|
|
7
|
-
transport_path = ENV.fetch('LEGION_TRANSPORT_PATH', File.expand_path('../../legion-transport', __dir__))
|
|
8
|
-
gem 'legion-transport', path: transport_path if File.directory?(transport_path)
|
|
9
|
-
gem 'lex-llm', path: llm_base_path if File.directory?(llm_base_path)
|
|
10
|
-
end
|
|
11
|
-
|
|
12
5
|
gemspec
|
|
13
6
|
|
|
14
7
|
group :development do
|
data/lex-llm-ollama.gemspec
CHANGED
|
@@ -27,5 +27,5 @@ Gem::Specification.new do |spec|
|
|
|
27
27
|
spec.add_dependency 'legion-logging', '>= 1.3.2'
|
|
28
28
|
spec.add_dependency 'legion-settings', '>= 1.3.14'
|
|
29
29
|
spec.add_dependency 'legion-transport', '>= 1.4.14'
|
|
30
|
-
spec.add_dependency 'lex-llm', '>= 0.
|
|
30
|
+
spec.add_dependency 'lex-llm', '>= 0.6.0'
|
|
31
31
|
end
|
|
@@ -1,11 +1,19 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
require 'digest'
|
|
4
|
+
|
|
3
5
|
begin
|
|
4
6
|
require 'legion/extensions/actors/every'
|
|
5
7
|
rescue LoadError => e
|
|
6
8
|
warn(e.message) if $VERBOSE
|
|
7
9
|
end
|
|
8
10
|
|
|
11
|
+
begin
|
|
12
|
+
require 'legion/extensions/llm/inventory/scoped_refresher'
|
|
13
|
+
rescue LoadError => e
|
|
14
|
+
warn(e.message) if $VERBOSE
|
|
15
|
+
end
|
|
16
|
+
|
|
9
17
|
return unless defined?(Legion::Extensions::Actors::Every)
|
|
10
18
|
|
|
11
19
|
module Legion
|
|
@@ -16,7 +24,11 @@ module Legion
|
|
|
16
24
|
class DiscoveryRefresh < Legion::Extensions::Actors::Every
|
|
17
25
|
include Legion::Logging::Helper
|
|
18
26
|
|
|
19
|
-
|
|
27
|
+
if defined?(Legion::Extensions::Llm::Inventory::ScopedRefresher)
|
|
28
|
+
include Legion::Extensions::Llm::Inventory::ScopedRefresher
|
|
29
|
+
end
|
|
30
|
+
|
|
31
|
+
def self.every_seconds = 300
|
|
20
32
|
|
|
21
33
|
def runner_class = self.class
|
|
22
34
|
def runner_function = 'manual'
|
|
@@ -26,25 +38,143 @@ module Legion
|
|
|
26
38
|
def generate_task? = false
|
|
27
39
|
|
|
28
40
|
def time
|
|
29
|
-
return
|
|
41
|
+
return self.class.every_seconds unless defined?(Legion::Settings)
|
|
30
42
|
|
|
31
|
-
Legion::Settings.dig(:extensions, :llm, :ollama, :discovery_interval) ||
|
|
43
|
+
Legion::Settings.dig(:extensions, :llm, :ollama, :discovery_interval) || self.class.every_seconds
|
|
32
44
|
end
|
|
33
45
|
|
|
34
|
-
def
|
|
35
|
-
|
|
36
|
-
|
|
46
|
+
def scope_key(**)
|
|
47
|
+
{ provider: :ollama }
|
|
48
|
+
end
|
|
37
49
|
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
50
|
+
def compute_lanes_for_scope(**)
|
|
51
|
+
return [] unless defined?(Legion::LLM::Call::Registry)
|
|
52
|
+
|
|
53
|
+
lanes = []
|
|
54
|
+
ollama_instances.each do |instance|
|
|
55
|
+
collect_lanes_for_instance(instance, lanes)
|
|
56
|
+
rescue StandardError => e
|
|
57
|
+
handle_exception(e, level: :warn, handled: true,
|
|
58
|
+
operation: 'ollama.discovery_refresh.compute_lanes',
|
|
59
|
+
instance: instance[:instance])
|
|
44
60
|
end
|
|
61
|
+
lanes
|
|
62
|
+
rescue StandardError => e
|
|
63
|
+
handle_exception(e, level: :warn, handled: true,
|
|
64
|
+
operation: 'ollama.discovery_refresh.compute_lanes_for_scope')
|
|
65
|
+
[]
|
|
66
|
+
end
|
|
67
|
+
|
|
68
|
+
def credential_hash(**)
|
|
69
|
+
settings = ollama_settings
|
|
70
|
+
Digest::SHA256.hexdigest(settings[:api_key].to_s + settings[:instances].to_s)[0, 16]
|
|
71
|
+
rescue StandardError => e
|
|
72
|
+
handle_exception(e, level: :warn, handled: true, operation: 'ollama.discovery_refresh.credential_hash')
|
|
73
|
+
'unknown'
|
|
74
|
+
end
|
|
75
|
+
|
|
76
|
+
def manual(**)
|
|
77
|
+
tick if defined?(Legion::Extensions::Llm::Inventory::ScopedRefresher) &&
|
|
78
|
+
respond_to?(:tick, true)
|
|
45
79
|
rescue StandardError => e
|
|
46
80
|
handle_exception(e, level: :warn, handled: true, operation: 'ollama.actor.discovery_refresh')
|
|
47
81
|
end
|
|
82
|
+
|
|
83
|
+
private
|
|
84
|
+
|
|
85
|
+
def ollama_instances
|
|
86
|
+
Legion::LLM::Call::Registry.all_instances.select do |e|
|
|
87
|
+
(e[:provider] || '').to_sym == :ollama
|
|
88
|
+
end
|
|
89
|
+
end
|
|
90
|
+
|
|
91
|
+
def collect_lanes_for_instance(instance, lanes)
|
|
92
|
+
adapter = instance[:adapter]
|
|
93
|
+
return unless adapter.respond_to?(:discover_offerings)
|
|
94
|
+
|
|
95
|
+
Array(adapter.discover_offerings(live: true)).each do |raw_offering|
|
|
96
|
+
offering = offering_to_hash(raw_offering)
|
|
97
|
+
next unless offering
|
|
98
|
+
|
|
99
|
+
model = offering[:model] || offering['model']
|
|
100
|
+
next unless model
|
|
101
|
+
|
|
102
|
+
lane = build_lane(offering, instance)
|
|
103
|
+
lanes << lane
|
|
104
|
+
lanes << fleet_lane(lane, instance) if emit_fleet_lane?(lane)
|
|
105
|
+
end
|
|
106
|
+
end
|
|
107
|
+
|
|
108
|
+
def offering_to_hash(offering)
|
|
109
|
+
return nil if offering.nil?
|
|
110
|
+
return offering if offering.is_a?(Hash)
|
|
111
|
+
|
|
112
|
+
hash = offering.to_h
|
|
113
|
+
hash[:type] ||= hash[:usage_type]
|
|
114
|
+
hash[:enabled] = offering.respond_to?(:enabled?) ? offering.enabled? : true
|
|
115
|
+
hash
|
|
116
|
+
end
|
|
117
|
+
|
|
118
|
+
def build_lane(offering, instance)
|
|
119
|
+
instance_id = instance[:instance] || instance[:instance_id] || instance[:id]
|
|
120
|
+
raw_tier = offering[:tier] || :local
|
|
121
|
+
offer_type = offering[:type]
|
|
122
|
+
type = %i[embed embedding].include?(offer_type) ? :embedding : :inference
|
|
123
|
+
capabilities = normalize_capabilities(offering[:capabilities] || [])
|
|
124
|
+
model = offering[:model] || offering['model']
|
|
125
|
+
|
|
126
|
+
lane_id = Legion::Extensions::Llm::Inventory::ScopedRefresher.compose_id(
|
|
127
|
+
tier: raw_tier, provider_family: :ollama,
|
|
128
|
+
instance_id: instance_id, type: type, model: model
|
|
129
|
+
)
|
|
130
|
+
|
|
131
|
+
{
|
|
132
|
+
id: lane_id,
|
|
133
|
+
tier: raw_tier,
|
|
134
|
+
provider_family: :ollama,
|
|
135
|
+
instance_id: instance_id,
|
|
136
|
+
model: model,
|
|
137
|
+
canonical_model_alias: offering[:canonical_model_alias] || offering[:name],
|
|
138
|
+
type: type,
|
|
139
|
+
capabilities: capabilities,
|
|
140
|
+
limits: offering[:limits] || {},
|
|
141
|
+
enabled: offering.fetch(:enabled, true),
|
|
142
|
+
cost: offering[:cost] || {}
|
|
143
|
+
}
|
|
144
|
+
end
|
|
145
|
+
|
|
146
|
+
def emit_fleet_lane?(lane)
|
|
147
|
+
return false unless lane[:type] == :inference
|
|
148
|
+
|
|
149
|
+
ollama_settings&.dig(:fleet, :dispatch, :enabled)
|
|
150
|
+
end
|
|
151
|
+
|
|
152
|
+
def fleet_lane(lane, instance)
|
|
153
|
+
fleet_id = Legion::Extensions::Llm::Inventory::ScopedRefresher.compose_id(
|
|
154
|
+
tier: :fleet, provider_family: :ollama,
|
|
155
|
+
instance_id: instance[:instance] || instance[:instance_id],
|
|
156
|
+
type: lane[:type], model: lane[:model]
|
|
157
|
+
)
|
|
158
|
+
lane.merge(id: fleet_id, tier: :fleet)
|
|
159
|
+
end
|
|
160
|
+
|
|
161
|
+
def normalize_capabilities(caps)
|
|
162
|
+
# Inventory::Capabilities lives in lex-llm; the previous fallback (`return []
|
|
163
|
+
# unless defined?(...)`) silently swallowed every capability the operator
|
|
164
|
+
# declared via enable_thinking/enable_tools when the constant wasn't loaded.
|
|
165
|
+
# Always normalize through the shared vocabulary so aliases collapse.
|
|
166
|
+
if defined?(Legion::Extensions::Llm::Inventory::Capabilities)
|
|
167
|
+
Legion::Extensions::Llm::Inventory::Capabilities.normalize(caps)
|
|
168
|
+
else
|
|
169
|
+
Array(caps).compact.map(&:to_sym).uniq
|
|
170
|
+
end
|
|
171
|
+
end
|
|
172
|
+
|
|
173
|
+
def ollama_settings
|
|
174
|
+
Legion::Settings.dig(:extensions, :llm, :ollama)
|
|
175
|
+
rescue StandardError
|
|
176
|
+
{}
|
|
177
|
+
end
|
|
48
178
|
end
|
|
49
179
|
end
|
|
50
180
|
end
|
|
@@ -8,7 +8,7 @@ module Legion
|
|
|
8
8
|
module Llm
|
|
9
9
|
module Ollama
|
|
10
10
|
# Ollama provider implementation for the Legion::Extensions::Llm base provider contract.
|
|
11
|
-
class Provider < Legion::Extensions::Llm::Provider
|
|
11
|
+
class Provider < Legion::Extensions::Llm::Provider
|
|
12
12
|
include Legion::Logging::Helper
|
|
13
13
|
|
|
14
14
|
class << self
|
|
@@ -41,6 +41,10 @@ module Legion
|
|
|
41
41
|
Ollama.default_settings
|
|
42
42
|
end
|
|
43
43
|
|
|
44
|
+
def translator
|
|
45
|
+
@translator ||= Translator.new(config: config)
|
|
46
|
+
end
|
|
47
|
+
|
|
44
48
|
def api_base
|
|
45
49
|
resolve_base_url || normalize_url(settings[:base_url] || settings[:endpoint] || 'http://127.0.0.1:11434')
|
|
46
50
|
end
|
|
@@ -73,14 +77,27 @@ module Legion
|
|
|
73
77
|
end
|
|
74
78
|
end
|
|
75
79
|
|
|
76
|
-
def list_models
|
|
80
|
+
def list_models(live: false, **filters)
|
|
77
81
|
log.debug { "ollama provider discovering models endpoint=#{api_base}#{models_url}" }
|
|
78
82
|
super.tap do |models|
|
|
79
83
|
log.debug { "ollama provider discovered model_count=#{models.size}" }
|
|
80
|
-
self.class.registry_publisher.publish_models_async(models, readiness: readiness(live: false))
|
|
81
84
|
end
|
|
82
85
|
end
|
|
83
86
|
|
|
87
|
+
def discover_offerings(live: false, raise_on_unreachable: false, **filters)
|
|
88
|
+
return filter_cached_offerings(Array(@cached_offerings), filters) unless live
|
|
89
|
+
|
|
90
|
+
provider_health = health(live:)
|
|
91
|
+
@cached_offerings = discover_live_offerings(filters, provider_health, live:)
|
|
92
|
+
log_discover_complete(@cached_offerings)
|
|
93
|
+
@cached_offerings
|
|
94
|
+
rescue Faraday::ConnectionFailed, Faraday::TimeoutError => e
|
|
95
|
+
log.warn("[#{slug}] instance=#{provider_instance_id} unreachable: #{e.message}")
|
|
96
|
+
raise if raise_on_unreachable
|
|
97
|
+
|
|
98
|
+
[]
|
|
99
|
+
end
|
|
100
|
+
|
|
84
101
|
def show_model(model)
|
|
85
102
|
log.debug { "ollama provider fetching model details model=#{model}" }
|
|
86
103
|
connection.post(show_model_url, { model: model }).body
|
|
@@ -108,24 +125,42 @@ module Legion
|
|
|
108
125
|
raise
|
|
109
126
|
end
|
|
110
127
|
|
|
111
|
-
|
|
112
|
-
log.debug do
|
|
113
|
-
"ollama provider discovering offerings live=#{live} cached_model_count=#{Array(@cached_models).size}"
|
|
114
|
-
end
|
|
115
|
-
offerings = resolve_models(live).filter_map do |model_info|
|
|
116
|
-
next unless model_allowed?(model_info.id)
|
|
128
|
+
private
|
|
117
129
|
|
|
118
|
-
|
|
130
|
+
def discovery_registry_readiness(provider_health, live:)
|
|
131
|
+
{
|
|
132
|
+
provider: slug.to_sym,
|
|
133
|
+
configured: configured?,
|
|
134
|
+
ready: provider_health[:ready] == true,
|
|
135
|
+
live: live,
|
|
136
|
+
health: provider_health
|
|
137
|
+
}
|
|
138
|
+
end
|
|
139
|
+
|
|
140
|
+
def discover_live_offerings(filters, provider_health, live:)
|
|
141
|
+
readiness = discovery_registry_readiness(provider_health, live:)
|
|
142
|
+
Array(list_models(live:, **filters)).filter_map do |model|
|
|
143
|
+
self.class.registry_publisher.publish_models_async([model], readiness:)
|
|
144
|
+
next unless model_matches_filters?(model, filters)
|
|
145
|
+
next unless model_allowed?(model.id)
|
|
146
|
+
|
|
147
|
+
log_model_discovered(model)
|
|
148
|
+
offering_from_model(model, health: provider_health)
|
|
119
149
|
end
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
log.
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
150
|
+
end
|
|
151
|
+
|
|
152
|
+
def log_model_discovered(model)
|
|
153
|
+
log.debug(
|
|
154
|
+
"[#{slug}] instance=#{provider_instance_id} action=model_discovered " \
|
|
155
|
+
"model=#{model.id} family=#{model.family}"
|
|
156
|
+
)
|
|
157
|
+
end
|
|
158
|
+
|
|
159
|
+
def log_discover_complete(offerings)
|
|
160
|
+
log.info(
|
|
161
|
+
"[#{slug}] instance=#{provider_instance_id} action=discover_complete " \
|
|
162
|
+
"model_count=#{Array(offerings).size}"
|
|
163
|
+
)
|
|
129
164
|
end
|
|
130
165
|
|
|
131
166
|
CONTEXT_WINDOWS = {
|
|
@@ -149,8 +184,6 @@ module Legion
|
|
|
149
184
|
'bge' => 512
|
|
150
185
|
}.freeze
|
|
151
186
|
|
|
152
|
-
private
|
|
153
|
-
|
|
154
187
|
def resolve_models(live)
|
|
155
188
|
if live
|
|
156
189
|
@cached_models = list_models
|
|
@@ -159,7 +192,26 @@ module Legion
|
|
|
159
192
|
end
|
|
160
193
|
end
|
|
161
194
|
|
|
162
|
-
def
|
|
195
|
+
def running_model_ids
|
|
196
|
+
Array(list_running_models).filter_map do |m|
|
|
197
|
+
m['name'] || m[:name] || m['model'] || m[:model]
|
|
198
|
+
end.map(&:to_s)
|
|
199
|
+
end
|
|
200
|
+
|
|
201
|
+
def offering_from_model(model_info, health: {})
|
|
202
|
+
loaded = begin
|
|
203
|
+
running_model_ids.include?(model_info.id.to_s)
|
|
204
|
+
rescue StandardError
|
|
205
|
+
health.is_a?(Hash) ? health.fetch(:loaded, false) : false
|
|
206
|
+
end
|
|
207
|
+
policy = resolve_capability_policy(model_info)
|
|
208
|
+
embedding_model = model_info.embedding?
|
|
209
|
+
capabilities = embedding_model ? [:embedding] : policy[:capabilities]
|
|
210
|
+
capability_sources = if embedding_model
|
|
211
|
+
policy[:sources].merge(embedding: { value: true, source: :model_metadata })
|
|
212
|
+
else
|
|
213
|
+
policy[:sources]
|
|
214
|
+
end
|
|
163
215
|
Legion::Extensions::Llm::Routing::ModelOffering.new(
|
|
164
216
|
provider_family: :ollama,
|
|
165
217
|
instance_id: config.respond_to?(:instance_id) ? config.instance_id : :default,
|
|
@@ -167,18 +219,35 @@ module Legion
|
|
|
167
219
|
tier: offering_tier,
|
|
168
220
|
model: model_info.id,
|
|
169
221
|
usage_type: offering_usage_type(model_info),
|
|
170
|
-
capabilities:
|
|
222
|
+
capabilities: capabilities,
|
|
223
|
+
capability_sources: capability_sources,
|
|
171
224
|
limits: offering_limits(model_info),
|
|
172
|
-
metadata: offering_metadata(model_info)
|
|
225
|
+
metadata: offering_metadata(model_info).merge(loaded: loaded)
|
|
173
226
|
)
|
|
174
227
|
end
|
|
175
228
|
|
|
176
|
-
def
|
|
177
|
-
model_info.
|
|
229
|
+
def resolve_capability_policy(model_info)
|
|
230
|
+
model_id = model_info.id.to_s
|
|
231
|
+
Legion::Extensions::Llm::CapabilityPolicy.resolve(
|
|
232
|
+
real: capabilities_from_api(model_info),
|
|
233
|
+
provider_catalog: {},
|
|
234
|
+
probe: {},
|
|
235
|
+
provider_envelope: { streaming: true },
|
|
236
|
+
provider_config: provider_capability_config,
|
|
237
|
+
instance_config: instance_capability_config,
|
|
238
|
+
model_config: model_capability_config(model_id)
|
|
239
|
+
)
|
|
240
|
+
end
|
|
241
|
+
|
|
242
|
+
def capabilities_from_api(model_info)
|
|
243
|
+
Array(model_info.capabilities).each_with_object({}) do |cap, hash|
|
|
244
|
+
sym = cap.to_s.downcase.to_sym
|
|
245
|
+
hash[sym] = true
|
|
246
|
+
end
|
|
178
247
|
end
|
|
179
248
|
|
|
180
|
-
def
|
|
181
|
-
model_info.
|
|
249
|
+
def offering_usage_type(model_info)
|
|
250
|
+
model_info.embedding? ? :embedding : :inference
|
|
182
251
|
end
|
|
183
252
|
|
|
184
253
|
def offering_limits(model_info)
|
|
@@ -294,7 +363,7 @@ module Legion
|
|
|
294
363
|
chunks << built
|
|
295
364
|
block&.call(built)
|
|
296
365
|
rescue Legion::JSON::ParseError => e
|
|
297
|
-
handle_exception(e, level: :
|
|
366
|
+
handle_exception(e, level: :warn, handled: true, operation: 'ollama.stream_parse')
|
|
298
367
|
end
|
|
299
368
|
|
|
300
369
|
def finalize_stream(chunks)
|
|
@@ -357,16 +426,16 @@ module Legion
|
|
|
357
426
|
def format_tools(tools)
|
|
358
427
|
return nil if tools.empty?
|
|
359
428
|
|
|
360
|
-
tool_names = tools.values.filter_map { |tool|
|
|
429
|
+
tool_names = tools.values.filter_map { |tool| Legion::Extensions::Llm::Canonical::ToolSchema.tool_name(tool) }
|
|
361
430
|
log.debug { "ollama provider formatting tools count=#{tools.size} names=#{tool_names.join(',')}" }
|
|
362
431
|
|
|
363
432
|
tools.values.map do |tool|
|
|
364
433
|
{
|
|
365
434
|
type: 'function',
|
|
366
435
|
function: {
|
|
367
|
-
name: tool
|
|
368
|
-
description: tool
|
|
369
|
-
parameters: tool
|
|
436
|
+
name: Legion::Extensions::Llm::Canonical::ToolSchema.tool_name(tool),
|
|
437
|
+
description: Legion::Extensions::Llm::Canonical::ToolSchema.tool_description(tool),
|
|
438
|
+
parameters: Legion::Extensions::Llm::Canonical::ToolSchema.extract(tool)
|
|
370
439
|
}
|
|
371
440
|
}
|
|
372
441
|
end
|
|
@@ -380,67 +449,74 @@ module Legion
|
|
|
380
449
|
|
|
381
450
|
def parse_completion_response(response)
|
|
382
451
|
body = response.body
|
|
383
|
-
|
|
384
|
-
|
|
452
|
+
canonical = translator.parse_response(body)
|
|
453
|
+
to_legacy_message(canonical, body)
|
|
454
|
+
end
|
|
455
|
+
|
|
456
|
+
def build_chunk(data)
|
|
457
|
+
canonical_chunk = translator.parse_chunk(data)
|
|
458
|
+
return nil if canonical_chunk.nil?
|
|
459
|
+
|
|
460
|
+
to_legacy_chunk(canonical_chunk, data)
|
|
461
|
+
end
|
|
462
|
+
|
|
463
|
+
def to_legacy_message(canonical, raw_body)
|
|
464
|
+
usage = canonical.usage
|
|
385
465
|
Legion::Extensions::Llm::Message.new(
|
|
386
466
|
role: :assistant,
|
|
387
|
-
content:
|
|
388
|
-
model_id:
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
467
|
+
content: canonical.text,
|
|
468
|
+
model_id: canonical.model,
|
|
469
|
+
thinking: if canonical.thinking
|
|
470
|
+
Legion::Extensions::Llm::Thinking.build(
|
|
471
|
+
text: canonical.thinking.content, signature: canonical.thinking.signature
|
|
472
|
+
)
|
|
473
|
+
end,
|
|
474
|
+
tool_calls: legacy_tool_calls(canonical.tool_calls),
|
|
475
|
+
input_tokens: usage&.input_tokens,
|
|
476
|
+
output_tokens: usage&.output_tokens,
|
|
477
|
+
raw: raw_body
|
|
394
478
|
)
|
|
395
479
|
end
|
|
396
480
|
|
|
397
|
-
def
|
|
398
|
-
message = data.fetch('message', {})
|
|
399
|
-
thinking = message['thinking']
|
|
481
|
+
def to_legacy_chunk(canonical_chunk, raw_data)
|
|
400
482
|
Legion::Extensions::Llm::Chunk.new(
|
|
401
483
|
role: :assistant,
|
|
402
|
-
content:
|
|
403
|
-
thinking:
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
484
|
+
content: canonical_chunk.text_delta? ? canonical_chunk.delta : nil,
|
|
485
|
+
thinking: if canonical_chunk.thinking_delta?
|
|
486
|
+
Legion::Extensions::Llm::Thinking.build(
|
|
487
|
+
text: canonical_chunk.delta
|
|
488
|
+
)
|
|
489
|
+
end,
|
|
490
|
+
tool_calls: legacy_streaming_tool_calls(canonical_chunk),
|
|
491
|
+
model_id: raw_data['model'] || raw_data[:model],
|
|
492
|
+
input_tokens: canonical_chunk.usage&.input_tokens ||
|
|
493
|
+
raw_data['prompt_eval_count'] || raw_data[:prompt_eval_count],
|
|
494
|
+
output_tokens: canonical_chunk.usage&.output_tokens ||
|
|
495
|
+
raw_data['eval_count'] || raw_data[:eval_count],
|
|
496
|
+
raw: raw_data
|
|
409
497
|
)
|
|
410
498
|
end
|
|
411
499
|
|
|
412
|
-
def
|
|
413
|
-
|
|
414
|
-
message['content'],
|
|
415
|
-
metadata: thinking_metadata(message)
|
|
416
|
-
)
|
|
417
|
-
|
|
418
|
-
[
|
|
419
|
-
extraction.content,
|
|
420
|
-
Thinking.build(text: extraction.thinking, signature: extraction.signature)
|
|
421
|
-
]
|
|
422
|
-
end
|
|
500
|
+
def legacy_tool_calls(canonical_tool_calls)
|
|
501
|
+
return nil if canonical_tool_calls.nil? || canonical_tool_calls.empty?
|
|
423
502
|
|
|
424
|
-
|
|
425
|
-
|
|
503
|
+
canonical_tool_calls.to_h do |tc|
|
|
504
|
+
[
|
|
505
|
+
(tc.name || tc.id).to_s.to_sym,
|
|
506
|
+
Legion::Extensions::Llm::ToolCall.new(id: tc.id, name: tc.name, arguments: tc.arguments || {})
|
|
507
|
+
]
|
|
508
|
+
end
|
|
426
509
|
end
|
|
427
510
|
|
|
428
|
-
def
|
|
429
|
-
return nil unless
|
|
511
|
+
def legacy_streaming_tool_calls(canonical_chunk)
|
|
512
|
+
return nil unless canonical_chunk.tool_call_delta?
|
|
430
513
|
|
|
431
|
-
|
|
514
|
+
tc = canonical_chunk.tool_call
|
|
515
|
+
return nil unless tc
|
|
432
516
|
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
function.fetch('name').to_sym,
|
|
437
|
-
Legion::Extensions::Llm::ToolCall.new(
|
|
438
|
-
id: call['id'] || function['name'],
|
|
439
|
-
name: function['name'],
|
|
440
|
-
arguments: function['arguments'] || {}
|
|
441
|
-
)
|
|
442
|
-
]
|
|
443
|
-
end
|
|
517
|
+
{ (tc.name || tc.id).to_s.to_sym => Legion::Extensions::Llm::ToolCall.new(
|
|
518
|
+
id: tc.id, name: tc.name, arguments: tc.arguments || ''
|
|
519
|
+
) }
|
|
444
520
|
end
|
|
445
521
|
|
|
446
522
|
def parse_list_models_response(response, provider, _capabilities)
|
|
@@ -0,0 +1,497 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'legion/extensions/llm/canonical'
|
|
4
|
+
require 'legion/extensions/llm/responses/thinking_extractor'
|
|
5
|
+
require 'legion/json'
|
|
6
|
+
require 'legion/logging'
|
|
7
|
+
|
|
8
|
+
module Legion
|
|
9
|
+
module Extensions
|
|
10
|
+
module Llm
|
|
11
|
+
module Ollama
|
|
12
|
+
# Canonical provider translator for Ollama (/api/chat NDJSON wire format).
|
|
13
|
+
#
|
|
14
|
+
# Implements render_request, parse_response, parse_chunk, and capabilities.
|
|
15
|
+
# Ollama uses NDJSON streaming (not SSE), native tool calling, and the `think`
|
|
16
|
+
# flag for extended thinking support.
|
|
17
|
+
#
|
|
18
|
+
# Ollama quirks (declared in capabilities):
|
|
19
|
+
# - tool_calls_as_text: false — Ollama returns structured tool_calls natively.
|
|
20
|
+
# - forced_tool_choice: false — Ollama does not support forced tool selection.
|
|
21
|
+
# - assistant_prefill: false — Ollama does not support assistant prefill.
|
|
22
|
+
class Translator
|
|
23
|
+
include Legion::Logging::Helper
|
|
24
|
+
|
|
25
|
+
# Ollama-specific stop_reason mapping (done_reason field).
|
|
26
|
+
OLLAMA_STOP_REASON_MAP = {
|
|
27
|
+
'stop' => :end_turn,
|
|
28
|
+
'tool_use' => :tool_use,
|
|
29
|
+
'length' => :max_tokens
|
|
30
|
+
}.freeze
|
|
31
|
+
FALLBACK_STOP_REASON = :end_turn
|
|
32
|
+
|
|
33
|
+
# G18 parameter mapping: canonical params -> Ollama options keys.
|
|
34
|
+
PARAM_OPTIONS_KEYS = {
|
|
35
|
+
max_tokens: :num_predict,
|
|
36
|
+
temperature: :temperature,
|
|
37
|
+
top_p: :top_p,
|
|
38
|
+
top_k: :top_k,
|
|
39
|
+
stop_sequences: :stop,
|
|
40
|
+
seed: :seed,
|
|
41
|
+
frequency_penalty: :frequency_penalty,
|
|
42
|
+
presence_penalty: :presence_penalty
|
|
43
|
+
}.freeze
|
|
44
|
+
|
|
45
|
+
SUPPORTED_PARAMS = %i[
|
|
46
|
+
max_tokens temperature top_p top_k stop_sequences
|
|
47
|
+
seed frequency_penalty presence_penalty
|
|
48
|
+
].freeze
|
|
49
|
+
|
|
50
|
+
def initialize(config: nil)
|
|
51
|
+
@config = config
|
|
52
|
+
end
|
|
53
|
+
|
|
54
|
+
# Render a canonical request into Ollama /api/chat wire payload.
|
|
55
|
+
def render_request(request)
|
|
56
|
+
model = request.metadata&.dig(:model) || 'default'
|
|
57
|
+
messages = format_messages(request)
|
|
58
|
+
payload = {
|
|
59
|
+
model: model,
|
|
60
|
+
messages: messages,
|
|
61
|
+
stream: request.stream
|
|
62
|
+
}
|
|
63
|
+
|
|
64
|
+
payload[:tools] = format_tools(request.tools) unless request.tools.to_h.empty?
|
|
65
|
+
apply_options(payload, request.params)
|
|
66
|
+
apply_thinking_config(payload, request)
|
|
67
|
+
apply_response_format(payload, request.params)
|
|
68
|
+
|
|
69
|
+
log.debug do
|
|
70
|
+
"[llm][ollama-translator] action=render_request model=#{model} stream=#{request.stream} " \
|
|
71
|
+
"message_count=#{messages.size} tools=#{request.tools&.size || 0}"
|
|
72
|
+
end
|
|
73
|
+
|
|
74
|
+
payload.compact
|
|
75
|
+
end
|
|
76
|
+
|
|
77
|
+
# Parse an Ollama /api/chat completion response into a Canonical::Response.
|
|
78
|
+
def parse_response(wire)
|
|
79
|
+
return canonical_error_response(wire) unless wire.is_a?(Hash)
|
|
80
|
+
return Canonical::Response.from_hash(wire) if canonical_response?(wire)
|
|
81
|
+
|
|
82
|
+
message = wire[:message] || wire['message'] || {}
|
|
83
|
+
content = message[:content] || message['content'] || ''
|
|
84
|
+
tool_calls_raw = message[:tool_calls] || message['tool_calls']
|
|
85
|
+
model = wire[:model] || wire['model']
|
|
86
|
+
done_reason = wire[:done_reason] || wire['done_reason']
|
|
87
|
+
done = wire[:done] || wire['done']
|
|
88
|
+
|
|
89
|
+
extraction = Responses::ThinkingExtractor.extract(
|
|
90
|
+
content,
|
|
91
|
+
metadata: thinking_metadata(message)
|
|
92
|
+
)
|
|
93
|
+
|
|
94
|
+
text = extraction.content || ''
|
|
95
|
+
thinking = build_canonical_thinking(extraction)
|
|
96
|
+
tool_calls = parse_tool_calls(tool_calls_raw)
|
|
97
|
+
stop_reason = map_stop_reason(done_reason, done)
|
|
98
|
+
|
|
99
|
+
usage = Canonical::Usage.from_hash({
|
|
100
|
+
input_tokens: wire[:prompt_eval_count] || wire['prompt_eval_count'],
|
|
101
|
+
output_tokens: wire[:eval_count] || wire['eval_count']
|
|
102
|
+
})
|
|
103
|
+
|
|
104
|
+
Canonical::Response.build(
|
|
105
|
+
text: text.to_s,
|
|
106
|
+
thinking: thinking,
|
|
107
|
+
tool_calls: tool_calls,
|
|
108
|
+
usage: usage,
|
|
109
|
+
stop_reason: stop_reason,
|
|
110
|
+
model: model,
|
|
111
|
+
metadata: {}
|
|
112
|
+
)
|
|
113
|
+
rescue StandardError => e
|
|
114
|
+
handle_exception(e, level: :error, handled: false, operation: 'ollama.translator.parse_response')
|
|
115
|
+
raise
|
|
116
|
+
end
|
|
117
|
+
|
|
118
|
+
# Parse a single NDJSON chunk into a Canonical::Chunk or nil.
|
|
119
|
+
def parse_chunk(raw)
|
|
120
|
+
return nil if raw.nil?
|
|
121
|
+
|
|
122
|
+
data = normalize_chunk_input(raw)
|
|
123
|
+
return nil if data.nil?
|
|
124
|
+
|
|
125
|
+
# Handle canonical-form chunks (from conformance fixtures)
|
|
126
|
+
return handle_canonical_chunk(data) if data['type'] || data[:type]
|
|
127
|
+
|
|
128
|
+
parse_ollama_chunk(data)
|
|
129
|
+
rescue StandardError => e
|
|
130
|
+
handle_exception(e, level: :error, handled: false, operation: 'ollama.translator.parse_chunk')
|
|
131
|
+
raise
|
|
132
|
+
end
|
|
133
|
+
|
|
134
|
+
# Declared capabilities for the Ollama provider.
|
|
135
|
+
def capabilities
|
|
136
|
+
{
|
|
137
|
+
provider: 'ollama',
|
|
138
|
+
streaming: true,
|
|
139
|
+
tool_calls: true,
|
|
140
|
+
thinking: true,
|
|
141
|
+
vision: true,
|
|
142
|
+
embeddings: true,
|
|
143
|
+
tool_calls_as_text: false,
|
|
144
|
+
forced_tool_choice: false,
|
|
145
|
+
assistant_prefill: false
|
|
146
|
+
}.freeze
|
|
147
|
+
end
|
|
148
|
+
|
|
149
|
+
private
|
|
150
|
+
|
|
151
|
+
attr_reader :config
|
|
152
|
+
|
|
153
|
+
# -- Message formatting --
|
|
154
|
+
|
|
155
|
+
def format_messages(request)
|
|
156
|
+
messages = format_request_messages(request.messages)
|
|
157
|
+
|
|
158
|
+
if request.system.to_s.strip.empty?
|
|
159
|
+
messages
|
|
160
|
+
else
|
|
161
|
+
[{ role: 'system', content: request.system.strip }] + messages
|
|
162
|
+
end
|
|
163
|
+
end
|
|
164
|
+
|
|
165
|
+
def format_request_messages(messages)
|
|
166
|
+
return [] if messages.nil? || messages.empty?
|
|
167
|
+
|
|
168
|
+
messages.map { |msg| format_message(msg) }
|
|
169
|
+
end
|
|
170
|
+
|
|
171
|
+
def format_message(msg)
|
|
172
|
+
role = msg.role.to_s
|
|
173
|
+
content = format_message_content(msg)
|
|
174
|
+
result = { role: role, content: content }
|
|
175
|
+
|
|
176
|
+
images = extract_images(msg.content)
|
|
177
|
+
result[:images] = images unless images.empty?
|
|
178
|
+
|
|
179
|
+
result[:tool_call_id] = msg.tool_call_id if msg.tool_call_id
|
|
180
|
+
result.compact
|
|
181
|
+
end
|
|
182
|
+
|
|
183
|
+
# Reduces a message's content to a plain string.
#
# Strings pass through untouched, arrays are flattened through
# `extract_text_from_blocks`, a single content block yields its text when it
# reports `text?`, and anything else falls back to `to_s`.
#
# @param msg [#content]
# @return [String]
def format_message_content(msg)
  payload = msg.content

  case payload
  when String then payload
  when Array then extract_text_from_blocks(payload)
  when Canonical::ContentBlock
    return payload.text.to_s if payload.text?

    payload.to_s
  else
    payload.to_s
  end
end
|
|
196
|
+
|
|
197
|
+
# Concatenates the textual parts of a list of content blocks.
#
# Canonical blocks go through `format_content_block_text`, hashes contribute
# their `text` entry (symbol or string key), and any other element contributes
# its `to_s`. Elements that produce no text are skipped.
#
# @param blocks [Array]
# @return [String] the pieces joined without a separator
def extract_text_from_blocks(blocks)
  pieces = []

  blocks.each do |item|
    piece = case item
            when Canonical::ContentBlock then format_content_block_text(item)
            when Hash then item.transform_keys(&:to_sym)[:text]&.to_s
            else item.to_s
            end
    pieces << piece if piece
  end

  pieces.join
end
|
|
211
|
+
|
|
212
|
+
# Renders one canonical content block as text.
#
# Tool-use blocks are serialized as a JSON object with `name` and `arguments`;
# text, thinking and tool-result blocks yield their text. Any other block type
# yields nil.
#
# @param block [#type, #text, #name, #input]
# @return [String, nil]
def format_content_block_text(block)
  case block.type
  when :tool_use
    Legion::JSON.dump({ name: block.name, arguments: block.input || {} })
  when :text, :thinking, :tool_result
    block.text.to_s
  end
end
|
|
222
|
+
|
|
223
|
+
# Collects the `data` of every canonical `:image` block in the content.
#
# @param content [Object] message content; anything other than an Array yields no images
# @return [Array] image payloads, skipping blocks whose data is nil or false
def extract_images(content)
  return [] unless content.is_a?(Array)

  content.each_with_object([]) do |item, images|
    next unless item.is_a?(Canonical::ContentBlock) && item.type == :image

    payload = item.data
    images << payload if payload
  end
end
|
|
232
|
+
|
|
233
|
+
# -- Tool formatting --
|
|
234
|
+
|
|
235
|
+
# Converts the tool registry into function-tool entries for the wire payload.
#
# Each value of `tools.to_h` may be a canonical tool definition, a Hash (keys
# are symbolized), or another object that is indexed directly. The schema is
# read from `:parameters`, falling back to `:input_schema`, coerced with `to_h`
# when it is not already a Hash, and passed through
# `ToolDefinition.normalize_parameters`.
#
# @param tools [#to_h, nil]
# @return [Array<Hash>, nil] nil when there are no tools
def format_tools(tools)
  registry = tools.to_h
  return nil if registry.empty?

  registry.each_value.map do |entry|
    spec = case entry
           when Canonical::ToolDefinition
             { name: entry.name, description: entry.description, parameters: entry.parameters }
           when Hash
             entry.transform_keys(&:to_sym)
           else
             entry
           end

    schema = spec[:parameters] || spec[:input_schema]
    schema = schema.to_h unless schema.is_a?(Hash) || !schema.respond_to?(:to_h)

    {
      type: 'function',
      function: {
        name: (spec[:name] || spec['name']).to_s,
        description: (spec[:description] || spec['description'] || '').to_s,
        parameters: Legion::Extensions::Llm::Canonical::ToolDefinition.normalize_parameters(schema)
      }
    }
  end
end
|
|
263
|
+
|
|
264
|
+
# -- Parameter mapping (G18) --
|
|
265
|
+
|
|
266
|
+
# Copies supported canonical params into `payload[:options]`.
#
# Each name in SUPPORTED_PARAMS is read from `params`; non-nil values are stored
# under the key given by PARAM_OPTIONS_KEYS, with `stop_sequences` wrapped in an
# Array. `payload[:options]` is only set when at least one option was collected.
# A set `max_thinking_tokens` is not sent; it is only reported at debug level.
#
# @param payload [Hash] wire payload, mutated in place
# @param params [Object] ignored unless it is a Canonical::Params
def apply_options(payload, params)
  return unless params.is_a?(Canonical::Params)

  collected = SUPPORTED_PARAMS.each_with_object({}) do |name, acc|
    setting = params.public_send(name)
    next if setting.nil?

    acc[PARAM_OPTIONS_KEYS[name]] = name == :stop_sequences ? Array(setting) : setting
  end
  payload[:options] = collected unless collected.empty?

  return unless params.max_thinking_tokens

  log.debug do
    '[llm][ollama-translator] action=drop_unsupported_param param=max_thinking_tokens ' \
      "value=#{params.max_thinking_tokens} reason=ollama_not_supported"
  end
end
|
|
292
|
+
|
|
293
|
+
# -- Thinking configuration --
|
|
294
|
+
|
|
295
|
+
# Sets `payload[:think]` to true when `enable_thinking?` accepts the request;
# otherwise the payload is left untouched.
#
# @param payload [Hash] wire payload, mutated in place
# @param request [Object] passed to `enable_thinking?`
def apply_thinking_config(payload, request)
  payload[:think] = true if enable_thinking?(request)
end
|
|
300
|
+
|
|
301
|
+
# Decides whether thinking should be requested.
#
# A Canonical::Thinking::Config enables thinking when it reports `enabled?`.
# A Hash enables thinking unless its `enabled` entry is explicitly `false`;
# the entry is read under the symbol key first and the string key otherwise,
# so a string-keyed `{ 'enabled' => false }` is honoured as well (previously
# only the symbol key was consulted and such a hash switched thinking on).
#
# @param request [#thinking]
# @return [Boolean]
def enable_thinking?(request)
  thinking = request.thinking
  return true if thinking.is_a?(Canonical::Thinking::Config) && thinking.enabled?
  return false unless thinking.is_a?(Hash)

  flag = thinking.key?(:enabled) ? thinking[:enabled] : thinking['enabled']
  flag != false
end
|
|
307
|
+
|
|
308
|
+
# -- Response format --
|
|
309
|
+
|
|
310
|
+
# Copies the requested response format into `payload[:format]`.
#
# For a Hash format the first truthy entry among `:schema`, `'schema'`,
# `:json_schema`, `'json_schema'` is used, falling back to the hash itself;
# any other format value is passed through as given.
#
# @param payload [Hash] wire payload, mutated in place
# @param params [Object] ignored unless it is a Canonical::Params with a response_format
def apply_response_format(payload, params)
  return unless params.is_a?(Canonical::Params)

  requested = params.response_format
  return unless requested

  payload[:format] =
    if requested.is_a?(Hash)
      schema_key = [:schema, 'schema', :json_schema, 'json_schema'].find { |key| requested[key] }
      schema_key ? requested[schema_key] : requested
    else
      requested
    end
end
|
|
322
|
+
|
|
323
|
+
# -- Response parsing --
|
|
324
|
+
|
|
325
|
+
# Reports whether the hash has a `text` or `stop_reason` key, under either a
# symbol or a string key.
#
# @param wire [Hash]
# @return [Boolean]
def canonical_response?(wire)
  [:text, 'text', :stop_reason, 'stop_reason'].any? { |marker| wire.key?(marker) }
end
|
|
328
|
+
|
|
329
|
+
# Builds a canonical response describing a failure.
#
# Non-Hash input is treated as an empty body. The error detail is taken from
# the body's `error` entry (string key first, then symbol), defaulting to a
# generic parse error; usage and model are read from the body the same way.
#
# @param wire [Object] raw response body
# @return [Object] the result of Canonical::Response.build with stop_reason :error
def canonical_error_response(wire)
  body = wire.is_a?(Hash) ? wire : {}
  lookup = ->(field) { body[field.to_s] || body[field] }

  failure = lookup.call(:error) || { type: 'parse_error', message: 'Failed to parse response' }
  usage = Canonical::Usage.from_hash(lookup.call(:usage) || {})

  Canonical::Response.build(
    text: '',
    tool_calls: [],
    usage: usage,
    stop_reason: :error,
    model: lookup.call(:model),
    metadata: { error: failure }
  )
end
|
|
343
|
+
|
|
344
|
+
# Wraps a message's `thinking` entry (symbol or string key) as metadata.
#
# @param message [Hash]
# @return [Hash] `{ thinking: ... }`, or an empty hash when the entry is absent
def thinking_metadata(message)
  trace = message[:thinking] || message['thinking']
  trace ? { thinking: trace } : {}
end
|
|
350
|
+
|
|
351
|
+
# Builds a Canonical::Thinking from an extraction result.
#
# @param extraction [#thinking, #signature]
# @return [Canonical::Thinking, nil] nil when the extraction has neither thinking nor a signature
def build_canonical_thinking(extraction)
  reasoning = extraction.thinking
  signature = extraction.signature
  return nil if !reasoning && !signature

  Canonical::Thinking.new(content: reasoning, signature: signature)
end
|
|
359
|
+
|
|
360
|
+
# Converts raw tool-call entries into canonical tool calls.
#
# Each entry's `function` supplies the name and arguments; the id falls back to
# the function name when the entry has none. An entry that raises while being
# converted is reported through `handle_exception` and left out of the result.
#
# @param tool_calls_raw [Object] anything other than a non-empty Array yields []
# @return [Array] canonical tool calls
def parse_tool_calls(tool_calls_raw)
  return [] if !tool_calls_raw.is_a?(Array) || tool_calls_raw.empty?

  tool_calls_raw.each_with_object([]) do |entry, parsed|
    entry = entry.transform_keys(&:to_sym) if entry.is_a?(Hash)
    fn = entry[:function] || entry['function'] || {}
    fn = fn.transform_keys(&:to_sym) if fn.is_a?(Hash)

    tool_name = fn[:name] || fn['name']
    call_id = entry[:id] || entry['id'] || tool_name
    arguments = parse_tool_arguments(fn[:arguments] || fn['arguments'])

    built = Canonical::ToolCall.build(
      id: call_id.to_s,
      name: tool_name.to_s,
      arguments: arguments,
      source: :client
    )
    parsed << built if built
  rescue StandardError => e
    handle_exception(e, level: :warn, handled: true, operation: 'ollama.translator.parse_tool_call')
  end
end
|
|
383
|
+
|
|
384
|
+
# Normalizes tool-call arguments.
#
# nil and the empty string become an empty hash, a Hash is returned as given,
# and anything else is decoded with Legion::JSON.load; a parse error yields an
# empty hash.
#
# @param arguments [Hash, String, nil]
# @return [Object] usually a Hash; the decoded JSON value otherwise
def parse_tool_arguments(arguments)
  case arguments
  when nil, '' then {}
  when Hash then arguments
  else Legion::JSON.load(arguments)
  end
rescue Legion::JSON::ParseError
  {}
end
|
|
392
|
+
|
|
393
|
+
# Translates a `done_reason` into a canonical stop reason.
#
# A present reason is looked up (as a string) in OLLAMA_STOP_REASON_MAP with
# FALLBACK_STOP_REASON as the default. Without a reason, the fallback is used
# only when `done` is truthy; otherwise the result is nil.
#
# @param done_reason [Object, nil]
# @param done [Object, nil]
# @return [Object, nil]
def map_stop_reason(done_reason, done = nil)
  return OLLAMA_STOP_REASON_MAP.fetch(done_reason.to_s, FALLBACK_STOP_REASON) if done_reason

  FALLBACK_STOP_REASON if done
end
|
|
400
|
+
|
|
401
|
+
# -- Chunk parsing --
|
|
402
|
+
|
|
403
|
+
# Turns raw chunk input into a hash-like value.
#
# A Hash is returned as given, a blank string yields nil, and everything else
# is handed to `parse_json_safely`.
#
# @param raw [Hash, String, Object]
# @return [Object, nil]
def normalize_chunk_input(raw)
  case raw
  when Hash then raw
  when String then raw.strip.empty? ? nil : parse_json_safely(raw)
  else parse_json_safely(raw)
  end
end
|
|
408
|
+
|
|
409
|
+
# Builds a canonical chunk from a hash that is already in canonical form.
#
# Every key is symbolized before the hash is handed to Canonical::Chunk.from_hash.
# Previously only the first key's type was inspected, so a hash that started with
# a Symbol key but also contained String keys was passed on unnormalized.
# Keys that cannot be symbolized are kept as they are.
#
# Any error raised while normalizing or building is logged at debug level and
# results in nil, keeping this a best-effort path.
#
# @param data [Hash] canonical-form chunk data with symbol and/or string keys
# @return [Object, nil] the chunk from Canonical::Chunk.from_hash, or nil on failure
def handle_canonical_chunk(data)
  normalized = data.transform_keys { |key| key.respond_to?(:to_sym) ? key.to_sym : key }
  Canonical::Chunk.from_hash(normalized)
rescue StandardError => e
  log.debug { "[llm][ollama-translator] action=canonical_chunk_parse_error error=#{e.message}" }
  nil
end
|
|
416
|
+
|
|
417
|
+
# Translates one Ollama stream chunk into a canonical chunk.
#
# Exactly one chunk is produced, chosen in this order: tool call, thinking
# delta, text delta, done. When none applies the result is nil.
#
# @param data [Hash] decoded chunk with symbol or string keys
# @return [Object, nil]
def parse_ollama_chunk(data)
  read = ->(source, field) { source[field] || source[field.to_s] }

  message = read.call(data, :message) || {}
  finished = read.call(data, :done)
  done_reason = read.call(data, :done_reason)
  request_id = read.call(data, :request_id) || read.call(data, :id)

  # Tool calls take precedence over every other kind of payload.
  calls = read.call(message, :tool_calls)
  return build_tool_call_chunk(calls, request_id) unless Array(calls).empty?

  reasoning = read.call(message, :thinking).to_s
  return Canonical::Chunk.thinking_delta(delta: reasoning, request_id: request_id) unless reasoning.empty?

  # Text is emitted even on a done chunk (Ollama's final chunk may carry text).
  text = read.call(message, :content).to_s
  return Canonical::Chunk.text_delta(delta: text, request_id: request_id) unless text.empty?

  # A done chunk is built only when nothing else was emitted.
  finished ? build_done_chunk(data, done_reason, request_id) : nil
end
|
|
450
|
+
|
|
451
|
+
# Builds the terminal chunk of a stream.
#
# Usage is derived from the chunk's `prompt_eval_count` (input tokens) and
# `eval_count` (output tokens); the stop reason comes from `map_stop_reason`
# with `done` forced to true.
#
# @param data [Hash] decoded chunk with symbol or string keys
# @param done_reason [Object, nil]
# @param request_id [Object, nil]
# @return [Object] the result of Canonical::Chunk.done
def build_done_chunk(data, done_reason, request_id)
  token_counts = {
    input_tokens: data[:prompt_eval_count] || data['prompt_eval_count'],
    output_tokens: data[:eval_count] || data['eval_count']
  }
  usage = Canonical::Usage.from_hash(token_counts)
  stop_reason = map_stop_reason(done_reason, true)

  Canonical::Chunk.done(request_id: request_id, usage: usage, stop_reason: stop_reason)
end
|
|
463
|
+
|
|
464
|
+
# Builds a tool-call chunk from the first entry of `tool_calls`; any further
# entries in the list are not read here.
#
# The id falls back to the function name and then to the literal 'synthesized'.
#
# @param tool_calls [Array] raw tool-call entries from the chunk's message
# @param request_id [Object, nil]
# @return [Object] the result of Canonical::Chunk.tool_call_delta
def build_tool_call_chunk(tool_calls, request_id)
  head = tool_calls.first
  head = head.transform_keys(&:to_sym) if head.is_a?(Hash)
  fn = head[:function] || head['function'] || {}
  fn = fn.transform_keys(&:to_sym) if fn.is_a?(Hash)

  call_id = head[:id] || head['id'] || fn[:name] || 'synthesized'
  tool_name = fn[:name] || fn['name']
  raw_arguments = fn[:arguments] || fn['arguments']

  tool_call = Canonical::ToolCall.build(
    id: call_id.to_s,
    name: tool_name.to_s,
    arguments: parse_tool_arguments(raw_arguments),
    source: :client
  )

  Canonical::Chunk.tool_call_delta(tool_call: tool_call, request_id: request_id)
end
|
|
482
|
+
|
|
483
|
+
# -- JSON helpers --
|
|
484
|
+
|
|
485
|
+
# Decodes a JSON string with Legion::JSON.load.
#
# @param raw [Object] only Strings are decoded
# @return [Object, nil] the decoded value; nil for non-String input or when a
#   parse error occurs (the error is logged at debug level)
def parse_json_safely(raw)
  Legion::JSON.load(raw) if raw.is_a?(String)
rescue Legion::JSON::ParseError => e
  log.debug { "[llm][ollama-translator] action=json_parse_error error=#{e.message}" }
  nil
end
|
|
493
|
+
end
|
|
494
|
+
end
|
|
495
|
+
end
|
|
496
|
+
end
|
|
497
|
+
end
|
|
@@ -2,8 +2,10 @@
|
|
|
2
2
|
|
|
3
3
|
require 'legion/extensions/llm'
|
|
4
4
|
require 'legion/extensions/llm/ollama/provider'
|
|
5
|
+
require 'legion/extensions/llm/ollama/translator'
|
|
5
6
|
require 'legion/extensions/llm/ollama/version'
|
|
6
7
|
require 'legion/logging/helper'
|
|
8
|
+
require_relative 'ollama/actors/discovery_refresh'
|
|
7
9
|
|
|
8
10
|
module Legion
|
|
9
11
|
module Extensions
|
|
@@ -30,10 +32,7 @@ module Legion
|
|
|
30
32
|
fleet: {
|
|
31
33
|
enabled: false,
|
|
32
34
|
respond_to_requests: false,
|
|
33
|
-
capabilities: %i[chat stream_chat embed]
|
|
34
|
-
lanes: [],
|
|
35
|
-
concurrency: 1,
|
|
36
|
-
queue_suffix: nil
|
|
35
|
+
capabilities: %i[chat stream_chat embed tools]
|
|
37
36
|
}
|
|
38
37
|
}
|
|
39
38
|
)
|
|
@@ -73,7 +72,8 @@ module Legion
|
|
|
73
72
|
instances[:local] = {
|
|
74
73
|
base_url: 'http://127.0.0.1:11434',
|
|
75
74
|
tier: :local,
|
|
76
|
-
capabilities:
|
|
75
|
+
capabilities: {},
|
|
76
|
+
provider_capabilities: { streaming: true }
|
|
77
77
|
}
|
|
78
78
|
end
|
|
79
79
|
|
|
@@ -85,7 +85,8 @@ module Legion
|
|
|
85
85
|
configured.each do |name, config|
|
|
86
86
|
instances[name.to_sym] = normalize_instance_config(config).merge(
|
|
87
87
|
tier: :direct,
|
|
88
|
-
capabilities:
|
|
88
|
+
capabilities: {},
|
|
89
|
+
provider_capabilities: { streaming: true }
|
|
89
90
|
)
|
|
90
91
|
end
|
|
91
92
|
end
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: lex-llm-ollama
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.2.
|
|
4
|
+
version: 0.2.22
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- LegionIO
|
|
@@ -71,14 +71,14 @@ dependencies:
|
|
|
71
71
|
requirements:
|
|
72
72
|
- - ">="
|
|
73
73
|
- !ruby/object:Gem::Version
|
|
74
|
-
version: 0.
|
|
74
|
+
version: 0.6.0
|
|
75
75
|
type: :runtime
|
|
76
76
|
prerelease: false
|
|
77
77
|
version_requirements: !ruby/object:Gem::Requirement
|
|
78
78
|
requirements:
|
|
79
79
|
- - ">="
|
|
80
80
|
- !ruby/object:Gem::Version
|
|
81
|
-
version: 0.
|
|
81
|
+
version: 0.6.0
|
|
82
82
|
description: Ollama provider integration for the LegionIO LLM routing framework.
|
|
83
83
|
email:
|
|
84
84
|
- matthewdiverson@gmail.com
|
|
@@ -101,6 +101,7 @@ files:
|
|
|
101
101
|
- lib/legion/extensions/llm/ollama/actors/fleet_worker.rb
|
|
102
102
|
- lib/legion/extensions/llm/ollama/provider.rb
|
|
103
103
|
- lib/legion/extensions/llm/ollama/runners/fleet_worker.rb
|
|
104
|
+
- lib/legion/extensions/llm/ollama/translator.rb
|
|
104
105
|
- lib/legion/extensions/llm/ollama/version.rb
|
|
105
106
|
homepage: https://github.com/LegionIO/lex-llm-ollama
|
|
106
107
|
licenses:
|