legion-llm 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.github/workflows/ci.yml +16 -0
- data/.gitignore +18 -0
- data/.rubocop.yml +56 -0
- data/CHANGELOG.md +71 -0
- data/CLAUDE.md +388 -0
- data/Gemfile +14 -0
- data/LICENSE +20 -0
- data/README.md +615 -0
- data/docs/plans/2026-03-15-ollama-discovery-design.md +164 -0
- data/docs/plans/2026-03-15-ollama-discovery-implementation.md +1147 -0
- data/legion-llm.gemspec +32 -0
- data/lib/legion/llm/bedrock_bearer_auth.rb +53 -0
- data/lib/legion/llm/compressor.rb +75 -0
- data/lib/legion/llm/discovery/ollama.rb +88 -0
- data/lib/legion/llm/discovery/system.rb +139 -0
- data/lib/legion/llm/escalation_history.rb +28 -0
- data/lib/legion/llm/helpers/llm.rb +59 -0
- data/lib/legion/llm/providers.rb +88 -0
- data/lib/legion/llm/quality_checker.rb +56 -0
- data/lib/legion/llm/router/escalation_chain.rb +49 -0
- data/lib/legion/llm/router/health_tracker.rb +160 -0
- data/lib/legion/llm/router/resolution.rb +43 -0
- data/lib/legion/llm/router/rule.rb +103 -0
- data/lib/legion/llm/router.rb +279 -0
- data/lib/legion/llm/settings.rb +97 -0
- data/lib/legion/llm/transport/exchanges/escalation.rb +14 -0
- data/lib/legion/llm/transport/messages/escalation_event.rb +13 -0
- data/lib/legion/llm/version.rb +7 -0
- data/lib/legion/llm.rb +264 -0
- metadata +136 -0
data/lib/legion/llm.rb
ADDED
|
@@ -0,0 +1,264 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'ruby_llm'
|
|
4
|
+
require 'legion/llm/version'
|
|
5
|
+
require 'legion/llm/settings'
|
|
6
|
+
require 'legion/llm/providers'
|
|
7
|
+
require 'legion/llm/router'
|
|
8
|
+
require 'legion/llm/compressor'
|
|
9
|
+
require 'legion/llm/quality_checker'
|
|
10
|
+
require 'legion/llm/escalation_history'
|
|
11
|
+
|
|
12
|
+
module Legion
|
|
13
|
+
module LLM
|
|
14
|
+
# Raised by chat_with_escalation once every resolution in the escalation
# chain has been attempted without yielding a response that passed the
# quality check.
class EscalationExhausted < StandardError
end
|
|
15
|
+
|
|
16
|
+
class << self
|
|
17
|
+
include Legion::LLM::Providers
|
|
18
|
+
|
|
19
|
+
# Boot the LLM subsystem.
#
# Runs provider configuration, model discovery and default selection (in
# that order), flags the subsystem as started/connected, and finally sends
# a ping to the default provider.
#
# @return [Object] whatever ping_provider returns
def start
  Legion::Logging.debug('Legion::LLM is running start')

  # configure_providers is not defined in this file; presumably it comes
  # from the included Legion::LLM::Providers mixin.
  configure_providers
  run_discovery
  set_defaults

  @started = true
  Legion::Settings[:llm][:connected] = true
  Legion::Logging.info('Legion::LLM started')

  ping_provider
end
|
|
31
|
+
|
|
32
|
+
# Mark the LLM subsystem as stopped.
#
# Clears the :connected flag in Legion::Settings[:llm], resets the
# started flag, and logs the shutdown. No provider connections are closed
# here; only the flags change.
def shutdown
  Legion::Settings[:llm][:connected] = false
  @started = false

  Legion::Logging.info('Legion::LLM shut down')
end
|
|
37
|
+
|
|
38
|
+
# Whether start has completed and shutdown has not been called since.
#
# @return [Boolean] true only when the started flag is exactly `true`
def started?
  @started.equal?(true)
end
|
|
41
|
+
|
|
42
|
+
# The active LLM settings hash.
#
# Falls back to the built-in defaults when the Legion::Settings constant
# has not been loaded.
#
# @return [Hash] Legion::Settings[:llm], or Legion::LLM::Settings.default
def settings
  return Legion::LLM::Settings.default unless Legion.const_defined?('Settings')

  Legion::Settings[:llm]
end
|
|
49
|
+
|
|
50
|
+
# Create a chat session, or run a full escalating request.
#
# Escalation is used only when it is enabled AND a message is supplied;
# otherwise a single chat object is built and returned without sending
# anything. On that single path max_escalations, quality_check and message
# are not forwarded.
#
# @param model [String] model ID (e.g., "us.anthropic.claude-sonnet-4-6-v1")
# @param provider [Symbol] provider slug (e.g., :bedrock, :anthropic)
# @param intent [Hash, nil] routing intent (capability, privacy, etc.)
# @param tier [Symbol, nil] explicit tier override
# @param escalate [Boolean, nil] force escalation on/off; nil reads the
#   routing.escalation.enabled setting
# @param max_escalations [Integer, nil] max escalation attempts override
# @param quality_check [Proc, nil] custom quality check callable
# @param message [String, nil] message to send (required for escalation)
# @param opts [Hash] additional options passed through to RubyLLM.chat
# @return [Object] the result of RubyLLM.chat on the single path (documented
#   upstream as RubyLLM::Chat), or the response returned by `ask` on the
#   escalation path
# TODO: fleet tier dispatch via Transport (Phase 3)
def chat(model: nil, provider: nil, intent: nil, tier: nil, escalate: nil,
         max_escalations: nil, quality_check: nil, message: nil, **opts)
  use_escalation = escalate.nil? ? escalation_enabled? : escalate
  target = { model: model, provider: provider, intent: intent, tier: tier }

  return chat_single(**target, **opts) unless use_escalation && message

  chat_with_escalation(
    **target,
    max_escalations: max_escalations,
    quality_check: quality_check,
    message: message,
    **opts
  )
end
|
|
76
|
+
|
|
77
|
+
# Generate embeddings by delegating to RubyLLM.embed.
#
# @param text [String, Array<String>] text to embed
# @param model [String, nil] embedding model ID; when omitted the call is
#   made without a :model keyword so RubyLLM picks its own default
# @return [Object] whatever RubyLLM.embed returns (documented upstream as
#   RubyLLM::Embedding)
def embed(text, model: nil)
  return RubyLLM.embed(text) unless model

  RubyLLM.embed(text, model: model)
end
|
|
88
|
+
|
|
89
|
+
# Instantiate an agent class with the given keyword options.
#
# @param agent_class [Class] class to instantiate (documented as a
#   RubyLLM::Agent subclass; this method only calls `.new` on it)
# @param options [Hash] keyword arguments forwarded unchanged to `.new`
# @return [Object] the new agent_class instance
def agent(agent_class, **options)
  agent_class.new(**options)
end
|
|
96
|
+
|
|
97
|
+
private
|
|
98
|
+
|
|
99
|
+
# Build one chat object, without any escalation or retry.
#
# When an intent or tier is given and routing is enabled, the router's
# resolution (if any) replaces both model and provider. Anything still
# unset afterwards falls back to the configured defaults; a value that is
# still nil/false is left out of the call entirely.
#
# @param model [String, nil] requested model ID
# @param provider [Symbol, nil] requested provider
# @param intent [Hash, nil] routing intent
# @param tier [Symbol, nil] explicit tier
# @param kwargs [Hash] extra options forwarded to RubyLLM.chat
# @return [Object] the result of RubyLLM.chat
def chat_single(model:, provider:, intent:, tier:, **kwargs)
  resolution =
    if (intent || tier) && Router.routing_enabled?
      Router.resolve(intent: intent, tier: tier, model: model, provider: provider)
    end

  if resolution
    model = resolution.model
    provider = resolution.provider
  end

  target = {
    model: model || settings[:default_model],
    provider: provider || settings[:default_provider]
  }.select { |_key, value| value }

  RubyLLM.chat(**target, **kwargs)
end
|
|
118
|
+
|
|
119
|
+
# Send `message` through the router's escalation chain until one response
# passes the quality check.
#
# Each resolution in the chain is tried in order. A response that fails the
# quality check, or any StandardError raised during the attempt, is recorded
# and the next resolution is tried. Every attempt is reported to the health
# tracker via report_health.
#
# Durations are measured on the monotonic clock so that wall-clock
# adjustments (NTP steps, manual changes) cannot produce negative or
# inflated latencies.
#
# @param model [String, nil] requested model, forwarded to the router
# @param provider [Symbol, nil] requested provider, forwarded to the router
# @param intent [Hash, nil] routing intent
# @param tier [Symbol, nil] explicit tier
# @param max_escalations [Integer, nil] attempt limit, forwarded to the router
# @param quality_check [Proc, nil] custom quality check callable
# @param message [String] message to send on each attempt
# @param kwargs [Hash] extra options merged into each RubyLLM.chat call
# @return [Object] the first response that passed the quality check
# @raise [EscalationExhausted] when no attempt in the chain succeeded
def chat_with_escalation(model:, provider:, intent:, tier:, max_escalations:, quality_check:, message:, **kwargs)
  chain = Router.resolve_chain(
    intent: intent, tier: tier, model: model, provider: provider,
    max_escalations: max_escalations
  )

  threshold = escalation_quality_threshold
  history = []
  monotonic_now = -> { Process.clock_gettime(Process::CLOCK_MONOTONIC) }
  elapsed_ms = ->(since) { ((monotonic_now.call - since) * 1000).round }

  chain.each do |resolution|
    started_at = monotonic_now.call
    begin
      opts = { model: resolution.model, provider: resolution.provider }
      opts.merge!(kwargs)
      chat_obj = RubyLLM.chat(**opts)
      response = chat_obj.ask(message)

      duration_ms = elapsed_ms.call(started_at)
      result = QualityChecker.check(response, quality_threshold: threshold, quality_check: quality_check)

      if result.passed
        report_health(:success, resolution, duration_ms)
        history << build_attempt(resolution, :success, [], duration_ms)
        attach_escalation_history(response, history, resolution, chain)
        # An event is only published when at least one earlier attempt failed.
        publish_escalation_event(history, :success) if history.size > 1
        return response
      else
        report_health(:quality_failure, resolution, duration_ms, failures: result.failures)
        history << build_attempt(resolution, :quality_failure, result.failures, duration_ms)
      end
    rescue StandardError => e
      # NOTE(review): this rescue also covers the bookkeeping calls above, so
      # an exception raised by them after a passing response is recorded as
      # an :error attempt and the next resolution is tried.
      duration_ms = elapsed_ms.call(started_at)
      report_health(:error, resolution, duration_ms)
      history << build_attempt(resolution, :error, [e.class.name], duration_ms)
    end
  end

  publish_escalation_event(history, :exhausted) if history.size > 1
  raise EscalationExhausted, "All #{history.size} escalation attempts failed"
end
|
|
159
|
+
|
|
160
|
+
# Build the history record for one escalation attempt.
#
# @param resolution [#model, #provider, #tier] the target that was tried
# @param outcome [Symbol] :success, :quality_failure or :error
# @param failures [Array] failure descriptors for the attempt
# @param duration_ms [Integer] elapsed time of the attempt in milliseconds
# @return [Hash] keys :model, :provider, :tier, :outcome, :failures, :duration_ms
def build_attempt(resolution, outcome, failures, duration_ms)
  target = { model: resolution.model, provider: resolution.provider, tier: resolution.tier }
  target.merge(outcome: outcome, failures: failures, duration_ms: duration_ms)
end
|
|
164
|
+
|
|
165
|
+
# Decorate a response object with the record of how it was obtained.
#
# Mixes EscalationHistory into the response's singleton, replays every
# attempt into it, and stores the winning resolution and the full chain.
# Does nothing for objects that do not respond to #extend.
#
# @param response [Object] the response returned by the successful attempt
# @param history [Array<Hash>] attempt records, each passed as keywords to
#   record_escalation_attempt
# @param resolution [Object] the resolution that produced the response
# @param chain [Object] the escalation chain that was walked
# @return [Object, nil] nil when the response cannot be extended
def attach_escalation_history(response, history, resolution, chain)
  return unless response.respond_to?(:extend)

  decorated = response.extend(EscalationHistory)
  history.each do |attempt|
    decorated.record_escalation_attempt(**attempt)
  end
  decorated.final_resolution = resolution
  decorated.escalation_chain = chain
end
|
|
173
|
+
|
|
174
|
+
# Report the outcome of one attempt to the router's health tracker.
#
# Two reports are sent for the resolution's provider: the outcome signal
# (value 1, with duration and any failures as metadata) and a :latency
# signal carrying the duration. Skipped entirely when routing is disabled.
#
# @param signal [Symbol] outcome signal (:success, :quality_failure, :error)
# @param resolution [#provider] the target that was tried
# @param duration_ms [Integer] elapsed time in milliseconds
# @param failures [Array, nil] failure descriptors, added to metadata if given
def report_health(signal, resolution, duration_ms, failures: nil)
  return unless Router.routing_enabled?

  outcome_meta = if failures
                   { duration_ms: duration_ms, failures: failures }
                 else
                   { duration_ms: duration_ms }
                 end

  Router.health_tracker.report(provider: resolution.provider, signal: signal, value: 1, metadata: outcome_meta)
  Router.health_tracker.report(provider: resolution.provider, signal: :latency, value: duration_ms, metadata: {})
end
|
|
182
|
+
|
|
183
|
+
# Best-effort notification that an escalation finished.
#
# Currently this only writes a debug log line, and only when both
# Legion::Transport and Legion::Logging are loaded; nothing is sent over the
# transport. Any StandardError is swallowed so that event publishing can
# never fail the chat request.
#
# @param history [Array] attempt records; only its size is used
# @param final_outcome [Symbol] :success or :exhausted
# @return [Object, nil] the logger's return value, or nil
def publish_escalation_event(history, final_outcome)
  if defined?(Legion::Transport) && Legion.const_defined?('Logging')
    Legion::Logging.debug("Escalation event: #{final_outcome}, #{history.size} attempts")
  end
rescue StandardError
  nil
end
|
|
190
|
+
|
|
191
|
+
# Whether escalation is switched on in settings.
#
# @return [Boolean] true only when settings[:routing] is a Hash whose
#   [:escalation][:enabled] value is exactly `true`
def escalation_enabled?
  routing_cfg = settings[:routing]

  routing_cfg.is_a?(Hash) && (routing_cfg[:escalation] || {})[:enabled] == true
end
|
|
198
|
+
|
|
199
|
+
# The quality threshold passed to QualityChecker during escalation.
#
# @return [Object] settings[:routing][:escalation][:quality_threshold] when
#   that key is present (even if its value is nil), otherwise 50
def escalation_quality_threshold
  fallback = 50
  routing_cfg = settings[:routing]
  escalation_cfg = routing_cfg.is_a?(Hash) ? (routing_cfg[:escalation] || {}) : {}

  escalation_cfg.fetch(:quality_threshold, fallback)
end
|
|
206
|
+
|
|
207
|
+
# Apply the configured default model to RubyLLM, or auto-detect defaults.
#
# A configured default model is pushed into RubyLLM's configuration.
# Auto-detection runs only when neither a default model nor a default
# provider has been configured.
#
# @return [Object, nil] the result of auto_configure_defaults, or nil when
#   auto-detection was not needed
def set_defaults
  configured_model = settings[:default_model]
  configured_provider = settings[:default_provider]

  RubyLLM.configure do |config|
    config.default_model = configured_model if configured_model
  end

  # Auto-detect: use first enabled provider's sensible default
  auto_configure_defaults if configured_model.nil? && configured_provider.nil?
end
|
|
220
|
+
|
|
221
|
+
# Refresh local model/system discovery and log a summary.
#
# Runs only when the Ollama provider is enabled in settings. Any
# StandardError raised along the way is logged as a warning rather than
# propagated, so discovery problems never abort startup.
def run_discovery
  return unless settings.dig(:providers, :ollama, :enabled)

  Discovery::Ollama.refresh!
  Discovery::System.refresh!

  model_names = Discovery::Ollama.model_names
  total = model_names.size
  plural = total == 1 ? '' : 's'
  Legion::Logging.info("Ollama: #{total} model#{plural} available (#{model_names.join(', ')})")

  memory_line = "System: #{Discovery::System.total_memory_mb} MB total, " \
                "#{Discovery::System.available_memory_mb} MB available"
  Legion::Logging.info(memory_line)
rescue StandardError => e
  Legion::Logging.warn("Discovery failed: #{e.message}")
end
|
|
235
|
+
|
|
236
|
+
# Send a one-word prompt to the default provider and log the round-trip time.
#
# Skipped unless both a default model and a default provider are configured.
# The content of the reply is not inspected; any reply counts as a
# successful ping. Failures are logged as warnings and never raised.
#
# The round trip is timed on the monotonic clock so that wall-clock
# adjustments during the request cannot yield a negative or inflated value.
def ping_provider
  model = settings[:default_model]
  provider = settings[:default_provider]
  return unless model && provider

  started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)
  RubyLLM.chat(model: model, provider: provider).ask('Respond with only the word: pong')
  elapsed = ((Process.clock_gettime(Process::CLOCK_MONOTONIC) - started_at) * 1000).round
  Legion::Logging.info "LLM ping #{provider}/#{model}: pong (#{elapsed}ms)"
rescue StandardError => e
  Legion::Logging.warn "LLM ping failed for #{provider}/#{model}: #{e.message}"
end
|
|
248
|
+
|
|
249
|
+
# Pick defaults from the first enabled provider that declares a default model.
#
# Providers are visited in the order they appear in settings[:providers].
# The first one that is enabled and has a :default_model is written to
# settings[:default_model] / settings[:default_provider] and logged; the
# rest are ignored. When nothing qualifies, settings are left untouched.
#
# A missing :providers section is treated as "no providers" instead of
# raising, so an incomplete configuration cannot abort startup.
def auto_configure_defaults
  (settings[:providers] || {}).each do |provider, config|
    next unless config&.dig(:enabled)

    model = config[:default_model]
    next unless model

    settings[:default_model] = model
    settings[:default_provider] = provider
    Legion::Logging.info "Auto-configured default: #{model} via #{provider}"
    break
  end
end
|
|
262
|
+
end
|
|
263
|
+
end
|
|
264
|
+
end
|
metadata
ADDED
|
@@ -0,0 +1,136 @@
|
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
|
2
|
+
name: legion-llm
|
|
3
|
+
version: !ruby/object:Gem::Version
|
|
4
|
+
version: 0.3.1
|
|
5
|
+
platform: ruby
|
|
6
|
+
authors:
|
|
7
|
+
- Esity
|
|
8
|
+
bindir: bin
|
|
9
|
+
cert_chain: []
|
|
10
|
+
date: 1980-01-02 00:00:00.000000000 Z
|
|
11
|
+
dependencies:
|
|
12
|
+
- !ruby/object:Gem::Dependency
|
|
13
|
+
name: legion-logging
|
|
14
|
+
requirement: !ruby/object:Gem::Requirement
|
|
15
|
+
requirements:
|
|
16
|
+
- - ">="
|
|
17
|
+
- !ruby/object:Gem::Version
|
|
18
|
+
version: '0'
|
|
19
|
+
type: :runtime
|
|
20
|
+
prerelease: false
|
|
21
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
22
|
+
requirements:
|
|
23
|
+
- - ">="
|
|
24
|
+
- !ruby/object:Gem::Version
|
|
25
|
+
version: '0'
|
|
26
|
+
- !ruby/object:Gem::Dependency
|
|
27
|
+
name: legion-settings
|
|
28
|
+
requirement: !ruby/object:Gem::Requirement
|
|
29
|
+
requirements:
|
|
30
|
+
- - ">="
|
|
31
|
+
- !ruby/object:Gem::Version
|
|
32
|
+
version: '0'
|
|
33
|
+
type: :runtime
|
|
34
|
+
prerelease: false
|
|
35
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
36
|
+
requirements:
|
|
37
|
+
- - ">="
|
|
38
|
+
- !ruby/object:Gem::Version
|
|
39
|
+
version: '0'
|
|
40
|
+
- !ruby/object:Gem::Dependency
|
|
41
|
+
name: ruby_llm
|
|
42
|
+
requirement: !ruby/object:Gem::Requirement
|
|
43
|
+
requirements:
|
|
44
|
+
- - ">="
|
|
45
|
+
- !ruby/object:Gem::Version
|
|
46
|
+
version: '1.0'
|
|
47
|
+
type: :runtime
|
|
48
|
+
prerelease: false
|
|
49
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
50
|
+
requirements:
|
|
51
|
+
- - ">="
|
|
52
|
+
- !ruby/object:Gem::Version
|
|
53
|
+
version: '1.0'
|
|
54
|
+
- !ruby/object:Gem::Dependency
|
|
55
|
+
name: tzinfo
|
|
56
|
+
requirement: !ruby/object:Gem::Requirement
|
|
57
|
+
requirements:
|
|
58
|
+
- - ">="
|
|
59
|
+
- !ruby/object:Gem::Version
|
|
60
|
+
version: '2.0'
|
|
61
|
+
type: :runtime
|
|
62
|
+
prerelease: false
|
|
63
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
64
|
+
requirements:
|
|
65
|
+
- - ">="
|
|
66
|
+
- !ruby/object:Gem::Version
|
|
67
|
+
version: '2.0'
|
|
68
|
+
description: Provides LLM capabilities (chat, embeddings, tool use, agents) to LegionIO
|
|
69
|
+
extensions
|
|
70
|
+
email:
|
|
71
|
+
- matthewdiverson@gmail.com
|
|
72
|
+
executables: []
|
|
73
|
+
extensions: []
|
|
74
|
+
extra_rdoc_files:
|
|
75
|
+
- CHANGELOG.md
|
|
76
|
+
- LICENSE
|
|
77
|
+
- README.md
|
|
78
|
+
files:
|
|
79
|
+
- ".github/workflows/ci.yml"
|
|
80
|
+
- ".gitignore"
|
|
81
|
+
- ".rubocop.yml"
|
|
82
|
+
- CHANGELOG.md
|
|
83
|
+
- CLAUDE.md
|
|
84
|
+
- Gemfile
|
|
85
|
+
- LICENSE
|
|
86
|
+
- README.md
|
|
87
|
+
- docs/plans/2026-03-15-ollama-discovery-design.md
|
|
88
|
+
- docs/plans/2026-03-15-ollama-discovery-implementation.md
|
|
89
|
+
- legion-llm.gemspec
|
|
90
|
+
- lib/legion/llm.rb
|
|
91
|
+
- lib/legion/llm/bedrock_bearer_auth.rb
|
|
92
|
+
- lib/legion/llm/compressor.rb
|
|
93
|
+
- lib/legion/llm/discovery/ollama.rb
|
|
94
|
+
- lib/legion/llm/discovery/system.rb
|
|
95
|
+
- lib/legion/llm/escalation_history.rb
|
|
96
|
+
- lib/legion/llm/helpers/llm.rb
|
|
97
|
+
- lib/legion/llm/providers.rb
|
|
98
|
+
- lib/legion/llm/quality_checker.rb
|
|
99
|
+
- lib/legion/llm/router.rb
|
|
100
|
+
- lib/legion/llm/router/escalation_chain.rb
|
|
101
|
+
- lib/legion/llm/router/health_tracker.rb
|
|
102
|
+
- lib/legion/llm/router/resolution.rb
|
|
103
|
+
- lib/legion/llm/router/rule.rb
|
|
104
|
+
- lib/legion/llm/settings.rb
|
|
105
|
+
- lib/legion/llm/transport/exchanges/escalation.rb
|
|
106
|
+
- lib/legion/llm/transport/messages/escalation_event.rb
|
|
107
|
+
- lib/legion/llm/version.rb
|
|
108
|
+
homepage: https://github.com/LegionIO/legion-llm
|
|
109
|
+
licenses:
|
|
110
|
+
- Apache-2.0
|
|
111
|
+
metadata:
|
|
112
|
+
bug_tracker_uri: https://github.com/LegionIO/legion-llm/issues
|
|
113
|
+
changelog_uri: https://github.com/LegionIO/legion-llm/blob/main/CHANGELOG.md
|
|
114
|
+
documentation_uri: https://github.com/LegionIO/legion-llm
|
|
115
|
+
homepage_uri: https://github.com/LegionIO/LegionIO
|
|
116
|
+
source_code_uri: https://github.com/LegionIO/legion-llm
|
|
117
|
+
wiki_uri: https://github.com/LegionIO/legion-llm/wiki
|
|
118
|
+
rubygems_mfa_required: 'true'
|
|
119
|
+
rdoc_options: []
|
|
120
|
+
require_paths:
|
|
121
|
+
- lib
|
|
122
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
|
123
|
+
requirements:
|
|
124
|
+
- - ">="
|
|
125
|
+
- !ruby/object:Gem::Version
|
|
126
|
+
version: '3.4'
|
|
127
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
128
|
+
requirements:
|
|
129
|
+
- - ">="
|
|
130
|
+
- !ruby/object:Gem::Version
|
|
131
|
+
version: '0'
|
|
132
|
+
requirements: []
|
|
133
|
+
rubygems_version: 3.6.9
|
|
134
|
+
specification_version: 4
|
|
135
|
+
summary: LLM integration for the LegionIO framework via ruby_llm
|
|
136
|
+
test_files: []
|