legion-llm 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,13 @@
1
+ # frozen_string_literal: true
2
+
3
module Legion
  module LLM
    module Transport
      module Messages
        # Message class bound to the 'llm.escalation.completed' routing key.
        # Everything else is inherited from ::Legion::Transport::Message.
        class EscalationEvent < ::Legion::Transport::Message
          routing_key('llm.escalation.completed')
        end
      end
    end
  end
end
@@ -0,0 +1,7 @@
1
+ # frozen_string_literal: true
2
+
3
module Legion
  module LLM
    # Version string of this gem release.
    VERSION = '0.3.1'
  end
end
data/lib/legion/llm.rb ADDED
@@ -0,0 +1,264 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'ruby_llm'
4
+ require 'legion/llm/version'
5
+ require 'legion/llm/settings'
6
+ require 'legion/llm/providers'
7
+ require 'legion/llm/router'
8
+ require 'legion/llm/compressor'
9
+ require 'legion/llm/quality_checker'
10
+ require 'legion/llm/escalation_history'
11
+
12
+ module Legion
13
+ module LLM
14
# Raised by the escalation loop after every resolution in the chain has been
# tried without producing a response that passes the quality check.
class EscalationExhausted < StandardError
end
15
+
16
+ class << self
17
+ include Legion::LLM::Providers
18
+
19
# Boots the LLM subsystem. In order: configures providers, runs discovery,
# applies defaults, marks the subsystem as started/connected, then pings the
# default provider.
#
# @return [Object] whatever ping_provider returns
def start
  Legion::Logging.debug('Legion::LLM is running start')

  configure_providers
  run_discovery
  set_defaults

  @started = true
  Legion::Settings[:llm][:connected] = true
  Legion::Logging.info('Legion::LLM started')
  ping_provider
end
31
+
32
# Marks the subsystem as disconnected and no longer started, then logs it.
#
# @return [Object] result of the final logging call
def shutdown
  Legion::Settings[:llm][:connected] = false
  @started = false
  Legion::Logging.info('Legion::LLM shut down')
end
37
+
38
# @return [Boolean] true only while the started flag is exactly +true+
#   (set by #start, cleared by #shutdown)
def started?
  @started.equal?(true)
end
41
+
42
# LLM settings hash.
#
# @return [Object] Legion::Settings[:llm] when Legion::Settings is loaded,
#   otherwise the built-in Legion::LLM::Settings.default
def settings
  return Legion::LLM::Settings.default unless Legion.const_defined?('Settings')

  Legion::Settings[:llm]
end
49
+
50
# Entry point for chat requests.
#
# When escalation is active (passed explicitly, or read from settings when
# +escalate+ is nil) AND a +message+ is supplied, the message is run through
# the escalation retry loop. In every other case a chat session is built via
# chat_single; +max_escalations+, +quality_check+ and +message+ are not
# forwarded on that path.
#
# @param model [String] model ID (e.g., "us.anthropic.claude-sonnet-4-6-v1")
# @param provider [Symbol] provider slug (e.g., :bedrock, :anthropic)
# @param intent [Hash, nil] routing intent (capability, privacy, etc.)
# @param tier [Symbol, nil] explicit tier override
# @param escalate [Boolean, nil] enable escalation retry loop (nil = auto from settings)
# @param max_escalations [Integer, nil] max escalation attempts override
# @param quality_check [Proc, nil] custom quality check callable
# @param message [String, nil] message to send (required for escalation)
# @param kwargs [Hash] additional options passed through to RubyLLM.chat
# @return [Object] the chat session built by chat_single, or - on the
#   escalation path - the response to +message+
# TODO: fleet tier dispatch via Transport (Phase 3)
def chat(model: nil, provider: nil, intent: nil, tier: nil, escalate: nil,
         max_escalations: nil, quality_check: nil, message: nil, **kwargs)
  escalate = escalation_enabled? if escalate.nil?
  routing = { model: model, provider: provider, intent: intent, tier: tier }

  return chat_single(**routing, **kwargs) unless escalate && message

  chat_with_escalation(**routing, max_escalations: max_escalations, quality_check: quality_check,
                                  message: message, **kwargs)
end
76
+
77
# Generate embeddings by delegating to RubyLLM.embed.
#
# @param text [String, Array<String>] text to embed
# @param model [String, nil] embedding model ID; when omitted no model option
#   is passed and RubyLLM picks the model
# @return [RubyLLM::Embedding]
def embed(text, model: nil)
  model_option = model ? { model: model } : {}
  RubyLLM.embed(text, **model_option)
end
88
+
89
# Instantiate an agent class.
#
# @param agent_class [Class] a RubyLLM::Agent subclass
# @param kwargs [Hash] keyword options handed unchanged to +agent_class.new+
# @return [RubyLLM::Agent] the new instance
def agent(agent_class, **kwargs)
  agent_class.new(**kwargs)
end
96
+
97
+ private
98
+
99
# Builds one chat session without the escalation loop.
#
# If an intent or tier is given and routing is enabled, the router's
# resolution (when it returns one) replaces both model and provider. Whatever
# is still unset afterwards falls back to the configured defaults. Falsy
# model/provider values are left out of the options sent to RubyLLM.chat.
#
# @param model [String, nil]
# @param provider [Symbol, nil]
# @param intent [Hash, nil]
# @param tier [Symbol, nil]
# @param kwargs [Hash] extra options merged into the RubyLLM.chat call
# @return [Object] result of RubyLLM.chat
def chat_single(model:, provider:, intent:, tier:, **kwargs)
  routed = nil
  if (intent || tier) && Router.routing_enabled?
    routed = Router.resolve(intent: intent, tier: tier, model: model, provider: provider)
  end
  model, provider = routed.model, routed.provider if routed

  model ||= settings[:default_model]
  provider ||= settings[:default_provider]

  chat_options = { model: model, provider: provider }.select { |_key, value| value }
  RubyLLM.chat(**chat_options.merge(kwargs))
end
118
+
119
# Sends +message+ to each resolution of the escalation chain in turn until a
# response passes the quality check.
#
# Every attempt is reported through report_health and appended to an attempt
# history; the history is attached to the winning response. An escalation
# event is published only when more than one attempt was recorded.
#
# @param model [String, nil] forwarded to Router.resolve_chain
# @param provider [Symbol, nil] forwarded to Router.resolve_chain
# @param intent [Hash, nil] forwarded to Router.resolve_chain
# @param tier [Symbol, nil] forwarded to Router.resolve_chain
# @param max_escalations [Integer, nil] forwarded to Router.resolve_chain
# @param quality_check [Proc, nil] handed to QualityChecker.check
# @param message [String] message asked on every attempt
# @param kwargs [Hash] extra options merged into each RubyLLM.chat call
# @return [Object] the first response that passes QualityChecker.check
# @raise [EscalationExhausted] when no attempt produced a passing response
def chat_with_escalation(model:, provider:, intent:, tier:, max_escalations:, quality_check:, message:, **kwargs)
  chain = Router.resolve_chain(
    intent: intent, tier: tier, model: model, provider: provider,
    max_escalations: max_escalations
  )

  threshold = escalation_quality_threshold
  history = []

  chain.each do |resolution|
    # Monotonic clock: these durations are reported as provider latency and
    # must not jump (or go negative) when the wall clock is adjusted.
    started = Process.clock_gettime(Process::CLOCK_MONOTONIC)
    begin
      opts = { model: resolution.model, provider: resolution.provider }.merge(kwargs)
      response = RubyLLM.chat(**opts).ask(message)

      duration_ms = escalation_elapsed_ms(started)
      result = QualityChecker.check(response, quality_threshold: threshold, quality_check: quality_check)

      if result.passed
        report_health(:success, resolution, duration_ms)
        history << build_attempt(resolution, :success, [], duration_ms)
        attach_escalation_history(response, history, resolution, chain)
        publish_escalation_event(history, :success) if history.size > 1
        return response
      end

      report_health(:quality_failure, resolution, duration_ms, failures: result.failures)
      history << build_attempt(resolution, :quality_failure, result.failures, duration_ms)
    rescue StandardError => e
      # An error is recorded as one more failed attempt (by exception class
      # name) and the loop moves on to the next resolution.
      duration_ms = escalation_elapsed_ms(started)
      report_health(:error, resolution, duration_ms)
      history << build_attempt(resolution, :error, [e.class.name], duration_ms)
    end
  end

  publish_escalation_event(history, :exhausted) if history.size > 1
  raise EscalationExhausted, "All #{history.size} escalation attempts failed"
end

# Whole milliseconds elapsed since +started+, a CLOCK_MONOTONIC reading.
def escalation_elapsed_ms(started)
  ((Process.clock_gettime(Process::CLOCK_MONOTONIC) - started) * 1000).round
end
159
+
160
# Builds the history record for a single escalation attempt.
#
# @param resolution [#model, #provider, #tier] resolution that was tried
# @param outcome [Symbol] e.g. :success, :quality_failure, :error
# @param failures [Array] failure descriptors for the attempt
# @param duration_ms [Integer] attempt duration in milliseconds
# @return [Hash] attempt record
def build_attempt(resolution, outcome, failures, duration_ms)
  {
    model: resolution.model,
    provider: resolution.provider,
    tier: resolution.tier,
    outcome: outcome,
    failures: failures,
    duration_ms: duration_ms
  }
end
164
+
165
# Mixes EscalationHistory into +response+ and copies the escalation data
# onto it: every recorded attempt, the resolution that won, and the chain.
# Does nothing when +response+ cannot be extended.
#
# @param response [Object] response object to decorate (mutated in place)
# @param history [Array<Hash>] attempt records, replayed as keyword arguments
# @param resolution [Object] the resolution that produced +response+
# @param chain [Object] the escalation chain that was walked
# @return [Object, nil] +chain+, or nil when +response+ is not extendable
def attach_escalation_history(response, history, resolution, chain)
  return unless response.respond_to?(:extend)

  response.extend(EscalationHistory)
  history.each do |attempt|
    response.record_escalation_attempt(**attempt)
  end
  response.final_resolution = resolution
  response.escalation_chain = chain
end
173
+
174
# Sends two reports about one attempt to the router's health tracker: the
# outcome signal (value 1, with duration and optional failures as metadata)
# and a :latency signal carrying the duration. No-op when routing is disabled.
#
# @param signal [Symbol] outcome signal, e.g. :success or :error
# @param resolution [#provider] resolution whose provider is reported on
# @param duration_ms [Integer] attempt duration in milliseconds
# @param failures [Array, nil] added to the metadata only when truthy
# @return [Object, nil] result of the latency report, nil when routing is off
def report_health(signal, resolution, duration_ms, failures: nil)
  return unless Router.routing_enabled?

  metadata = failures ? { duration_ms: duration_ms, failures: failures } : { duration_ms: duration_ms }
  Router.health_tracker.report(provider: resolution.provider, signal: signal, value: 1, metadata: metadata)
  Router.health_tracker.report(provider: resolution.provider, signal: :latency, value: duration_ms, metadata: {})
end
182
+
183
# Best-effort notification that an escalation run ended.
#
# As written this only emits a debug log line - nothing is sent over the
# transport. It is skipped entirely unless Legion::Transport is defined, and
# any StandardError raised along the way is swallowed.
#
# @param history [Array] attempt records; only its size is logged
# @param final_outcome [Symbol] e.g. :success or :exhausted
# @return [Object, nil]
def publish_escalation_event(history, final_outcome)
  return unless defined?(Legion::Transport)
  return unless Legion.const_defined?('Logging')

  Legion::Logging.debug("Escalation event: #{final_outcome}, #{history.size} attempts")
rescue StandardError
  nil
end
190
+
191
# Whether settings turn the escalation loop on.
#
# @return [Boolean] true only when settings[:routing] is a Hash whose
#   [:escalation][:enabled] value is exactly +true+
def escalation_enabled?
  routing = settings[:routing]
  routing.is_a?(Hash) && (routing[:escalation] || {})[:enabled] == true
end
198
+
199
# Quality threshold used by the escalation loop.
#
# @return [Object] settings[:routing][:escalation][:quality_threshold] when
#   that key is present, otherwise 50
def escalation_quality_threshold
  routing = settings[:routing]
  escalation = routing.is_a?(Hash) ? (routing[:escalation] || {}) : {}
  escalation.fetch(:quality_threshold, 50)
end
206
+
207
# Applies the configured default model to RubyLLM and, when neither a default
# model nor a default provider is configured, falls back to auto-detection.
#
# NOTE(review): a model chosen by auto_configure_defaults is not pushed into
# RubyLLM.configure here - confirm that is intended.
#
# @return [Object, nil] result of auto_configure_defaults, or nil when
#   auto-detection is skipped
def set_defaults
  configured_model = settings[:default_model]
  configured_provider = settings[:default_provider]

  RubyLLM.configure { |config| config.default_model = configured_model if configured_model }

  # Auto-detect: use first enabled provider's sensible default
  auto_configure_defaults if configured_model.nil? && configured_provider.nil?
end
220
+
221
# Refreshes Ollama and system discovery data and logs a summary. Runs only
# when the ollama provider is enabled in settings; any StandardError is
# downgraded to a warning so discovery can never abort startup.
#
# @return [Object, nil]
def run_discovery
  ollama_enabled = settings.dig(:providers, :ollama, :enabled)
  return unless ollama_enabled

  Discovery::Ollama.refresh!
  Discovery::System.refresh!

  names = Discovery::Ollama.model_names
  count = names.size
  Legion::Logging.info("Ollama: #{count} model#{'s' unless count == 1} available (#{names.join(', ')})")
  Legion::Logging.info("System: #{Discovery::System.total_memory_mb} MB total, " \
                       "#{Discovery::System.available_memory_mb} MB available")
rescue StandardError => e
  Legion::Logging.warn("Discovery failed: #{e.message}")
end
235
+
236
# Sends a one-off prompt to the default provider/model and logs the round-trip
# time. Skipped when either default is unset. Failures are logged as a
# warning instead of being raised.
#
# The response content is not inspected: the info line is written whenever
# the call returns without raising.
#
# @return [Object, nil] result of the logging call, or nil when skipped
def ping_provider
  model = settings[:default_model]
  provider = settings[:default_provider]
  return unless model && provider

  # Monotonic clock, so the reported latency cannot be skewed by wall-clock
  # adjustments while the request is in flight.
  started = Process.clock_gettime(Process::CLOCK_MONOTONIC)
  RubyLLM.chat(model: model, provider: provider).ask('Respond with only the word: pong')
  elapsed = ((Process.clock_gettime(Process::CLOCK_MONOTONIC) - started) * 1000).round
  Legion::Logging.info "LLM ping #{provider}/#{model}: pong (#{elapsed}ms)"
rescue StandardError => e
  Legion::Logging.warn "LLM ping failed for #{provider}/#{model}: #{e.message}"
end
248
+
249
# Picks the first provider that is enabled and declares a :default_model, and
# stores that pair as the default model/provider in settings.
#
# Does nothing when no providers are configured or none qualifies. The
# settings object is fetched once so the read and both writes hit the same
# object.
#
# @return [Object, nil] result of the logging call, or nil when nothing was
#   configured
def auto_configure_defaults
  current = settings
  providers = current[:providers]
  return unless providers # a missing providers section used to raise NoMethodError

  provider, config = providers.find do |_name, candidate|
    candidate&.dig(:enabled) && candidate[:default_model]
  end
  return unless provider

  model = config[:default_model]
  current[:default_model] = model
  current[:default_provider] = provider
  Legion::Logging.info "Auto-configured default: #{model} via #{provider}"
end
262
+ end
263
+ end
264
+ end
metadata ADDED
@@ -0,0 +1,136 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: legion-llm
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.3.1
5
+ platform: ruby
6
+ authors:
7
+ - Esity
8
+ bindir: bin
9
+ cert_chain: []
10
+ date: 1980-01-02 00:00:00.000000000 Z
11
+ dependencies:
12
+ - !ruby/object:Gem::Dependency
13
+ name: legion-logging
14
+ requirement: !ruby/object:Gem::Requirement
15
+ requirements:
16
+ - - ">="
17
+ - !ruby/object:Gem::Version
18
+ version: '0'
19
+ type: :runtime
20
+ prerelease: false
21
+ version_requirements: !ruby/object:Gem::Requirement
22
+ requirements:
23
+ - - ">="
24
+ - !ruby/object:Gem::Version
25
+ version: '0'
26
+ - !ruby/object:Gem::Dependency
27
+ name: legion-settings
28
+ requirement: !ruby/object:Gem::Requirement
29
+ requirements:
30
+ - - ">="
31
+ - !ruby/object:Gem::Version
32
+ version: '0'
33
+ type: :runtime
34
+ prerelease: false
35
+ version_requirements: !ruby/object:Gem::Requirement
36
+ requirements:
37
+ - - ">="
38
+ - !ruby/object:Gem::Version
39
+ version: '0'
40
+ - !ruby/object:Gem::Dependency
41
+ name: ruby_llm
42
+ requirement: !ruby/object:Gem::Requirement
43
+ requirements:
44
+ - - ">="
45
+ - !ruby/object:Gem::Version
46
+ version: '1.0'
47
+ type: :runtime
48
+ prerelease: false
49
+ version_requirements: !ruby/object:Gem::Requirement
50
+ requirements:
51
+ - - ">="
52
+ - !ruby/object:Gem::Version
53
+ version: '1.0'
54
+ - !ruby/object:Gem::Dependency
55
+ name: tzinfo
56
+ requirement: !ruby/object:Gem::Requirement
57
+ requirements:
58
+ - - ">="
59
+ - !ruby/object:Gem::Version
60
+ version: '2.0'
61
+ type: :runtime
62
+ prerelease: false
63
+ version_requirements: !ruby/object:Gem::Requirement
64
+ requirements:
65
+ - - ">="
66
+ - !ruby/object:Gem::Version
67
+ version: '2.0'
68
+ description: Provides LLM capabilities (chat, embeddings, tool use, agents) to LegionIO
69
+ extensions
70
+ email:
71
+ - matthewdiverson@gmail.com
72
+ executables: []
73
+ extensions: []
74
+ extra_rdoc_files:
75
+ - CHANGELOG.md
76
+ - LICENSE
77
+ - README.md
78
+ files:
79
+ - ".github/workflows/ci.yml"
80
+ - ".gitignore"
81
+ - ".rubocop.yml"
82
+ - CHANGELOG.md
83
+ - CLAUDE.md
84
+ - Gemfile
85
+ - LICENSE
86
+ - README.md
87
+ - docs/plans/2026-03-15-ollama-discovery-design.md
88
+ - docs/plans/2026-03-15-ollama-discovery-implementation.md
89
+ - legion-llm.gemspec
90
+ - lib/legion/llm.rb
91
+ - lib/legion/llm/bedrock_bearer_auth.rb
92
+ - lib/legion/llm/compressor.rb
93
+ - lib/legion/llm/discovery/ollama.rb
94
+ - lib/legion/llm/discovery/system.rb
95
+ - lib/legion/llm/escalation_history.rb
96
+ - lib/legion/llm/helpers/llm.rb
97
+ - lib/legion/llm/providers.rb
98
+ - lib/legion/llm/quality_checker.rb
99
+ - lib/legion/llm/router.rb
100
+ - lib/legion/llm/router/escalation_chain.rb
101
+ - lib/legion/llm/router/health_tracker.rb
102
+ - lib/legion/llm/router/resolution.rb
103
+ - lib/legion/llm/router/rule.rb
104
+ - lib/legion/llm/settings.rb
105
+ - lib/legion/llm/transport/exchanges/escalation.rb
106
+ - lib/legion/llm/transport/messages/escalation_event.rb
107
+ - lib/legion/llm/version.rb
108
+ homepage: https://github.com/LegionIO/legion-llm
109
+ licenses:
110
+ - Apache-2.0
111
+ metadata:
112
+ bug_tracker_uri: https://github.com/LegionIO/legion-llm/issues
113
+ changelog_uri: https://github.com/LegionIO/legion-llm/blob/main/CHANGELOG.md
114
+ documentation_uri: https://github.com/LegionIO/legion-llm
115
+ homepage_uri: https://github.com/LegionIO/LegionIO
116
+ source_code_uri: https://github.com/LegionIO/legion-llm
117
+ wiki_uri: https://github.com/LegionIO/legion-llm/wiki
118
+ rubygems_mfa_required: 'true'
119
+ rdoc_options: []
120
+ require_paths:
121
+ - lib
122
+ required_ruby_version: !ruby/object:Gem::Requirement
123
+ requirements:
124
+ - - ">="
125
+ - !ruby/object:Gem::Version
126
+ version: '3.4'
127
+ required_rubygems_version: !ruby/object:Gem::Requirement
128
+ requirements:
129
+ - - ">="
130
+ - !ruby/object:Gem::Version
131
+ version: '0'
132
+ requirements: []
133
+ rubygems_version: 3.6.9
134
+ specification_version: 4
135
+ summary: LLM integration for the LegionIO framework via ruby_llm
136
+ test_files: []