legion-llm 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.github/workflows/ci.yml +16 -0
- data/.gitignore +18 -0
- data/.rubocop.yml +56 -0
- data/CHANGELOG.md +71 -0
- data/CLAUDE.md +388 -0
- data/Gemfile +14 -0
- data/LICENSE +20 -0
- data/README.md +615 -0
- data/docs/plans/2026-03-15-ollama-discovery-design.md +164 -0
- data/docs/plans/2026-03-15-ollama-discovery-implementation.md +1147 -0
- data/legion-llm.gemspec +32 -0
- data/lib/legion/llm/bedrock_bearer_auth.rb +53 -0
- data/lib/legion/llm/compressor.rb +75 -0
- data/lib/legion/llm/discovery/ollama.rb +88 -0
- data/lib/legion/llm/discovery/system.rb +139 -0
- data/lib/legion/llm/escalation_history.rb +28 -0
- data/lib/legion/llm/helpers/llm.rb +59 -0
- data/lib/legion/llm/providers.rb +88 -0
- data/lib/legion/llm/quality_checker.rb +56 -0
- data/lib/legion/llm/router/escalation_chain.rb +49 -0
- data/lib/legion/llm/router/health_tracker.rb +160 -0
- data/lib/legion/llm/router/resolution.rb +43 -0
- data/lib/legion/llm/router/rule.rb +103 -0
- data/lib/legion/llm/router.rb +279 -0
- data/lib/legion/llm/settings.rb +97 -0
- data/lib/legion/llm/transport/exchanges/escalation.rb +14 -0
- data/lib/legion/llm/transport/messages/escalation_event.rb +13 -0
- data/lib/legion/llm/version.rb +7 -0
- data/lib/legion/llm.rb +264 -0
- metadata +136 -0
data/legion-llm.gemspec
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative 'lib/legion/llm/version'
|
|
4
|
+
|
|
5
|
+
# Gem specification for legion-llm: package identity, packaged files,
# registry metadata links and runtime dependencies.
Gem::Specification.new do |spec|
  spec.name = 'legion-llm'
  # The version constant comes from lib/legion/llm/version, required above.
  spec.version = Legion::LLM::VERSION
  spec.authors = ['Esity']
  spec.email = ['matthewdiverson@gmail.com']
  spec.summary = 'LLM integration for the LegionIO framework via ruby_llm'
  spec.description = 'Provides LLM capabilities (chat, embeddings, tool use, agents) to LegionIO extensions'
  spec.homepage = 'https://github.com/LegionIO/legion-llm'
  spec.license = 'Apache-2.0'
  spec.require_paths = ['lib']
  spec.required_ruby_version = '>= 3.4'
  # Package every git-tracked file except those under test/, spec/ or features/.
  # The listing is NUL-separated (-z), hence the split on "\x0".
  spec.files = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
  spec.extra_rdoc_files = %w[README.md LICENSE CHANGELOG.md]
  # Links shown on the gem's registry page.
  # NOTE(review): homepage_uri points at the LegionIO/LegionIO repository while
  # spec.homepage points at legion-llm; confirm that the difference is intended.
  spec.metadata = {
    'bug_tracker_uri' => 'https://github.com/LegionIO/legion-llm/issues',
    'changelog_uri' => 'https://github.com/LegionIO/legion-llm/blob/main/CHANGELOG.md',
    'documentation_uri' => 'https://github.com/LegionIO/legion-llm',
    'homepage_uri' => 'https://github.com/LegionIO/LegionIO',
    'source_code_uri' => 'https://github.com/LegionIO/legion-llm',
    'wiki_uri' => 'https://github.com/LegionIO/legion-llm/wiki',
    'rubygems_mfa_required' => 'true'
  }

  # Runtime dependencies. The two legion-* gems carry no version constraint.
  spec.add_dependency 'legion-logging'
  spec.add_dependency 'legion-settings'
  spec.add_dependency 'ruby_llm', '>= 1.0'
  spec.add_dependency 'tzinfo', '>= 2.0'
end
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Monkey-patch RubyLLM's Bedrock provider to support AWS Bearer Token
|
|
4
|
+
# authentication (Identity Center / SSO) as an alternative to SigV4.
|
|
5
|
+
#
|
|
6
|
+
# When `bedrock_bearer_token` is set on the RubyLLM configuration,
|
|
7
|
+
# requests use a simple `Authorization: Bearer <token>` header instead
|
|
8
|
+
# of the full SigV4 signing process.
|
|
9
|
+
|
|
10
|
+
require 'ruby_llm'
|
|
11
|
+
|
|
12
|
+
module RubyLLM
  # Reopened to add a readable and writable `bedrock_bearer_token` setting.
  class Configuration
    attr_accessor :bedrock_bearer_token
  end

  module Providers
    # Reopened to let the Bedrock provider authenticate with a bearer token.
    class Bedrock
      class << self
        # Configuration keys the provider needs before it can be used.
        #
        # @return [Array<Symbol>] the bearer-token pair when a bearer token is
        #   set on the global RubyLLM config, otherwise the key/secret/region set
        def configuration_requirements
          config = RubyLLM.config
          if config.bedrock_bearer_token
            %i[bedrock_bearer_token bedrock_region]
          else
            %i[bedrock_api_key bedrock_secret_key bedrock_region]
          end
        end
      end

      # Reopened to wrap the existing request-signing method.
      module Auth
        private

        # Keep a handle on the existing implementation. This alias must run
        # before the redefinition below, otherwise it would point at the new
        # method and the fallback branch would recurse forever.
        alias sigv4_sign_headers sign_headers

        # Builds request headers: bearer-token headers when a token is set on
        # @config, otherwise whatever the aliased original method returns.
        #
        # NOTE(review): relies on `api_base` and `@config` being provided by the
        # object this module is mixed into; neither is defined in this file.
        def sign_headers(method, path, body, base_url: api_base)
          if @config.bedrock_bearer_token
            bearer_headers(path, body)
          else
            sigv4_sign_headers(method, path, body, base_url: base_url)
          end
        end

        # Headers for bearer-token authentication. The path argument is unused.
        #
        # NOTE(review): Digest is not required in this file and is presumably
        # loaded by ruby_llm; hexdigest is also given `body` as-is, so a nil
        # body would raise. Confirm both against the callers.
        def bearer_headers(_path, body)
          {
            'Authorization' => "Bearer #{@config.bedrock_bearer_token}",
            'Content-Type' => 'application/json',
            'X-Amz-Content-Sha256' => Digest::SHA256.hexdigest(body)
          }
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Legion
  module LLM
    # Lossy text compressor that strips filler words from prose while leaving
    # fenced code blocks and inline code spans untouched.
    module Compressor
      NONE = 0
      LIGHT = 1
      MODERATE = 2
      AGGRESSIVE = 3

      # Words removed at each level. A level also removes the words of every
      # lower level (see stopwords_for_level).
      LEVEL_WORDS = {
        1 => %w[a an the just very really basically actually simply quite rather somewhat],
        2 => %w[however moreover furthermore additionally consequently therefore thus hence
                meanwhile nevertheless nonetheless accordingly indeed certainly],
        3 => %w[also then still even already yet again please note that]
      }.freeze

      class << self
        # Compresses text at the given level.
        #
        # @param text [String, nil] text to compress
        # @param level [Integer] NONE..AGGRESSIVE; larger values behave like AGGRESSIVE
        # @return [String, nil] the input itself when it is nil, empty or the
        #   level is NONE or lower; otherwise the compressed text
        def compress(text, level: LIGHT)
          return text if text.nil? || text.empty? || level <= NONE

          result = split_segments(text).map do |seg|
            seg[:protected] ? seg[:text] : compress_prose(seg[:text], level)
          end.join

          # Only the most aggressive level also trims runs of blank lines.
          result = collapse_whitespace(result) if level >= AGGRESSIVE
          result
        end

        # Cumulative stop-word list for a level.
        #
        # @param level [Integer] compression level
        # @return [Array<String>] words of levels 1..level, capped at AGGRESSIVE
        def stopwords_for_level(level)
          return [] if level <= NONE

          (1..[level, AGGRESSIVE].min).flat_map { |l| LEVEL_WORDS.fetch(l, []) }
        end

        private

        # Splits text into { text:, protected: } segments. Fenced code blocks
        # are split off first, then inline code spans within the remainder.
        def split_segments(text)
          text.split(/(```.*?```)/m).flat_map do |part|
            next [{ text: part, protected: true }] if part.start_with?('```')

            part.split(/(`[^`\n]+`)/).map do |sub|
              { text: sub, protected: sub.start_with?('`') && sub.end_with?('`') && sub.length > 1 }
            end
          end
        end

        # Removes the level's stop words from one prose segment, then collapses
        # the runs of spaces the removals leave behind.
        def compress_prose(text, level)
          pattern = stopword_pattern(level)
          return text if pattern.nil?

          text.gsub(pattern, '').squeeze(' ')
        end

        # Case-insensitive whole-word matcher for a level, built once and then
        # reused for every segment instead of being recompiled per segment.
        # Returns nil when the level has no stop words.
        def stopword_pattern(level)
          key = [level, AGGRESSIVE].min
          @stopword_patterns ||= {}
          return @stopword_patterns[key] if @stopword_patterns.key?(key)

          words = stopwords_for_level(level)
          @stopword_patterns[key] = words.empty? ? nil : /\b(?:#{Regexp.union(words).source})\b ?/i
        end

        # Reduces three or more consecutive newlines to exactly two.
        def collapse_whitespace(text)
          text.gsub(/\n{3,}/, "\n\n")
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'faraday'
|
|
4
|
+
require 'json'
|
|
5
|
+
|
|
6
|
+
module Legion
  module LLM
    module Discovery
      # Cached view of the models reported by an Ollama server's /api/tags
      # endpoint. The cache is refreshed lazily once it is older than the
      # configured TTL.
      module Ollama
        class << self
          # @return [Array<Hash>] cached model entries, refreshed first if stale
          def models
            ensure_fresh
            @models || []
          end

          # @return [Array] the 'name' value of every cached model entry
          def model_names
            models.map { |entry| entry['name'] }
          end

          # @param name [String] model name to look for
          # @return [Boolean] whether a cached model has exactly this name
          def model_available?(name)
            model_names.include?(name)
          end

          # @param name [String] model name to look for
          # @return [Object, nil] the entry's 'size' value, or nil when not found
          def model_size(name)
            entry = models.find { |candidate| candidate['name'] == name }
            entry&.dig('size')
          end

          # Fetches the model list again. On a non-success response or any
          # StandardError the previous list is kept (or initialised to []).
          # The refresh timestamp is updated in every case.
          def refresh!
            reply = connection.get('/api/tags')
            @models = reply.success? ? (::JSON.parse(reply.body)['models'] || []) : (@models || [])
          rescue StandardError
            @models ||= []
          ensure
            @last_refreshed_at = Time.now
          end

          # Drops the cached list and the refresh timestamp.
          def reset!
            @models = nil
            @last_refreshed_at = nil
          end

          # @return [Boolean] true when never refreshed, or when the last refresh
          #   is older than :refresh_seconds (default 60)
          def stale?
            return true if @last_refreshed_at.nil?

            max_age = discovery_settings[:refresh_seconds] || 60
            (Time.now - @last_refreshed_at) > max_age
          end

          private

          def ensure_fresh
            return unless stale?

            refresh!
          end

          # Builds a Faraday connection with 2-second open and read timeouts.
          def connection
            Faraday.new(url: ollama_base_url) do |conn|
              conn.options.timeout = 2
              conn.options.open_timeout = 2
              conn.adapter Faraday.default_adapter
            end
          end

          # Base URL from settings, falling back to the local default when
          # settings are unavailable, unset, or raise.
          def ollama_base_url
            if Legion.const_defined?('Settings')
              Legion::Settings[:llm].dig(:providers, :ollama, :base_url) || 'http://localhost:11434'
            else
              'http://localhost:11434'
            end
          rescue StandardError
            'http://localhost:11434'
          end

          # The :discovery section of the LLM settings, or {} when unavailable.
          def discovery_settings
            if Legion.const_defined?('Settings')
              Legion::Settings[:llm][:discovery] || {}
            else
              {}
            end
          rescue StandardError
            {}
          end
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,139 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Legion
  module LLM
    module Discovery
      # Cached host memory readings (total and available, in MB) for macOS and
      # Linux. Readings are cleared once they are older than the configured TTL
      # and fetched again on the next access.
      module System
        class << self
          # @return [Integer, nil] total memory in MB, nil when it cannot be read
          def total_memory_mb
            ensure_total_fresh
            @total_memory_mb
          end

          # @return [Integer, nil] free plus inactive memory in MB, nil when it
          #   cannot be read
          def available_memory_mb
            ensure_available_fresh
            @available_memory_mb
          end

          # @return [Boolean] true when available memory is below
          #   :memory_floor_mb (default 2048); false when it is unknown
          def memory_pressure?
            available = available_memory_mb
            return false if available.nil?

            available < (discovery_settings[:memory_floor_mb] || 2048)
          end

          # @return [Symbol] :macos, :linux or :unknown (memoized)
          def platform
            @platform ||= detect_platform
          end

          # Clears both readings so they are fetched again on next access and
          # stamps the refresh time.
          def refresh!
            clear_readings
            @last_refreshed_at = Time.now
          end

          # Clears readings, the refresh timestamp and the memoized platform.
          def reset!
            clear_readings
            @last_refreshed_at = nil
            @platform = nil
          end

          # @return [Boolean] true when never refreshed, or when the last refresh
          #   is older than :refresh_seconds (default 60)
          def stale?
            return true if @last_refreshed_at.nil?

            max_age = discovery_settings[:refresh_seconds] || 60
            (Time.now - @last_refreshed_at) > max_age
          end

          private

          def clear_readings
            @total_fetched_at = @available_fetched_at = nil
            @total_memory_mb = @available_memory_mb = nil
          end

          def ensure_total_fresh
            refresh! if stale?
            return if @total_fetched_at

            fetch_total
            @total_fetched_at = Time.now
          end

          def ensure_available_fresh
            refresh! if stale?
            return if @available_fetched_at

            fetch_available
            @available_fetched_at = Time.now
          end

          # Dispatches to the platform reader; does nothing on other platforms.
          def fetch_total
            os = platform
            if os == :macos
              fetch_macos_total
            elsif os == :linux
              fetch_linux_total
            end
          end

          def fetch_available
            os = platform
            if os == :macos
              fetch_macos_available
            elsif os == :linux
              fetch_linux_available
            end
          end

          def detect_platform
            host_os = RbConfig::CONFIG['host_os']
            return :macos if /darwin/i.match?(host_os)
            return :linux if /linux/i.match?(host_os)

            :unknown
          end

          # Reads hw.memsize (bytes) via sysctl and converts it to MB.
          def fetch_macos_total
            bytes = `sysctl -n hw.memsize`.strip.to_i
            @total_memory_mb = bytes / (1024 * 1024)
          rescue StandardError
            @total_memory_mb = nil
          end

          # Sums free and inactive pages from vm_stat and converts to MB. The
          # page size defaults to 16384 bytes when vm_stat does not report it.
          def fetch_macos_available
            report = `vm_stat`
            bytes_per_page = report[/page size of (\d+) bytes/, 1]&.to_i || 16_384
            pages = report[/Pages free:\s+(\d+)/, 1].to_i + report[/Pages inactive:\s+(\d+)/, 1].to_i
            @available_memory_mb = pages * bytes_per_page / (1024 * 1024)
          rescue StandardError
            @available_memory_mb = nil
          end

          # Reads MemTotal (kB) from /proc/meminfo and converts it to MB.
          def fetch_linux_total
            kilobytes = File.read('/proc/meminfo')[/MemTotal:\s+(\d+)/, 1].to_i
            @total_memory_mb = kilobytes / 1024
          rescue StandardError
            @total_memory_mb = nil
          end

          # Sums MemFree and Inactive (kB) from /proc/meminfo and converts to MB.
          def fetch_linux_available
            report = File.read('/proc/meminfo')
            kilobytes = report[/MemFree:\s+(\d+)/, 1].to_i + report[/Inactive:\s+(\d+)/, 1].to_i
            @available_memory_mb = kilobytes / 1024
          rescue StandardError
            @available_memory_mb = nil
          end

          # The :discovery section of the LLM settings, or {} when unavailable.
          def discovery_settings
            if Legion.const_defined?('Settings')
              Legion::Settings[:llm][:discovery] || {}
            else
              {}
            end
          rescue StandardError
            {}
          end
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Legion
  module LLM
    # Mixin that lets an object carry a log of escalation attempts alongside
    # the final resolution and the chain that was used.
    module EscalationHistory
      attr_accessor :final_resolution, :escalation_chain

      # @return [Array<Hash>] recorded attempts, oldest first (lazily created)
      def escalation_history
        @escalation_history ||= []
      end

      # @return [Boolean] true once more than one attempt has been recorded
      def escalated?
        escalation_history.length > 1
      end

      # Appends one attempt to the history.
      #
      # @return [Array<Hash>] the history after the append
      def record_escalation_attempt(model:, provider:, tier:, outcome:, failures:, duration_ms:)
        attempt = { model:, provider:, tier:, outcome:, failures:, duration_ms: }
        escalation_history.push(attempt)
      end
    end
  end
end
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Legion
  module Extensions
    module Helpers
      # Convenience wrappers around Legion::LLM for use inside extension runners.
      module LLM
        # Quick chat from any extension runner
        # @param message [String] the prompt
        # @param model [String] optional model override
        # @param provider [Symbol] optional provider override
        # @param intent [Hash, nil] routing intent (capability, privacy, etc.)
        # @param tier [Symbol, nil] explicit tier override
        # @param tools [Array<Class>] optional RubyLLM::Tool subclasses
        # @param instructions [String] optional system instructions
        # @param compress [Integer, nil] Legion::LLM::Compressor level applied to the
        #   message and instructions before sending; 0 or nil sends them unchanged
        # @param escalate [Boolean, nil] enable model escalation on low-quality responses
        # @param max_escalations [Integer, nil] max escalation attempts
        # @param quality_check [Proc, nil] callable that returns true if response is acceptable
        # @return [RubyLLM::Message] the assistant response
        def llm_chat(message, model: nil, provider: nil, intent: nil, tier: nil, tools: [], instructions: nil, # rubocop:disable Metrics/ParameterLists
                     compress: 0, escalate: nil, max_escalations: nil, quality_check: nil)
          # Safe navigation lets callers pass compress: nil to mean "no compression".
          if compress&.positive?
            message = Legion::LLM::Compressor.compress(message, level: compress)
            instructions = Legion::LLM::Compressor.compress(instructions, level: compress) if instructions
          end

          # When escalation is active, chat() handles ask() internally via message: kwarg
          # NOTE(review): instructions and tools are not forwarded on this path;
          # confirm whether Legion::LLM.chat should receive them.
          if escalate
            return Legion::LLM.chat(model: model, provider: provider, intent: intent, tier: tier,
                                    escalate: true, max_escalations: max_escalations,
                                    quality_check: quality_check, message: message)
          end

          chat = Legion::LLM.chat(model: model, provider: provider, intent: intent, tier: tier, escalate: false)
          chat.with_instructions(instructions) if instructions
          chat.with_tools(*tools) unless tools.empty?
          chat.ask(message)
        end

        # Quick embed from any extension runner
        # @param text [String, Array<String>] text to embed
        # @param model [String] optional model override
        # @return [RubyLLM::Embedding]
        def llm_embed(text, model: nil)
          Legion::LLM.embed(text, model: model)
        end

        # Get a raw chat object for multi-turn conversations
        # @param model [String] optional model override
        # @param provider [Symbol] optional provider override
        # @param intent [Hash, nil] routing intent (capability, privacy, etc.)
        # @param tier [Symbol, nil] explicit tier override
        # @return [RubyLLM::Chat]
        def llm_session(model: nil, provider: nil, intent: nil, tier: nil)
          Legion::LLM.chat(model: model, provider: provider, intent: intent, tier: tier, escalate: false)
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Legion
  module LLM
    # Applies the per-provider settings to the global RubyLLM configuration.
    # Expects the including or extending object to expose `settings`.
    module Providers
      # Configures every provider whose settings entry has :enabled set.
      def configure_providers
        settings[:providers].each do |provider, config|
          next unless config[:enabled]

          apply_provider_config(provider, config)
        end
      end

      # Dispatches one provider's settings to its configure_* method and logs
      # a warning for provider names it does not know.
      #
      # @param provider [Symbol] provider key from settings
      # @param config [Hash] that provider's settings
      def apply_provider_config(provider, config)
        case provider
        when :bedrock
          configure_bedrock(config)
        when :anthropic
          configure_anthropic(config)
        when :openai
          configure_openai(config)
        when :gemini
          configure_gemini(config)
        when :ollama
          configure_ollama(config)
        else
          Legion::Logging.warn "Unknown LLM provider: #{provider}"
        end
      end

      # Configures Bedrock with either a bearer token or SigV4 credentials.
      # A bearer token takes precedence when both are present; nothing is
      # configured when neither is complete.
      def configure_bedrock(config)
        has_sigv4 = config[:api_key] && config[:secret_key]
        has_bearer = config[:bearer_token]
        return unless has_sigv4 || has_bearer

        # The bearer-auth patch is loaded only when it is actually needed.
        require 'legion/llm/bedrock_bearer_auth' if has_bearer

        # Resolve the region once so the log line reports the value applied,
        # including the default.
        region = config[:region] || 'us-east-2'

        RubyLLM.configure do |c|
          if has_bearer
            c.bedrock_bearer_token = config[:bearer_token]
          else
            c.bedrock_api_key = config[:api_key]
            c.bedrock_secret_key = config[:secret_key]
            c.bedrock_session_token = config[:session_token] if config[:session_token]
          end
          c.bedrock_region = region
        end

        auth_mode = has_bearer ? 'bearer token' : 'SigV4'
        Legion::Logging.info "Configured Bedrock provider (#{region}, #{auth_mode})"
      end

      # Sets the Anthropic API key; does nothing when no key is configured.
      def configure_anthropic(config)
        return unless config[:api_key]

        RubyLLM.configure do |c|
          c.anthropic_api_key = config[:api_key]
        end
        Legion::Logging.info 'Configured Anthropic provider'
      end

      # Sets the OpenAI API key; does nothing when no key is configured.
      def configure_openai(config)
        return unless config[:api_key]

        RubyLLM.configure do |c|
          c.openai_api_key = config[:api_key]
        end
        Legion::Logging.info 'Configured OpenAI provider'
      end

      # Sets the Gemini API key; does nothing when no key is configured.
      def configure_gemini(config)
        return unless config[:api_key]

        RubyLLM.configure do |c|
          c.gemini_api_key = config[:api_key]
        end
        Legion::Logging.info 'Configured Gemini provider'
      end

      # Overrides the Ollama API base when :base_url is configured; otherwise
      # RubyLLM's existing value is left untouched.
      def configure_ollama(config)
        RubyLLM.configure do |c|
          c.ollama_api_base = config[:base_url] if config[:base_url]
        end
        Legion::Logging.info "Configured Ollama provider (#{config[:base_url] || 'default base URL'})"
      end
    end
  end
end
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'json'
|
|
4
|
+
|
|
5
|
+
module Legion
  module LLM
    # Heuristic screening of a response: empty, too short, repetitive, invalid
    # JSON (when JSON is expected), plus an optional caller-supplied check.
    module QualityChecker
      QualityResult = Struct.new(:passed, :failures)

      REPETITION_MIN_LENGTH = 20
      REPETITION_THRESHOLD = 3
      DEFAULT_QUALITY_THRESHOLD = 50

      class << self
        # Runs every check against the response.
        #
        # @param response [#content] object whose content is inspected
        # @param quality_threshold [Integer] minimum acceptable content length
        # @param json_expected [Boolean] also require the content to parse as JSON
        # @param quality_check [#call, nil] extra check; a falsy result is a failure
        # @return [QualityResult] passed flag and the list of failure symbols
        def check(response, quality_threshold: DEFAULT_QUALITY_THRESHOLD, json_expected: false, quality_check: nil)
          body = response.content

          problems =
            if body.nil? || body.strip.empty?
              # Empty content skips the content-based checks below.
              [:empty_response]
            else
              content_failures(body, quality_threshold, json_expected)
            end

          if quality_check.respond_to?(:call)
            problems << :custom_check_failed unless quality_check.call(response)
          end

          QualityResult.new(passed: problems.empty?, failures: problems)
        end

        private

        # Failure symbols for non-empty content, in a fixed order.
        def content_failures(body, min_length, json_expected)
          found = []
          found << :too_short if body.length < min_length
          found << :repetition if repetitive?(body)
          found << :json_parse_failure if json_expected && !valid_json?(body)
          found
        end

        # True when any fixed-position chunk of REPETITION_MIN_LENGTH characters
        # occurs REPETITION_THRESHOLD times. A trailing partial chunk is ignored.
        def repetitive?(content)
          window = REPETITION_MIN_LENGTH
          return false if content.length < window * REPETITION_THRESHOLD

          tally = Hash.new(0)
          (content.length / window).times.any? do |index|
            piece = content[index * window, window]
            (tally[piece] += 1) >= REPETITION_THRESHOLD
          end
        end

        def valid_json?(content)
          ::JSON.parse(content)
        rescue ::JSON::ParserError
          false
        else
          true
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Legion
  module LLM
    module Router
      # Ordered list of resolutions to try during escalation. Iteration always
      # yields exactly max_attempts entries for a non-empty chain: a longer list
      # is truncated and a shorter one repeats its last resolution. An empty
      # chain yields nothing.
      class EscalationChain
        include Enumerable

        attr_reader :max_attempts

        # @param resolutions [Array] resolutions in preference order (copied and frozen)
        # @param max_attempts [Integer] number of attempts iteration should produce
        def initialize(resolutions:, max_attempts: 3)
          @resolutions = resolutions.dup.freeze
          @max_attempts = max_attempts
        end

        # @return [Object, nil] the first resolution, nil for an empty chain
        def primary
          @resolutions.first
        end

        # Yields the padded or truncated attempt list; returns an Enumerator
        # when no block is given.
        def each(&)
          return enum_for(:each) unless block_given?

          padded_resolutions.each(&)
        end

        # @return [Integer] number of distinct resolutions supplied (not padded)
        def size
          @resolutions.size
        end

        # @return [Boolean] whether no resolutions were supplied
        def empty?
          @resolutions.empty?
        end

        # @return [Array] an unfrozen copy of the supplied resolutions (not padded)
        def to_a
          @resolutions.dup
        end

        private

        # Attempt list of length max_attempts. An empty chain stays empty:
        # padding it would repeat `nil` and hand callers a nil resolution.
        def padded_resolutions
          return [] if @resolutions.empty?
          return @resolutions.first(@max_attempts) if @resolutions.size >= @max_attempts

          @resolutions + Array.new(@max_attempts - @resolutions.size, @resolutions.last)
        end
      end
    end
  end
end
|