legion-llm 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,32 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'lib/legion/llm/version'
4
+
5
# Gem specification for legion-llm: package identity, packaged files,
# registry metadata, and runtime dependencies.
Gem::Specification.new do |spec|
  # Identity and authorship.
  spec.name    = 'legion-llm'
  spec.version = Legion::LLM::VERSION
  spec.authors = ['Esity']
  spec.email   = ['matthewdiverson@gmail.com']
  spec.license = 'Apache-2.0'

  # Descriptive text shown by the registry.
  spec.summary     = 'LLM integration for the LegionIO framework via ruby_llm'
  spec.description = 'Provides LLM capabilities (chat, embeddings, tool use, agents) to LegionIO extensions'
  spec.homepage    = 'https://github.com/LegionIO/legion-llm'

  spec.require_paths         = ['lib']
  spec.required_ruby_version = '>= 3.4'

  # Package every git-tracked file except those under test/, spec/ or features/.
  tracked_files = `git ls-files -z`.split("\x0")
  spec.files = tracked_files.reject { |path| path.match?(%r{^(test|spec|features)/}) }
  spec.extra_rdoc_files = %w[README.md LICENSE CHANGELOG.md]

  spec.metadata = {
    'bug_tracker_uri' => 'https://github.com/LegionIO/legion-llm/issues',
    'changelog_uri' => 'https://github.com/LegionIO/legion-llm/blob/main/CHANGELOG.md',
    'documentation_uri' => 'https://github.com/LegionIO/legion-llm',
    'homepage_uri' => 'https://github.com/LegionIO/LegionIO',
    'source_code_uri' => 'https://github.com/LegionIO/legion-llm',
    'wiki_uri' => 'https://github.com/LegionIO/legion-llm/wiki',
    'rubygems_mfa_required' => 'true'
  }

  # Runtime dependencies.
  spec.add_dependency 'legion-logging'
  spec.add_dependency 'legion-settings'
  spec.add_dependency 'ruby_llm', '>= 1.0'
  spec.add_dependency 'tzinfo', '>= 2.0'
end
@@ -0,0 +1,53 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Monkey-patch RubyLLM's Bedrock provider to support AWS Bearer Token
4
+ # authentication (Identity Center / SSO) as an alternative to SigV4.
5
+ #
6
+ # When `bedrock_bearer_token` is set on the RubyLLM configuration,
7
+ # requests use a simple `Authorization: Bearer <token>` header instead
8
+ # of the full SigV4 signing process.
9
+
10
+ require 'ruby_llm'
11
+
12
module RubyLLM
  # Adds the optional bearer-token setting read by the Bedrock overrides in
  # this module.
  class Configuration
    attr_accessor :bedrock_bearer_token
  end

  module Providers
    class Bedrock
      class << self
        # Configuration keys required for Bedrock. The set depends on whether
        # a bearer token is present on the global RubyLLM configuration.
        #
        # @return [Array<Symbol>]
        def configuration_requirements
          return %i[bedrock_bearer_token bedrock_region] if RubyLLM.config.bedrock_bearer_token

          %i[bedrock_api_key bedrock_secret_key bedrock_region]
        end
      end

      module Auth
        private

        # Keep the existing signer reachable under a new name; this alias must
        # run before `sign_headers` is redefined.
        alias sigv4_sign_headers sign_headers

        # Returns request headers: bearer-token headers when a token is
        # configured, otherwise whatever the aliased original signer returns.
        def sign_headers(method, path, body, base_url: api_base)
          return sigv4_sign_headers(method, path, body, base_url: base_url) unless @config.bedrock_bearer_token

          bearer_headers(path, body)
        end

        # Builds the bearer-token header set. The path is accepted for
        # signature symmetry but is not used.
        def bearer_headers(_path, body)
          token = @config.bedrock_bearer_token
          {
            'Authorization' => "Bearer #{token}",
            'Content-Type' => 'application/json',
            'X-Amz-Content-Sha256' => Digest::SHA256.hexdigest(body)
          }
        end
      end
    end
  end
end
@@ -0,0 +1,75 @@
1
+ # frozen_string_literal: true
2
+
3
module Legion
  module LLM
    # Lossy text compressor that removes filler words from prose while leaving
    # Markdown code (fenced ``` blocks and inline `spans`) untouched.
    module Compressor
      NONE = 0
      LIGHT = 1
      MODERATE = 2
      AGGRESSIVE = 3

      # Words removed at each level. Levels are cumulative: a level also
      # removes the words of every lower level (see .stopwords_for_level).
      LEVEL_WORDS = {
        1 => %w[a an the just very really basically actually simply quite rather somewhat],
        2 => %w[however moreover furthermore additionally consequently therefore thus hence
                meanwhile nevertheless nonetheless accordingly indeed certainly],
        3 => %w[also then still even already yet again please note that]
      }.freeze

      class << self
        # Compresses +text+ at the given level.
        #
        # @param text [String, nil] input text; nil and empty input are returned as-is
        # @param level [Integer] one of NONE, LIGHT, MODERATE, AGGRESSIVE
        # @return [String, nil] compressed text; code segments are returned byte-for-byte
        def compress(text, level: LIGHT)
          return text if text.nil? || text.empty? || level <= NONE

          # Build the stop-word regex once per call rather than once per segment.
          pattern = stopword_pattern(level)
          squeeze_blank_lines = level >= AGGRESSIVE

          split_segments(text).map do |seg|
            next seg[:text] if seg[:protected]

            prose = strip_stopwords(seg[:text], pattern)
            # Blank-line collapsing is applied to prose only, so that protected
            # code segments are never modified.
            squeeze_blank_lines ? collapse_whitespace(prose) : prose
          end.join
        end

        # Cumulative stop-word list for a level (levels above AGGRESSIVE are
        # treated as AGGRESSIVE).
        #
        # @param level [Integer]
        # @return [Array<String>]
        def stopwords_for_level(level)
          return [] if level <= NONE

          (1..[level, AGGRESSIVE].min).flat_map { |l| LEVEL_WORDS.fetch(l, []) }
        end

        private

        # Splits text into ordered segments, each flagged as protected (code)
        # or not (prose).
        def split_segments(text)
          segments = []

          # Split on fenced code blocks first (```...```)
          parts = text.split(/(```.*?```)/m)
          parts.each do |part|
            if part.start_with?('```')
              segments << { text: part, protected: true }
            else
              # Within non-fenced text, split on inline code (`...`)
              subparts = part.split(/(`[^`\n]+`)/)
              subparts.each do |sub|
                protected = sub.start_with?('`') && sub.end_with?('`') && sub.length > 1
                segments << { text: sub, protected: protected }
              end
            end
          end

          segments
        end

        # Removes the stop words for +level+ from a prose fragment.
        def compress_prose(text, level)
          strip_stopwords(text, stopword_pattern(level))
        end

        # Case-insensitive whole-word matcher (plus one optional trailing
        # space) for the level's stop words; nil when there are none.
        def stopword_pattern(level)
          words = stopwords_for_level(level)
          return nil if words.empty?

          /\b(?:#{Regexp.union(words).source})\b ?/i
        end

        # Deletes every pattern match, then squeezes runs of spaces left behind.
        def strip_stopwords(text, pattern)
          return text if pattern.nil?

          text.gsub(pattern, '').gsub(/ +/, ' ')
        end

        # Collapses runs of three or more newlines into exactly two.
        def collapse_whitespace(text)
          text.gsub(/\n{3,}/, "\n\n")
        end
      end
    end
  end
end
@@ -0,0 +1,88 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'faraday'
4
+ require 'json'
5
+
6
module Legion
  module LLM
    module Discovery
      # Cached view of the models reported by an Ollama server's /api/tags
      # endpoint, refreshed lazily once the cache is older than the TTL.
      module Ollama
        class << self
          # @return [Array<Hash>] cached model entries (empty when none are known)
          def models
            ensure_fresh
            @models || []
          end

          # @return [Array] the 'name' value of every cached model entry
          def model_names
            models.map { |entry| entry['name'] }
          end

          # @return [Boolean] whether a cached model has exactly this name
          def model_available?(name)
            model_names.include?(name)
          end

          # @return [Object, nil] the 'size' value of the named model, or nil if absent
          def model_size(name)
            match = models.find { |entry| entry['name'] == name }
            match&.dig('size')
          end

          # Re-queries the server. On a non-success response or any
          # StandardError the previous list is kept (or initialised to []).
          # The refresh timestamp is updated in every case.
          def refresh!
            response = connection.get('/api/tags')
            return @models ||= [] unless response.success?

            payload = ::JSON.parse(response.body)
            @models = payload['models'] || []
          rescue StandardError
            @models ||= []
          ensure
            @last_refreshed_at = Time.now
          end

          # Drops the cached list and timestamp so the next read refreshes.
          def reset!
            @models = nil
            @last_refreshed_at = nil
          end

          # @return [Boolean] true when never refreshed or older than the TTL
          #   (discovery setting :refresh_seconds, default 60)
          def stale?
            return true if @last_refreshed_at.nil?

            max_age = discovery_settings[:refresh_seconds] || 60
            elapsed = Time.now - @last_refreshed_at
            elapsed > max_age
          end

          private

          def ensure_fresh
            refresh! if stale?
          end

          # Builds a Faraday connection with 2-second open and read timeouts.
          def connection
            Faraday.new(url: ollama_base_url) do |conn|
              conn.options.timeout = 2
              conn.options.open_timeout = 2
              conn.adapter Faraday.default_adapter
            end
          end

          # Base URL from settings, falling back to the local default when
          # settings are unavailable, unset, or raise.
          def ollama_base_url
            return 'http://localhost:11434' unless Legion.const_defined?('Settings')

            configured = Legion::Settings[:llm].dig(:providers, :ollama, :base_url)
            configured || 'http://localhost:11434'
          rescue StandardError
            'http://localhost:11434'
          end

          # Discovery settings, or {} when settings are unavailable or raise.
          def discovery_settings
            return {} unless Legion.const_defined?('Settings')

            Legion::Settings[:llm][:discovery] || {}
          rescue StandardError
            {}
          end
        end
      end
    end
  end
end
@@ -0,0 +1,139 @@
1
+ # frozen_string_literal: true
2
+
3
module Legion
  module LLM
    module Discovery
      # Lazily cached host memory readings (in MB) for macOS and Linux.
      # Every reading is nil when the platform is unsupported or the
      # underlying command/file could not be read or parsed.
      module System
        class << self
          # @return [Integer, nil] total physical memory in MB
          def total_memory_mb
            ensure_total_fresh
            @total_memory_mb
          end

          # @return [Integer, nil] estimated available memory in MB
          def available_memory_mb
            ensure_available_fresh
            @available_memory_mb
          end

          # @return [Boolean] true when available memory is below the
          #   discovery setting :memory_floor_mb (default 2048); false when
          #   the available figure is unknown
          def memory_pressure?
            avail = available_memory_mb
            return false if avail.nil?

            floor = discovery_settings[:memory_floor_mb] || 2048
            avail < floor
          end

          # @return [Symbol] :macos, :linux or :unknown (memoized)
          def platform
            @platform ||= detect_platform
          end

          # Invalidates the cached readings and restarts the TTL clock; values
          # are re-read lazily on the next accessor call.
          def refresh!
            @total_fetched_at = nil
            @available_fetched_at = nil
            @total_memory_mb = nil
            @available_memory_mb = nil
            @last_refreshed_at = Time.now
          end

          # Clears all cached state, including the detected platform.
          def reset!
            @total_memory_mb = nil
            @available_memory_mb = nil
            @total_fetched_at = nil
            @available_fetched_at = nil
            @last_refreshed_at = nil
            @platform = nil
          end

          # @return [Boolean] true when never refreshed or older than the TTL
          #   (discovery setting :refresh_seconds, default 60)
          def stale?
            return true if @last_refreshed_at.nil?

            ttl = discovery_settings[:refresh_seconds] || 60
            Time.now - @last_refreshed_at > ttl
          end

          private

          def ensure_total_fresh
            refresh! if stale?
            return unless @total_fetched_at.nil?

            fetch_total
            @total_fetched_at = Time.now
          end

          def ensure_available_fresh
            refresh! if stale?
            return unless @available_fetched_at.nil?

            fetch_available
            @available_fetched_at = Time.now
          end

          def fetch_total
            case platform
            when :macos then fetch_macos_total
            when :linux then fetch_linux_total
            end
          end

          def fetch_available
            case platform
            when :macos then fetch_macos_available
            when :linux then fetch_linux_available
            end
          end

          def detect_platform
            case RbConfig::CONFIG['host_os']
            when /darwin/i then :macos
            when /linux/i then :linux
            else :unknown
            end
          end

          def fetch_macos_total
            bytes = `sysctl -n hw.memsize`.strip.to_i
            # Empty or non-numeric output parses to 0; report "unknown" instead.
            @total_memory_mb = bytes.positive? ? bytes / 1024 / 1024 : nil
          rescue StandardError
            @total_memory_mb = nil
          end

          def fetch_macos_available
            vm_output = `vm_stat`
            free_pages = vm_output[/Pages free:\s+(\d+)/, 1]
            # Without a "Pages free" line there is nothing to base a figure on.
            return @available_memory_mb = nil if free_pages.nil?

            page_size = vm_output[/page size of (\d+) bytes/, 1]&.to_i || 16_384
            inactive = vm_output[/Pages inactive:\s+(\d+)/, 1].to_i
            @available_memory_mb = (free_pages.to_i + inactive) * page_size / 1024 / 1024
          rescue StandardError
            @available_memory_mb = nil
          end

          def fetch_linux_total
            total_kb = File.read('/proc/meminfo')[/MemTotal:\s+(\d+)/, 1]
            @total_memory_mb = total_kb ? total_kb.to_i / 1024 : nil
          rescue StandardError
            @total_memory_mb = nil
          end

          def fetch_linux_available
            @available_memory_mb = parse_linux_available_mb(File.read('/proc/meminfo'))
          rescue StandardError
            @available_memory_mb = nil
          end

          # Extracts available memory (MB) from /proc/meminfo text. Uses the
          # MemAvailable field when present and otherwise falls back to
          # MemFree + Inactive. Returns nil when neither can be read.
          def parse_linux_available_mb(meminfo)
            available_kb = meminfo[/MemAvailable:\s+(\d+)/, 1]
            return available_kb.to_i / 1024 if available_kb

            free_kb = meminfo[/MemFree:\s+(\d+)/, 1]
            return nil if free_kb.nil?

            inactive_kb = meminfo[/Inactive:\s+(\d+)/, 1].to_i
            (free_kb.to_i + inactive_kb) / 1024
          end

          # Discovery settings, or {} when settings are unavailable or raise.
          def discovery_settings
            return {} unless Legion.const_defined?('Settings')

            Legion::Settings[:llm][:discovery] || {}
          rescue StandardError
            {}
          end
        end
      end
    end
  end
end
@@ -0,0 +1,28 @@
1
+ # frozen_string_literal: true
2
+
3
module Legion
  module LLM
    # Mixin that lets an object carry a log of escalation attempts plus the
    # final resolution and escalation chain attached to it.
    module EscalationHistory
      attr_accessor :final_resolution, :escalation_chain

      # @return [Array<Hash>] recorded attempts, oldest first (created on first use)
      def escalation_history
        @escalation_history ||= []
      end

      # @return [Boolean] true once more than one attempt has been recorded
      def escalated?
        escalation_history.length > 1
      end

      # Appends one attempt record to the history.
      #
      # @return [Array<Hash>] the history array after appending
      def record_escalation_attempt(model:, provider:, tier:, outcome:, failures:, duration_ms:)
        attempt = {
          model: model,
          provider: provider,
          tier: tier,
          outcome: outcome,
          failures: failures,
          duration_ms: duration_ms
        }
        escalation_history.push(attempt)
      end
    end
  end
end
@@ -0,0 +1,59 @@
1
+ # frozen_string_literal: true
2
+
3
module Legion
  module Extensions
    module Helpers
      # Convenience wrappers around Legion::LLM for use inside extension runners.
      module LLM
        # Quick chat from any extension runner
        # @param message [String] the prompt
        # @param model [String] optional model override
        # @param provider [Symbol] optional provider override
        # @param intent [Hash, nil] routing intent (capability, privacy, etc.)
        # @param tier [Symbol, nil] explicit tier override
        # @param tools [Array<Class>, Class, nil] optional RubyLLM::Tool subclasses
        # @param instructions [String] optional system instructions
        # @param compress [Integer, nil] Legion::LLM::Compressor level applied to the
        #   message and instructions; nil, 0 or a negative value disables compression
        # @param escalate [Boolean, nil] enable model escalation on low-quality responses
        # @param max_escalations [Integer, nil] max escalation attempts
        # @param quality_check [Proc, nil] callable that returns true if response is acceptable
        # @return [RubyLLM::Message] the assistant response
        def llm_chat(message, model: nil, provider: nil, intent: nil, tier: nil, tools: [], instructions: nil, # rubocop:disable Metrics/ParameterLists
                     compress: 0, escalate: nil, max_escalations: nil, quality_check: nil)
          # Safe navigation lets callers pass compress: nil to mean "no compression".
          if compress&.positive?
            message = Legion::LLM::Compressor.compress(message, level: compress)
            instructions = Legion::LLM::Compressor.compress(instructions, level: compress) if instructions
          end

          # When escalation is active, chat() handles ask() internally via message: kwarg
          # NOTE(review): instructions and tools are not forwarded on this path —
          # confirm whether Legion::LLM.chat is expected to accept them.
          if escalate
            return Legion::LLM.chat(model: model, provider: provider, intent: intent, tier: tier,
                                    escalate: true, max_escalations: max_escalations,
                                    quality_check: quality_check, message: message)
          end

          chat = Legion::LLM.chat(model: model, provider: provider, intent: intent, tier: tier, escalate: false)
          chat.with_instructions(instructions) if instructions

          # Array() tolerates nil and a single tool class as well as an array.
          tool_list = Array(tools)
          chat.with_tools(*tool_list) unless tool_list.empty?
          chat.ask(message)
        end

        # Quick embed from any extension runner
        # @param text [String, Array<String>] text to embed
        # @param model [String] optional model override
        # @return [RubyLLM::Embedding]
        def llm_embed(text, model: nil)
          Legion::LLM.embed(text, model: model)
        end

        # Get a raw chat object for multi-turn conversations
        # @param model [String] optional model override
        # @param provider [Symbol] optional provider override
        # @param intent [Hash, nil] routing intent (capability, privacy, etc.)
        # @param tier [Symbol, nil] explicit tier override
        # @return [RubyLLM::Chat]
        def llm_session(model: nil, provider: nil, intent: nil, tier: nil)
          Legion::LLM.chat(model: model, provider: provider, intent: intent, tier: tier, escalate: false)
        end
      end
    end
  end
end
@@ -0,0 +1,88 @@
1
+ # frozen_string_literal: true
2
+
3
module Legion
  module LLM
    # Applies per-provider settings to the global RubyLLM configuration.
    # The host object must respond to `settings` for #configure_providers.
    module Providers
      # Configures every provider whose settings entry has :enabled set.
      def configure_providers
        settings[:providers].each do |provider, config|
          next unless config[:enabled]

          apply_provider_config(provider, config)
        end
      end

      # Dispatches to the provider-specific configurator; logs a warning for
      # provider keys this module does not know.
      #
      # @param provider [Symbol] provider key
      # @param config [Hash] that provider's settings
      def apply_provider_config(provider, config)
        case provider
        when :bedrock
          configure_bedrock(config)
        when :anthropic
          configure_anthropic(config)
        when :openai
          configure_openai(config)
        when :gemini
          configure_gemini(config)
        when :ollama
          configure_ollama(config)
        else
          Legion::Logging.warn "Unknown LLM provider: #{provider}"
        end
      end

      # Configures Bedrock with a bearer token when one is given, otherwise
      # with SigV4 credentials. Does nothing when neither is present.
      def configure_bedrock(config)
        has_sigv4 = config[:api_key] && config[:secret_key]
        has_bearer = config[:bearer_token]
        return unless has_sigv4 || has_bearer

        # Loaded only when needed: the file patches RubyLLM's Bedrock provider.
        require 'legion/llm/bedrock_bearer_auth' if has_bearer

        # Resolve the region once so the log line reports the value actually
        # applied, including the default.
        region = config[:region] || 'us-east-2'

        RubyLLM.configure do |c|
          if has_bearer
            c.bedrock_bearer_token = config[:bearer_token]
          else
            c.bedrock_api_key = config[:api_key]
            c.bedrock_secret_key = config[:secret_key]
            c.bedrock_session_token = config[:session_token] if config[:session_token]
          end
          c.bedrock_region = region
        end

        auth_mode = has_bearer ? 'bearer token' : 'SigV4'
        Legion::Logging.info "Configured Bedrock provider (#{region}, #{auth_mode})"
      end

      # Sets the Anthropic API key; does nothing without one.
      def configure_anthropic(config)
        return unless config[:api_key]

        RubyLLM.configure do |c|
          c.anthropic_api_key = config[:api_key]
        end
        Legion::Logging.info 'Configured Anthropic provider'
      end

      # Sets the OpenAI API key; does nothing without one.
      def configure_openai(config)
        return unless config[:api_key]

        RubyLLM.configure do |c|
          c.openai_api_key = config[:api_key]
        end
        Legion::Logging.info 'Configured OpenAI provider'
      end

      # Sets the Gemini API key; does nothing without one.
      def configure_gemini(config)
        return unless config[:api_key]

        RubyLLM.configure do |c|
          c.gemini_api_key = config[:api_key]
        end
        Legion::Logging.info 'Configured Gemini provider'
      end

      # Sets the Ollama API base when a :base_url is configured.
      def configure_ollama(config)
        RubyLLM.configure do |c|
          c.ollama_api_base = config[:base_url] if config[:base_url]
        end
        Legion::Logging.info "Configured Ollama provider (#{config[:base_url]})"
      end
    end
  end
end
@@ -0,0 +1,56 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'json'
4
+
5
module Legion
  module LLM
    # Heuristic checks applied to a response's content: emptiness, minimum
    # length, chunk repetition, optional JSON validity, and an optional
    # caller-supplied predicate.
    module QualityChecker
      QualityResult = Struct.new(:passed, :failures)

      REPETITION_MIN_LENGTH = 20
      REPETITION_THRESHOLD = 3
      DEFAULT_QUALITY_THRESHOLD = 50

      class << self
        # Runs all checks against +response+.
        #
        # @param response [#content] object whose content is inspected
        # @param quality_threshold [Integer] minimum acceptable content length
        # @param json_expected [Boolean] also require the content to parse as JSON
        # @param quality_check [#call, nil] custom predicate given the response;
        #   a falsy result records :custom_check_failed
        # @return [QualityResult] passed flag plus the list of failure symbols
        def check(response, quality_threshold: DEFAULT_QUALITY_THRESHOLD, json_expected: false, quality_check: nil)
          problems = content_failures(response.content, quality_threshold, json_expected)
          problems << :custom_check_failed if quality_check.respond_to?(:call) && !quality_check.call(response)

          QualityResult.new(problems.empty?, problems)
        end

        private

        # Failures derived from the content alone. Blank content short-circuits
        # the remaining content checks.
        def content_failures(body, quality_threshold, json_expected)
          return [:empty_response] if body.nil? || body.strip.empty?

          problems = []
          problems << :too_short if body.length < quality_threshold
          problems << :repetition if repetitive?(body)
          problems << :json_parse_failure if json_expected && !valid_json?(body)
          problems
        end

        # True when any full REPETITION_MIN_LENGTH-character chunk, taken at
        # consecutive non-overlapping offsets, occurs REPETITION_THRESHOLD times.
        def repetitive?(content)
          return false if content.length < REPETITION_MIN_LENGTH * REPETITION_THRESHOLD

          occurrences = Hash.new(0)
          full_chunks = content.length / REPETITION_MIN_LENGTH
          full_chunks.times.any? do |idx|
            piece = content[idx * REPETITION_MIN_LENGTH, REPETITION_MIN_LENGTH]
            (occurrences[piece] += 1) >= REPETITION_THRESHOLD
          end
        end

        # True when the content parses as JSON.
        def valid_json?(content)
          ::JSON.parse(content)
          true
        rescue ::JSON::ParserError
          false
        end
      end
    end
  end
end
@@ -0,0 +1,49 @@
1
+ # frozen_string_literal: true
2
+
3
module Legion
  module LLM
    module Router
      # Ordered list of candidate resolutions to try in turn. Iteration yields
      # exactly max_attempts entries for a non-empty chain (truncating, or
      # repeating the last resolution) and nothing for an empty chain.
      class EscalationChain
        include Enumerable

        attr_reader :max_attempts

        # @param resolutions [Array] candidate resolutions in priority order
        # @param max_attempts [Integer] number of attempts iteration yields
        def initialize(resolutions:, max_attempts: 3)
          @resolutions = resolutions.dup.freeze
          @max_attempts = max_attempts
        end

        # @return [Object, nil] the first resolution, or nil when empty
        def primary
          @resolutions.first
        end

        # Yields each attempt's resolution; returns an Enumerator without a block.
        def each(&block)
          return enum_for(:each) unless block_given?

          padded_resolutions.each(&block)
        end

        # @return [Integer] number of distinct resolutions supplied (not padded)
        def size
          @resolutions.size
        end

        def empty?
          @resolutions.empty?
        end

        # @return [Array] a copy of the supplied resolutions (not padded)
        def to_a
          @resolutions.dup
        end

        private

        # Resolutions truncated or right-padded with the last entry to
        # max_attempts items.
        def padded_resolutions
          # An empty chain has nothing to repeat; padding would yield nils.
          return [] if @resolutions.empty?

          attempts = @resolutions.first(@max_attempts)
          attempts + Array.new(@max_attempts - attempts.size, @resolutions.last)
        end
      end
    end
  end
end