lex-llm 0.3.1 → 0.4.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +49 -0
  3. data/README.md +18 -2
  4. data/lex-llm.gemspec +3 -0
  5. data/lib/legion/extensions/llm/auto_registration.rb +7 -36
  6. data/lib/legion/extensions/llm/embedding.rb +1 -1
  7. data/lib/legion/extensions/llm/error.rb +14 -0
  8. data/lib/legion/extensions/llm/errors/unsupported_capability.rb +21 -0
  9. data/lib/legion/extensions/llm/fleet/default_exchange_reply.rb +81 -0
  10. data/lib/legion/extensions/llm/fleet/envelope_validation.rb +39 -0
  11. data/lib/legion/extensions/llm/fleet/protocol.rb +16 -0
  12. data/lib/legion/extensions/llm/fleet/provider_responder.rb +304 -0
  13. data/lib/legion/extensions/llm/fleet/publish_safety.rb +123 -0
  14. data/lib/legion/extensions/llm/fleet/settings.rb +66 -0
  15. data/lib/legion/extensions/llm/fleet/token_error.rb +11 -0
  16. data/lib/legion/extensions/llm/fleet/token_validator.rb +205 -0
  17. data/lib/legion/extensions/llm/fleet/worker_execution.rb +165 -0
  18. data/lib/legion/extensions/llm/message.rb +9 -3
  19. data/lib/legion/extensions/llm/provider/open_ai_compatible.rb +37 -36
  20. data/lib/legion/extensions/llm/provider.rb +198 -4
  21. data/lib/legion/extensions/llm/provider_contract.rb +21 -0
  22. data/lib/legion/extensions/llm/provider_settings.rb +18 -1
  23. data/lib/legion/extensions/llm/responses/chat_response.rb +43 -0
  24. data/lib/legion/extensions/llm/responses/embedding_response.rb +38 -0
  25. data/lib/legion/extensions/llm/responses/stream_chunk.rb +43 -0
  26. data/lib/legion/extensions/llm/responses/thinking_extractor.rb +155 -0
  27. data/lib/legion/extensions/llm/stream_accumulator.rb +12 -1
  28. data/lib/legion/extensions/llm/transport/exchanges/fleet.rb +24 -0
  29. data/lib/legion/extensions/llm/transport/messages/fleet_error.rb +64 -0
  30. data/lib/legion/extensions/llm/transport/messages/fleet_request.rb +155 -0
  31. data/lib/legion/extensions/llm/transport/messages/fleet_response.rb +63 -0
  32. data/lib/legion/extensions/llm/version.rb +1 -1
  33. data/lib/legion/extensions/llm.rb +38 -11
  34. metadata +62 -1
@@ -39,17 +39,17 @@ module Legion
39
39
  messages.map do |message|
40
40
  {
41
41
  role: message.role.to_s,
42
- content: openai_content(message.content),
42
+ content: openai_content(message.content, role: message.role),
43
43
  tool_call_id: message.tool_call_id,
44
44
  tool_calls: format_openai_tool_calls(message.tool_calls)
45
45
  }.compact
46
46
  end
47
47
  end
48
48
 
49
- def openai_content(content)
49
+ def openai_content(content, role:)
50
50
  return content.format if content.is_a?(Legion::Extensions::Llm::Content::Raw)
51
- return content unless content.respond_to?(:attachments)
52
- return content.text.to_s if content.attachments.empty?
51
+ return sanitize_openai_text(content, role:) unless content.respond_to?(:attachments)
52
+ return sanitize_openai_text(content.text.to_s, role:) if content.attachments.empty?
53
53
 
54
54
  openai_content_parts(content)
55
55
  end
@@ -63,6 +63,12 @@ module Legion
63
63
  parts
64
64
  end
65
65
 
66
+ def sanitize_openai_text(text, role:)
67
+ return text unless role.to_sym == :assistant && text.is_a?(String)
68
+
69
+ Responses::ThinkingExtractor.extract(text).content
70
+ end
71
+
66
72
  def format_openai_tool_calls(tool_calls)
67
73
  return nil unless tool_calls&.any?
68
74
 
@@ -135,18 +141,29 @@ module Legion
135
141
  end
136
142
 
137
143
  def extract_thinking_from_completion(message)
138
- reasoning = message['reasoning_content'] || message['reasoning']
139
- content = message['content']
144
+ extraction = Responses::ThinkingExtractor.extract(
145
+ message['content'],
146
+ metadata: thinking_metadata(message)
147
+ )
140
148
 
141
- if reasoning
142
- [content, Thinking.build(text: reasoning)]
143
- elsif content.is_a?(String) && content.include?('<think>')
144
- think_text = content[%r{<think>(.*?)</think>}m, 1]
145
- clean = content.gsub(%r{<think>.*?</think>}m, '').strip
146
- [clean, Thinking.build(text: think_text)]
147
- else
148
- [content, nil]
149
- end
149
+ [
150
+ extraction.content,
151
+ Thinking.build(
152
+ text: extraction.thinking,
153
+ signature: extraction.signature
154
+ )
155
+ ]
156
+ end
157
+
158
+ def thinking_metadata(message)
159
+ {
160
+ reasoning_content: message['reasoning_content'],
161
+ reasoning: message['reasoning'],
162
+ thinking: message['thinking'],
163
+ thinking_text: message['thinking_text'],
164
+ thinking_signature: message['thinking_signature'],
165
+ reasoning_signature: message['reasoning_signature']
166
+ }.compact
150
167
  end
151
168
 
152
169
  def build_chunk(data)
@@ -173,39 +190,23 @@ module Legion
173
190
 
174
191
  if reasoning
175
192
  [content, Thinking.build(text: reasoning)]
176
- elsif content.is_a?(String) && content.include?('<think>')
177
- clean, think_text = split_think_tags(content)
178
- [clean, Thinking.build(text: think_text)]
179
193
  else
180
194
  [content, nil]
181
195
  end
182
196
  end
183
197
 
184
- def split_think_tags(text) # rubocop:disable Metrics/PerceivedComplexity
185
- if text.match?(%r{<think>.*</think>}m)
186
- thinking = text[%r{<think>(.*?)</think>}m, 1]
187
- clean = text.gsub(%r{<think>.*?</think>}m, '').strip
188
- [clean.empty? ? nil : clean, thinking]
189
- elsif text.start_with?('<think>')
190
- [nil, text.delete_prefix('<think>')]
191
- elsif text.include?('</think>')
192
- parts = text.split('</think>', 2)
193
- [parts[1]&.strip.then { |s| s&.empty? ? nil : s }, parts[0]]
194
- else
195
- [text, nil]
196
- end
197
- end
198
-
199
198
  def parse_tool_calls(tool_calls)
200
199
  return nil unless tool_calls&.any?
201
200
 
202
201
  tool_calls.to_h do |call|
203
202
  function = call.fetch('function', {})
204
- name = function.fetch('name')
203
+ name = function['name']
204
+ id = call['id'] || name || call['index']
205
+ key = name || id
205
206
  [
206
- name.to_sym,
207
+ key.to_s.to_sym,
207
208
  Legion::Extensions::Llm::ToolCall.new(
208
- id: call['id'] || name,
209
+ id: id&.to_s,
209
210
  name: name,
210
211
  arguments: parse_tool_arguments(function['arguments'])
211
212
  )
@@ -62,6 +62,16 @@ module Legion
62
62
  end
63
63
 
64
64
  # rubocop:disable Metrics/ParameterLists
65
+ def chat(messages:, model:, tools: [], temperature: nil, params: {}, headers: {}, schema: nil, thinking: nil,
66
+ tool_prefs: nil)
67
+ complete(messages, tools:, temperature:, model:, params:, headers:, schema:, thinking:, tool_prefs:)
68
+ end
69
+
70
+ def stream_chat(messages:, model:, tools: [], temperature: nil, params: {}, headers: {}, schema: nil,
71
+ thinking: nil, tool_prefs: nil, &)
72
+ complete(messages, tools:, temperature:, model:, params:, headers:, schema:, thinking:, tool_prefs:, &)
73
+ end
74
+
65
75
  def complete(messages, tools:, temperature:, model:, params: {}, headers: {}, schema: nil, thinking: nil,
66
76
  tool_prefs: nil, &)
67
77
  normalized_temperature = maybe_normalize_temperature(temperature, model)
@@ -88,14 +98,55 @@ module Legion
88
98
  end
89
99
  # rubocop:enable Metrics/ParameterLists
90
100
 
91
- def list_models
101
+ def list_models(live: false, **filters)
102
+ _ = [live, filters]
92
103
  response = @connection.get models_url
93
104
  parse_list_models_response response, slug, capabilities
94
105
  end
95
106
 
96
- def embed(text, model:, dimensions:)
97
- payload = render_embedding_payload(text, model:, dimensions:)
98
- response = @connection.post(embedding_url(model:), payload)
107
+ def discover_offerings(live: false, **filters)
108
+ return filter_cached_offerings(Array(@cached_offerings), filters) unless live
109
+
110
+ provider_health = health(live:)
111
+ @cached_offerings = Array(list_models(live:, **filters)).filter_map do |model|
112
+ next unless model_matches_filters?(model, filters)
113
+
114
+ offering_from_model(model, health: provider_health)
115
+ end
116
+ @cached_offerings
117
+ end
118
+
119
+ def health(live: false)
120
+ readiness_data = readiness(live:)
121
+ raw_health = readiness_data[:health] || readiness_data['health'] || {}
122
+ status = health_status(readiness_data, raw_health)
123
+ {
124
+ provider: slug.to_sym,
125
+ instance_id: provider_instance_id,
126
+ status:,
127
+ ready: readiness_data[:ready] == true || readiness_data['ready'] == true,
128
+ circuit_state: status == 'healthy' ? 'closed' : 'open',
129
+ latency_ms: raw_health[:latency_ms] || raw_health['latency_ms'],
130
+ raw: raw_health
131
+ }.compact
132
+ rescue StandardError => e
133
+ handle_exception(e, level: :warn, handled: true, operation: 'llm.provider.health')
134
+ {
135
+ provider: slug.to_sym,
136
+ instance_id: provider_instance_id,
137
+ status: 'unhealthy',
138
+ ready: false,
139
+ circuit_state: 'open',
140
+ error: e.class.name,
141
+ message: e.message
142
+ }
143
+ end
144
+
145
+ def embed(text:, model:, dimensions: nil, params: {}, headers: {})
146
+ payload = Utils.deep_merge(render_embedding_payload(text, model:, dimensions:), params)
147
+ response = @connection.post(embedding_url(model:), payload) do |req|
148
+ req.headers = headers.merge(req.headers) unless headers.empty?
149
+ end
99
150
  parse_embedding_response(response, model:, text:)
100
151
  end
101
152
 
@@ -112,6 +163,18 @@ module Legion
112
163
  parse_image_response(response, model:)
113
164
  end
114
165
 
166
+ def image(prompt:, model:, size:, with: nil, mask: nil, params: {}) # rubocop:disable Metrics/ParameterLists
167
+ paint(prompt, model:, size:, with:, mask:, params:)
168
+ end
169
+
170
+ def count_tokens(messages:, model:, params: {})
171
+ _ = [model, params]
172
+ Array(messages).sum do |message|
173
+ content = message.respond_to?(:content) ? message.content : message[:content] || message['content']
174
+ estimate_text_tokens(content)
175
+ end
176
+ end
177
+
115
178
  def transcribe(audio_file, model:, language:, **)
116
179
  file_part = build_audio_file_part(audio_file)
117
180
  payload = render_transcription_payload(file_part, model:, language:, **)
@@ -319,6 +382,12 @@ module Legion
319
382
  end
320
383
  end
321
384
 
385
+ def provider_instance_id
386
+ return config.instance_id.to_sym if config.respond_to?(:instance_id) && config.instance_id
387
+
388
+ :default
389
+ end
390
+
322
391
  class << self
323
392
  def name
324
393
  to_s.split('::').last
@@ -369,6 +438,131 @@ module Legion
369
438
  raise UnsupportedAttachmentError, "#{name} does not support image references in paint"
370
439
  end
371
440
 
441
+ def offering_from_model(model, health: {})
442
+ Routing::ModelOffering.new(
443
+ provider_family: slug.to_sym,
444
+ provider_instance: model.instance || provider_instance_id,
445
+ transport: offering_transport,
446
+ tier: offering_tier,
447
+ model: model.id,
448
+ canonical_model_alias: model.name,
449
+ model_family: model.family,
450
+ usage_type: offering_usage_type(model),
451
+ capabilities: model.capabilities,
452
+ limits: offering_limits(model),
453
+ health:,
454
+ metadata: offering_metadata(model)
455
+ )
456
+ end
457
+
458
+ def offering_transport
459
+ local? ? :local : :http
460
+ end
461
+
462
+ def offering_tier
463
+ local? ? :local : :direct
464
+ end
465
+
466
+ def offering_usage_type(model)
467
+ model.embedding? ? :embedding : :inference
468
+ end
469
+
470
+ def offering_limits(model)
471
+ {
472
+ context_window: model.context_length,
473
+ max_output_tokens: model.max_output_tokens
474
+ }.compact
475
+ end
476
+
477
+ def offering_metadata(model)
478
+ {
479
+ raw_model: model.id,
480
+ parameter_count: model.parameter_count,
481
+ parameter_size: model.parameter_size,
482
+ quantization: model.quantization,
483
+ size_bytes: model.size_bytes,
484
+ modalities_input: model.modalities_input,
485
+ modalities_output: model.modalities_output
486
+ }.merge(model.metadata || {}).compact
487
+ end
488
+
489
+ def model_matches_filters?(model, filters)
490
+ return true if filters.empty?
491
+
492
+ filters.all? do |key, value|
493
+ blank_filter_value?(value) || model_matches_filter?(model, key, value)
494
+ end
495
+ end
496
+
497
+ def blank_filter_value?(value)
498
+ value.nil? || (value.respond_to?(:empty?) && value.empty?)
499
+ end
500
+
501
+ def model_matches_filter?(model, key, value)
502
+ case key.to_sym
503
+ when :capability, :capabilities
504
+ Array(value).all? { |capability| model.supports?(capability) }
505
+ when :type, :usage_type, :purpose
506
+ offering_usage_type(model).to_s == value.to_s || model.type.to_s == value.to_s
507
+ when :model, :id, :name
508
+ [model.id, model.name].map(&:to_s).include?(value.to_s)
509
+ when :instance, :instance_id, :provider_instance
510
+ provider_instance_id.to_s == value.to_s || model.instance.to_s == value.to_s
511
+ else
512
+ true
513
+ end
514
+ end
515
+
516
+ def filter_cached_offerings(offerings, filters)
517
+ return offerings if filters.empty?
518
+
519
+ offerings.select do |offering|
520
+ filters.all? do |key, value|
521
+ blank_filter_value?(value) || offering_matches_filter?(offering, key, value)
522
+ end
523
+ end
524
+ end
525
+
526
+ def offering_matches_filter?(offering, key, value)
527
+ case key.to_sym
528
+ when :provider, :provider_family
529
+ offering.provider_family.to_s == value.to_s
530
+ when :capability, :capabilities
531
+ Array(value).all? { |capability| offering.supports?(capability) }
532
+ when :type, :usage_type, :purpose
533
+ offering.usage_type.to_s == value.to_s
534
+ when :model, :id, :name
535
+ [offering.model, offering.canonical_model_alias].compact.map(&:to_s).include?(value.to_s)
536
+ when :instance, :instance_id, :provider_instance
537
+ [offering.provider_instance, offering.instance_id].compact.map(&:to_s).include?(value.to_s)
538
+ else
539
+ true
540
+ end
541
+ end
542
+
543
+ def health_status(readiness_data, raw_health)
544
+ return 'healthy' if readiness_data[:ready] == true || readiness_data['ready'] == true
545
+
546
+ status = raw_health[:status] || raw_health['status'] || raw_health[:state] || raw_health['state']
547
+ return 'healthy' if %w[ok ready healthy running].include?(status.to_s.downcase)
548
+
549
+ 'unhealthy'
550
+ end
551
+
552
+ def estimate_text_tokens(content)
553
+ text = case content
554
+ when Content
555
+ [content.text, *content.attachments.map(&:to_s)].compact.join(' ')
556
+ when Array
557
+ content.map do |part|
558
+ part.respond_to?(:[]) ? part[:text] || part['text'] || part.to_s : part.to_s
559
+ end.join(' ')
560
+ else
561
+ content.to_s
562
+ end
563
+ [(text.length / 4.0).ceil, 1].max
564
+ end
565
+
372
566
  def build_audio_file_part(file_path)
373
567
  expanded_path = File.expand_path(file_path)
374
568
  mime_type = Marcel::MimeType.for(Pathname.new(expanded_path))
@@ -0,0 +1,21 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Legion
4
+ module Extensions
5
+ module Llm
6
+ # Documents the canonical public provider method signatures shared by provider gems.
7
+ module ProviderContract
8
+ REQUIRED_SIGNATURES = {
9
+ chat: [%i[keyreq messages], %i[keyreq model]],
10
+ stream_chat: [%i[keyreq messages], %i[keyreq model]],
11
+ embed: [%i[keyreq text], %i[keyreq model]],
12
+ image: [%i[keyreq prompt], %i[keyreq model]],
13
+ list_models: [%i[key live], %i[keyrest filters]],
14
+ discover_offerings: [%i[key live], %i[keyrest filters]],
15
+ health: [%i[key live]],
16
+ count_tokens: [%i[keyreq messages], %i[keyreq model], %i[key params]]
17
+ }.freeze
18
+ end
19
+ end
20
+ end
21
+ end
@@ -7,7 +7,10 @@ module Legion
7
7
  module ProviderSettings
8
8
  module_function
9
9
 
10
- def build(family:, instance: {}, enabled: true, discovery: {}, instances: {})
10
+ def build(family:, instance: {}, enabled: true, discovery: {}, instances: {}, fleet: nil, gateways: nil, # rubocop:disable Metrics/ParameterLists
11
+ **legacy_settings)
12
+ validate_provider_defaults!(fleet:, gateways:, legacy_settings:)
13
+
11
14
  deep_merge(
12
15
  Legion::Extensions::Llm.default_settings,
13
16
  {
@@ -27,6 +30,20 @@ module Legion
27
30
  )
28
31
  end
29
32
 
33
+ def validate_provider_defaults!(fleet:, gateways:, legacy_settings:)
34
+ if fleet
35
+ raise ArgumentError,
36
+ 'Provider fleet defaults must be nested under an instance, for example instances.default.fleet'
37
+ end
38
+
39
+ raise ArgumentError, 'Provider gateways settings are no longer supported; use instances instead' if gateways
40
+
41
+ return if legacy_settings.empty?
42
+
43
+ invalid_keys = legacy_settings.keys.map(&:to_sym)
44
+ raise ArgumentError, "Unsupported top-level provider settings: #{invalid_keys.join(', ')}"
45
+ end
46
+
30
47
  def deep_dup(value)
31
48
  case value
32
49
  when Hash
@@ -0,0 +1,43 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Legion
4
+ module Extensions
5
+ module Llm
6
+ module Responses
7
+ # Normalized non-streaming chat provider response.
8
+ class ChatResponse
9
+ attr_reader :content, :thinking, :metadata, :model, :tool_calls, :tokens, :raw, :internal_metadata
10
+
11
+ def initialize(content:, thinking: nil, metadata: {}, model: nil, tool_calls: nil, tokens: nil, raw: nil) # rubocop:disable Metrics/ParameterLists
12
+ extraction = ThinkingExtractor.extract(content, metadata: metadata)
13
+
14
+ @content = extraction.content
15
+ @thinking = thinking || extraction.thinking
16
+ @metadata = extraction.metadata
17
+ @internal_metadata = metadata.to_h
18
+ @model = model
19
+ @tool_calls = tool_calls
20
+ @tokens = tokens
21
+ @raw = raw
22
+
23
+ freeze
24
+ end
25
+
26
+ def to_h
27
+ {
28
+ content: content,
29
+ metadata: metadata,
30
+ model: model,
31
+ tool_calls: tool_calls,
32
+ tokens: tokens
33
+ }.compact
34
+ end
35
+
36
+ def to_internal_h
37
+ to_h.merge(thinking: thinking, metadata: internal_metadata, raw: raw).compact
38
+ end
39
+ end
40
+ end
41
+ end
42
+ end
43
+ end
@@ -0,0 +1,38 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Legion
4
+ module Extensions
5
+ module Llm
6
+ module Responses
7
+ # Normalized embedding provider response.
8
+ class EmbeddingResponse
9
+ attr_reader :vectors, :model, :tokens, :metadata, :raw
10
+
11
+ def initialize(vectors:, model:, tokens: nil, metadata: {}, raw: nil)
12
+ @vectors = vectors
13
+ @model = model
14
+ @tokens = tokens
15
+ @metadata = ThinkingExtractor.extract(nil, metadata: metadata).metadata
16
+ @internal_metadata = metadata.to_h
17
+ @raw = raw
18
+
19
+ freeze
20
+ end
21
+
22
+ def to_h
23
+ {
24
+ vectors: vectors,
25
+ model: model,
26
+ tokens: tokens,
27
+ metadata: metadata
28
+ }.compact
29
+ end
30
+
31
+ def to_internal_h
32
+ to_h.merge(metadata: @internal_metadata, raw: raw).compact
33
+ end
34
+ end
35
+ end
36
+ end
37
+ end
38
+ end
@@ -0,0 +1,43 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Legion
4
+ module Extensions
5
+ module Llm
6
+ module Responses
7
+ # Normalized streaming provider response chunk.
8
+ class StreamChunk
9
+ attr_reader :content, :thinking, :metadata, :model, :tool_calls, :tokens, :raw, :internal_metadata
10
+
11
+ def initialize(content: nil, thinking: nil, metadata: {}, model: nil, tool_calls: nil, tokens: nil, raw: nil) # rubocop:disable Metrics/ParameterLists
12
+ extraction = ThinkingExtractor.extract(content, metadata: metadata)
13
+
14
+ @content = extraction.content
15
+ @thinking = thinking || extraction.thinking
16
+ @metadata = extraction.metadata
17
+ @internal_metadata = metadata.to_h
18
+ @model = model
19
+ @tool_calls = tool_calls
20
+ @tokens = tokens
21
+ @raw = raw
22
+
23
+ freeze
24
+ end
25
+
26
+ def to_h
27
+ {
28
+ content: content,
29
+ metadata: metadata,
30
+ model: model,
31
+ tool_calls: tool_calls,
32
+ tokens: tokens
33
+ }.compact
34
+ end
35
+
36
+ def to_internal_h
37
+ to_h.merge(thinking: thinking, metadata: internal_metadata, raw: raw).compact
38
+ end
39
+ end
40
+ end
41
+ end
42
+ end
43
+ end