lex-llm 0.3.1 → 0.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (29) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +43 -0
  3. data/README.md +18 -2
  4. data/lex-llm.gemspec +1 -0
  5. data/lib/legion/extensions/llm/auto_registration.rb +7 -36
  6. data/lib/legion/extensions/llm/embedding.rb +1 -1
  7. data/lib/legion/extensions/llm/error.rb +14 -0
  8. data/lib/legion/extensions/llm/errors/unsupported_capability.rb +21 -0
  9. data/lib/legion/extensions/llm/fleet/default_exchange_reply.rb +81 -0
  10. data/lib/legion/extensions/llm/fleet/envelope_validation.rb +39 -0
  11. data/lib/legion/extensions/llm/fleet/protocol.rb +16 -0
  12. data/lib/legion/extensions/llm/fleet/publish_safety.rb +123 -0
  13. data/lib/legion/extensions/llm/message.rb +9 -3
  14. data/lib/legion/extensions/llm/provider/open_ai_compatible.rb +37 -36
  15. data/lib/legion/extensions/llm/provider.rb +198 -4
  16. data/lib/legion/extensions/llm/provider_contract.rb +21 -0
  17. data/lib/legion/extensions/llm/provider_settings.rb +18 -1
  18. data/lib/legion/extensions/llm/responses/chat_response.rb +43 -0
  19. data/lib/legion/extensions/llm/responses/embedding_response.rb +38 -0
  20. data/lib/legion/extensions/llm/responses/stream_chunk.rb +43 -0
  21. data/lib/legion/extensions/llm/responses/thinking_extractor.rb +155 -0
  22. data/lib/legion/extensions/llm/stream_accumulator.rb +12 -1
  23. data/lib/legion/extensions/llm/transport/exchanges/fleet.rb +24 -0
  24. data/lib/legion/extensions/llm/transport/messages/fleet_error.rb +64 -0
  25. data/lib/legion/extensions/llm/transport/messages/fleet_request.rb +155 -0
  26. data/lib/legion/extensions/llm/transport/messages/fleet_response.rb +63 -0
  27. data/lib/legion/extensions/llm/version.rb +1 -1
  28. data/lib/legion/extensions/llm.rb +31 -11
  29. metadata +29 -1
@@ -62,6 +62,16 @@ module Legion
62
62
  end
63
63
 
64
64
  # rubocop:disable Metrics/ParameterLists
65
# Keyword-only chat entry point. Forwards every option to #complete with
# +messages+ as the positional argument; no block is passed along.
def chat(messages:, model:, tools: [], temperature: nil, params: {}, headers: {}, schema: nil, thinking: nil,
         tool_prefs: nil)
  completion_options = { tools:, temperature:, model:, params:, headers:, schema:, thinking:, tool_prefs: }
  complete(messages, **completion_options)
end
69
+
70
# Keyword-only streaming entry point. Same forwarding as the non-streaming
# call, but the caller's block is handed to #complete as well.
def stream_chat(messages:, model:, tools: [], temperature: nil, params: {}, headers: {}, schema: nil,
                thinking: nil, tool_prefs: nil, &block)
  completion_options = { tools:, temperature:, model:, params:, headers:, schema:, thinking:, tool_prefs: }
  complete(messages, **completion_options, &block)
end
74
+
65
75
  def complete(messages, tools:, temperature:, model:, params: {}, headers: {}, schema: nil, thinking: nil,
66
76
  tool_prefs: nil, &)
67
77
  normalized_temperature = maybe_normalize_temperature(temperature, model)
@@ -88,14 +98,55 @@ module Legion
88
98
  end
89
99
  # rubocop:enable Metrics/ParameterLists
90
100
 
91
- def list_models
101
# Fetches the model listing from +models_url+ and parses it.
#
# +live+ and +filters+ are accepted but not used by this implementation;
# they are only referenced so they do not count as unused arguments.
def list_models(live: false, **filters)
  _ = [live, filters]
  models_response = @connection.get(models_url)
  parse_list_models_response(models_response, slug, capabilities)
end
95
106
 
96
- def embed(text, model:, dimensions:)
97
- payload = render_embedding_payload(text, model:, dimensions:)
98
- response = @connection.post(embedding_url(model:), payload)
107
# Returns model offerings for this provider.
#
# Without +live+ the previously cached offerings (or an empty list) are
# filtered and returned. With +live+ the provider health is read first, the
# model list is fetched, and every model passing the filters is converted to
# an offering; the result replaces the cache and is returned.
def discover_offerings(live: false, **filters)
  unless live
    cached = Array(@cached_offerings)
    return filter_cached_offerings(cached, filters)
  end

  provider_health = health(live:)
  discovered = []
  Array(list_models(live:, **filters)).each do |model|
    next unless model_matches_filters?(model, filters)

    offering = offering_from_model(model, health: provider_health)
    discovered << offering if offering
  end
  @cached_offerings = discovered
end
118
+
119
# Builds a normalized health report from #readiness.
#
# The report carries provider, instance, status, readiness flag, circuit
# state, optional latency and the raw health payload; nil entries are
# dropped. Any StandardError is reported through #handle_exception and turned
# into an 'unhealthy' report that names the error class and message.
def health(live: false)
  readiness_data = readiness(live:)
  raw_health = readiness_data[:health] || readiness_data['health'] || {}
  status = health_status(readiness_data, raw_health)
  ready = readiness_data[:ready] == true || readiness_data['ready'] == true
  circuit_state = status == 'healthy' ? 'closed' : 'open'

  report = {
    provider: slug.to_sym,
    instance_id: provider_instance_id,
    status:,
    ready:,
    circuit_state:,
    latency_ms: raw_health[:latency_ms] || raw_health['latency_ms'],
    raw: raw_health
  }
  report.compact
rescue StandardError => error
  handle_exception(error, level: :warn, handled: true, operation: 'llm.provider.health')
  {
    provider: slug.to_sym,
    instance_id: provider_instance_id,
    status: 'unhealthy',
    ready: false,
    circuit_state: 'open',
    error: error.class.name,
    message: error.message
  }
end
144
+
145
# Requests embeddings for +text+.
#
# The rendered payload is deep-merged with +params+ (params are the second
# argument to the merge), posted to the embedding URL, and the response is
# parsed. Caller +headers+ are merged underneath the request's own headers,
# so headers already on the request win on conflict.
def embed(text:, model:, dimensions: nil, params: {}, headers: {})
  base_payload = render_embedding_payload(text, model:, dimensions:)
  payload = Utils.deep_merge(base_payload, params)
  endpoint = embedding_url(model:)
  response = @connection.post(endpoint, payload) do |request|
    next if headers.empty?

    request.headers = headers.merge(request.headers)
  end
  parse_embedding_response(response, model:, text:)
end
101
152
 
@@ -112,6 +163,18 @@ module Legion
112
163
  parse_image_response(response, model:)
113
164
  end
114
165
 
166
# Keyword-only image entry point; forwards to #paint with +prompt+ as the
# positional argument and all other options unchanged.
def image(prompt:, model:, size:, with: nil, mask: nil, params: {}) # rubocop:disable Metrics/ParameterLists
  paint_options = { model:, size:, with:, mask:, params: }
  paint(prompt, **paint_options)
end
169
+
170
# Sums #estimate_text_tokens over the content of every message.
#
# A message's content is read via +#content+ when the message responds to
# it, otherwise via the +:content+ / +'content'+ keys. +model+ and +params+
# are accepted but unused here.
def count_tokens(messages:, model:, params: {})
  _ = [model, params]
  Array(messages).sum do |message|
    content = if message.respond_to?(:content)
                message.content
              else
                message[:content] || message['content']
              end
    estimate_text_tokens(content)
  end
end
177
+
115
178
  def transcribe(audio_file, model:, language:, **)
116
179
  file_part = build_audio_file_part(audio_file)
117
180
  payload = render_transcription_payload(file_part, model:, language:, **)
@@ -319,6 +382,12 @@ module Legion
319
382
  end
320
383
  end
321
384
 
385
# Instance identifier as a Symbol: the configured +instance_id+ when the
# config exposes a truthy one, otherwise +:default+.
def provider_instance_id
  configured = config.respond_to?(:instance_id) ? config.instance_id : nil
  configured ? configured.to_sym : :default
end
390
+
322
391
  class << self
323
392
  def name
324
393
  to_s.split('::').last
@@ -369,6 +438,131 @@ module Legion
369
438
  raise UnsupportedAttachmentError, "#{name} does not support image references in paint"
370
439
  end
371
440
 
441
# Wraps a model record in a Routing::ModelOffering for this provider.
# The model's own +instance+ takes precedence over the provider instance id.
def offering_from_model(model, health: {})
  attributes = {
    provider_family: slug.to_sym,
    provider_instance: model.instance || provider_instance_id,
    transport: offering_transport,
    tier: offering_tier,
    model: model.id,
    canonical_model_alias: model.name,
    model_family: model.family,
    usage_type: offering_usage_type(model),
    capabilities: model.capabilities,
    limits: offering_limits(model),
    health:,
    metadata: offering_metadata(model)
  }
  Routing::ModelOffering.new(**attributes)
end
457
+
458
# Transport label for offerings: +:local+ when #local? is truthy, else +:http+.
def offering_transport
  return :local if local?

  :http
end
461
+
462
# Tier label for offerings: +:local+ when #local? is truthy, else +:direct+.
def offering_tier
  return :local if local?

  :direct
end
465
+
466
# Usage type for a model: +:embedding+ when the model reports itself as an
# embedding model, otherwise +:inference+.
def offering_usage_type(model)
  return :embedding if model.embedding?

  :inference
end
469
+
470
# Size limits advertised for a model; limits the model does not report
# (nil) are left out of the hash.
def offering_limits(model)
  limits = { context_window: model.context_length, max_output_tokens: model.max_output_tokens }
  limits.reject { |_name, value| value.nil? }
end
476
+
477
# Descriptive metadata for an offering. Fields read from the model come
# first, the model's own +metadata+ hash (if any) is merged on top and can
# override them, and nil values are removed from the result.
def offering_metadata(model)
  described = {
    raw_model: model.id,
    parameter_count: model.parameter_count,
    parameter_size: model.parameter_size,
    quantization: model.quantization,
    size_bytes: model.size_bytes,
    modalities_input: model.modalities_input,
    modalities_output: model.modalities_output
  }
  overrides = model.metadata || {}
  described.merge(overrides).compact
end
488
+
489
# True when no filter rejects the model. A filter with a blank value is
# ignored; an empty filter set accepts every model.
def model_matches_filters?(model, filters)
  filters.none? do |key, value|
    !blank_filter_value?(value) && !model_matches_filter?(model, key, value)
  end
end
496
+
497
# A filter value is blank when it is nil, or when it responds to +empty?+
# and reports itself empty.
def blank_filter_value?(value)
  return true if value.nil?
  return false unless value.respond_to?(:empty?)

  value.empty?
end
500
+
501
# Checks one discovery filter against a model record.
#
# Recognized filter keys (String or Symbol):
# * provider / provider_family - compared with this provider's slug, so live
#   discovery honors the same provider filter that cached offerings do
# * capability / capabilities - every requested capability must be supported
# * type / usage_type / purpose - matches the derived usage type or model.type
# * model / id / name - matches the model id or name
# * instance / instance_id / provider_instance - matches the provider
#   instance id or the model's own instance
#
# Unrecognized keys never reject a model.
def model_matches_filter?(model, key, value)
  case key.to_sym
  when :provider, :provider_family
    slug.to_s == value.to_s
  when :capability, :capabilities
    Array(value).all? { |capability| model.supports?(capability) }
  when :type, :usage_type, :purpose
    offering_usage_type(model).to_s == value.to_s || model.type.to_s == value.to_s
  when :model, :id, :name
    [model.id, model.name].map(&:to_s).include?(value.to_s)
  when :instance, :instance_id, :provider_instance
    provider_instance_id.to_s == value.to_s || model.instance.to_s == value.to_s
  else
    true
  end
end
515
+
516
# Applies discovery filters to already-built offerings. With no filters the
# given collection is returned as is; otherwise an offering is dropped as
# soon as one non-blank filter fails to match it.
def filter_cached_offerings(offerings, filters)
  return offerings if filters.empty?

  offerings.reject do |offering|
    filters.any? do |key, value|
      !blank_filter_value?(value) && !offering_matches_filter?(offering, key, value)
    end
  end
end
525
+
526
# Checks one discovery filter against a cached offering. Comparisons are
# made on string forms; unrecognized filter keys never reject an offering.
def offering_matches_filter?(offering, key, value)
  case key.to_sym
  when :provider, :provider_family then offering.provider_family.to_s == value.to_s
  when :capability, :capabilities then Array(value).all? { |capability| offering.supports?(capability) }
  when :type, :usage_type, :purpose then offering.usage_type.to_s == value.to_s
  when :model, :id, :name
    names = [offering.model, offering.canonical_model_alias].compact
    names.any? { |name| name.to_s == value.to_s }
  when :instance, :instance_id, :provider_instance
    instances = [offering.provider_instance, offering.instance_id].compact
    instances.any? { |instance| instance.to_s == value.to_s }
  else true
  end
end
542
+
543
# Derives 'healthy' / 'unhealthy' from readiness data.
#
# A +ready+ flag that is exactly true wins outright. Otherwise the first
# truthy value among status/state (Symbol or String key) in the raw health
# payload is compared, case-insensitively, with the accepted healthy words.
def health_status(readiness_data, raw_health)
  ready = readiness_data[:ready] == true || readiness_data['ready'] == true
  return 'healthy' if ready

  reported = [:status, 'status', :state, 'state'].map { |key| raw_health[key] }.find(&:itself)
  healthy_words = %w[ok ready healthy running]
  healthy_words.include?(reported.to_s.downcase) ? 'healthy' : 'unhealthy'
end
551
+
552
# Rough token estimate for message content: one token per four characters,
# rounded up, never less than 1.
#
# * Content   - its text plus the string form of each attachment
# * Array     - each part contributes its :text / 'text' entry when the part
#               is keyed like a Hash, otherwise its string form
# * otherwise - the string form of the value
#
# Only parts that respond to +key?+ are indexed by :text. Probing for +[]+
# is not enough: String, Integer and Array all respond to +[]+ but raise
# TypeError when given a Symbol, which broke plain-string content parts.
def estimate_text_tokens(content)
  text = case content
         when Content
           [content.text, *content.attachments.map(&:to_s)].compact.join(' ')
         when Array
           content.map do |part|
             part.respond_to?(:key?) ? part[:text] || part['text'] || part.to_s : part.to_s
           end.join(' ')
         else
           content.to_s
         end
  [(text.length / 4.0).ceil, 1].max
end
565
+
372
566
  def build_audio_file_part(file_path)
373
567
  expanded_path = File.expand_path(file_path)
374
568
  mime_type = Marcel::MimeType.for(Pathname.new(expanded_path))
@@ -0,0 +1,21 @@
1
+ # frozen_string_literal: true
2
+
3
module Legion
  module Extensions
    module Llm
      # Canonical public provider method signatures shared by provider gems.
      #
      # Each entry maps a method name to [kind, name] pairs, using the same
      # parameter kinds that Method#parameters reports.
      module ProviderContract
        REQUIRED_SIGNATURES = {
          chat: [[:keyreq, :messages], [:keyreq, :model]],
          stream_chat: [[:keyreq, :messages], [:keyreq, :model]],
          embed: [[:keyreq, :text], [:keyreq, :model]],
          image: [[:keyreq, :prompt], [:keyreq, :model]],
          list_models: [[:key, :live], [:keyrest, :filters]],
          discover_offerings: [[:key, :live], [:keyrest, :filters]],
          health: [[:key, :live]],
          count_tokens: [[:keyreq, :messages], [:keyreq, :model], [:key, :params]]
        }.freeze
      end
    end
  end
end
@@ -7,7 +7,10 @@ module Legion
7
7
  module ProviderSettings
8
8
  module_function
9
9
 
10
- def build(family:, instance: {}, enabled: true, discovery: {}, instances: {})
10
+ def build(family:, instance: {}, enabled: true, discovery: {}, instances: {}, fleet: nil, gateways: nil, # rubocop:disable Metrics/ParameterLists
11
+ **legacy_settings)
12
+ validate_provider_defaults!(fleet:, gateways:, legacy_settings:)
13
+
11
14
  deep_merge(
12
15
  Legion::Extensions::Llm.default_settings,
13
16
  {
@@ -27,6 +30,20 @@ module Legion
27
30
  )
28
31
  end
29
32
 
33
# Rejects provider settings that may not appear at the top level.
#
# Raises ArgumentError when +fleet+ or +gateways+ is given, or when any
# other unexpected key was collected in +legacy_settings+. Returns nil when
# everything is acceptable.
def validate_provider_defaults!(fleet:, gateways:, legacy_settings:)
  fleet_message = 'Provider fleet defaults must be nested under an instance, for example instances.default.fleet'
  raise ArgumentError, fleet_message if fleet
  raise ArgumentError, 'Provider gateways settings are no longer supported; use instances instead' if gateways

  unsupported = legacy_settings.keys.map(&:to_sym)
  return if unsupported.empty?

  raise ArgumentError, "Unsupported top-level provider settings: #{unsupported.join(', ')}"
end
46
+
30
47
  def deep_dup(value)
31
48
  case value
32
49
  when Hash
@@ -0,0 +1,43 @@
1
+ # frozen_string_literal: true
2
+
3
module Legion
  module Extensions
    module Llm
      module Responses
        # Normalized non-streaming chat provider response.
        #
        # Content and metadata pass through ThinkingExtractor on construction;
        # the metadata as originally given stays available as
        # +internal_metadata+. The instance is frozen once built.
        class ChatResponse
          attr_reader :content, :thinking, :metadata, :model, :tool_calls, :tokens, :raw, :internal_metadata

          # An explicit +thinking+ argument takes precedence over thinking
          # found by the extractor.
          def initialize(content:, thinking: nil, metadata: {}, model: nil, tool_calls: nil, tokens: nil, raw: nil) # rubocop:disable Metrics/ParameterLists
            extracted = ThinkingExtractor.extract(content, metadata:)

            @content = extracted.content
            @thinking = thinking || extracted.thinking
            @metadata = extracted.metadata
            @internal_metadata = metadata.to_h
            @model = model
            @tool_calls = tool_calls
            @tokens = tokens
            @raw = raw

            freeze
          end

          # Hash form built from the extractor output; nil entries are omitted
          # and thinking/raw are not included.
          def to_h
            { content:, metadata:, model:, tool_calls:, tokens: }.compact
          end

          # Hash form that also carries thinking, the original metadata and
          # the raw payload; nil entries are omitted.
          def to_internal_h
            internal = to_h
            internal[:thinking] = thinking
            internal[:metadata] = internal_metadata
            internal[:raw] = raw
            internal.compact
          end
        end
      end
    end
  end
end
@@ -0,0 +1,38 @@
1
+ # frozen_string_literal: true
2
+
3
module Legion
  module Extensions
    module Llm
      module Responses
        # Normalized embedding provider response.
        #
        # +metadata+ is the form returned by ThinkingExtractor, while
        # +internal_metadata+ keeps the metadata as originally given. The
        # reader for +internal_metadata+ matches the one ChatResponse and
        # StreamChunk already expose. The instance is frozen once built.
        class EmbeddingResponse
          attr_reader :vectors, :model, :tokens, :metadata, :raw, :internal_metadata

          def initialize(vectors:, model:, tokens: nil, metadata: {}, raw: nil)
            @vectors = vectors
            @model = model
            @tokens = tokens
            @metadata = ThinkingExtractor.extract(nil, metadata: metadata).metadata
            @internal_metadata = metadata.to_h
            @raw = raw

            freeze
          end

          # Hash form with extractor-processed metadata; nil entries are omitted.
          def to_h
            {
              vectors: vectors,
              model: model,
              tokens: tokens,
              metadata: metadata
            }.compact
          end

          # Hash form carrying the original metadata and the raw payload;
          # nil entries are omitted.
          def to_internal_h
            to_h.merge(metadata: internal_metadata, raw: raw).compact
          end
        end
      end
    end
  end
end
@@ -0,0 +1,43 @@
1
+ # frozen_string_literal: true
2
+
3
module Legion
  module Extensions
    module Llm
      module Responses
        # Normalized streaming provider response chunk.
        #
        # Content (optional for a chunk) and metadata pass through
        # ThinkingExtractor on construction; the metadata as originally given
        # stays available as +internal_metadata+. The instance is frozen once
        # built.
        class StreamChunk
          attr_reader :content, :thinking, :metadata, :model, :tool_calls, :tokens, :raw, :internal_metadata

          # An explicit +thinking+ argument takes precedence over thinking
          # found by the extractor.
          def initialize(content: nil, thinking: nil, metadata: {}, model: nil, tool_calls: nil, tokens: nil, raw: nil) # rubocop:disable Metrics/ParameterLists
            extracted = ThinkingExtractor.extract(content, metadata:)

            @content = extracted.content
            @thinking = thinking || extracted.thinking
            @metadata = extracted.metadata
            @internal_metadata = metadata.to_h
            @model = model
            @tool_calls = tool_calls
            @tokens = tokens
            @raw = raw

            freeze
          end

          # Hash form built from the extractor output; nil entries are omitted
          # and thinking/raw are not included.
          def to_h
            visible = { content:, metadata:, model:, tool_calls:, tokens: }
            visible.compact
          end

          # Hash form that also carries thinking, the original metadata and
          # the raw payload; nil entries are omitted.
          def to_internal_h
            extras = { thinking:, metadata: internal_metadata, raw: }
            to_h.merge(extras).compact
          end
        end
      end
    end
  end
end
@@ -0,0 +1,155 @@
1
+ # frozen_string_literal: true
2
+
3
module Legion
  module Extensions
    module Llm
      module Responses
        # Separates provider thinking markup from caller-visible text.
        #
        # Thinking is collected from <think>...</think> markup inside string
        # content and from well-known metadata keys; those keys, together
        # with raw-body keys, are removed from the metadata that is returned.
        module ThinkingExtractor
          Extraction = Struct.new(:content, :thinking, :signature, :metadata, keyword_init: true)

          THINK_OPEN = '<think>'
          THINK_CLOSE = '</think>'
          THINK_PATTERN = %r{<think>(.*?)</think>}m
          THINKING_METADATA_KEYS = %i[
            reasoning_content reasoning thinking thinking_text thinking_signature reasoning_signature thought_signature
          ].freeze
          RAW_METADATA_KEYS = %i[
            raw raw_response response_body provider_body provider_response
          ].freeze

          module_function

          # Returns an Extraction holding the visible content, the combined
          # thinking text (metadata thinking first, then inline thinking), the
          # first non-blank signature and the scrubbed metadata.
          def extract(content, metadata: {})
            normalized = normalized_metadata(metadata)
            visible_content, inline_thinking = extract_from_content(content)
            declared_thinking = extract_metadata_thinking(normalized)

            Extraction.new(
              content: visible_content,
              thinking: compact_thinking([declared_thinking, inline_thinking]),
              signature: extract_metadata_signature(normalized),
              metadata: scrub_metadata(normalized)
            )
          end

          # Splits a String into [visible text, thinking text]. Non-String
          # content is returned untouched with nil thinking.
          def extract_from_content(content)
            return [content, nil] unless content.is_a?(String)

            visible = +''
            thoughts = []
            rest = content.dup
            rest = consume_next_segment(rest, visible, thoughts) until rest.empty?

            [visible.strip, compact_thinking(thoughts)]
          end

          # Consumes the text up to and including the next think tag and
          # returns what is left to process. A close tag appearing before any
          # open tag marks everything in front of it as thinking.
          def consume_next_segment(remaining, clean, thinking_parts)
            close_at = remaining.index(THINK_CLOSE)
            open_at = remaining.index(THINK_OPEN)

            if close_at.nil? && open_at.nil?
              clean << remaining
              return +''
            end

            stray_close = close_at && (open_at.nil? || close_at < open_at)
            return consume_open_think_segment(remaining, open_at, clean, thinking_parts) unless stray_close

            thinking_parts << remaining[0, close_at]
            tail = remaining[(close_at + THINK_CLOSE.length)..].to_s
            tail.sub(/\A[[:space:]]+/, '')
          end

          # Handles an open tag: text before it is visible, text up to the
          # matching close tag is thinking. With no close tag, the rest of the
          # input is treated as thinking.
          def consume_open_think_segment(remaining, open_index, clean, thinking_parts)
            clean << remaining[0, open_index]
            body = remaining[(open_index + THINK_OPEN.length)..].to_s
            inner, closer, tail = body.partition(THINK_CLOSE)
            thinking_parts << inner
            closer.empty? ? +'' : tail
          end

          # Thinking text gathered from the recognized metadata keys.
          def extract_metadata_thinking(metadata)
            compact_thinking(metadata.values_at(:reasoning_content, :reasoning, :thinking, :thinking_text))
          end

          # First non-blank signature among the recognized signature keys.
          def extract_metadata_signature(metadata)
            metadata.values_at(:thinking_signature, :reasoning_signature, :thought_signature).each do |candidate|
              next if candidate.nil?

              signature = candidate.to_s.strip
              return signature unless signature.empty?
            end
            nil
          end

          # Copy of the metadata with normalized keys, without thinking or raw
          # body entries, and with every remaining value scrubbed.
          def scrub_metadata(metadata)
            hidden_keys = THINKING_METADATA_KEYS + RAW_METADATA_KEYS
            kept = metadata.filter_map do |key, value|
              name = normalize_metadata_key(key)
              [name, scrub_metadata_value(value)] unless hidden_keys.include?(name)
            end
            kept.to_h
          end

          # camelCase and dash-separated keys become snake_case Symbols.
          def normalize_metadata_key(key)
            snake = key.to_s.gsub(/([a-z\d])([A-Z])/, '\1_\2')
            snake.tr('-', '_').downcase.to_sym
          end

          # Recursively scrubs nested hashes and arrays; strings lose their
          # think markup (and surrounding whitespace); other values pass through.
          def scrub_metadata_value(value)
            return scrub_metadata(normalized_metadata(value)) if value.is_a?(Hash)
            return value.map { |item| scrub_metadata_value(item) } if value.is_a?(Array)
            return extract_from_content(value).first if value.is_a?(String)

            value
          end

          # Hash form of the metadata with normalized keys; nil becomes {}.
          def normalized_metadata(metadata)
            metadata.nil? ? {} : metadata.to_h.transform_keys { |key| normalize_metadata_key(key) }
          end

          # Strips each part, drops nil and blank ones, and concatenates the
          # rest without a separator; nil when nothing is left.
          def compact_thinking(parts)
            pieces = parts.filter_map do |part|
              next if part.nil?

              stripped = part.to_s.strip
              stripped unless stripped.empty?
            end
            blank_to_nil(pieces.join)
          end

          def blank_to_nil(value)
            return nil if value.nil? || value.empty?

            value
          end

          private_class_method :extract_from_content, :consume_next_segment, :consume_open_think_segment,
                               :extract_metadata_thinking, :extract_metadata_signature, :scrub_metadata,
                               :normalize_metadata_key, :scrub_metadata_value, :normalized_metadata,
                               :compact_thinking, :blank_to_nil
        end
      end
    end
  end
end
@@ -210,8 +210,11 @@ module Legion
210
210
  end
211
211
 
212
212
  def consume_non_think_content(remaining, start_tag, output)
213
+ unmatched_close = remaining.index('</think>')
213
214
  start_index = remaining.index(start_tag)
214
- if start_index
215
+ if unmatched_close && (start_index.nil? || unmatched_close < start_index)
216
+ consume_unmatched_think_close(remaining, unmatched_close)
217
+ elsif start_index
215
218
  output << remaining.slice(0, start_index)
216
219
  @inside_think_tag = true
217
220
  remaining.slice((start_index + start_tag.length)..) || +''
@@ -223,6 +226,14 @@ module Legion
223
226
  end
224
227
  end
225
228
 
229
# Handles a '</think>' that has no preceding open tag in +remaining+: the
# text before it is appended to both thinking buffers, and the text after
# it is returned with leading whitespace removed.
def consume_unmatched_think_close(remaining, close_index)
  stray_thinking = remaining[0, close_index]
  [@thinking_text, @last_thinking_delta].each { |buffer| buffer << stray_thinking }
  after_close = remaining[(close_index + '</think>'.length)..].to_s
  after_close.sub(/\A[[:space:]]+/, '')
end
236
+
226
237
  def longest_suffix_prefix(text, tag)
227
238
  max = [text.length, tag.length - 1].min
228
239
  max.downto(1) do |len|
@@ -0,0 +1,24 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'legion/transport'
4
+
5
module Legion
  module Extensions
    module Llm
      module Transport
        module Exchanges
          # Shared topic exchange for live LLM fleet requests and replies.
          class Fleet < ::Legion::Transport::Exchange
            # Name of the exchange.
            def exchange_name = 'llm.fleet'

            # Exchange type used when none is given.
            def default_type = 'topic'
          end
        end
      end
    end
  end
end
@@ -0,0 +1,64 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'securerandom'
4
+ require_relative '../../fleet/default_exchange_reply'
5
+ require_relative '../../fleet/envelope_validation'
6
+ require_relative '../../fleet/protocol'
7
+ require_relative '../exchanges/fleet'
8
+
9
module Legion
  module Extensions
    module Llm
      module Transport
        module Messages
          # Correlated protocol-v2 error envelope for fleet reply queues.
          class FleetError < ::Legion::Transport::Message
            include Fleet::DefaultExchangeReply
            include Fleet::EnvelopeValidation

            def type
              Fleet::Protocol::ERROR_TYPE
            end

            def app_id
              @options[:app_id] || 'lex-llm'
            end

            def reply_to
              @options[:reply_to]
            end

            def correlation_id
              @options[:correlation_id]
            end

            # Generated once and memoized in the options when not supplied.
            def message_id
              @options[:message_id] ||= "llm_fleet_err_#{SecureRandom.uuid}"
            end

            # The error is routed to the +reply_to+ option, which must be present.
            def routing_key
              destination = @options[:reply_to]
              raise ArgumentError, 'reply_to is required' unless destination

              destination
            end

            # Runs the envelope checks in order and marks the message valid.
            def validate
              reject_legacy_options!
              %i[request_id correlation_id reply_to code].each { |name| require_option!(name) }
              require_protocol_version!
              @valid = true
            end

            # Parent payload merged with the error envelope; nil entries are dropped.
            def message
              base = super
              envelope = {
                protocol_version: @options[:protocol_version] || Fleet::Protocol::VERSION,
                request_id: @options[:request_id],
                correlation_id: correlation_id,
                idempotency_key: @options[:idempotency_key],
                operation: @options[:operation],
                provider: @options[:provider],
                provider_instance: @options[:provider_instance] || @options[:instance],
                model: @options[:model],
                reply_to: reply_to,
                message_context: @options[:message_context],
                trace_context: @options[:trace_context],
                code: @options[:code],
                message: @options[:message],
                error_class: @options[:error_class],
                retryable: @options[:retryable],
                metadata: @options[:metadata] || {}
              }
              base.merge(envelope).compact
            end
          end
        end
      end
    end
  end
end