lex-llm 0.3.1 → 0.4.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +43 -0
- data/README.md +18 -2
- data/lex-llm.gemspec +1 -0
- data/lib/legion/extensions/llm/auto_registration.rb +7 -36
- data/lib/legion/extensions/llm/embedding.rb +1 -1
- data/lib/legion/extensions/llm/error.rb +14 -0
- data/lib/legion/extensions/llm/errors/unsupported_capability.rb +21 -0
- data/lib/legion/extensions/llm/fleet/default_exchange_reply.rb +81 -0
- data/lib/legion/extensions/llm/fleet/envelope_validation.rb +39 -0
- data/lib/legion/extensions/llm/fleet/protocol.rb +16 -0
- data/lib/legion/extensions/llm/fleet/publish_safety.rb +123 -0
- data/lib/legion/extensions/llm/message.rb +9 -3
- data/lib/legion/extensions/llm/provider/open_ai_compatible.rb +37 -36
- data/lib/legion/extensions/llm/provider.rb +198 -4
- data/lib/legion/extensions/llm/provider_contract.rb +21 -0
- data/lib/legion/extensions/llm/provider_settings.rb +18 -1
- data/lib/legion/extensions/llm/responses/chat_response.rb +43 -0
- data/lib/legion/extensions/llm/responses/embedding_response.rb +38 -0
- data/lib/legion/extensions/llm/responses/stream_chunk.rb +43 -0
- data/lib/legion/extensions/llm/responses/thinking_extractor.rb +155 -0
- data/lib/legion/extensions/llm/stream_accumulator.rb +12 -1
- data/lib/legion/extensions/llm/transport/exchanges/fleet.rb +24 -0
- data/lib/legion/extensions/llm/transport/messages/fleet_error.rb +64 -0
- data/lib/legion/extensions/llm/transport/messages/fleet_request.rb +155 -0
- data/lib/legion/extensions/llm/transport/messages/fleet_response.rb +63 -0
- data/lib/legion/extensions/llm/version.rb +1 -1
- data/lib/legion/extensions/llm.rb +31 -11
- metadata +29 -1
|
@@ -62,6 +62,16 @@ module Legion
|
|
|
62
62
|
end
|
|
63
63
|
|
|
64
64
|
# rubocop:disable Metrics/ParameterLists
|
|
65
|
+
# Keyword-only entry point for a non-streaming chat request.
#
# Every argument is forwarded unchanged to #complete, with +messages+ passed
# positionally; the return value is whatever #complete returns.
def chat(messages:, model:, tools: [], temperature: nil, params: {}, headers: {}, schema: nil, thinking: nil,
         tool_prefs: nil)
  complete(
    messages,
    tools: tools,
    temperature: temperature,
    model: model,
    params: params,
    headers: headers,
    schema: schema,
    thinking: thinking,
    tool_prefs: tool_prefs
  )
end
|
|
69
|
+
|
|
70
|
+
# Keyword-only entry point for a streaming chat request.
#
# Forwards every argument, plus the caller's block (if any), to #complete with
# +messages+ passed positionally; returns whatever #complete returns.
def stream_chat(messages:, model:, tools: [], temperature: nil, params: {}, headers: {}, schema: nil,
                thinking: nil, tool_prefs: nil, &block)
  complete(
    messages,
    tools: tools,
    temperature: temperature,
    model: model,
    params: params,
    headers: headers,
    schema: schema,
    thinking: thinking,
    tool_prefs: tool_prefs,
    &block
  )
end
|
|
74
|
+
|
|
65
75
|
def complete(messages, tools:, temperature:, model:, params: {}, headers: {}, schema: nil, thinking: nil,
|
|
66
76
|
tool_prefs: nil, &)
|
|
67
77
|
normalized_temperature = maybe_normalize_temperature(temperature, model)
|
|
@@ -88,14 +98,55 @@ module Legion
|
|
|
88
98
|
end
|
|
89
99
|
# rubocop:enable Metrics/ParameterLists
|
|
90
100
|
|
|
91
|
-
def list_models
|
|
101
|
+
# Requests the model listing from `models_url` over @connection and hands the
# raw response to #parse_list_models_response together with this provider's
# slug and capabilities.
#
# +live+ and +filters+ are part of the shared signature but are not consulted
# here; they are only referenced so they do not count as unused arguments.
def list_models(live: false, **filters)
  _ = [live, filters]
  parse_list_models_response(@connection.get(models_url), slug, capabilities)
end
|
|
95
106
|
|
|
96
|
-
def
|
|
97
|
-
|
|
98
|
-
|
|
107
|
+
# Returns the offerings this provider exposes.
#
# Without +live+ the previously cached offerings (or an empty list) are
# filtered and returned. With +live+ the provider health is read once, the
# model list is fetched, models that do not match +filters+ are dropped, and
# the remaining models are converted to offerings.
#
# Note that the live result replaces @cached_offerings, so after a filtered
# live call the cache holds only the offerings that matched those filters.
def discover_offerings(live: false, **filters)
  unless live
    cached = Array(@cached_offerings)
    return filter_cached_offerings(cached, filters)
  end

  current_health = health(live: live)
  models = Array(list_models(live: live, **filters))
  @cached_offerings = models.each_with_object([]) do |model, offerings|
    next unless model_matches_filters?(model, filters)

    offering = offering_from_model(model, health: current_health)
    offerings << offering if offering
  end
end
|
|
118
|
+
|
|
119
|
+
# Builds a normalized health report for this provider instance.
#
# The report is derived from #readiness: its `health` entry (symbol or string
# key) becomes +raw+, #health_status decides +status+, and the circuit is
# reported as 'closed' only when the status is 'healthy'. Nil entries are
# removed from the report.
#
# Any StandardError raised while building the report is passed to
# #handle_exception and converted into an 'unhealthy' report that carries the
# error class name and message.
def health(live: false)
  snapshot = readiness(live: live)
  details = snapshot[:health] || snapshot['health'] || {}
  state = health_status(snapshot, details)

  report = {
    provider: slug.to_sym,
    instance_id: provider_instance_id,
    status: state,
    ready: snapshot[:ready] == true || snapshot['ready'] == true,
    circuit_state: (state == 'healthy' ? 'closed' : 'open'),
    latency_ms: details[:latency_ms] || details['latency_ms'],
    raw: details
  }
  report.compact
rescue StandardError => e
  handle_exception(e, level: :warn, handled: true, operation: 'llm.provider.health')
  failure = { provider: slug.to_sym, instance_id: provider_instance_id }
  failure.merge(
    status: 'unhealthy',
    ready: false,
    circuit_state: 'open',
    error: e.class.name,
    message: e.message
  )
end
|
|
144
|
+
|
|
145
|
+
# Requests embeddings for +text+ from the provider.
#
# The rendered embedding payload is deep-merged with +params+ and POSTed to
# the embedding URL for +model+. When +headers+ is non-empty it is merged
# underneath the request's existing headers, so headers already set on the
# request win on conflict. The response is parsed by #parse_embedding_response.
def embed(text:, model:, dimensions: nil, params: {}, headers: {})
  rendered = render_embedding_payload(text, model: model, dimensions: dimensions)
  body = Utils.deep_merge(rendered, params)
  endpoint = embedding_url(model: model)

  response = @connection.post(endpoint, body) do |request|
    request.headers = headers.merge(request.headers) unless headers.empty?
  end

  parse_embedding_response(response, model: model, text: text)
end
|
|
101
152
|
|
|
@@ -112,6 +163,18 @@ module Legion
|
|
|
112
163
|
parse_image_response(response, model:)
|
|
113
164
|
end
|
|
114
165
|
|
|
166
|
+
# Keyword-only entry point for image generation; forwards to #paint with
# +prompt+ passed positionally and returns its result.
def image(prompt:, model:, size:, with: nil, mask: nil, params: {}) # rubocop:disable Metrics/ParameterLists
  paint(prompt, model: model, size: size, with: with, mask: mask, params: params)
end
|
|
169
|
+
|
|
170
|
+
# Estimates the total token count of +messages+.
#
# Each message contributes the estimate of its content, read via `#content`
# when the message responds to it and otherwise via the :content / 'content'
# keys. +model+ and +params+ are accepted for signature compatibility but do
# not influence this estimate.
def count_tokens(messages:, model:, params: {})
  _ = [model, params]

  estimates = Array(messages).map do |message|
    content = if message.respond_to?(:content)
                message.content
              else
                message[:content] || message['content']
              end
    estimate_text_tokens(content)
  end
  estimates.sum
end
|
|
177
|
+
|
|
115
178
|
def transcribe(audio_file, model:, language:, **)
|
|
116
179
|
file_part = build_audio_file_part(audio_file)
|
|
117
180
|
payload = render_transcription_payload(file_part, model:, language:, **)
|
|
@@ -319,6 +382,12 @@ module Legion
|
|
|
319
382
|
end
|
|
320
383
|
end
|
|
321
384
|
|
|
385
|
+
# Identifier of this provider instance as a Symbol.
#
# Uses `config.instance_id` when the config exposes it and it is truthy;
# otherwise falls back to :default.
def provider_instance_id
  configured = config.respond_to?(:instance_id) ? config.instance_id : nil
  configured ? configured.to_sym : :default
end
|
|
390
|
+
|
|
322
391
|
class << self
|
|
323
392
|
def name
|
|
324
393
|
to_s.split('::').last
|
|
@@ -369,6 +438,131 @@ module Legion
|
|
|
369
438
|
raise UnsupportedAttachmentError, "#{name} does not support image references in paint"
|
|
370
439
|
end
|
|
371
440
|
|
|
441
|
+
# Converts a model record into a Routing::ModelOffering for this provider.
#
# The provider instance comes from the model when it carries one, otherwise
# from #provider_instance_id. Transport, tier, usage type, limits and metadata
# are delegated to the corresponding offering_* helpers; +health+ is attached
# as given.
def offering_from_model(model, health: {})
  attributes = {
    provider_family: slug.to_sym,
    provider_instance: model.instance || provider_instance_id,
    transport: offering_transport,
    tier: offering_tier,
    model: model.id,
    canonical_model_alias: model.name,
    model_family: model.family,
    usage_type: offering_usage_type(model),
    capabilities: model.capabilities,
    limits: offering_limits(model),
    health: health,
    metadata: offering_metadata(model)
  }
  Routing::ModelOffering.new(**attributes)
end
|
|
457
|
+
|
|
458
|
+
# Transport advertised on offerings: :local for local providers, :http otherwise.
def offering_transport
  return :local if local?

  :http
end
|
|
461
|
+
|
|
462
|
+
# Tier advertised on offerings: :local for local providers, :direct otherwise.
def offering_tier
  return :local if local?

  :direct
end
|
|
465
|
+
|
|
466
|
+
# Usage type of a model: :embedding when the model reports `embedding?`,
# otherwise :inference.
def offering_usage_type(model)
  return :embedding if model.embedding?

  :inference
end
|
|
469
|
+
|
|
470
|
+
# Token limits of a model as a Hash, omitting any limit the model reports as nil.
def offering_limits(model)
  limits = {
    context_window: model.context_length,
    max_output_tokens: model.max_output_tokens
  }
  limits.reject { |_name, value| value.nil? }
end
|
|
476
|
+
|
|
477
|
+
# Descriptive metadata for an offering.
#
# Starts from the model's id and size/quantization/modality attributes, then
# merges the model's own metadata hash on top (so its keys win on conflict)
# and drops nil values.
def offering_metadata(model)
  described = %i[
    parameter_count parameter_size quantization size_bytes modalities_input modalities_output
  ].to_h { |attribute| [attribute, model.public_send(attribute)] }

  { raw_model: model.id }
    .merge(described)
    .merge(model.metadata || {})
    .compact
end
|
|
488
|
+
|
|
489
|
+
# True when +model+ satisfies every non-blank filter in +filters+.
# Blank filter values are ignored; an empty filter set matches everything.
def model_matches_filters?(model, filters)
  filters.none? do |key, value|
    !blank_filter_value?(value) && !model_matches_filter?(model, key, value)
  end
end
|
|
496
|
+
|
|
497
|
+
# A filter value is blank when it is nil, or when it responds to `empty?`
# and reports itself empty.
def blank_filter_value?(value)
  return true if value.nil?
  return false unless value.respond_to?(:empty?)

  value.empty?
end
|
|
500
|
+
|
|
501
|
+
# Checks a single discovery filter against a model record.
#
# Supported keys (symbol or string):
# - provider / provider_family: matches this provider's slug. This mirrors
#   the cached-offering filter, which compares against the offering's
#   provider family (itself derived from the slug), so live and cached
#   discovery agree.
# - capability / capabilities: every requested capability must be supported.
# - type / usage_type / purpose: matches the derived usage type or the
#   model's own type.
# - model / id / name: matches the model id or name.
# - instance / instance_id / provider_instance: matches this provider's
#   instance id or the model's instance.
#
# Unknown keys are ignored and therefore always match. Comparisons are made
# on the string forms of both sides.
def model_matches_filter?(model, key, value)
  case key.to_sym
  when :provider, :provider_family
    slug.to_s == value.to_s
  when :capability, :capabilities
    Array(value).all? { |capability| model.supports?(capability) }
  when :type, :usage_type, :purpose
    offering_usage_type(model).to_s == value.to_s || model.type.to_s == value.to_s
  when :model, :id, :name
    [model.id, model.name].map(&:to_s).include?(value.to_s)
  when :instance, :instance_id, :provider_instance
    provider_instance_id.to_s == value.to_s || model.instance.to_s == value.to_s
  else
    true
  end
end
|
|
515
|
+
|
|
516
|
+
# Narrows already-built offerings to those satisfying every non-blank filter.
# With no filters the given collection is returned as-is.
def filter_cached_offerings(offerings, filters)
  return offerings if filters.empty?

  offerings.reject do |offering|
    filters.any? do |key, value|
      !blank_filter_value?(value) && !offering_matches_filter?(offering, key, value)
    end
  end
end
|
|
525
|
+
|
|
526
|
+
# Checks a single discovery filter against an already-built offering.
#
# Provider, usage type, model and instance filters compare string forms;
# capability filters require every requested capability to be supported.
# Unknown keys are ignored and therefore always match.
def offering_matches_filter?(offering, key, value)
  wanted = value.to_s

  case key.to_sym
  when :provider, :provider_family
    offering.provider_family.to_s == wanted
  when :capability, :capabilities
    Array(value).all? { |capability| offering.supports?(capability) }
  when :type, :usage_type, :purpose
    offering.usage_type.to_s == wanted
  when :model, :id, :name
    [offering.model, offering.canonical_model_alias].compact.any? { |candidate| candidate.to_s == wanted }
  when :instance, :instance_id, :provider_instance
    [offering.provider_instance, offering.instance_id].compact.any? { |candidate| candidate.to_s == wanted }
  else
    true
  end
end
|
|
542
|
+
|
|
543
|
+
# Reduces readiness data to 'healthy' or 'unhealthy'.
#
# A `ready` flag that is exactly true (symbol or string key) is healthy.
# Otherwise the first truthy value among the status/state keys of
# +raw_health+ is compared, case-insensitively, against the known healthy
# words.
def health_status(readiness_data, raw_health)
  ready_flags = [readiness_data[:ready], readiness_data['ready']]
  return 'healthy' if ready_flags.include?(true)

  reported = [:status, 'status', :state, 'state'].lazy.map { |key| raw_health[key] }.find(&:itself)
  %w[ok ready healthy running].include?(reported.to_s.downcase) ? 'healthy' : 'unhealthy'
end
|
|
551
|
+
|
|
552
|
+
# Rough token estimate for a piece of message content: one token per four
# characters, rounded up, never less than one.
#
# - Content objects contribute their text plus the string form of each
#   attachment.
# - Arrays are treated as content parts. Scalar parts (String, Symbol,
#   Numeric) contribute their string form; they all respond to `[]` but
#   would raise TypeError on a `:text` lookup, so they are handled before
#   the keyed lookup. Other parts that respond to `[]` contribute their
#   :text / 'text' entry, falling back to their string form.
# - Anything else contributes its string form (nil counts as empty).
def estimate_text_tokens(content)
  text = case content
         when Content
           [content.text, *content.attachments.map(&:to_s)].compact.join(' ')
         when Array
           content.map do |part|
             case part
             when String, Symbol, Numeric
               part.to_s
             else
               part.respond_to?(:[]) ? part[:text] || part['text'] || part.to_s : part.to_s
             end
           end.join(' ')
         else
           content.to_s
         end
  [(text.length / 4.0).ceil, 1].max
end
|
|
565
|
+
|
|
372
566
|
def build_audio_file_part(file_path)
|
|
373
567
|
expanded_path = File.expand_path(file_path)
|
|
374
568
|
mime_type = Marcel::MimeType.for(Pathname.new(expanded_path))
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Legion
  module Extensions
    module Llm
      # Canonical public method signatures that provider gems are expected to share.
      #
      # Each entry maps a method name to a list of [kind, name] pairs, using the
      # parameter kinds reported by Method#parameters (:keyreq, :key, :keyrest).
      module ProviderContract
        REQUIRED_SIGNATURES = {
          chat: [[:keyreq, :messages], [:keyreq, :model]],
          stream_chat: [[:keyreq, :messages], [:keyreq, :model]],
          embed: [[:keyreq, :text], [:keyreq, :model]],
          image: [[:keyreq, :prompt], [:keyreq, :model]],
          list_models: [[:key, :live], [:keyrest, :filters]],
          discover_offerings: [[:key, :live], [:keyrest, :filters]],
          health: [[:key, :live]],
          count_tokens: [[:keyreq, :messages], [:keyreq, :model], [:key, :params]]
        }.freeze
      end
    end
  end
end
|
|
@@ -7,7 +7,10 @@ module Legion
|
|
|
7
7
|
module ProviderSettings
|
|
8
8
|
module_function
|
|
9
9
|
|
|
10
|
-
def build(family:, instance: {}, enabled: true, discovery: {}, instances: {}
|
|
10
|
+
def build(family:, instance: {}, enabled: true, discovery: {}, instances: {}, fleet: nil, gateways: nil, # rubocop:disable Metrics/ParameterLists
|
|
11
|
+
**legacy_settings)
|
|
12
|
+
validate_provider_defaults!(fleet:, gateways:, legacy_settings:)
|
|
13
|
+
|
|
11
14
|
deep_merge(
|
|
12
15
|
Legion::Extensions::Llm.default_settings,
|
|
13
16
|
{
|
|
@@ -27,6 +30,20 @@ module Legion
|
|
|
27
30
|
)
|
|
28
31
|
end
|
|
29
32
|
|
|
33
|
+
# Rejects provider-level settings that are no longer accepted at the top level.
#
# Raises ArgumentError when +fleet+ or +gateways+ is truthy, or when any
# other unexpected keys were collected in +legacy_settings+. Returns nil when
# nothing is wrong.
def validate_provider_defaults!(fleet:, gateways:, legacy_settings:)
  fleet_message = 'Provider fleet defaults must be nested under an instance, for example instances.default.fleet'
  raise ArgumentError, fleet_message if fleet

  gateways_message = 'Provider gateways settings are no longer supported; use instances instead'
  raise ArgumentError, gateways_message if gateways

  unknown = legacy_settings.keys.map(&:to_sym)
  return if unknown.empty?

  raise ArgumentError, "Unsupported top-level provider settings: #{unknown.join(', ')}"
end
|
|
46
|
+
|
|
30
47
|
def deep_dup(value)
|
|
31
48
|
case value
|
|
32
49
|
when Hash
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Legion
  module Extensions
    module Llm
      module Responses
        # Immutable, normalized result of a non-streaming chat call.
        #
        # Content and metadata are passed through ThinkingExtractor, so #content
        # and #metadata hold the extractor's output, while #internal_metadata keeps
        # the metadata exactly as supplied. An explicit +thinking+ argument takes
        # precedence over thinking found by the extractor.
        class ChatResponse
          # Fields exposed by #to_h, in output order.
          PUBLIC_FIELDS = %i[content metadata model tool_calls tokens].freeze
          private_constant :PUBLIC_FIELDS

          attr_reader :content, :thinking, :metadata, :model, :tool_calls, :tokens, :raw, :internal_metadata

          def initialize(content:, thinking: nil, metadata: {}, model: nil, tool_calls: nil, tokens: nil, raw: nil) # rubocop:disable Metrics/ParameterLists
            parsed = ThinkingExtractor.extract(content, metadata: metadata)

            @model = model
            @tool_calls = tool_calls
            @tokens = tokens
            @raw = raw
            @internal_metadata = metadata.to_h
            @content = parsed.content
            @thinking = thinking || parsed.thinking
            @metadata = parsed.metadata

            freeze
          end

          # Caller-facing hash: no thinking, no raw payload, nil fields omitted.
          def to_h
            PUBLIC_FIELDS.each_with_object({}) do |field, result|
              value = public_send(field)
              result[field] = value unless value.nil?
            end
          end

          # Internal hash: #to_h plus thinking and raw, with the unscrubbed metadata.
          def to_internal_h
            internal = { thinking: thinking, metadata: internal_metadata, raw: raw }
            to_h.merge(internal).reject { |_field, value| value.nil? }
          end
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Legion
  module Extensions
    module Llm
      module Responses
        # Immutable, normalized result of an embedding call.
        #
        # #metadata holds the metadata as returned by ThinkingExtractor, while
        # #internal_metadata keeps the metadata exactly as supplied. The
        # +internal_metadata+ reader matches the one on ChatResponse and
        # StreamChunk, so all three response types can be read uniformly.
        class EmbeddingResponse
          attr_reader :vectors, :model, :tokens, :metadata, :raw, :internal_metadata

          # @param vectors the embedding vectors as produced by the provider
          # @param model the model identifier that produced them
          # @param tokens optional token usage
          # @param metadata provider metadata; nil is treated as empty
          # @param raw optional raw provider payload
          def initialize(vectors:, model:, tokens: nil, metadata: {}, raw: nil)
            @vectors = vectors
            @model = model
            @tokens = tokens
            @metadata = ThinkingExtractor.extract(nil, metadata: metadata).metadata
            @internal_metadata = metadata.to_h
            @raw = raw

            freeze
          end

          # Caller-facing hash: no raw payload, nil fields omitted.
          def to_h
            {
              vectors: vectors,
              model: model,
              tokens: tokens,
              metadata: metadata
            }.compact
          end

          # Internal hash: #to_h with the unscrubbed metadata plus the raw payload.
          def to_internal_h
            to_h.merge(metadata: internal_metadata, raw: raw).compact
          end
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Legion
  module Extensions
    module Llm
      module Responses
        # Immutable, normalized chunk of a streaming provider response.
        #
        # Content and metadata are passed through ThinkingExtractor, so #content
        # and #metadata hold the extractor's output, while #internal_metadata keeps
        # the metadata exactly as supplied. An explicit +thinking+ argument takes
        # precedence over thinking found by the extractor.
        class StreamChunk
          # Fields exposed by #to_h, in output order.
          PUBLIC_FIELDS = %i[content metadata model tool_calls tokens].freeze
          private_constant :PUBLIC_FIELDS

          attr_reader :content, :thinking, :metadata, :model, :tool_calls, :tokens, :raw, :internal_metadata

          def initialize(content: nil, thinking: nil, metadata: {}, model: nil, tool_calls: nil, tokens: nil, raw: nil) # rubocop:disable Metrics/ParameterLists
            parsed = ThinkingExtractor.extract(content, metadata: metadata)

            @model = model
            @tool_calls = tool_calls
            @tokens = tokens
            @raw = raw
            @internal_metadata = metadata.to_h
            @content = parsed.content
            @thinking = thinking || parsed.thinking
            @metadata = parsed.metadata

            freeze
          end

          # Caller-facing hash: no thinking, no raw payload, nil fields omitted.
          def to_h
            PUBLIC_FIELDS.each_with_object({}) do |field, result|
              value = public_send(field)
              result[field] = value unless value.nil?
            end
          end

          # Internal hash: #to_h plus thinking and raw, with the unscrubbed metadata.
          def to_internal_h
            internal = { thinking: thinking, metadata: internal_metadata, raw: raw }
            to_h.merge(internal).reject { |_field, value| value.nil? }
          end
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,155 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Legion
  module Extensions
    module Llm
      module Responses
        # Splits provider "thinking" output away from the text shown to callers.
        #
        # Thinking is collected from <think>...</think> markup inside the content
        # and from well-known metadata keys; the returned metadata has those keys,
        # and raw-payload keys, removed at every nesting level.
        module ThinkingExtractor
          Extraction = Struct.new(:content, :thinking, :signature, :metadata, keyword_init: true)

          THINK_OPEN = '<think>'
          THINK_CLOSE = '</think>'
          THINK_PATTERN = %r{<think>(.*?)</think>}m
          THINKING_METADATA_KEYS = %i[
            reasoning_content reasoning thinking thinking_text thinking_signature reasoning_signature thought_signature
          ].freeze
          RAW_METADATA_KEYS = %i[
            raw raw_response response_body provider_body provider_response
          ].freeze

          module_function

          # Returns an Extraction with the visible content, the combined thinking
          # text (metadata thinking first, then inline thinking), the first
          # non-blank signature found in metadata, and the scrubbed metadata.
          def extract(content, metadata: {})
            meta = normalized_metadata(metadata)
            visible, inline_thinking = extract_from_content(content)

            Extraction.new(
              content: visible,
              thinking: compact_thinking([extract_metadata_thinking(meta), inline_thinking]),
              signature: extract_metadata_signature(meta),
              metadata: scrub_metadata(meta)
            )
          end

          # Walks a String segment by segment, collecting visible text and thinking.
          # Non-String content is returned untouched with no thinking.
          def extract_from_content(content)
            return [content, nil] unless content.is_a?(String)

            visible = +''
            thoughts = []
            pending = content.dup
            until pending.empty?
              pending = consume_next_segment(pending, visible, thoughts)
            end

            [visible.strip, compact_thinking(thoughts)]
          end

          # Consumes text up to and including the next think tag and returns what
          # is left. A closing tag that appears before any opening tag marks all
          # preceding text as thinking.
          def consume_next_segment(remaining, clean, thinking_parts)
            closing_at = remaining.index(THINK_CLOSE)
            opening_at = remaining.index(THINK_OPEN)
            stray_close = closing_at && (opening_at.nil? || closing_at < opening_at)

            if stray_close
              thinking_parts << remaining[0, closing_at]
              tail = remaining[(closing_at + THINK_CLOSE.length)..]
              return tail.to_s.sub(/\A[[:space:]]+/, '')
            end
            return consume_open_think_segment(remaining, opening_at, clean, thinking_parts) if opening_at

            clean << remaining
            +''
          end

          # Handles text starting at an opening tag: text before the tag is
          # visible, text up to the matching close (or to the end when unclosed)
          # is thinking. Returns the text after the closing tag.
          def consume_open_think_segment(remaining, open_index, clean, thinking_parts)
            clean << remaining[0, open_index]
            inner = remaining[(open_index + THINK_OPEN.length)..].to_s
            thought, closer, tail = inner.partition(THINK_CLOSE)

            thinking_parts << thought
            closer.empty? ? +'' : tail
          end

          # Combined thinking text found under the textual thinking keys.
          def extract_metadata_thinking(metadata)
            compact_thinking(metadata.values_at(:reasoning_content, :reasoning, :thinking, :thinking_text))
          end

          # First non-blank signature among the signature keys, stripped.
          def extract_metadata_signature(metadata)
            candidates = metadata.values_at(:thinking_signature, :reasoning_signature, :thought_signature)
            candidates.each do |candidate|
              next if candidate.nil?

              text = candidate.to_s.strip
              return text unless text.empty?
            end
            nil
          end

          # Copy of the metadata without thinking/raw keys, with values scrubbed.
          def scrub_metadata(metadata)
            hidden = THINKING_METADATA_KEYS + RAW_METADATA_KEYS
            kept = metadata.filter_map do |key, value|
              name = normalize_metadata_key(key)
              [name, scrub_metadata_value(value)] unless hidden.include?(name)
            end
            kept.to_h
          end

          # camelCase / kebab-case / String keys -> snake_case Symbol.
          def normalize_metadata_key(key)
            snake = key.to_s.gsub(/([a-z\d])([A-Z])/, '\1_\2')
            snake.tr('-', '_').downcase.to_sym
          end

          # Recursively scrubs nested hashes and arrays; strips think markup from strings.
          def scrub_metadata_value(value)
            case value
            when Hash then scrub_metadata(normalized_metadata(value))
            when Array then value.map { |item| scrub_metadata_value(item) }
            when String then extract_from_content(value).first
            else value
            end
          end

          # Hash form of the metadata with normalized keys; nil becomes {}.
          def normalized_metadata(metadata)
            return {} if metadata.nil?

            metadata.to_h.to_h { |key, value| [normalize_metadata_key(key), value] }
          end

          # Strips each part, drops blanks, concatenates; nil when nothing remains.
          def compact_thinking(parts)
            pieces = parts.filter_map do |part|
              next if part.nil?

              stripped = part.to_s.strip
              stripped unless stripped.empty?
            end
            blank_to_nil(pieces.join)
          end

          def blank_to_nil(value)
            return nil if value.nil? || value.empty?

            value
          end

          private_class_method :extract_from_content, :consume_next_segment, :consume_open_think_segment,
                               :extract_metadata_thinking, :extract_metadata_signature, :scrub_metadata,
                               :normalize_metadata_key, :scrub_metadata_value, :normalized_metadata,
                               :compact_thinking, :blank_to_nil
        end
      end
    end
  end
end
|
|
@@ -210,8 +210,11 @@ module Legion
|
|
|
210
210
|
end
|
|
211
211
|
|
|
212
212
|
def consume_non_think_content(remaining, start_tag, output)
|
|
213
|
+
unmatched_close = remaining.index('</think>')
|
|
213
214
|
start_index = remaining.index(start_tag)
|
|
214
|
-
if start_index
|
|
215
|
+
if unmatched_close && (start_index.nil? || unmatched_close < start_index)
|
|
216
|
+
consume_unmatched_think_close(remaining, unmatched_close)
|
|
217
|
+
elsif start_index
|
|
215
218
|
output << remaining.slice(0, start_index)
|
|
216
219
|
@inside_think_tag = true
|
|
217
220
|
remaining.slice((start_index + start_tag.length)..) || +''
|
|
@@ -223,6 +226,14 @@ module Legion
|
|
|
223
226
|
end
|
|
224
227
|
end
|
|
225
228
|
|
|
229
|
+
# Handles a '</think>' that shows up without a preceding opening tag.
#
# Everything before the tag is appended to both the accumulated thinking text
# and the latest thinking delta. Returns the text after the tag with leading
# whitespace removed.
def consume_unmatched_think_close(remaining, close_index)
  closing_tag = '</think>'
  thought = remaining[0, close_index]

  [@thinking_text, @last_thinking_delta].each { |buffer| buffer << thought }

  tail = remaining[(close_index + closing_tag.length)..]
  tail.to_s.sub(/\A[[:space:]]+/, '')
end
|
|
236
|
+
|
|
226
237
|
def longest_suffix_prefix(text, tag)
|
|
227
238
|
max = [text.length, tag.length - 1].min
|
|
228
239
|
max.downto(1) do |len|
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'legion/transport'
|
|
4
|
+
|
|
5
|
+
module Legion
  module Extensions
    module Llm
      module Transport
        module Exchanges
          # Exchange used for live LLM fleet requests and replies; named
          # 'llm.fleet' and declared with the 'topic' type.
          class Fleet < ::Legion::Transport::Exchange
            def exchange_name = 'llm.fleet'

            def default_type = 'topic'
          end
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'securerandom'
|
|
4
|
+
require_relative '../../fleet/default_exchange_reply'
|
|
5
|
+
require_relative '../../fleet/envelope_validation'
|
|
6
|
+
require_relative '../../fleet/protocol'
|
|
7
|
+
require_relative '../exchanges/fleet'
|
|
8
|
+
|
|
9
|
+
module Legion
  module Extensions
    module Llm
      module Transport
        module Messages
          # Protocol-v2 error envelope sent to a fleet reply queue, correlated
          # with the originating request through its correlation id.
          class FleetError < ::Legion::Transport::Message
            include Fleet::DefaultExchangeReply
            include Fleet::EnvelopeValidation

            def type
              Fleet::Protocol::ERROR_TYPE
            end

            def app_id
              @options[:app_id] || 'lex-llm'
            end

            def reply_to
              @options[:reply_to]
            end

            def correlation_id
              @options[:correlation_id]
            end

            # Generated once and stored back into the options so repeated calls agree.
            def message_id
              @options[:message_id] ||= "llm_fleet_err_#{SecureRandom.uuid}"
            end

            # The error is routed to the requester's reply queue, which must be present.
            def routing_key
              target = @options[:reply_to]
              raise ArgumentError, 'reply_to is required' unless target

              target
            end

            # Rejects legacy options, then requires the correlation fields, the
            # error code and a protocol version before marking the message valid.
            def validate
              reject_legacy_options!
              %i[request_id correlation_id reply_to code].each { |name| require_option!(name) }
              require_protocol_version!
              @valid = true
            end

            # Base message merged with the error envelope; nil entries are dropped.
            def message
              base = super
              envelope = {
                protocol_version: @options[:protocol_version] || Fleet::Protocol::VERSION,
                request_id: @options[:request_id],
                correlation_id: correlation_id,
                idempotency_key: @options[:idempotency_key],
                operation: @options[:operation],
                provider: @options[:provider],
                provider_instance: @options[:provider_instance] || @options[:instance],
                model: @options[:model],
                reply_to: reply_to,
                message_context: @options[:message_context],
                trace_context: @options[:trace_context],
                code: @options[:code],
                message: @options[:message],
                error_class: @options[:error_class],
                retryable: @options[:retryable],
                metadata: @options[:metadata] || {}
              }
              base.merge(envelope).compact
            end
          end
        end
      end
    end
  end
end
|