legion-llm 0.9.10 → 0.9.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +34 -0
- data/lib/legion/llm/api/native/helpers.rb +65 -8
- data/lib/legion/llm/api/native/inference.rb +8 -1
- data/lib/legion/llm/audit.rb +11 -0
- data/lib/legion/llm/call/dispatch.rb +4 -2
- data/lib/legion/llm/fleet/dispatcher.rb +8 -2
- data/lib/legion/llm/fleet/handler.rb +11 -2
- data/lib/legion/llm/hooks/reflection.rb +8 -2
- data/lib/legion/llm/inference/audit_publisher.rb +3 -3
- data/lib/legion/llm/inference/executor.rb +10 -11
- data/lib/legion/llm/inference/route_attempts.rb +1 -2
- data/lib/legion/llm/inference/steps/rag_context.rb +30 -2
- data/lib/legion/llm/inference/steps/sticky_persist.rb +21 -2
- data/lib/legion/llm/inference/steps/tool_calls.rb +110 -0
- data/lib/legion/llm/inference.rb +9 -6
- data/lib/legion/llm/metering.rb +11 -1
- data/lib/legion/llm/publisher_identity.rb +118 -0
- data/lib/legion/llm/router.rb +15 -0
- data/lib/legion/llm/skills/base.rb +18 -9
- data/lib/legion/llm/transport/message.rb +2 -2
- data/lib/legion/llm/transport/messages/prompt_event.rb +1 -1
- data/lib/legion/llm/version.rb +1 -1
- data/lib/legion/llm.rb +1 -0
- metadata +2 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 206afbe8609bb8ed7df111d216967aafba55b0d523d5939aad024f169e43f5ef
|
|
4
|
+
data.tar.gz: 283f42c3d5b9ba07aa7857aad3e6d1f30b7559f35282f4a1d00986ea4ab2c646
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 54b3e821013f9ba6f73019907821e85d1aaacc766e8942767f5e6a9630d66757c1d16a8e1b6643054895b1e5de229245e45ebf562f42bdec1cfac8f609024a5c
|
|
7
|
+
data.tar.gz: 45d349d01bef14e68527aa0c8108c4d08f71e05b63c87c331e377703bdacfcee431b903fe72dc2ad48b10c2f73a67523e9c67a0da1dd46b4b9cf3e041c290671
|
data/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,39 @@
|
|
|
1
1
|
# Legion LLM Changelog
|
|
2
2
|
|
|
3
|
+
## [0.9.15] - 2026-05-08
|
|
4
|
+
|
|
5
|
+
### Fixed
|
|
6
|
+
- Normalize structured user message content before RAG query handling, preventing multipart API messages from crashing trivial-query detection or reaching Apollo as arrays.
|
|
7
|
+
- Normalize empty-string tool-call arguments to `{}` and make sticky tool history tolerate non-hash argument payloads without dropping state writes.
|
|
8
|
+
- Pass non-executable API client tool calls through to callers as streaming `tool-call` events instead of dispatching them server-side as failed tool executions.
|
|
9
|
+
- Add runtime logging for client tool receipt, native tool injection summaries, registry-injection skips, and returned tool-call SSE emission.
|
|
10
|
+
|
|
11
|
+
## [0.9.14] - 2026-05-08
|
|
12
|
+
|
|
13
|
+
### Fixed
|
|
14
|
+
- Guard discovery-first provider inference when the lightweight discovery namespace is loaded before the full discovery cache API.
|
|
15
|
+
- Clean up publisher-identity review follow-up by making the request identity fallback explicit and removing unused caller-identity `require` statements.
|
|
16
|
+
|
|
17
|
+
## [0.9.13] - 2026-05-08
|
|
18
|
+
|
|
19
|
+
### Fixed
|
|
20
|
+
- Allow trigger-matched registry tools to reach native provider dispatch even when `Settings::Extensions` has no always-loaded tools registered.
|
|
21
|
+
- Pass native dispatch options as top-level fleet request parameters so fleet providers receive `system`, `tools`, and offering metadata consistently with direct dispatch.
|
|
22
|
+
|
|
23
|
+
## [0.9.12] - 2026-05-07
|
|
24
|
+
|
|
25
|
+
### Fixed
|
|
26
|
+
- Route LLM AMQP publisher identity, API fallback caller identity, prompt audit attribution, and metering attribution through the local `Legion::Identity::Process` identity instead of trusting request-supplied caller hashes.
|
|
27
|
+
- Preserve request caller context separately from publisher identity headers, including prompt audit request caller type, skill events, escalation events, fleet envelopes, reflection ingest metadata, and privacy-blocked audit events.
|
|
28
|
+
|
|
29
|
+
## [0.9.11] - 2026-05-07
|
|
30
|
+
|
|
31
|
+
### Fixed
|
|
32
|
+
- `infer_provider_for_model` now consults `Discovery.cached_discovered_models` before falling back to static regex patterns, so models reported by registered lex-llm-* providers route correctly regardless of naming convention.
|
|
33
|
+
- Add Bedrock vendor prefix detection (`anthropic.`, `meta.`, `mistral.`, etc.) before the Ollama catch-all pattern to prevent Bedrock model IDs like `anthropic.claude-opus-4-5-20251101-v1:0` from being misrouted to Ollama due to the `:` in the version suffix.
|
|
34
|
+
- `inferred_provider_tier` now checks `Call::Registry` metadata for the provider's tier before falling back to the static `PROVIDER_TIER` hash.
|
|
35
|
+
- Restore `thinking` option in `native_dispatch_chat_options` where provider dispatch expects it.
|
|
36
|
+
|
|
3
37
|
## [0.9.10] - 2026-05-07
|
|
4
38
|
|
|
5
39
|
### Fixed
|
|
@@ -5,6 +5,7 @@ require 'open3'
|
|
|
5
5
|
require 'time'
|
|
6
6
|
require 'legion/cache/helper'
|
|
7
7
|
require 'legion/logging/helper'
|
|
8
|
+
require 'legion/llm/publisher_identity'
|
|
8
9
|
require 'legion/llm/types'
|
|
9
10
|
|
|
10
11
|
begin
|
|
@@ -346,6 +347,56 @@ module Legion
|
|
|
346
347
|
stream << "event: #{event_name}\ndata: #{Legion::JSON.dump(payload)}\n\n"
|
|
347
348
|
end
|
|
348
349
|
|
|
350
|
+
define_method(:emit_response_tool_call_events) do |stream, pipeline_response|
|
|
351
|
+
tool_calls = extract_tool_calls(pipeline_response)
|
|
352
|
+
return if tool_calls.empty?
|
|
353
|
+
|
|
354
|
+
timeline_tool_call_ids = Array(pipeline_response.timeline).filter_map do |event|
|
|
355
|
+
key = event[:key].to_s
|
|
356
|
+
next unless key.start_with?('tool:execute:')
|
|
357
|
+
|
|
358
|
+
data = event[:data].is_a?(Hash) ? event[:data] : {}
|
|
359
|
+
data[:tool_call_id] || data['tool_call_id']
|
|
360
|
+
end
|
|
361
|
+
|
|
362
|
+
emitted = 0
|
|
363
|
+
skipped_timeline = 0
|
|
364
|
+
request_id = pipeline_response.respond_to?(:request_id) ? pipeline_response.request_id : 'unknown'
|
|
365
|
+
conversation_id = pipeline_response.respond_to?(:conversation_id) ? pipeline_response.conversation_id : 'none'
|
|
366
|
+
|
|
367
|
+
tool_calls.each do |tool_call|
|
|
368
|
+
tool_call_id = tool_call[:id] || tool_call['id']
|
|
369
|
+
if tool_call_id && timeline_tool_call_ids.include?(tool_call_id)
|
|
370
|
+
skipped_timeline += 1
|
|
371
|
+
next
|
|
372
|
+
end
|
|
373
|
+
|
|
374
|
+
tool_name = tool_call[:name] || tool_call['name']
|
|
375
|
+
next if tool_name.to_s.empty?
|
|
376
|
+
|
|
377
|
+
log.info(
|
|
378
|
+
"[llm][api][tools] action=returned_tool_call_sse request_id=#{request_id || 'unknown'} " \
|
|
379
|
+
"conversation_id=#{conversation_id || 'none'} tool_call_id=#{tool_call_id || 'none'} name=#{tool_name} " \
|
|
380
|
+
"args_class=#{(tool_call[:arguments] || tool_call['arguments'] || {}).class}"
|
|
381
|
+
)
|
|
382
|
+
emit_sse_event(stream, 'tool-call', {
|
|
383
|
+
toolCallId: tool_call_id,
|
|
384
|
+
toolName: tool_name,
|
|
385
|
+
args: tool_call[:arguments] || tool_call['arguments'] || {},
|
|
386
|
+
timestamp: Time.now.utc.iso8601
|
|
387
|
+
})
|
|
388
|
+
emitted += 1
|
|
389
|
+
end
|
|
390
|
+
|
|
391
|
+
names = tool_calls.map { |tool_call| tool_call[:name] || tool_call['name'] }.compact
|
|
392
|
+
names = names.first(30).join(',') + (names.size > 30 ? ",+#{names.size - 30}more" : '')
|
|
393
|
+
log.info(
|
|
394
|
+
"[llm][api][tools] action=returned_tool_calls_complete request_id=#{request_id || 'unknown'} " \
|
|
395
|
+
"conversation_id=#{conversation_id || 'none'} total=#{tool_calls.size} emitted=#{emitted} " \
|
|
396
|
+
"skipped_timeline=#{skipped_timeline} names=#{names.empty? ? 'none' : names}"
|
|
397
|
+
)
|
|
398
|
+
end
|
|
399
|
+
|
|
349
400
|
define_method(:emit_timeline_tool_events) do |stream, pipeline_response, skip_tool_results: false|
|
|
350
401
|
timeline = Array(pipeline_response.timeline)
|
|
351
402
|
log.debug("[llm][api][helpers] emit_timeline_tool_events count=#{timeline.size} skip_tool_results=#{skip_tool_results}")
|
|
@@ -390,8 +441,18 @@ module Legion
|
|
|
390
441
|
|
|
391
442
|
define_method(:identity_canonical_name) do |rack_env|
|
|
392
443
|
request_identity = identity_request_from_env(rack_env)
|
|
393
|
-
|
|
394
|
-
|
|
444
|
+
if request_identity.respond_to?(:to_caller_hash)
|
|
445
|
+
caller_hash = request_identity.to_caller_hash
|
|
446
|
+
requested_by = nil
|
|
447
|
+
requested_by = caller_hash[:requested_by] || caller_hash['requested_by'] if caller_hash.is_a?(Hash)
|
|
448
|
+
unless Legion::LLM::PublisherIdentity.generic_requested_by?(requested_by)
|
|
449
|
+
name = requested_by[:identity] || requested_by['identity'] if requested_by.respond_to?(:key?)
|
|
450
|
+
return name if name && name.to_s != ''
|
|
451
|
+
end
|
|
452
|
+
end
|
|
453
|
+
|
|
454
|
+
publisher_identity = Legion::LLM::PublisherIdentity.requested_by[:identity]
|
|
455
|
+
return publisher_identity if publisher_identity && publisher_identity.to_s != ''
|
|
395
456
|
|
|
396
457
|
if defined?(Legion::Identity::Process) && Legion::Identity::Process.respond_to?(:canonical_name)
|
|
397
458
|
process_name = Legion::Identity::Process.canonical_name
|
|
@@ -408,16 +469,12 @@ module Legion
|
|
|
408
469
|
caller_hash = request_identity.to_caller_hash
|
|
409
470
|
if caller_hash.is_a?(Hash)
|
|
410
471
|
requested_by = caller_hash[:requested_by] || caller_hash['requested_by']
|
|
411
|
-
return { requested_by: requested_by } if requested_by
|
|
472
|
+
return { requested_by: requested_by } if requested_by && !Legion::LLM::PublisherIdentity.generic_requested_by?(requested_by)
|
|
412
473
|
end
|
|
413
474
|
end
|
|
414
475
|
|
|
415
476
|
{
|
|
416
|
-
requested_by:
|
|
417
|
-
identity: identity_canonical_name(rack_env),
|
|
418
|
-
type: :process,
|
|
419
|
-
credential: :system
|
|
420
|
-
}
|
|
477
|
+
requested_by: Legion::LLM::PublisherIdentity.requested_by
|
|
421
478
|
}
|
|
422
479
|
end
|
|
423
480
|
|
|
@@ -72,7 +72,13 @@ module Legion
|
|
|
72
72
|
build_client_tool_class(ts[:name].to_s, ts[:description].to_s, ts[:parameters] || ts[:input_schema])
|
|
73
73
|
end
|
|
74
74
|
|
|
75
|
-
|
|
75
|
+
client_tool_names = tool_declarations.map(&:name)
|
|
76
|
+
client_tool_summary = client_tool_names.empty? ? 'none' : client_tool_names.first(30).join(',')
|
|
77
|
+
client_tool_summary = "#{client_tool_summary},+#{client_tool_names.size - 30}more" if client_tool_names.size > 30
|
|
78
|
+
log.info(
|
|
79
|
+
"[llm][api][tools] action=client_tools_built request_id=#{request_id} " \
|
|
80
|
+
"conversation_id=#{conversation_id || 'none'} count=#{tool_declarations.size} names=#{client_tool_summary}"
|
|
81
|
+
)
|
|
76
82
|
|
|
77
83
|
streaming = body[:stream] == true && request.preferred_type.to_s.include?('text/event-stream')
|
|
78
84
|
effective_caller = build_server_caller(source: 'api', path: request.path, env: env,
|
|
@@ -155,6 +161,7 @@ module Legion
|
|
|
155
161
|
emit_sse_event(out, 'text-delta', { delta: text })
|
|
156
162
|
end
|
|
157
163
|
|
|
164
|
+
emit_response_tool_call_events(out, pipeline_response)
|
|
158
165
|
emit_timeline_tool_events(out, pipeline_response, skip_tool_results: !executor.tool_event_handler.nil?)
|
|
159
166
|
|
|
160
167
|
enrichments = pipeline_response.enrichments
|
data/lib/legion/llm/audit.rb
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
require 'legion/logging/helper'
|
|
4
|
+
require_relative 'publisher_identity'
|
|
4
5
|
|
|
5
6
|
module Legion
|
|
6
7
|
module LLM
|
|
@@ -19,6 +20,7 @@ module Legion
|
|
|
19
20
|
module_function
|
|
20
21
|
|
|
21
22
|
def emit_prompt(event)
|
|
23
|
+
event = attributed_event(event)
|
|
22
24
|
if transport_connected? && defined?(Legion::LLM::Transport::Messages::PromptEvent)
|
|
23
25
|
Legion::LLM::Transport::Messages::PromptEvent.new(**event).publish
|
|
24
26
|
log.info('[llm][audit] published prompt audit')
|
|
@@ -33,6 +35,7 @@ module Legion
|
|
|
33
35
|
end
|
|
34
36
|
|
|
35
37
|
def emit_tools(event)
|
|
38
|
+
event = attributed_event(event)
|
|
36
39
|
if transport_connected? && defined?(Legion::LLM::Transport::Messages::ToolEvent)
|
|
37
40
|
Legion::LLM::Transport::Messages::ToolEvent.new(**event).publish
|
|
38
41
|
log.info('[llm][audit] published tool audit')
|
|
@@ -47,6 +50,7 @@ module Legion
|
|
|
47
50
|
end
|
|
48
51
|
|
|
49
52
|
def emit_skill(**event)
|
|
53
|
+
event = attributed_event(event)
|
|
50
54
|
if transport_connected? && defined?(Legion::LLM::Transport::Messages::SkillEvent)
|
|
51
55
|
Legion::LLM::Transport::Messages::SkillEvent.new(**event).publish
|
|
52
56
|
log.info('[llm][audit] published skill audit')
|
|
@@ -64,6 +68,13 @@ module Legion
|
|
|
64
68
|
Legion::LLM::Settings.transport_connected?
|
|
65
69
|
end
|
|
66
70
|
|
|
71
|
+
def attributed_event(event)
|
|
72
|
+
source = event.is_a?(Hash) ? event.dup : {}
|
|
73
|
+
source[:identity] = Legion::LLM::PublisherIdentity.current
|
|
74
|
+
source[:caller] ||= Legion::LLM::PublisherIdentity.caller_hash
|
|
75
|
+
source
|
|
76
|
+
end
|
|
77
|
+
|
|
67
78
|
# Backward-compat: resolve old Legion::LLM::Audit::Exchange, ::PromptEvent, etc.
|
|
68
79
|
def self.const_missing(name)
|
|
69
80
|
case name
|
|
@@ -332,11 +332,13 @@ module Legion
|
|
|
332
332
|
|
|
333
333
|
def parse_arguments(arguments)
|
|
334
334
|
return arguments unless arguments.is_a?(String)
|
|
335
|
+
return {} if arguments.strip.empty?
|
|
335
336
|
|
|
336
|
-
Legion::JSON.parse(arguments)
|
|
337
|
+
parsed = Legion::JSON.parse(arguments)
|
|
338
|
+
parsed.is_a?(Hash) ? parsed : {}
|
|
337
339
|
rescue StandardError => e
|
|
338
340
|
handle_exception(e, level: :debug, handled: true, operation: 'llm.dispatch.parse_arguments')
|
|
339
|
-
|
|
341
|
+
{}
|
|
340
342
|
end
|
|
341
343
|
end
|
|
342
344
|
end
|
|
@@ -6,6 +6,7 @@ require 'time'
|
|
|
6
6
|
require 'legion/extensions/llm/fleet/protocol'
|
|
7
7
|
require 'legion/logging/helper'
|
|
8
8
|
|
|
9
|
+
require_relative '../publisher_identity'
|
|
9
10
|
require_relative 'token_issuer'
|
|
10
11
|
|
|
11
12
|
module Legion
|
|
@@ -15,7 +16,7 @@ module Legion
|
|
|
15
16
|
extend Legion::Logging::Helper
|
|
16
17
|
|
|
17
18
|
ENVELOPE_KEYS = %i[
|
|
18
|
-
app_id caller correlation_id expires_at idempotency_key message_context operation
|
|
19
|
+
app_id caller correlation_id expires_at idempotency_key identity message_context operation
|
|
19
20
|
model priority protocol_version provider provider_instance reply_to request_id routing_key
|
|
20
21
|
signed_token timeout timeout_seconds trace_context ttl
|
|
21
22
|
].freeze
|
|
@@ -89,6 +90,7 @@ module Legion
|
|
|
89
90
|
reply_to: reply_to,
|
|
90
91
|
message_context: message_context || {},
|
|
91
92
|
caller: fetch_option(request_opts, :caller) || default_caller,
|
|
93
|
+
identity: Legion::LLM::PublisherIdentity.current,
|
|
92
94
|
trace_context: fetch_option(request_opts, :trace_context) || {},
|
|
93
95
|
timeout_seconds: timeout,
|
|
94
96
|
expires_at: (Time.now.utc + timeout).iso8601,
|
|
@@ -293,7 +295,11 @@ module Legion
|
|
|
293
295
|
end
|
|
294
296
|
|
|
295
297
|
def default_caller
|
|
296
|
-
{
|
|
298
|
+
{
|
|
299
|
+
source: 'legion-llm',
|
|
300
|
+
component: 'fleet_dispatcher',
|
|
301
|
+
requested_by: Legion::LLM::PublisherIdentity.requested_by
|
|
302
|
+
}
|
|
297
303
|
end
|
|
298
304
|
end
|
|
299
305
|
end
|
|
@@ -4,6 +4,7 @@ require 'legion/extensions/llm/fleet/protocol'
|
|
|
4
4
|
require 'legion/logging/helper'
|
|
5
5
|
|
|
6
6
|
require_relative '../call/registry'
|
|
7
|
+
require_relative '../publisher_identity'
|
|
7
8
|
require_relative 'worker_execution'
|
|
8
9
|
|
|
9
10
|
module Legion
|
|
@@ -73,6 +74,8 @@ module Legion
|
|
|
73
74
|
reply_to: envelope[:reply_to],
|
|
74
75
|
message_context: envelope[:message_context] || {},
|
|
75
76
|
trace_context: envelope[:trace_context] || {},
|
|
77
|
+
caller: envelope[:caller],
|
|
78
|
+
identity: Legion::LLM::PublisherIdentity.current,
|
|
76
79
|
content: response_content(response),
|
|
77
80
|
tool_calls: response_tool_calls(response),
|
|
78
81
|
usage: response_usage(response),
|
|
@@ -96,12 +99,14 @@ module Legion
|
|
|
96
99
|
reply_to: envelope[:reply_to],
|
|
97
100
|
message_context: envelope[:message_context] || {},
|
|
98
101
|
trace_context: envelope[:trace_context] || {},
|
|
102
|
+
caller: envelope[:caller],
|
|
103
|
+
identity: Legion::LLM::PublisherIdentity.current,
|
|
99
104
|
message: error.message,
|
|
100
105
|
error_class: error.class.name
|
|
101
106
|
}.compact
|
|
102
107
|
end
|
|
103
108
|
|
|
104
|
-
def publish_response(
|
|
109
|
+
def publish_response(envelope, result)
|
|
105
110
|
require 'legion/extensions/llm/transport/messages/fleet_response'
|
|
106
111
|
publish_result = ::Legion::Extensions::Llm::Transport::Messages::FleetResponse.new(
|
|
107
112
|
protocol_version: result[:protocol_version],
|
|
@@ -115,6 +120,8 @@ module Legion
|
|
|
115
120
|
reply_to: result[:reply_to],
|
|
116
121
|
message_context: result[:message_context],
|
|
117
122
|
trace_context: result[:trace_context],
|
|
123
|
+
caller: envelope[:caller],
|
|
124
|
+
identity: Legion::LLM::PublisherIdentity.current,
|
|
118
125
|
content: result[:content],
|
|
119
126
|
tool_calls: result[:tool_calls],
|
|
120
127
|
usage: result[:usage],
|
|
@@ -127,7 +134,7 @@ module Legion
|
|
|
127
134
|
handle_exception(e, level: :warn, operation: 'llm.fleet.handler.publish_response')
|
|
128
135
|
end
|
|
129
136
|
|
|
130
|
-
def publish_error(
|
|
137
|
+
def publish_error(envelope, result)
|
|
131
138
|
require 'legion/extensions/llm/transport/messages/fleet_error'
|
|
132
139
|
publish_result = ::Legion::Extensions::Llm::Transport::Messages::FleetError.new(
|
|
133
140
|
protocol_version: result[:protocol_version],
|
|
@@ -141,6 +148,8 @@ module Legion
|
|
|
141
148
|
reply_to: result[:reply_to],
|
|
142
149
|
message_context: result[:message_context],
|
|
143
150
|
trace_context: result[:trace_context],
|
|
151
|
+
caller: envelope[:caller],
|
|
152
|
+
identity: Legion::LLM::PublisherIdentity.current,
|
|
144
153
|
code: result[:error],
|
|
145
154
|
message: result[:message],
|
|
146
155
|
error_class: result[:error_class],
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
require 'legion/logging/helper'
|
|
4
|
+
require_relative '../publisher_identity'
|
|
4
5
|
module Legion
|
|
5
6
|
module LLM
|
|
6
7
|
module Hooks
|
|
@@ -166,7 +167,11 @@ module Legion
|
|
|
166
167
|
knowledge_domain: 'reflection',
|
|
167
168
|
confidence: entry[:confidence],
|
|
168
169
|
source_agent: "llm:#{model}",
|
|
169
|
-
metadata: {
|
|
170
|
+
metadata: {
|
|
171
|
+
context: entry[:context],
|
|
172
|
+
source: 'reflection_hook',
|
|
173
|
+
submitted_by: Legion::LLM::PublisherIdentity.requested_by
|
|
174
|
+
}
|
|
170
175
|
})
|
|
171
176
|
)
|
|
172
177
|
log.info("[llm][reflection] published via=transport model=#{model} type=#{entry[:type]}")
|
|
@@ -176,7 +181,8 @@ module Legion
|
|
|
176
181
|
content_type: entry[:type].to_s,
|
|
177
182
|
knowledge_domain: 'reflection',
|
|
178
183
|
confidence: entry[:confidence],
|
|
179
|
-
source_agent: "llm:#{model}"
|
|
184
|
+
source_agent: "llm:#{model}",
|
|
185
|
+
metadata: { submitted_by: Legion::LLM::PublisherIdentity.requested_by }
|
|
180
186
|
)
|
|
181
187
|
log.info("[llm][reflection] published via=direct model=#{model} type=#{entry[:type]}")
|
|
182
188
|
end
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
require 'legion/logging/helper'
|
|
4
|
-
require_relative '../
|
|
4
|
+
require_relative '../publisher_identity'
|
|
5
5
|
module Legion
|
|
6
6
|
module LLM
|
|
7
7
|
module Inference
|
|
@@ -65,8 +65,8 @@ module Legion
|
|
|
65
65
|
nil
|
|
66
66
|
end
|
|
67
67
|
|
|
68
|
-
def extract_identity(
|
|
69
|
-
Legion::LLM::
|
|
68
|
+
def extract_identity(_caller)
|
|
69
|
+
Legion::LLM::PublisherIdentity.current
|
|
70
70
|
end
|
|
71
71
|
|
|
72
72
|
def serialize_tokens(tokens)
|
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
require 'concurrent'
|
|
4
4
|
require 'faraday'
|
|
5
5
|
|
|
6
|
-
require_relative '../
|
|
6
|
+
require_relative '../publisher_identity'
|
|
7
7
|
require_relative 'route_attempts'
|
|
8
8
|
|
|
9
9
|
module Legion
|
|
@@ -142,6 +142,9 @@ module Legion
|
|
|
142
142
|
|
|
143
143
|
def inferred_provider_tier(provider)
|
|
144
144
|
return nil unless provider
|
|
145
|
+
|
|
146
|
+
meta = Call::Registry.metadata_for(provider, @resolved_instance || :default)
|
|
147
|
+
return meta[:tier].to_sym if meta.is_a?(Hash) && meta[:tier]
|
|
145
148
|
return Router.provider_tier(provider) if defined?(Router) && Router.respond_to?(:provider_tier)
|
|
146
149
|
|
|
147
150
|
Router::PROVIDER_TIER.fetch(provider.to_sym, :cloud) if defined?(Router::PROVIDER_TIER)
|
|
@@ -650,10 +653,7 @@ module Legion
|
|
|
650
653
|
end
|
|
651
654
|
|
|
652
655
|
def native_dispatch_chat_options
|
|
653
|
-
opts = {
|
|
654
|
-
model: @resolved_model,
|
|
655
|
-
provider: @resolved_provider
|
|
656
|
-
}
|
|
656
|
+
opts = { model: @resolved_model, provider: @resolved_provider }
|
|
657
657
|
opts[:instance] = @resolved_instance if @resolved_instance
|
|
658
658
|
opts[:thinking] = @request.thinking if @request.thinking
|
|
659
659
|
opts.compact
|
|
@@ -674,6 +674,7 @@ module Legion
|
|
|
674
674
|
log.debug "[llm][executor] action=native_tool_loop.complete rounds=#{round} reason=no_tool_calls"
|
|
675
675
|
return result
|
|
676
676
|
end
|
|
677
|
+
return client_passthrough_tool_loop_result(result, tool_calls, round) if tool_calls.any? { |tool_call| client_passthrough_tool_call?(tool_call) }
|
|
677
678
|
|
|
678
679
|
round += 1
|
|
679
680
|
tool_names = tool_calls.map { |tc| tc[:name] }.join(',')
|
|
@@ -697,6 +698,7 @@ module Legion
|
|
|
697
698
|
Array(@request.tools).each { |tool| add_native_tool_definition(definitions, tool) }
|
|
698
699
|
add_registry_tool_definitions(definitions) if registry_tool_injection_requested?
|
|
699
700
|
log.debug "[llm][executor] action=native_tool_definitions.built count=#{definitions.size}"
|
|
701
|
+
log_native_tool_definitions(definitions)
|
|
700
702
|
definitions
|
|
701
703
|
end
|
|
702
704
|
end
|
|
@@ -728,9 +730,7 @@ module Legion
|
|
|
728
730
|
end
|
|
729
731
|
|
|
730
732
|
def add_registry_tool_definitions(definitions)
|
|
731
|
-
return unless
|
|
732
|
-
Legion::Settings::Extensions.respond_to?(:filter_tools) &&
|
|
733
|
-
Array(Legion::Settings::Extensions.tools).any?
|
|
733
|
+
return unless registry_tool_sources_available?
|
|
734
734
|
|
|
735
735
|
add_settings_extensions_tool_definitions(definitions)
|
|
736
736
|
rescue StandardError => e
|
|
@@ -841,7 +841,7 @@ module Legion
|
|
|
841
841
|
else
|
|
842
842
|
{}
|
|
843
843
|
end
|
|
844
|
-
normalized[:arguments]
|
|
844
|
+
normalized[:arguments] = normalize_tool_arguments(normalized[:arguments])
|
|
845
845
|
normalized[:id] ||= "call_#{SecureRandom.hex(12)}"
|
|
846
846
|
normalized
|
|
847
847
|
end
|
|
@@ -1418,8 +1418,7 @@ module Legion
|
|
|
1418
1418
|
end
|
|
1419
1419
|
|
|
1420
1420
|
def metering_identity
|
|
1421
|
-
|
|
1422
|
-
Legion::LLM::CallerIdentity.normalize(caller: @request.caller, identity: top_id)
|
|
1421
|
+
Legion::LLM::PublisherIdentity.current
|
|
1423
1422
|
end
|
|
1424
1423
|
|
|
1425
1424
|
def step_context_store
|
|
@@ -96,11 +96,10 @@ module Legion
|
|
|
96
96
|
model: @resolved_model,
|
|
97
97
|
idempotency_key: idempotency_key,
|
|
98
98
|
messages: messages,
|
|
99
|
-
options: native_dispatch_options,
|
|
100
99
|
caller: @request.caller,
|
|
101
100
|
trace_context: @tracing || {},
|
|
102
101
|
timeout: @request.ttl
|
|
103
|
-
}.compact
|
|
102
|
+
}.merge(native_dispatch_options).compact
|
|
104
103
|
end
|
|
105
104
|
|
|
106
105
|
def normalize_fleet_result(result)
|
|
@@ -127,11 +127,12 @@ module Legion
|
|
|
127
127
|
def estimate_utilization
|
|
128
128
|
return 0.0 if @request.tokens[:max].nil? || @request.tokens[:max].zero?
|
|
129
129
|
|
|
130
|
-
message_tokens = @request.messages.sum { |m| (m
|
|
130
|
+
message_tokens = @request.messages.sum { |m| content_text(message_content(m)).length / 4 }
|
|
131
131
|
message_tokens.to_f / @request.tokens[:max]
|
|
132
132
|
end
|
|
133
133
|
|
|
134
134
|
def trivial_query?(query)
|
|
135
|
+
query = content_text(query)
|
|
135
136
|
max_chars = rag_setting(:trivial_max_chars, 20)
|
|
136
137
|
patterns = rag_setting(:trivial_patterns, [])
|
|
137
138
|
|
|
@@ -247,7 +248,34 @@ module Legion
|
|
|
247
248
|
|
|
248
249
|
def extract_query
|
|
249
250
|
@request.messages.select { |m| Legion::LLM::Settings.config_value(m, :role).to_s == 'user' }
|
|
250
|
-
.then { |messages|
|
|
251
|
+
.then { |messages| content_text(message_content(messages.last)) }
|
|
252
|
+
end
|
|
253
|
+
|
|
254
|
+
def message_content(message)
|
|
255
|
+
Legion::LLM::Settings.config_value(message, :content)
|
|
256
|
+
end
|
|
257
|
+
|
|
258
|
+
def content_text(content)
|
|
259
|
+
case content
|
|
260
|
+
when nil
|
|
261
|
+
''
|
|
262
|
+
when String
|
|
263
|
+
content
|
|
264
|
+
when Array
|
|
265
|
+
content.filter_map { |entry| content_text(entry) }.join
|
|
266
|
+
when Hash
|
|
267
|
+
type = content[:type] || content['type']
|
|
268
|
+
return '' unless type.nil? || type.to_s == 'text'
|
|
269
|
+
|
|
270
|
+
text = if content.key?(:text) || content.key?('text')
|
|
271
|
+
content[:text] || content['text']
|
|
272
|
+
else
|
|
273
|
+
content[:content] || content['content']
|
|
274
|
+
end
|
|
275
|
+
content_text(text)
|
|
276
|
+
else
|
|
277
|
+
content.respond_to?(:text) ? content.text.to_s : content.to_s
|
|
278
|
+
end
|
|
251
279
|
end
|
|
252
280
|
|
|
253
281
|
def apply_gaia_context_limit(limit, strategy:)
|
|
@@ -17,7 +17,7 @@ module Legion
|
|
|
17
17
|
access_token private_key secret_key auth_token credential
|
|
18
18
|
].freeze
|
|
19
19
|
|
|
20
|
-
def step_sticky_persist # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
|
|
20
|
+
def step_sticky_persist # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
|
|
21
21
|
return unless sticky_persist_ready?
|
|
22
22
|
|
|
23
23
|
conv_id = @request.conversation_id
|
|
@@ -100,7 +100,7 @@ module Legion
|
|
|
100
100
|
tool: entry[:tool_name],
|
|
101
101
|
runner: runner_key,
|
|
102
102
|
turn: @sticky_turn_snapshot,
|
|
103
|
-
args: sanitize_args(truncate_args(entry[:args]
|
|
103
|
+
args: sanitize_args(truncate_args(normalize_history_args(entry[:args]))),
|
|
104
104
|
result: entry[:result].to_s[0, max_result_length],
|
|
105
105
|
error: entry[:error] || false
|
|
106
106
|
}
|
|
@@ -162,6 +162,25 @@ module Legion
|
|
|
162
162
|
end
|
|
163
163
|
end
|
|
164
164
|
|
|
165
|
+
def normalize_history_args(args)
|
|
166
|
+
case args
|
|
167
|
+
when nil
|
|
168
|
+
{}
|
|
169
|
+
when Hash
|
|
170
|
+
args
|
|
171
|
+
when String
|
|
172
|
+
return {} if args.strip.empty?
|
|
173
|
+
|
|
174
|
+
parsed = Legion::JSON.parse(args)
|
|
175
|
+
parsed.is_a?(Hash) ? parsed : {}
|
|
176
|
+
else
|
|
177
|
+
args.respond_to?(:to_h) ? args.to_h : {}
|
|
178
|
+
end
|
|
179
|
+
rescue StandardError => e
|
|
180
|
+
handle_exception(e, level: :debug, handled: true, operation: 'llm.pipeline.step_sticky_persist.normalize_args')
|
|
181
|
+
{}
|
|
182
|
+
end
|
|
183
|
+
|
|
165
184
|
def sanitize_args(args)
|
|
166
185
|
args.each_with_object({}) do |(k, v), h|
|
|
167
186
|
h[k] = SENSITIVE_PARAM_NAMES.include?(k.to_s.downcase) ? '[REDACTED]' : v
|
|
@@ -32,6 +32,20 @@ module Legion
|
|
|
32
32
|
source = find_tool_source(tool_name)
|
|
33
33
|
next unless source
|
|
34
34
|
|
|
35
|
+
if client_passthrough_source?(source)
|
|
36
|
+
log.info(
|
|
37
|
+
"[llm][tools] client_passthrough request_id=#{@request.id} " \
|
|
38
|
+
"tool_call_id=#{tool_call_id || 'none'} name=#{tool_name}"
|
|
39
|
+
)
|
|
40
|
+
log_step_debug(
|
|
41
|
+
:tool_calls,
|
|
42
|
+
:client_passthrough,
|
|
43
|
+
tool_call_id: tool_call_id || 'none',
|
|
44
|
+
tool_name: tool_name
|
|
45
|
+
)
|
|
46
|
+
next
|
|
47
|
+
end
|
|
48
|
+
|
|
35
49
|
# Skip builtin tools; native providers handle provider-owned tools.
|
|
36
50
|
if source[:type] == :builtin
|
|
37
51
|
log.info(
|
|
@@ -123,6 +137,102 @@ module Legion
|
|
|
123
137
|
{ type: :builtin }
|
|
124
138
|
end
|
|
125
139
|
|
|
140
|
+
def client_passthrough_source?(source)
|
|
141
|
+
source[:type] == :client && source[:executable] != true
|
|
142
|
+
end
|
|
143
|
+
|
|
144
|
+
def client_passthrough_tool_call?(tool_call)
|
|
145
|
+
client_passthrough_source?(find_tool_source(tool_call[:name]))
|
|
146
|
+
end
|
|
147
|
+
|
|
148
|
+
def client_passthrough_tool_loop_result(result, tool_calls, round)
|
|
149
|
+
result[:tool_calls] = tool_calls
|
|
150
|
+
log.debug "[llm][executor] action=native_tool_loop.complete rounds=#{round} reason=client_passthrough"
|
|
151
|
+
result
|
|
152
|
+
end
|
|
153
|
+
|
|
154
|
+
def normalize_tool_arguments(arguments)
|
|
155
|
+
case arguments
|
|
156
|
+
when nil
|
|
157
|
+
{}
|
|
158
|
+
when Hash
|
|
159
|
+
arguments
|
|
160
|
+
when String
|
|
161
|
+
return {} if arguments.strip.empty?
|
|
162
|
+
|
|
163
|
+
parsed = Legion::JSON.parse(arguments)
|
|
164
|
+
parsed.is_a?(Hash) ? parsed : {}
|
|
165
|
+
else
|
|
166
|
+
arguments.respond_to?(:to_h) ? arguments.to_h : {}
|
|
167
|
+
end
|
|
168
|
+
rescue StandardError => e
|
|
169
|
+
handle_exception(e, level: :debug, handled: true, operation: 'llm.pipeline.normalize_tool_arguments')
|
|
170
|
+
{}
|
|
171
|
+
end
|
|
172
|
+
|
|
173
|
+
def registry_tool_sources_available?
|
|
174
|
+
unless Legion::Settings::Extensions.respond_to?(:tools) &&
|
|
175
|
+
Legion::Settings::Extensions.respond_to?(:filter_tools)
|
|
176
|
+
log_tool_injection_skip(:settings_extensions_unavailable)
|
|
177
|
+
return false
|
|
178
|
+
end
|
|
179
|
+
|
|
180
|
+
settings_tool_count = Array(Legion::Settings::Extensions.tools).size
|
|
181
|
+
if settings_tool_count.zero? && @triggered_tools.empty?
|
|
182
|
+
log_tool_injection_skip(:no_settings_or_triggered_tools, settings_tool_count: settings_tool_count)
|
|
183
|
+
return false
|
|
184
|
+
end
|
|
185
|
+
|
|
186
|
+
true
|
|
187
|
+
end
|
|
188
|
+
|
|
189
|
+
def log_tool_injection_skip(reason, settings_tool_count: nil)
|
|
190
|
+
log.info(
|
|
191
|
+
"[llm][tools][inject] action=registry_skipped request_id=#{request_log_value(:id, 'unknown')} " \
|
|
192
|
+
"conversation_id=#{request_log_value(:conversation_id, 'none') || 'none'} reason=#{reason} " \
|
|
193
|
+
"settings_tools=#{settings_tool_count || 'unknown'} triggered_tools=#{@triggered_tools.size} " \
|
|
194
|
+
"requested_tools=#{requested_deferred_tool_names.size}"
|
|
195
|
+
)
|
|
196
|
+
rescue StandardError => e
|
|
197
|
+
handle_exception(e, level: :debug, handled: true, operation: 'llm.pipeline.log_tool_injection_skip')
|
|
198
|
+
end
|
|
199
|
+
|
|
200
|
+
def log_native_tool_definitions(definitions)
|
|
201
|
+
log.info(
|
|
202
|
+
"[llm][tools][inject] action=native_tool_definitions request_id=#{request_log_value(:id, 'unknown')} " \
|
|
203
|
+
"conversation_id=#{request_log_value(:conversation_id, 'none') || 'none'} provider=#{@resolved_provider || 'unknown'} " \
|
|
204
|
+
"model=#{@resolved_model || 'unknown'} total=#{definitions.size} sources=#{format_tool_source_counts(definitions)} " \
|
|
205
|
+
"client_request_tools=#{Array(request_log_value(:tools, [])).size} triggered_tools=#{@triggered_tools.size} " \
|
|
206
|
+
"requested_tools=#{requested_deferred_tool_names.size} names=#{format_tool_names(definitions.map(&:name))}"
|
|
207
|
+
)
|
|
208
|
+
rescue StandardError => e
|
|
209
|
+
handle_exception(e, level: :debug, handled: true, operation: 'llm.pipeline.log_native_tool_definitions')
|
|
210
|
+
end
|
|
211
|
+
|
|
212
|
+
# Summarizes how many tool definitions came from each source type.
#
# A definition's type is read from `definition.source[:type]` (or the string key
# `'type'`) when `source` is a Hash; anything else is counted as "unknown".
#
# @param definitions [Enumerable] tool definitions
# @return [String] comma-separated "type:count" pairs in first-seen order,
#   or "none" when there are no definitions
def format_tool_source_counts(definitions)
  counts = definitions.each_with_object(Hash.new(0)) do |definition, memo|
    source = definition.respond_to?(:source) ? definition.source : nil
    type = source.is_a?(Hash) ? (source[:type] || source['type']) : nil
    # Bucket by string so :mcp and "mcp" are counted together instead of
    # appearing as two entries with the same printed label.
    memo[(type || :unknown).to_s] += 1
  end
  return 'none' if counts.empty?

  counts.map { |type, count| "#{type}:#{count}" }.join(',')
end
|
|
222
|
+
|
|
223
|
+
# Renders tool names as a comma-separated list capped at `limit` entries.
#
# @param names [Object] anything `Array()` accepts; entries are stringified and blanks dropped
# @param limit [Integer] maximum number of names to print
# @return [String] "none" when nothing remains, otherwise the joined names, followed by
#   ",+Nmore" when N names were left out
def format_tool_names(names, limit = 30)
  cleaned = Array(names).each_with_object([]) do |name, kept|
    text = name.to_s
    kept << text unless text.empty?
  end
  return 'none' if cleaned.empty?

  shown = cleaned.first(limit).join(',')
  return shown unless cleaned.size > limit

  "#{shown},+#{cleaned.size - limit}more"
end
|
|
231
|
+
|
|
232
|
+
# Reads a value from @request for logging purposes.
#
# @param method_name [Symbol] accessor to call on @request
# @param fallback [Object] value returned when @request lacks the accessor
#   or the accessor returns nil
# @return [Object] the accessor's value, or `fallback`
def request_log_value(method_name, fallback)
  return fallback unless @request.respond_to?(method_name)

  # A nil result carries no more information than a missing accessor, so it gets the
  # same fallback; other values, including false, are passed through unchanged.
  value = @request.public_send(method_name)
  value.nil? ? fallback : value
end
|
|
235
|
+
|
|
126
236
|
def describe_tool_source(source)
|
|
127
237
|
case source[:type]
|
|
128
238
|
when :mcp
|
data/lib/legion/llm/inference.rb
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
require 'legion/logging/helper'
|
|
4
|
+
require_relative 'publisher_identity'
|
|
4
5
|
require_relative 'metering/usage'
|
|
5
6
|
require_relative 'inference/request'
|
|
6
7
|
require_relative 'inference/response'
|
|
@@ -589,7 +590,7 @@ module Legion
|
|
|
589
590
|
return response if response
|
|
590
591
|
end
|
|
591
592
|
|
|
592
|
-
publish_escalation_event(history, :exhausted) if history.size > 1
|
|
593
|
+
publish_escalation_event(history, :exhausted, caller: kwargs[:caller]) if history.size > 1
|
|
593
594
|
message = "All #{history.size} escalation attempts failed"
|
|
594
595
|
if last_error
|
|
595
596
|
providers = history.filter_map { |attempt| attempt[:provider] }.uniq.join(', ')
|
|
@@ -608,7 +609,8 @@ module Legion
|
|
|
608
609
|
duration_ms = ((Time.now - start_time) * 1000).round
|
|
609
610
|
result = Quality::Checker.check(response, quality_threshold: threshold, quality_check: quality_check)
|
|
610
611
|
|
|
611
|
-
return [response, nil] if escalation_attempt_passed?(response, result, resolution, duration_ms, history, chain
|
|
612
|
+
return [response, nil] if escalation_attempt_passed?(response, result, resolution, duration_ms, history, chain,
|
|
613
|
+
caller: kwargs[:caller])
|
|
612
614
|
|
|
613
615
|
report_health(:quality_failure, resolution, duration_ms, failures: result.failures)
|
|
614
616
|
history << build_attempt(resolution, :quality_failure, result.failures, duration_ms)
|
|
@@ -630,13 +632,13 @@ module Legion
|
|
|
630
632
|
**opts.except(:model, :provider))
|
|
631
633
|
end
|
|
632
634
|
|
|
633
|
-
def escalation_attempt_passed?(response, result, resolution, duration_ms, history, chain)
|
|
635
|
+
def escalation_attempt_passed?(response, result, resolution, duration_ms, history, chain, caller: nil)
|
|
634
636
|
return false unless result.passed
|
|
635
637
|
|
|
636
638
|
report_health(:success, resolution, duration_ms)
|
|
637
639
|
history << build_attempt(resolution, :success, [], duration_ms)
|
|
638
640
|
attach_escalation_history(response, history, resolution, chain)
|
|
639
|
-
publish_escalation_event(history, :success) if history.size > 1
|
|
641
|
+
publish_escalation_event(history, :success, caller: caller) if history.size > 1
|
|
640
642
|
log.debug "[llm][inference] chat_with_escalation success attempts=#{history.size}"
|
|
641
643
|
true
|
|
642
644
|
end
|
|
@@ -683,11 +685,12 @@ module Legion
|
|
|
683
685
|
signal: :latency, value: duration_ms, metadata: {})
|
|
684
686
|
end
|
|
685
687
|
|
|
686
|
-
def publish_escalation_event(history, final_outcome)
|
|
688
|
+
def publish_escalation_event(history, final_outcome, caller: nil)
|
|
687
689
|
payload = {
|
|
688
690
|
outcome: final_outcome,
|
|
689
691
|
attempts: history.size,
|
|
690
692
|
history: history,
|
|
693
|
+
caller: caller || Legion::LLM::PublisherIdentity.caller_hash,
|
|
691
694
|
timestamp: Time.now.utc.iso8601
|
|
692
695
|
}
|
|
693
696
|
|
|
@@ -775,7 +778,7 @@ module Legion
|
|
|
775
778
|
|
|
776
779
|
def emit_privacy_blocked_audit
|
|
777
780
|
Legion::LLM::Audit.emit_prompt(
|
|
778
|
-
request_id: nil, conversation_id: nil, caller:
|
|
781
|
+
request_id: nil, conversation_id: nil, caller: Legion::LLM::PublisherIdentity.caller_hash,
|
|
779
782
|
routing: {}, tokens: {}, status: 'privacy_blocked',
|
|
780
783
|
error: { class: 'PrivacyModeError', message: 'External tiers blocked by enterprise privacy' },
|
|
781
784
|
timestamp: Time.now, request_type: 'chat'
|
data/lib/legion/llm/metering.rb
CHANGED
|
@@ -5,6 +5,7 @@ require_relative 'metering/estimator'
|
|
|
5
5
|
require_relative 'metering/tracker'
|
|
6
6
|
require_relative 'metering/tokens'
|
|
7
7
|
require_relative 'metering/usage'
|
|
8
|
+
require_relative 'publisher_identity'
|
|
8
9
|
|
|
9
10
|
module Legion
|
|
10
11
|
module LLM
|
|
@@ -24,6 +25,7 @@ module Legion
|
|
|
24
25
|
module_function
|
|
25
26
|
|
|
26
27
|
def emit(event)
|
|
28
|
+
event = attributed_event(event)
|
|
27
29
|
event_class = metering_event_class if transport_connected?
|
|
28
30
|
|
|
29
31
|
if event_class
|
|
@@ -47,6 +49,13 @@ module Legion
|
|
|
47
49
|
:dropped
|
|
48
50
|
end
|
|
49
51
|
|
|
52
|
+
# Returns a copy of the event stamped with publisher attribution.
#
# `:identity` is always set from PublisherIdentity.current; `:caller` is filled from
# PublisherIdentity.caller_hash only when the event has no truthy `:caller` already.
# A non-Hash event is replaced by a fresh Hash. The argument itself is not modified.
#
# @param event [Hash, Object] metering event
# @return [Hash] attributed copy
def attributed_event(event)
  base = event.is_a?(Hash) ? event.dup : {}
  base.tap do |attributed|
    attributed[:identity] = Legion::LLM::PublisherIdentity.current
    attributed[:caller] = Legion::LLM::PublisherIdentity.caller_hash unless attributed[:caller]
  end
end
|
|
58
|
+
|
|
50
59
|
def flush_spool
|
|
51
60
|
return 0 unless spool_available? && transport_connected?
|
|
52
61
|
|
|
@@ -64,7 +73,7 @@ module Legion
|
|
|
64
73
|
end
|
|
65
74
|
|
|
66
75
|
def install_hook
|
|
67
|
-
Legion::LLM::Hooks.after_chat do |response:, model:, **|
|
|
76
|
+
Legion::LLM::Hooks.after_chat do |response:, model:, caller: nil, **|
|
|
68
77
|
usage = extract_usage(response)
|
|
69
78
|
next if usage[:input_tokens].zero? && usage[:output_tokens].zero?
|
|
70
79
|
|
|
@@ -83,6 +92,7 @@ module Legion
|
|
|
83
92
|
model_id: resolved_model,
|
|
84
93
|
input_tokens: usage[:input_tokens],
|
|
85
94
|
output_tokens: usage[:output_tokens],
|
|
95
|
+
caller: caller,
|
|
86
96
|
event_type: 'llm_completion',
|
|
87
97
|
status: response.is_a?(Hash) && response[:error] ? 'failure' : 'success'
|
|
88
98
|
)
|
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative 'caller_identity'
|
|
4
|
+
|
|
5
|
+
module Legion
  module LLM
    # Resolves the identity attached to events this process publishes.
    #
    # The identity comes from Legion::Identity::Process when that module can be loaded
    # and reports a canonical name; otherwise it is derived from the USER / LOGNAME
    # environment variables, and finally from CallerIdentity::DEFAULT_IDENTITY.
    module PublisherIdentity
      # Normalized identity strings that generic_requested_by? treats as generic.
      GENERIC_PUBLISHER_IDENTITIES = %w[
        anonymous process:anonymous service:system system system:system unknown:anonymous
      ].freeze

      module_function

      # @return [Hash] the process identity when it has a non-empty :identity,
      #   otherwise the environment-derived identity
      def current
        identity = process_identity(process_identity_module)
        return identity if present_identity?(identity)

        env_identity
      end

      # @return [Hash] `{ requested_by: {...} }` built from the :identity, :type,
      #   :credential and :hostname entries of #current, with nil entries removed
      def caller_hash
        identity = current
        {
          requested_by: {
            identity: identity[:identity],
            type: identity[:type],
            credential: identity[:credential],
            hostname: identity[:hostname]
          }.compact
        }
      end

      # @return [Hash] the :requested_by portion of #caller_hash
      def requested_by
        caller_hash[:requested_by]
      end

      # Tells whether a requested_by hash names a generic identity.
      #
      # @param value [Hash, Object] requested_by data; a non-Hash is treated as empty
      # @return [Boolean] true when the raw :id / 'id' is "system:system", or when the
      #   identity returned by CallerIdentity.normalize is listed in
      #   GENERIC_PUBLISHER_IDENTITIES
      def generic_requested_by?(value)
        requested = value.is_a?(Hash) ? value : {}
        return true if hash_value(requested, :id).to_s == 'system:system'

        normalized = CallerIdentity.normalize(caller: { requested_by: requested })
        GENERIC_PUBLISHER_IDENTITIES.include?(normalized[:identity].to_s)
      end

      # Returns Legion::Identity::Process, attempting to load it on first use.
      #
      # @return [Module, nil] nil when the constant stays undefined after the require attempt
      def process_identity_module
        return Legion::Identity::Process if defined?(Legion::Identity::Process)

        # Deliberate lazy require: a LoadError just means there is no process identity.
        begin
          require 'legion/identity/process'
        rescue LoadError
          nil
        end

        defined?(Legion::Identity::Process) ? Legion::Identity::Process : nil
      end

      # Builds a normalized identity from a process identity provider.
      #
      # @param process [Object, nil] object that may respond to canonical_name, kind,
      #   source and hostname
      # @return [Hash, nil] result of CallerIdentity.normalize, or nil when `process`
      #   is nil or reports no canonical name
      def process_identity(process)
        return nil unless process

        canonical = process_value(process, :canonical_name)
        return nil unless present?(canonical)

        CallerIdentity.normalize(
          caller: {
            requested_by: {
              identity: canonical,
              type: process_value(process, :kind) || :process,
              credential: process_value(process, :source) || :system,
              hostname: process_value(process, :hostname)
            }.compact
          }
        )
      end

      # Derives an identity from the environment.
      #
      # @return [Hash] normalized identity for the first non-empty value of USER or
      #   LOGNAME, or a copy of CallerIdentity::DEFAULT_IDENTITY when neither is set
      def env_identity
        # Select by presence, not truthiness: an empty USER ("" is truthy in Ruby)
        # must not prevent LOGNAME from being consulted.
        raw = %w[USER LOGNAME].map { |name| ENV.fetch(name, nil) }.find { |candidate| present?(candidate) }
        return CallerIdentity::DEFAULT_IDENTITY.dup unless raw

        CallerIdentity.normalize(
          caller: {
            requested_by: {
              identity: raw.to_s,
              type: :human,
              credential: :system
            }
          }
        )
      end

      # Calls `method_name` on `process` when it responds to it.
      #
      # @return [Object, nil] nil when the method is missing or raises a StandardError
      def process_value(process, method_name)
        return nil unless process.respond_to?(method_name)

        process.public_send(method_name)
      rescue StandardError
        nil
      end

      # Looks `key` up in `hash`, trying the key as given and then its string form.
      #
      # @return [Object, nil] nil when `hash` has no #key? method or holds neither key
      def hash_value(hash, key)
        return nil unless hash.respond_to?(:key?)
        return hash[key] if hash.key?(key)

        string_key = key.to_s
        hash[string_key] if hash.key?(string_key)
      end

      # @return [Boolean] true when `identity` is a Hash with a present :identity entry
      def present_identity?(identity)
        identity.is_a?(Hash) && present?(identity[:identity])
      end

      # @return [Boolean] false for nil and for values whose #empty? is true
      def present?(value)
        !value.nil? && !(value.respond_to?(:empty?) && value.empty?)
      end
    end
  end
end
|
data/lib/legion/llm/router.rb
CHANGED
|
@@ -28,8 +28,12 @@ module Legion
|
|
|
28
28
|
def infer_provider_for_model(model)
|
|
29
29
|
return nil if model.nil? || model.to_s.empty?
|
|
30
30
|
|
|
31
|
+
discovered = discover_provider_for_model(model)
|
|
32
|
+
return discovered if discovered
|
|
33
|
+
|
|
31
34
|
model_s = model.to_s
|
|
32
35
|
return :bedrock if model_s.start_with?('us.')
|
|
36
|
+
return :bedrock if model_s.match?(/\A(anthropic|meta|mistral|cohere|amazon|ai21)\./i)
|
|
33
37
|
return :openai if model_s.match?(/\Agpt-|\Ao[134]-/)
|
|
34
38
|
return :anthropic if model_s.start_with?('claude-')
|
|
35
39
|
return :gemini if model_s.start_with?('gemini-')
|
|
@@ -38,6 +42,17 @@ module Legion
|
|
|
38
42
|
nil
|
|
39
43
|
end
|
|
40
44
|
|
|
45
|
+
# Looks up the provider for a model in the discovery cache.
#
# An entry matches when its :model equals the requested name or starts with the
# requested name followed by ":". The first matching entry wins.
#
# @param model [#to_s] model name to look up
# @return [Object, nil] the matching entry's :provider, or nil when Discovery is
#   unavailable or nothing matches
def discover_provider_for_model(model)
  return nil unless defined?(Discovery) && Discovery.respond_to?(:cached_discovered_models)

  wanted = model.to_s
  prefix = "#{wanted}:"
  Array(Discovery.cached_discovered_models).each do |candidate|
    discovered = candidate[:model].to_s
    next unless discovered == wanted || discovered.start_with?(prefix)

    return candidate.dig(:provider)
  end
  nil
end
|
|
55
|
+
|
|
41
56
|
# Resolve an LLM routing intent to a tier/provider/model decision.
|
|
42
57
|
#
|
|
43
58
|
# @param intent [Hash, nil] routing intent (capability, privacy, etc.)
|
|
@@ -132,7 +132,7 @@ module Legion
|
|
|
132
132
|
total_duration += duration_ms
|
|
133
133
|
inject_parts << result.inject if result.inject
|
|
134
134
|
|
|
135
|
-
emit_step_success(conv_id, method_name, step_idx, duration_ms, result, classification)
|
|
135
|
+
emit_step_success(conv_id, method_name, step_idx, duration_ms, result, classification, context)
|
|
136
136
|
|
|
137
137
|
next unless result.gate
|
|
138
138
|
|
|
@@ -154,14 +154,21 @@ module Legion
|
|
|
154
154
|
|
|
155
155
|
private
|
|
156
156
|
|
|
157
|
+
# Extracts the caller entry from a step context.
#
# @param context [Hash, Object] step context
# @return [Object, nil] the value under :caller, else under 'caller'; nil when
#   `context` is not a Hash
def context_caller(context)
  context.is_a?(Hash) ? (context[:caller] || context['caller']) : nil
end
|
|
162
|
+
|
|
157
163
|
def execute_step(method_name, step_idx, context, conv_id, classification)
|
|
158
164
|
t0 = ::Process.clock_gettime(::Process::CLOCK_MONOTONIC)
|
|
165
|
+
caller = context_caller(context)
|
|
159
166
|
emit_event(conv_id, 'skill.step.started',
|
|
160
167
|
step_name: method_name, step_index: step_idx)
|
|
161
168
|
Legion::LLM::Metering.emit(
|
|
162
169
|
request_type: 'skill.step.start', skill_name: self.class.skill_name,
|
|
163
170
|
namespace: self.class.namespace, step_name: method_name,
|
|
164
|
-
step_index: step_idx, tier: 'local'
|
|
171
|
+
step_index: step_idx, tier: 'local', caller: caller
|
|
165
172
|
)
|
|
166
173
|
result = public_send(method_name, context: context)
|
|
167
174
|
unless result.respond_to?(:inject) && result.respond_to?(:metadata) && result.respond_to?(:gate)
|
|
@@ -175,10 +182,11 @@ module Legion
|
|
|
175
182
|
[result, duration_ms]
|
|
176
183
|
rescue StandardError => e
|
|
177
184
|
duration_ms = ((::Process.clock_gettime(::Process::CLOCK_MONOTONIC) - t0) * 1000).round
|
|
178
|
-
handle_step_error(e, method_name, step_idx, conv_id, duration_ms, classification)
|
|
185
|
+
handle_step_error(e, method_name, step_idx, conv_id, duration_ms, classification, context)
|
|
179
186
|
end
|
|
180
187
|
|
|
181
|
-
def handle_step_error(err, method_name, step_idx, conv_id, duration_ms, classification)
|
|
188
|
+
def handle_step_error(err, method_name, step_idx, conv_id, duration_ms, classification, context)
|
|
189
|
+
caller = context_caller(context)
|
|
182
190
|
Legion::LLM::Inference::Conversation.clear_skill_state(conv_id) if conv_id
|
|
183
191
|
emit_event(conv_id, 'skill.step.failed',
|
|
184
192
|
step_name: method_name, error: err.message)
|
|
@@ -186,19 +194,20 @@ module Legion
|
|
|
186
194
|
skill_name: self.class.skill_name, namespace: self.class.namespace,
|
|
187
195
|
step_name: method_name, gate: nil, status: :failed,
|
|
188
196
|
duration_ms: duration_ms, metadata: { error: err.message },
|
|
189
|
-
classification: classification
|
|
197
|
+
classification: classification, caller: caller
|
|
190
198
|
)
|
|
191
199
|
Legion::LLM::Metering.emit(
|
|
192
200
|
request_type: 'skill.step', skill_name: self.class.skill_name,
|
|
193
201
|
namespace: self.class.namespace, step_name: method_name,
|
|
194
|
-
step_index: step_idx, duration_ms: duration_ms, gate: nil, tier: 'local'
|
|
202
|
+
step_index: step_idx, duration_ms: duration_ms, gate: nil, tier: 'local', caller: caller
|
|
195
203
|
)
|
|
196
204
|
raise Legion::LLM::Skills::StepError.new(
|
|
197
205
|
"#{self.class.skill_name}##{method_name} failed: #{err.message}", cause: err
|
|
198
206
|
)
|
|
199
207
|
end
|
|
200
208
|
|
|
201
|
-
def emit_step_success(conv_id, method_name, step_idx, duration_ms, result, classification)
|
|
209
|
+
def emit_step_success(conv_id, method_name, step_idx, duration_ms, result, classification, context)
|
|
210
|
+
caller = context_caller(context)
|
|
202
211
|
emit_event(conv_id, 'skill.step.completed',
|
|
203
212
|
step_name: method_name, duration_ms: duration_ms,
|
|
204
213
|
metadata: result.metadata)
|
|
@@ -206,13 +215,13 @@ module Legion
|
|
|
206
215
|
skill_name: self.class.skill_name, namespace: self.class.namespace,
|
|
207
216
|
step_name: method_name, gate: result.gate,
|
|
208
217
|
status: :completed, duration_ms: duration_ms,
|
|
209
|
-
metadata: result.metadata, classification: classification
|
|
218
|
+
metadata: result.metadata, classification: classification, caller: caller
|
|
210
219
|
)
|
|
211
220
|
Legion::LLM::Metering.emit(
|
|
212
221
|
request_type: 'skill.step', skill_name: self.class.skill_name,
|
|
213
222
|
namespace: self.class.namespace, step_name: method_name,
|
|
214
223
|
step_index: step_idx, duration_ms: duration_ms,
|
|
215
|
-
gate: result.gate&.to_s, tier: 'local'
|
|
224
|
+
gate: result.gate&.to_s, tier: 'local', caller: caller
|
|
216
225
|
)
|
|
217
226
|
end
|
|
218
227
|
|
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
require 'securerandom'
|
|
4
4
|
require 'uri'
|
|
5
5
|
require 'legion/logging/helper'
|
|
6
|
-
require_relative '../
|
|
6
|
+
require_relative '../publisher_identity'
|
|
7
7
|
|
|
8
8
|
module Legion
|
|
9
9
|
module LLM
|
|
@@ -209,7 +209,7 @@ module Legion
|
|
|
209
209
|
end
|
|
210
210
|
|
|
211
211
|
def identity_headers
|
|
212
|
-
identity = Legion::LLM::
|
|
212
|
+
identity = Legion::LLM::PublisherIdentity.current
|
|
213
213
|
return {} unless identity
|
|
214
214
|
|
|
215
215
|
h = {}
|
|
@@ -43,7 +43,7 @@ module Legion
|
|
|
43
43
|
type = caller_info[:type] || caller_info['type'] || top_id[:type] || top_id['type'] ||
|
|
44
44
|
(extension && 'extension')
|
|
45
45
|
h = {}
|
|
46
|
-
h['x-legion-caller-type'] = type.to_s if type
|
|
46
|
+
h['x-legion-request-caller-type'] = type.to_s if type
|
|
47
47
|
h
|
|
48
48
|
end
|
|
49
49
|
|
data/lib/legion/llm/version.rb
CHANGED
data/lib/legion/llm.rb
CHANGED
|
@@ -9,6 +9,7 @@ require_relative 'llm/version'
|
|
|
9
9
|
require_relative 'llm/errors'
|
|
10
10
|
require_relative 'llm/settings'
|
|
11
11
|
require_relative 'llm/caller_identity'
|
|
12
|
+
require_relative 'llm/publisher_identity'
|
|
12
13
|
require_relative 'llm/call/providers'
|
|
13
14
|
require_relative 'llm/call/registry'
|
|
14
15
|
require_relative 'llm/call/lex_llm_adapter'
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: legion-llm
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.9.
|
|
4
|
+
version: 0.9.15
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Esity
|
|
@@ -294,6 +294,7 @@ files:
|
|
|
294
294
|
- lib/legion/llm/metering/tokens.rb
|
|
295
295
|
- lib/legion/llm/metering/tracker.rb
|
|
296
296
|
- lib/legion/llm/metering/usage.rb
|
|
297
|
+
- lib/legion/llm/publisher_identity.rb
|
|
297
298
|
- lib/legion/llm/quality.rb
|
|
298
299
|
- lib/legion/llm/quality/checker.rb
|
|
299
300
|
- lib/legion/llm/quality/confidence/score.rb
|