legion-llm 0.6.17 → 0.6.20
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +14 -1
- data/CLAUDE.md +6 -2
- data/lib/legion/llm/pipeline/executor.rb +45 -38
- data/lib/legion/llm/pipeline/mcp_tool_adapter.rb +3 -67
- data/lib/legion/llm/pipeline/profile.rb +13 -1
- data/lib/legion/llm/pipeline/tool_adapter.rb +14 -1
- data/lib/legion/llm/routes.rb +11 -11
- data/lib/legion/llm/version.rb +1 -1
- data/lib/legion/llm.rb +3 -6
- metadata +1 -2
- data/lib/legion/llm/tool_registry.rb +0 -40
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 45d07a2c60a8663ba1b62165b3b489d49a2aac37ee1e1ec6abff7bd5f4357d6c
|
|
4
|
+
data.tar.gz: 9ee8246c75fee6d7e690b55f4e2a91b030f6b142c91dd79acb7bf66edf4d9d05
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 92b102167bb6f346fab490787baedda2f2fa6fb528713c6b055b269f747c490d56fed5d21027616bc2c6d1f7cf4069ce14bb9b7607f7a6ad07c2c69b05ce0814
|
|
7
|
+
data.tar.gz: f1fded39722bf678936df28f3bbf3ec095265bdabc28f70eaf67e64fae5519b7c58842a8432e4b4bfdc0476c9275473f75a9c83bf0c77d6f5cc2afe1fa700aeb
|
data/CHANGELOG.md
CHANGED
|
@@ -1,9 +1,22 @@
|
|
|
1
1
|
# Legion LLM Changelog
|
|
2
2
|
|
|
3
|
-
## [
|
|
3
|
+
## [0.6.20] - 2026-04-06
|
|
4
|
+
|
|
5
|
+
### Added
|
|
6
|
+
- Per-step pipeline timing diagnostics: `[pipeline][timing]` log line with duration per step
|
|
7
|
+
- Pre-pipeline timing in inference route: `gaia_ingest`, `pre_pipeline_setup`, `executor_call` durations
|
|
8
|
+
- `MAX_RUBY_LLM_TOOL_ROUNDS` (25) — caps RubyLLM's unbounded tool-use loop to prevent infinite cycling
|
|
9
|
+
- `install_tool_loop_guard` applied to both streaming and non-streaming provider paths
|
|
10
|
+
|
|
11
|
+
### Fixed
|
|
12
|
+
- fix Process namespace collision by using ::Process::CLOCK_MONOTONIC prefix inside Legion namespace
|
|
4
13
|
|
|
5
14
|
### Added
|
|
6
15
|
- `Legion::LLM::Pipeline::ToolAdapter` - wraps Tools::Base for RubyLLM sessions
|
|
16
|
+
- `Profile.derive` returns `:human` for `type: :human` and `type: :user` callers (Wire Format Phase 3)
|
|
17
|
+
- `Profile.derive` returns `:service` for `type: :service` callers (Wire Format Phase 3)
|
|
18
|
+
- `HUMAN_SKIP` constant (empty — humans get full pipeline)
|
|
19
|
+
- `SERVICE_SKIP` constant — services skip conversational steps (context, tools, knowledge)
|
|
7
20
|
|
|
8
21
|
### Changed
|
|
9
22
|
- Renamed `McpToolAdapter` to `ToolAdapter` (backwards compat alias kept)
|
data/CLAUDE.md
CHANGED
|
@@ -8,7 +8,7 @@
|
|
|
8
8
|
Core LegionIO gem providing LLM capabilities to all extensions. Wraps ruby_llm to provide a consistent interface for chat, embeddings, tool use, and agents across multiple providers (Bedrock, Anthropic, OpenAI, Gemini, Ollama). Includes a dynamic weighted routing engine that dispatches requests across local, fleet, and cloud tiers based on caller intent, priority rules, time schedules, cost multipliers, and real-time provider health.
|
|
9
9
|
|
|
10
10
|
**GitHub**: https://github.com/LegionIO/legion-llm
|
|
11
|
-
**Version**: 0.
|
|
11
|
+
**Version**: 0.6.18
|
|
12
12
|
**License**: Apache-2.0
|
|
13
13
|
|
|
14
14
|
## Architecture
|
|
@@ -61,8 +61,12 @@ Legion::LLM (lib/legion/llm.rb)
|
|
|
61
61
|
│ ├── Timeline # Ordered event recording with participant tracking
|
|
62
62
|
│ ├── Executor # 18-step pipeline skeleton with profile-aware execution
|
|
63
63
|
│ ├── Steps/
|
|
64
|
-
│ │
|
|
64
|
+
│ │ ├── Metering # Metering event builder (absorbed from lex-llm-gateway)
|
|
65
|
+
│ │ └── ToolDiscovery # Step 9 — formerly McpDiscovery; renamed to ToolDiscovery (McpDiscovery kept as backwards alias)
|
|
65
66
|
│ └── Executor#call_stream # Streaming variant: pre-provider steps, yield chunks, post-provider steps
|
|
67
|
+
│
|
|
68
|
+
│ Note: Legion::LLM::ToolRegistry was removed. Tool registration now lives in Legion::Tools::Registry (LegionIO gem).
|
|
69
|
+
│ McpToolAdapter renamed to ToolAdapter; McpToolAdapter kept as a backwards-compatible alias.
|
|
66
70
|
├── CostEstimator # Model cost estimation with fuzzy pricing (absorbed from lex-llm-gateway)
|
|
67
71
|
├── Fleet # Fleet RPC dispatch (absorbed from lex-llm-gateway)
|
|
68
72
|
│ ├── Dispatcher # Fleet dispatch with timeout and availability checks
|
|
@@ -45,28 +45,7 @@ module Legion
|
|
|
45
45
|
|
|
46
46
|
ASYNC_SAFE_STEPS = %i[post_response knowledge_capture response_return].freeze
|
|
47
47
|
|
|
48
|
-
|
|
49
|
-
legion_do
|
|
50
|
-
legion_get_status
|
|
51
|
-
legion_run_task
|
|
52
|
-
legion_describe_runner
|
|
53
|
-
legion_list_extensions
|
|
54
|
-
legion_get_extension
|
|
55
|
-
legion_list_tasks
|
|
56
|
-
legion_get_task
|
|
57
|
-
legion_get_task_logs
|
|
58
|
-
legion_query_knowledge
|
|
59
|
-
legion_knowledge_health
|
|
60
|
-
legion_knowledge_context
|
|
61
|
-
legion_list_workers
|
|
62
|
-
legion_show_worker
|
|
63
|
-
legion_mesh_status
|
|
64
|
-
legion_list_peers
|
|
65
|
-
legion_tools
|
|
66
|
-
legion_search_sessions
|
|
67
|
-
].freeze
|
|
68
|
-
|
|
69
|
-
private_constant :ALWAYS_LOADED_MCP_TOOLS
|
|
48
|
+
MAX_RUBY_LLM_TOOL_ROUNDS = 25
|
|
70
49
|
|
|
71
50
|
ASYNC_THREAD_POOL = Concurrent::FixedThreadPool.new(4, fallback_policy: :caller_runs)
|
|
72
51
|
|
|
@@ -109,24 +88,39 @@ module Legion
|
|
|
109
88
|
def inject_registry_tools(session)
|
|
110
89
|
return unless defined?(::Legion::Tools::Registry)
|
|
111
90
|
|
|
112
|
-
requested = requested_deferred_tool_names
|
|
113
|
-
always_loaded = always_loaded_tool_names
|
|
114
91
|
injected_names = []
|
|
115
92
|
|
|
93
|
+
# Always-loaded tools — inject all unconditionally
|
|
116
94
|
::Legion::Tools::Registry.tools.each do |tool_class|
|
|
117
95
|
adapter = ToolAdapter.new(tool_class)
|
|
118
|
-
next unless always_loaded.include?(adapter.name) || requested.include?(adapter.name)
|
|
119
|
-
|
|
120
96
|
session.with_tool(adapter)
|
|
121
97
|
injected_names << adapter.name
|
|
122
98
|
rescue StandardError => e
|
|
123
|
-
@warnings << "Failed to inject tool: #{e.message}"
|
|
124
|
-
handle_exception(e, level: :warn, operation: 'llm.pipeline.
|
|
99
|
+
@warnings << "Failed to inject always tool: #{e.message}"
|
|
100
|
+
handle_exception(e, level: :warn, operation: 'llm.pipeline.inject_always_tool')
|
|
101
|
+
end
|
|
102
|
+
|
|
103
|
+
# Requested deferred tools — inject only if explicitly requested
|
|
104
|
+
deferred = ::Legion::Tools::Registry.respond_to?(:deferred_tools) ? ::Legion::Tools::Registry.deferred_tools : []
|
|
105
|
+
requested = requested_deferred_tool_names
|
|
106
|
+
if requested.any?
|
|
107
|
+
deferred.each do |tool_class|
|
|
108
|
+
adapter = ToolAdapter.new(tool_class)
|
|
109
|
+
next unless requested.include?(adapter.name)
|
|
110
|
+
|
|
111
|
+
session.with_tool(adapter)
|
|
112
|
+
injected_names << adapter.name
|
|
113
|
+
rescue StandardError => e
|
|
114
|
+
@warnings << "Failed to inject deferred tool: #{e.message}"
|
|
115
|
+
handle_exception(e, level: :warn, operation: 'llm.pipeline.inject_deferred_tool')
|
|
116
|
+
end
|
|
125
117
|
end
|
|
126
118
|
|
|
127
119
|
log.info(
|
|
128
120
|
"[llm][tools] inject request_id=#{@request.id} " \
|
|
129
|
-
"
|
|
121
|
+
"always=#{::Legion::Tools::Registry.tools.size} " \
|
|
122
|
+
"deferred_available=#{deferred.size} " \
|
|
123
|
+
"requested_deferred=#{requested.size} " \
|
|
130
124
|
"injected=#{injected_names.size} names=#{injected_names.first(25).join(',')}"
|
|
131
125
|
)
|
|
132
126
|
rescue StandardError => e
|
|
@@ -137,27 +131,25 @@ module Legion
|
|
|
137
131
|
# Backwards compatibility alias
|
|
138
132
|
alias inject_discovered_tools inject_registry_tools
|
|
139
133
|
|
|
140
|
-
def always_loaded_tool_names
|
|
141
|
-
return ALWAYS_LOADED_MCP_TOOLS unless defined?(::Legion::Tools::Registry)
|
|
142
|
-
|
|
143
|
-
names = ::Legion::Tools::Registry.always_loaded_names.map { |name| name.to_s.tr('.', '_') }
|
|
144
|
-
names.any? ? names : ALWAYS_LOADED_MCP_TOOLS
|
|
145
|
-
rescue StandardError
|
|
146
|
-
ALWAYS_LOADED_MCP_TOOLS
|
|
147
|
-
end
|
|
148
|
-
|
|
149
134
|
def execute_steps
|
|
150
135
|
executed = 0
|
|
151
136
|
skipped = 0
|
|
137
|
+
pipeline_start = ::Process.clock_gettime(::Process::CLOCK_MONOTONIC)
|
|
138
|
+
step_timings = []
|
|
152
139
|
STEPS.each do |step|
|
|
153
140
|
if Profile.skip?(@profile, step)
|
|
154
141
|
skipped += 1
|
|
155
142
|
next
|
|
156
143
|
end
|
|
157
144
|
|
|
145
|
+
t0 = ::Process.clock_gettime(::Process::CLOCK_MONOTONIC)
|
|
158
146
|
execute_step(step) { send(:"step_#{step}") }
|
|
147
|
+
elapsed_ms = ((::Process.clock_gettime(::Process::CLOCK_MONOTONIC) - t0) * 1000).round
|
|
148
|
+
step_timings << "#{step}=#{elapsed_ms}ms"
|
|
159
149
|
executed += 1
|
|
160
150
|
end
|
|
151
|
+
total_ms = ((::Process.clock_gettime(::Process::CLOCK_MONOTONIC) - pipeline_start) * 1000).round
|
|
152
|
+
log.warn("[pipeline][timing] profile=#{@profile} total=#{total_ms}ms executed=#{executed} skipped=#{skipped} #{step_timings.join(' ')}")
|
|
161
153
|
annotate_top_level_span(steps_executed: executed, steps_skipped: skipped)
|
|
162
154
|
end
|
|
163
155
|
|
|
@@ -481,6 +473,7 @@ module Legion
|
|
|
481
473
|
|
|
482
474
|
def execute_provider_request_ruby_llm
|
|
483
475
|
session, message_content = build_ruby_llm_session
|
|
476
|
+
install_tool_loop_guard(session)
|
|
484
477
|
@raw_response = message_content ? session.ask(message_content) : session
|
|
485
478
|
end
|
|
486
479
|
|
|
@@ -662,6 +655,7 @@ module Legion
|
|
|
662
655
|
)
|
|
663
656
|
|
|
664
657
|
session, message_content = build_ruby_llm_session
|
|
658
|
+
install_tool_loop_guard(session)
|
|
665
659
|
@raw_response = message_content ? session.ask(message_content, &) : session
|
|
666
660
|
|
|
667
661
|
@timestamps[:provider_end] = Time.now
|
|
@@ -695,6 +689,19 @@ module Legion
|
|
|
695
689
|
inject_registry_tools(session)
|
|
696
690
|
end
|
|
697
691
|
|
|
692
|
+
def install_tool_loop_guard(session)
|
|
693
|
+
return unless session.respond_to?(:on)
|
|
694
|
+
|
|
695
|
+
tool_round = 0
|
|
696
|
+
session.on(:tool_call) do |_tool_call|
|
|
697
|
+
tool_round += 1
|
|
698
|
+
if tool_round > MAX_RUBY_LLM_TOOL_ROUNDS
|
|
699
|
+
log.warn("[pipeline] tool loop cap hit: #{tool_round} rounds, halting")
|
|
700
|
+
raise Legion::LLM::PipelineError, "tool loop exceeded #{MAX_RUBY_LLM_TOOL_ROUNDS} rounds"
|
|
701
|
+
end
|
|
702
|
+
end
|
|
703
|
+
end
|
|
704
|
+
|
|
698
705
|
def apply_ruby_llm_instructions(session)
|
|
699
706
|
injected_system = EnrichmentInjector.inject(
|
|
700
707
|
system: @request.system,
|
|
@@ -1,69 +1,5 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
-
|
|
4
|
-
require
|
|
5
|
-
|
|
6
|
-
module Legion
|
|
7
|
-
module LLM
|
|
8
|
-
module Pipeline
|
|
9
|
-
class McpToolAdapter < RubyLLM::Tool
|
|
10
|
-
include Legion::Logging::Helper
|
|
11
|
-
|
|
12
|
-
def initialize(mcp_tool_class)
|
|
13
|
-
@mcp_tool_class = mcp_tool_class
|
|
14
|
-
raw_name = mcp_tool_class.respond_to?(:tool_name) ? mcp_tool_class.tool_name : mcp_tool_class.name.to_s
|
|
15
|
-
@tool_name = raw_name.tr('.', '_')
|
|
16
|
-
@tool_desc = mcp_tool_class.respond_to?(:description) ? mcp_tool_class.description.to_s : ''
|
|
17
|
-
@tool_schema = mcp_tool_class.respond_to?(:input_schema) ? mcp_tool_class.input_schema : nil
|
|
18
|
-
super()
|
|
19
|
-
end
|
|
20
|
-
|
|
21
|
-
def name
|
|
22
|
-
@tool_name
|
|
23
|
-
end
|
|
24
|
-
|
|
25
|
-
def description
|
|
26
|
-
@tool_desc
|
|
27
|
-
end
|
|
28
|
-
|
|
29
|
-
def params_schema
|
|
30
|
-
return @params_schema if defined?(@params_schema)
|
|
31
|
-
|
|
32
|
-
@params_schema = (RubyLLM::Utils.deep_stringify_keys(@tool_schema) if @tool_schema.is_a?(Hash))
|
|
33
|
-
end
|
|
34
|
-
|
|
35
|
-
def execute(**args)
|
|
36
|
-
log.info("[llm][tools] adapter.execute name=#{@tool_name} arguments=#{summarize_payload(args)}")
|
|
37
|
-
result = @mcp_tool_class.call(**args)
|
|
38
|
-
content = extract_content(result)
|
|
39
|
-
log.info("[llm][tools] adapter.result name=#{@tool_name} output=#{summarize_payload(content)}")
|
|
40
|
-
content
|
|
41
|
-
rescue StandardError => e
|
|
42
|
-
handle_exception(e, level: :warn, operation: 'llm.pipeline.mcp_tool_adapter.execute', tool_name: @tool_name)
|
|
43
|
-
"Tool error: #{e.message}"
|
|
44
|
-
end
|
|
45
|
-
|
|
46
|
-
private
|
|
47
|
-
|
|
48
|
-
def extract_content(result)
|
|
49
|
-
# MCP::Tool::Response — has .content array of {type: 'text', text: '...'}
|
|
50
|
-
if result.respond_to?(:content) && result.content.is_a?(Array)
|
|
51
|
-
result.content.filter_map { |c| c[:text] || c['text'] || c.to_s }.join("\n")
|
|
52
|
-
elsif result.is_a?(Hash) && result[:content].is_a?(Array)
|
|
53
|
-
result[:content].filter_map { |c| c[:text] || c['text'] }.join("\n")
|
|
54
|
-
elsif result.is_a?(Hash)
|
|
55
|
-
Legion::JSON.dump(result)
|
|
56
|
-
elsif result.is_a?(String)
|
|
57
|
-
result
|
|
58
|
-
else
|
|
59
|
-
result.to_s
|
|
60
|
-
end
|
|
61
|
-
end
|
|
62
|
-
|
|
63
|
-
def summarize_payload(payload)
|
|
64
|
-
payload.to_s[0, 200].inspect
|
|
65
|
-
end
|
|
66
|
-
end
|
|
67
|
-
end
|
|
68
|
-
end
|
|
69
|
-
end
|
|
3
|
+
# Backwards-compatibility shim — the implementation moved to tool_adapter.rb.
|
|
4
|
+
# Callers that require this path directly will still find McpToolAdapter via the alias.
|
|
5
|
+
require_relative 'tool_adapter'
|
|
@@ -21,6 +21,14 @@ module Legion
|
|
|
21
21
|
tool_calls context_store post_response knowledge_capture
|
|
22
22
|
].freeze
|
|
23
23
|
|
|
24
|
+
HUMAN_SKIP = %i[].freeze
|
|
25
|
+
|
|
26
|
+
SERVICE_SKIP = %i[
|
|
27
|
+
conversation_uuid context_load gaia_advisory
|
|
28
|
+
rag_context tool_discovery confidence_scoring
|
|
29
|
+
tool_calls context_store knowledge_capture
|
|
30
|
+
].freeze
|
|
31
|
+
|
|
24
32
|
module_function
|
|
25
33
|
|
|
26
34
|
def derive(caller_hash)
|
|
@@ -31,7 +39,9 @@ module Legion
|
|
|
31
39
|
identity = requested_by[:identity].to_s
|
|
32
40
|
|
|
33
41
|
return :quick_reply if type == :quick_reply
|
|
34
|
-
return :
|
|
42
|
+
return :human if %i[human user].include?(type)
|
|
43
|
+
return :service if type == :service
|
|
44
|
+
return :external unless type == :system
|
|
35
45
|
|
|
36
46
|
identity.start_with?('gaia:') ? :gaia : :system
|
|
37
47
|
end
|
|
@@ -41,6 +51,8 @@ module Legion
|
|
|
41
51
|
when :gaia then GAIA_SKIP.include?(step)
|
|
42
52
|
when :system then SYSTEM_SKIP.include?(step)
|
|
43
53
|
when :quick_reply then QUICK_REPLY_SKIP.include?(step)
|
|
54
|
+
when :human then HUMAN_SKIP.include?(step)
|
|
55
|
+
when :service then SERVICE_SKIP.include?(step)
|
|
44
56
|
else false
|
|
45
57
|
end
|
|
46
58
|
end
|
|
@@ -9,10 +9,12 @@ module Legion
|
|
|
9
9
|
class ToolAdapter < RubyLLM::Tool
|
|
10
10
|
include Legion::Logging::Helper
|
|
11
11
|
|
|
12
|
+
MAX_TOOL_NAME_LENGTH = 64
|
|
13
|
+
|
|
12
14
|
def initialize(tool_class)
|
|
13
15
|
@tool_class = tool_class
|
|
14
16
|
raw_name = tool_class.respond_to?(:tool_name) ? tool_class.tool_name : tool_class.name.to_s
|
|
15
|
-
@tool_name = raw_name
|
|
17
|
+
@tool_name = sanitize_tool_name(raw_name)
|
|
16
18
|
@tool_desc = tool_class.respond_to?(:description) ? tool_class.description.to_s : ''
|
|
17
19
|
@tool_schema = tool_class.respond_to?(:input_schema) ? tool_class.input_schema : nil
|
|
18
20
|
super()
|
|
@@ -63,6 +65,17 @@ module Legion
|
|
|
63
65
|
def summarize_payload(payload)
|
|
64
66
|
payload.to_s[0, 200].inspect
|
|
65
67
|
end
|
|
68
|
+
|
|
69
|
+
# Bedrock constraints: [a-zA-Z0-9_-]+ and max 64 chars.
|
|
70
|
+
# Falls back to a stable name derived from the class object_id if sanitization yields
|
|
71
|
+
# an empty string (e.g. all chars stripped), ensuring the result always satisfies the
|
|
72
|
+
# at-least-one-character requirement.
|
|
73
|
+
def sanitize_tool_name(raw)
|
|
74
|
+
name = raw.tr('.', '_')
|
|
75
|
+
name = name.gsub(/[^a-zA-Z0-9_-]/, '') # strip ?, !, etc.
|
|
76
|
+
name = name[0, MAX_TOOL_NAME_LENGTH] if name.length > MAX_TOOL_NAME_LENGTH
|
|
77
|
+
name.empty? ? "tool_#{@tool_class.object_id}" : name
|
|
78
|
+
end
|
|
66
79
|
end
|
|
67
80
|
|
|
68
81
|
# Backwards compatibility alias
|
data/lib/legion/llm/routes.rb
CHANGED
|
@@ -12,17 +12,6 @@ require 'open3'
|
|
|
12
12
|
require 'time'
|
|
13
13
|
require 'legion/logging/helper'
|
|
14
14
|
|
|
15
|
-
begin
|
|
16
|
-
require 'legion/cli/chat/tools/search_traces'
|
|
17
|
-
if defined?(Legion::LLM::ToolRegistry) && defined?(Legion::CLI::Chat::Tools::SearchTraces)
|
|
18
|
-
Legion::LLM::ToolRegistry.register(Legion::CLI::Chat::Tools::SearchTraces)
|
|
19
|
-
end
|
|
20
|
-
rescue LoadError => e
|
|
21
|
-
if defined?(Legion::Logging) && Legion::Logging.respond_to?(:log_exception)
|
|
22
|
-
Legion::Logging.log_exception(e, payload_summary: 'SearchTraces not available for API', component_type: :api)
|
|
23
|
-
end
|
|
24
|
-
end
|
|
25
|
-
|
|
26
15
|
module Legion
|
|
27
16
|
module LLM
|
|
28
17
|
module Routes
|
|
@@ -457,8 +446,11 @@ module Legion
|
|
|
457
446
|
last_user = messages.select { |m| (m[:role] || m['role']).to_s == 'user' }.last
|
|
458
447
|
prompt = (last_user || {})[:content] || (last_user || {})['content'] || ''
|
|
459
448
|
|
|
449
|
+
route_t0 = ::Process.clock_gettime(::Process::CLOCK_MONOTONIC)
|
|
450
|
+
|
|
460
451
|
if defined?(Legion::Gaia) && Legion::Gaia.respond_to?(:started?) && Legion::Gaia.started? && prompt.to_s.length.positive?
|
|
461
452
|
begin
|
|
453
|
+
gaia_t0 = ::Process.clock_gettime(::Process::CLOCK_MONOTONIC)
|
|
462
454
|
frame = Legion::Gaia::InputFrame.new(
|
|
463
455
|
content: prompt,
|
|
464
456
|
channel_id: :api,
|
|
@@ -467,6 +459,8 @@ module Legion
|
|
|
467
459
|
metadata: { source_type: :human_direct, salience: 0.9 }
|
|
468
460
|
)
|
|
469
461
|
Legion::Gaia.ingest(frame)
|
|
462
|
+
gaia_ms = ((::Process.clock_gettime(::Process::CLOCK_MONOTONIC) - gaia_t0) * 1000).round
|
|
463
|
+
log.warn("[inference][timing] gaia_ingest=#{gaia_ms}ms request_id=#{request_id}")
|
|
470
464
|
rescue StandardError => e
|
|
471
465
|
handle_exception(e, level: :warn, operation: 'llm.routes.gaia_ingest', request_id: request_id)
|
|
472
466
|
end
|
|
@@ -512,6 +506,9 @@ module Legion
|
|
|
512
506
|
cache: { strategy: :default, cacheable: true }
|
|
513
507
|
)
|
|
514
508
|
|
|
509
|
+
setup_ms = ((::Process.clock_gettime(::Process::CLOCK_MONOTONIC) - route_t0) * 1000).round
|
|
510
|
+
log.warn("[inference][timing] pre_pipeline_setup=#{setup_ms}ms request_id=#{request_id}")
|
|
511
|
+
|
|
515
512
|
executor = Legion::LLM::Pipeline::Executor.new(pipeline_request)
|
|
516
513
|
|
|
517
514
|
if streaming
|
|
@@ -572,7 +569,10 @@ module Legion
|
|
|
572
569
|
end
|
|
573
570
|
# rubocop:enable Metrics/BlockLength
|
|
574
571
|
else
|
|
572
|
+
exec_t0 = ::Process.clock_gettime(::Process::CLOCK_MONOTONIC)
|
|
575
573
|
pipeline_response = executor.call
|
|
574
|
+
exec_ms = ((::Process.clock_gettime(::Process::CLOCK_MONOTONIC) - exec_t0) * 1000).round
|
|
575
|
+
log.warn("[inference][timing] executor_call=#{exec_ms}ms request_id=#{request_id}")
|
|
576
576
|
raw_msg = pipeline_response.message
|
|
577
577
|
content = raw_msg.is_a?(Hash) ? (raw_msg[:content] || raw_msg['content']) : raw_msg.to_s
|
|
578
578
|
routing = pipeline_response.routing || {}
|
data/lib/legion/llm/version.rb
CHANGED
data/lib/legion/llm.rb
CHANGED
|
@@ -32,7 +32,6 @@ require_relative 'llm/scheduling'
|
|
|
32
32
|
require_relative 'llm/off_peak'
|
|
33
33
|
require_relative 'llm/cost_tracker'
|
|
34
34
|
require_relative 'llm/token_tracker'
|
|
35
|
-
require_relative 'llm/tool_registry'
|
|
36
35
|
require_relative 'llm/override_confidence'
|
|
37
36
|
require_relative 'llm/routes'
|
|
38
37
|
|
|
@@ -108,7 +107,7 @@ module Legion
|
|
|
108
107
|
# for automatic metering and fleet dispatch
|
|
109
108
|
def chat(model: nil, provider: nil, intent: nil, tier: nil, escalate: nil,
|
|
110
109
|
max_escalations: nil, quality_check: nil, message: nil, **kwargs, &)
|
|
111
|
-
started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)
|
|
110
|
+
started_at = ::Process.clock_gettime(::Process::CLOCK_MONOTONIC)
|
|
112
111
|
log_inference_request(
|
|
113
112
|
request_type: :chat,
|
|
114
113
|
requested_model: model,
|
|
@@ -154,7 +153,7 @@ module Legion
|
|
|
154
153
|
# Send a single message — daemon-first, falls through to direct on unavailability.
|
|
155
154
|
def ask(message:, model: nil, provider: nil, intent: nil, tier: nil,
|
|
156
155
|
context: {}, identity: nil, &)
|
|
157
|
-
started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)
|
|
156
|
+
started_at = ::Process.clock_gettime(::Process::CLOCK_MONOTONIC)
|
|
158
157
|
log_inference_request(
|
|
159
158
|
request_type: :ask,
|
|
160
159
|
requested_model: model,
|
|
@@ -367,7 +366,7 @@ module Legion
|
|
|
367
366
|
end
|
|
368
367
|
|
|
369
368
|
def elapsed_ms_since(started_at)
|
|
370
|
-
((Process.clock_gettime(Process::CLOCK_MONOTONIC) - started_at) * 1000).round
|
|
369
|
+
((::Process.clock_gettime(::Process::CLOCK_MONOTONIC) - started_at) * 1000).round
|
|
371
370
|
end
|
|
372
371
|
|
|
373
372
|
def inference_input_payload(message:, messages:)
|
|
@@ -690,8 +689,6 @@ module Legion
|
|
|
690
689
|
def adapted_registry_tools
|
|
691
690
|
tool_classes = if defined?(::Legion::Tools::Registry)
|
|
692
691
|
::Legion::Tools::Registry.tools
|
|
693
|
-
elsif defined?(::Legion::LLM::ToolRegistry)
|
|
694
|
-
::Legion::LLM::ToolRegistry.tools
|
|
695
692
|
else
|
|
696
693
|
return []
|
|
697
694
|
end
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: legion-llm
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.6.
|
|
4
|
+
version: 0.6.20
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Esity
|
|
@@ -308,7 +308,6 @@ files:
|
|
|
308
308
|
- lib/legion/llm/shadow_eval.rb
|
|
309
309
|
- lib/legion/llm/structured_output.rb
|
|
310
310
|
- lib/legion/llm/token_tracker.rb
|
|
311
|
-
- lib/legion/llm/tool_registry.rb
|
|
312
311
|
- lib/legion/llm/transport/exchanges/audit.rb
|
|
313
312
|
- lib/legion/llm/transport/exchanges/escalation.rb
|
|
314
313
|
- lib/legion/llm/transport/messages/audit_event.rb
|
|
@@ -1,40 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
require 'legion/logging/helper'
|
|
4
|
-
|
|
5
|
-
module Legion
|
|
6
|
-
module LLM
|
|
7
|
-
module ToolRegistry
|
|
8
|
-
extend Legion::Logging::Helper
|
|
9
|
-
|
|
10
|
-
@tools = []
|
|
11
|
-
@mutex = Mutex.new
|
|
12
|
-
|
|
13
|
-
class << self
|
|
14
|
-
def register(tool_class)
|
|
15
|
-
registered = @mutex.synchronize do
|
|
16
|
-
next false if @tools.include?(tool_class)
|
|
17
|
-
|
|
18
|
-
@tools << tool_class
|
|
19
|
-
true
|
|
20
|
-
end
|
|
21
|
-
if registered
|
|
22
|
-
log.info("[llm][tools] registered class=#{tool_class}")
|
|
23
|
-
else
|
|
24
|
-
log.debug("[llm][tools] already_registered class=#{tool_class}")
|
|
25
|
-
end
|
|
26
|
-
end
|
|
27
|
-
|
|
28
|
-
def tools
|
|
29
|
-
@mutex.synchronize { @tools.dup }
|
|
30
|
-
end
|
|
31
|
-
|
|
32
|
-
def clear
|
|
33
|
-
count = @mutex.synchronize { @tools.size }
|
|
34
|
-
@mutex.synchronize { @tools.clear }
|
|
35
|
-
log.info("[llm][tools] registry_cleared count=#{count}")
|
|
36
|
-
end
|
|
37
|
-
end
|
|
38
|
-
end
|
|
39
|
-
end
|
|
40
|
-
end
|