legion-llm 0.6.18 → 0.6.20
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +11 -1
- data/lib/legion/llm/pipeline/executor.rb +24 -0
- data/lib/legion/llm/pipeline/profile.rb +13 -1
- data/lib/legion/llm/routes.rb +11 -0
- data/lib/legion/llm/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 45d07a2c60a8663ba1b62165b3b489d49a2aac37ee1e1ec6abff7bd5f4357d6c
|
|
4
|
+
data.tar.gz: 9ee8246c75fee6d7e690b55f4e2a91b030f6b142c91dd79acb7bf66edf4d9d05
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 92b102167bb6f346fab490787baedda2f2fa6fb528713c6b055b269f747c490d56fed5d21027616bc2c6d1f7cf4069ce14bb9b7607f7a6ad07c2c69b05ce0814
|
|
7
|
+
data.tar.gz: f1fded39722bf678936df28f3bbf3ec095265bdabc28f70eaf67e64fae5519b7c58842a8432e4b4bfdc0476c9275473f75a9c83bf0c77d6f5cc2afe1fa700aeb
|
data/CHANGELOG.md
CHANGED
|
@@ -1,12 +1,22 @@
|
|
|
1
1
|
# Legion LLM Changelog
|
|
2
2
|
|
|
3
|
-
## [
|
|
3
|
+
## [0.6.20] - 2026-04-06
|
|
4
|
+
|
|
5
|
+
### Added
|
|
6
|
+
- Per-step pipeline timing diagnostics: `[pipeline][timing]` log line with duration per step
|
|
7
|
+
- Pre-pipeline timing in inference route: `gaia_ingest`, `pre_pipeline_setup`, `executor_call` durations
|
|
8
|
+
- `MAX_RUBY_LLM_TOOL_ROUNDS` (25) — caps RubyLLM's unbounded tool-use loop to prevent infinite cycling
|
|
9
|
+
- `install_tool_loop_guard` applied to both streaming and non-streaming provider paths
|
|
4
10
|
|
|
5
11
|
### Fixed
|
|
6
12
|
- fix Process namespace collision by using ::Process::CLOCK_MONOTONIC prefix inside Legion namespace
|
|
7
13
|
|
|
8
14
|
### Added
|
|
9
15
|
- `Legion::LLM::Pipeline::ToolAdapter` - wraps Tools::Base for RubyLLM sessions
|
|
16
|
+
- `Profile.derive` returns `:human` for `type: :human` and `type: :user` callers (Wire Format Phase 3)
|
|
17
|
+
- `Profile.derive` returns `:service` for `type: :service` callers (Wire Format Phase 3)
|
|
18
|
+
- `HUMAN_SKIP` constant (empty — humans get full pipeline)
|
|
19
|
+
- `SERVICE_SKIP` constant — services skip conversational steps (context, tools, knowledge)
|
|
10
20
|
|
|
11
21
|
### Changed
|
|
12
22
|
- Renamed `McpToolAdapter` to `ToolAdapter` (backwards compat alias kept)
|
|
data/lib/legion/llm/pipeline/executor.rb
CHANGED
|
@@ -45,6 +45,8 @@ module Legion
|
|
|
45
45
|
|
|
46
46
|
ASYNC_SAFE_STEPS = %i[post_response knowledge_capture response_return].freeze
|
|
47
47
|
|
|
48
|
+
MAX_RUBY_LLM_TOOL_ROUNDS = 25
|
|
49
|
+
|
|
48
50
|
ASYNC_THREAD_POOL = Concurrent::FixedThreadPool.new(4, fallback_policy: :caller_runs)
|
|
49
51
|
|
|
50
52
|
def initialize(request)
|
|
@@ -132,15 +134,22 @@ module Legion
|
|
|
132
134
|
def execute_steps
|
|
133
135
|
executed = 0
|
|
134
136
|
skipped = 0
|
|
137
|
+
pipeline_start = ::Process.clock_gettime(::Process::CLOCK_MONOTONIC)
|
|
138
|
+
step_timings = []
|
|
135
139
|
STEPS.each do |step|
|
|
136
140
|
if Profile.skip?(@profile, step)
|
|
137
141
|
skipped += 1
|
|
138
142
|
next
|
|
139
143
|
end
|
|
140
144
|
|
|
145
|
+
t0 = ::Process.clock_gettime(::Process::CLOCK_MONOTONIC)
|
|
141
146
|
execute_step(step) { send(:"step_#{step}") }
|
|
147
|
+
elapsed_ms = ((::Process.clock_gettime(::Process::CLOCK_MONOTONIC) - t0) * 1000).round
|
|
148
|
+
step_timings << "#{step}=#{elapsed_ms}ms"
|
|
142
149
|
executed += 1
|
|
143
150
|
end
|
|
151
|
+
total_ms = ((::Process.clock_gettime(::Process::CLOCK_MONOTONIC) - pipeline_start) * 1000).round
|
|
152
|
+
log.warn("[pipeline][timing] profile=#{@profile} total=#{total_ms}ms executed=#{executed} skipped=#{skipped} #{step_timings.join(' ')}")
|
|
144
153
|
annotate_top_level_span(steps_executed: executed, steps_skipped: skipped)
|
|
145
154
|
end
|
|
146
155
|
|
|
@@ -464,6 +473,7 @@ module Legion
|
|
|
464
473
|
|
|
465
474
|
def execute_provider_request_ruby_llm
|
|
466
475
|
session, message_content = build_ruby_llm_session
|
|
476
|
+
install_tool_loop_guard(session)
|
|
467
477
|
@raw_response = message_content ? session.ask(message_content) : session
|
|
468
478
|
end
|
|
469
479
|
|
|
@@ -645,6 +655,7 @@ module Legion
|
|
|
645
655
|
)
|
|
646
656
|
|
|
647
657
|
session, message_content = build_ruby_llm_session
|
|
658
|
+
install_tool_loop_guard(session)
|
|
648
659
|
@raw_response = message_content ? session.ask(message_content, &) : session
|
|
649
660
|
|
|
650
661
|
@timestamps[:provider_end] = Time.now
|
|
@@ -678,6 +689,19 @@ module Legion
|
|
|
678
689
|
inject_registry_tools(session)
|
|
679
690
|
end
|
|
680
691
|
|
|
692
|
+
def install_tool_loop_guard(session)
|
|
693
|
+
return unless session.respond_to?(:on)
|
|
694
|
+
|
|
695
|
+
tool_round = 0
|
|
696
|
+
session.on(:tool_call) do |_tool_call|
|
|
697
|
+
tool_round += 1
|
|
698
|
+
if tool_round > MAX_RUBY_LLM_TOOL_ROUNDS
|
|
699
|
+
log.warn("[pipeline] tool loop cap hit: #{tool_round} rounds, halting")
|
|
700
|
+
raise Legion::LLM::PipelineError, "tool loop exceeded #{MAX_RUBY_LLM_TOOL_ROUNDS} rounds"
|
|
701
|
+
end
|
|
702
|
+
end
|
|
703
|
+
end
|
|
704
|
+
|
|
681
705
|
def apply_ruby_llm_instructions(session)
|
|
682
706
|
injected_system = EnrichmentInjector.inject(
|
|
683
707
|
system: @request.system,
|
|
data/lib/legion/llm/pipeline/profile.rb
CHANGED
|
@@ -21,6 +21,14 @@ module Legion
|
|
|
21
21
|
tool_calls context_store post_response knowledge_capture
|
|
22
22
|
].freeze
|
|
23
23
|
|
|
24
|
+
HUMAN_SKIP = %i[].freeze
|
|
25
|
+
|
|
26
|
+
SERVICE_SKIP = %i[
|
|
27
|
+
conversation_uuid context_load gaia_advisory
|
|
28
|
+
rag_context tool_discovery confidence_scoring
|
|
29
|
+
tool_calls context_store knowledge_capture
|
|
30
|
+
].freeze
|
|
31
|
+
|
|
24
32
|
module_function
|
|
25
33
|
|
|
26
34
|
def derive(caller_hash)
|
|
@@ -31,7 +39,9 @@ module Legion
|
|
|
31
39
|
identity = requested_by[:identity].to_s
|
|
32
40
|
|
|
33
41
|
return :quick_reply if type == :quick_reply
|
|
34
|
-
return :
|
|
42
|
+
return :human if %i[human user].include?(type)
|
|
43
|
+
return :service if type == :service
|
|
44
|
+
return :external unless type == :system
|
|
35
45
|
|
|
36
46
|
identity.start_with?('gaia:') ? :gaia : :system
|
|
37
47
|
end
|
|
@@ -41,6 +51,8 @@ module Legion
|
|
|
41
51
|
when :gaia then GAIA_SKIP.include?(step)
|
|
42
52
|
when :system then SYSTEM_SKIP.include?(step)
|
|
43
53
|
when :quick_reply then QUICK_REPLY_SKIP.include?(step)
|
|
54
|
+
when :human then HUMAN_SKIP.include?(step)
|
|
55
|
+
when :service then SERVICE_SKIP.include?(step)
|
|
44
56
|
else false
|
|
45
57
|
end
|
|
46
58
|
end
|
data/lib/legion/llm/routes.rb
CHANGED
|
@@ -446,8 +446,11 @@ module Legion
|
|
|
446
446
|
last_user = messages.select { |m| (m[:role] || m['role']).to_s == 'user' }.last
|
|
447
447
|
prompt = (last_user || {})[:content] || (last_user || {})['content'] || ''
|
|
448
448
|
|
|
449
|
+
route_t0 = ::Process.clock_gettime(::Process::CLOCK_MONOTONIC)
|
|
450
|
+
|
|
449
451
|
if defined?(Legion::Gaia) && Legion::Gaia.respond_to?(:started?) && Legion::Gaia.started? && prompt.to_s.length.positive?
|
|
450
452
|
begin
|
|
453
|
+
gaia_t0 = ::Process.clock_gettime(::Process::CLOCK_MONOTONIC)
|
|
451
454
|
frame = Legion::Gaia::InputFrame.new(
|
|
452
455
|
content: prompt,
|
|
453
456
|
channel_id: :api,
|
|
@@ -456,6 +459,8 @@ module Legion
|
|
|
456
459
|
metadata: { source_type: :human_direct, salience: 0.9 }
|
|
457
460
|
)
|
|
458
461
|
Legion::Gaia.ingest(frame)
|
|
462
|
+
gaia_ms = ((::Process.clock_gettime(::Process::CLOCK_MONOTONIC) - gaia_t0) * 1000).round
|
|
463
|
+
log.warn("[inference][timing] gaia_ingest=#{gaia_ms}ms request_id=#{request_id}")
|
|
459
464
|
rescue StandardError => e
|
|
460
465
|
handle_exception(e, level: :warn, operation: 'llm.routes.gaia_ingest', request_id: request_id)
|
|
461
466
|
end
|
|
@@ -501,6 +506,9 @@ module Legion
|
|
|
501
506
|
cache: { strategy: :default, cacheable: true }
|
|
502
507
|
)
|
|
503
508
|
|
|
509
|
+
setup_ms = ((::Process.clock_gettime(::Process::CLOCK_MONOTONIC) - route_t0) * 1000).round
|
|
510
|
+
log.warn("[inference][timing] pre_pipeline_setup=#{setup_ms}ms request_id=#{request_id}")
|
|
511
|
+
|
|
504
512
|
executor = Legion::LLM::Pipeline::Executor.new(pipeline_request)
|
|
505
513
|
|
|
506
514
|
if streaming
|
|
@@ -561,7 +569,10 @@ module Legion
|
|
|
561
569
|
end
|
|
562
570
|
# rubocop:enable Metrics/BlockLength
|
|
563
571
|
else
|
|
572
|
+
exec_t0 = ::Process.clock_gettime(::Process::CLOCK_MONOTONIC)
|
|
564
573
|
pipeline_response = executor.call
|
|
574
|
+
exec_ms = ((::Process.clock_gettime(::Process::CLOCK_MONOTONIC) - exec_t0) * 1000).round
|
|
575
|
+
log.warn("[inference][timing] executor_call=#{exec_ms}ms request_id=#{request_id}")
|
|
565
576
|
raw_msg = pipeline_response.message
|
|
566
577
|
content = raw_msg.is_a?(Hash) ? (raw_msg[:content] || raw_msg['content']) : raw_msg.to_s
|
|
567
578
|
routing = pipeline_response.routing || {}
|
data/lib/legion/llm/version.rb
CHANGED