legion-llm 0.6.18 → 0.6.20

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 83b1bf0eb338e47eb37627eb7004afb30b3fba91627577034554827edd797bcf
4
- data.tar.gz: 54ba4b787dfb7ebbbe6ef10f363cb237075f5b37f74467c9408db723d9a36c48
3
+ metadata.gz: 45d07a2c60a8663ba1b62165b3b489d49a2aac37ee1e1ec6abff7bd5f4357d6c
4
+ data.tar.gz: 9ee8246c75fee6d7e690b55f4e2a91b030f6b142c91dd79acb7bf66edf4d9d05
5
5
  SHA512:
6
- metadata.gz: 1a546dca02b403bc025621c1a980cf081114c383f97bd3ce3eef245a7b3314cdaacc7328cbbbfd7879cbf95dd53069e48b9af8f2401099e68966e0ee4e0b624f
7
- data.tar.gz: ddb6aabeca04cdbdacfd2412ee2713dfaecc4b17911b55246ec680538874df7725eabef8768fe69c8202952d6a3f14d0a0ae27696cd64ebf2727679f7947cd61
6
+ metadata.gz: 92b102167bb6f346fab490787baedda2f2fa6fb528713c6b055b269f747c490d56fed5d21027616bc2c6d1f7cf4069ce14bb9b7607f7a6ad07c2c69b05ce0814
7
+ data.tar.gz: f1fded39722bf678936df28f3bbf3ec095265bdabc28f70eaf67e64fae5519b7c58842a8432e4b4bfdc0476c9275473f75a9c83bf0c77d6f5cc2afe1fa700aeb
data/CHANGELOG.md CHANGED
@@ -1,12 +1,22 @@
1
1
  # Legion LLM Changelog
2
2
 
3
- ## [Unreleased]
3
+ ## [0.6.20] - 2026-04-06
4
+
5
+ ### Added
6
+ - Per-step pipeline timing diagnostics: `[pipeline][timing]` log line with duration per step
7
+ - Pre-pipeline timing in inference route: `gaia_ingest`, `pre_pipeline_setup`, `executor_call` durations
8
+ - `MAX_RUBY_LLM_TOOL_ROUNDS` (25) — caps RubyLLM's unbounded tool-use loop to prevent infinite cycling
9
+ - `install_tool_loop_guard` applied to both streaming and non-streaming provider paths
4
10
 
5
11
  ### Fixed
6
12
  - Fix `Process` namespace collision by using the `::Process::CLOCK_MONOTONIC` prefix inside the `Legion` namespace
7
13
 
8
14
  ### Added
9
15
  - `Legion::LLM::Pipeline::ToolAdapter` - wraps Tools::Base for RubyLLM sessions
16
+ - `Profile.derive` returns `:human` for `type: :human` and `type: :user` callers (Wire Format Phase 3)
17
+ - `Profile.derive` returns `:service` for `type: :service` callers (Wire Format Phase 3)
18
+ - `HUMAN_SKIP` constant (empty — humans get full pipeline)
19
+ - `SERVICE_SKIP` constant — services skip conversational steps (context, tools, knowledge)
10
20
 
11
21
  ### Changed
12
22
  - Renamed `McpToolAdapter` to `ToolAdapter` (backwards compat alias kept)
@@ -45,6 +45,8 @@ module Legion
45
45
 
46
46
  ASYNC_SAFE_STEPS = %i[post_response knowledge_capture response_return].freeze
47
47
 
48
+ MAX_RUBY_LLM_TOOL_ROUNDS = 25
49
+
48
50
  ASYNC_THREAD_POOL = Concurrent::FixedThreadPool.new(4, fallback_policy: :caller_runs)
49
51
 
50
52
  def initialize(request)
@@ -132,15 +134,22 @@ module Legion
132
134
  def execute_steps
133
135
  executed = 0
134
136
  skipped = 0
137
+ pipeline_start = ::Process.clock_gettime(::Process::CLOCK_MONOTONIC)
138
+ step_timings = []
135
139
  STEPS.each do |step|
136
140
  if Profile.skip?(@profile, step)
137
141
  skipped += 1
138
142
  next
139
143
  end
140
144
 
145
+ t0 = ::Process.clock_gettime(::Process::CLOCK_MONOTONIC)
141
146
  execute_step(step) { send(:"step_#{step}") }
147
+ elapsed_ms = ((::Process.clock_gettime(::Process::CLOCK_MONOTONIC) - t0) * 1000).round
148
+ step_timings << "#{step}=#{elapsed_ms}ms"
142
149
  executed += 1
143
150
  end
151
+ total_ms = ((::Process.clock_gettime(::Process::CLOCK_MONOTONIC) - pipeline_start) * 1000).round
152
+ log.warn("[pipeline][timing] profile=#{@profile} total=#{total_ms}ms executed=#{executed} skipped=#{skipped} #{step_timings.join(' ')}")
144
153
  annotate_top_level_span(steps_executed: executed, steps_skipped: skipped)
145
154
  end
146
155
 
@@ -464,6 +473,7 @@ module Legion
464
473
 
465
474
  def execute_provider_request_ruby_llm
466
475
  session, message_content = build_ruby_llm_session
476
+ install_tool_loop_guard(session)
467
477
  @raw_response = message_content ? session.ask(message_content) : session
468
478
  end
469
479
 
@@ -645,6 +655,7 @@ module Legion
645
655
  )
646
656
 
647
657
  session, message_content = build_ruby_llm_session
658
+ install_tool_loop_guard(session)
648
659
  @raw_response = message_content ? session.ask(message_content, &) : session
649
660
 
650
661
  @timestamps[:provider_end] = Time.now
@@ -678,6 +689,19 @@ module Legion
678
689
  inject_registry_tools(session)
679
690
  end
680
691
 
692
+ def install_tool_loop_guard(session)
693
+ return unless session.respond_to?(:on)
694
+
695
+ tool_round = 0
696
+ session.on(:tool_call) do |_tool_call|
697
+ tool_round += 1
698
+ if tool_round > MAX_RUBY_LLM_TOOL_ROUNDS
699
+ log.warn("[pipeline] tool loop cap hit: #{tool_round} rounds, halting")
700
+ raise Legion::LLM::PipelineError, "tool loop exceeded #{MAX_RUBY_LLM_TOOL_ROUNDS} rounds"
701
+ end
702
+ end
703
+ end
704
+
681
705
  def apply_ruby_llm_instructions(session)
682
706
  injected_system = EnrichmentInjector.inject(
683
707
  system: @request.system,
@@ -21,6 +21,14 @@ module Legion
21
21
  tool_calls context_store post_response knowledge_capture
22
22
  ].freeze
23
23
 
24
+ HUMAN_SKIP = %i[].freeze
25
+
26
+ SERVICE_SKIP = %i[
27
+ conversation_uuid context_load gaia_advisory
28
+ rag_context tool_discovery confidence_scoring
29
+ tool_calls context_store knowledge_capture
30
+ ].freeze
31
+
24
32
  module_function
25
33
 
26
34
  def derive(caller_hash)
@@ -31,7 +39,9 @@ module Legion
31
39
  identity = requested_by[:identity].to_s
32
40
 
33
41
  return :quick_reply if type == :quick_reply
34
- return :external unless type == :system
42
+ return :human if %i[human user].include?(type)
43
+ return :service if type == :service
44
+ return :external unless type == :system
35
45
 
36
46
  identity.start_with?('gaia:') ? :gaia : :system
37
47
  end
@@ -41,6 +51,8 @@ module Legion
41
51
  when :gaia then GAIA_SKIP.include?(step)
42
52
  when :system then SYSTEM_SKIP.include?(step)
43
53
  when :quick_reply then QUICK_REPLY_SKIP.include?(step)
54
+ when :human then HUMAN_SKIP.include?(step)
55
+ when :service then SERVICE_SKIP.include?(step)
44
56
  else false
45
57
  end
46
58
  end
@@ -446,8 +446,11 @@ module Legion
446
446
  last_user = messages.select { |m| (m[:role] || m['role']).to_s == 'user' }.last
447
447
  prompt = (last_user || {})[:content] || (last_user || {})['content'] || ''
448
448
 
449
+ route_t0 = ::Process.clock_gettime(::Process::CLOCK_MONOTONIC)
450
+
449
451
  if defined?(Legion::Gaia) && Legion::Gaia.respond_to?(:started?) && Legion::Gaia.started? && prompt.to_s.length.positive?
450
452
  begin
453
+ gaia_t0 = ::Process.clock_gettime(::Process::CLOCK_MONOTONIC)
451
454
  frame = Legion::Gaia::InputFrame.new(
452
455
  content: prompt,
453
456
  channel_id: :api,
@@ -456,6 +459,8 @@ module Legion
456
459
  metadata: { source_type: :human_direct, salience: 0.9 }
457
460
  )
458
461
  Legion::Gaia.ingest(frame)
462
+ gaia_ms = ((::Process.clock_gettime(::Process::CLOCK_MONOTONIC) - gaia_t0) * 1000).round
463
+ log.warn("[inference][timing] gaia_ingest=#{gaia_ms}ms request_id=#{request_id}")
459
464
  rescue StandardError => e
460
465
  handle_exception(e, level: :warn, operation: 'llm.routes.gaia_ingest', request_id: request_id)
461
466
  end
@@ -501,6 +506,9 @@ module Legion
501
506
  cache: { strategy: :default, cacheable: true }
502
507
  )
503
508
 
509
+ setup_ms = ((::Process.clock_gettime(::Process::CLOCK_MONOTONIC) - route_t0) * 1000).round
510
+ log.warn("[inference][timing] pre_pipeline_setup=#{setup_ms}ms request_id=#{request_id}")
511
+
504
512
  executor = Legion::LLM::Pipeline::Executor.new(pipeline_request)
505
513
 
506
514
  if streaming
@@ -561,7 +569,10 @@ module Legion
561
569
  end
562
570
  # rubocop:enable Metrics/BlockLength
563
571
  else
572
+ exec_t0 = ::Process.clock_gettime(::Process::CLOCK_MONOTONIC)
564
573
  pipeline_response = executor.call
574
+ exec_ms = ((::Process.clock_gettime(::Process::CLOCK_MONOTONIC) - exec_t0) * 1000).round
575
+ log.warn("[inference][timing] executor_call=#{exec_ms}ms request_id=#{request_id}")
565
576
  raw_msg = pipeline_response.message
566
577
  content = raw_msg.is_a?(Hash) ? (raw_msg[:content] || raw_msg['content']) : raw_msg.to_s
567
578
  routing = pipeline_response.routing || {}
@@ -2,6 +2,6 @@
2
2
 
3
3
  module Legion
4
4
  module LLM
5
- VERSION = '0.6.18'
5
+ VERSION = '0.6.20'
6
6
  end
7
7
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: legion-llm
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.6.18
4
+ version: 0.6.20
5
5
  platform: ruby
6
6
  authors:
7
7
  - Esity