RubyGems - legion-llm - Versions diffs - 0.6.18 → 0.6.20 - Mend

legion-llm 0.6.18 → 0.6.20

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

checksums.yaml +4 -4
data/CHANGELOG.md +11 -1
data/lib/legion/llm/pipeline/executor.rb +24 -0
data/lib/legion/llm/pipeline/profile.rb +13 -1
data/lib/legion/llm/routes.rb +11 -0
data/lib/legion/llm/version.rb +1 -1
metadata +1 -1

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 83b1bf0eb338e47eb37627eb7004afb30b3fba91627577034554827edd797bcf
-  data.tar.gz: 54ba4b787dfb7ebbbe6ef10f363cb237075f5b37f74467c9408db723d9a36c48
+  metadata.gz: 45d07a2c60a8663ba1b62165b3b489d49a2aac37ee1e1ec6abff7bd5f4357d6c
+  data.tar.gz: 9ee8246c75fee6d7e690b55f4e2a91b030f6b142c91dd79acb7bf66edf4d9d05
 SHA512:
-  metadata.gz: 1a546dca02b403bc025621c1a980cf081114c383f97bd3ce3eef245a7b3314cdaacc7328cbbbfd7879cbf95dd53069e48b9af8f2401099e68966e0ee4e0b624f
-  data.tar.gz: ddb6aabeca04cdbdacfd2412ee2713dfaecc4b17911b55246ec680538874df7725eabef8768fe69c8202952d6a3f14d0a0ae27696cd64ebf2727679f7947cd61
+  metadata.gz: 92b102167bb6f346fab490787baedda2f2fa6fb528713c6b055b269f747c490d56fed5d21027616bc2c6d1f7cf4069ce14bb9b7607f7a6ad07c2c69b05ce0814
+  data.tar.gz: f1fded39722bf678936df28f3bbf3ec095265bdabc28f70eaf67e64fae5519b7c58842a8432e4b4bfdc0476c9275473f75a9c83bf0c77d6f5cc2afe1fa700aeb

data/CHANGELOG.md CHANGED Viewed

@@ -1,12 +1,22 @@
 # Legion LLM Changelog
-## [Unreleased]
+## [0.6.20] - 2026-04-06
+### Added
+- Per-step pipeline timing diagnostics: `[pipeline][timing]` log line with duration per step
+- Pre-pipeline timing in inference route: `gaia_ingest`, `pre_pipeline_setup`, `executor_call` durations
+- `MAX_RUBY_LLM_TOOL_ROUNDS` (25) — caps RubyLLM's unbounded tool-use loop to prevent infinite cycling
+- `install_tool_loop_guard` applied to both streaming and non-streaming provider paths
 ### Fixed
 - fix Process namespace collision by using ::Process::CLOCK_MONOTONIC prefix inside Legion namespace
 ### Added
 - `Legion::LLM::Pipeline::ToolAdapter` - wraps Tools::Base for RubyLLM sessions
+- `Profile.derive` returns `:human` for `type: :human` and `type: :user` callers (Wire Format Phase 3)
+- `Profile.derive` returns `:service` for `type: :service` callers (Wire Format Phase 3)
+- `HUMAN_SKIP` constant (empty — humans get full pipeline)
+- `SERVICE_SKIP` constant — services skip conversational steps (context, tools, knowledge)
 ### Changed
 - Renamed `McpToolAdapter` to `ToolAdapter` (backwards compat alias kept)

data/lib/legion/llm/pipeline/executor.rb CHANGED Viewed

@@ -45,6 +45,8 @@ module Legion
         ASYNC_SAFE_STEPS = %i[post_response knowledge_capture response_return].freeze
+        MAX_RUBY_LLM_TOOL_ROUNDS = 25
         ASYNC_THREAD_POOL = Concurrent::FixedThreadPool.new(4, fallback_policy: :caller_runs)
         def initialize(request)
@@ -132,15 +134,22 @@ module Legion
         def execute_steps
           executed = 0
           skipped = 0
+          pipeline_start = ::Process.clock_gettime(::Process::CLOCK_MONOTONIC)
+          step_timings = []
           STEPS.each do |step|
             if Profile.skip?(@profile, step)
               skipped += 1
               next
             end
+            t0 = ::Process.clock_gettime(::Process::CLOCK_MONOTONIC)
             execute_step(step) { send(:"step_#{step}") }
+            elapsed_ms = ((::Process.clock_gettime(::Process::CLOCK_MONOTONIC) - t0) * 1000).round
+            step_timings << "#{step}=#{elapsed_ms}ms"
             executed += 1
           end
+          total_ms = ((::Process.clock_gettime(::Process::CLOCK_MONOTONIC) - pipeline_start) * 1000).round
+          log.warn("[pipeline][timing] profile=#{@profile} total=#{total_ms}ms executed=#{executed} skipped=#{skipped} #{step_timings.join(' ')}")
           annotate_top_level_span(steps_executed: executed, steps_skipped: skipped)
         end
@@ -464,6 +473,7 @@ module Legion
         def execute_provider_request_ruby_llm
           session, message_content = build_ruby_llm_session
+          install_tool_loop_guard(session)
           @raw_response = message_content ? session.ask(message_content) : session
         end
@@ -645,6 +655,7 @@ module Legion
           )
           session, message_content = build_ruby_llm_session
+          install_tool_loop_guard(session)
           @raw_response = message_content ? session.ask(message_content, &) : session
           @timestamps[:provider_end] = Time.now
@@ -678,6 +689,19 @@ module Legion
           inject_registry_tools(session)
         end
+        def install_tool_loop_guard(session)
+          return unless session.respond_to?(:on)
+          tool_round = 0
+          session.on(:tool_call) do |_tool_call|
+            tool_round += 1
+            if tool_round > MAX_RUBY_LLM_TOOL_ROUNDS
+              log.warn("[pipeline] tool loop cap hit: #{tool_round} rounds, halting")
+              raise Legion::LLM::PipelineError, "tool loop exceeded #{MAX_RUBY_LLM_TOOL_ROUNDS} rounds"
+            end
+          end
+        end
         def apply_ruby_llm_instructions(session)
           injected_system = EnrichmentInjector.inject(
             system:      @request.system,

data/lib/legion/llm/pipeline/profile.rb CHANGED Viewed

@@ -21,6 +21,14 @@ module Legion
           tool_calls context_store post_response knowledge_capture
         ].freeze
+        HUMAN_SKIP = %i[].freeze
+        SERVICE_SKIP = %i[
+          conversation_uuid context_load gaia_advisory
+          rag_context tool_discovery confidence_scoring
+          tool_calls context_store knowledge_capture
+        ].freeze
         module_function
         def derive(caller_hash)
@@ -31,7 +39,9 @@ module Legion
           identity = requested_by[:identity].to_s
           return :quick_reply if type == :quick_reply
-          return :external unless type == :system
+          return :human       if %i[human user].include?(type)
+          return :service     if type == :service
+          return :external    unless type == :system
           identity.start_with?('gaia:') ? :gaia : :system
         end
@@ -41,6 +51,8 @@ module Legion
           when :gaia        then GAIA_SKIP.include?(step)
           when :system      then SYSTEM_SKIP.include?(step)
           when :quick_reply then QUICK_REPLY_SKIP.include?(step)
+          when :human       then HUMAN_SKIP.include?(step)
+          when :service     then SERVICE_SKIP.include?(step)
           else false
           end
         end

data/lib/legion/llm/routes.rb CHANGED Viewed

@@ -446,8 +446,11 @@ module Legion
           last_user = messages.select { |m| (m[:role] || m['role']).to_s == 'user' }.last
           prompt    = (last_user || {})[:content] || (last_user || {})['content'] || ''
+          route_t0 = ::Process.clock_gettime(::Process::CLOCK_MONOTONIC)
           if defined?(Legion::Gaia) && Legion::Gaia.respond_to?(:started?) && Legion::Gaia.started? && prompt.to_s.length.positive?
             begin
+              gaia_t0 = ::Process.clock_gettime(::Process::CLOCK_MONOTONIC)
               frame = Legion::Gaia::InputFrame.new(
                 content:      prompt,
                 channel_id:   :api,
@@ -456,6 +459,8 @@ module Legion
                 metadata:     { source_type: :human_direct, salience: 0.9 }
               )
               Legion::Gaia.ingest(frame)
+              gaia_ms = ((::Process.clock_gettime(::Process::CLOCK_MONOTONIC) - gaia_t0) * 1000).round
+              log.warn("[inference][timing] gaia_ingest=#{gaia_ms}ms request_id=#{request_id}")
             rescue StandardError => e
               handle_exception(e, level: :warn, operation: 'llm.routes.gaia_ingest', request_id: request_id)
             end
@@ -501,6 +506,9 @@ module Legion
             cache:           { strategy: :default, cacheable: true }
           )
+          setup_ms = ((::Process.clock_gettime(::Process::CLOCK_MONOTONIC) - route_t0) * 1000).round
+          log.warn("[inference][timing] pre_pipeline_setup=#{setup_ms}ms request_id=#{request_id}")
           executor = Legion::LLM::Pipeline::Executor.new(pipeline_request)
           if streaming
@@ -561,7 +569,10 @@ module Legion
             end
             # rubocop:enable Metrics/BlockLength
           else
+            exec_t0 = ::Process.clock_gettime(::Process::CLOCK_MONOTONIC)
             pipeline_response = executor.call
+            exec_ms = ((::Process.clock_gettime(::Process::CLOCK_MONOTONIC) - exec_t0) * 1000).round
+            log.warn("[inference][timing] executor_call=#{exec_ms}ms request_id=#{request_id}")
             raw_msg = pipeline_response.message
             content = raw_msg.is_a?(Hash) ? (raw_msg[:content] || raw_msg['content']) : raw_msg.to_s
             routing = pipeline_response.routing || {}

data/lib/legion/llm/version.rb CHANGED Viewed

@@ -2,6 +2,6 @@
 module Legion
   module LLM
-    VERSION = '0.6.18'
+    VERSION = '0.6.20'
   end
 end

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: legion-llm
 version: !ruby/object:Gem::Version
-  version: 0.6.18
+  version: 0.6.20
 platform: ruby
 authors:
 - Esity