RubyGems - legion-llm - Versions diffs - 0.6.23 → 0.6.24 - Mend

legion-llm 0.6.23 → 0.6.24

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9) hide show

checksums.yaml +4 -4
data/CHANGELOG.md +16 -0
data/lib/legion/llm/conversation_store.rb +3 -1
data/lib/legion/llm/patches/ruby_llm_parallel_tools.rb +102 -0
data/lib/legion/llm/pipeline/executor.rb +56 -4
data/lib/legion/llm/settings.rb +1 -0
data/lib/legion/llm/version.rb +1 -1
data/lib/legion/llm.rb +1 -0
metadata +2 -1

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: bd422bcc5c5b6da0dbd4906df8ac394e5c712e709eb8cb367cc676fbf6e45f97
-  data.tar.gz: 148e5741014313918781e757c87a50e40b2d5e5ef164631b71959f6027c70316
+  metadata.gz: c7e4263174302a505c21078bbc343c36134c08e22f0ad2d2741e6e2b4b747327
+  data.tar.gz: ef99cbea7efe6f0e0c479586c73792d814328169ca1e0faaf2362da8cb140be1
 SHA512:
-  metadata.gz: dc80d32daf35e53bfe514a0e318911c97e9e3971374eb711128c68db6a02084cc9fd259f68ccf6e0242fb10ff1cbccf2a1ca9132b37e2aa1e6ee23cd5cbe0b5d
-  data.tar.gz: 5673f3536126bc1d3e17e69ab2892edb1b8bd9524bdc6c38993e85ac0869e48a42bd82f9d8691923527f701940a42969052bbbf9db40976434f1ad00210f3934
+  metadata.gz: bb18ac2c9d7cb8108edc71208dc97b4befaed9ab6cdbcb7c1f8662c00402c08619609e3ce5d372b9f4483175020ef2100ac5b86ae2d147216e755f4211173f41
+  data.tar.gz: e0f5b35fb908eff66faea9509f984fa141531c467db9ab1c8e2276f5e47dcd7a65f42e63e5004db4d27334867489c578a44c51b3779d9d954bd7d4028487e37e

data/CHANGELOG.md CHANGED Viewed

@@ -1,5 +1,21 @@
 # Legion LLM Changelog
+## [0.6.24] - 2026-04-08
+### Added
+- `Legion::LLM::Patches::RubyLLMParallelTools`: monkey-patch that replaces RubyLLM's serial `handle_tool_calls` loop with concurrent thread execution so all tool calls in a batch run in parallel
+- `ToolResultWrapper` struct exposes `tool_call_id`, `id`, `tool_name`, `result`, and `content` so bridge scripts can match results back to UI slots without falling back to name-based matching
+- `emit_tool_result_event` in `Pipeline::Executor`: fires `tool_event_handler` with `type: :tool_result`, `duration_ms`, `started_at`, and `finished_at` after each tool completes
+- `tool_event_handler` now also fires `type: :model_fallback` events (with `from_model`, `to_model`, `error`, `reason`) on auth-failed provider fallback in both regular and streaming paths
+- `max_tool_rounds` setting (default `200`) in LLM settings; `install_tool_loop_guard` now reads it at call time so callers can override the cap per-session
+- `started_at` timestamp stored in `Thread.current[:legion_current_tool_started_at]` for accurate per-call wall-clock duration even across parallel threads
+### Changed
+- `MAX_RUBY_LLM_TOOL_ROUNDS` constant raised from `25` to `200` (now serves as a fallback default for the configurable `max_tool_rounds` setting)
+### Fixed
+- `ConversationStore#db_append_message` now serializes non-String `content` values (e.g., tool-call arrays) to JSON before writing to the database, preventing Sequel type errors when tool-use messages are persisted
 ## [0.6.23] - 2026-04-07
 ### Fixed

data/lib/legion/llm/conversation_store.rb CHANGED Viewed

@@ -373,11 +373,13 @@ module Legion
         end
         def db_append_message(conversation_id, msg)
+          content = msg[:content]
+          content = content.to_json unless content.is_a?(String) || content.nil?
           row = {
             conversation_id: conversation_id,
             seq:             msg[:seq],
             role:            msg[:role].to_s,
-            content:         msg[:content],
+            content:         content,
             provider:        msg[:provider]&.to_s,
             model:           msg[:model]&.to_s,
             input_tokens:    msg[:input_tokens],

data/lib/legion/llm/patches/ruby_llm_parallel_tools.rb ADDED Viewed

@@ -0,0 +1,102 @@
+# frozen_string_literal: true
+# Patch: RubyLLM::Chat parallel tool call execution
+#
+# RubyLLM's default `handle_tool_calls` iterates tool calls serially with
+# `.each_value`, meaning when an LLM returns N tool calls in a single response
+# they execute one-at-a-time. This patch replaces that loop with concurrent
+# thread execution so all tool calls in a batch run in parallel, and results
+# are collected before re-prompting the model.
+#
+# Additionally, RubyLLM fires `on_tool_result` with the raw tool return value
+# (a String/Hash/etc.) which carries no `tool_call_id`. The legion-interlink
+# bridge script's `serialize_tool_result` needs a `tool_call_id` field to
+# match results back to the correct tool call slot in the UI — without it
+# every result falls back to name-based matching, which breaks when multiple
+# tools of the same name run in parallel and leaves them stuck on RUNNING.
+#
+# Fix: wrap each result in a ToolResultWrapper that exposes both the raw
+# content/result AND the originating tool_call_id / id fields.
+#
+# NOTE: This is a temporary shim. When RubyLLM is replaced this file goes away.
+#
+# Thread safety notes:
+#   - Each tool call executes in its own thread.
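+#   - @on[:tool_call] fires per-thread, so handlers run concurrently; any state a handler shares across calls (e.g. a tool-round counter, or a handler that raises) must tolerate that — NOTE(review): confirm for Pipeline::Executor's on_tool_call hook.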
+#   - @on[:tool_result] fires per-thread with the wrapper object.
+#   - add_message is called serially after all threads complete to preserve
+#     message ordering and avoid races on @messages.
+#   - If ANY tool returns a RubyLLM::Tool::Halt, complete() is skipped —
+#     matching the original semantics.
+module Legion
+  module LLM
+    module Patches
+      # Pairs a tool's raw return value with the id and name of the tool call that
+      # produced it, so a consumer (per this file's header, the bridge script's
+      # serialize_tool_result) can read :tool_call_id/:id and :result/:content off one object.
+      ToolResultWrapper = Struct.new(:result, :content, :tool_call_id, :id, :tool_name) do
+        # Answers true for any class the wrapped `result` is an instance of (or the Struct's own
+        # ancestry), so is_a?/kind_of? can still detect a RubyLLM::Tool::Halt. NOTE(review): `case/when` (Module#===) does not call is_a?.
+        def is_a?(klass)
+          result.is_a?(klass) || super
+        end
+        alias_method :kind_of?, :is_a?
+      end
+      module RubyLLMParallelTools
+        def handle_tool_calls(response, &)
+          tool_calls = response.tool_calls.values
+          # One thread per tool call; `threads` keeps the order of response.tool_calls for the commit loop below.
+          threads = tool_calls.map do |tool_call|
+            Thread.new do
+              @on[:new_message]&.call
+              @on[:tool_call]&.call(tool_call)
+              raw = execute_tool(tool_call)
+              # Wrap the raw value so the :tool_result callback also receives the tool call's id and name.
+              wrapper = ToolResultWrapper.new(
+                raw,                  # :result  — raw value (String/Hash/Halt/etc.)
+                raw,                  # :content — same value as :result (bridge compat)
+                tool_call.id,         # :tool_call_id
+                tool_call.id,         # :id — same id, second accessor name
+                tool_call.name        # :tool_name
+              )
+              @on[:tool_result]&.call(wrapper)
+              { tool_call: tool_call, raw: raw }
+            end
+          end
+          begin
+            results = threads.map(&:value) # waits for every thread; a thread's exception re-raises here (ensure then stops the rest)
+            # Commit tool messages serially in call order (no @messages races); the last Halt seen is returned instead of calling complete.
+            halt_result = nil
+            results.each do |entry|
+              tool_call    = entry[:tool_call]
+              raw          = entry[:raw]
+              tool_payload = raw.is_a?(RubyLLM::Tool::Halt) ? raw.content : raw
+              content      = content_like?(tool_payload) ? tool_payload : tool_payload.to_s
+              message      = add_message(role: :tool, content: content, tool_call_id: tool_call.id)
+              @on[:end_message]&.call(message)
+              halt_result = raw if raw.is_a?(RubyLLM::Tool::Halt)
+            end
+            reset_tool_choice if forced_tool_choice?
+            halt_result || complete(&)
+          ensure
+            threads.each do |thread|
+              thread.kill if thread.alive?
+              thread.join
+            end
+          end
+        end
+      end
+    end
+  end
+end
+# Use prepend (not alias_method/override) so the patch stays clearly visible
+# in the ancestor chain and is easy to remove when RubyLLM is dropped.
+RubyLLM::Chat.prepend(Legion::LLM::Patches::RubyLLMParallelTools)

data/lib/legion/llm/pipeline/executor.rb CHANGED Viewed

@@ -46,7 +46,7 @@ module Legion
         ASYNC_SAFE_STEPS = %i[post_response knowledge_capture response_return].freeze
-        MAX_RUBY_LLM_TOOL_ROUNDS = 25
+        MAX_RUBY_LLM_TOOL_ROUNDS = 200
         ASYNC_THREAD_POOL = Concurrent::FixedThreadPool.new(4, fallback_policy: :caller_runs)
@@ -340,9 +340,17 @@ module Legion
             )
             if fallback
               log.warn "[pipeline] #{@resolved_provider} auth failed (#{e.class}), falling back to #{fallback[:provider]}:#{fallback[:model]}"
+              from_provider = @resolved_provider
+              from_model = @resolved_model
               @resolved_provider = fallback[:provider]
               @resolved_model = fallback[:model]
               @warnings << { type: :provider_fallback, original_error: e.message, fallback: "#{@resolved_provider}:#{@resolved_model}" }
+              @tool_event_handler&.call(
+                type: :model_fallback,
+                from_provider: from_provider, to_provider: @resolved_provider,
+                from_model: from_model, to_model: @resolved_model,
+                error: e.message, reason: 'auth_failed'
+              )
               @timeline.record(
                 category: :provider, key: 'provider:fallback',
                 direction: :internal,
@@ -625,9 +633,17 @@ module Legion
             if fallback
               log.warn "[pipeline] #{@resolved_provider} stream auth failed (#{e.class}), " \
                        "falling back to #{fallback[:provider]}:#{fallback[:model]}"
+              from_provider = @resolved_provider
+              from_model = @resolved_model
               @resolved_provider = fallback[:provider]
               @resolved_model = fallback[:model]
               @warnings << { type: :provider_fallback, original_error: e.message, fallback: "#{@resolved_provider}:#{@resolved_model}" }
+              @tool_event_handler&.call(
+                type: :model_fallback,
+                from_provider: from_provider, to_provider: @resolved_provider,
+                from_model: from_model, to_model: @resolved_model,
+                error: e.message, reason: 'auth_failed'
+              )
               retry
             end
             raise Legion::LLM::AuthError, e.message
@@ -670,6 +686,7 @@ module Legion
             Thread.current[:legion_tool_event_handler] = nil
             Thread.current[:legion_current_tool_call_id] = nil
             Thread.current[:legion_current_tool_name] = nil
+            Thread.current[:legion_current_tool_started_at] = nil
           end
           @timestamps[:provider_end] = Time.now
@@ -709,31 +726,66 @@ module Legion
             return
           end
+          max_rounds = Legion::LLM.settings[:max_tool_rounds] || MAX_RUBY_LLM_TOOL_ROUNDS
           tool_round = 0
           session.on_tool_call do |tool_call|
             tool_round += 1
-            if tool_round > MAX_RUBY_LLM_TOOL_ROUNDS
+            if tool_round > max_rounds
               log.warn("[pipeline] tool loop cap hit: #{tool_round} rounds, halting")
-              raise Legion::LLM::PipelineError, "tool loop exceeded #{MAX_RUBY_LLM_TOOL_ROUNDS} rounds"
+              raise Legion::LLM::PipelineError, "tool loop exceeded #{max_rounds} rounds"
             end
             emit_tool_call_event(tool_call, tool_round)
           end
+          # Wire up tool-result events so the API SSE stream can notify the
+          # frontend when each tool finishes (clears the RUNNING state in the UI).
+          return unless session.respond_to?(:on_tool_result)
+          session.on_tool_result do |tool_result|
+            emit_tool_result_event(tool_result)
+          end
         end
         def emit_tool_call_event(tool_call, round)
           tc_id   = tool_call_field(tool_call, :id)
           tc_name = tool_call_field(tool_call, :name)
           tc_args = tool_call_field(tool_call, :arguments)
+          started_at = Time.now
           log.info("[pipeline][tool-call] round=#{round} id=#{tc_id} tool=#{tc_name}")
+          # Record the start time in the current thread's locals so emit_tool_result_event can
+          # compute wall-clock duration; the slot is per thread (one tool call per thread in the parallel patch), not keyed by tool-call id.
           Thread.current[:legion_current_tool_call_id] = tc_id
           Thread.current[:legion_current_tool_name] = tc_name
+          Thread.current[:legion_current_tool_started_at] = started_at
           @tool_event_handler&.call(
             type: :tool_call, tool_call_id: tc_id, tool_name: tc_name,
-            arguments: tc_args, round: round
+            arguments: tc_args, round: round, started_at: started_at
+          )
+        end
+        def emit_tool_result_event(tool_result)
+          # tool_result may be a raw value (String/Hash) or a ToolResultWrapper from the parallel patch; fields are read
+          # via respond_to?. The wrapper has no started_at member, so for it the start time comes from Thread.current.
+          tc_id      = tool_result.respond_to?(:tool_call_id) ? tool_result.tool_call_id : Thread.current[:legion_current_tool_call_id]
+          tc_name    = tool_result.respond_to?(:tool_name)    ? tool_result.tool_name    : Thread.current[:legion_current_tool_name]
+          started_at = tool_result.respond_to?(:started_at)   ? tool_result.started_at   : Thread.current[:legion_current_tool_started_at]
+          finished_at = Time.now
+          raw = tool_result.respond_to?(:result) ? tool_result.result : tool_result
+          duration_ms = started_at ? ((finished_at - started_at) * 1000).round : nil
+          log.info("[pipeline][tool-result] id=#{tc_id} tool=#{tc_name} duration_ms=#{duration_ms}")
+          result_str = (raw.is_a?(String) ? raw : raw.to_s)[0, 4096]
+          @tool_event_handler&.call(
+            type: :tool_result, tool_call_id: tc_id, tool_name: tc_name,
+            result: result_str, result_size: (raw.is_a?(String) ? raw : raw.to_s).bytesize,
+            started_at: started_at, finished_at: finished_at, duration_ms: duration_ms
           )
         end

data/lib/legion/llm/settings.rb CHANGED Viewed

@@ -13,6 +13,7 @@ module Legion
           connected:                 false,
           pipeline_enabled:          true,
           pipeline_async_post_steps: true,
+          max_tool_rounds:           200,
           default_model:             model_override,
           default_provider:          nil,
           providers:                 providers,

data/lib/legion/llm/version.rb CHANGED Viewed

@@ -2,6 +2,6 @@
 module Legion
   module LLM
-    VERSION = '0.6.23'
+    VERSION = '0.6.24'
   end
 end

data/lib/legion/llm.rb CHANGED Viewed

@@ -3,6 +3,7 @@
 require 'legion/logging/helper'
 require 'ruby_llm'
+require 'legion/llm/patches/ruby_llm_parallel_tools'
 require 'legion/llm/version'
 require 'legion/llm/errors'
 require 'legion/llm/conversation_store'

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: legion-llm
 version: !ruby/object:Gem::Version
-  version: 0.6.23
+  version: 0.6.24
 platform: ruby
 authors:
 - Esity
@@ -260,6 +260,7 @@ files:
 - lib/legion/llm/native_dispatch.rb
 - lib/legion/llm/off_peak.rb
 - lib/legion/llm/override_confidence.rb
+- lib/legion/llm/patches/ruby_llm_parallel_tools.rb
 - lib/legion/llm/pipeline.rb
 - lib/legion/llm/pipeline/audit_publisher.rb
 - lib/legion/llm/pipeline/enrichment_injector.rb