legion-llm 0.6.23 → 0.6.24

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: bd422bcc5c5b6da0dbd4906df8ac394e5c712e709eb8cb367cc676fbf6e45f97
4
- data.tar.gz: 148e5741014313918781e757c87a50e40b2d5e5ef164631b71959f6027c70316
3
+ metadata.gz: c7e4263174302a505c21078bbc343c36134c08e22f0ad2d2741e6e2b4b747327
4
+ data.tar.gz: ef99cbea7efe6f0e0c479586c73792d814328169ca1e0faaf2362da8cb140be1
5
5
  SHA512:
6
- metadata.gz: dc80d32daf35e53bfe514a0e318911c97e9e3971374eb711128c68db6a02084cc9fd259f68ccf6e0242fb10ff1cbccf2a1ca9132b37e2aa1e6ee23cd5cbe0b5d
7
- data.tar.gz: 5673f3536126bc1d3e17e69ab2892edb1b8bd9524bdc6c38993e85ac0869e48a42bd82f9d8691923527f701940a42969052bbbf9db40976434f1ad00210f3934
6
+ metadata.gz: bb18ac2c9d7cb8108edc71208dc97b4befaed9ab6cdbcb7c1f8662c00402c08619609e3ce5d372b9f4483175020ef2100ac5b86ae2d147216e755f4211173f41
7
+ data.tar.gz: e0f5b35fb908eff66faea9509f984fa141531c467db9ab1c8e2276f5e47dcd7a65f42e63e5004db4d27334867489c578a44c51b3779d9d954bd7d4028487e37e
data/CHANGELOG.md CHANGED
@@ -1,5 +1,21 @@
1
1
  # Legion LLM Changelog
2
2
 
3
+ ## [0.6.24] - 2026-04-08
4
+
5
+ ### Added
6
+ - `Legion::LLM::Patches::RubyLLMParallelTools`: monkey-patch that replaces RubyLLM's serial `handle_tool_calls` loop with concurrent thread execution so all tool calls in a batch run in parallel
7
+ - `ToolResultWrapper` struct exposes `tool_call_id`, `id`, `tool_name`, `result`, and `content` so bridge scripts can match results back to UI slots without falling back to name-based matching
8
+ - `emit_tool_result_event` in `Pipeline::Executor`: fires `tool_event_handler` with `type: :tool_result`, `duration_ms`, `started_at`, and `finished_at` after each tool completes
9
+ - `tool_event_handler` now also fires `type: :model_fallback` events (with `from_provider`, `to_provider`, `from_model`, `to_model`, `error`, `reason`) on auth-failed provider fallback in both regular and streaming paths
10
+ - `max_tool_rounds` setting (default `200`) in LLM settings; `install_tool_loop_guard` now reads it at call time so callers can override the cap per-session
11
+ - `started_at` timestamp stored in `Thread.current[:legion_current_tool_started_at]` for accurate per-call wall-clock duration even across parallel threads
12
+
13
+ ### Changed
14
+ - `MAX_RUBY_LLM_TOOL_ROUNDS` constant raised from `25` to `200` (now serves as a fallback default for the configurable `max_tool_rounds` setting)
15
+
16
+ ### Fixed
17
+ - `ConversationStore#db_append_message` now serializes non-String `content` values (e.g., tool-call arrays) to JSON before writing to the database, preventing Sequel type errors when tool-use messages are persisted
18
+
3
19
  ## [0.6.23] - 2026-04-07
4
20
 
5
21
  ### Fixed
@@ -373,11 +373,13 @@ module Legion
373
373
  end
374
374
 
375
375
  def db_append_message(conversation_id, msg)
376
+ content = msg[:content]
377
+ content = content.to_json unless content.is_a?(String) || content.nil?
376
378
  row = {
377
379
  conversation_id: conversation_id,
378
380
  seq: msg[:seq],
379
381
  role: msg[:role].to_s,
380
- content: msg[:content],
382
+ content: content,
381
383
  provider: msg[:provider]&.to_s,
382
384
  model: msg[:model]&.to_s,
383
385
  input_tokens: msg[:input_tokens],
@@ -0,0 +1,102 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Patch: RubyLLM::Chat parallel tool call execution
4
+ #
5
+ # RubyLLM's default `handle_tool_calls` iterates tool calls serially with
6
+ # `.each_value`, meaning when an LLM returns N tool calls in a single response
7
+ # they execute one-at-a-time. This patch replaces that loop with concurrent
8
+ # thread execution so all tool calls in a batch run in parallel, and results
9
+ # are collected before re-prompting the model.
10
+ #
11
+ # Additionally, RubyLLM fires `on_tool_result` with the raw tool return value
12
+ # (a String/Hash/etc.) which carries no `tool_call_id`. The legion-interlink
13
+ # bridge script's `serialize_tool_result` needs a `tool_call_id` field to
14
+ # match results back to the correct tool call slot in the UI — without it
15
+ # every result falls back to name-based matching, which breaks when multiple
16
+ # tools of the same name run in parallel and leaves them stuck on RUNNING.
17
+ #
18
+ # Fix: wrap each result in a ToolResultWrapper that exposes both the raw
19
+ # content/result AND the originating tool_call_id / id fields.
20
+ #
21
+ # NOTE: This is a temporary shim. When RubyLLM is replaced this file goes away.
22
+ #
23
+ # Thread safety notes:
24
+ # - Each tool call executes in its own thread.
25
+ # - @on[:tool_call] fires per-thread (fast, just event emission — safe).
26
+ # - @on[:tool_result] fires per-thread with the wrapper object.
27
+ # - add_message is called serially after all threads complete to preserve
28
+ # message ordering and avoid races on @messages.
29
+ # - If ANY tool returns a RubyLLM::Tool::Halt, complete() is skipped —
30
+ # matching the original semantics.
31
+
32
+ module Legion
33
+ module LLM
34
+ module Patches
35
+ # Wraps a raw tool result value so that the bridge-script's
36
+ # serialize_tool_result can read both :tool_call_id/:id (for UI matching)
37
+ # and :result/:content (for the result payload) off a single object.
38
+ ToolResultWrapper = Struct.new(:result, :content, :tool_call_id, :id, :tool_name) do
39
+ # Delegate is_a? checks for RubyLLM::Tool::Halt so the caller can still
40
+ # detect halt results transparently.
41
+ def is_a?(klass)
42
+ result.is_a?(klass) || super
43
+ end
44
+
45
+ alias_method :kind_of?, :is_a?
46
+ end
47
+
48
+ module RubyLLMParallelTools
49
+ def handle_tool_calls(response, &)
50
+ tool_calls = response.tool_calls.values
51
+
52
+ # Dispatch all tool calls concurrently, preserving original order.
53
+ threads = tool_calls.map do |tool_call|
54
+ Thread.new do
55
+ @on[:new_message]&.call
56
+ @on[:tool_call]&.call(tool_call)
57
+ raw = execute_tool(tool_call)
58
+ # Wrap so serialize_tool_result in the bridge script gets an ID.
59
+ wrapper = ToolResultWrapper.new(
60
+ raw, # :result — raw value (String/Hash/Halt/etc.)
61
+ raw, # :content — alias for bridge compat
62
+ tool_call.id, # :tool_call_id
63
+ tool_call.id, # :id
64
+ tool_call.name # :tool_name
65
+ )
66
+ @on[:tool_result]&.call(wrapper)
67
+ { tool_call: tool_call, raw: raw }
68
+ end
69
+ end
70
+
71
+ begin
72
+ results = threads.map(&:value) # block until all complete
73
+
74
+ # Commit messages serially — preserves ordering, avoids @messages races.
75
+ halt_result = nil
76
+ results.each do |entry|
77
+ tool_call = entry[:tool_call]
78
+ raw = entry[:raw]
79
+ tool_payload = raw.is_a?(RubyLLM::Tool::Halt) ? raw.content : raw
80
+ content = content_like?(tool_payload) ? tool_payload : tool_payload.to_s
81
+ message = add_message(role: :tool, content: content, tool_call_id: tool_call.id)
82
+ @on[:end_message]&.call(message)
83
+ halt_result = raw if raw.is_a?(RubyLLM::Tool::Halt)
84
+ end
85
+
86
+ reset_tool_choice if forced_tool_choice?
87
+ halt_result || complete(&)
88
+ ensure
89
+ threads.each do |thread|
90
+ thread.kill if thread.alive?
91
+ thread.join
92
+ end
93
+ end
94
+ end
95
+ end
96
+ end
97
+ end
98
+ end
99
+
100
+ # Use prepend (not alias_method/override) so the patch stays clearly visible
101
+ # in the ancestor chain and is easy to remove when RubyLLM is dropped.
102
+ RubyLLM::Chat.prepend(Legion::LLM::Patches::RubyLLMParallelTools)
@@ -46,7 +46,7 @@ module Legion
46
46
 
47
47
  ASYNC_SAFE_STEPS = %i[post_response knowledge_capture response_return].freeze
48
48
 
49
- MAX_RUBY_LLM_TOOL_ROUNDS = 25
49
+ MAX_RUBY_LLM_TOOL_ROUNDS = 200
50
50
 
51
51
  ASYNC_THREAD_POOL = Concurrent::FixedThreadPool.new(4, fallback_policy: :caller_runs)
52
52
 
@@ -340,9 +340,17 @@ module Legion
340
340
  )
341
341
  if fallback
342
342
  log.warn "[pipeline] #{@resolved_provider} auth failed (#{e.class}), falling back to #{fallback[:provider]}:#{fallback[:model]}"
343
+ from_provider = @resolved_provider
344
+ from_model = @resolved_model
343
345
  @resolved_provider = fallback[:provider]
344
346
  @resolved_model = fallback[:model]
345
347
  @warnings << { type: :provider_fallback, original_error: e.message, fallback: "#{@resolved_provider}:#{@resolved_model}" }
348
+ @tool_event_handler&.call(
349
+ type: :model_fallback,
350
+ from_provider: from_provider, to_provider: @resolved_provider,
351
+ from_model: from_model, to_model: @resolved_model,
352
+ error: e.message, reason: 'auth_failed'
353
+ )
346
354
  @timeline.record(
347
355
  category: :provider, key: 'provider:fallback',
348
356
  direction: :internal,
@@ -625,9 +633,17 @@ module Legion
625
633
  if fallback
626
634
  log.warn "[pipeline] #{@resolved_provider} stream auth failed (#{e.class}), " \
627
635
  "falling back to #{fallback[:provider]}:#{fallback[:model]}"
636
+ from_provider = @resolved_provider
637
+ from_model = @resolved_model
628
638
  @resolved_provider = fallback[:provider]
629
639
  @resolved_model = fallback[:model]
630
640
  @warnings << { type: :provider_fallback, original_error: e.message, fallback: "#{@resolved_provider}:#{@resolved_model}" }
641
+ @tool_event_handler&.call(
642
+ type: :model_fallback,
643
+ from_provider: from_provider, to_provider: @resolved_provider,
644
+ from_model: from_model, to_model: @resolved_model,
645
+ error: e.message, reason: 'auth_failed'
646
+ )
631
647
  retry
632
648
  end
633
649
  raise Legion::LLM::AuthError, e.message
@@ -670,6 +686,7 @@ module Legion
670
686
  Thread.current[:legion_tool_event_handler] = nil
671
687
  Thread.current[:legion_current_tool_call_id] = nil
672
688
  Thread.current[:legion_current_tool_name] = nil
689
+ Thread.current[:legion_current_tool_started_at] = nil
673
690
  end
674
691
 
675
692
  @timestamps[:provider_end] = Time.now
@@ -709,31 +726,66 @@ module Legion
709
726
  return
710
727
  end
711
728
 
729
+ max_rounds = Legion::LLM.settings[:max_tool_rounds] || MAX_RUBY_LLM_TOOL_ROUNDS
712
730
  tool_round = 0
713
731
  session.on_tool_call do |tool_call|
714
732
  tool_round += 1
715
- if tool_round > MAX_RUBY_LLM_TOOL_ROUNDS
733
+ if tool_round > max_rounds
716
734
  log.warn("[pipeline] tool loop cap hit: #{tool_round} rounds, halting")
717
- raise Legion::LLM::PipelineError, "tool loop exceeded #{MAX_RUBY_LLM_TOOL_ROUNDS} rounds"
735
+ raise Legion::LLM::PipelineError, "tool loop exceeded #{max_rounds} rounds"
718
736
  end
719
737
 
720
738
  emit_tool_call_event(tool_call, tool_round)
721
739
  end
740
+
741
+ # Wire up tool-result events so the API SSE stream can notify the
742
+ # frontend when each tool finishes (clears the RUNNING state in the UI).
743
+ return unless session.respond_to?(:on_tool_result)
744
+
745
+ session.on_tool_result do |tool_result|
746
+ emit_tool_result_event(tool_result)
747
+ end
722
748
  end
723
749
 
724
750
  def emit_tool_call_event(tool_call, round)
725
751
  tc_id = tool_call_field(tool_call, :id)
726
752
  tc_name = tool_call_field(tool_call, :name)
727
753
  tc_args = tool_call_field(tool_call, :arguments)
754
+ started_at = Time.now
728
755
 
729
756
  log.info("[pipeline][tool-call] round=#{round} id=#{tc_id} tool=#{tc_name}")
730
757
 
758
+ # Store start time on the current thread so emit_tool_result_event can calculate
759
+ # accurate wall-clock duration even when tools run in parallel threads.
731
760
  Thread.current[:legion_current_tool_call_id] = tc_id
732
761
  Thread.current[:legion_current_tool_name] = tc_name
762
+ Thread.current[:legion_current_tool_started_at] = started_at
733
763
 
734
764
  @tool_event_handler&.call(
735
765
  type: :tool_call, tool_call_id: tc_id, tool_name: tc_name,
736
- arguments: tc_args, round: round
766
+ arguments: tc_args, round: round, started_at: started_at
767
+ )
768
+ end
769
+
770
+ def emit_tool_result_event(tool_result)
771
+ # tool_result may be a raw value (String/Hash) or a ToolResultWrapper
772
+ # from our parallel patch — extract the fields defensively.
773
+ tc_id = tool_result.respond_to?(:tool_call_id) ? tool_result.tool_call_id : Thread.current[:legion_current_tool_call_id]
774
+ tc_name = tool_result.respond_to?(:tool_name) ? tool_result.tool_name : Thread.current[:legion_current_tool_name]
775
+ started_at = tool_result.respond_to?(:started_at) ? tool_result.started_at : Thread.current[:legion_current_tool_started_at]
776
+ finished_at = Time.now
777
+ raw = tool_result.respond_to?(:result) ? tool_result.result : tool_result
778
+
779
+ duration_ms = started_at ? ((finished_at - started_at) * 1000).round : nil
780
+
781
+ log.info("[pipeline][tool-result] id=#{tc_id} tool=#{tc_name} duration_ms=#{duration_ms}")
782
+
783
+ result_str = (raw.is_a?(String) ? raw : raw.to_s)[0, 4096]
784
+
785
+ @tool_event_handler&.call(
786
+ type: :tool_result, tool_call_id: tc_id, tool_name: tc_name,
787
+ result: result_str, result_size: (raw.is_a?(String) ? raw : raw.to_s).bytesize,
788
+ started_at: started_at, finished_at: finished_at, duration_ms: duration_ms
737
789
  )
738
790
  end
739
791
 
@@ -13,6 +13,7 @@ module Legion
13
13
  connected: false,
14
14
  pipeline_enabled: true,
15
15
  pipeline_async_post_steps: true,
16
+ max_tool_rounds: 200,
16
17
  default_model: model_override,
17
18
  default_provider: nil,
18
19
  providers: providers,
@@ -2,6 +2,6 @@
2
2
 
3
3
  module Legion
4
4
  module LLM
5
- VERSION = '0.6.23'
5
+ VERSION = '0.6.24'
6
6
  end
7
7
  end
data/lib/legion/llm.rb CHANGED
@@ -3,6 +3,7 @@
3
3
  require 'legion/logging/helper'
4
4
 
5
5
  require 'ruby_llm'
6
+ require 'legion/llm/patches/ruby_llm_parallel_tools'
6
7
  require 'legion/llm/version'
7
8
  require 'legion/llm/errors'
8
9
  require 'legion/llm/conversation_store'
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: legion-llm
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.6.23
4
+ version: 0.6.24
5
5
  platform: ruby
6
6
  authors:
7
7
  - Esity
@@ -260,6 +260,7 @@ files:
260
260
  - lib/legion/llm/native_dispatch.rb
261
261
  - lib/legion/llm/off_peak.rb
262
262
  - lib/legion/llm/override_confidence.rb
263
+ - lib/legion/llm/patches/ruby_llm_parallel_tools.rb
263
264
  - lib/legion/llm/pipeline.rb
264
265
  - lib/legion/llm/pipeline/audit_publisher.rb
265
266
  - lib/legion/llm/pipeline/enrichment_injector.rb