legion-llm 0.6.23 → 0.6.24
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +16 -0
- data/lib/legion/llm/conversation_store.rb +3 -1
- data/lib/legion/llm/patches/ruby_llm_parallel_tools.rb +102 -0
- data/lib/legion/llm/pipeline/executor.rb +56 -4
- data/lib/legion/llm/settings.rb +1 -0
- data/lib/legion/llm/version.rb +1 -1
- data/lib/legion/llm.rb +1 -0
- metadata +2 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: c7e4263174302a505c21078bbc343c36134c08e22f0ad2d2741e6e2b4b747327
|
|
4
|
+
data.tar.gz: ef99cbea7efe6f0e0c479586c73792d814328169ca1e0faaf2362da8cb140be1
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: bb18ac2c9d7cb8108edc71208dc97b4befaed9ab6cdbcb7c1f8662c00402c08619609e3ce5d372b9f4483175020ef2100ac5b86ae2d147216e755f4211173f41
|
|
7
|
+
data.tar.gz: e0f5b35fb908eff66faea9509f984fa141531c467db9ab1c8e2276f5e47dcd7a65f42e63e5004db4d27334867489c578a44c51b3779d9d954bd7d4028487e37e
|
data/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,21 @@
|
|
|
1
1
|
# Legion LLM Changelog
|
|
2
2
|
|
|
3
|
+
## [0.6.24] - 2026-04-08
|
|
4
|
+
|
|
5
|
+
### Added
|
|
6
|
+
- `Legion::LLM::Patches::RubyLLMParallelTools`: monkey-patch that replaces RubyLLM's serial `handle_tool_calls` loop with concurrent thread execution so all tool calls in a batch run in parallel
|
|
7
|
+
- `ToolResultWrapper` struct exposes `tool_call_id`, `id`, `tool_name`, `result`, and `content` so bridge scripts can match results back to UI slots without falling back to name-based matching
|
|
8
|
+
- `emit_tool_result_event` in `Pipeline::Executor`: fires `tool_event_handler` with `type: :tool_result`, `duration_ms`, `started_at`, and `finished_at` after each tool completes
|
|
9
|
+
- `tool_event_handler` now also fires `type: :model_fallback` events (with `from_model`, `to_model`, `error`, `reason`) on auth-failed provider fallback in both regular and streaming paths
|
|
10
|
+
- `max_tool_rounds` setting (default `200`) in LLM settings; `install_tool_loop_guard` now reads it at call time so callers can override the cap per-session
|
|
11
|
+
- `started_at` timestamp stored in `Thread.current[:legion_current_tool_started_at]` for accurate per-call wall-clock duration even across parallel threads
|
|
12
|
+
|
|
13
|
+
### Changed
|
|
14
|
+
- `MAX_RUBY_LLM_TOOL_ROUNDS` constant raised from `25` to `200` (now serves as a fallback default for the configurable `max_tool_rounds` setting)
|
|
15
|
+
|
|
16
|
+
### Fixed
|
|
17
|
+
- `ConversationStore#db_append_message` now serializes non-String `content` values (e.g., tool-call arrays) to JSON before writing to the database, preventing Sequel type errors when tool-use messages are persisted
|
|
18
|
+
|
|
3
19
|
## [0.6.23] - 2026-04-07
|
|
4
20
|
|
|
5
21
|
### Fixed
|
|
data/lib/legion/llm/conversation_store.rb
CHANGED
|
@@ -373,11 +373,13 @@ module Legion
|
|
|
373
373
|
end
|
|
374
374
|
|
|
375
375
|
def db_append_message(conversation_id, msg)
|
|
376
|
+
content = msg[:content]
|
|
377
|
+
content = content.to_json unless content.is_a?(String) || content.nil?
|
|
376
378
|
row = {
|
|
377
379
|
conversation_id: conversation_id,
|
|
378
380
|
seq: msg[:seq],
|
|
379
381
|
role: msg[:role].to_s,
|
|
380
|
-
content: msg[:content],
|
|
382
|
+
content: content,
|
|
381
383
|
provider: msg[:provider]&.to_s,
|
|
382
384
|
model: msg[:model]&.to_s,
|
|
383
385
|
input_tokens: msg[:input_tokens],
|
|
data/lib/legion/llm/patches/ruby_llm_parallel_tools.rb
ADDED
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Patch: RubyLLM::Chat parallel tool call execution
|
|
4
|
+
#
|
|
5
|
+
# RubyLLM's default `handle_tool_calls` iterates tool calls serially with
|
|
6
|
+
# `.each_value`, meaning when an LLM returns N tool calls in a single response
|
|
7
|
+
# they execute one-at-a-time. This patch replaces that loop with concurrent
|
|
8
|
+
# thread execution so all tool calls in a batch run in parallel, and results
|
|
9
|
+
# are collected before re-prompting the model.
|
|
10
|
+
#
|
|
11
|
+
# Additionally, RubyLLM fires `on_tool_result` with the raw tool return value
|
|
12
|
+
# (a String/Hash/etc.) which carries no `tool_call_id`. The legion-interlink
|
|
13
|
+
# bridge script's `serialize_tool_result` needs a `tool_call_id` field to
|
|
14
|
+
# match results back to the correct tool call slot in the UI — without it
|
|
15
|
+
# every result falls back to name-based matching, which breaks when multiple
|
|
16
|
+
# tools of the same name run in parallel and leaves them stuck on RUNNING.
|
|
17
|
+
#
|
|
18
|
+
# Fix: wrap each result in a ToolResultWrapper that exposes both the raw
|
|
19
|
+
# content/result AND the originating tool_call_id / id fields.
|
|
20
|
+
#
|
|
21
|
+
# NOTE: This is a temporary shim. When RubyLLM is replaced this file goes away.
|
|
22
|
+
#
|
|
23
|
+
# Thread safety notes:
|
|
24
|
+
# - Each tool call executes in its own thread.
|
|
25
|
+
# - @on[:tool_call] fires per-thread (fast, just event emission — safe).
|
|
26
|
+
# - @on[:tool_result] fires per-thread with the wrapper object.
|
|
27
|
+
# - add_message is called serially after all threads complete to preserve
|
|
28
|
+
# message ordering and avoid races on @messages.
|
|
29
|
+
# - If ANY tool returns a RubyLLM::Tool::Halt, complete() is skipped —
|
|
30
|
+
# matching the original semantics.
|
|
31
|
+
|
|
32
|
+
module Legion
|
|
33
|
+
module LLM
|
|
34
|
+
module Patches
|
|
35
|
+
# Wraps a raw tool result value so that the bridge-script's
|
|
36
|
+
# serialize_tool_result can read both :tool_call_id/:id (for UI matching)
|
|
37
|
+
# and :result/:content (for the result payload) off a single object.
|
|
38
|
+
ToolResultWrapper = Struct.new(:result, :content, :tool_call_id, :id, :tool_name) do
|
|
39
|
+
# Delegate is_a? checks for RubyLLM::Tool::Halt so the caller can still
|
|
40
|
+
# detect halt results transparently.
|
|
41
|
+
def is_a?(klass)
|
|
42
|
+
result.is_a?(klass) || super
|
|
43
|
+
end
|
|
44
|
+
|
|
45
|
+
alias_method :kind_of?, :is_a?
|
|
46
|
+
end
|
|
47
|
+
|
|
48
|
+
module RubyLLMParallelTools
|
|
49
|
+
def handle_tool_calls(response, &)
|
|
50
|
+
tool_calls = response.tool_calls.values
|
|
51
|
+
|
|
52
|
+
# Dispatch all tool calls concurrently, preserving original order.
|
|
53
|
+
threads = tool_calls.map do |tool_call|
|
|
54
|
+
Thread.new do
|
|
55
|
+
@on[:new_message]&.call
|
|
56
|
+
@on[:tool_call]&.call(tool_call)
|
|
57
|
+
raw = execute_tool(tool_call)
|
|
58
|
+
# Wrap so serialize_tool_result in the bridge script gets an ID.
|
|
59
|
+
wrapper = ToolResultWrapper.new(
|
|
60
|
+
raw, # :result — raw value (String/Hash/Halt/etc.)
|
|
61
|
+
raw, # :content — alias for bridge compat
|
|
62
|
+
tool_call.id, # :tool_call_id
|
|
63
|
+
tool_call.id, # :id
|
|
64
|
+
tool_call.name # :tool_name
|
|
65
|
+
)
|
|
66
|
+
@on[:tool_result]&.call(wrapper)
|
|
67
|
+
{ tool_call: tool_call, raw: raw }
|
|
68
|
+
end
|
|
69
|
+
end
|
|
70
|
+
|
|
71
|
+
begin
|
|
72
|
+
results = threads.map(&:value) # block until all complete
|
|
73
|
+
|
|
74
|
+
# Commit messages serially — preserves ordering, avoids @messages races.
|
|
75
|
+
halt_result = nil
|
|
76
|
+
results.each do |entry|
|
|
77
|
+
tool_call = entry[:tool_call]
|
|
78
|
+
raw = entry[:raw]
|
|
79
|
+
tool_payload = raw.is_a?(RubyLLM::Tool::Halt) ? raw.content : raw
|
|
80
|
+
content = content_like?(tool_payload) ? tool_payload : tool_payload.to_s
|
|
81
|
+
message = add_message(role: :tool, content: content, tool_call_id: tool_call.id)
|
|
82
|
+
@on[:end_message]&.call(message)
|
|
83
|
+
halt_result = raw if raw.is_a?(RubyLLM::Tool::Halt)
|
|
84
|
+
end
|
|
85
|
+
|
|
86
|
+
reset_tool_choice if forced_tool_choice?
|
|
87
|
+
halt_result || complete(&)
|
|
88
|
+
ensure
|
|
89
|
+
threads.each do |thread|
|
|
90
|
+
thread.kill if thread.alive?
|
|
91
|
+
thread.join
|
|
92
|
+
end
|
|
93
|
+
end
|
|
94
|
+
end
|
|
95
|
+
end
|
|
96
|
+
end
|
|
97
|
+
end
|
|
98
|
+
end
|
|
99
|
+
|
|
100
|
+
# Use prepend (not alias_method/override) so the patch stays clearly visible
|
|
101
|
+
# in the ancestor chain and is easy to remove when RubyLLM is dropped.
|
|
102
|
+
RubyLLM::Chat.prepend(Legion::LLM::Patches::RubyLLMParallelTools)
|
|
data/lib/legion/llm/pipeline/executor.rb
CHANGED
|
@@ -46,7 +46,7 @@ module Legion
|
|
|
46
46
|
|
|
47
47
|
ASYNC_SAFE_STEPS = %i[post_response knowledge_capture response_return].freeze
|
|
48
48
|
|
|
49
|
-
MAX_RUBY_LLM_TOOL_ROUNDS = 25
|
|
49
|
+
MAX_RUBY_LLM_TOOL_ROUNDS = 200
|
|
50
50
|
|
|
51
51
|
ASYNC_THREAD_POOL = Concurrent::FixedThreadPool.new(4, fallback_policy: :caller_runs)
|
|
52
52
|
|
|
@@ -340,9 +340,17 @@ module Legion
|
|
|
340
340
|
)
|
|
341
341
|
if fallback
|
|
342
342
|
log.warn "[pipeline] #{@resolved_provider} auth failed (#{e.class}), falling back to #{fallback[:provider]}:#{fallback[:model]}"
|
|
343
|
+
from_provider = @resolved_provider
|
|
344
|
+
from_model = @resolved_model
|
|
343
345
|
@resolved_provider = fallback[:provider]
|
|
344
346
|
@resolved_model = fallback[:model]
|
|
345
347
|
@warnings << { type: :provider_fallback, original_error: e.message, fallback: "#{@resolved_provider}:#{@resolved_model}" }
|
|
348
|
+
@tool_event_handler&.call(
|
|
349
|
+
type: :model_fallback,
|
|
350
|
+
from_provider: from_provider, to_provider: @resolved_provider,
|
|
351
|
+
from_model: from_model, to_model: @resolved_model,
|
|
352
|
+
error: e.message, reason: 'auth_failed'
|
|
353
|
+
)
|
|
346
354
|
@timeline.record(
|
|
347
355
|
category: :provider, key: 'provider:fallback',
|
|
348
356
|
direction: :internal,
|
|
@@ -625,9 +633,17 @@ module Legion
|
|
|
625
633
|
if fallback
|
|
626
634
|
log.warn "[pipeline] #{@resolved_provider} stream auth failed (#{e.class}), " \
|
|
627
635
|
"falling back to #{fallback[:provider]}:#{fallback[:model]}"
|
|
636
|
+
from_provider = @resolved_provider
|
|
637
|
+
from_model = @resolved_model
|
|
628
638
|
@resolved_provider = fallback[:provider]
|
|
629
639
|
@resolved_model = fallback[:model]
|
|
630
640
|
@warnings << { type: :provider_fallback, original_error: e.message, fallback: "#{@resolved_provider}:#{@resolved_model}" }
|
|
641
|
+
@tool_event_handler&.call(
|
|
642
|
+
type: :model_fallback,
|
|
643
|
+
from_provider: from_provider, to_provider: @resolved_provider,
|
|
644
|
+
from_model: from_model, to_model: @resolved_model,
|
|
645
|
+
error: e.message, reason: 'auth_failed'
|
|
646
|
+
)
|
|
631
647
|
retry
|
|
632
648
|
end
|
|
633
649
|
raise Legion::LLM::AuthError, e.message
|
|
@@ -670,6 +686,7 @@ module Legion
|
|
|
670
686
|
Thread.current[:legion_tool_event_handler] = nil
|
|
671
687
|
Thread.current[:legion_current_tool_call_id] = nil
|
|
672
688
|
Thread.current[:legion_current_tool_name] = nil
|
|
689
|
+
Thread.current[:legion_current_tool_started_at] = nil
|
|
673
690
|
end
|
|
674
691
|
|
|
675
692
|
@timestamps[:provider_end] = Time.now
|
|
@@ -709,31 +726,66 @@ module Legion
|
|
|
709
726
|
return
|
|
710
727
|
end
|
|
711
728
|
|
|
729
|
+
max_rounds = Legion::LLM.settings[:max_tool_rounds] || MAX_RUBY_LLM_TOOL_ROUNDS
|
|
712
730
|
tool_round = 0
|
|
713
731
|
session.on_tool_call do |tool_call|
|
|
714
732
|
tool_round += 1
|
|
715
|
-
if tool_round > MAX_RUBY_LLM_TOOL_ROUNDS
|
|
733
|
+
if tool_round > max_rounds
|
|
716
734
|
log.warn("[pipeline] tool loop cap hit: #{tool_round} rounds, halting")
|
|
717
|
-
raise Legion::LLM::PipelineError, "tool loop exceeded #{MAX_RUBY_LLM_TOOL_ROUNDS} rounds"
|
|
735
|
+
raise Legion::LLM::PipelineError, "tool loop exceeded #{max_rounds} rounds"
|
|
718
736
|
end
|
|
719
737
|
|
|
720
738
|
emit_tool_call_event(tool_call, tool_round)
|
|
721
739
|
end
|
|
740
|
+
|
|
741
|
+
# Wire up tool-result events so the API SSE stream can notify the
|
|
742
|
+
# frontend when each tool finishes (clears the RUNNING state in the UI).
|
|
743
|
+
return unless session.respond_to?(:on_tool_result)
|
|
744
|
+
|
|
745
|
+
session.on_tool_result do |tool_result|
|
|
746
|
+
emit_tool_result_event(tool_result)
|
|
747
|
+
end
|
|
722
748
|
end
|
|
723
749
|
|
|
724
750
|
def emit_tool_call_event(tool_call, round)
|
|
725
751
|
tc_id = tool_call_field(tool_call, :id)
|
|
726
752
|
tc_name = tool_call_field(tool_call, :name)
|
|
727
753
|
tc_args = tool_call_field(tool_call, :arguments)
|
|
754
|
+
started_at = Time.now
|
|
728
755
|
|
|
729
756
|
log.info("[pipeline][tool-call] round=#{round} id=#{tc_id} tool=#{tc_name}")
|
|
730
757
|
|
|
758
|
+
# Store start time per-tool-call-id so emit_tool_result_event can calculate
|
|
759
|
+
# accurate wall-clock duration even when tools run in parallel threads.
|
|
731
760
|
Thread.current[:legion_current_tool_call_id] = tc_id
|
|
732
761
|
Thread.current[:legion_current_tool_name] = tc_name
|
|
762
|
+
Thread.current[:legion_current_tool_started_at] = started_at
|
|
733
763
|
|
|
734
764
|
@tool_event_handler&.call(
|
|
735
765
|
type: :tool_call, tool_call_id: tc_id, tool_name: tc_name,
|
|
736
|
-
arguments: tc_args, round: round
|
|
766
|
+
arguments: tc_args, round: round, started_at: started_at
|
|
767
|
+
)
|
|
768
|
+
end
|
|
769
|
+
|
|
770
|
+
def emit_tool_result_event(tool_result)
|
|
771
|
+
# tool_result may be a raw value (String/Hash) or a ToolResultWrapper
|
|
772
|
+
# from our parallel patch — extract the fields defensively.
|
|
773
|
+
tc_id = tool_result.respond_to?(:tool_call_id) ? tool_result.tool_call_id : Thread.current[:legion_current_tool_call_id]
|
|
774
|
+
tc_name = tool_result.respond_to?(:tool_name) ? tool_result.tool_name : Thread.current[:legion_current_tool_name]
|
|
775
|
+
started_at = tool_result.respond_to?(:started_at) ? tool_result.started_at : Thread.current[:legion_current_tool_started_at]
|
|
776
|
+
finished_at = Time.now
|
|
777
|
+
raw = tool_result.respond_to?(:result) ? tool_result.result : tool_result
|
|
778
|
+
|
|
779
|
+
duration_ms = started_at ? ((finished_at - started_at) * 1000).round : nil
|
|
780
|
+
|
|
781
|
+
log.info("[pipeline][tool-result] id=#{tc_id} tool=#{tc_name} duration_ms=#{duration_ms}")
|
|
782
|
+
|
|
783
|
+
result_str = (raw.is_a?(String) ? raw : raw.to_s)[0, 4096]
|
|
784
|
+
|
|
785
|
+
@tool_event_handler&.call(
|
|
786
|
+
type: :tool_result, tool_call_id: tc_id, tool_name: tc_name,
|
|
787
|
+
result: result_str, result_size: (raw.is_a?(String) ? raw : raw.to_s).bytesize,
|
|
788
|
+
started_at: started_at, finished_at: finished_at, duration_ms: duration_ms
|
|
737
789
|
)
|
|
738
790
|
end
|
|
739
791
|
|
data/lib/legion/llm/settings.rb
CHANGED
data/lib/legion/llm/version.rb
CHANGED
data/lib/legion/llm.rb
CHANGED
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: legion-llm
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.6.23
|
|
4
|
+
version: 0.6.24
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Esity
|
|
@@ -260,6 +260,7 @@ files:
|
|
|
260
260
|
- lib/legion/llm/native_dispatch.rb
|
|
261
261
|
- lib/legion/llm/off_peak.rb
|
|
262
262
|
- lib/legion/llm/override_confidence.rb
|
|
263
|
+
- lib/legion/llm/patches/ruby_llm_parallel_tools.rb
|
|
263
264
|
- lib/legion/llm/pipeline.rb
|
|
264
265
|
- lib/legion/llm/pipeline/audit_publisher.rb
|
|
265
266
|
- lib/legion/llm/pipeline/enrichment_injector.rb
|