brute 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 2e5a610b24378a83f8ce97c8e251a4325705e17a455aa95e9f9c14efe581845a
4
- data.tar.gz: 43a0dc2f5e1c2d5d3668b00133278956d800caaeb6f955fb5824d4091da16455
3
+ metadata.gz: 434ac3760153b860523176d38105ee8618ec95025713a332745281cda1af4cb8
4
+ data.tar.gz: 3358b33334bf01bd79188c1fb488729997b090bb4617063bc2f61579358b63d6
5
5
  SHA512:
6
- metadata.gz: 6482e969a2865fc56aaa24f3bf2505f7418bf4b03b7d9c96454ade03e9e715511d33ec7d878cef8d3e1dab95b9ed514bd9eb42890229f426268271ee28b8690f
7
- data.tar.gz: 7d16e0ccbf71f5ed106b3a073a122668004d9b729f7efdc0d2a03b3c9a3a8483b9b7a9e57546bbb82820de6be5c361a8ae44f9cdae40e64d2b85be641ecd2e9c
6
+ metadata.gz: 03a3b9866b7e32cc44b260bdd8655983f2776d9c14235bc98f04a26e57f949070b9a3bae87008fb940bd44e9eb671f373f5544759f5ced71026a2c55eac6df44
7
+ data.tar.gz: f3896062d7c20fb622463c4af6b122b8d1281a9ff1147d2b8d8a747ea372fc8631609449652229dd10a604570b866119005449673273ac029d43ab087d9f5b5f
@@ -1,32 +1,38 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Brute
4
- # Bridges llm.rb's streaming callbacks to forge-rb's callback system.
4
+ # Bridges llm.rb's streaming callbacks to the host application.
5
5
  #
6
6
  # Text and reasoning chunks fire immediately as the LLM generates them.
7
- # Tool calls spawn threads on arrival — tools start running while the
8
- # response is still streaming. on_tool_result fires as each thread finishes.
7
+ # Tool calls are collected but NOT executed — execution is deferred to the
8
+ # orchestrator after the stream completes. This ensures text is never
9
+ # concurrent with tool execution.
10
+ #
11
+ # After the stream finishes, the orchestrator reads +pending_tools+ to
12
+ # dispatch all tool calls concurrently, then fires +on_tool_call_start+
13
+ # once with the full batch.
9
14
  #
10
15
  class AgentStream < LLM::Stream
11
16
  # Tool call metadata recorded during streaming, used by ToolUseGuard
12
17
  # when ctx.functions is empty (nil-choice bug in llm.rb).
13
- # Cleared by the guard after consumption to prevent stale data from
14
- # causing duplicate synthetic assistant messages on subsequent calls.
15
18
  attr_reader :pending_tool_calls
16
19
 
17
- def clear_pending_tool_calls!
18
- @pending_tool_calls.clear
19
- end
20
+ # Deferred tool/error pairs: [(LLM::Function, error_or_nil), ...]
21
+ # The orchestrator reads these after the stream completes.
22
+ attr_reader :pending_tools
20
23
 
21
- def initialize(on_content: nil, on_reasoning: nil, on_tool_call: nil, on_tool_result: nil, on_question: nil)
24
+ def initialize(on_content: nil, on_reasoning: nil, on_question: nil)
22
25
  @on_content = on_content
23
26
  @on_reasoning = on_reasoning
24
- @on_tool_call = on_tool_call
25
- @on_tool_result = on_tool_result
26
27
  @on_question = on_question
27
28
  @pending_tool_calls = []
29
+ @pending_tools = []
28
30
  end
29
31
 
32
+ # The on_question callback, needed by the orchestrator to set
33
+ # thread/fiber-locals before tool execution.
34
+ attr_reader :on_question
35
+
30
36
  def on_content(text)
31
37
  @on_content&.call(text)
32
38
  end
@@ -35,30 +41,17 @@ module Brute
35
41
  @on_reasoning&.call(text)
36
42
  end
37
43
 
44
+ # Called by llm.rb per tool as it arrives during streaming.
45
+ # Records only — no execution, no threads, no queue pushes.
38
46
  def on_tool_call(tool, error)
39
47
  @pending_tool_calls << { id: tool.id, name: tool.name, arguments: tool.arguments }
40
- @on_tool_call&.call(tool.name, tool.arguments)
41
-
42
- if error
43
- queue << error
44
- @on_tool_result&.call(tool.name, error.value)
45
- else
46
- queue << LLM::Function::Task.new(spawn_with_callback(tool))
47
- end
48
+ @pending_tools << [tool, error]
48
49
  end
49
50
 
50
- private
51
-
52
- def spawn_with_callback(tool)
53
- on_result = @on_tool_result
54
- on_question = @on_question
55
- name = tool.name
56
- Thread.new do
57
- Thread.current[:on_question] = on_question
58
- result = tool.call
59
- on_result&.call(name, result.respond_to?(:value) ? result.value : result)
60
- result
61
- end
51
+ # Clear all deferred state after the orchestrator has consumed it.
52
+ def clear_pending!
53
+ @pending_tool_calls.clear
54
+ @pending_tools.clear
62
55
  end
63
56
  end
64
57
  end
@@ -56,7 +56,7 @@ module Brute
56
56
  stream = resolve_stream(ctx)
57
57
  if stream
58
58
  data = stream.pending_tool_calls.dup
59
- stream.clear_pending_tool_calls!
59
+ stream.clear_pending!
60
60
  data
61
61
  else
62
62
  []
@@ -17,6 +17,11 @@ module Brute
17
17
  # 2. Executes any tool calls the LLM requested
18
18
  # 3. Repeats until done or a limit is hit
19
19
  #
20
+ # Tool execution is always deferred until after the LLM response (including
21
+ # streaming) completes. Tools then run concurrently with each other via
22
+ # Async::Barrier. on_tool_call_start fires once with the full batch before
23
+ # execution begins; on_tool_result fires per-tool as each finishes.
24
+ #
20
25
  class Orchestrator
21
26
  MAX_REQUESTS_PER_TURN = 100
22
27
 
@@ -33,7 +38,7 @@ module Brute
33
38
  agent_name: nil,
34
39
  on_content: nil,
35
40
  on_reasoning: nil,
36
- on_tool_call: nil,
41
+ on_tool_call_start: nil,
37
42
  on_tool_result: nil,
38
43
  on_question: nil,
39
44
  logger: nil
@@ -62,8 +67,6 @@ module Brute
62
67
  AgentStream.new(
63
68
  on_content: on_content,
64
69
  on_reasoning: on_reasoning,
65
- on_tool_call: on_tool_call,
66
- on_tool_result: on_tool_result,
67
70
  on_question: on_question,
68
71
  )
69
72
  end
@@ -95,7 +98,7 @@ module Brute
95
98
  callbacks: {
96
99
  on_content: on_content,
97
100
  on_reasoning: on_reasoning,
98
- on_tool_call: on_tool_call,
101
+ on_tool_call_start: on_tool_call_start,
99
102
  on_tool_result: on_tool_result,
100
103
  on_question: on_question,
101
104
  },
@@ -131,15 +134,28 @@ module Brute
131
134
 
132
135
  # --- Agent loop ---
133
136
  loop do
134
- break if @context.functions.empty? && (!@stream || @stream.queue.empty?)
135
-
136
- # Collect tool results.
137
- # Streaming: tools already spawned threads during the LLM response — just join them.
138
- # Non-streaming: execute manually (parallel or sequential).
139
- results = if @stream && !@stream.queue.empty?
140
- @context.wait(:thread)
141
- else
142
- execute_tool_calls
137
+ # Collect pending tools from either source:
138
+ # - Streaming: AgentStream deferred tools (collected during stream)
139
+ # - Non-streaming: ctx.functions (populated by llm.rb after response)
140
+ pending = collect_pending_tools
141
+ break if pending.empty?
142
+
143
+ # Fire on_tool_call_start ONCE with the full batch
144
+ on_start = @env.dig(:callbacks, :on_tool_call_start)
145
+ on_start&.call(pending.map { |tool, _| { name: tool.name, arguments: tool.arguments } })
146
+
147
+ # Separate errors (tool not found) from executable tools
148
+ errors = pending.select { |_, err| err }
149
+ executable = pending.reject { |_, err| err }.map(&:first)
150
+
151
+ # Execute tools concurrently, collect results
152
+ results = execute_tool_calls(executable)
153
+
154
+ # Append error results (tool not found, etc.)
155
+ errors.each do |_, err|
156
+ on_result = @env.dig(:callbacks, :on_tool_result)
157
+ on_result&.call(err.name, result_value(err))
158
+ results << err
143
159
  end
144
160
 
145
161
  # Send results back through the pipeline
@@ -151,7 +167,7 @@ module Brute
151
167
  @request_count += 1
152
168
 
153
169
  # Check limits
154
- break if @context.functions.empty? && (!@stream || @stream.queue.empty?)
170
+ break if collect_pending_tools.empty?
155
171
  break if @request_count >= MAX_REQUESTS_PER_TURN
156
172
  break if @env[:metadata][:tool_error_limit_reached]
157
173
  end
@@ -222,24 +238,55 @@ module Brute
222
238
  end
223
239
  end
224
240
 
241
+ # ------------------------------------------------------------------
242
+ # Pending tool collection
243
+ # ------------------------------------------------------------------
244
+
245
+ # Collect pending tools from the stream (streaming) or context (non-streaming).
246
+ # Returns an array of [tool, error_or_nil] pairs.
247
+ # Clears the stream's deferred state after consumption.
248
+ def collect_pending_tools
249
+ if @stream&.pending_tools&.any?
250
+ tools = @stream.pending_tools.dup
251
+ @stream.clear_pending!
252
+ tools
253
+ elsif @context.functions.any?
254
+ @context.functions.to_a.map { |fn| [fn, nil] }
255
+ else
256
+ []
257
+ end
258
+ end
259
+
225
260
  # ------------------------------------------------------------------
226
261
  # Tool execution
227
262
  # ------------------------------------------------------------------
228
263
 
229
- def execute_tool_calls
230
- pending = @context.functions.to_a
231
- return execute_sequential(pending) if pending.size <= 1
264
+ def execute_tool_calls(functions)
265
+ return [] if functions.empty?
266
+
267
+ # Questions block execution — they must complete before other tools
268
+ # run, since the LLM may need the answer to inform subsequent work.
269
+ # Execute any question tools first (sequentially), then dispatch
270
+ # the remaining tools concurrently.
271
+ questions, others = functions.partition { |fn| fn.name == "question" }
232
272
 
233
- execute_parallel(pending)
273
+ results = []
274
+ results.concat(execute_sequential(questions)) if questions.any?
275
+ if others.size <= 1
276
+ results.concat(execute_sequential(others))
277
+ else
278
+ results.concat(execute_parallel(others))
279
+ end
280
+ results
234
281
  end
235
282
 
236
283
  # Run a single tool call synchronously.
237
284
  def execute_sequential(functions)
238
- on_call = @env.dig(:callbacks, :on_tool_call)
239
285
  on_result = @env.dig(:callbacks, :on_tool_result)
286
+ on_question = @env.dig(:callbacks, :on_question)
240
287
 
241
288
  functions.map do |fn|
242
- on_call&.call(fn.name, fn.arguments)
289
+ Thread.current[:on_question] = on_question
243
290
  result = fn.call
244
291
  on_result&.call(fn.name, result_value(result))
245
292
  result
@@ -256,8 +303,8 @@ module Brute
256
303
  # The barrier is stored in @barrier so abort! can cancel in-flight tools.
257
304
  #
258
305
  def execute_parallel(functions)
259
- on_call = @env.dig(:callbacks, :on_tool_call)
260
306
  on_result = @env.dig(:callbacks, :on_tool_result)
307
+ on_question = @env.dig(:callbacks, :on_question)
261
308
 
262
309
  results = Array.new(functions.size)
263
310
 
@@ -266,7 +313,7 @@ module Brute
266
313
 
267
314
  functions.each_with_index do |fn, i|
268
315
  @barrier.async do
269
- on_call&.call(fn.name, fn.arguments)
316
+ Thread.current[:on_question] = on_question
270
317
  results[i] = fn.call
271
318
  r = results[i]
272
319
  on_result&.call(r.name, result_value(r))
@@ -22,7 +22,7 @@ module Brute
22
22
  # tools: [Tool, ...], # tool classes
23
23
  # params: {}, # extra LLM call params (reasoning config, etc.)
24
24
  # metadata: {}, # shared scratchpad for middleware state
25
- # callbacks: {}, # :on_content, :on_tool_call, :on_tool_result
25
+ # callbacks: {}, # :on_content, :on_tool_call_start, :on_tool_result
26
26
  # }
27
27
  #
28
28
  # ## The response
data/lib/brute/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Brute
4
- VERSION = "0.2.0"
4
+ VERSION = "0.3.0"
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: brute
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Brute Contributors