phronomy 0.7.0 → 0.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70) hide show
  1. checksums.yaml +4 -4
  2. data/.mutant.yml +8 -7
  3. data/CHANGELOG.md +151 -1
  4. data/README.md +155 -32
  5. data/Rakefile +33 -0
  6. data/benchmark/baseline.json +1 -1
  7. data/benchmark/bench_regression.rb +1 -0
  8. data/docs/decisions/004-invoke-timeout-is-not-cancellation.md +24 -0
  9. data/docs/decisions/006-no-built-in-guardrails.md +20 -2
  10. data/docs/decisions/010-cooperative-first-concurrency.md +248 -0
  11. data/lib/phronomy/agent/base.rb +250 -65
  12. data/lib/phronomy/agent/concerns/suspendable.rb +15 -0
  13. data/lib/phronomy/agent/fsm.rb +41 -64
  14. data/lib/phronomy/agent/orchestrator.rb +146 -121
  15. data/lib/phronomy/agent/parallel_tool_chat.rb +79 -22
  16. data/lib/phronomy/agent/react_agent.rb +8 -0
  17. data/lib/phronomy/async_queue.rb +155 -0
  18. data/lib/phronomy/blocking_adapter_pool.rb +435 -0
  19. data/lib/phronomy/cancellation_scope.rb +123 -0
  20. data/lib/phronomy/cancellation_token.rb +43 -2
  21. data/lib/phronomy/concurrency_gate.rb +155 -0
  22. data/lib/phronomy/configuration.rb +142 -0
  23. data/lib/phronomy/deadline.rb +63 -0
  24. data/lib/phronomy/diagnostics.rb +62 -0
  25. data/lib/phronomy/embeddings/base.rb +17 -0
  26. data/lib/phronomy/eval/runner.rb +9 -9
  27. data/lib/phronomy/event_loop.rb +181 -43
  28. data/lib/phronomy/fsm_session.rb +50 -4
  29. data/lib/phronomy/guardrail/prompt_injection_guardrail.rb +58 -0
  30. data/lib/phronomy/invocation_context.rb +152 -0
  31. data/lib/phronomy/knowledge_source/base.rb +18 -0
  32. data/lib/phronomy/llm_adapter/base.rb +104 -0
  33. data/lib/phronomy/llm_adapter/ruby_llm.rb +41 -0
  34. data/lib/phronomy/llm_adapter.rb +20 -0
  35. data/lib/phronomy/metrics.rb +38 -0
  36. data/lib/phronomy/runtime/deterministic_scheduler.rb +412 -0
  37. data/lib/phronomy/runtime/fake_scheduler.rb +165 -0
  38. data/lib/phronomy/runtime/gate_registry.rb +52 -0
  39. data/lib/phronomy/runtime/pool_registry.rb +57 -0
  40. data/lib/phronomy/runtime/runtime_metrics.rb +117 -0
  41. data/lib/phronomy/runtime/scheduler.rb +98 -0
  42. data/lib/phronomy/runtime/scheduler_timer_adapter.rb +79 -0
  43. data/lib/phronomy/runtime/task_registry.rb +48 -0
  44. data/lib/phronomy/runtime/thread_scheduler.rb +30 -0
  45. data/lib/phronomy/runtime/timer_queue.rb +106 -0
  46. data/lib/phronomy/runtime/timer_service.rb +42 -0
  47. data/lib/phronomy/runtime.rb +374 -0
  48. data/lib/phronomy/task/backend.rb +80 -0
  49. data/lib/phronomy/task/fiber_backend.rb +157 -0
  50. data/lib/phronomy/task/immediate_backend.rb +89 -0
  51. data/lib/phronomy/task/thread_backend.rb +84 -0
  52. data/lib/phronomy/task.rb +275 -0
  53. data/lib/phronomy/task_group.rb +265 -0
  54. data/lib/phronomy/testing/fake_clock.rb +109 -0
  55. data/lib/phronomy/testing/fake_scheduler.rb +104 -0
  56. data/lib/phronomy/testing/scheduler_helpers.rb +59 -0
  57. data/lib/phronomy/testing.rb +12 -0
  58. data/lib/phronomy/tool/base.rb +110 -2
  59. data/lib/phronomy/tool/mcp_tool.rb +47 -16
  60. data/lib/phronomy/tool/scope_policy.rb +50 -0
  61. data/lib/phronomy/tool_executor.rb +106 -0
  62. data/lib/phronomy/tracing/open_telemetry_tracer.rb +34 -0
  63. data/lib/phronomy/vector_store/async_backend.rb +110 -0
  64. data/lib/phronomy/vector_store/base.rb +7 -0
  65. data/lib/phronomy/version.rb +1 -1
  66. data/lib/phronomy/workflow.rb +52 -5
  67. data/lib/phronomy/workflow_context.rb +29 -2
  68. data/lib/phronomy/workflow_runner.rb +74 -3
  69. data/lib/phronomy.rb +42 -0
  70. metadata +40 -2
@@ -15,14 +15,14 @@ module Phronomy
15
15
  # == Execution model
16
16
  #
17
17
  # {#start} is called by the EventLoop on the +:start+ event. It immediately
18
- # returns after spawning a background IO thread that runs the agent's full
18
+ # returns after spawning a {Phronomy::Task} that runs the agent's full
19
19
  # invocation pipeline (via +_invoke_impl+). The EventLoop thread is never
20
20
  # blocked by agent execution.
21
21
  #
22
- # Inside the IO thread, the +:phronomy_agent_parallel_tools+ thread-local
23
- # flag is set to +true+ so that {Agent::Base#build_chat} returns a
24
- # {ParallelToolChat} instance, enabling concurrent tool dispatch when the LLM
25
- # returns multiple tool calls in one response.
22
+ # Inside the task, {Agent::Base#build_chat} returns a
23
+ # {ParallelToolChat} instance when EventLoop mode is enabled, allowing
24
+ # concurrent tool dispatch when the LLM returns multiple tool calls in one
25
+ # response.
26
26
  #
27
27
  # == Completion events
28
28
  #
@@ -72,40 +72,30 @@ module Phronomy
72
72
  # auto-generated when nil
73
73
  # @param config [Hash] invocation config forwarded to
74
74
  # +_invoke_impl+
75
- # @param parent_id [String, nil] EventLoop id of the parent
76
- # FSMSession; when set, a
77
- # +:child_completed+ event is posted
78
- # on completion
79
- # @param result_writer [Proc, nil] optional callable invoked with the
80
- # result hash <b>before</b>
81
- # +:child_completed+ is posted.
82
- # Use this to write the agent output
83
- # back into the parent WorkflowContext.
84
- # Thread::Queue provides the
85
- # happens-before guarantee.
75
+ # @param parent_id [String, nil] EventLoop id of the parent FSMSession;
76
+ # when set, a +:child_completed+ event
77
+ # is posted on completion. The result
78
+ # is delivered exclusively as the event
79
+ # payload — no cross-thread writes to the
80
+ # parent WorkflowContext are performed.
86
81
  #
87
- # @example Writing result into context
88
- # entry :run_agent, ->(ctx) {
89
- # MyAgent.new.run_as_child(ctx.query, ctx: ctx) { |r| ctx.answer = r[:output] }
90
- # }
91
82
  # @api private
92
- def initialize(agent:, input:, messages: [], thread_id: nil, config: {}, parent_id: nil, result_writer: nil)
83
+ def initialize(agent:, input:, messages: [], thread_id: nil, config: {}, parent_id: nil)
93
84
  @agent = agent
94
85
  @input = input
95
86
  @messages = Array(messages).dup
96
87
  @thread_id = thread_id || SecureRandom.uuid
97
88
  @config = config
98
89
  @parent_id = parent_id
99
- @result_writer = result_writer
100
90
  @id = @thread_id
101
91
  @current_phase = :idle
102
92
  end
103
93
 
104
94
  # Called by {EventLoop} on the +:start+ event.
105
- # Transitions to +:running+ and spawns the agent IO thread.
95
+ # Transitions to +:running+ and spawns the agent task.
106
96
  def start
107
97
  @current_phase = :running
108
- spawn_agent_thread
98
+ spawn_agent_task
109
99
  end
110
100
 
111
101
  # Called by {EventLoop} for external events dispatched to this id.
@@ -117,10 +107,10 @@ module Phronomy
117
107
 
118
108
  private
119
109
 
120
- # Spawns the background IO thread that runs the agent invocation.
121
- # Captures all instance variables by value so the thread closure is
110
+ # Spawns a {Phronomy::Task} that runs the agent invocation pipeline.
111
+ # Captures all instance variables by value so the task closure is
122
112
  # safe even if the FSM object is modified (though it is not in practice).
123
- def spawn_agent_thread
113
+ def spawn_agent_task
124
114
  agent = @agent
125
115
  input = @input
126
116
  messages = @messages
@@ -128,51 +118,38 @@ module Phronomy
128
118
  config = @config
129
119
  fsm_id = @id
130
120
  parent_id = @parent_id
131
- result_writer = @result_writer
132
121
 
133
- Thread.new do
134
- # Enable parallel tool dispatch inside this IO thread.
135
- Thread.current[:phronomy_agent_parallel_tools] = true
136
- # Forward the concurrency cap to ParallelToolChat.
137
- Thread.current[:phronomy_max_parallel_tools] =
138
- agent.class.respond_to?(:max_parallel_tools) ? agent.class.max_parallel_tools : 10
139
-
140
- begin
141
- result = agent.send(:_invoke_impl,
142
- input,
143
- messages: messages,
144
- thread_id: thread_id,
145
- config: config)
146
-
147
- if parent_id
148
- # Let the caller write the result into the context BEFORE the
149
- # parent FSMSession advances. Thread::Queue provides the
150
- # happens-before guarantee — no Mutex needed.
151
- result_writer&.call(result)
152
-
153
- Phronomy::EventLoop.instance.post(
154
- Phronomy::Event.new(type: :child_completed, target_id: parent_id, payload: result)
155
- )
156
- end
122
+ Phronomy::Runtime.instance.spawn(name: "agent-fsm:#{fsm_id}") do
123
+ result = agent.send(:_invoke_impl,
124
+ input,
125
+ messages: messages,
126
+ thread_id: thread_id,
127
+ config: config)
157
128
 
129
+ if parent_id
130
+ # Result is delivered exclusively as the :child_completed payload.
131
+ # The parent Workflow task is the sole owner of WorkflowContext
132
+ # and applies the result after receiving the event.
158
133
  Phronomy::EventLoop.instance.post(
159
- Phronomy::Event.new(type: :finished, target_id: fsm_id, payload: result)
134
+ Phronomy::Event.new(type: :child_completed, target_id: parent_id, payload: result)
160
135
  )
161
- rescue => e
162
- if parent_id
163
- Phronomy::EventLoop.instance.post(
164
- Phronomy::Event.new(type: :child_failed, target_id: parent_id, payload: e)
165
- )
166
- end
136
+ end
167
137
 
138
+ Phronomy::EventLoop.instance.post(
139
+ Phronomy::Event.new(type: :finished, target_id: fsm_id, payload: result)
140
+ )
141
+ rescue => e
142
+ if parent_id
168
143
  Phronomy::EventLoop.instance.post(
169
- Phronomy::Event.new(type: :error, target_id: fsm_id, payload: e)
144
+ Phronomy::Event.new(type: :child_failed, target_id: parent_id, payload: e)
170
145
  )
171
- ensure
172
- # Clear the thread-local context cache for this agent so the IO
173
- # thread's cache does not grow unboundedly across invocations.
174
- Thread.current[:phronomy_context_version_caches]&.delete(agent.object_id)
175
146
  end
147
+
148
+ Phronomy::EventLoop.instance.post(
149
+ Phronomy::Event.new(type: :error, target_id: fsm_id, payload: e)
150
+ )
151
+
152
+ # Context caches are instance variables; no thread-local cleanup needed.
176
153
  end
177
154
  end
178
155
  end
@@ -62,15 +62,29 @@ module Phronomy
62
62
  description "Dispatch work to the #{name} subagent (#{agent_class.name})"
63
63
  param :input, type: :string, desc: "The task or question for the subagent"
64
64
 
65
+ # @_orchestrator_context is injected at call time by prepare_tool_class.
66
+ attr_writer :_orchestrator_context
67
+
65
68
  define_method(:execute) do |input:|
66
- # Inherit the calling orchestrator's thread_id and config when
67
- # available so that sub-agent spans and memory stay connected.
68
- ctx = Thread.current[:phronomy_orchestrator_context] || {}
69
- result = agent_class.new.invoke(
69
+ # Inherit the calling orchestrator's thread_id, config, and
70
+ # InvocationContext so that child subagent spans and memory stay
71
+ # connected to the parent invocation.
72
+ ctx = @_orchestrator_context || {}
73
+ parent_ic = ctx[:invocation_context]
74
+ task_config = ctx[:config] || {}
75
+
76
+ # Propagate parent InvocationContext to the child agent so that
77
+ # cancellation, deadline, and tracing carry through automatically.
78
+ if parent_ic && !task_config[:invocation_context]
79
+ child_ic = parent_ic.merge(parent_task_id: parent_ic.task_id)
80
+ task_config = task_config.merge(invocation_context: child_ic)
81
+ end
82
+
83
+ result = agent_class.new.invoke_async(
70
84
  input,
71
- thread_id: ctx[:thread_id],
72
- config: ctx[:config] || {}
73
- )
85
+ thread_id: ctx[:thread_id] || parent_ic&.thread_id,
86
+ config: task_config
87
+ ).await
74
88
  result[:output]
75
89
  rescue
76
90
  raise if on_error == :raise
@@ -78,6 +92,9 @@ module Phronomy
78
92
  end
79
93
  end
80
94
 
95
+ # Track this tool class so prepare_tool_class can inject context.
96
+ @_subagent_tool_classes = (@_subagent_tool_classes || []) + [tool_class]
97
+
81
98
  # Append without clobbering previously registered tools or aliases.
82
99
  @tools = (@tools || []) + [tool_class]
83
100
  @tool_aliases ||= {}
@@ -85,6 +102,14 @@ module Phronomy
85
102
  registered_subagents[name] = {agent_class: agent_class, on_error: on_error}
86
103
  end
87
104
 
105
+ # Returns the subagent tool classes registered on this specific class.
106
+ # Used by {#prepare_tool_class} to inject context.
107
+ # @return [Array<Class>]
108
+ # @api private
109
+ def self._subagent_tool_classes
110
+ @_subagent_tool_classes || []
111
+ end
112
+
88
113
  # Returns the subagent registry for this specific class (not inherited).
89
114
  #
90
115
  # @return [Hash{Symbol => Hash}]
@@ -93,8 +118,8 @@ module Phronomy
93
118
  @registered_subagents ||= {}
94
119
  end
95
120
 
96
- # Dispatches multiple heterogeneous agent tasks in parallel using Ruby
97
- # threads. Each task is a Hash describing one agent invocation.
121
+ # Dispatches multiple heterogeneous agent tasks in parallel using
122
+ # cooperative {Task}s. Each task is a Hash describing one agent invocation.
98
123
  #
99
124
  # Results are returned in the same order as the input +tasks+ array.
100
125
  # Concurrency is bounded by +max_concurrency+; when nil all tasks run at
@@ -102,7 +127,7 @@ module Phronomy
102
127
  #
103
128
  # Error semantics are controlled by +on_error+:
104
129
  # - +:raise+ (default) — every task runs to completion; the first
105
- # exception in input order is then re-raised in the calling thread.
130
+ # exception in input order is then re-raised in the calling task.
106
131
  # - +:skip+ — failed tasks return +nil+; no exception is raised.
107
132
  #
108
133
  # @param tasks [Array<Hash>]
@@ -110,27 +135,27 @@ module Phronomy
110
135
  # @option task [String] :input input string for the agent (required)
111
136
  # @option task [Hash] :config forwarded to +agent#invoke+ (default: +{}+)
112
137
  # @option task [String] :thread_id forwarded to +agent#invoke+ (default: nil)
113
- # @param max_concurrency [Integer, nil] maximum number of concurrent threads;
138
+ # @param max_concurrency [Integer, nil] maximum number of concurrent tasks;
114
139
  # nil means no limit (all tasks run simultaneously)
115
140
  # @param on_error [Symbol] +:raise+ or +:skip+
116
- # @param timeout [Numeric, nil] maximum seconds to wait for all workers;
141
+ # @param timeout [Numeric, nil] maximum seconds to wait for all tasks;
117
142
  # nil means wait indefinitely. When the deadline is exceeded,
118
- # {Phronomy::TimeoutError} is raised and all surviving worker threads are killed.
143
+ # {Phronomy::TimeoutError} is raised and all surviving tasks are cancelled
144
+ # cooperatively.
119
145
  # @param cancellation_token [Phronomy::CancellationToken, nil] when provided, the
120
146
  # token is merged into each task's config (unless the task already sets one) so
121
- # that every worker agent checks it before making LLM calls.
122
- # @param force_kill [Boolean] when +true+, surviving worker threads are killed with
123
- # +Thread#kill+ after the grace period if they do not stop cooperatively. When
124
- # +false+ (default), workers are asked to stop cooperatively but are never killed;
125
- # the caller receives {Phronomy::TimeoutError} immediately and abandoned workers
126
- # discard their results when they eventually finish. +false+ is safer for
127
- # production because +Thread#kill+ can interrupt +ensure+ blocks.
147
+ # that every child agent checks it before making LLM calls.
148
+ # @param invocation_context [Phronomy::InvocationContext, nil] when provided,
149
+ # the context (cancellation_token, deadline, thread_id) is propagated to each
150
+ # child agent as a child InvocationContext.
151
+ # @param force_kill [Boolean] deprecated — cooperative cancellation is always
152
+ # used; this parameter is accepted for backwards compatibility but has no effect.
128
153
  # @return [Array<Hash, nil>] agent results in the same order as +tasks+
129
154
  # @raise [ArgumentError] if +on_error+ is not +:raise+ or +:skip+
130
155
  # @raise [ArgumentError] if +max_concurrency+ is not a positive Integer or nil
131
156
  # @raise [Phronomy::TimeoutError] if +timeout+ is exceeded
132
157
  # @api public
133
- def dispatch_parallel(*tasks, max_concurrency: nil, on_error: :raise, timeout: nil, cancellation_token: nil, force_kill: false)
158
+ def dispatch_parallel(*tasks, max_concurrency: nil, on_error: :raise, timeout: nil, cancellation_token: nil, invocation_context: nil, force_kill: false)
134
159
  unless [:raise, :skip].include?(on_error)
135
160
  raise ArgumentError, "unknown on_error: #{on_error.inspect}"
136
161
  end
@@ -138,7 +163,7 @@ module Phronomy
138
163
  raise ArgumentError, "max_concurrency must be a positive Integer"
139
164
  end
140
165
 
141
- bounded_map(tasks, max_concurrency: max_concurrency, on_error: on_error, timeout: timeout, cancellation_token: cancellation_token, force_kill: force_kill)
166
+ bounded_map(tasks, max_concurrency: max_concurrency, on_error: on_error, timeout: timeout, cancellation_token: cancellation_token, invocation_context: invocation_context, force_kill: force_kill)
142
167
  end
143
168
 
144
169
  # Runs the same agent against multiple inputs in parallel (fan-out pattern).
@@ -150,17 +175,20 @@ module Phronomy
150
175
  # @param inputs [Array<String>] list of input strings
151
176
  # @param config [Hash] forwarded to every +agent#invoke+ call
152
177
  # @param thread_id [String, nil] forwarded to every +agent#invoke+ call
153
- # @param max_concurrency [Integer, nil] forwarded to {#dispatch_parallel}
154
- # @param on_error [Symbol] forwarded to {#dispatch_parallel}
178
+ # @param max_concurrency [Integer, nil] forwarded to {#dispatch_parallel}
179
+ # @param on_error [Symbol] forwarded to {#dispatch_parallel}
180
+ # @param invocation_context [Phronomy::InvocationContext, nil] forwarded to
181
+ # {#dispatch_parallel} for child context propagation
155
182
  # @return [Array<Hash, nil>] results in the same order as +inputs+
156
183
  # @api public
157
- def fan_out(agent:, inputs:, config: {}, thread_id: nil, max_concurrency: nil, on_error: :raise, timeout: nil, cancellation_token: nil, force_kill: false)
184
+ def fan_out(agent:, inputs:, config: {}, thread_id: nil, max_concurrency: nil, on_error: :raise, timeout: nil, cancellation_token: nil, invocation_context: nil, force_kill: false)
158
185
  dispatch_parallel(
159
186
  *inputs.map { |input| {agent: agent, input: input, config: config, thread_id: thread_id} },
160
187
  max_concurrency: max_concurrency,
161
188
  on_error: on_error,
162
189
  timeout: timeout,
163
190
  cancellation_token: cancellation_token,
191
+ invocation_context: invocation_context,
164
192
  force_kill: force_kill
165
193
  )
166
194
  end
@@ -175,131 +203,128 @@ module Phronomy
175
203
  # @return [Hash] the sub-agent's result hash (+:output+, +:messages+)
176
204
  # @api public
177
205
  def subagent(agent_class, input, config: nil, thread_id: nil)
178
- ctx = Thread.current[:phronomy_orchestrator_context] || {}
179
- agent_class.new.invoke(
206
+ ctx = @_orchestrator_context || {}
207
+ parent_ic = ctx[:invocation_context]
208
+ effective_config = config || ctx[:config] || {}
209
+
210
+ # Propagate parent InvocationContext to the child agent.
211
+ if parent_ic && !effective_config[:invocation_context]
212
+ child_ic = parent_ic.merge(parent_task_id: parent_ic.task_id)
213
+ effective_config = effective_config.merge(invocation_context: child_ic)
214
+ end
215
+
216
+ agent_class.new.invoke_async(
180
217
  input,
181
- config: config || ctx[:config] || {},
182
- thread_id: thread_id || ctx[:thread_id]
183
- )
218
+ config: effective_config,
219
+ thread_id: thread_id || ctx[:thread_id] || parent_ic&.thread_id
220
+ ).await
184
221
  end
185
222
 
186
223
  private
187
224
 
188
- # Override invoke_once to expose the current thread_id and config via a
189
- # thread-local so that DSL-registered subagent tools can inherit them.
225
+ # Override invoke_once to expose the current thread_id and config via an
226
+ # instance variable so that DSL-registered subagent tools can inherit them
227
+ # without using Thread.current.
190
228
  def invoke_once(input, messages: [], thread_id: nil, config: {})
191
- prev = Thread.current[:phronomy_orchestrator_context]
192
- Thread.current[:phronomy_orchestrator_context] = {thread_id: thread_id, config: config}
229
+ prev = @_orchestrator_context
230
+ @_orchestrator_context = {
231
+ thread_id: thread_id,
232
+ config: config,
233
+ invocation_context: config[:invocation_context]
234
+ }
193
235
  super
194
236
  ensure
195
- Thread.current[:phronomy_orchestrator_context] = prev
237
+ @_orchestrator_context = prev
196
238
  end
197
239
 
198
- # Worker-pool implementation shared by {#dispatch_parallel} and {#fan_out}.
199
- #
200
- # Uses a +Queue+ as a work-stealing mechanism: each worker thread pops a
201
- # task, executes it, and loops until the queue is empty. The number of
202
- # workers is +min(max_concurrency, tasks.length)+, capped at the task count
203
- # so we never spin up idle threads.
240
+ # Override prepare_tool_class to inject the current orchestrator context
241
+ # into DSL-registered subagent tools before each call.
242
+ def prepare_tool_class(tool_class)
243
+ prepared = super
244
+ orch = self
245
+
246
+ # Only wrap subagent tools (those registered via the .subagent DSL).
247
+ return prepared unless self.class._subagent_tool_classes.include?(tool_class)
248
+
249
+ # Capture the effective tool name before building the anonymous subclass.
250
+ # Class-level instance variables (@tool_name) are not inherited through
251
+ # subclassing, so the wrapper must set it explicitly.
252
+ effective_name = prepared.new.name
253
+ Class.new(prepared) do
254
+ tool_name effective_name
255
+ define_method(:call) do |args|
256
+ self._orchestrator_context = orch.instance_variable_get(:@_orchestrator_context)
257
+ super(args)
258
+ end
259
+ end
260
+ end
261
+
262
+ # Task-based worker pool shared by {#dispatch_parallel} and {#fan_out}.
204
263
  #
205
- # +errors+ is indexed by task position so that the first error in *input*
206
- # order is deterministically re-raised when +on_error: :raise+ is used.
207
- # A +Mutex+ guards concurrent writes to +errors+ even though Array element
208
- # assignment at different indices is safe in MRI; this keeps the code
209
- # correct across alternative Ruby runtimes.
264
+ # Spawns one {Task} per input using a {TaskGroup} so that +max_concurrency+
265
+ # acts as a semaphore: spare tasks block on {TaskGroup#spawn} until a slot
266
+ # becomes available. Results are written back to +results+ in input order;
267
+ # +errors+ captures the first error per position so that the first error in
268
+ # *input* order is deterministically re-raised when +on_error: :raise+ is used.
210
269
  #
211
- # When +timeout+ is given, workers are first asked to stop cooperatively
212
- # via a cancellation flag (so they do not pick up new tasks) and then given
213
- # +KILL_GRACE_SECONDS+ to finish any in-flight +ensure+ blocks. Only
214
- # workers that are still alive after the grace period are force-killed, and
215
- # a warning is logged in that case. Use a +CancellationToken+ (see #216)
216
- # for full cooperative cancellation of long-running tasks.
270
+ # When +timeout+ is given, each spawned task is joined with the remaining
271
+ # deadline. Any still-alive tasks are cancelled cooperatively via
272
+ # {TaskGroup#cancel_all!} before {Phronomy::TimeoutError} is raised.
273
+ # The +force_kill+ argument is deprecated: cooperative cancellation is always
274
+ # used regardless of its value.
217
275
  #
218
276
  # Deadline tracking uses +Process.clock_gettime(Process::CLOCK_MONOTONIC)+
219
277
  # to avoid sensitivity to NTP adjustments and system-clock changes.
220
- KILL_GRACE_SECONDS = 0.5
221
- private_constant :KILL_GRACE_SECONDS
222
-
223
- def bounded_map(tasks, max_concurrency:, on_error:, timeout: nil, cancellation_token: nil, force_kill: false)
278
+ def bounded_map(tasks, max_concurrency:, on_error:, timeout: nil, cancellation_token: nil, invocation_context: nil, force_kill: false) # rubocop:disable Lint/UnusedMethodArgument
224
279
  return [] if tasks.empty?
225
280
 
226
281
  results = Array.new(tasks.length)
227
282
  errors = Array.new(tasks.length)
228
- errors_mutex = Mutex.new
229
- # Mutex-backed cooperative stop token; workers check before each task pick-up.
230
- internal_stop_token = Phronomy::CancellationToken.new
231
-
232
- queue = Queue.new
233
- tasks.each_with_index { |task, i| queue << [i, task] }
234
-
235
- worker_count = [max_concurrency || tasks.length, tasks.length].min
236
-
237
- workers = worker_count.times.map do
238
- Thread.new do
239
- loop do
240
- break if internal_stop_token.cancelled?
241
-
242
- i, task = begin
243
- queue.pop(true)
244
- rescue ThreadError
245
- break # queue is empty; this worker is done
246
- end
247
-
248
- # Merge the shared cancellation token into the task's config unless
249
- # the task already supplies its own token.
250
- task_config = task.fetch(:config, {})
251
- if cancellation_token && !task_config[:cancellation_token]
252
- task_config = task_config.merge(cancellation_token: cancellation_token)
253
- end
254
-
255
- begin
256
- results[i] = task[:agent].new.invoke(
257
- task[:input],
258
- config: task_config,
259
- thread_id: task[:thread_id]
260
- )
261
- rescue => e
262
- case on_error
263
- when :skip
264
- results[i] = nil
265
- else
266
- errors_mutex.synchronize { errors[i] = e }
267
- end
268
- end
283
+ group = Phronomy::Runtime.instance.task_group(limit: max_concurrency || tasks.length)
284
+
285
+ # Resolve the effective cancellation token: explicit argument wins;
286
+ # fall back to the one embedded in the InvocationContext if present.
287
+ effective_ct = cancellation_token || invocation_context&.cancellation_token
288
+
289
+ spawned = tasks.each_with_index.map do |task, i|
290
+ group.spawn do
291
+ task_config = task.fetch(:config, {})
292
+
293
+ # Merge the shared cancellation token unless the task already has one.
294
+ if effective_ct && !task_config[:cancellation_token]
295
+ task_config = task_config.merge(cancellation_token: effective_ct)
296
+ end
297
+
298
+ # Propagate parent InvocationContext to each child task so that
299
+ # cancellation, deadline, and tracing carry through automatically.
300
+ if invocation_context && !task_config[:invocation_context]
301
+ child_ic = invocation_context.merge(parent_task_id: invocation_context.task_id)
302
+ task_config = task_config.merge(invocation_context: child_ic)
269
303
  end
304
+
305
+ results[i] = task[:agent].new.invoke_async(
306
+ task[:input],
307
+ config: task_config,
308
+ thread_id: task[:thread_id] || invocation_context&.thread_id
309
+ ).await
310
+ rescue => e
311
+ errors[i] = e unless on_error == :skip
270
312
  end
271
313
  end
272
314
 
273
- workers.each(&:join) if timeout.nil?
274
-
275
315
  if timeout
276
- deadline = Process.clock_gettime(Process::CLOCK_MONOTONIC) + timeout
277
- workers.each do |w|
278
- remaining = deadline - Process.clock_gettime(Process::CLOCK_MONOTONIC)
279
- w.join([remaining, 0].max)
280
- end
316
+ deadline = Phronomy::Deadline.in(timeout)
317
+ spawned.each { |t| t.join([deadline.remaining_seconds, 0].max) }
281
318
 
282
- alive = workers.select(&:alive?)
319
+ alive = spawned.select(&:alive?)
283
320
  unless alive.empty?
284
- # Signal workers cooperatively to stop picking up new tasks.
285
- internal_stop_token.cancel!
286
- if force_kill
287
- # Give in-flight ensure blocks a short grace period before kill.
288
- alive.each { |w| w.join(KILL_GRACE_SECONDS) }
289
- still_alive = alive.select(&:alive?)
290
- if still_alive.any?
291
- Phronomy.configuration.logger&.warn(
292
- "[Phronomy] dispatch_parallel: #{still_alive.length} worker(s) did not stop " \
293
- "within grace period; force-killing. Use CancellationToken for " \
294
- "cooperative cancellation of long-running tasks."
295
- )
296
- still_alive.each(&:kill)
297
- end
298
- end
321
+ group.cancel_all!
299
322
  raise Phronomy::TimeoutError,
300
323
  "dispatch_parallel timed out after #{timeout}s " \
301
- "(#{alive.length} of #{workers.length} workers still running)"
324
+ "(#{alive.length} of #{spawned.length} tasks still running)"
302
325
  end
326
+ else
327
+ spawned.each(&:await)
303
328
  end
304
329
 
305
330
  first_error = errors.compact.first