phronomy 0.7.0 → 0.7.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.mutant.yml +8 -7
- data/CHANGELOG.md +151 -1
- data/README.md +155 -32
- data/Rakefile +33 -0
- data/benchmark/baseline.json +1 -1
- data/benchmark/bench_regression.rb +1 -0
- data/docs/decisions/004-invoke-timeout-is-not-cancellation.md +24 -0
- data/docs/decisions/006-no-built-in-guardrails.md +20 -2
- data/docs/decisions/010-cooperative-first-concurrency.md +248 -0
- data/lib/phronomy/agent/base.rb +250 -65
- data/lib/phronomy/agent/concerns/suspendable.rb +15 -0
- data/lib/phronomy/agent/fsm.rb +41 -64
- data/lib/phronomy/agent/orchestrator.rb +146 -121
- data/lib/phronomy/agent/parallel_tool_chat.rb +79 -22
- data/lib/phronomy/agent/react_agent.rb +8 -0
- data/lib/phronomy/async_queue.rb +155 -0
- data/lib/phronomy/blocking_adapter_pool.rb +435 -0
- data/lib/phronomy/cancellation_scope.rb +123 -0
- data/lib/phronomy/cancellation_token.rb +43 -2
- data/lib/phronomy/concurrency_gate.rb +155 -0
- data/lib/phronomy/configuration.rb +142 -0
- data/lib/phronomy/deadline.rb +63 -0
- data/lib/phronomy/diagnostics.rb +62 -0
- data/lib/phronomy/embeddings/base.rb +17 -0
- data/lib/phronomy/eval/runner.rb +9 -9
- data/lib/phronomy/event_loop.rb +181 -43
- data/lib/phronomy/fsm_session.rb +50 -4
- data/lib/phronomy/guardrail/prompt_injection_guardrail.rb +58 -0
- data/lib/phronomy/invocation_context.rb +152 -0
- data/lib/phronomy/knowledge_source/base.rb +18 -0
- data/lib/phronomy/llm_adapter/base.rb +104 -0
- data/lib/phronomy/llm_adapter/ruby_llm.rb +41 -0
- data/lib/phronomy/llm_adapter.rb +20 -0
- data/lib/phronomy/metrics.rb +38 -0
- data/lib/phronomy/runtime/deterministic_scheduler.rb +412 -0
- data/lib/phronomy/runtime/fake_scheduler.rb +165 -0
- data/lib/phronomy/runtime/gate_registry.rb +52 -0
- data/lib/phronomy/runtime/pool_registry.rb +57 -0
- data/lib/phronomy/runtime/runtime_metrics.rb +117 -0
- data/lib/phronomy/runtime/scheduler.rb +98 -0
- data/lib/phronomy/runtime/scheduler_timer_adapter.rb +79 -0
- data/lib/phronomy/runtime/task_registry.rb +48 -0
- data/lib/phronomy/runtime/thread_scheduler.rb +30 -0
- data/lib/phronomy/runtime/timer_queue.rb +106 -0
- data/lib/phronomy/runtime/timer_service.rb +42 -0
- data/lib/phronomy/runtime.rb +374 -0
- data/lib/phronomy/task/backend.rb +80 -0
- data/lib/phronomy/task/fiber_backend.rb +157 -0
- data/lib/phronomy/task/immediate_backend.rb +89 -0
- data/lib/phronomy/task/thread_backend.rb +84 -0
- data/lib/phronomy/task.rb +275 -0
- data/lib/phronomy/task_group.rb +265 -0
- data/lib/phronomy/testing/fake_clock.rb +109 -0
- data/lib/phronomy/testing/fake_scheduler.rb +104 -0
- data/lib/phronomy/testing/scheduler_helpers.rb +59 -0
- data/lib/phronomy/testing.rb +12 -0
- data/lib/phronomy/tool/base.rb +110 -2
- data/lib/phronomy/tool/mcp_tool.rb +47 -16
- data/lib/phronomy/tool/scope_policy.rb +50 -0
- data/lib/phronomy/tool_executor.rb +106 -0
- data/lib/phronomy/tracing/open_telemetry_tracer.rb +34 -0
- data/lib/phronomy/vector_store/async_backend.rb +110 -0
- data/lib/phronomy/vector_store/base.rb +7 -0
- data/lib/phronomy/version.rb +1 -1
- data/lib/phronomy/workflow.rb +52 -5
- data/lib/phronomy/workflow_context.rb +29 -2
- data/lib/phronomy/workflow_runner.rb +74 -3
- data/lib/phronomy.rb +42 -0
- metadata +40 -2
data/lib/phronomy/event_loop.rb
CHANGED
|
@@ -3,12 +3,37 @@
|
|
|
3
3
|
module Phronomy
|
|
4
4
|
# Singleton event loop that manages all FSMSession instances.
|
|
5
5
|
#
|
|
6
|
-
# A single background thread reads from a global
|
|
7
|
-
# events to their target FSMSession.
|
|
8
|
-
#
|
|
6
|
+
# A single background thread reads from a global {Phronomy::AsyncQueue} and
|
|
7
|
+
# dispatches events to their target FSMSession. IO work (LLM calls, tool
|
|
8
|
+
# calls) must be dispatched via +Runtime.instance.spawn+ or
|
|
9
|
+
# +BlockingAdapterPool+, then post results back to the loop via
|
|
10
|
+
# {EventLoop#post}.
|
|
9
11
|
#
|
|
10
12
|
# Activated with: +Phronomy.configure { |c| c.event_loop = true }+
|
|
11
13
|
#
|
|
14
|
+
# == Threading exception (see ADR-010 Rule 2)
|
|
15
|
+
#
|
|
16
|
+
# +EventLoop+ is a **deliberate exception** to Phronomy's cooperative-first
|
|
17
|
+
# concurrency model. Its dispatch loop is an infinite +while @running+ loop
|
|
18
|
+
# that must never block the framework's own event processing.
|
|
19
|
+
# Running it on a shared scheduler task would consume the scheduler, preventing
|
|
20
|
+
# other tasks from running. Therefore {#start} creates a dedicated
|
|
21
|
+
# {Runtime::ThreadScheduler} — this is correct and intentional per ADR-010.
|
|
22
|
+
# No other framework component should do the same; see the ADR-010 checklist.
|
|
23
|
+
#
|
|
24
|
+
# == Handler constraints
|
|
25
|
+
#
|
|
26
|
+
# Handlers dispatched by the EventLoop run **on the EventLoop thread**.
|
|
27
|
+
# They must not:
|
|
28
|
+
#
|
|
29
|
+
# * Perform blocking operations directly (database queries, LLM calls, HTTP
|
|
30
|
+
# requests). Schedule blocking work via +Runtime.instance.spawn+ or
|
|
31
|
+
# +BlockingAdapterPool+, then post results back with {#post}.
|
|
32
|
+
# * Call +Workflow#invoke+ (or any synchronous +invoke+) from within a
|
|
33
|
+
# handler. That method would block waiting for the EventLoop to process
|
|
34
|
+
# events, causing a deadlock. Use the async pattern: post a follow-up
|
|
35
|
+
# event instead.
|
|
36
|
+
#
|
|
12
37
|
# == Fork safety
|
|
13
38
|
#
|
|
14
39
|
# +EventLoop.instance+ is lazily initialized. The background thread is not
|
|
@@ -20,14 +45,25 @@ module Phronomy
|
|
|
20
45
|
# Do NOT call +Workflow#invoke+ (in EventLoop mode) from within a workflow
|
|
21
46
|
# entry action. The entry action runs on the EventLoop thread; a nested
|
|
22
47
|
# +invoke+ would block waiting for the same thread to process events →
|
|
23
|
-
# deadlock. Use the async
|
|
24
|
-
#
|
|
48
|
+
# deadlock. Use the async pattern instead: schedule work via
|
|
49
|
+
# +Runtime.instance.spawn+ or +BlockingAdapterPool+, then post events back
|
|
50
|
+
# via +Phronomy::EventLoop.instance.post(...)+.
|
|
25
51
|
class EventLoop
|
|
26
52
|
# Returns the singleton instance, creating and starting it on first call.
|
|
27
53
|
def self.instance
|
|
28
54
|
@instance ||= new.tap(&:start)
|
|
29
55
|
end
|
|
30
56
|
|
|
57
|
+
# Returns true when called from within the EventLoop dispatch task.
|
|
58
|
+
# Compares the current task's name with "event-loop" (the name given in {#start}) so that
|
|
59
|
+
# the check works correctly for both thread-based and future fiber-based
|
|
60
|
+
# scheduler backends.
|
|
61
|
+
# @return [Boolean]
|
|
62
|
+
# @api private
|
|
63
|
+
def self.current?
|
|
64
|
+
Phronomy::Task.current&.name == "event-loop"
|
|
65
|
+
end
|
|
66
|
+
|
|
31
67
|
# Stops and destroys the singleton. Primarily used in tests.
|
|
32
68
|
# @api private
|
|
33
69
|
def self.reset!
|
|
@@ -36,7 +72,7 @@ module Phronomy
|
|
|
36
72
|
end
|
|
37
73
|
|
|
38
74
|
def initialize
|
|
39
|
-
@queue =
|
|
75
|
+
@queue = Phronomy::AsyncQueue.new # global event queue (thread-safe; no Mutex needed)
|
|
40
76
|
@fsms = {} # { id => FSMSession } — EventLoop thread only
|
|
41
77
|
@waiting = {} # { id => completion_queue } — EventLoop thread only
|
|
42
78
|
# Mutex-backed FSM count for drain-mode shutdown.
|
|
@@ -45,6 +81,42 @@ module Phronomy
|
|
|
45
81
|
@fsm_count = 0
|
|
46
82
|
# Token cancelled when shutdown is requested; new child sessions receive it.
|
|
47
83
|
@shutdown_token = Phronomy::CancellationToken.new
|
|
84
|
+
# Fairness metrics (written on the EventLoop thread; all access is guarded by @lag_mutex)
|
|
85
|
+
@lag_mutex = Mutex.new
|
|
86
|
+
@last_lag_ns = 0
|
|
87
|
+
@max_lag_ns = 0
|
|
88
|
+
@dispatch_count = 0
|
|
89
|
+
@total_lag_ns = 0
|
|
90
|
+
end
|
|
91
|
+
|
|
92
|
+
# Returns the most recently measured event-loop lag in seconds.
|
|
93
|
+
# Lag is the wall-clock time between {#post} and the moment the event
|
|
94
|
+
# is dequeued for dispatch. Thread-safe.
|
|
95
|
+
# @return [Float]
|
|
96
|
+
# @api private
|
|
97
|
+
def last_lag_seconds
|
|
98
|
+
@lag_mutex.synchronize { @last_lag_ns } / 1_000_000_000.0
|
|
99
|
+
end
|
|
100
|
+
|
|
101
|
+
# Returns the maximum event-loop lag seen since the loop was started.
|
|
102
|
+
# Thread-safe.
|
|
103
|
+
# @return [Float]
|
|
104
|
+
# @api private
|
|
105
|
+
def max_lag_seconds
|
|
106
|
+
@lag_mutex.synchronize { @max_lag_ns } / 1_000_000_000.0
|
|
107
|
+
end
|
|
108
|
+
|
|
109
|
+
# Returns the mean event-loop lag across all dispatched events since the
|
|
110
|
+
# loop was started. Returns 0.0 when no events have been dispatched.
|
|
111
|
+
# Thread-safe.
|
|
112
|
+
# @return [Float]
|
|
113
|
+
# @api private
|
|
114
|
+
def average_lag_seconds
|
|
115
|
+
@lag_mutex.synchronize do
|
|
116
|
+
return 0.0 if @dispatch_count.zero?
|
|
117
|
+
|
|
118
|
+
@total_lag_ns.to_f / @dispatch_count / 1_000_000_000.0
|
|
119
|
+
end
|
|
48
120
|
end
|
|
49
121
|
|
|
50
122
|
# Registers an FSMSession for execution and returns a completion queue.
|
|
@@ -58,21 +130,23 @@ module Phronomy
|
|
|
58
130
|
# the popped value will be an Exception — callers are responsible for re-raising it.
|
|
59
131
|
#
|
|
60
132
|
# @param fsm_session [Phronomy::FSMSession]
|
|
61
|
-
# @return [
|
|
133
|
+
# @return [Phronomy::AsyncQueue] resolves to final/halted context, or an Exception
|
|
62
134
|
# @api private
|
|
63
135
|
def register(fsm_session)
|
|
64
|
-
if
|
|
136
|
+
if Phronomy::EventLoop.current?
|
|
65
137
|
raise Phronomy::Error,
|
|
66
138
|
"Cannot call Workflow#invoke (EventLoop mode) from within an EventLoop " \
|
|
67
|
-
"entry action.
|
|
68
|
-
"back via
|
|
139
|
+
"entry action. Schedule work via Runtime.instance.spawn or " \
|
|
140
|
+
"BlockingAdapterPool, then post events back via " \
|
|
141
|
+
"Phronomy::EventLoop.instance.post(...) instead."
|
|
69
142
|
end
|
|
70
143
|
|
|
71
|
-
completion_queue =
|
|
144
|
+
completion_queue = Phronomy::AsyncQueue.new
|
|
72
145
|
# Pass both session and completion_queue in the event payload so that the
|
|
73
146
|
# EventLoop thread is the sole writer of @fsms and @waiting.
|
|
74
|
-
@queue.push(Event.new(type: :start, target_id: fsm_session.id,
|
|
75
|
-
payload: {session: fsm_session, completion: completion_queue})
|
|
147
|
+
@queue.push([Event.new(type: :start, target_id: fsm_session.id,
|
|
148
|
+
payload: {session: fsm_session, completion: completion_queue}),
|
|
149
|
+
Process.clock_gettime(Process::CLOCK_MONOTONIC, :nanosecond)])
|
|
76
150
|
completion_queue
|
|
77
151
|
end
|
|
78
152
|
|
|
@@ -87,60 +161,77 @@ module Phronomy
|
|
|
87
161
|
# @return [nil]
|
|
88
162
|
# @api private
|
|
89
163
|
def enqueue_child(agent_fsm)
|
|
90
|
-
@queue.push(Event.new(type: :start, target_id: agent_fsm.id,
|
|
91
|
-
payload: {session: agent_fsm, completion: nil})
|
|
164
|
+
@queue.push([Event.new(type: :start, target_id: agent_fsm.id,
|
|
165
|
+
payload: {session: agent_fsm, completion: nil}),
|
|
166
|
+
Process.clock_gettime(Process::CLOCK_MONOTONIC, :nanosecond)])
|
|
92
167
|
nil
|
|
93
168
|
end
|
|
94
169
|
|
|
95
170
|
# Posts an event to the loop. Safe to call from any thread (including IO threads).
|
|
171
|
+
# The current monotonic clock time is recorded so that the EventLoop can
|
|
172
|
+
# measure the dispatch lag when it dequeues the event.
|
|
96
173
|
#
|
|
174
|
+
# @note **Handler constraint**: do not perform blocking operations or call
|
|
175
|
+
# +Workflow#invoke+ directly from within the handler that processes a
|
|
176
|
+
# posted event. Handlers run on the EventLoop thread; blocking there
|
|
177
|
+
# stalls all session processing. For blocking work, post a new event
|
|
178
|
+
# after the result is ready.
|
|
97
179
|
# @param event [Phronomy::Event]
|
|
98
180
|
# @api private
|
|
99
181
|
def post(event)
|
|
100
|
-
@queue.push(event)
|
|
182
|
+
@queue.push([event, Process.clock_gettime(Process::CLOCK_MONOTONIC, :nanosecond)])
|
|
101
183
|
end
|
|
102
184
|
|
|
103
|
-
# Starts the
|
|
185
|
+
# Starts the EventLoop dispatch task under {Runtime} ownership.
|
|
186
|
+
#
|
|
187
|
+
# The dispatch loop runs as a {Phronomy::Task} so that {Runtime#shutdown}
|
|
188
|
+
# can drain it together with all other in-flight tasks. The task is named
|
|
189
|
+
# +"event-loop"+ so that {.current?} can identify it via
|
|
190
|
+
# +Task.current&.name+.
|
|
104
191
|
# @return [self]
|
|
105
192
|
# @api private
|
|
106
193
|
def start
|
|
107
|
-
return self if @
|
|
194
|
+
return self if @task&.alive?
|
|
108
195
|
|
|
109
196
|
# Reset shutdown state so the loop can be restarted after a stop.
|
|
110
197
|
@shutdown_token = Phronomy::CancellationToken.new
|
|
111
198
|
@fsm_count_mutex.synchronize { @fsm_count = 0 }
|
|
112
199
|
@running = true
|
|
113
|
-
|
|
114
|
-
|
|
200
|
+
# The dispatch loop must always run in a real background thread.
|
|
201
|
+
# A cooperative scheduler (FakeScheduler/ImmediateBackend) executes tasks
|
|
202
|
+
# synchronously on the caller's thread, which would block forever inside
|
|
203
|
+
# the run_loop infinite loop. Create a dedicated Runtime with
|
|
204
|
+
# ThreadScheduler to guarantee async execution regardless of the global
|
|
205
|
+
# runtime_backend setting.
|
|
206
|
+
thread_runtime = Phronomy::Runtime.new(scheduler: Phronomy::Runtime::ThreadScheduler.new)
|
|
207
|
+
@task = thread_runtime.spawn(name: "event-loop") do
|
|
115
208
|
run_loop
|
|
116
209
|
end
|
|
117
|
-
@thread.abort_on_exception = false
|
|
118
210
|
self
|
|
119
211
|
end
|
|
120
212
|
|
|
121
|
-
# Stops the
|
|
213
|
+
# Stops the EventLoop dispatch task.
|
|
122
214
|
#
|
|
123
215
|
# Sends a cooperative shutdown sentinel to the event queue so that the
|
|
124
|
-
#
|
|
125
|
-
# to +timeout+ seconds for a clean shutdown; if the
|
|
126
|
-
# afterwards it is
|
|
216
|
+
# dispatch task can finish any in-flight handler before exiting. Waits up
|
|
217
|
+
# to +timeout+ seconds for a clean shutdown; if the task is still alive
|
|
218
|
+
# afterwards it is cancelled (cooperative cancellation via {Task#cancel!}).
|
|
127
219
|
#
|
|
128
220
|
# @param timeout [Numeric] seconds to wait for cooperative shutdown. Defaults
|
|
129
221
|
# to +Phronomy.configuration.event_loop_stop_grace_seconds+ (5 s).
|
|
130
222
|
# @param drain [Boolean] when +true+, wait for all active FSMSessions to
|
|
131
223
|
# complete before signalling the loop to stop. Bounded by +timeout+.
|
|
132
224
|
# Defaults to +false+.
|
|
133
|
-
# @param force_kill [Boolean]
|
|
134
|
-
# +
|
|
135
|
-
#
|
|
136
|
-
#
|
|
137
|
-
# interrupt +ensure+ blocks.
|
|
225
|
+
# @param force_kill [Boolean] deprecated — retained for backward compatibility.
|
|
226
|
+
# When +true+, the dispatch task is cancelled via {Task#cancel!} if it does
|
|
227
|
+
# not stop within +timeout+. +Thread#kill+ is no longer used; cooperative
|
|
228
|
+
# cancellation (raising {CancellationError}) replaces it.
|
|
138
229
|
# @return [Symbol] shutdown status:
|
|
139
230
|
# - +:clean+ — loop exited cooperatively with no active sessions discarded
|
|
140
231
|
# - +:drained_with_discards+ — drain mode requested but sessions remained;
|
|
141
232
|
# they were discarded and the loop was stopped
|
|
142
|
-
# - +:timeout+ — the
|
|
143
|
-
# - +:force_killed+ — the
|
|
233
|
+
# - +:timeout+ — the task did not stop in time and +force_kill:+ is +false+
|
|
234
|
+
# - +:force_killed+ — the task was cancelled because it did not stop in time
|
|
144
235
|
# @api private
|
|
145
236
|
def stop(timeout: Phronomy.configuration.event_loop_stop_grace_seconds, drain: false, force_kill: false)
|
|
146
237
|
@shutdown_token.cancel!
|
|
@@ -160,31 +251,31 @@ module Phronomy
|
|
|
160
251
|
end
|
|
161
252
|
|
|
162
253
|
@running = false
|
|
163
|
-
@queue.push(:__stop__) # unblock queue.pop so the
|
|
254
|
+
@queue.push(:__stop__) # unblock queue.pop so the task can see @running = false
|
|
164
255
|
begin
|
|
165
|
-
@
|
|
256
|
+
@task&.join(timeout)
|
|
166
257
|
rescue
|
|
167
|
-
#
|
|
168
|
-
#
|
|
258
|
+
# Task may have terminated with an error (e.g. simulated crash in tests).
|
|
259
|
+
# Suppress the re-raise so the cleanup below always runs.
|
|
169
260
|
nil
|
|
170
261
|
end
|
|
171
|
-
if @
|
|
262
|
+
if @task&.alive?
|
|
172
263
|
if force_kill
|
|
173
264
|
Phronomy.configuration.logger&.warn(
|
|
174
|
-
"[Phronomy] EventLoop
|
|
265
|
+
"[Phronomy] EventLoop task did not stop within #{timeout}s; cancelling. " \
|
|
175
266
|
"This is a last resort — check for blocking operations in event handlers."
|
|
176
267
|
)
|
|
177
|
-
@
|
|
268
|
+
@task.cancel!
|
|
178
269
|
status = :force_killed
|
|
179
270
|
else
|
|
180
271
|
Phronomy.configuration.logger&.warn(
|
|
181
|
-
"[Phronomy] EventLoop
|
|
272
|
+
"[Phronomy] EventLoop task did not stop within #{timeout}s; abandoning " \
|
|
182
273
|
"(force_kill: false). Check for blocking operations in event handlers."
|
|
183
274
|
)
|
|
184
275
|
status = :timeout
|
|
185
276
|
end
|
|
186
277
|
end
|
|
187
|
-
@
|
|
278
|
+
@task = nil
|
|
188
279
|
status
|
|
189
280
|
end
|
|
190
281
|
|
|
@@ -192,14 +283,22 @@ module Phronomy
|
|
|
192
283
|
|
|
193
284
|
def run_loop
|
|
194
285
|
while @running
|
|
195
|
-
|
|
286
|
+
item = @queue.pop
|
|
196
287
|
# :__stop__ is used purely as an unblock signal for @queue.pop; the
|
|
197
288
|
# actual stop condition is @running == false (set before the push).
|
|
198
289
|
# Treating it as `next` instead of `break` prevents a stale sentinel
|
|
199
290
|
# (left by a previous stop call that raced with thread start) from
|
|
200
291
|
# immediately terminating a freshly restarted EventLoop.
|
|
201
|
-
next if
|
|
292
|
+
next if item == :__stop__
|
|
202
293
|
|
|
294
|
+
# item is [event, posted_at_ns] — unwrap and measure lag
|
|
295
|
+
event, posted_at_ns = item
|
|
296
|
+
dequeued_at_ns = Process.clock_gettime(Process::CLOCK_MONOTONIC, :nanosecond)
|
|
297
|
+
lag_ns = dequeued_at_ns - posted_at_ns
|
|
298
|
+
update_lag_metrics(lag_ns)
|
|
299
|
+
check_starvation_lag(lag_ns, event)
|
|
300
|
+
|
|
301
|
+
dispatch_start_ns = dequeued_at_ns
|
|
203
302
|
case event.type
|
|
204
303
|
when :finished, :halted, :error
|
|
205
304
|
# All three terminal events share the same cleanup path.
|
|
@@ -244,11 +343,50 @@ module Phronomy
|
|
|
244
343
|
"no handler for target_id #{event.target_id.inspect}"
|
|
245
344
|
end
|
|
246
345
|
end
|
|
346
|
+
|
|
347
|
+
# Check how long this dispatch took; warn if it exceeds the threshold.
|
|
348
|
+
check_dispatch_time(dispatch_start_ns, event)
|
|
247
349
|
end
|
|
248
350
|
rescue => e
|
|
249
351
|
# Unblock all waiting callers if the loop dies unexpectedly.
|
|
250
352
|
@waiting.values.each { |cq| cq.push(e) }
|
|
251
353
|
raise
|
|
252
354
|
end
|
|
355
|
+
|
|
356
|
+
def update_lag_metrics(lag_ns)
|
|
357
|
+
@lag_mutex.synchronize do
|
|
358
|
+
@last_lag_ns = lag_ns
|
|
359
|
+
@max_lag_ns = lag_ns if lag_ns > @max_lag_ns
|
|
360
|
+
@total_lag_ns += lag_ns
|
|
361
|
+
@dispatch_count += 1
|
|
362
|
+
end
|
|
363
|
+
end
|
|
364
|
+
|
|
365
|
+
def check_starvation_lag(lag_ns, event)
|
|
366
|
+
threshold = Phronomy.configuration.event_loop_starvation_threshold_seconds
|
|
367
|
+
return unless threshold && lag_ns > (threshold * 1_000_000_000)
|
|
368
|
+
|
|
369
|
+
Phronomy.configuration.logger&.warn do
|
|
370
|
+
"[Phronomy::EventLoop] Starvation detected: event #{event.type.inspect} " \
|
|
371
|
+
"for target #{event.target_id.inspect} waited " \
|
|
372
|
+
"#{format("%.3f", lag_ns / 1_000_000_000.0)}s in queue " \
|
|
373
|
+
"(threshold: #{threshold}s)"
|
|
374
|
+
end
|
|
375
|
+
end
|
|
376
|
+
|
|
377
|
+
def check_dispatch_time(dispatch_start_ns, event)
|
|
378
|
+
threshold = Phronomy.configuration.event_loop_dispatch_threshold_seconds
|
|
379
|
+
return unless threshold
|
|
380
|
+
|
|
381
|
+
elapsed_ns = Process.clock_gettime(Process::CLOCK_MONOTONIC, :nanosecond) - dispatch_start_ns
|
|
382
|
+
return unless elapsed_ns > (threshold * 1_000_000_000)
|
|
383
|
+
|
|
384
|
+
Phronomy.configuration.logger&.warn do
|
|
385
|
+
"[Phronomy::EventLoop] Long dispatch: event #{event.type.inspect} " \
|
|
386
|
+
"for target #{event.target_id.inspect} took " \
|
|
387
|
+
"#{format("%.3f", elapsed_ns / 1_000_000_000.0)}s on the EventLoop thread " \
|
|
388
|
+
"(threshold: #{threshold}s). Consider moving blocking work to BlockingAdapterPool."
|
|
389
|
+
end
|
|
390
|
+
end
|
|
253
391
|
end
|
|
254
392
|
end
|
data/lib/phronomy/fsm_session.rb
CHANGED
|
@@ -49,12 +49,13 @@ module Phronomy
|
|
|
49
49
|
# @param external_events [Hash] { event_name => [{from:, to:, guard:}] }
|
|
50
50
|
# @param phase_machine_class [Class] state_machines-backed phase tracker class
|
|
51
51
|
# @param recursion_limit [Integer]
|
|
52
|
+
# @param action_timeouts [Hash] { state_name => seconds }
|
|
52
53
|
# @param resume_event [Symbol, nil] external event to fire when resuming
|
|
53
54
|
# @param resume_phase [Symbol, nil] wait state name to resume from
|
|
54
55
|
# @api private
|
|
55
56
|
def initialize(id:, context:, entry_point:, entry_actions:, auto_state_set:,
|
|
56
57
|
declared_states:, wait_state_names:, external_events:, phase_machine_class:,
|
|
57
|
-
recursion_limit:, resume_event: nil, resume_phase: nil)
|
|
58
|
+
recursion_limit:, action_timeouts: {}, resume_event: nil, resume_phase: nil)
|
|
58
59
|
@id = id
|
|
59
60
|
@ctx = context
|
|
60
61
|
@entry_point = entry_point
|
|
@@ -65,6 +66,7 @@ module Phronomy
|
|
|
65
66
|
@external_events = external_events
|
|
66
67
|
@phase_machine_class = phase_machine_class
|
|
67
68
|
@recursion_limit = recursion_limit
|
|
69
|
+
@action_timeouts = action_timeouts
|
|
68
70
|
@resume_event = resume_event
|
|
69
71
|
@resume_phase = resume_phase
|
|
70
72
|
@step = 0
|
|
@@ -91,23 +93,58 @@ module Phronomy
|
|
|
91
93
|
@tracker.context = @ctx
|
|
92
94
|
(@entry_actions[@current_state] || []).each do |c|
|
|
93
95
|
result = c.call(@ctx)
|
|
94
|
-
|
|
96
|
+
if result.is_a?(Phronomy::Task)
|
|
97
|
+
# Awaitable action: spawn a task to await without blocking EventLoop.
|
|
98
|
+
@tracker.async_pending = true
|
|
99
|
+
session_id = @id
|
|
100
|
+
current_state_name = @current_state
|
|
101
|
+
timeout_secs = @action_timeouts[current_state_name]
|
|
102
|
+
Phronomy::Runtime.instance.spawn(name: "fsm-await-#{session_id}") do
|
|
103
|
+
if timeout_secs
|
|
104
|
+
if result.join(timeout_secs).nil?
|
|
105
|
+
result.cancel!
|
|
106
|
+
raise Phronomy::ActionTimeoutError,
|
|
107
|
+
"Action in state #{current_state_name.inspect} timed out after #{timeout_secs}s"
|
|
108
|
+
end
|
|
109
|
+
end
|
|
110
|
+
task_result = result.await
|
|
111
|
+
if task_result.is_a?(Phronomy::WorkflowContext)
|
|
112
|
+
event_loop.post(Event.new(type: :action_completed, target_id: session_id, payload: task_result))
|
|
113
|
+
else
|
|
114
|
+
event_loop.post(Event.new(type: :state_completed, target_id: session_id, payload: nil))
|
|
115
|
+
end
|
|
116
|
+
rescue => e
|
|
117
|
+
event_loop.post(Event.new(type: :error, target_id: session_id, payload: e))
|
|
118
|
+
end
|
|
119
|
+
break # Only one async action at a time per state
|
|
120
|
+
elsif result.is_a?(Phronomy::WorkflowContext)
|
|
121
|
+
@ctx = result
|
|
122
|
+
end
|
|
95
123
|
end
|
|
96
124
|
@tracker.context = @ctx
|
|
97
|
-
advance_or_halt
|
|
125
|
+
advance_or_halt unless @tracker.async_pending
|
|
98
126
|
end
|
|
99
127
|
rescue => e
|
|
100
128
|
finish_with_error(e)
|
|
101
129
|
end
|
|
102
130
|
|
|
103
131
|
# Processes an event dispatched from EventLoop.
|
|
104
|
-
# Called for :state_completed and all user-defined external events.
|
|
132
|
+
# Called for :state_completed, :action_completed, and all user-defined external events.
|
|
105
133
|
#
|
|
106
134
|
# @param event [Phronomy::Event]
|
|
107
135
|
# @api private
|
|
108
136
|
def handle(event)
|
|
109
137
|
return if @done
|
|
110
138
|
|
|
139
|
+
if event.type == :action_completed
|
|
140
|
+
# An awaitable entry action completed: update context and advance.
|
|
141
|
+
@ctx = event.payload if event.payload.is_a?(Phronomy::WorkflowContext)
|
|
142
|
+
@tracker.context = @ctx
|
|
143
|
+
@tracker.async_pending = false # Reset flag set by start or fire_and_advance!
|
|
144
|
+
advance_or_halt
|
|
145
|
+
return
|
|
146
|
+
end
|
|
147
|
+
|
|
111
148
|
fire_and_advance!(event.type)
|
|
112
149
|
rescue => e
|
|
113
150
|
finish_with_error(e)
|
|
@@ -129,6 +166,15 @@ module Phronomy
|
|
|
129
166
|
# When next_phase == @current_state, no transition matched → treat as terminal.
|
|
130
167
|
@current_state = (next_phase == @current_state) ? FINISH : next_phase
|
|
131
168
|
@step += 1
|
|
169
|
+
|
|
170
|
+
# If an entry action returned a Task, the after_transition callback set
|
|
171
|
+
# async_pending = true and spawned a Runtime task. Skip advance_or_halt — the
|
|
172
|
+
# background task will post :action_completed, :state_completed, or :error.
|
|
173
|
+
if @tracker.async_pending
|
|
174
|
+
@tracker.async_pending = false
|
|
175
|
+
return
|
|
176
|
+
end
|
|
177
|
+
|
|
132
178
|
advance_or_halt
|
|
133
179
|
end
|
|
134
180
|
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Phronomy
|
|
4
|
+
module Guardrail
|
|
5
|
+
# Detects potential prompt injection attempts in the agent input.
|
|
6
|
+
#
|
|
7
|
+
# Prompt injection is an attack where an adversary embeds LLM instructions
|
|
8
|
+
# inside data sources (e.g. RAG chunks, tool results, user input) to override
|
|
9
|
+
# the agent's intended behaviour.
|
|
10
|
+
#
|
|
11
|
+
# This guardrail scans the input string for common injection patterns and
|
|
12
|
+
# calls {#fail!} when a match is found. It is intended to be registered as
|
|
13
|
+
# an input guardrail on agents that consume untrusted external content.
|
|
14
|
+
#
|
|
15
|
+
# @example
|
|
16
|
+
# class MyAgent < Phronomy::Agent::Base
|
|
17
|
+
# model "gpt-4o"
|
|
18
|
+
# input_guardrails Phronomy::Guardrail::PromptInjectionGuardrail.new
|
|
19
|
+
# end
|
|
20
|
+
#
|
|
21
|
+
# @example Custom patterns
|
|
22
|
+
# guard = Phronomy::Guardrail::PromptInjectionGuardrail.new(
|
|
23
|
+
# extra_patterns: [/exfiltrate/i]
|
|
24
|
+
# )
|
|
25
|
+
class PromptInjectionGuardrail < InputGuardrail
|
|
26
|
+
# Common prompt injection / jailbreak patterns.
|
|
27
|
+
DEFAULT_PATTERNS = [
|
|
28
|
+
/ignore\s+(previous|prior|all)\s+instructions?/i,
|
|
29
|
+
/disregard\s+(previous|prior|all)\s+instructions?/i,
|
|
30
|
+
/forget\s+(previous|prior|all)\s+instructions?/i,
|
|
31
|
+
/override\s+(previous|prior|all)\s+instructions?/i,
|
|
32
|
+
/new\s+instructions?:\s/i,
|
|
33
|
+
/\byour\s+new\s+(role|instructions?|task)\b/i,
|
|
34
|
+
/you\s+are\s+now\s+(a|an)\b/i,
|
|
35
|
+
/\bact\s+as\s+(a|an)\b/i,
|
|
36
|
+
/\bpretend\s+(you\s+are|to\s+be)\b/i,
|
|
37
|
+
/\bdo\s+not\s+follow\s+(your|the)\s+instructions?\b/i
|
|
38
|
+
].freeze
|
|
39
|
+
|
|
40
|
+
# @param extra_patterns [Array<Regexp>] additional patterns to scan for
|
|
41
|
+
# @api private
|
|
42
|
+
def initialize(extra_patterns: [])
|
|
43
|
+
super()
|
|
44
|
+
@patterns = DEFAULT_PATTERNS + extra_patterns
|
|
45
|
+
end
|
|
46
|
+
|
|
47
|
+
# Scans the input string for injection patterns.
|
|
48
|
+
# @param input [String, Hash]
|
|
49
|
+
# @api private
|
|
50
|
+
def check(input)
|
|
51
|
+
text = input.is_a?(Hash) ? input.values.join(" ") : input.to_s
|
|
52
|
+
@patterns.each do |pattern|
|
|
53
|
+
fail!("Potential prompt injection detected") if text.match?(pattern)
|
|
54
|
+
end
|
|
55
|
+
end
|
|
56
|
+
end
|
|
57
|
+
end
|
|
58
|
+
end
|
|
@@ -0,0 +1,152 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Phronomy
|
|
4
|
+
# Carries all per-invocation context values through the call stack.
|
|
5
|
+
#
|
|
6
|
+
# +InvocationContext+ is a plain value object (struct-like, read-only after
|
|
7
|
+
# creation) that replaces ad-hoc +Thread.current[...]+ propagation.
|
|
8
|
+
# Pass it explicitly wherever context needs to cross a method boundary
|
|
9
|
+
# or be handed to a child {Task} / {TaskGroup}.
|
|
10
|
+
#
|
|
11
|
+
# @example Build a context for a new agent invocation
|
|
12
|
+
# ctx = Phronomy::InvocationContext.new(
|
|
13
|
+
# thread_id: "conv-123",
|
|
14
|
+
# cancellation_token: Phronomy::CancellationToken.timeout_after(30),
|
|
15
|
+
# max_parallel_tools: 5
|
|
16
|
+
# )
|
|
17
|
+
# agent.invoke("Hello", invocation_context: ctx)
|
|
18
|
+
class InvocationContext
|
|
19
|
+
# @return [String, nil] conversation / workflow thread identifier
|
|
20
|
+
attr_reader :thread_id
|
|
21
|
+
|
|
22
|
+
# @return [String, nil] session identifier (e.g. Rails session id)
|
|
23
|
+
attr_reader :session_id
|
|
24
|
+
|
|
25
|
+
# @return [String, nil] end-user identifier for tracing / audit
|
|
26
|
+
attr_reader :user_id
|
|
27
|
+
|
|
28
|
+
# @return [CancellationToken, nil]
|
|
29
|
+
attr_reader :cancellation_token
|
|
30
|
+
|
|
31
|
+
# @return [Deadline, nil]
|
|
32
|
+
attr_reader :deadline
|
|
33
|
+
|
|
34
|
+
# @return [Object, nil] OpenTelemetry / tracing span
|
|
35
|
+
attr_reader :tracer_span
|
|
36
|
+
|
|
37
|
+
# @return [Integer, nil] max tokens the agent may consume this invocation
|
|
38
|
+
attr_reader :token_budget
|
|
39
|
+
|
|
40
|
+
# @return [Integer] maximum simultaneous tool calls (default: 10)
|
|
41
|
+
attr_reader :max_parallel_tools
|
|
42
|
+
|
|
43
|
+
# @return [Object, nil] approval policy applied before write-scope tools
|
|
44
|
+
attr_reader :approval_policy
|
|
45
|
+
|
|
46
|
+
# @return [Object, nil] redaction policy applied to tool args / results
|
|
47
|
+
attr_reader :redaction_policy
|
|
48
|
+
|
|
49
|
+
# @return [Hash, nil] per-provider concurrency / rate-limit overrides
|
|
50
|
+
attr_reader :provider_limits
|
|
51
|
+
|
|
52
|
+
# @return [String, nil] unique identifier for this task in the trace tree
|
|
53
|
+
attr_reader :task_id
|
|
54
|
+
|
|
55
|
+
# @return [String, nil] task_id of the parent span / task
|
|
56
|
+
attr_reader :parent_task_id
|
|
57
|
+
|
|
58
|
+
# @param thread_id [String, nil]
|
|
59
|
+
# @param session_id [String, nil]
|
|
60
|
+
# @param user_id [String, nil]
|
|
61
|
+
# @param cancellation_token [CancellationToken, nil]
|
|
62
|
+
# @param deadline [Deadline, nil]
|
|
63
|
+
# @param tracer_span [Object, nil]
|
|
64
|
+
# @param token_budget [Integer, nil]
|
|
65
|
+
# @param max_parallel_tools [Integer]
|
|
66
|
+
# @param approval_policy [Object, nil]
|
|
67
|
+
# @param redaction_policy [Object, nil]
|
|
68
|
+
# @param provider_limits [Hash, nil]
|
|
69
|
+
# @param task_id [String, nil]
|
|
70
|
+
# @param parent_task_id [String, nil]
|
|
71
|
+
# @api private
|
|
72
|
+
def initialize(
|
|
73
|
+
thread_id: nil,
|
|
74
|
+
session_id: nil,
|
|
75
|
+
user_id: nil,
|
|
76
|
+
cancellation_token: nil,
|
|
77
|
+
deadline: nil,
|
|
78
|
+
tracer_span: nil,
|
|
79
|
+
token_budget: nil,
|
|
80
|
+
max_parallel_tools: 10,
|
|
81
|
+
approval_policy: nil,
|
|
82
|
+
redaction_policy: nil,
|
|
83
|
+
provider_limits: nil,
|
|
84
|
+
task_id: nil,
|
|
85
|
+
parent_task_id: nil
|
|
86
|
+
)
|
|
87
|
+
@thread_id = thread_id
|
|
88
|
+
@session_id = session_id
|
|
89
|
+
@user_id = user_id
|
|
90
|
+
@cancellation_token = cancellation_token
|
|
91
|
+
@deadline = deadline
|
|
92
|
+
@tracer_span = tracer_span
|
|
93
|
+
@token_budget = token_budget
|
|
94
|
+
@max_parallel_tools = max_parallel_tools
|
|
95
|
+
@approval_policy = approval_policy
|
|
96
|
+
@redaction_policy = redaction_policy
|
|
97
|
+
@provider_limits = provider_limits
|
|
98
|
+
@task_id = task_id
|
|
99
|
+
@parent_task_id = parent_task_id
|
|
100
|
+
end
|
|
101
|
+
|
|
102
|
+
# Returns a new +InvocationContext+ with the given attributes merged in.
|
|
103
|
+
# All other attributes are carried over unchanged.
|
|
104
|
+
#
|
|
105
|
+
# @param overrides [Hash] keyword arguments to override
|
|
106
|
+
# @return [InvocationContext]
|
|
107
|
+
# @api private
|
|
108
|
+
def merge(**overrides)
|
|
109
|
+
InvocationContext.new(
|
|
110
|
+
thread_id: overrides.fetch(:thread_id, @thread_id),
|
|
111
|
+
session_id: overrides.fetch(:session_id, @session_id),
|
|
112
|
+
user_id: overrides.fetch(:user_id, @user_id),
|
|
113
|
+
cancellation_token: overrides.fetch(:cancellation_token, @cancellation_token),
|
|
114
|
+
deadline: overrides.fetch(:deadline, @deadline),
|
|
115
|
+
tracer_span: overrides.fetch(:tracer_span, @tracer_span),
|
|
116
|
+
token_budget: overrides.fetch(:token_budget, @token_budget),
|
|
117
|
+
max_parallel_tools: overrides.fetch(:max_parallel_tools, @max_parallel_tools),
|
|
118
|
+
approval_policy: overrides.fetch(:approval_policy, @approval_policy),
|
|
119
|
+
redaction_policy: overrides.fetch(:redaction_policy, @redaction_policy),
|
|
120
|
+
provider_limits: overrides.fetch(:provider_limits, @provider_limits),
|
|
121
|
+
task_id: overrides.fetch(:task_id, @task_id),
|
|
122
|
+
parent_task_id: overrides.fetch(:parent_task_id, @parent_task_id)
|
|
123
|
+
)
|
|
124
|
+
end
|
|
125
|
+
|
|
126
|
+
# Convenience: returns the cancellation token or a new never-cancelled token.
|
|
127
|
+
# @return [CancellationToken]
|
|
128
|
+
# @api private
|
|
129
|
+
def effective_cancellation_token
|
|
130
|
+
@cancellation_token || CancellationToken.new
|
|
131
|
+
end
|
|
132
|
+
|
|
133
|
+
# Returns the cancellation token to use for an invocation, taking both the
|
|
134
|
+
# explicit +cancellation_token+ and the +deadline+ into account.
|
|
135
|
+
#
|
|
136
|
+
# - When +cancellation_token+ is set, it is returned unchanged.
|
|
137
|
+
# - When only +deadline+ is set, a new {CancellationToken} is created and
|
|
138
|
+
# the deadline is attached to it via {Deadline#attach_to}.
|
|
139
|
+
# - When neither is set, returns +nil+.
|
|
140
|
+
#
|
|
141
|
+
# @return [CancellationToken, nil]
|
|
142
|
+
# @api private
|
|
143
|
+
def effective_timeout_token
|
|
144
|
+
return @cancellation_token if @cancellation_token
|
|
145
|
+
return nil if @deadline.nil?
|
|
146
|
+
|
|
147
|
+
token = CancellationToken.new
|
|
148
|
+
@deadline.attach_to(token)
|
|
149
|
+
token
|
|
150
|
+
end
|
|
151
|
+
end
|
|
152
|
+
end
|