phronomy 0.6.0 → 0.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (143)
  1. checksums.yaml +4 -4
  2. data/.mutant.yml +22 -0
  3. data/CHANGELOG.md +488 -0
  4. data/CONTRIBUTING.md +102 -0
  5. data/README.md +374 -36
  6. data/RELEASE_CHECKLIST.md +86 -0
  7. data/Rakefile +33 -0
  8. data/SECURITY.md +80 -0
  9. data/benchmark/baseline.json +9 -0
  10. data/benchmark/bench_agent_invoke.rb +105 -0
  11. data/benchmark/bench_context_assembler.rb +46 -0
  12. data/benchmark/bench_regression.rb +172 -0
  13. data/benchmark/bench_token_estimator.rb +44 -0
  14. data/benchmark/bench_tool_schema.rb +69 -0
  15. data/benchmark/bench_vector_store.rb +39 -0
  16. data/benchmark/bench_workflow.rb +55 -0
  17. data/benchmark/run_all.rb +118 -0
  18. data/docs/decisions/001-rubyllm-as-provider-layer.md +42 -0
  19. data/docs/decisions/002-workflow-context-immutability.md +42 -0
  20. data/docs/decisions/003-event-loop-singleton.md +48 -0
  21. data/docs/decisions/004-invoke-timeout-is-not-cancellation.md +75 -0
  22. data/docs/decisions/005-static-knowledge-class-level-cache.md +45 -0
  23. data/docs/decisions/006-no-built-in-guardrails.md +66 -0
  24. data/docs/decisions/007-mcp-is-beta-stability.md +51 -0
  25. data/docs/decisions/008-orchestrator-uses-os-threads.md +52 -0
  26. data/docs/decisions/009-state-store-abstraction.md +141 -0
  27. data/docs/decisions/010-cooperative-first-concurrency.md +248 -0
  28. data/lib/phronomy/agent/base.rb +416 -49
  29. data/lib/phronomy/agent/before_completion_context.rb +1 -0
  30. data/lib/phronomy/agent/checkpoint.rb +1 -0
  31. data/lib/phronomy/agent/concerns/before_completion.rb +6 -0
  32. data/lib/phronomy/agent/concerns/error_translation.rb +45 -0
  33. data/lib/phronomy/agent/concerns/guardrailable.rb +3 -0
  34. data/lib/phronomy/agent/concerns/retryable.rb +12 -1
  35. data/lib/phronomy/agent/concerns/suspendable.rb +19 -0
  36. data/lib/phronomy/agent/fsm.rb +44 -52
  37. data/lib/phronomy/agent/handoff.rb +3 -0
  38. data/lib/phronomy/agent/orchestrator.rb +191 -54
  39. data/lib/phronomy/agent/parallel_tool_chat.rb +87 -13
  40. data/lib/phronomy/agent/react_agent.rb +16 -6
  41. data/lib/phronomy/agent/runner.rb +2 -0
  42. data/lib/phronomy/agent/shared_state.rb +11 -0
  43. data/lib/phronomy/agent/suspend_signal.rb +2 -0
  44. data/lib/phronomy/agent/team_coordinator.rb +17 -5
  45. data/lib/phronomy/async_queue.rb +155 -0
  46. data/lib/phronomy/blocking_adapter_pool.rb +435 -0
  47. data/lib/phronomy/cancellation_scope.rb +123 -0
  48. data/lib/phronomy/cancellation_token.rb +133 -0
  49. data/lib/phronomy/concurrency_gate.rb +155 -0
  50. data/lib/phronomy/configuration.rb +168 -2
  51. data/lib/phronomy/context/assembler.rb +6 -0
  52. data/lib/phronomy/context/compaction_context.rb +2 -0
  53. data/lib/phronomy/context/context_version_cache.rb +2 -0
  54. data/lib/phronomy/context/token_budget.rb +3 -0
  55. data/lib/phronomy/context/token_estimator.rb +9 -2
  56. data/lib/phronomy/context/trigger_context.rb +1 -0
  57. data/lib/phronomy/context/trim_context.rb +4 -0
  58. data/lib/phronomy/deadline.rb +63 -0
  59. data/lib/phronomy/diagnostics.rb +62 -0
  60. data/lib/phronomy/embeddings/base.rb +22 -2
  61. data/lib/phronomy/embeddings/ruby_llm_embeddings.rb +6 -2
  62. data/lib/phronomy/eval/comparison.rb +2 -0
  63. data/lib/phronomy/eval/dataset.rb +4 -0
  64. data/lib/phronomy/eval/metrics.rb +6 -0
  65. data/lib/phronomy/eval/runner.rb +11 -9
  66. data/lib/phronomy/eval/scorer/base.rb +1 -0
  67. data/lib/phronomy/eval/scorer/exact_match.rb +2 -0
  68. data/lib/phronomy/eval/scorer/includes_scorer.rb +2 -0
  69. data/lib/phronomy/eval/scorer/llm_judge.rb +2 -0
  70. data/lib/phronomy/event_loop.rb +275 -30
  71. data/lib/phronomy/fsm_session.rb +57 -4
  72. data/lib/phronomy/generator_verifier.rb +2 -0
  73. data/lib/phronomy/guardrail/base.rb +3 -0
  74. data/lib/phronomy/guardrail/prompt_injection_guardrail.rb +58 -0
  75. data/lib/phronomy/invocation_context.rb +152 -0
  76. data/lib/phronomy/knowledge_source/base.rb +24 -2
  77. data/lib/phronomy/knowledge_source/entity_knowledge.rb +7 -2
  78. data/lib/phronomy/knowledge_source/rag_knowledge.rb +8 -4
  79. data/lib/phronomy/knowledge_source/static_knowledge.rb +7 -2
  80. data/lib/phronomy/llm_adapter/base.rb +104 -0
  81. data/lib/phronomy/llm_adapter/ruby_llm.rb +41 -0
  82. data/lib/phronomy/llm_adapter.rb +20 -0
  83. data/lib/phronomy/loader/base.rb +1 -0
  84. data/lib/phronomy/loader/csv_loader.rb +2 -0
  85. data/lib/phronomy/loader/markdown_loader.rb +2 -0
  86. data/lib/phronomy/loader/plain_text_loader.rb +1 -0
  87. data/lib/phronomy/metrics.rb +38 -0
  88. data/lib/phronomy/output_parser/base.rb +1 -0
  89. data/lib/phronomy/output_parser/json_parser.rb +22 -3
  90. data/lib/phronomy/output_parser/structured_parser.rb +2 -0
  91. data/lib/phronomy/prompt_template.rb +5 -0
  92. data/lib/phronomy/runnable.rb +20 -3
  93. data/lib/phronomy/runtime/deterministic_scheduler.rb +412 -0
  94. data/lib/phronomy/runtime/fake_scheduler.rb +165 -0
  95. data/lib/phronomy/runtime/gate_registry.rb +52 -0
  96. data/lib/phronomy/runtime/pool_registry.rb +57 -0
  97. data/lib/phronomy/runtime/runtime_metrics.rb +117 -0
  98. data/lib/phronomy/runtime/scheduler.rb +98 -0
  99. data/lib/phronomy/runtime/scheduler_timer_adapter.rb +79 -0
  100. data/lib/phronomy/runtime/task_registry.rb +48 -0
  101. data/lib/phronomy/runtime/thread_scheduler.rb +30 -0
  102. data/lib/phronomy/runtime/timer_queue.rb +106 -0
  103. data/lib/phronomy/runtime/timer_service.rb +42 -0
  104. data/lib/phronomy/runtime.rb +374 -0
  105. data/lib/phronomy/splitter/base.rb +2 -0
  106. data/lib/phronomy/splitter/fixed_size_splitter.rb +2 -0
  107. data/lib/phronomy/splitter/recursive_splitter.rb +2 -0
  108. data/lib/phronomy/state_store/base.rb +48 -0
  109. data/lib/phronomy/state_store/in_memory.rb +62 -0
  110. data/lib/phronomy/task/backend.rb +80 -0
  111. data/lib/phronomy/task/fiber_backend.rb +157 -0
  112. data/lib/phronomy/task/immediate_backend.rb +89 -0
  113. data/lib/phronomy/task/thread_backend.rb +84 -0
  114. data/lib/phronomy/task.rb +275 -0
  115. data/lib/phronomy/task_group.rb +265 -0
  116. data/lib/phronomy/testing/fake_clock.rb +109 -0
  117. data/lib/phronomy/testing/fake_scheduler.rb +104 -0
  118. data/lib/phronomy/testing/scheduler_helpers.rb +59 -0
  119. data/lib/phronomy/testing.rb +12 -0
  120. data/lib/phronomy/tool/agent_tool.rb +1 -0
  121. data/lib/phronomy/tool/base.rb +298 -28
  122. data/lib/phronomy/tool/mcp_tool.rb +103 -17
  123. data/lib/phronomy/tool/scope_policy.rb +50 -0
  124. data/lib/phronomy/tool_executor.rb +106 -0
  125. data/lib/phronomy/tracing/base.rb +3 -0
  126. data/lib/phronomy/tracing/langfuse_tracer.rb +2 -0
  127. data/lib/phronomy/tracing/open_telemetry_tracer.rb +36 -0
  128. data/lib/phronomy/vector_store/async_backend.rb +110 -0
  129. data/lib/phronomy/vector_store/base.rb +40 -7
  130. data/lib/phronomy/vector_store/in_memory.rb +16 -7
  131. data/lib/phronomy/vector_store/pgvector.rb +40 -9
  132. data/lib/phronomy/vector_store/redis_search.rb +29 -8
  133. data/lib/phronomy/version.rb +1 -1
  134. data/lib/phronomy/workflow.rb +147 -11
  135. data/lib/phronomy/workflow_context.rb +83 -6
  136. data/lib/phronomy/workflow_runner.rb +106 -7
  137. data/lib/phronomy.rb +112 -1
  138. data/scripts/api_snapshot.rb +91 -0
  139. data/scripts/check_api_annotations.rb +68 -0
  140. data/scripts/check_private_enforcement.rb +93 -0
  141. data/scripts/check_readme_runnable.rb +98 -0
  142. data/scripts/run_mutation.sh +46 -0
  143. metadata +83 -2
@@ -3,12 +3,37 @@
3
3
  module Phronomy
4
4
  # Singleton event loop that manages all FSMSession instances.
5
5
  #
6
- # A single background thread reads from a global Thread::Queue and dispatches
7
- # events to their target FSMSession. IO work (LLM calls, tool calls) runs in
8
- # separate IO threads that post events back to the loop via EventLoop#post.
6
+ # A single background thread reads from a global {Phronomy::AsyncQueue} and
7
+ # dispatches events to their target FSMSession. IO work (LLM calls, tool
8
+ # calls) must be dispatched via +Runtime.instance.spawn+ or
9
+ # +BlockingAdapterPool+, then post results back to the loop via
10
+ # {EventLoop#post}.
9
11
  #
10
12
  # Activated with: +Phronomy.configure { |c| c.event_loop = true }+
11
13
  #
14
+ # == Threading exception (see ADR-010 Rule 2)
15
+ #
16
+ # +EventLoop+ is a **deliberate exception** to Phronomy's cooperative-first
17
+ # concurrency model. Its dispatch loop is an infinite +while @running+ loop
18
+ # that must never block the framework's own event processing.
19
+ # Running it on a shared scheduler task would consume the scheduler, preventing
20
+ # other tasks from running. Therefore {#start} creates a dedicated
21
+ # {Runtime::ThreadScheduler} — this is correct and intentional per ADR-010.
22
+ # No other framework component should do the same; see the ADR-010 checklist.
23
+ #
24
+ # == Handler constraints
25
+ #
26
+ # Handlers dispatched by the EventLoop run **on the EventLoop thread**.
27
+ # They must not:
28
+ #
29
+ # * Perform blocking operations directly (database queries, LLM calls, HTTP
30
+ # requests). Schedule blocking work via +Runtime.instance.spawn+ or
31
+ # +BlockingAdapterPool+, then post results back with {#post}.
32
+ # * Call +Workflow#invoke+ (or any synchronous +invoke+) from within a
33
+ # handler. That method would block waiting for the EventLoop to process
34
+ # events, causing a deadlock. Use the async pattern: post a follow-up
35
+ # event instead.
36
+ #
12
37
  # == Fork safety
13
38
  #
14
39
  # +EventLoop.instance+ is lazily initialized. The background thread is not
@@ -20,14 +45,25 @@ module Phronomy
20
45
  # Do NOT call +Workflow#invoke+ (in EventLoop mode) from within a workflow
21
46
  # entry action. The entry action runs on the EventLoop thread; a nested
22
47
  # +invoke+ would block waiting for the same thread to process events →
23
- # deadlock. Use the async IO pattern instead (spawn a Thread, post events
24
- # back to the EventLoop).
48
+ # deadlock. Use the async pattern instead: schedule work via
49
+ # +Runtime.instance.spawn+ or +BlockingAdapterPool+, then post events back
50
+ # via +Phronomy::EventLoop.instance.post(...)+.
25
51
  class EventLoop
26
52
  # Returns the singleton instance, creating and starting it on first call.
27
53
  def self.instance
28
54
  @instance ||= new.tap(&:start)
29
55
  end
30
56
 
57
+ # Returns true when called from within the EventLoop dispatch task.
58
+ # Compares the current task's name (+Task.current&.name+) with "event-loop",
59
+ # the name given to the dispatch task in {#start}, so that the check works
60
+ # correctly for both thread-based and future fiber-based scheduler backends.
61
+ # @return [Boolean]
62
+ # @api private
63
+ def self.current?
64
+ Phronomy::Task.current&.name == "event-loop"
65
+ end
66
+
31
67
  # Stops and destroys the singleton. Primarily used in tests.
32
68
  # @api private
33
69
  def self.reset!
@@ -36,9 +72,51 @@ module Phronomy
36
72
  end
37
73
 
38
74
  def initialize
39
- @queue = Thread::Queue.new # global event queue (thread-safe; no Mutex needed)
40
- @fsms = {} # { id => FSMSession } — EventLoop thread only
41
- @waiting = {} # { id => completion_queue } — EventLoop thread only
75
+ @queue = Phronomy::AsyncQueue.new # global event queue (thread-safe; no Mutex needed)
76
+ @fsms = {} # { id => FSMSession } — EventLoop thread only
77
+ @waiting = {} # { id => completion_queue } — EventLoop thread only
78
+ # Mutex-backed FSM count for drain-mode shutdown.
79
+ @fsm_count_mutex = Mutex.new
80
+ @fsm_count_cond = ConditionVariable.new
81
+ @fsm_count = 0
82
+ # Token cancelled when shutdown is requested; new child sessions receive it.
83
+ @shutdown_token = Phronomy::CancellationToken.new
84
+ # Fairness metrics: written by the EventLoop thread, read from any thread; guarded by @lag_mutex
85
+ @lag_mutex = Mutex.new
86
+ @last_lag_ns = 0
87
+ @max_lag_ns = 0
88
+ @dispatch_count = 0
89
+ @total_lag_ns = 0
90
+ end
91
+
92
+ # Returns the most recently measured event-loop lag in seconds.
93
+ # Lag is the wall-clock time between {#post} and the moment the event
94
+ # is dequeued for dispatch. Thread-safe.
95
+ # @return [Float]
96
+ # @api private
97
+ def last_lag_seconds
98
+ @lag_mutex.synchronize { @last_lag_ns } / 1_000_000_000.0
99
+ end
100
+
101
+ # Returns the maximum event-loop lag seen since the loop was started.
102
+ # Thread-safe.
103
+ # @return [Float]
104
+ # @api private
105
+ def max_lag_seconds
106
+ @lag_mutex.synchronize { @max_lag_ns } / 1_000_000_000.0
107
+ end
108
+
109
+ # Returns the mean event-loop lag across all dispatched events since the
110
+ # loop was started. Returns 0.0 when no events have been dispatched.
111
+ # Thread-safe.
112
+ # @return [Float]
113
+ # @api private
114
+ def average_lag_seconds
115
+ @lag_mutex.synchronize do
116
+ return 0.0 if @dispatch_count.zero?
117
+
118
+ @total_lag_ns.to_f / @dispatch_count / 1_000_000_000.0
119
+ end
42
120
  end
43
121
 
44
122
  # Registers an FSMSession for execution and returns a completion queue.
@@ -52,20 +130,23 @@ module Phronomy
52
130
  # the popped value will be an Exception — callers are responsible for re-raising it.
53
131
  #
54
132
  # @param fsm_session [Phronomy::FSMSession]
55
- # @return [Thread::Queue] resolves to final/halted context, or an Exception
133
+ # @return [Phronomy::AsyncQueue] resolves to final/halted context, or an Exception
134
+ # @api private
56
135
  def register(fsm_session)
57
- if Thread.current[:phronomy_event_loop_thread]
136
+ if Phronomy::EventLoop.current?
58
137
  raise Phronomy::Error,
59
138
  "Cannot call Workflow#invoke (EventLoop mode) from within an EventLoop " \
60
- "entry action. Use the async IO pattern: spawn a Thread, post events " \
61
- "back via Phronomy::EventLoop.instance.post(...) instead."
139
+ "entry action. Schedule work via Runtime.instance.spawn or " \
140
+ "BlockingAdapterPool, then post events back via " \
141
+ "Phronomy::EventLoop.instance.post(...) instead."
62
142
  end
63
143
 
64
- completion_queue = Thread::Queue.new
144
+ completion_queue = Phronomy::AsyncQueue.new
65
145
  # Pass both session and completion_queue in the event payload so that the
66
146
  # EventLoop thread is the sole writer of @fsms and @waiting.
67
- @queue.push(Event.new(type: :start, target_id: fsm_session.id,
68
- payload: {session: fsm_session, completion: completion_queue}))
147
+ @queue.push([Event.new(type: :start, target_id: fsm_session.id,
148
+ payload: {session: fsm_session, completion: completion_queue}),
149
+ Process.clock_gettime(Process::CLOCK_MONOTONIC, :nanosecond)])
69
150
  completion_queue
70
151
  end
71
152
 
@@ -78,45 +159,146 @@ module Phronomy
78
159
  #
79
160
  # @param agent_fsm [Phronomy::Agent::FSM]
80
161
  # @return [nil]
162
+ # @api private
81
163
  def enqueue_child(agent_fsm)
82
- @queue.push(Event.new(type: :start, target_id: agent_fsm.id,
83
- payload: {session: agent_fsm, completion: nil}))
164
+ @queue.push([Event.new(type: :start, target_id: agent_fsm.id,
165
+ payload: {session: agent_fsm, completion: nil}),
166
+ Process.clock_gettime(Process::CLOCK_MONOTONIC, :nanosecond)])
84
167
  nil
85
168
  end
86
169
 
87
170
  # Posts an event to the loop. Safe to call from any thread (including IO threads).
171
+ # The current monotonic clock time is recorded so that the EventLoop can
172
+ # measure the dispatch lag when it dequeues the event.
88
173
  #
174
+ # @note **Handler constraint**: do not perform blocking operations or call
175
+ # +Workflow#invoke+ directly from within the handler that processes a
176
+ # posted event. Handlers run on the EventLoop thread; blocking there
177
+ # stalls all session processing. Run blocking work via +Runtime.instance.spawn+
177
+ # or +BlockingAdapterPool+ and post a new event once the result is ready.
89
179
  # @param event [Phronomy::Event]
180
+ # @api private
90
181
  def post(event)
91
- @queue.push(event)
182
+ @queue.push([event, Process.clock_gettime(Process::CLOCK_MONOTONIC, :nanosecond)])
92
183
  end
93
184
 
94
- # Starts the background event loop thread.
185
+ # Starts the EventLoop dispatch task under {Runtime} ownership.
186
+ #
187
+ # The dispatch loop runs as a {Phronomy::Task} so that {Runtime#shutdown}
188
+ # can drain it together with all other in-flight tasks. The task is named
189
+ # +"event-loop"+ so that {.current?} can identify it via
190
+ # +Task.current&.name+.
95
191
  # @return [self]
192
+ # @api private
96
193
  def start
194
+ return self if @task&.alive?
195
+
196
+ # Reset shutdown state so the loop can be restarted after a stop.
197
+ @shutdown_token = Phronomy::CancellationToken.new
198
+ @fsm_count_mutex.synchronize { @fsm_count = 0 }
97
199
  @running = true
98
- @thread = Thread.new do
99
- Thread.current[:phronomy_event_loop_thread] = true
200
+ # The dispatch loop must always run in a real background thread.
201
+ # A cooperative scheduler (FakeScheduler/ImmediateBackend) executes tasks
202
+ # synchronously on the caller's thread, which would block forever inside
203
+ # the run_loop infinite loop. Create a dedicated Runtime with
204
+ # ThreadScheduler to guarantee async execution regardless of the global
205
+ # runtime_backend setting.
206
+ thread_runtime = Phronomy::Runtime.new(scheduler: Phronomy::Runtime::ThreadScheduler.new)
207
+ @task = thread_runtime.spawn(name: "event-loop") do
100
208
  run_loop
101
209
  end
102
- @thread.abort_on_exception = false
103
210
  self
104
211
  end
105
212
 
106
- # Stops the background thread. Used in tests only.
213
+ # Stops the EventLoop dispatch task.
214
+ #
215
+ # Sends a cooperative shutdown sentinel to the event queue so that the
216
+ # dispatch task can finish any in-flight handler before exiting. Waits up
217
+ # to +timeout+ seconds for a clean shutdown; if the task is still alive
218
+ # afterwards it is cancelled (cooperative cancellation via {Task#cancel!}).
219
+ #
220
+ # @param timeout [Numeric] seconds to wait for cooperative shutdown. Defaults
221
+ # to +Phronomy.configuration.event_loop_stop_grace_seconds+ (5 s).
222
+ # @param drain [Boolean] when +true+, wait for all active FSMSessions to
223
+ # complete before signalling the loop to stop. Bounded by +timeout+.
224
+ # Defaults to +false+.
225
+ # @param force_kill [Boolean] deprecated — retained for backward compatibility.
226
+ # When +true+, the dispatch task is cancelled via {Task#cancel!} if it does
227
+ # not stop within +timeout+. +Thread#kill+ is no longer used; cooperative
228
+ # cancellation (raising {CancellationError}) replaces it.
229
+ # @return [Symbol] shutdown status:
230
+ # - +:clean+ — loop exited cooperatively with no active sessions discarded
231
+ # - +:drained_with_discards+ — drain mode requested but sessions remained;
232
+ # they were discarded and the loop was stopped
233
+ # - +:timeout+ — the task did not stop in time and +force_kill:+ is +false+
234
+ # - +:force_killed+ — the task was cancelled because it did not stop in time
107
235
  # @api private
108
- def stop
236
+ def stop(timeout: Phronomy.configuration.event_loop_stop_grace_seconds, drain: false, force_kill: false)
237
+ @shutdown_token.cancel!
238
+ status = :clean
239
+
240
+ if drain
241
+ # Wait for active sessions to finish, bounded by timeout.
242
+ deadline = Process.clock_gettime(Process::CLOCK_MONOTONIC) + timeout
243
+ @fsm_count_mutex.synchronize do
244
+ while @fsm_count > 0
245
+ remaining = deadline - Process.clock_gettime(Process::CLOCK_MONOTONIC)
246
+ break if remaining <= 0
247
+ @fsm_count_cond.wait(@fsm_count_mutex, remaining)
248
+ end
249
+ status = :drained_with_discards if @fsm_count > 0
250
+ end
251
+ end
252
+
109
253
  @running = false
110
- @thread&.kill
111
- @thread = nil
254
+ @queue.push(:__stop__) # unblock queue.pop so the task can see @running = false
255
+ begin
256
+ @task&.join(timeout)
257
+ rescue
258
+ # Task may have terminated with an error (e.g. simulated crash in tests).
259
+ # Suppress the re-raise so the cleanup below always runs.
260
+ nil
261
+ end
262
+ if @task&.alive?
263
+ if force_kill
264
+ Phronomy.configuration.logger&.warn(
265
+ "[Phronomy] EventLoop task did not stop within #{timeout}s; cancelling. " \
266
+ "This is a last resort — check for blocking operations in event handlers."
267
+ )
268
+ @task.cancel!
269
+ status = :force_killed
270
+ else
271
+ Phronomy.configuration.logger&.warn(
272
+ "[Phronomy] EventLoop task did not stop within #{timeout}s; abandoning " \
273
+ "(force_kill: false). Check for blocking operations in event handlers."
274
+ )
275
+ status = :timeout
276
+ end
277
+ end
278
+ @task = nil
279
+ status
112
280
  end
113
281
 
114
282
  private
115
283
 
116
284
  def run_loop
117
285
  while @running
118
- event = @queue.pop
286
+ item = @queue.pop
287
+ # :__stop__ is used purely as an unblock signal for @queue.pop; the
288
+ # actual stop condition is @running == false (set before the push).
289
+ # Treating it as `next` instead of `break` prevents a stale sentinel
290
+ # (left by a previous stop call that raced with thread start) from
291
+ # immediately terminating a freshly restarted EventLoop.
292
+ next if item == :__stop__
293
+
294
+ # item is [event, posted_at_ns] — unwrap and measure lag
295
+ event, posted_at_ns = item
296
+ dequeued_at_ns = Process.clock_gettime(Process::CLOCK_MONOTONIC, :nanosecond)
297
+ lag_ns = dequeued_at_ns - posted_at_ns
298
+ update_lag_metrics(lag_ns)
299
+ check_starvation_lag(lag_ns, event)
119
300
 
301
+ dispatch_start_ns = dequeued_at_ns
120
302
  case event.type
121
303
  when :finished, :halted, :error
122
304
  # All three terminal events share the same cleanup path.
@@ -124,24 +306,87 @@ module Phronomy
124
306
  @fsms.delete(event.target_id)
125
307
  cq = @waiting.delete(event.target_id)
126
308
  cq&.push(event.payload)
309
+ # Decrement active FSM count and signal drain waiters.
310
+ @fsm_count_mutex.synchronize do
311
+ @fsm_count -= 1
312
+ @fsm_count_cond.signal if @fsm_count <= 0
313
+ end
127
314
 
128
315
  when :start
129
316
  # session and completion_queue arrive together in the payload so that
130
317
  # this thread is the sole writer of @fsms and @waiting.
131
318
  # completion may be nil for fire-and-forget child sessions (AgentFSM).
132
- @fsms[event.target_id] = event.payload[:session]
319
+ session = event.payload[:session]
133
320
  cq = event.payload[:completion]
321
+
322
+ # When shutdown has been requested, reject new sessions with a
323
+ # CancellationError rather than starting new LLM calls that would
324
+ # be interrupted by force-kill.
325
+ if @shutdown_token.cancelled? && cq
326
+ cq.push(Phronomy::CancellationError.new("EventLoop is shutting down"))
327
+ next
328
+ end
329
+
330
+ @fsms[event.target_id] = session
134
331
  @waiting[event.target_id] = cq if cq
135
- event.payload[:session].start
332
+ @fsm_count_mutex.synchronize { @fsm_count += 1 }
333
+ session.start
136
334
 
137
335
  else
138
- @fsms[event.target_id]&.handle(event)
336
+ fsm = @fsms[event.target_id]
337
+ if fsm
338
+ fsm.handle(event)
339
+ else
340
+ # Warn when an event is dropped due to an unknown target_id so that
341
+ # mis-typed IDs and handler-deregistration races are visible.
342
+ warn "[Phronomy::EventLoop] Dropped event #{event.type.inspect} — " \
343
+ "no handler for target_id #{event.target_id.inspect}"
344
+ end
139
345
  end
346
+
347
+ # Check how long this dispatch took; warn if it exceeds the threshold.
348
+ check_dispatch_time(dispatch_start_ns, event)
140
349
  end
141
350
  rescue => e
142
351
  # Unblock all waiting callers if the loop dies unexpectedly.
143
352
  @waiting.values.each { |cq| cq.push(e) }
144
353
  raise
145
354
  end
355
+
356
+ def update_lag_metrics(lag_ns)
357
+ @lag_mutex.synchronize do
358
+ @last_lag_ns = lag_ns
359
+ @max_lag_ns = lag_ns if lag_ns > @max_lag_ns
360
+ @total_lag_ns += lag_ns
361
+ @dispatch_count += 1
362
+ end
363
+ end
364
+
365
+ def check_starvation_lag(lag_ns, event)
366
+ threshold = Phronomy.configuration.event_loop_starvation_threshold_seconds
367
+ return unless threshold && lag_ns > (threshold * 1_000_000_000)
368
+
369
+ Phronomy.configuration.logger&.warn do
370
+ "[Phronomy::EventLoop] Starvation detected: event #{event.type.inspect} " \
371
+ "for target #{event.target_id.inspect} waited " \
372
+ "#{format("%.3f", lag_ns / 1_000_000_000.0)}s in queue " \
373
+ "(threshold: #{threshold}s)"
374
+ end
375
+ end
376
+
377
+ def check_dispatch_time(dispatch_start_ns, event)
378
+ threshold = Phronomy.configuration.event_loop_dispatch_threshold_seconds
379
+ return unless threshold
380
+
381
+ elapsed_ns = Process.clock_gettime(Process::CLOCK_MONOTONIC, :nanosecond) - dispatch_start_ns
382
+ return unless elapsed_ns > (threshold * 1_000_000_000)
383
+
384
+ Phronomy.configuration.logger&.warn do
385
+ "[Phronomy::EventLoop] Long dispatch: event #{event.type.inspect} " \
386
+ "for target #{event.target_id.inspect} took " \
387
+ "#{format("%.3f", elapsed_ns / 1_000_000_000.0)}s on the EventLoop thread " \
388
+ "(threshold: #{threshold}s). Consider moving blocking work to BlockingAdapterPool."
389
+ end
390
+ end
146
391
  end
147
392
  end
@@ -49,11 +49,13 @@ module Phronomy
49
49
  # @param external_events [Hash] { event_name => [{from:, to:, guard:}] }
50
50
  # @param phase_machine_class [Class] state_machines-backed phase tracker class
51
51
  # @param recursion_limit [Integer]
52
+ # @param action_timeouts [Hash] { state_name => seconds }
52
53
  # @param resume_event [Symbol, nil] external event to fire when resuming
53
54
  # @param resume_phase [Symbol, nil] wait state name to resume from
55
+ # @api private
54
56
  def initialize(id:, context:, entry_point:, entry_actions:, auto_state_set:,
55
57
  declared_states:, wait_state_names:, external_events:, phase_machine_class:,
56
- recursion_limit:, resume_event: nil, resume_phase: nil)
58
+ recursion_limit:, action_timeouts: {}, resume_event: nil, resume_phase: nil)
57
59
  @id = id
58
60
  @ctx = context
59
61
  @entry_point = entry_point
@@ -64,6 +66,7 @@ module Phronomy
64
66
  @external_events = external_events
65
67
  @phase_machine_class = phase_machine_class
66
68
  @recursion_limit = recursion_limit
69
+ @action_timeouts = action_timeouts
67
70
  @resume_event = resume_event
68
71
  @resume_phase = resume_phase
69
72
  @step = 0
@@ -88,20 +91,60 @@ module Phronomy
88
91
  @current_state = @entry_point
89
92
  @tracker = build_tracker(@current_state)
90
93
  @tracker.context = @ctx
91
- (@entry_actions[@current_state] || []).each { |c| c.call(@ctx) }
92
- advance_or_halt
94
+ (@entry_actions[@current_state] || []).each do |c|
95
+ result = c.call(@ctx)
96
+ if result.is_a?(Phronomy::Task)
97
+ # Awaitable action: spawn a task to await without blocking EventLoop.
98
+ @tracker.async_pending = true
99
+ session_id = @id
100
+ current_state_name = @current_state
101
+ timeout_secs = @action_timeouts[current_state_name]
102
+ Phronomy::Runtime.instance.spawn(name: "fsm-await-#{session_id}") do
103
+ if timeout_secs
104
+ if result.join(timeout_secs).nil?
105
+ result.cancel!
106
+ raise Phronomy::ActionTimeoutError,
107
+ "Action in state #{current_state_name.inspect} timed out after #{timeout_secs}s"
108
+ end
109
+ end
110
+ task_result = result.await
111
+ if task_result.is_a?(Phronomy::WorkflowContext)
112
+ event_loop.post(Event.new(type: :action_completed, target_id: session_id, payload: task_result))
113
+ else
114
+ event_loop.post(Event.new(type: :state_completed, target_id: session_id, payload: nil))
115
+ end
116
+ rescue => e
117
+ event_loop.post(Event.new(type: :error, target_id: session_id, payload: e))
118
+ end
119
+ break # Only one async action at a time per state
120
+ elsif result.is_a?(Phronomy::WorkflowContext)
121
+ @ctx = result
122
+ end
123
+ end
124
+ @tracker.context = @ctx
125
+ advance_or_halt unless @tracker.async_pending
93
126
  end
94
127
  rescue => e
95
128
  finish_with_error(e)
96
129
  end
97
130
 
98
131
  # Processes an event dispatched from EventLoop.
99
- # Called for :state_completed and all user-defined external events.
132
+ # Called for :state_completed, :action_completed, and all user-defined external events.
100
133
  #
101
134
  # @param event [Phronomy::Event]
135
+ # @api private
102
136
  def handle(event)
103
137
  return if @done
104
138
 
139
+ if event.type == :action_completed
140
+ # An awaitable entry action completed: update context and advance.
141
+ @ctx = event.payload if event.payload.is_a?(Phronomy::WorkflowContext)
142
+ @tracker.context = @ctx
143
+ @tracker.async_pending = false # Reset flag set by start or fire_and_advance!
144
+ advance_or_halt
145
+ return
146
+ end
147
+
105
148
  fire_and_advance!(event.type)
106
149
  rescue => e
107
150
  finish_with_error(e)
@@ -118,10 +161,20 @@ module Phronomy
118
161
  end
119
162
 
120
163
  fire_event!(@tracker, event_name, @current_state)
164
+ @ctx = @tracker.context
121
165
  next_phase = @tracker.phase.to_sym
122
166
  # When next_phase == @current_state, no transition matched → treat as terminal.
123
167
  @current_state = (next_phase == @current_state) ? FINISH : next_phase
124
168
  @step += 1
169
+
170
+ # If an entry action returned a Task, the after_transition callback set
171
+ # async_pending = true and spawned a thread. Skip advance_or_halt — the
172
+ # background thread will post :action_completed or :state_completed.
173
+ if @tracker.async_pending
174
+ @tracker.async_pending = false
175
+ return
176
+ end
177
+
125
178
  advance_or_halt
126
179
  end
127
180
 
@@ -113,6 +113,7 @@ module Phronomy
113
113
  # @param raise_if_untrusted [Boolean] when +true+, raises
114
114
  # {Phronomy::LowConfidenceError} if the final result does not meet the
115
115
  # confidence threshold (default: false)
116
+ # @api private
116
117
  def initialize(
117
118
  draft_agent:,
118
119
  review_agent:,
@@ -143,6 +144,7 @@ module Phronomy
143
144
  # @return [Result]
144
145
  # @raise [Phronomy::LowConfidenceError] when +raise_if_untrusted:+ is +true+
145
146
  # and the result does not meet the confidence threshold
147
+ # @api private
146
148
  def invoke(input, config: {})
147
149
  app = compiled_workflow
148
150
  state = app.invoke({input: input}, config: config)
@@ -17,6 +17,7 @@ module Phronomy
17
17
  # Validate the value. Subclasses must implement this method.
18
18
  # @param value [Object] the input or output being checked
19
19
  # @raise [Phronomy::GuardrailError] if the guardrail rejects the value
20
+ # @api public
20
21
  def check(value)
21
22
  raise NotImplementedError, "#{self.class}#check is not implemented"
22
23
  end
@@ -24,6 +25,7 @@ module Phronomy
24
25
  # Run the check, raising GuardrailError on failure.
25
26
  # @param value [Object]
26
27
  # @return [Object] the original value (unchanged) when the check passes
28
+ # @api public
27
29
  def run!(value)
28
30
  check(value)
29
31
  value
@@ -34,6 +36,7 @@ module Phronomy
34
36
  # Call inside #check to reject the value.
35
37
  # @param reason [String] human-readable rejection reason
36
38
  # @raise [Phronomy::GuardrailError]
39
+ # @api public
37
40
  def fail!(reason)
38
41
  raise Phronomy::GuardrailError.new(reason, guardrail: self)
39
42
  end
@@ -0,0 +1,58 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Phronomy
4
+ module Guardrail
5
+ # Detects potential prompt injection attempts in the agent input.
6
+ #
7
+ # Prompt injection is an attack where an adversary embeds LLM instructions
8
+ # inside data sources (e.g. RAG chunks, tool results, user input) to override
9
+ # the agent's intended behaviour.
10
+ #
11
+ # This guardrail scans the input string for common injection patterns and
12
+ # calls {#fail!} when a match is found. It is intended to be registered as
13
+ # an input guardrail on agents that consume untrusted external content.
14
+ #
15
+ # @example
16
+ # class MyAgent < Phronomy::Agent::Base
17
+ # model "gpt-4o"
18
+ # input_guardrails Phronomy::Guardrail::PromptInjectionGuardrail.new
19
+ # end
20
+ #
21
+ # @example Custom patterns
22
+ # guard = Phronomy::Guardrail::PromptInjectionGuardrail.new(
23
+ # extra_patterns: [/exfiltrate/i]
24
+ # )
25
+ class PromptInjectionGuardrail < InputGuardrail
26
+ # Common prompt injection / jailbreak patterns.
27
+ DEFAULT_PATTERNS = [
28
+ /ignore\s+(previous|prior|all)\s+instructions?/i,
29
+ /disregard\s+(previous|prior|all)\s+instructions?/i,
30
+ /forget\s+(previous|prior|all)\s+instructions?/i,
31
+ /override\s+(previous|prior|all)\s+instructions?/i,
32
+ /new\s+instructions?:\s/i,
33
+ /\byour\s+new\s+(role|instructions?|task)\b/i,
34
+ /you\s+are\s+now\s+(a|an)\b/i,
35
+ /\bact\s+as\s+(a|an)\b/i,
36
+ /\bpretend\s+(you\s+are|to\s+be)\b/i,
37
+ /\bdo\s+not\s+follow\s+(your|the)\s+instructions?\b/i
38
+ ].freeze
39
+
40
+ # @param extra_patterns [Array<Regexp>] additional patterns to scan for
41
+ # @api private
42
+ def initialize(extra_patterns: [])
43
+ super()
44
+ @patterns = DEFAULT_PATTERNS + extra_patterns
45
+ end
46
+
47
+ # Scans the input string for injection patterns.
48
+ # @param input [String, Hash]
49
+ # @api private
50
+ def check(input)
51
+ text = input.is_a?(Hash) ? input.values.join(" ") : input.to_s
52
+ @patterns.each do |pattern|
53
+ fail!("Potential prompt injection detected") if text.match?(pattern)
54
+ end
55
+ end
56
+ end
57
+ end
58
+ end