phronomy 0.6.0 → 0.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (143) hide show
  1. checksums.yaml +4 -4
  2. data/.mutant.yml +22 -0
  3. data/CHANGELOG.md +488 -0
  4. data/CONTRIBUTING.md +102 -0
  5. data/README.md +374 -36
  6. data/RELEASE_CHECKLIST.md +86 -0
  7. data/Rakefile +33 -0
  8. data/SECURITY.md +80 -0
  9. data/benchmark/baseline.json +9 -0
  10. data/benchmark/bench_agent_invoke.rb +105 -0
  11. data/benchmark/bench_context_assembler.rb +46 -0
  12. data/benchmark/bench_regression.rb +172 -0
  13. data/benchmark/bench_token_estimator.rb +44 -0
  14. data/benchmark/bench_tool_schema.rb +69 -0
  15. data/benchmark/bench_vector_store.rb +39 -0
  16. data/benchmark/bench_workflow.rb +55 -0
  17. data/benchmark/run_all.rb +118 -0
  18. data/docs/decisions/001-rubyllm-as-provider-layer.md +42 -0
  19. data/docs/decisions/002-workflow-context-immutability.md +42 -0
  20. data/docs/decisions/003-event-loop-singleton.md +48 -0
  21. data/docs/decisions/004-invoke-timeout-is-not-cancellation.md +75 -0
  22. data/docs/decisions/005-static-knowledge-class-level-cache.md +45 -0
  23. data/docs/decisions/006-no-built-in-guardrails.md +66 -0
  24. data/docs/decisions/007-mcp-is-beta-stability.md +51 -0
  25. data/docs/decisions/008-orchestrator-uses-os-threads.md +52 -0
  26. data/docs/decisions/009-state-store-abstraction.md +141 -0
  27. data/docs/decisions/010-cooperative-first-concurrency.md +248 -0
  28. data/lib/phronomy/agent/base.rb +416 -49
  29. data/lib/phronomy/agent/before_completion_context.rb +1 -0
  30. data/lib/phronomy/agent/checkpoint.rb +1 -0
  31. data/lib/phronomy/agent/concerns/before_completion.rb +6 -0
  32. data/lib/phronomy/agent/concerns/error_translation.rb +45 -0
  33. data/lib/phronomy/agent/concerns/guardrailable.rb +3 -0
  34. data/lib/phronomy/agent/concerns/retryable.rb +12 -1
  35. data/lib/phronomy/agent/concerns/suspendable.rb +19 -0
  36. data/lib/phronomy/agent/fsm.rb +44 -52
  37. data/lib/phronomy/agent/handoff.rb +3 -0
  38. data/lib/phronomy/agent/orchestrator.rb +191 -54
  39. data/lib/phronomy/agent/parallel_tool_chat.rb +87 -13
  40. data/lib/phronomy/agent/react_agent.rb +16 -6
  41. data/lib/phronomy/agent/runner.rb +2 -0
  42. data/lib/phronomy/agent/shared_state.rb +11 -0
  43. data/lib/phronomy/agent/suspend_signal.rb +2 -0
  44. data/lib/phronomy/agent/team_coordinator.rb +17 -5
  45. data/lib/phronomy/async_queue.rb +155 -0
  46. data/lib/phronomy/blocking_adapter_pool.rb +435 -0
  47. data/lib/phronomy/cancellation_scope.rb +123 -0
  48. data/lib/phronomy/cancellation_token.rb +133 -0
  49. data/lib/phronomy/concurrency_gate.rb +155 -0
  50. data/lib/phronomy/configuration.rb +168 -2
  51. data/lib/phronomy/context/assembler.rb +6 -0
  52. data/lib/phronomy/context/compaction_context.rb +2 -0
  53. data/lib/phronomy/context/context_version_cache.rb +2 -0
  54. data/lib/phronomy/context/token_budget.rb +3 -0
  55. data/lib/phronomy/context/token_estimator.rb +9 -2
  56. data/lib/phronomy/context/trigger_context.rb +1 -0
  57. data/lib/phronomy/context/trim_context.rb +4 -0
  58. data/lib/phronomy/deadline.rb +63 -0
  59. data/lib/phronomy/diagnostics.rb +62 -0
  60. data/lib/phronomy/embeddings/base.rb +22 -2
  61. data/lib/phronomy/embeddings/ruby_llm_embeddings.rb +6 -2
  62. data/lib/phronomy/eval/comparison.rb +2 -0
  63. data/lib/phronomy/eval/dataset.rb +4 -0
  64. data/lib/phronomy/eval/metrics.rb +6 -0
  65. data/lib/phronomy/eval/runner.rb +11 -9
  66. data/lib/phronomy/eval/scorer/base.rb +1 -0
  67. data/lib/phronomy/eval/scorer/exact_match.rb +2 -0
  68. data/lib/phronomy/eval/scorer/includes_scorer.rb +2 -0
  69. data/lib/phronomy/eval/scorer/llm_judge.rb +2 -0
  70. data/lib/phronomy/event_loop.rb +275 -30
  71. data/lib/phronomy/fsm_session.rb +57 -4
  72. data/lib/phronomy/generator_verifier.rb +2 -0
  73. data/lib/phronomy/guardrail/base.rb +3 -0
  74. data/lib/phronomy/guardrail/prompt_injection_guardrail.rb +58 -0
  75. data/lib/phronomy/invocation_context.rb +152 -0
  76. data/lib/phronomy/knowledge_source/base.rb +24 -2
  77. data/lib/phronomy/knowledge_source/entity_knowledge.rb +7 -2
  78. data/lib/phronomy/knowledge_source/rag_knowledge.rb +8 -4
  79. data/lib/phronomy/knowledge_source/static_knowledge.rb +7 -2
  80. data/lib/phronomy/llm_adapter/base.rb +104 -0
  81. data/lib/phronomy/llm_adapter/ruby_llm.rb +41 -0
  82. data/lib/phronomy/llm_adapter.rb +20 -0
  83. data/lib/phronomy/loader/base.rb +1 -0
  84. data/lib/phronomy/loader/csv_loader.rb +2 -0
  85. data/lib/phronomy/loader/markdown_loader.rb +2 -0
  86. data/lib/phronomy/loader/plain_text_loader.rb +1 -0
  87. data/lib/phronomy/metrics.rb +38 -0
  88. data/lib/phronomy/output_parser/base.rb +1 -0
  89. data/lib/phronomy/output_parser/json_parser.rb +22 -3
  90. data/lib/phronomy/output_parser/structured_parser.rb +2 -0
  91. data/lib/phronomy/prompt_template.rb +5 -0
  92. data/lib/phronomy/runnable.rb +20 -3
  93. data/lib/phronomy/runtime/deterministic_scheduler.rb +412 -0
  94. data/lib/phronomy/runtime/fake_scheduler.rb +165 -0
  95. data/lib/phronomy/runtime/gate_registry.rb +52 -0
  96. data/lib/phronomy/runtime/pool_registry.rb +57 -0
  97. data/lib/phronomy/runtime/runtime_metrics.rb +117 -0
  98. data/lib/phronomy/runtime/scheduler.rb +98 -0
  99. data/lib/phronomy/runtime/scheduler_timer_adapter.rb +79 -0
  100. data/lib/phronomy/runtime/task_registry.rb +48 -0
  101. data/lib/phronomy/runtime/thread_scheduler.rb +30 -0
  102. data/lib/phronomy/runtime/timer_queue.rb +106 -0
  103. data/lib/phronomy/runtime/timer_service.rb +42 -0
  104. data/lib/phronomy/runtime.rb +374 -0
  105. data/lib/phronomy/splitter/base.rb +2 -0
  106. data/lib/phronomy/splitter/fixed_size_splitter.rb +2 -0
  107. data/lib/phronomy/splitter/recursive_splitter.rb +2 -0
  108. data/lib/phronomy/state_store/base.rb +48 -0
  109. data/lib/phronomy/state_store/in_memory.rb +62 -0
  110. data/lib/phronomy/task/backend.rb +80 -0
  111. data/lib/phronomy/task/fiber_backend.rb +157 -0
  112. data/lib/phronomy/task/immediate_backend.rb +89 -0
  113. data/lib/phronomy/task/thread_backend.rb +84 -0
  114. data/lib/phronomy/task.rb +275 -0
  115. data/lib/phronomy/task_group.rb +265 -0
  116. data/lib/phronomy/testing/fake_clock.rb +109 -0
  117. data/lib/phronomy/testing/fake_scheduler.rb +104 -0
  118. data/lib/phronomy/testing/scheduler_helpers.rb +59 -0
  119. data/lib/phronomy/testing.rb +12 -0
  120. data/lib/phronomy/tool/agent_tool.rb +1 -0
  121. data/lib/phronomy/tool/base.rb +298 -28
  122. data/lib/phronomy/tool/mcp_tool.rb +103 -17
  123. data/lib/phronomy/tool/scope_policy.rb +50 -0
  124. data/lib/phronomy/tool_executor.rb +106 -0
  125. data/lib/phronomy/tracing/base.rb +3 -0
  126. data/lib/phronomy/tracing/langfuse_tracer.rb +2 -0
  127. data/lib/phronomy/tracing/open_telemetry_tracer.rb +36 -0
  128. data/lib/phronomy/vector_store/async_backend.rb +110 -0
  129. data/lib/phronomy/vector_store/base.rb +40 -7
  130. data/lib/phronomy/vector_store/in_memory.rb +16 -7
  131. data/lib/phronomy/vector_store/pgvector.rb +40 -9
  132. data/lib/phronomy/vector_store/redis_search.rb +29 -8
  133. data/lib/phronomy/version.rb +1 -1
  134. data/lib/phronomy/workflow.rb +147 -11
  135. data/lib/phronomy/workflow_context.rb +83 -6
  136. data/lib/phronomy/workflow_runner.rb +106 -7
  137. data/lib/phronomy.rb +112 -1
  138. data/scripts/api_snapshot.rb +91 -0
  139. data/scripts/check_api_annotations.rb +68 -0
  140. data/scripts/check_private_enforcement.rb +93 -0
  141. data/scripts/check_readme_runnable.rb +98 -0
  142. data/scripts/run_mutation.sh +46 -0
  143. metadata +83 -2
@@ -7,9 +7,13 @@ module Phronomy
7
7
  # @see https://claude.com/blog/multi-agent-coordination-patterns
8
8
  #
9
9
  # A coordinator LLM agent decomposes work into tasks and enqueues them
10
- # dynamically via built-in tools. A fixed pool of worker agents claims tasks
11
- # from the shared queue, carrying forward their conversation history across
12
- # assignments to accumulate domain context over time.
10
+ # dynamically via built-in tools. A fixed set of worker agents processes tasks
11
+ # sequentially — one task per worker per turn — carrying forward their
12
+ # conversation history across assignments to accumulate domain context over time.
13
+ #
14
+ # Workers are selected in sequence (the worker with the fewest accumulated
15
+ # messages is chosen by default). Task dispatch is synchronous; there is no
16
+ # concurrent or parallel execution.
13
17
  #
14
18
  # The coordinator is an {Agent::Base} subclass that has two built-in tools:
15
19
  # - +enqueue_task+ — adds a task description to the queue
@@ -56,6 +60,7 @@ module Phronomy
56
60
  # Falls back to +Phronomy.configuration.default_model+ when not set.
57
61
  #
58
62
  # @param value [String, nil]
63
+ # @api public
59
64
  def coordinator_model(value = nil)
60
65
  value ? @coordinator_model = value : @coordinator_model
61
66
  end
@@ -65,6 +70,7 @@ module Phronomy
65
70
  # and then call +finalize+ when all tasks are enqueued.
66
71
  #
67
72
  # @param value [String, nil]
73
+ # @api public
68
74
  def coordinator_instructions(value = nil)
69
75
  value ? @coordinator_instructions = value : @coordinator_instructions
70
76
  end
@@ -75,16 +81,18 @@ module Phronomy
75
81
  # Pass the same value as +LLMConfig::PROVIDER+ in your examples.
76
82
  #
77
83
  # @param value [Symbol, nil]
84
+ # @api public
78
85
  def coordinator_provider(value = nil)
79
86
  value ? @coordinator_provider = value : @coordinator_provider
80
87
  end
81
88
 
82
- # Configures the worker pool.
89
+ # Configures the set of workers.
83
90
  #
84
- # @param size [Integer] number of persistent worker instances
91
+ # @param size [Integer] number of persistent worker instances (tasks are assigned sequentially)
85
92
  # @param agent [Class] Agent::Base subclass used for all workers
86
93
  # @param on_error [Symbol] +:raise+ (default) propagates worker exceptions;
87
94
  # +:skip+ records the failure and continues with remaining tasks
95
+ # @api public
88
96
  def pool(size:, agent:, on_error: :raise)
89
97
  @pool_size = Integer(size)
90
98
  @worker_agent = agent
@@ -98,6 +106,7 @@ module Phronomy
98
106
  #
99
107
  # @yield [Array<WorkerState>] available workers
100
108
  # @yieldreturn [WorkerState] the chosen worker
109
+ # @api public
101
110
  def schedule(&block)
102
111
  @scheduler = block
103
112
  end
@@ -108,6 +117,7 @@ module Phronomy
108
117
  # When omitted, the raw assignments array is returned.
109
118
  #
110
119
  # @yield [Array<Hash>] all completed (and skipped) task assignments
120
+ # @api public
111
121
  def aggregate(&block)
112
122
  @aggregator = block
113
123
  end
@@ -137,6 +147,7 @@ module Phronomy
137
147
  # @param config [Hash] reserved for future use
138
148
  # @return [Object] the return value of the aggregate block, or the raw assignments Array
139
149
  # @raise [ArgumentError] when +pool :agent+ has not been configured
150
+ # @api public
140
151
  def invoke(team_input, config: {})
141
152
  raise ArgumentError, "pool :agent must be configured before invoking" unless self.class._worker_agent
142
153
 
@@ -161,6 +172,7 @@ module Phronomy
161
172
  # @yield [Hash] one event per completed/failed task
162
173
  # @return [Object] same as +invoke+
163
174
  # @raise [ArgumentError] when +pool :agent+ has not been configured
175
+ # @api public
164
176
  def stream(team_input, config: {}, &block)
165
177
  return invoke(team_input, config: config) unless block
166
178
 
@@ -0,0 +1,155 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Phronomy
4
+ # A thread-safe FIFO queue for passing values between concurrent tasks.
5
+ #
6
+ # Wraps +Thread::Queue+ so that callers do not need to reference the Ruby
7
+ # standard-library type directly. A future implementation may replace the
8
+ # backing primitive without changing call sites.
9
+ #
10
+ # @example Producer / consumer
11
+ # queue = Phronomy::AsyncQueue.new
12
+ # Runtime.instance.spawn { queue.push(expensive_io()) }
13
+ # value = queue.pop # blocks until the producer pushes
14
+ # @api private
15
+ class AsyncQueue
16
+ # @param max_size [Integer, nil] optional upper bound on queue depth.
17
+ # When set, {#push} blocks the caller until a slot is available.
18
+ # @api private
19
+ def initialize(max_size: nil)
20
+ @queue = max_size ? SizedQueue.new(max_size) : Thread::Queue.new
21
+ @max_size = max_size
22
+ end
23
+
24
+ # Enqueues +item+.
25
+ # In a cooperative scheduler context with a bounded queue (max_size:), suspends
26
+ # the current Fiber via a scheduler signal when the queue is full rather than
27
+ # blocking the OS thread. Without a scheduler, falls back to the standard
28
+ # SizedQueue blocking behaviour.
29
+ # @param item [Object] value to enqueue
30
+ # @return [self]
31
+ # @api private
32
+ def push(item)
33
+ scheduler = Phronomy::Runtime::Scheduler.current
34
+ if scheduler && @max_size
35
+ _push_cooperative(scheduler, item)
36
+ else
37
+ @queue.push(item)
38
+ scheduler.raise_signal(@coop_signal) if scheduler && @coop_signal
39
+ end
40
+ self
41
+ end
42
+
43
+ # Dequeues and returns the next item.
44
+ # In a cooperative scheduler context, suspends the current Fiber (yielding
45
+ # control back to the scheduler) rather than blocking the OS thread.
46
+ #
47
+ # When +timeout+ is given the semantics depend on the active backend:
48
+ #
49
+ # * **Thread backend** (`:thread`) — uses real wall-clock time via
50
+ # +Thread::Queue#pop(timeout:)+. Requires Ruby 3.2+.
51
+ # Returns +nil+ if no item arrives within the specified number of real seconds.
52
+ # * **DeterministicScheduler / `:fiber` backend** — uses the scheduler's
53
+ # *virtual time* (+scheduler.virtual_time+). The timeout elapses only when
54
+ # the virtual clock is advanced (e.g. via {Phronomy::Testing::FakeClock#advance}).
55
+ # In tests this means the timeout is fully deterministic and does not depend on
56
+ # actual elapsed wall time. However, in production `:fiber` mode the timeout
57
+ # may never expire unless the scheduler explicitly advances virtual time.
58
+ #
59
+ # @note The `:fiber` backend is **EXPERIMENTAL**. Real-time timeout behaviour
60
+ # in production workloads is not guaranteed and may differ from wall-clock
61
+ # expectations.
62
+ # @note **Cooperative timeout limitation**: on the cooperative path, the
63
+ # deadline is re-checked *after* a wake-up signal arrives. If virtual time
64
+ # has already passed the deadline when the consumer is woken by a producer
65
+ # push, the consumer returns +nil+ rather than the pushed item. Without any
66
+ # wake-up signal the waiting Fiber remains suspended even after
67
+ # +scheduler.advance+ — the timeout does not self-fire.
68
+ # @param timeout [Numeric, nil] seconds to wait before returning +nil+.
69
+ # Semantics are wall-clock on `:thread` and virtual-time on `:fiber`.
70
+ # @return [Object, nil] the next item, or +nil+ when timeout expires
71
+ # @api private
72
+ def pop(timeout: nil)
73
+ scheduler = Phronomy::Runtime::Scheduler.current
74
+ if scheduler
75
+ _pop_cooperative(scheduler, timeout: timeout)
76
+ elsif timeout
77
+ @queue.pop(timeout: timeout)
78
+ else
79
+ @queue.pop
80
+ end
81
+ end
82
+
83
+ # Returns the current number of items in the queue.
84
+ # @return [Integer]
85
+ # @api private
86
+ def size
87
+ @queue.size
88
+ end
89
+
90
+ # Returns +true+ when the queue contains no items.
91
+ # @return [Boolean]
92
+ # @api private
93
+ def empty?
94
+ @queue.empty?
95
+ end
96
+
97
+ # Closes the queue. Subsequent {#push} calls raise +ClosedQueueError+; a non-cooperative {#pop} returns any remaining items, then +nil+.
98
+ # @return [self]
99
+ # @api private
100
+ def close
101
+ @queue.close
102
+ self
103
+ end
104
+
105
+ private
106
+
107
+ # Cooperative pop for DeterministicScheduler context.
108
+ # Suspends the current Fiber via the scheduler's signal mechanism rather than
109
+ # blocking the OS thread. Because cooperative mode is single-threaded, the
110
+ # empty?/pop pair is race-free (no other Fiber can run between the two calls).
111
+ # After dequeuing, notifies any push-waiter so that a backpressure-suspended
112
+ # producer can be unblocked.
113
+ # @api private
114
+ # @param scheduler [Runtime::Scheduler]
115
+ # @param timeout [Numeric, nil]
116
+ # @return [Object, nil]
117
+ def _pop_cooperative(scheduler, timeout:)
118
+ @coop_signal ||= scheduler.new_signal
119
+ deadline = timeout ? (scheduler.virtual_time + timeout) : nil
120
+
121
+ loop do
122
+ unless @queue.empty?
123
+ item = @queue.pop(timeout: 0)
124
+ # Notify a push-waiter (bounded queue) that a slot opened up.
125
+ scheduler.raise_signal(@push_signal) if @push_signal
126
+ return item
127
+ end
128
+ return nil if deadline && scheduler.virtual_time >= deadline
129
+ scheduler.wait_for_signal(@coop_signal)
130
+ return nil if deadline && scheduler.virtual_time >= deadline
131
+ end
132
+ end
133
+
134
+ # Cooperative push for DeterministicScheduler context with a bounded queue.
135
+ # Suspends the current Fiber via a scheduler signal when the queue is full,
136
+ # rather than blocking the OS thread.
137
+ # @api private
138
+ # @param scheduler [Runtime::Scheduler]
139
+ # @param item [Object]
140
+ # @return [void]
141
+ def _push_cooperative(scheduler, item)
142
+ @push_signal ||= scheduler.new_signal
143
+
144
+ loop do
145
+ unless @queue.size >= @max_size
146
+ @queue.push(item)
147
+ # Notify any pop-waiter that an item is now available.
148
+ scheduler.raise_signal(@coop_signal) if @coop_signal
149
+ return
150
+ end
151
+ scheduler.wait_for_signal(@push_signal)
152
+ end
153
+ end
154
+ end
155
+ end
@@ -0,0 +1,435 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Phronomy
4
+ # A bounded, observable thread pool for blocking I/O operations.
5
+ #
6
+ # ## Architectural boundary
7
+ #
8
+ # `BlockingAdapterPool` is the *only* place in Phronomy that uses raw OS threads
9
+ # for I/O. All third-party gem calls whose internal I/O Phronomy cannot control
10
+ # — including RubyLLM, ActiveRecord, Redis, Faraday, and MCP stdio transport —
11
+ # **must** route through this pool (or a named pool obtained via
12
+ # {Runtime#pool}). Custom non-blocking HTTP/selector runtimes are intentionally
13
+ # out of scope; the pool + cooperative scheduler combination satisfies all
14
+ # current concurrency requirements without that complexity. (See ADR-010.)
15
+ #
16
+ # All blocking calls (LLM HTTP, MCP stdio, ActiveRecord, Redis, etc.) must be
17
+ # submitted through this pool so that:
18
+ #
19
+ # 1. The total number of OS threads is capped.
20
+ # 2. Queue depth is bounded (backpressure when the pool is saturated).
21
+ # 3. Per-operation timeouts are enforced consistently.
22
+ # 4. Abandoned (timed-out) operations are tracked and logged.
23
+ # 5. Metrics (active count, queue depth, abandoned count, avg wait time) are
24
+ # observable at runtime.
25
+ #
26
+ # @example Submitting a blocking LLM call
27
+ # op = runtime.blocking_io.submit(timeout: 30) { chat.ask(message) }
28
+ # result = op.await # blocks the calling thread until done
29
+ #
30
+ # @example With cancellation
31
+ # token = Phronomy::CancellationToken.timeout_after(60)
32
+ # op = pool.submit(timeout: 30, cancellation_token: token) { expensive_call }
33
+ # result = op.await
34
+ class BlockingAdapterPool
35
+ # Represents the pending result of a submitted blocking operation.
36
+ # Returned immediately by {BlockingAdapterPool#submit}; call {#await} to
37
+ # wait for the result.
38
+ class PendingOperation
39
+ # @return [Boolean] true when the operation has finished (success or error)
40
+ # @api private
41
+ def done?
42
+ @mutex.synchronize { @done }
43
+ end
44
+
45
+ # @return [Boolean] true when the operation was abandoned due to timeout
46
+ # @api private
47
+ def abandoned?
48
+ @abandoned
49
+ end
50
+
51
+ # @return [Float] seconds spent in the queue before execution started
52
+ # @api private
53
+ def wait_time
54
+ @wait_time || 0.0
55
+ end
56
+
57
+ # Blocks until the operation completes and returns its value.
58
+ #
59
+ # An optional +timeout+ (in seconds) may be passed here; it is measured
60
+ # from the moment +await+ is called. If both a submit-time timeout and an
61
+ # await-time timeout are present, the earlier deadline wins. The worker
62
+ # thread is NOT interrupted — it runs to completion on its own.
63
+ #
64
+ # An optional +cancellation_token+ may be passed here (or at submit time).
65
+ # If the token is cancelled while waiting, {Phronomy::CancellationError} is
66
+ # raised immediately without interrupting the worker.
67
+ #
68
+ # **Cooperative path (`:fiber` / `DeterministicScheduler`):**
69
+ # When called from a Fiber managed by {DeterministicScheduler} (i.e. under
70
+ # the +:fiber+ runtime backend), the calling Fiber suspends cooperatively
71
+ # via +Fiber.yield+ rather than blocking the OS thread. The Fiber is
72
+ # resumed on the scheduler's ready queue once the worker thread completes
73
+ # the operation.
74
+ #
75
+ # @note **Cooperative cancellation semantics** (ADR-010):
76
+ # Phronomy uses a non-preemptive, cooperative-first concurrency model.
77
+ # Cancellation is *cooperative*, not preemptive:
78
+ # - When a +cancellation_token+ is cancelled, +CancellationError+ is
79
+ # raised to the +await+ caller immediately; when the timeout fires,
80
+ # +TimeoutError+ is raised instead. In both cases, the underlying
81
+ # worker thread is **not** forcibly stopped.
82
+ # - The worker thread will complete its submitted block naturally.
83
+ # Code inside the block must call +token.check!+ at suitable
84
+ # checkpoints to observe the cancelled state and exit early.
85
+ # - There is no +Thread#kill+ or +Thread#raise+ involved. The framework
86
+ # never forcibly terminates worker threads.
87
+ #
88
+ # @note **Cooperative timeout limitation**: the +timeout:+ parameter passed
89
+ # to +await+ is *not* enforced on the cooperative path. The calling Fiber
90
+ # remains suspended until the worker thread finishes regardless of how many
91
+ # seconds elapse. This is because the cooperative scheduler cannot
92
+ # preempt a running OS thread. A submit-time +timeout:+ is likewise not
93
+ # enforced on this path: the worker never times the block out itself, so
94
+ # a time bound must come from inside the submitted block (for example the
95
+ # library's own connect/read timeouts or +token.check!+ checkpoints).
96
+ #
97
+ # @param timeout [Numeric, nil] seconds from now before raising TimeoutError
98
+ # (thread path only; ignored on the cooperative/fiber path)
99
+ # @param cancellation_token [CancellationToken, nil]
100
+ # @return [Object]
101
+ # @raise [Phronomy::TimeoutError]
102
+ # @raise [Phronomy::CancellationError]
103
+ # @raise [Exception] error raised inside the submitted block
104
+ # @api private
105
+ def await(timeout: nil, cancellation_token: nil)
106
+ effective_timeout = [timeout, @timeout].compact.min
107
+ effective_token = cancellation_token || @cancellation_token
108
+
109
+ raise CancellationError, "blocking operation cancelled" if effective_token&.cancelled?
110
+
111
+ # Cooperative context: suspend the calling Fiber rather than blocking
112
+ # the OS thread so that DeterministicScheduler can continue dispatching
113
+ # other tasks while waiting for the blocking worker to finish.
114
+ # (Issue #338, ADR-010 Rule 3)
115
+ # Uses the same thread-local key as Task::FiberBackend::SCHEDULER_KEY
116
+ # (:phronomy_deterministic_scheduler) to avoid a cross-file constant
117
+ # dependency at load time.
118
+ scheduler = Thread.current.thread_variable_get(:phronomy_deterministic_scheduler)
119
+ in_managed_fiber = !Fiber.respond_to?(:main) || Fiber.current != Fiber.main
120
+ if scheduler && in_managed_fiber
121
+ unless @done
122
+ # Register this await with the scheduler so run_until_idle knows
123
+ # not to exit until the worker thread completes (Issue #338).
124
+ scheduler.track_blocking_await
125
+ waiting_fiber = Fiber.current
126
+ on_complete do |_result, _error|
127
+ # Decrement the counter and wake run_until_idle, then re-enqueue
128
+ # the suspended Fiber for cooperative resumption.
129
+ scheduler.complete_blocking_await
130
+ scheduler.enqueue_fiber(-> { waiting_fiber.resume })
131
+ end
132
+ Fiber.yield(:cooperative_suspend)
133
+ end
134
+ raise CancellationError, "blocking operation cancelled" if effective_token&.cancelled?
135
+ raise @error if @error
136
+
137
+ return @value
138
+ end
139
+
140
+ # Wake up the waiting thread whenever the token is cancelled so we can
141
+ # propagate cancellation without sleeping until the timeout expires.
142
+ effective_token&.on_cancel { @mutex.synchronize { @cond.broadcast } }
143
+
144
+ if effective_timeout
145
+ deadline = Process.clock_gettime(Process::CLOCK_MONOTONIC) + effective_timeout
146
+ @mutex.synchronize do
147
+ until @done
148
+ raise CancellationError, "blocking operation cancelled" if effective_token&.cancelled?
149
+
150
+ remaining = deadline - Process.clock_gettime(Process::CLOCK_MONOTONIC)
151
+ if remaining <= 0
152
+ # Guard against double-counting when await is called multiple times.
153
+ unless @abandoned
154
+ @abandoned = true
155
+ @on_abandoned&.call
156
+ end
157
+ raise Phronomy::TimeoutError, "blocking operation timed out after #{effective_timeout}s"
158
+ end
159
+ @cond.wait(@mutex, remaining)
160
+ end
161
+ end
162
+ else
163
+ @mutex.synchronize do
164
+ until @done
165
+ raise CancellationError, "blocking operation cancelled" if effective_token&.cancelled?
166
+
167
+ @cond.wait(@mutex)
168
+ end
169
+ end
170
+ end
171
+ raise @error if @error
172
+
173
+ @value
174
+ end
175
+
176
+ # Registers a callback to be called when the operation finishes.
177
+ # If the operation has already finished the callback is invoked immediately
178
+ # on the calling thread. Otherwise it is invoked on the worker thread that
179
+ # completes the operation.
180
+ #
181
+ # The callback receives +result+ and +error+ (one of them will be +nil+).
182
+ #
183
+ # @yield [result, error]
184
+ # @return [self]
185
+ # @api private
186
+ def on_complete(&callback)
187
+ fire_args = nil
188
+ @mutex.synchronize do
189
+ if @done
190
+ fire_args = [@value, @error]
191
+ else
192
+ @callbacks ||= []
193
+ @callbacks << callback
194
+ end
195
+ end
196
+ callback.call(*fire_args) if fire_args
197
+ self
198
+ end
199
+
200
+ # @api private
201
+ def initialize(block, timeout: nil, cancellation_token: nil, on_abandoned: nil)
202
+ @block = block
203
+ @timeout = timeout
204
+ @cancellation_token = cancellation_token
205
+ @on_abandoned = on_abandoned
206
+ @value = nil
207
+ @error = nil
208
+ @done = false
209
+ @abandoned = false
210
+ @wait_time = nil
211
+ @submitted_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)
212
+ @mutex = Mutex.new
213
+ @cond = ConditionVariable.new
214
+ end
215
+
216
+ # @api private
217
+ def execute!
218
+ @wait_time = Process.clock_gettime(Process::CLOCK_MONOTONIC) - @submitted_at
219
+
220
+ if @cancellation_token&.cancelled?
221
+ complete_with_error!(CancellationError.new("operation cancelled before execution"))
222
+ return
223
+ end
224
+
225
+ # Do NOT use Timeout.timeout here — it delivers an async Thread#raise
226
+ # that can corrupt external library state (mutexes, C extensions, etc.).
227
+ # Timeout enforcement is handled cooperatively in #await instead.
228
+ # Each blocking library (Net::HTTP, pg, redis, etc.) should set its
229
+ # own native connection/read timeouts.
230
+ begin
231
+ complete_with_value!(@block.call)
232
+ rescue => e
233
+ complete_with_error!(e)
234
+ end
235
+ end
236
+
237
+ private
238
+
239
+ def complete_with_value!(value)
240
+ cbs = nil
241
+ @mutex.synchronize do
242
+ @value = value
243
+ @done = true
244
+ @cond.broadcast
245
+ cbs = @callbacks
246
+ @callbacks = nil
247
+ end
248
+ cbs&.each { |cb| cb.call(value, nil) }
249
+ end
250
+
251
+ def complete_with_error!(error)
252
+ cbs = nil
253
+ @mutex.synchronize do
254
+ @error = error
255
+ @done = true
256
+ @cond.broadcast
257
+ cbs = @callbacks
258
+ @callbacks = nil
259
+ end
260
+ cbs&.each { |cb| cb.call(nil, error) }
261
+ end
262
+ end
263
+
264
+ # @param pool_size [Integer] maximum number of worker threads
265
+ # @param queue_size [Integer] maximum pending operations waiting for a worker
266
+ # @param name [String, Symbol, nil] optional pool name used in thread labels
267
+ # @param logger [Logger, nil] optional logger for warnings
268
+ # @api private
269
+ def initialize(pool_size: 10, queue_size: 100, name: nil, logger: nil)
270
+ @pool_size = pool_size
271
+ @queue_size = queue_size
272
+ @name = name
273
+ @logger = logger
274
+ @queue = SizedQueue.new(queue_size)
275
+ @active_count = 0
276
+ @abandoned_count = 0
277
+ @total_wait_ns = 0
278
+ @completed_count = 0
279
+ @mutex = Mutex.new
280
+ @shutdown = false
281
+ @workers = Array.new(pool_size) { |i| spawn_worker(i) }
282
+ end
283
+
284
+ # Submits a blocking operation to the pool.
285
+ # Returns a {PendingOperation} immediately; the block runs on a worker thread.
286
+ #
287
+ # @note **Cooperative callers**: if you are running under the `:fiber` backend
288
+ # (i.e. inside a {DeterministicScheduler} Fiber), neither this +timeout:+
289
+ # nor the one passed to {PendingOperation#await} is enforced: the
290
+ # cooperative branch of +await+ never consults a deadline, and the worker
291
+ # runs the block to completion without timing it out. The waiting Fiber is
292
+ # resumed only by the on-complete callback, so bound the call inside the
293
+ # block itself (library connect/read timeouts, +token.check!+ checkpoints).
294
+ # @param timeout [Numeric, nil] seconds before the operation is abandoned
295
+ # @param cancellation_token [CancellationToken, nil]
296
+ # @yield block containing the blocking call
297
+ # @return [PendingOperation]
298
+ # @raise [Phronomy::PoolShutdownError] when the pool has been shut down
299
+ # @raise [Phronomy::BackpressureError] when +on_full: :raise+ and queue is full
300
+ # @raise [Phronomy::TimeoutError] when +on_full: :timeout+ and wait exceeds +full_timeout+
301
+ # @api private
302
+ def submit(timeout: nil, cancellation_token: nil, on_full: :wait, full_timeout: nil, &block)
303
+ raise Phronomy::PoolShutdownError, "pool has been shut down" if @shutdown
304
+
305
+ op = PendingOperation.new(block, timeout: timeout, cancellation_token: cancellation_token,
306
+ on_abandoned: timeout ? -> { @mutex.synchronize { @abandoned_count += 1 } } : nil)
307
+ begin
308
+ case on_full
309
+ when :raise
310
+ begin
311
+ @queue.push(op, true)
312
+ rescue ThreadError
313
+ raise Phronomy::BackpressureError, "BlockingAdapterPool queue is full (depth: #{@queue_size})"
314
+ end
315
+ when :timeout
316
+ deadline = full_timeout ? (Process.clock_gettime(Process::CLOCK_MONOTONIC) + full_timeout) : nil
317
+ loop do
318
+ @queue.push(op, true)
319
+ break
320
+ rescue ThreadError
321
+ if deadline && Process.clock_gettime(Process::CLOCK_MONOTONIC) >= deadline
322
+ raise Phronomy::TimeoutError, "timed out waiting for a free slot in BlockingAdapterPool"
323
+ end
324
+ sleep(0.005)
325
+ end
326
+ else # :wait (default)
327
+ @queue.push(op)
328
+ end
329
+ rescue ClosedQueueError
330
+ # Shutdown raced with this submit — treat as if @shutdown was already set.
331
+ raise Phronomy::PoolShutdownError, "pool has been shut down"
332
+ end
333
+ op
334
+ end
335
+
336
+ # Gracefully drains the pool and terminates all worker threads.
337
+ # Waits up to +drain_timeout+ seconds for in-flight operations to finish.
338
+ #
339
+ # Closing the underlying SizedQueue signals workers to exit after draining
340
+ # remaining items, without blocking on a full-queue push.
341
+ #
342
+ # @param drain_timeout [Numeric] seconds to wait for workers to finish
343
+ # @return [self]
344
+ # @api private
345
+ def shutdown(drain_timeout: 30)
346
+ @shutdown = true
347
+ @queue.close
348
+ @workers.each { |t| t.join(drain_timeout) }
349
+ self
350
+ end
351
+
352
+ # --- Metrics ----------------------------------------------------------
353
+
354
+ # @return [Integer] number of operations currently executing on workers
355
+ # @api private
356
+ def active_count
357
+ @mutex.synchronize { @active_count }
358
+ end
359
+
360
+ # @return [Integer] number of operations waiting in the queue
361
+ # @api private
362
+ def queue_depth
363
+ @queue.size
364
+ end
365
+
366
+ # @return [Integer] number of operations that were abandoned due to timeout
367
+ # @api private
368
+ def abandoned_count
369
+ @mutex.synchronize { @abandoned_count }
370
+ end
371
+
372
+ # Average time (in seconds) that completed operations spent in the queue
373
+ # waiting for a worker. Returns 0.0 when no operations have completed yet.
374
+ # @return [Float]
375
+ # @api private
376
+ def average_wait_seconds
377
+ @mutex.synchronize do
378
+ return 0.0 if @completed_count.zero?
379
+
380
+ @total_wait_ns / @completed_count.to_f / 1_000_000_000.0
381
+ end
382
+ end
383
+
384
+ # @return [Integer] configured maximum number of worker threads
385
+ attr_reader :pool_size
386
+
387
+ # @return [Integer] configured maximum queue depth
388
+ attr_reader :queue_size
389
+
390
+ # @return [String, Symbol, nil] pool name used in thread labels
391
+ attr_reader :name
392
+
393
+ private
394
+
395
+ SENTINEL = :shutdown
396
+ private_constant :SENTINEL
397
+
398
+ def spawn_worker(index = nil)
399
+ label = ["phronomy", "blocking-pool", @name, index].compact.join("-")
400
+ Thread.new do
401
+ Thread.current.name = label
402
+ loop do
403
+ op = begin
404
+ @queue.pop
405
+ rescue ClosedQueueError
406
+ break
407
+ end
408
+ # Queue#pop returns nil once the queue is closed and empty
409
+ break if op.nil? || op == SENTINEL
410
+
411
+ run_operation(op)
412
+ end
413
+ end
414
+ end
415
+
416
+ def run_operation(op)
417
+ @mutex.synchronize { @active_count += 1 }
418
+
419
+ begin
420
+ op.execute!
421
+ ensure
422
+ @mutex.synchronize do
423
+ @active_count -= 1
424
+
425
+ if op.abandoned?
426
+ @logger&.warn { "BlockingAdapterPool: worker finished operation after caller timed out" }
427
+ end
428
+
429
+ @total_wait_ns += (op.wait_time * 1_000_000_000).to_i
430
+ @completed_count += 1
431
+ end
432
+ end
433
+ end
434
+ end
435
+ end