phronomy 0.6.0 → 0.7.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.mutant.yml +22 -0
- data/CHANGELOG.md +488 -0
- data/CONTRIBUTING.md +102 -0
- data/README.md +374 -36
- data/RELEASE_CHECKLIST.md +86 -0
- data/Rakefile +33 -0
- data/SECURITY.md +80 -0
- data/benchmark/baseline.json +9 -0
- data/benchmark/bench_agent_invoke.rb +105 -0
- data/benchmark/bench_context_assembler.rb +46 -0
- data/benchmark/bench_regression.rb +172 -0
- data/benchmark/bench_token_estimator.rb +44 -0
- data/benchmark/bench_tool_schema.rb +69 -0
- data/benchmark/bench_vector_store.rb +39 -0
- data/benchmark/bench_workflow.rb +55 -0
- data/benchmark/run_all.rb +118 -0
- data/docs/decisions/001-rubyllm-as-provider-layer.md +42 -0
- data/docs/decisions/002-workflow-context-immutability.md +42 -0
- data/docs/decisions/003-event-loop-singleton.md +48 -0
- data/docs/decisions/004-invoke-timeout-is-not-cancellation.md +75 -0
- data/docs/decisions/005-static-knowledge-class-level-cache.md +45 -0
- data/docs/decisions/006-no-built-in-guardrails.md +66 -0
- data/docs/decisions/007-mcp-is-beta-stability.md +51 -0
- data/docs/decisions/008-orchestrator-uses-os-threads.md +52 -0
- data/docs/decisions/009-state-store-abstraction.md +141 -0
- data/docs/decisions/010-cooperative-first-concurrency.md +248 -0
- data/lib/phronomy/agent/base.rb +416 -49
- data/lib/phronomy/agent/before_completion_context.rb +1 -0
- data/lib/phronomy/agent/checkpoint.rb +1 -0
- data/lib/phronomy/agent/concerns/before_completion.rb +6 -0
- data/lib/phronomy/agent/concerns/error_translation.rb +45 -0
- data/lib/phronomy/agent/concerns/guardrailable.rb +3 -0
- data/lib/phronomy/agent/concerns/retryable.rb +12 -1
- data/lib/phronomy/agent/concerns/suspendable.rb +19 -0
- data/lib/phronomy/agent/fsm.rb +44 -52
- data/lib/phronomy/agent/handoff.rb +3 -0
- data/lib/phronomy/agent/orchestrator.rb +191 -54
- data/lib/phronomy/agent/parallel_tool_chat.rb +87 -13
- data/lib/phronomy/agent/react_agent.rb +16 -6
- data/lib/phronomy/agent/runner.rb +2 -0
- data/lib/phronomy/agent/shared_state.rb +11 -0
- data/lib/phronomy/agent/suspend_signal.rb +2 -0
- data/lib/phronomy/agent/team_coordinator.rb +17 -5
- data/lib/phronomy/async_queue.rb +155 -0
- data/lib/phronomy/blocking_adapter_pool.rb +435 -0
- data/lib/phronomy/cancellation_scope.rb +123 -0
- data/lib/phronomy/cancellation_token.rb +133 -0
- data/lib/phronomy/concurrency_gate.rb +155 -0
- data/lib/phronomy/configuration.rb +168 -2
- data/lib/phronomy/context/assembler.rb +6 -0
- data/lib/phronomy/context/compaction_context.rb +2 -0
- data/lib/phronomy/context/context_version_cache.rb +2 -0
- data/lib/phronomy/context/token_budget.rb +3 -0
- data/lib/phronomy/context/token_estimator.rb +9 -2
- data/lib/phronomy/context/trigger_context.rb +1 -0
- data/lib/phronomy/context/trim_context.rb +4 -0
- data/lib/phronomy/deadline.rb +63 -0
- data/lib/phronomy/diagnostics.rb +62 -0
- data/lib/phronomy/embeddings/base.rb +22 -2
- data/lib/phronomy/embeddings/ruby_llm_embeddings.rb +6 -2
- data/lib/phronomy/eval/comparison.rb +2 -0
- data/lib/phronomy/eval/dataset.rb +4 -0
- data/lib/phronomy/eval/metrics.rb +6 -0
- data/lib/phronomy/eval/runner.rb +11 -9
- data/lib/phronomy/eval/scorer/base.rb +1 -0
- data/lib/phronomy/eval/scorer/exact_match.rb +2 -0
- data/lib/phronomy/eval/scorer/includes_scorer.rb +2 -0
- data/lib/phronomy/eval/scorer/llm_judge.rb +2 -0
- data/lib/phronomy/event_loop.rb +275 -30
- data/lib/phronomy/fsm_session.rb +57 -4
- data/lib/phronomy/generator_verifier.rb +2 -0
- data/lib/phronomy/guardrail/base.rb +3 -0
- data/lib/phronomy/guardrail/prompt_injection_guardrail.rb +58 -0
- data/lib/phronomy/invocation_context.rb +152 -0
- data/lib/phronomy/knowledge_source/base.rb +24 -2
- data/lib/phronomy/knowledge_source/entity_knowledge.rb +7 -2
- data/lib/phronomy/knowledge_source/rag_knowledge.rb +8 -4
- data/lib/phronomy/knowledge_source/static_knowledge.rb +7 -2
- data/lib/phronomy/llm_adapter/base.rb +104 -0
- data/lib/phronomy/llm_adapter/ruby_llm.rb +41 -0
- data/lib/phronomy/llm_adapter.rb +20 -0
- data/lib/phronomy/loader/base.rb +1 -0
- data/lib/phronomy/loader/csv_loader.rb +2 -0
- data/lib/phronomy/loader/markdown_loader.rb +2 -0
- data/lib/phronomy/loader/plain_text_loader.rb +1 -0
- data/lib/phronomy/metrics.rb +38 -0
- data/lib/phronomy/output_parser/base.rb +1 -0
- data/lib/phronomy/output_parser/json_parser.rb +22 -3
- data/lib/phronomy/output_parser/structured_parser.rb +2 -0
- data/lib/phronomy/prompt_template.rb +5 -0
- data/lib/phronomy/runnable.rb +20 -3
- data/lib/phronomy/runtime/deterministic_scheduler.rb +412 -0
- data/lib/phronomy/runtime/fake_scheduler.rb +165 -0
- data/lib/phronomy/runtime/gate_registry.rb +52 -0
- data/lib/phronomy/runtime/pool_registry.rb +57 -0
- data/lib/phronomy/runtime/runtime_metrics.rb +117 -0
- data/lib/phronomy/runtime/scheduler.rb +98 -0
- data/lib/phronomy/runtime/scheduler_timer_adapter.rb +79 -0
- data/lib/phronomy/runtime/task_registry.rb +48 -0
- data/lib/phronomy/runtime/thread_scheduler.rb +30 -0
- data/lib/phronomy/runtime/timer_queue.rb +106 -0
- data/lib/phronomy/runtime/timer_service.rb +42 -0
- data/lib/phronomy/runtime.rb +374 -0
- data/lib/phronomy/splitter/base.rb +2 -0
- data/lib/phronomy/splitter/fixed_size_splitter.rb +2 -0
- data/lib/phronomy/splitter/recursive_splitter.rb +2 -0
- data/lib/phronomy/state_store/base.rb +48 -0
- data/lib/phronomy/state_store/in_memory.rb +62 -0
- data/lib/phronomy/task/backend.rb +80 -0
- data/lib/phronomy/task/fiber_backend.rb +157 -0
- data/lib/phronomy/task/immediate_backend.rb +89 -0
- data/lib/phronomy/task/thread_backend.rb +84 -0
- data/lib/phronomy/task.rb +275 -0
- data/lib/phronomy/task_group.rb +265 -0
- data/lib/phronomy/testing/fake_clock.rb +109 -0
- data/lib/phronomy/testing/fake_scheduler.rb +104 -0
- data/lib/phronomy/testing/scheduler_helpers.rb +59 -0
- data/lib/phronomy/testing.rb +12 -0
- data/lib/phronomy/tool/agent_tool.rb +1 -0
- data/lib/phronomy/tool/base.rb +298 -28
- data/lib/phronomy/tool/mcp_tool.rb +103 -17
- data/lib/phronomy/tool/scope_policy.rb +50 -0
- data/lib/phronomy/tool_executor.rb +106 -0
- data/lib/phronomy/tracing/base.rb +3 -0
- data/lib/phronomy/tracing/langfuse_tracer.rb +2 -0
- data/lib/phronomy/tracing/open_telemetry_tracer.rb +36 -0
- data/lib/phronomy/vector_store/async_backend.rb +110 -0
- data/lib/phronomy/vector_store/base.rb +40 -7
- data/lib/phronomy/vector_store/in_memory.rb +16 -7
- data/lib/phronomy/vector_store/pgvector.rb +40 -9
- data/lib/phronomy/vector_store/redis_search.rb +29 -8
- data/lib/phronomy/version.rb +1 -1
- data/lib/phronomy/workflow.rb +147 -11
- data/lib/phronomy/workflow_context.rb +83 -6
- data/lib/phronomy/workflow_runner.rb +106 -7
- data/lib/phronomy.rb +112 -1
- data/scripts/api_snapshot.rb +91 -0
- data/scripts/check_api_annotations.rb +68 -0
- data/scripts/check_private_enforcement.rb +93 -0
- data/scripts/check_readme_runnable.rb +98 -0
- data/scripts/run_mutation.sh +46 -0
- metadata +83 -2
|
@@ -7,9 +7,13 @@ module Phronomy
|
|
|
7
7
|
# @see https://claude.com/blog/multi-agent-coordination-patterns
|
|
8
8
|
#
|
|
9
9
|
# A coordinator LLM agent decomposes work into tasks and enqueues them
|
|
10
|
-
# dynamically via built-in tools. A fixed
|
|
11
|
-
#
|
|
12
|
-
# assignments to accumulate domain context over time.
|
|
10
|
+
# dynamically via built-in tools. A fixed set of worker agents processes tasks
|
|
11
|
+
# sequentially — one task per worker per turn — carrying forward their
|
|
12
|
+
# conversation history across assignments to accumulate domain context over time.
|
|
13
|
+
#
|
|
14
|
+
# Workers are selected in sequence (the worker with the fewest accumulated
|
|
15
|
+
# messages is chosen by default). Task dispatch is synchronous; there is no
|
|
16
|
+
# concurrent or parallel execution.
|
|
13
17
|
#
|
|
14
18
|
# The coordinator is an {Agent::Base} subclass that has two built-in tools:
|
|
15
19
|
# - +enqueue_task+ — adds a task description to the queue
|
|
@@ -56,6 +60,7 @@ module Phronomy
|
|
|
56
60
|
# Falls back to +Phronomy.configuration.default_model+ when not set.
|
|
57
61
|
#
|
|
58
62
|
# @param value [String, nil]
|
|
63
|
+
# @api public
|
|
59
64
|
def coordinator_model(value = nil)
|
|
60
65
|
value ? @coordinator_model = value : @coordinator_model
|
|
61
66
|
end
|
|
@@ -65,6 +70,7 @@ module Phronomy
|
|
|
65
70
|
# and then call +finalize+ when all tasks are enqueued.
|
|
66
71
|
#
|
|
67
72
|
# @param value [String, nil]
|
|
73
|
+
# @api public
|
|
68
74
|
def coordinator_instructions(value = nil)
|
|
69
75
|
value ? @coordinator_instructions = value : @coordinator_instructions
|
|
70
76
|
end
|
|
@@ -75,16 +81,18 @@ module Phronomy
|
|
|
75
81
|
# Pass the same value as +LLMConfig::PROVIDER+ in your examples.
|
|
76
82
|
#
|
|
77
83
|
# @param value [Symbol, nil]
|
|
84
|
+
# @api public
|
|
78
85
|
def coordinator_provider(value = nil)
|
|
79
86
|
value ? @coordinator_provider = value : @coordinator_provider
|
|
80
87
|
end
|
|
81
88
|
|
|
82
|
-
# Configures the
|
|
89
|
+
# Configures the set of workers.
|
|
83
90
|
#
|
|
84
|
-
# @param size [Integer] number of persistent worker instances
|
|
91
|
+
# @param size [Integer] number of persistent worker instances (tasks are assigned sequentially)
|
|
85
92
|
# @param agent [Class] Agent::Base subclass used for all workers
|
|
86
93
|
# @param on_error [Symbol] +:raise+ (default) propagates worker exceptions;
|
|
87
94
|
# +:skip+ records the failure and continues with remaining tasks
|
|
95
|
+
# @api public
|
|
88
96
|
def pool(size:, agent:, on_error: :raise)
|
|
89
97
|
@pool_size = Integer(size)
|
|
90
98
|
@worker_agent = agent
|
|
@@ -98,6 +106,7 @@ module Phronomy
|
|
|
98
106
|
#
|
|
99
107
|
# @yield [Array<WorkerState>] available workers
|
|
100
108
|
# @yieldreturn [WorkerState] the chosen worker
|
|
109
|
+
# @api public
|
|
101
110
|
def schedule(&block)
|
|
102
111
|
@scheduler = block
|
|
103
112
|
end
|
|
@@ -108,6 +117,7 @@ module Phronomy
|
|
|
108
117
|
# When omitted, the raw assignments array is returned.
|
|
109
118
|
#
|
|
110
119
|
# @yield [Array<Hash>] all completed (and skipped) task assignments
|
|
120
|
+
# @api public
|
|
111
121
|
def aggregate(&block)
|
|
112
122
|
@aggregator = block
|
|
113
123
|
end
|
|
@@ -137,6 +147,7 @@ module Phronomy
|
|
|
137
147
|
# @param config [Hash] reserved for future use
|
|
138
148
|
# @return [Object] the return value of the aggregate block, or the raw assignments Array
|
|
139
149
|
# @raise [ArgumentError] when +pool :agent+ has not been configured
|
|
150
|
+
# @api public
|
|
140
151
|
def invoke(team_input, config: {})
|
|
141
152
|
raise ArgumentError, "pool :agent must be configured before invoking" unless self.class._worker_agent
|
|
142
153
|
|
|
@@ -161,6 +172,7 @@ module Phronomy
|
|
|
161
172
|
# @yield [Hash] one event per completed/failed task
|
|
162
173
|
# @return [Object] same as +invoke+
|
|
163
174
|
# @raise [ArgumentError] when +pool :agent+ has not been configured
|
|
175
|
+
# @api public
|
|
164
176
|
def stream(team_input, config: {}, &block)
|
|
165
177
|
return invoke(team_input, config: config) unless block
|
|
166
178
|
|
|
@@ -0,0 +1,155 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Phronomy
|
|
4
|
+
# A thread-safe FIFO queue for passing values between concurrent tasks.
|
|
5
|
+
#
|
|
6
|
+
# Wraps +Thread::Queue+ so that callers do not need to reference the Ruby
|
|
7
|
+
# standard-library type directly. A future implementation may replace the
|
|
8
|
+
# backing primitive without changing call sites.
|
|
9
|
+
#
|
|
10
|
+
# @example Producer / consumer
|
|
11
|
+
# queue = Phronomy::AsyncQueue.new
|
|
12
|
+
# Runtime.instance.spawn { queue.push(expensive_io()) }
|
|
13
|
+
# value = queue.pop # blocks until the producer pushes
|
|
14
|
+
# @api private
|
|
15
|
+
class AsyncQueue
|
|
16
|
+
# @param max_size [Integer, nil] optional upper bound on queue depth.
|
|
17
|
+
# When set, {#push} blocks the caller until a slot is available.
|
|
18
|
+
# @api private
|
|
19
|
+
def initialize(max_size: nil)
|
|
20
|
+
@queue = max_size ? SizedQueue.new(max_size) : Thread::Queue.new
|
|
21
|
+
@max_size = max_size
|
|
22
|
+
end
|
|
23
|
+
|
|
24
|
+
# Enqueues +item+.
|
|
25
|
+
# In a cooperative scheduler context with a bounded queue (max_size:), suspends
|
|
26
|
+
# the current Fiber via a scheduler signal when the queue is full rather than
|
|
27
|
+
# blocking the OS thread. Without a scheduler, falls back to the standard
|
|
28
|
+
# SizedQueue blocking behaviour.
|
|
29
|
+
# @param item [Object] value to enqueue
|
|
30
|
+
# @return [self]
|
|
31
|
+
# @api private
|
|
32
|
+
def push(item)
|
|
33
|
+
scheduler = Phronomy::Runtime::Scheduler.current
|
|
34
|
+
if scheduler && @max_size
|
|
35
|
+
_push_cooperative(scheduler, item)
|
|
36
|
+
else
|
|
37
|
+
@queue.push(item)
|
|
38
|
+
scheduler.raise_signal(@coop_signal) if scheduler && @coop_signal
|
|
39
|
+
end
|
|
40
|
+
self
|
|
41
|
+
end
|
|
42
|
+
|
|
43
|
+
# Dequeues and returns the next item.
|
|
44
|
+
# In a cooperative scheduler context, suspends the current Fiber (yielding
|
|
45
|
+
# control back to the scheduler) rather than blocking the OS thread.
|
|
46
|
+
#
|
|
47
|
+
# When +timeout+ is given the semantics depend on the active backend:
|
|
48
|
+
#
|
|
49
|
+
# * **Thread backend** (`:thread`) — uses real wall-clock time via
|
|
50
|
+
# +Thread::Queue#pop(timeout:)+. Requires Ruby 3.2+.
|
|
51
|
+
# Returns +nil+ if no item arrives within the specified number of real seconds.
|
|
52
|
+
# * **DeterministicScheduler / `:fiber` backend** — uses the scheduler's
|
|
53
|
+
# *virtual time* (+scheduler.virtual_time+). The timeout elapses only when
|
|
54
|
+
# the virtual clock is advanced (e.g. via {Phronomy::Testing::FakeClock#advance}).
|
|
55
|
+
# In tests this means the timeout is fully deterministic and does not depend on
|
|
56
|
+
# actual elapsed wall time. However, in production `:fiber` mode the timeout
|
|
57
|
+
# may never expire unless the scheduler explicitly advances virtual time.
|
|
58
|
+
#
|
|
59
|
+
# @note The `:fiber` backend is **EXPERIMENTAL**. Real-time timeout behaviour
|
|
60
|
+
# in production workloads is not guaranteed and may differ from wall-clock
|
|
61
|
+
# expectations.
|
|
62
|
+
# @note **Cooperative timeout limitation**: on the cooperative path, the
|
|
63
|
+
# deadline is re-checked *after* a wake-up signal arrives. If virtual time
|
|
64
|
+
# has already passed the deadline when the consumer is woken by a producer
|
|
65
|
+
# push, the consumer returns +nil+ rather than the pushed item. Without any
|
|
66
|
+
# wake-up signal the waiting Fiber remains suspended even after
|
|
67
|
+
# +scheduler.advance+ — the timeout does not self-fire.
|
|
68
|
+
# @param timeout [Numeric, nil] seconds to wait before returning +nil+.
|
|
69
|
+
# Semantics are wall-clock on `:thread` and virtual-time on `:fiber`.
|
|
70
|
+
# @return [Object, nil] the next item, or +nil+ when timeout expires
|
|
71
|
+
# @api private
|
|
72
|
+
def pop(timeout: nil)
|
|
73
|
+
scheduler = Phronomy::Runtime::Scheduler.current
|
|
74
|
+
if scheduler
|
|
75
|
+
_pop_cooperative(scheduler, timeout: timeout)
|
|
76
|
+
elsif timeout
|
|
77
|
+
@queue.pop(timeout: timeout)
|
|
78
|
+
else
|
|
79
|
+
@queue.pop
|
|
80
|
+
end
|
|
81
|
+
end
|
|
82
|
+
|
|
83
|
+
# Returns the current number of items in the queue.
|
|
84
|
+
# @return [Integer]
|
|
85
|
+
# @api private
|
|
86
|
+
def size
|
|
87
|
+
@queue.size
|
|
88
|
+
end
|
|
89
|
+
|
|
90
|
+
# Returns +true+ when the queue contains no items.
|
|
91
|
+
# @return [Boolean]
|
|
92
|
+
# @api private
|
|
93
|
+
def empty?
|
|
94
|
+
@queue.empty?
|
|
95
|
+
end
|
|
96
|
+
|
|
97
|
+
# Closes the queue. Subsequent {#push} calls raise +ClosedQueueError+; {#pop} still returns already-queued items, then +nil+ on the blocking (non-scheduler) path.
|
|
98
|
+
# @return [self]
|
|
99
|
+
# @api private
|
|
100
|
+
def close
|
|
101
|
+
@queue.close
|
|
102
|
+
self
|
|
103
|
+
end
|
|
104
|
+
|
|
105
|
+
private
|
|
106
|
+
|
|
107
|
+
# Cooperative pop for DeterministicScheduler context.
|
|
108
|
+
# Suspends the current Fiber via the scheduler's signal mechanism rather than
|
|
109
|
+
# blocking the OS thread. Because cooperative mode is single-threaded, the
|
|
110
|
+
# empty?/pop pair is race-free (no other Fiber can run between the two calls).
|
|
111
|
+
# After dequeuing, notifies any push-waiter so that a backpressure-suspended
|
|
112
|
+
# producer can be unblocked.
|
|
113
|
+
# @api private
|
|
114
|
+
# @param scheduler [Runtime::Scheduler]
|
|
115
|
+
# @param timeout [Numeric, nil]
|
|
116
|
+
# @return [Object, nil]
|
|
117
|
+
def _pop_cooperative(scheduler, timeout:)
|
|
118
|
+
@coop_signal ||= scheduler.new_signal
|
|
119
|
+
deadline = timeout ? (scheduler.virtual_time + timeout) : nil
|
|
120
|
+
|
|
121
|
+
loop do
|
|
122
|
+
unless @queue.empty?
|
|
123
|
+
item = @queue.pop(timeout: 0)
|
|
124
|
+
# Notify a push-waiter (bounded queue) that a slot opened up.
|
|
125
|
+
scheduler.raise_signal(@push_signal) if @push_signal
|
|
126
|
+
return item
|
|
127
|
+
end
|
|
128
|
+
return nil if deadline && scheduler.virtual_time >= deadline
|
|
129
|
+
scheduler.wait_for_signal(@coop_signal)
|
|
130
|
+
return nil if deadline && scheduler.virtual_time >= deadline
|
|
131
|
+
end
|
|
132
|
+
end
|
|
133
|
+
|
|
134
|
+
# Cooperative push for DeterministicScheduler context with a bounded queue.
|
|
135
|
+
# Suspends the current Fiber via a scheduler signal when the queue is full,
|
|
136
|
+
# rather than blocking the OS thread.
|
|
137
|
+
# @api private
|
|
138
|
+
# @param scheduler [Runtime::Scheduler]
|
|
139
|
+
# @param item [Object]
|
|
140
|
+
# @return [void]
|
|
141
|
+
def _push_cooperative(scheduler, item)
|
|
142
|
+
@push_signal ||= scheduler.new_signal
|
|
143
|
+
|
|
144
|
+
loop do
|
|
145
|
+
unless @queue.size >= @max_size
|
|
146
|
+
@queue.push(item)
|
|
147
|
+
# Notify any pop-waiter that an item is now available.
|
|
148
|
+
scheduler.raise_signal(@coop_signal) if @coop_signal
|
|
149
|
+
return
|
|
150
|
+
end
|
|
151
|
+
scheduler.wait_for_signal(@push_signal)
|
|
152
|
+
end
|
|
153
|
+
end
|
|
154
|
+
end
|
|
155
|
+
end
|
|
@@ -0,0 +1,435 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Phronomy
|
|
4
|
+
# A bounded, observable thread pool for blocking I/O operations.
|
|
5
|
+
#
|
|
6
|
+
# ## Architectural boundary
|
|
7
|
+
#
|
|
8
|
+
# `BlockingAdapterPool` is the *only* place in Phronomy that uses raw OS threads
|
|
9
|
+
# for I/O. All third-party gem calls whose internal I/O Phronomy cannot control
|
|
10
|
+
# — including RubyLLM, ActiveRecord, Redis, Faraday, and MCP stdio transport —
|
|
11
|
+
# **must** route through this pool (or a named pool obtained via
|
|
12
|
+
# {Runtime#pool}). Custom non-blocking HTTP/selector runtimes are intentionally
|
|
13
|
+
# out of scope; the pool + cooperative scheduler combination satisfies all
|
|
14
|
+
# current concurrency requirements without that complexity. (See ADR-010.)
|
|
15
|
+
#
|
|
16
|
+
# All blocking calls (LLM HTTP, MCP stdio, ActiveRecord, Redis, etc.) must be
|
|
17
|
+
# submitted through this pool so that:
|
|
18
|
+
#
|
|
19
|
+
# 1. The total number of OS threads is capped.
|
|
20
|
+
# 2. Queue depth is bounded (backpressure when the pool is saturated).
|
|
21
|
+
# 3. Per-operation timeouts are enforced consistently.
|
|
22
|
+
# 4. Abandoned (timed-out) operations are tracked and logged.
|
|
23
|
+
# 5. Metrics (active count, queue depth, abandoned count, avg wait time) are
|
|
24
|
+
# observable at runtime.
|
|
25
|
+
#
|
|
26
|
+
# @example Submitting a blocking LLM call
|
|
27
|
+
# op = runtime.blocking_io.submit(timeout: 30) { chat.ask(message) }
|
|
28
|
+
# result = op.await # blocks the calling thread until done
|
|
29
|
+
#
|
|
30
|
+
# @example With cancellation
|
|
31
|
+
# token = Phronomy::CancellationToken.timeout_after(60)
|
|
32
|
+
# op = pool.submit(timeout: 30, cancellation_token: token) { expensive_call }
|
|
33
|
+
# result = op.await
|
|
34
|
+
class BlockingAdapterPool
|
|
35
|
+
# Represents the pending result of a submitted blocking operation.
|
|
36
|
+
# Returned immediately by {BlockingAdapterPool#submit}; call {#await} to
|
|
37
|
+
# wait for the result.
|
|
38
|
+
class PendingOperation
|
|
39
|
+
# @return [Boolean] true when the operation has finished (success or error)
|
|
40
|
+
# @api private
|
|
41
|
+
def done?
|
|
42
|
+
@mutex.synchronize { @done }
|
|
43
|
+
end
|
|
44
|
+
|
|
45
|
+
# @return [Boolean] true when the operation was abandoned due to timeout
|
|
46
|
+
# @api private
|
|
47
|
+
def abandoned?
|
|
48
|
+
@abandoned
|
|
49
|
+
end
|
|
50
|
+
|
|
51
|
+
# @return [Float] seconds spent in the queue before execution started (0.0 until execution has started)
|
|
52
|
+
# @api private
|
|
53
|
+
def wait_time
|
|
54
|
+
@wait_time || 0.0
|
|
55
|
+
end
|
|
56
|
+
|
|
57
|
+
# Blocks until the operation completes and returns its value.
|
|
58
|
+
#
|
|
59
|
+
# An optional +timeout+ (in seconds) may be passed here; it is measured
|
|
60
|
+
# from the moment +await+ is called. If both a submit-time timeout and an
|
|
61
|
+
# await-time timeout are present, the smaller value wins (both are counted from this +await+ call). The worker
|
|
62
|
+
# thread is NOT interrupted — it runs to completion on its own.
|
|
63
|
+
#
|
|
64
|
+
# An optional +cancellation_token+ may be passed here (or at submit time).
|
|
65
|
+
# If the token is cancelled while waiting, {Phronomy::CancellationError} is
|
|
66
|
+
# raised immediately without interrupting the worker.
|
|
67
|
+
#
|
|
68
|
+
# **Cooperative path (`:fiber` / `DeterministicScheduler`):**
|
|
69
|
+
# When called from a Fiber managed by {DeterministicScheduler} (i.e. under
|
|
70
|
+
# the +:fiber+ runtime backend), the calling Fiber suspends cooperatively
|
|
71
|
+
# via +Fiber.yield+ rather than blocking the OS thread. The Fiber is
|
|
72
|
+
# resumed on the scheduler's ready queue once the worker thread completes
|
|
73
|
+
# the operation.
|
|
74
|
+
#
|
|
75
|
+
# @note **Cooperative cancellation semantics** (ADR-010):
|
|
76
|
+
# Phronomy uses a non-preemptive, cooperative-first concurrency model.
|
|
77
|
+
# Cancellation is *cooperative*, not preemptive:
|
|
78
|
+
# - When a +cancellation_token+ is cancelled, +CancellationError+ is
|
|
79
|
+
# raised to the +await+ caller immediately; when the timeout fires,
|
|
80
|
+
# +TimeoutError+ is raised instead. In both cases, the underlying
|
|
81
|
+
# worker thread is **not** forcibly stopped.
|
|
82
|
+
# - The worker thread will complete its submitted block naturally.
|
|
83
|
+
# Code inside the block must call +token.check!+ at suitable
|
|
84
|
+
# checkpoints to observe the cancelled state and exit early.
|
|
85
|
+
# - There is no +Thread#kill+ or +Thread#raise+ involved. The framework
|
|
86
|
+
# never forcibly terminates worker threads.
|
|
87
|
+
#
|
|
88
|
+
# @note **Cooperative timeout limitation**: the +timeout:+ parameter passed
|
|
89
|
+
# to +await+ is *not* enforced on the cooperative path. The calling Fiber
|
|
90
|
+
# remains suspended until the worker thread finishes regardless of how many
|
|
91
|
+
# seconds elapse. This is because the cooperative scheduler cannot
|
|
92
|
+
# preempt a running OS thread. If a time bound is required, set
|
|
93
|
+
# +timeout:+ at {BlockingAdapterPool#submit submit} time instead; the pool
|
|
94
|
+
# will then abandon the operation on the worker side and mark it as
|
|
95
|
+
# {#abandoned?}.
|
|
96
|
+
#
|
|
97
|
+
# @param timeout [Numeric, nil] seconds from now before raising TimeoutError
|
|
98
|
+
# (thread path only; ignored on the cooperative/fiber path)
|
|
99
|
+
# @param cancellation_token [CancellationToken, nil]
|
|
100
|
+
# @return [Object]
|
|
101
|
+
# @raise [Phronomy::TimeoutError]
|
|
102
|
+
# @raise [Phronomy::CancellationError]
|
|
103
|
+
# @raise [Exception] error raised inside the submitted block
|
|
104
|
+
# @api private
|
|
105
|
+
def await(timeout: nil, cancellation_token: nil)
|
|
106
|
+
effective_timeout = [timeout, @timeout].compact.min
|
|
107
|
+
effective_token = cancellation_token || @cancellation_token
|
|
108
|
+
|
|
109
|
+
raise CancellationError, "blocking operation cancelled" if effective_token&.cancelled?
|
|
110
|
+
|
|
111
|
+
# Cooperative context: suspend the calling Fiber rather than blocking
|
|
112
|
+
# the OS thread so that DeterministicScheduler can continue dispatching
|
|
113
|
+
# other tasks while waiting for the blocking worker to finish.
|
|
114
|
+
# (Issue #338, ADR-010 Rule 3)
|
|
115
|
+
# Uses the same thread-local key as Task::FiberBackend::SCHEDULER_KEY
|
|
116
|
+
# (:phronomy_deterministic_scheduler) to avoid a cross-file constant
|
|
117
|
+
# dependency at load time.
|
|
118
|
+
scheduler = Thread.current.thread_variable_get(:phronomy_deterministic_scheduler)
|
|
119
|
+
in_managed_fiber = !Fiber.respond_to?(:main) || Fiber.current != Fiber.main
|
|
120
|
+
if scheduler && in_managed_fiber
|
|
121
|
+
unless @done
|
|
122
|
+
# Register this await with the scheduler so run_until_idle knows
|
|
123
|
+
# not to exit until the worker thread completes (Issue #338).
|
|
124
|
+
scheduler.track_blocking_await
|
|
125
|
+
waiting_fiber = Fiber.current
|
|
126
|
+
on_complete do |_result, _error|
|
|
127
|
+
# Decrement the counter and wake run_until_idle, then re-enqueue
|
|
128
|
+
# the suspended Fiber for cooperative resumption.
|
|
129
|
+
scheduler.complete_blocking_await
|
|
130
|
+
scheduler.enqueue_fiber(-> { waiting_fiber.resume })
|
|
131
|
+
end
|
|
132
|
+
Fiber.yield(:cooperative_suspend)
|
|
133
|
+
end
|
|
134
|
+
raise CancellationError, "blocking operation cancelled" if effective_token&.cancelled?
|
|
135
|
+
raise @error if @error
|
|
136
|
+
|
|
137
|
+
return @value
|
|
138
|
+
end
|
|
139
|
+
|
|
140
|
+
# Wake up the waiting thread whenever the token is cancelled so we can
|
|
141
|
+
# propagate cancellation without sleeping until the timeout expires.
|
|
142
|
+
effective_token&.on_cancel { @mutex.synchronize { @cond.broadcast } }
|
|
143
|
+
|
|
144
|
+
if effective_timeout
|
|
145
|
+
deadline = Process.clock_gettime(Process::CLOCK_MONOTONIC) + effective_timeout
|
|
146
|
+
@mutex.synchronize do
|
|
147
|
+
until @done
|
|
148
|
+
raise CancellationError, "blocking operation cancelled" if effective_token&.cancelled?
|
|
149
|
+
|
|
150
|
+
remaining = deadline - Process.clock_gettime(Process::CLOCK_MONOTONIC)
|
|
151
|
+
if remaining <= 0
|
|
152
|
+
# Guard against double-counting when await is called multiple times.
|
|
153
|
+
unless @abandoned
|
|
154
|
+
@abandoned = true
|
|
155
|
+
@on_abandoned&.call
|
|
156
|
+
end
|
|
157
|
+
raise Phronomy::TimeoutError, "blocking operation timed out after #{effective_timeout}s"
|
|
158
|
+
end
|
|
159
|
+
@cond.wait(@mutex, remaining)
|
|
160
|
+
end
|
|
161
|
+
end
|
|
162
|
+
else
|
|
163
|
+
@mutex.synchronize do
|
|
164
|
+
until @done
|
|
165
|
+
raise CancellationError, "blocking operation cancelled" if effective_token&.cancelled?
|
|
166
|
+
|
|
167
|
+
@cond.wait(@mutex)
|
|
168
|
+
end
|
|
169
|
+
end
|
|
170
|
+
end
|
|
171
|
+
raise @error if @error
|
|
172
|
+
|
|
173
|
+
@value
|
|
174
|
+
end
|
|
175
|
+
|
|
176
|
+
# Registers a callback to be called when the operation finishes.
|
|
177
|
+
# If the operation has already finished the callback is invoked immediately
|
|
178
|
+
# on the calling thread. Otherwise it is invoked on the worker thread that
|
|
179
|
+
# completes the operation.
|
|
180
|
+
#
|
|
181
|
+
# The callback receives +result+ and +error+: +error+ is +nil+ on success, +result+ is +nil+ on failure (both are +nil+ when the block itself returned +nil+).
|
|
182
|
+
#
|
|
183
|
+
# @yield [result, error]
|
|
184
|
+
# @return [self]
|
|
185
|
+
# @api private
|
|
186
|
+
def on_complete(&callback)
|
|
187
|
+
fire_args = nil
|
|
188
|
+
@mutex.synchronize do
|
|
189
|
+
if @done
|
|
190
|
+
fire_args = [@value, @error]
|
|
191
|
+
else
|
|
192
|
+
@callbacks ||= []
|
|
193
|
+
@callbacks << callback
|
|
194
|
+
end
|
|
195
|
+
end
|
|
196
|
+
callback.call(*fire_args) if fire_args
|
|
197
|
+
self
|
|
198
|
+
end
|
|
199
|
+
|
|
200
|
+
# @api private
|
|
201
|
+
def initialize(block, timeout: nil, cancellation_token: nil, on_abandoned: nil)
|
|
202
|
+
@block = block
|
|
203
|
+
@timeout = timeout
|
|
204
|
+
@cancellation_token = cancellation_token
|
|
205
|
+
@on_abandoned = on_abandoned
|
|
206
|
+
@value = nil
|
|
207
|
+
@error = nil
|
|
208
|
+
@done = false
|
|
209
|
+
@abandoned = false
|
|
210
|
+
@wait_time = nil
|
|
211
|
+
@submitted_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)
|
|
212
|
+
@mutex = Mutex.new
|
|
213
|
+
@cond = ConditionVariable.new
|
|
214
|
+
end
|
|
215
|
+
|
|
216
|
+
# @api private
|
|
217
|
+
def execute!
|
|
218
|
+
@wait_time = Process.clock_gettime(Process::CLOCK_MONOTONIC) - @submitted_at
|
|
219
|
+
|
|
220
|
+
if @cancellation_token&.cancelled?
|
|
221
|
+
complete_with_error!(CancellationError.new("operation cancelled before execution"))
|
|
222
|
+
return
|
|
223
|
+
end
|
|
224
|
+
|
|
225
|
+
# Do NOT use Timeout.timeout here — it delivers an async Thread#raise
|
|
226
|
+
# that can corrupt external library state (mutexes, C extensions, etc.).
|
|
227
|
+
# Timeout enforcement is handled cooperatively in #await instead.
|
|
228
|
+
# Each blocking library (Net::HTTP, pg, redis, etc.) should set its
|
|
229
|
+
# own native connection/read timeouts.
|
|
230
|
+
begin
|
|
231
|
+
complete_with_value!(@block.call)
|
|
232
|
+
rescue => e
|
|
233
|
+
complete_with_error!(e)
|
|
234
|
+
end
|
|
235
|
+
end
|
|
236
|
+
|
|
237
|
+
private
|
|
238
|
+
|
|
239
|
+
def complete_with_value!(value)
|
|
240
|
+
cbs = nil
|
|
241
|
+
@mutex.synchronize do
|
|
242
|
+
@value = value
|
|
243
|
+
@done = true
|
|
244
|
+
@cond.broadcast
|
|
245
|
+
cbs = @callbacks
|
|
246
|
+
@callbacks = nil
|
|
247
|
+
end
|
|
248
|
+
cbs&.each { |cb| cb.call(value, nil) }
|
|
249
|
+
end
|
|
250
|
+
|
|
251
|
+
def complete_with_error!(error)
|
|
252
|
+
cbs = nil
|
|
253
|
+
@mutex.synchronize do
|
|
254
|
+
@error = error
|
|
255
|
+
@done = true
|
|
256
|
+
@cond.broadcast
|
|
257
|
+
cbs = @callbacks
|
|
258
|
+
@callbacks = nil
|
|
259
|
+
end
|
|
260
|
+
cbs&.each { |cb| cb.call(nil, error) }
|
|
261
|
+
end
|
|
262
|
+
end
|
|
263
|
+
|
|
264
|
+
# @param pool_size [Integer] maximum number of worker threads
|
|
265
|
+
# @param queue_size [Integer] maximum pending operations waiting for a worker
|
|
266
|
+
# @param name [String, Symbol, nil] optional pool name used in thread labels
|
|
267
|
+
# @param logger [Logger, nil] optional logger for warnings
|
|
268
|
+
# @api private
|
|
269
|
+
def initialize(pool_size: 10, queue_size: 100, name: nil, logger: nil)
  # Configuration supplied by the caller.
  @pool_size, @queue_size = pool_size, queue_size
  @name, @logger = name, logger

  # Lifecycle flag, plus the lock used around the metric counters.
  @shutdown = false
  @mutex = Mutex.new

  # Metric counters all start at zero.
  @active_count = @abandoned_count = 0
  @total_wait_ns = @completed_count = 0

  # Bounded hand-off queue between submitters and worker threads.
  @queue = SizedQueue.new(queue_size)

  # Workers are spawned last: each one starts popping from @queue right away.
  @workers = Array.new(pool_size) { |worker_index| spawn_worker(worker_index) }
end
|
|
283
|
+
|
|
284
|
+
# Submits a blocking operation to the pool.
|
|
285
|
+
# Returns a {PendingOperation} immediately; the block runs on a worker thread.
|
|
286
|
+
#
|
|
287
|
+
# @note **Cooperative callers**: if you are running under the `:fiber` backend
|
|
288
|
+
# (i.e. inside a {DeterministicScheduler} Fiber), set +timeout:+ here
|
|
289
|
+
# rather than on {PendingOperation#await}. The await-time timeout is not
|
|
290
|
+
# enforced on the cooperative path (the Fiber cannot preempt a running
|
|
291
|
+
# worker thread). A submit-time timeout triggers on the worker side and
|
|
292
|
+
# marks the operation {PendingOperation#abandoned? abandoned}, which
|
|
293
|
+
# unblocks the waiting Fiber via the normal on-complete callback.
|
|
294
|
+
# @param timeout [Numeric, nil] seconds before the operation is abandoned
|
|
295
|
+
# @param cancellation_token [CancellationToken, nil]
|
|
296
|
+
# @yield block containing the blocking call
|
|
297
|
+
# @return [PendingOperation]
|
|
298
|
+
# @raise [Phronomy::PoolShutdownError] when the pool has been shut down
|
|
299
|
+
# @raise [Phronomy::BackpressureError] when +on_full: :raise+ and queue is full
|
|
300
|
+
# @raise [Phronomy::TimeoutError] when +on_full: :timeout+ and wait exceeds +full_timeout+
|
|
301
|
+
# @api private
|
|
302
|
+
# @param on_full [Symbol] policy when the queue is full: +:raise+ fails
#   immediately, +:timeout+ polls for a free slot until +full_timeout+
#   elapses, any other value (default +:wait+) blocks until a slot frees up
# @param full_timeout [Numeric, nil] seconds to keep polling when
#   +on_full: :timeout+; +nil+ polls without a deadline
def submit(timeout: nil, cancellation_token: nil, on_full: :wait, full_timeout: nil, &block)
  raise Phronomy::PoolShutdownError, "pool has been shut down" if @shutdown

  # Abandonment is only counted for operations that carry a timeout.
  on_abandoned = timeout ? -> { @mutex.synchronize { @abandoned_count += 1 } } : nil
  op = PendingOperation.new(block, timeout: timeout, cancellation_token: cancellation_token,
    on_abandoned: on_abandoned)

  begin
    case on_full
    when :raise
      begin
        @queue.push(op, true)
      rescue ThreadError
        raise Phronomy::BackpressureError, "BlockingAdapterPool queue is full (depth: #{@queue_size})"
      end
    when :timeout
      deadline = full_timeout ? (Process.clock_gettime(Process::CLOCK_MONOTONIC) + full_timeout) : nil
      # Deliberately NOT Kernel#loop: ClosedQueueError is a StopIteration
      # subclass, and Kernel#loop silently swallows StopIteration. Under
      # `loop`, a shutdown racing with this push would end the loop quietly
      # and return an operation that was never enqueued.
      enqueued = false
      until enqueued
        begin
          @queue.push(op, true)
          enqueued = true
        rescue ThreadError
          if deadline && Process.clock_gettime(Process::CLOCK_MONOTONIC) >= deadline
            raise Phronomy::TimeoutError, "timed out waiting for a free slot in BlockingAdapterPool"
          end
          # Short pause before polling the queue again.
          sleep(0.005)
        end
      end
    else # :wait (default)
      @queue.push(op)
    end
  rescue ClosedQueueError
    # Shutdown raced with this submit — treat as if @shutdown was already set.
    raise Phronomy::PoolShutdownError, "pool has been shut down"
  end

  op
end
|
|
335
|
+
|
|
336
|
+
# Gracefully drains the pool and terminates all worker threads.
|
|
337
|
+
# Waits up to +drain_timeout+ seconds for in-flight operations to finish.
|
|
338
|
+
#
|
|
339
|
+
# Closing the underlying SizedQueue signals workers to exit after draining
|
|
340
|
+
# remaining items, without blocking on a full-queue push.
|
|
341
|
+
#
|
|
342
|
+
# @param drain_timeout [Numeric] seconds to wait for workers to finish
|
|
343
|
+
# @return [self]
|
|
344
|
+
# @api private
|
|
345
|
+
def shutdown(drain_timeout: 30)
  @shutdown = true
  @queue.close

  if drain_timeout.nil?
    # No limit requested: wait for every worker to finish.
    @workers.each(&:join)
  else
    # One deadline shared by all workers. Joining each thread with the full
    # timeout would allow a worst case of pool_size * drain_timeout.
    deadline = Process.clock_gettime(Process::CLOCK_MONOTONIC) + drain_timeout
    @workers.each do |worker|
      remaining = deadline - Process.clock_gettime(Process::CLOCK_MONOTONIC)
      # Once the budget is spent, join(0) only polls the thread's state.
      worker.join(remaining.positive? ? remaining : 0)
    end
  end

  self
end
|
|
351
|
+
|
|
352
|
+
# --- Metrics ----------------------------------------------------------
|
|
353
|
+
|
|
354
|
+
# @return [Integer] number of operations currently executing on workers
|
|
355
|
+
# @api private
|
|
356
|
+
def active_count
  # Read the counter under the lock that its writers also take.
  @mutex.synchronize do
    @active_count
  end
end
|
|
359
|
+
|
|
360
|
+
# @return [Integer] number of operations waiting in the queue
|
|
361
|
+
# @api private
|
|
362
|
+
def queue_depth
  # Number of items currently sitting in the queue.
  @queue.length
end
|
|
365
|
+
|
|
366
|
+
# @return [Integer] number of operations that were abandoned due to timeout
|
|
367
|
+
# @api private
|
|
368
|
+
def abandoned_count
  # Read the counter under the lock that its writers also take.
  @mutex.synchronize do
    @abandoned_count
  end
end
|
|
371
|
+
|
|
372
|
+
# Average time (in seconds) that completed operations spent in the queue
|
|
373
|
+
# waiting for a worker. Returns 0.0 when no operations have completed yet.
|
|
374
|
+
# @return [Float]
|
|
375
|
+
# @api private
|
|
376
|
+
def average_wait_seconds
  @mutex.synchronize do
    if @completed_count.zero?
      # Nothing has completed yet; avoid dividing by zero.
      0.0
    else
      # The total is tracked in nanoseconds; convert the mean to seconds.
      @total_wait_ns / @completed_count.to_f / 1_000_000_000.0
    end
  end
end
|
|
383
|
+
|
|
384
|
+
# @return [Integer] configured maximum number of worker threads
|
|
385
|
+
attr_reader :pool_size
|
|
386
|
+
|
|
387
|
+
# @return [Integer] configured maximum queue depth
|
|
388
|
+
attr_reader :queue_size
|
|
389
|
+
|
|
390
|
+
# @return [String, Symbol, nil] pool name used in thread labels
|
|
391
|
+
attr_reader :name
|
|
392
|
+
|
|
393
|
+
private
|
|
394
|
+
|
|
395
|
+
SENTINEL = :shutdown
|
|
396
|
+
private_constant :SENTINEL
|
|
397
|
+
|
|
398
|
+
# Starts one worker thread that keeps taking operations from @queue and
# running them until the queue signals that there is nothing more to do.
#
# @param index [Integer, nil] worker number appended to the thread label
# @return [Thread] the newly started worker thread
def spawn_worker(index = nil)
  # nil parts (no pool name, no index) are dropped from the label.
  thread_label = ["phronomy", "blocking-pool", @name, index].compact.join("-")

  Thread.new do
    Thread.current.name = thread_label

    loop do
      begin
        job = @queue.pop
      rescue ClosedQueueError
        break
      end

      # nil is returned by a closed, empty Queue on some Ruby versions
      break if job.nil? || job == SENTINEL

      run_operation(job)
    end
  end
end
|
|
415
|
+
|
|
416
|
+
# Executes one operation on the current worker thread and maintains the
# pool metrics around it.
#
# The bookkeeping sits in +ensure+, so it also runs when +op.execute!+
# raises; the exception still propagates to the caller afterwards.
#
# @param op [PendingOperation] operation taken from the queue
def run_operation(op)
  @mutex.synchronize { @active_count += 1 }

  begin
    op.execute!
  ensure
    @mutex.synchronize do
      @active_count -= 1
      # wait_time is multiplied up to whole nanoseconds before accumulating.
      @total_wait_ns += (op.wait_time * 1_000_000_000).to_i
      @completed_count += 1
    end

    # Log only after the metrics lock is released. Logger I/O under the lock
    # would stall every metrics reader, and a logger that reads pool metrics
    # would hit a recursive-locking ThreadError.
    if op.abandoned?
      @logger&.warn { "BlockingAdapterPool: worker finished operation after caller timed out" }
    end
  end
end
|
|
434
|
+
end
|
|
435
|
+
end
|