cdc-parallel 0.2.2 → 0.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +48 -21
- data/README.md +54 -48
- data/lib/cdc/parallel/configuration.rb +28 -5
- data/lib/cdc/parallel/errors.rb +59 -5
- data/lib/cdc/parallel/processor_pool.rb +198 -44
- data/lib/cdc/parallel/result_collector.rb +43 -2
- data/lib/cdc/parallel/router.rb +26 -1
- data/lib/cdc/parallel/runtime.rb +65 -4
- data/lib/cdc/parallel/transaction_pool.rb +54 -3
- data/lib/cdc/parallel/version.rb +6 -1
- data/lib/cdc/parallel.rb +33 -1
- data/sig/cdc/parallel/configuration.rbs +8 -2
- data/sig/cdc/parallel/errors.rbs +7 -7
- data/sig/cdc/parallel/processor_pool.rbs +33 -16
- data/sig/cdc/parallel/result_collector.rbs +6 -4
- data/sig/cdc/parallel/router.rbs +7 -4
- data/sig/cdc/parallel/runtime.rbs +11 -8
- data/sig/cdc/parallel/transaction_pool.rbs +4 -4
- data/sig/cdc/parallel/version.rbs +1 -1
- metadata +5 -23
- data/sig/shims/cdc_core.rbs +0 -14
- data/sig/shims/data_define.rbs +0 -0
- data/sig/shims/etc.rbs +0 -3
- data/sig/shims/timeout.rbs +0 -3
|
@@ -2,76 +2,205 @@
|
|
|
2
2
|
|
|
3
3
|
module CDC
|
|
4
4
|
module Parallel
|
|
5
|
-
# Executes one Ractor-safe processor
|
|
5
|
+
# Executes one Ractor-safe `cdc-core` processor across a fixed set of
|
|
6
|
+
# pre-warmed Ractor workers.
|
|
6
7
|
#
|
|
7
|
-
#
|
|
8
|
-
#
|
|
9
|
-
#
|
|
10
|
-
#
|
|
8
|
+
# `ProcessorPool` is the low-level execution primitive used by
|
|
9
|
+
# {CDC::Parallel::Runtime}. It accepts normalized `cdc-core` work items,
|
|
10
|
+
# sends them across Ractor boundaries, invokes the configured processor, and
|
|
11
|
+
# returns `CDC::Core::ProcessorResult` objects in input order.
|
|
12
|
+
#
|
|
13
|
+
# This class is intentionally focused on **CPU-bound parallel execution**.
|
|
14
|
+
# Use it when the processor spends most of its time doing Ruby work such as
|
|
15
|
+
# transformation, enrichment, serialization, compression, scoring, or other
|
|
16
|
+
# in-memory computation. I/O-heavy work belongs in a fiber-friendly runtime
|
|
17
|
+
# instead, such as the planned `cdc-concurrent`.
|
|
18
|
+
#
|
|
19
|
+
# ## Processor safety contract
|
|
20
|
+
#
|
|
21
|
+
# The supplied processor must declare `ractor_safe!` on its class. That
|
|
22
|
+
# declaration is treated as the processor author's explicit promise that the
|
|
23
|
+
# processor object and its dependencies can safely cross a Ractor boundary.
|
|
24
|
+
#
|
|
25
|
+
# `ProcessorPool` validates this declaration before booting workers.
|
|
26
|
+
#
|
|
27
|
+
# @example Declaring a processor as Ractor-safe
|
|
28
|
+
# class AnalyticsProcessor < CDC::Core::Processor
|
|
29
|
+
# ractor_safe!
|
|
30
|
+
#
|
|
31
|
+
# def process(event)
|
|
32
|
+
# CDC::Core::ProcessorResult.success(event)
|
|
33
|
+
# end
|
|
34
|
+
# end
|
|
35
|
+
#
|
|
36
|
+
# pool = CDC::Parallel::ProcessorPool.new(
|
|
37
|
+
# processor: AnalyticsProcessor.new,
|
|
38
|
+
# size: 4
|
|
39
|
+
# )
|
|
40
|
+
#
|
|
41
|
+
# Declaring `ractor_safe!` does not make unsafe code safe. It only allows the
|
|
42
|
+
# processor to be passed into worker Ractors. Mutable global state, database
|
|
43
|
+
# connections, sockets, caches, file handles, and non-shareable objects still
|
|
44
|
+
# need to be designed carefully by the processor implementor.
|
|
45
|
+
#
|
|
46
|
+
# ## Execution model
|
|
47
|
+
#
|
|
48
|
+
# Workers are created during initialization and reused for all dispatches.
|
|
49
|
+
# This pays Ractor startup cost once and keeps the pool stable even when
|
|
50
|
+
# individual processor calls fail.
|
|
51
|
+
#
|
|
52
|
+
# The pool uses a fan-out / fan-in pattern:
|
|
53
|
+
#
|
|
54
|
+
# ```text
|
|
55
|
+
# work items
|
|
56
|
+
# |
|
|
57
|
+
# v
|
|
58
|
+
# ProcessorPool
|
|
59
|
+
# |
|
|
60
|
+
# +----> Worker Ractor 1
|
|
61
|
+
# +----> Worker Ractor 2
|
|
62
|
+
# +----> Worker Ractor N
|
|
63
|
+
# |
|
|
64
|
+
# v
|
|
65
|
+
# ProcessorResult
|
|
66
|
+
# |
|
|
67
|
+
# v
|
|
68
|
+
# ordered results
|
|
69
|
+
# ```
|
|
70
|
+
#
|
|
71
|
+
# Fan-out uses round-robin worker selection. Fan-in collects responses from a
|
|
72
|
+
# reply port and reorders them by submission index, so `process_many` always
|
|
73
|
+
# returns results in the same order as the input array even when work
|
|
74
|
+
# completes out of order.
|
|
75
|
+
#
|
|
76
|
+
# @example Processing one item
|
|
77
|
+
# result = pool.process(event)
|
|
78
|
+
# result.success? #=> true
|
|
79
|
+
#
|
|
80
|
+
# @example Processing a batch while preserving result order
|
|
81
|
+
# results = pool.process_many([event_a, event_b, event_c])
|
|
82
|
+
# results.map(&:success?)
|
|
83
|
+
#
|
|
84
|
+
# @example Shutting down explicitly
|
|
85
|
+
# pool.shutdown
|
|
86
|
+
#
|
|
87
|
+
# @note `ProcessorPool` preserves the order of returned results, not the
|
|
88
|
+
# order in which independent items execute. If a sink needs strict ordering
|
|
89
|
+
# by transaction, relation, or primary key, use the ecosystem ordering
|
|
90
|
+
# contract and an ordered dispatcher/runtime above this primitive.
|
|
91
|
+
#
|
|
92
|
+
# @see CDC::Parallel::Runtime High-level facade for processing supported CDC work items
|
|
93
|
+
# @see CDC::Parallel::TransactionPool Transaction-envelope processing wrapper
|
|
94
|
+
# @see CDC::Parallel::ResultCollector Worker response normalization
|
|
95
|
+
# @api public
|
|
11
96
|
class ProcessorPool # rubocop:disable Metrics/ClassLength
|
|
97
|
+
# Create a new pool and boot its worker Ractors.
|
|
98
|
+
#
|
|
12
99
|
# @param processor [CDC::Core::Processor]
|
|
100
|
+
# Processor instance used by every worker. Its class must respond to
|
|
101
|
+
# `ractor_safe?` and return `true`.
|
|
13
102
|
# @param size [Integer]
|
|
14
|
-
#
|
|
103
|
+
# Number of worker Ractors to boot. Defaults to `Etc.nprocessors`.
|
|
104
|
+
# @param timeout [Numeric, nil]
|
|
105
|
+
# Optional timeout, in seconds, used when waiting for worker results and
|
|
106
|
+
# during shutdown. `nil` means wait indefinitely.
|
|
107
|
+
# @raise [UnsafeProcessorError]
|
|
108
|
+
# Raised when the processor class has not declared `ractor_safe!`.
|
|
109
|
+
# @raise [ArgumentError]
|
|
110
|
+
# Raised by {Configuration} when `size` or `timeout` is invalid.
|
|
15
111
|
# @return [void]
|
|
16
112
|
def initialize(processor:, size: Etc.nprocessors, timeout: nil)
|
|
17
113
|
validate_processor!(processor)
|
|
18
114
|
|
|
19
115
|
@processor = ::Ractor.make_shareable(processor)
|
|
20
116
|
@configuration = Configuration.new(size:, timeout:)
|
|
21
|
-
|
|
117
|
+
booted_workers = Array.new(@configuration.size) do
|
|
22
118
|
build_worker(@processor)
|
|
23
|
-
end
|
|
119
|
+
end
|
|
120
|
+
|
|
121
|
+
@workers = booted_workers.map(&:first).freeze
|
|
122
|
+
@worker_inboxes = booted_workers.map(&:last).freeze
|
|
24
123
|
|
|
25
124
|
@next_worker = 0
|
|
125
|
+
@dispatch_mutex = Mutex.new
|
|
26
126
|
@shutdown = false
|
|
27
127
|
end
|
|
28
128
|
|
|
29
129
|
# Process one work item synchronously.
|
|
30
130
|
#
|
|
131
|
+
# This is a convenience wrapper around {#process_many}. The work still
|
|
132
|
+
# executes inside a worker Ractor; the call blocks until the corresponding
|
|
133
|
+
# `CDC::Core::ProcessorResult` is available or until the optional timeout
|
|
134
|
+
# is reached.
|
|
135
|
+
#
|
|
31
136
|
# @param item [Object]
|
|
137
|
+
# Shareable work item, usually a `CDC::Core::ChangeEvent`.
|
|
138
|
+
# @raise [ShutdownError]
|
|
139
|
+
# Raised when work is submitted after {#shutdown} has started.
|
|
32
140
|
# @return [CDC::Core::ProcessorResult]
|
|
141
|
+
# Normalized processor result. Processor exceptions are captured as
|
|
142
|
+
# failure results rather than escaping directly from the worker Ractor.
|
|
33
143
|
def process(item)
|
|
34
144
|
process_many([item]).fetch(0)
|
|
35
145
|
end
|
|
36
146
|
|
|
37
147
|
# Process many work items using the pre-warmed worker pool.
|
|
38
148
|
#
|
|
39
|
-
#
|
|
149
|
+
# Each item is made shareable before dispatch. Items are assigned to worker
|
|
150
|
+
# inboxes using round-robin selection. Responses are collected through a
|
|
151
|
+
# per-call reply port and returned in the same order as the input array.
|
|
40
152
|
#
|
|
41
153
|
# @param items [Array<Object>]
|
|
154
|
+
# Work items to process. Empty arrays are valid and return an empty
|
|
155
|
+
# frozen array.
|
|
156
|
+
# @raise [ShutdownError]
|
|
157
|
+
# Raised when work is submitted after {#shutdown} has started.
|
|
42
158
|
# @return [Array<CDC::Core::ProcessorResult>]
|
|
159
|
+
# Frozen array of normalized results, ordered to match `items`.
|
|
43
160
|
def process_many(items)
|
|
44
|
-
raise ShutdownError, "processor pool has been shut down" if @shutdown
|
|
45
|
-
|
|
46
161
|
work_items = items.map { |item| ::Ractor.make_shareable(item) }
|
|
47
162
|
reply_port = ::Ractor::Port.new
|
|
48
163
|
|
|
49
|
-
work_items
|
|
50
|
-
next_worker.send([index, item, reply_port])
|
|
51
|
-
end
|
|
164
|
+
dispatch(work_items, reply_port)
|
|
52
165
|
|
|
53
166
|
collect_results(reply_port, work_items.length)
|
|
54
167
|
ensure
|
|
55
168
|
reply_port&.close
|
|
56
169
|
end
|
|
57
170
|
|
|
58
|
-
# Shut down the pool.
|
|
171
|
+
# Shut down the pool and wait for worker Ractors to exit.
|
|
172
|
+
#
|
|
173
|
+
# Shutdown is idempotent. The first caller signals all worker inboxes with
|
|
174
|
+
# a stop message and waits for workers to join. Later calls return without
|
|
175
|
+
# doing anything.
|
|
59
176
|
#
|
|
60
177
|
# @return [void]
|
|
61
178
|
def shutdown
|
|
62
|
-
|
|
179
|
+
@dispatch_mutex.synchronize do
|
|
180
|
+
return if @shutdown
|
|
63
181
|
|
|
64
|
-
|
|
182
|
+
@shutdown = true
|
|
183
|
+
signal_workers
|
|
184
|
+
end
|
|
65
185
|
|
|
66
|
-
signal_workers
|
|
67
186
|
wait_for_workers
|
|
68
187
|
end
|
|
69
188
|
|
|
70
189
|
private
|
|
71
190
|
|
|
191
|
+
def dispatch(work_items, reply_port)
|
|
192
|
+
@dispatch_mutex.synchronize do
|
|
193
|
+
raise ShutdownError, "processor pool has been shut down" if @shutdown
|
|
194
|
+
|
|
195
|
+
work_items.each_with_index do |item, index|
|
|
196
|
+
next_worker_inbox.send([index, item, reply_port])
|
|
197
|
+
end
|
|
198
|
+
end
|
|
199
|
+
end
|
|
200
|
+
|
|
72
201
|
def signal_workers
|
|
73
|
-
@
|
|
74
|
-
|
|
202
|
+
@worker_inboxes.each do |inbox|
|
|
203
|
+
inbox.send(nil)
|
|
75
204
|
rescue Ractor::ClosedError
|
|
76
205
|
# Already stopped.
|
|
77
206
|
end
|
|
@@ -86,7 +215,10 @@ module CDC
|
|
|
86
215
|
end
|
|
87
216
|
|
|
88
217
|
def wait_for_workers_with_timeout
|
|
89
|
-
|
|
218
|
+
timeout = @configuration.timeout
|
|
219
|
+
return unless timeout
|
|
220
|
+
|
|
221
|
+
deadline = Process.clock_gettime(Process::CLOCK_MONOTONIC) + timeout
|
|
90
222
|
|
|
91
223
|
@workers.each do |worker|
|
|
92
224
|
remaining = deadline - Process.clock_gettime(Process::CLOCK_MONOTONIC)
|
|
@@ -106,38 +238,57 @@ module CDC
|
|
|
106
238
|
"#{processor.class} must declare ractor_safe!"
|
|
107
239
|
end
|
|
108
240
|
|
|
109
|
-
def build_worker(processor)
|
|
110
|
-
::Ractor.new
|
|
111
|
-
|
|
112
|
-
message = ::Ractor.receive
|
|
113
|
-
break if message.nil?
|
|
241
|
+
def build_worker(processor)
|
|
242
|
+
boot_port = ::Ractor::Port.new
|
|
243
|
+
worker = start_worker(processor, boot_port)
|
|
114
244
|
|
|
115
|
-
|
|
245
|
+
[worker, boot_port.receive]
|
|
246
|
+
ensure
|
|
247
|
+
boot_port.close
|
|
248
|
+
end
|
|
249
|
+
|
|
250
|
+
def start_worker(processor, boot_port)
|
|
251
|
+
::Ractor.new(processor, boot_port) do |safe_processor, ready_port|
|
|
252
|
+
inbox = ::Ractor::Port.new
|
|
253
|
+
ready_port << inbox
|
|
254
|
+
|
|
255
|
+
CDC::Parallel::ProcessorPool.send(:run_worker_loop, safe_processor, inbox)
|
|
256
|
+
end
|
|
257
|
+
end
|
|
116
258
|
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
rescue StandardError => e
|
|
122
|
-
CDC::Parallel::ResultCollector.worker_failure(e)
|
|
123
|
-
end
|
|
259
|
+
def self.run_worker_loop(safe_processor, inbox)
|
|
260
|
+
loop do
|
|
261
|
+
message = inbox.receive
|
|
262
|
+
break if message.nil?
|
|
124
263
|
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
264
|
+
index, item, reply_port = message
|
|
265
|
+
response = worker_response(safe_processor, item)
|
|
266
|
+
|
|
267
|
+
begin
|
|
268
|
+
reply_port << [index, response]
|
|
269
|
+
rescue Ractor::ClosedError
|
|
270
|
+
# The caller may have timed out and closed the reply port.
|
|
130
271
|
end
|
|
131
272
|
end
|
|
132
273
|
end
|
|
274
|
+
private_class_method :run_worker_loop
|
|
133
275
|
|
|
134
|
-
def
|
|
135
|
-
|
|
276
|
+
def self.worker_response(safe_processor, item)
|
|
277
|
+
CDC::Parallel::ResultCollector.worker_success(
|
|
278
|
+
safe_processor.process(item)
|
|
279
|
+
)
|
|
280
|
+
rescue StandardError => e
|
|
281
|
+
CDC::Parallel::ResultCollector.worker_failure(e)
|
|
282
|
+
end
|
|
283
|
+
private_class_method :worker_response
|
|
284
|
+
|
|
285
|
+
def next_worker_inbox
|
|
286
|
+
inbox = @worker_inboxes[@next_worker]
|
|
136
287
|
|
|
137
288
|
@next_worker += 1
|
|
138
|
-
@next_worker = 0 if @next_worker >= @
|
|
289
|
+
@next_worker = 0 if @next_worker >= @worker_inboxes.length
|
|
139
290
|
|
|
140
|
-
|
|
291
|
+
inbox
|
|
141
292
|
end
|
|
142
293
|
|
|
143
294
|
def collect_results(reply_port, count)
|
|
@@ -161,7 +312,10 @@ module CDC
|
|
|
161
312
|
end
|
|
162
313
|
|
|
163
314
|
def collect_results_with_timeout(reply_port, results)
|
|
164
|
-
|
|
315
|
+
timeout = @configuration.timeout
|
|
316
|
+
return results.freeze unless timeout
|
|
317
|
+
|
|
318
|
+
deadline = Process.clock_gettime(Process::CLOCK_MONOTONIC) + timeout
|
|
165
319
|
|
|
166
320
|
results.length.times do
|
|
167
321
|
remaining = deadline - Process.clock_gettime(Process::CLOCK_MONOTONIC)
|
|
@@ -2,22 +2,58 @@
|
|
|
2
2
|
|
|
3
3
|
module CDC
|
|
4
4
|
module Parallel
|
|
5
|
-
#
|
|
5
|
+
# Converts raw worker responses into `CDC::Core::ProcessorResult` objects.
|
|
6
|
+
#
|
|
7
|
+
# Ractors cannot freely share arbitrary mutable Ruby objects. Worker
|
|
8
|
+
# responses must therefore be normalized into shareable payloads before they
|
|
9
|
+
# cross back to the caller. `ResultCollector` owns that small translation
|
|
10
|
+
# boundary.
|
|
11
|
+
#
|
|
12
|
+
# The worker side uses {worker_success} and {worker_failure}. The caller side
|
|
13
|
+
# uses {normalize} to convert those payloads into the public result contract.
|
|
14
|
+
#
|
|
15
|
+
# @example Normalizing a processor return value
|
|
16
|
+
# response = CDC::Parallel::ResultCollector.worker_success(value)
|
|
17
|
+
# result = CDC::Parallel::ResultCollector.normalize(response)
|
|
18
|
+
#
|
|
19
|
+
# @example Normalizing a worker exception
|
|
20
|
+
# response = CDC::Parallel::ResultCollector.worker_failure(error)
|
|
21
|
+
# result = CDC::Parallel::ResultCollector.normalize(response)
|
|
22
|
+
# result.failure? #=> true
|
|
23
|
+
#
|
|
24
|
+
# @api public
|
|
6
25
|
class ResultCollector
|
|
26
|
+
# Internal marker used to identify serialized worker failures.
|
|
27
|
+
#
|
|
28
|
+
# @return [Symbol]
|
|
7
29
|
FAILURE_MARKER = :__cdc_parallel_failure__
|
|
8
30
|
|
|
9
31
|
# Build a shareable success payload that can safely cross a Ractor boundary.
|
|
10
32
|
#
|
|
33
|
+
# If the processor already returned a `CDC::Core::ProcessorResult`, that
|
|
34
|
+
# result is preserved. Any other shareable value will later be wrapped in a
|
|
35
|
+
# success result by {normalize}.
|
|
36
|
+
#
|
|
11
37
|
# @param value [Object]
|
|
38
|
+
# Processor return value.
|
|
39
|
+
# @raise [Ractor::Error]
|
|
40
|
+
# Raised by Ruby when the value cannot be made shareable.
|
|
12
41
|
# @return [Object]
|
|
42
|
+
# Shareable success payload.
|
|
13
43
|
def self.worker_success(value)
|
|
14
44
|
::Ractor.make_shareable(value)
|
|
15
45
|
end
|
|
16
46
|
|
|
17
47
|
# Build a shareable failure payload that can safely cross a Ractor boundary.
|
|
18
48
|
#
|
|
49
|
+
# Exceptions themselves are not used as the cross-Ractor payload. Instead,
|
|
50
|
+
# the class name, message, and backtrace are serialized into a simple hash
|
|
51
|
+
# that can be reconstructed as a {ProcessorExecutionError} by {normalize}.
|
|
52
|
+
#
|
|
19
53
|
# @param error [Exception]
|
|
54
|
+
# Exception raised inside a worker Ractor.
|
|
20
55
|
# @return [Hash]
|
|
56
|
+
# Shareable serialized failure payload.
|
|
21
57
|
def self.worker_failure(error)
|
|
22
58
|
::Ractor.make_shareable(
|
|
23
59
|
{
|
|
@@ -29,9 +65,14 @@ module CDC
|
|
|
29
65
|
)
|
|
30
66
|
end
|
|
31
67
|
|
|
32
|
-
# Normalize a worker return value into a ProcessorResult
|
|
68
|
+
# Normalize a worker return value into a `CDC::Core::ProcessorResult`.
|
|
69
|
+
#
|
|
70
|
+
# Failure payloads become failed processor results containing a
|
|
71
|
+
# {ProcessorExecutionError}. Existing processor results are returned
|
|
72
|
+
# unchanged. Other values are wrapped in a successful processor result.
|
|
33
73
|
#
|
|
34
74
|
# @param value [Object]
|
|
75
|
+
# Raw worker response.
|
|
35
76
|
# @return [CDC::Core::ProcessorResult]
|
|
36
77
|
def self.normalize(value)
|
|
37
78
|
if worker_failure?(value)
|
data/lib/cdc/parallel/router.rb
CHANGED
|
@@ -2,10 +2,32 @@
|
|
|
2
2
|
|
|
3
3
|
module CDC
|
|
4
4
|
module Parallel
|
|
5
|
-
# Routes
|
|
5
|
+
# Routes normalized `cdc-core` work items to the matching parallel runtime
|
|
6
|
+
# primitive.
|
|
7
|
+
#
|
|
8
|
+
# `Router` is deliberately small. It does not inspect source-specific
|
|
9
|
+
# payloads, apply filters, decode database values, or decide scheduling
|
|
10
|
+
# policy. Its responsibility is only to look at the already-normalized
|
|
11
|
+
# `cdc-core` object shape and forward it to the pool that knows how to
|
|
12
|
+
# process that shape.
|
|
13
|
+
#
|
|
14
|
+
# @example Routing a single event
|
|
15
|
+
# router.process(change_event)
|
|
16
|
+
#
|
|
17
|
+
# @example Routing a transaction envelope
|
|
18
|
+
# router.process(transaction_envelope)
|
|
19
|
+
#
|
|
20
|
+
# @see CDC::Parallel::ProcessorPool
|
|
21
|
+
# @see CDC::Parallel::TransactionPool
|
|
22
|
+
# @api public
|
|
6
23
|
class Router
|
|
24
|
+
# Create a router for event and transaction work items.
|
|
25
|
+
#
|
|
7
26
|
# @param processor_pool [ProcessorPool]
|
|
27
|
+
# Pool used for individual `CDC::Core::ChangeEvent` objects.
|
|
8
28
|
# @param transaction_pool [TransactionPool]
|
|
29
|
+
# Pool used for `CDC::Core::TransactionEnvelope` objects.
|
|
30
|
+
# @return [void]
|
|
9
31
|
def initialize(processor_pool:, transaction_pool:)
|
|
10
32
|
@processor_pool = processor_pool
|
|
11
33
|
@transaction_pool = transaction_pool
|
|
@@ -14,6 +36,9 @@ module CDC
|
|
|
14
36
|
# Process a supported CDC work item.
|
|
15
37
|
#
|
|
16
38
|
# @param item [CDC::Core::ChangeEvent, CDC::Core::TransactionEnvelope]
|
|
39
|
+
# Normalized CDC work item.
|
|
40
|
+
# @raise [UnsupportedWorkItemError]
|
|
41
|
+
# Raised when the item is not a supported `cdc-core` work item shape.
|
|
17
42
|
# @return [CDC::Core::ProcessorResult]
|
|
18
43
|
def process(item)
|
|
19
44
|
case item
|
data/lib/cdc/parallel/runtime.rb
CHANGED
|
@@ -2,11 +2,56 @@
|
|
|
2
2
|
|
|
3
3
|
module CDC
|
|
4
4
|
module Parallel
|
|
5
|
-
# High-level Ractor runtime facade for cdc-core processors.
|
|
5
|
+
# High-level Ractor runtime facade for `cdc-core` processors.
|
|
6
|
+
#
|
|
7
|
+
# `Runtime` is the primary public entry point for applications that want to
|
|
8
|
+
# execute normalized CDC work items with `cdc-parallel`. It wires together a
|
|
9
|
+
# {ProcessorPool}, a {TransactionPool}, and a {Router} so callers can submit
|
|
10
|
+
# either a single `CDC::Core::ChangeEvent` or a
|
|
11
|
+
# `CDC::Core::TransactionEnvelope` through one object.
|
|
12
|
+
#
|
|
13
|
+
# Use this class when you want the default `cdc-parallel` behavior:
|
|
14
|
+
#
|
|
15
|
+
# * validate that the processor declared `ractor_safe!`
|
|
16
|
+
# * boot a fixed set of worker Ractors
|
|
17
|
+
# * route events and transaction envelopes to the right pool
|
|
18
|
+
# * return `CDC::Core::ProcessorResult` objects
|
|
19
|
+
# * shut down all worker resources together
|
|
20
|
+
#
|
|
21
|
+
# @example Processing a change event
|
|
22
|
+
# runtime = CDC::Parallel::Runtime.new(
|
|
23
|
+
# processor: AnalyticsProcessor.new,
|
|
24
|
+
# size: 4,
|
|
25
|
+
# timeout: 5
|
|
26
|
+
# )
|
|
27
|
+
#
|
|
28
|
+
# result = runtime.process(change_event)
|
|
29
|
+
# result.success? #=> true
|
|
30
|
+
#
|
|
31
|
+
# runtime.shutdown
|
|
32
|
+
#
|
|
33
|
+
# @example Processing a transaction envelope
|
|
34
|
+
# result = runtime.process_transaction(transaction)
|
|
35
|
+
#
|
|
36
|
+
# @note `Runtime` is an execution facade, not a source adapter. It expects
|
|
37
|
+
# work that has already been normalized into `cdc-core` primitives.
|
|
38
|
+
# @see CDC::Parallel::ProcessorPool
|
|
39
|
+
# @see CDC::Parallel::TransactionPool
|
|
40
|
+
# @see CDC::Parallel::Router
|
|
41
|
+
# @api public
|
|
6
42
|
class Runtime
|
|
43
|
+
# Create a runtime with event and transaction pools.
|
|
44
|
+
#
|
|
7
45
|
# @param processor [CDC::Core::Processor]
|
|
46
|
+
# Ractor-safe processor used for both event and transaction processing.
|
|
8
47
|
# @param size [Integer]
|
|
9
|
-
#
|
|
48
|
+
# Number of worker Ractors per internal pool.
|
|
49
|
+
# @param timeout [Numeric, nil]
|
|
50
|
+
# Optional timeout in seconds for result collection and shutdown waits.
|
|
51
|
+
# @raise [UnsafeProcessorError]
|
|
52
|
+
# Raised when the processor class has not declared `ractor_safe!`.
|
|
53
|
+
# @raise [ArgumentError]
|
|
54
|
+
# Raised when size or timeout is invalid.
|
|
10
55
|
# @return [void]
|
|
11
56
|
def initialize(processor:, size: Etc.nprocessors, timeout: nil)
|
|
12
57
|
@processor_pool = ProcessorPool.new(processor:, size:, timeout:)
|
|
@@ -15,9 +60,18 @@ module CDC
|
|
|
15
60
|
@shutdown = false
|
|
16
61
|
end
|
|
17
62
|
|
|
18
|
-
# Process a
|
|
63
|
+
# Process a supported normalized CDC work item.
|
|
64
|
+
#
|
|
65
|
+
# Supported items are `CDC::Core::ChangeEvent` and
|
|
66
|
+
# `CDC::Core::TransactionEnvelope`. Unsupported objects raise
|
|
67
|
+
# {UnsupportedWorkItemError} from the router.
|
|
19
68
|
#
|
|
20
69
|
# @param item [CDC::Core::ChangeEvent, CDC::Core::TransactionEnvelope]
|
|
70
|
+
# Normalized CDC work item.
|
|
71
|
+
# @raise [ShutdownError]
|
|
72
|
+
# Raised when called after {#shutdown}.
|
|
73
|
+
# @raise [UnsupportedWorkItemError]
|
|
74
|
+
# Raised for objects that are not supported CDC work item shapes.
|
|
21
75
|
# @return [CDC::Core::ProcessorResult]
|
|
22
76
|
def process(item)
|
|
23
77
|
raise ShutdownError, "runtime has been shut down" if @shutdown
|
|
@@ -25,7 +79,11 @@ module CDC
|
|
|
25
79
|
@router.process(item)
|
|
26
80
|
end
|
|
27
81
|
|
|
28
|
-
#
|
|
82
|
+
# Process a transaction envelope.
|
|
83
|
+
#
|
|
84
|
+
# This method is a readability alias for transaction-oriented call sites.
|
|
85
|
+
# It delegates to {#process}, so it has the same validation, shutdown, and
|
|
86
|
+
# result behavior.
|
|
29
87
|
#
|
|
30
88
|
# @param transaction [CDC::Core::TransactionEnvelope]
|
|
31
89
|
# @return [CDC::Core::ProcessorResult]
|
|
@@ -35,6 +93,9 @@ module CDC
|
|
|
35
93
|
|
|
36
94
|
# Shut down all runtime resources.
|
|
37
95
|
#
|
|
96
|
+
# Shutdown is idempotent and cascades to the internal event and transaction
|
|
97
|
+
# pools. After shutdown, {#process} raises {ShutdownError}.
|
|
98
|
+
#
|
|
38
99
|
# @return [void]
|
|
39
100
|
def shutdown
|
|
40
101
|
return if @shutdown
|
|
@@ -2,30 +2,81 @@
|
|
|
2
2
|
|
|
3
3
|
module CDC
|
|
4
4
|
module Parallel
|
|
5
|
-
# Processes a TransactionEnvelope as
|
|
5
|
+
# Processes a `CDC::Core::TransactionEnvelope` as one transaction-oriented
|
|
6
|
+
# work unit.
|
|
7
|
+
#
|
|
8
|
+
# `TransactionPool` uses {ProcessorPool} to process the events inside an
|
|
9
|
+
# envelope and then collapses the event-level results into one
|
|
10
|
+
# `CDC::Core::ProcessorResult` for the whole transaction.
|
|
11
|
+
#
|
|
12
|
+
# This class preserves the transaction boundary at the API level: callers
|
|
13
|
+
# submit a transaction envelope and receive a single success or failure
|
|
14
|
+
# result. Event results inside the transaction are still produced by the
|
|
15
|
+
# configured processor and are returned as the success value when every event
|
|
16
|
+
# succeeds.
|
|
17
|
+
#
|
|
18
|
+
# @example Processing a transaction envelope
|
|
19
|
+
# pool = CDC::Parallel::TransactionPool.new(
|
|
20
|
+
# processor: AuditProcessor.new,
|
|
21
|
+
# size: 4
|
|
22
|
+
# )
|
|
23
|
+
#
|
|
24
|
+
# result = pool.process(transaction)
|
|
25
|
+
# result.success? #=> true
|
|
26
|
+
#
|
|
27
|
+
# @note This class preserves the transaction as a result boundary. More
|
|
28
|
+
# advanced ordering, checkpointing, retry, and atomic sink semantics belong
|
|
29
|
+
# to higher-level runtime/sink contracts.
|
|
30
|
+
# @see CDC::Parallel::ProcessorPool
|
|
31
|
+
# @api public
|
|
6
32
|
class TransactionPool
|
|
33
|
+
# Create a transaction pool.
|
|
34
|
+
#
|
|
7
35
|
# @param processor [CDC::Core::Processor]
|
|
36
|
+
# Ractor-safe processor used for each event inside the transaction.
|
|
8
37
|
# @param size [Integer]
|
|
9
|
-
#
|
|
38
|
+
# Number of worker Ractors in the underlying processor pool.
|
|
39
|
+
# @param timeout [Numeric, nil]
|
|
40
|
+
# Optional timeout in seconds for result collection and shutdown waits.
|
|
41
|
+
# @raise [UnsafeProcessorError]
|
|
42
|
+
# Raised when the processor class has not declared `ractor_safe!`.
|
|
43
|
+
# @return [void]
|
|
10
44
|
def initialize(processor:, size: Etc.nprocessors, timeout: nil)
|
|
11
45
|
@processor_pool = ProcessorPool.new(processor:, size:, timeout:)
|
|
12
46
|
end
|
|
13
47
|
|
|
14
48
|
# Process all events inside a transaction envelope.
|
|
15
49
|
#
|
|
50
|
+
# The returned result is successful only when every event result succeeds.
|
|
51
|
+
# If any event fails, the transaction result is a failure using the first
|
|
52
|
+
# failure error and the complete event-result list as context.
|
|
53
|
+
#
|
|
16
54
|
# @param transaction [CDC::Core::TransactionEnvelope]
|
|
55
|
+
# Transaction envelope whose `events` will be processed.
|
|
17
56
|
# @return [CDC::Core::ProcessorResult]
|
|
57
|
+
# Success containing the ordered event results, or failure containing the
|
|
58
|
+
# first event error.
|
|
18
59
|
def process(transaction)
|
|
19
60
|
results = @processor_pool.process_many(transaction.events).freeze
|
|
20
61
|
failure = results.find(&:failure?)
|
|
21
62
|
|
|
22
|
-
|
|
63
|
+
if failure
|
|
64
|
+
error = failure.error || ProcessorExecutionError.new(
|
|
65
|
+
original_class: "CDC::Core::ProcessorResult",
|
|
66
|
+
original_message: "failed processor result did not include an error"
|
|
67
|
+
)
|
|
68
|
+
|
|
69
|
+
return CDC::Core::ProcessorResult.failure(error, event: results)
|
|
70
|
+
end
|
|
23
71
|
|
|
24
72
|
CDC::Core::ProcessorResult.success(results)
|
|
25
73
|
end
|
|
26
74
|
|
|
27
75
|
# Shut down worker resources.
|
|
28
76
|
#
|
|
77
|
+
# Delegates to the underlying {ProcessorPool}. Shutdown is idempotent
|
|
78
|
+
# because the underlying pool is idempotent.
|
|
79
|
+
#
|
|
29
80
|
# @return [void]
|
|
30
81
|
def shutdown
|
|
31
82
|
@processor_pool.shutdown
|
data/lib/cdc/parallel/version.rb
CHANGED
|
@@ -3,6 +3,11 @@
|
|
|
3
3
|
module CDC
|
|
4
4
|
module Parallel
|
|
5
5
|
# Current cdc-parallel version.
|
|
6
|
-
|
|
6
|
+
#
|
|
7
|
+
# This constant is used by RubyGems and by applications that need to inspect
|
|
8
|
+
# the loaded runtime version at boot time.
|
|
9
|
+
#
|
|
10
|
+
# @return [String]
|
|
11
|
+
VERSION = "0.2.3"
|
|
7
12
|
end
|
|
8
13
|
end
|