cdc-solid-queue 0.1.2 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +11 -0
- data/README.md +79 -1
- data/lib/cdc/solid_queue/checkpoint.rb +2 -0
- data/lib/cdc/solid_queue/configuration.rb +26 -1
- data/lib/cdc/solid_queue/downstream_processor.rb +100 -0
- data/lib/cdc/solid_queue/enqueuer.rb +15 -4
- data/lib/cdc/solid_queue/event_serializer.rb +65 -13
- data/lib/cdc/solid_queue/processor_job.rb +10 -3
- data/lib/cdc/solid_queue/runner.rb +18 -4
- data/lib/cdc/solid_queue/version.rb +1 -1
- data/lib/cdc/solid_queue.rb +1 -0
- data/sig/cdc/solid_queue.rbs +54 -4
- metadata +2 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: abd5d503ed994dcf6881b7636c24a2669d358e8e29ab3f0c88aef1e0236ed998
|
|
4
|
+
data.tar.gz: 36235e6ab5e44b9b40f75b5a6ca3cfc41fd03746dca852074ee14509c1ad4c07
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: e9d3c198e816c337815e2f7a840365b9176a789b9ab142f46ec9bc65f2414a9024c639f2e7d4b06ff2ce6e357d7e3b9376292997a435d9779f07f156ed8f7a3a
|
|
7
|
+
data.tar.gz: e7f0efe0daa73c5410eb52ea4b1076ec9c64b8022e80c5793bfccbd8b8540517c8ac8fe8d29b5cc0f6199400857a228661923e1918c0e3627035364ec347a106
|
data/CHANGELOG.md
CHANGED
|
@@ -2,6 +2,17 @@
|
|
|
2
2
|
|
|
3
3
|
## Unreleased
|
|
4
4
|
|
|
5
|
+
## 0.3.0
|
|
6
|
+
|
|
7
|
+
- Batch-sized enqueueing now feeds arrays into downstream `process_many`, so
|
|
8
|
+
the Solid Queue job path matches batch-style downstream runtimes.
|
|
9
|
+
|
|
10
|
+
## 0.2.0
|
|
11
|
+
|
|
12
|
+
- Optional downstream processor delegation to `cdc-concurrent` and `cdc-parallel`.
|
|
13
|
+
- Rails example now demonstrates `cdc-concurrent` downstream processing.
|
|
14
|
+
- Benchmark can measure direct downstream delegation overhead.
|
|
15
|
+
|
|
5
16
|
## 0.1.2
|
|
6
17
|
|
|
7
18
|
- Minimal Rails app example.
|
data/README.md
CHANGED
|
@@ -52,7 +52,47 @@ end
|
|
|
52
52
|
`config.queue` is applied through Active Job's `set(queue:)` API when the job
|
|
53
53
|
class supports it. When `preserve_order` is enabled, the enqueued payload also
|
|
54
54
|
includes cdc-solid-queue metadata with the configured ordering key and computed
|
|
55
|
-
ordering value.
|
|
55
|
+
ordering value. Set `config.batch_size` above `1` to enqueue multiple CDC
|
|
56
|
+
events in one Solid Queue job and hand the batch to downstream `process_many`.
|
|
57
|
+
|
|
58
|
+
## Downstream Processing
|
|
59
|
+
|
|
60
|
+
Processor jobs can delegate work to CDC downstream runtime primitives. The
|
|
61
|
+
default downstream runtime is `:concurrent`, backed by `cdc-concurrent`, which
|
|
62
|
+
fits Solid Queue jobs that spend most of their time on I/O. CPU-heavy work can
|
|
63
|
+
opt into `:parallel`, backed by `cdc-parallel`, in Ruby 4 applications.
|
|
64
|
+
|
|
65
|
+
```ruby
|
|
66
|
+
class WebhookProcessor < CDC::Core::Processor
|
|
67
|
+
concurrent_safe!
|
|
68
|
+
|
|
69
|
+
def process(event)
|
|
70
|
+
# perform I/O-bound work
|
|
71
|
+
CDC::Core::ProcessorResult.success(event)
|
|
72
|
+
end
|
|
73
|
+
end
|
|
74
|
+
|
|
75
|
+
CDC::SolidQueue.configure do |config|
|
|
76
|
+
config.processor_job = UserChangedJob
|
|
77
|
+
config.downstream_processor = WebhookProcessor.new
|
|
78
|
+
config.downstream_runtime = :concurrent
|
|
79
|
+
config.downstream_options = { concurrency: 100, timeout: 5.0 }
|
|
80
|
+
end
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
Use `:parallel` only when the processor is Ractor-safe and the application runs
|
|
84
|
+
on Ruby 4:
|
|
85
|
+
|
|
86
|
+
```ruby
|
|
87
|
+
config.downstream_runtime = :parallel
|
|
88
|
+
config.downstream_options = { size: 4, timeout: 5 }
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
Both runtime gems are optional. Add `cdc-concurrent` or `cdc-parallel` to the
|
|
92
|
+
application Gemfile when selecting that runtime. Without a configured
|
|
93
|
+
`downstream_processor`, `CDC::SolidQueue::ProcessorJob` falls back to the job's
|
|
94
|
+
own `#process(event)` method, or `#process_many(events)` when a batched payload
|
|
95
|
+
reaches a job that defines it.
|
|
56
96
|
|
|
57
97
|
## Rails Task
|
|
58
98
|
|
|
@@ -96,6 +136,11 @@ bundle exec rake benchmark:enqueue
|
|
|
96
136
|
```
|
|
97
137
|
|
|
98
138
|
Set `CDC_SOLID_QUEUE_BENCH_EVENTS` to control the event count.
|
|
139
|
+
Set `CDC_SOLID_QUEUE_BENCH_MODE=downstream_direct` to measure direct downstream
|
|
140
|
+
processor delegation overhead without Solid Queue enqueue translation.
|
|
141
|
+
Set `CDC_SOLID_QUEUE_BENCH_MODE=downstream_batch` to measure batched downstream
|
|
142
|
+
delegation overhead. Set `CDC_SOLID_QUEUE_BENCH_BATCH_SIZE` to control the batch
|
|
143
|
+
width.
|
|
99
144
|
|
|
100
145
|
Example local result on Ruby 3.4.9:
|
|
101
146
|
|
|
@@ -113,6 +158,39 @@ In that run, `cdc-solid-queue` translated and dispatched about 63.6k synthetic
|
|
|
113
158
|
events per second, so real throughput will usually be dominated by Solid Queue
|
|
114
159
|
persistence, database latency, job execution cost, and CDC source throughput.
|
|
115
160
|
|
|
161
|
+
Example `downstream_direct` results on the same machine:
|
|
162
|
+
|
|
163
|
+
```text
|
|
164
|
+
mode=downstream_direct events=100000000 elapsed=16.2669s rate=6147457.32 events/s
|
|
165
|
+
mode=downstream_direct events=1000000000 elapsed=157.8708s rate=6334292.58 events/s
|
|
166
|
+
```
|
|
167
|
+
|
|
168
|
+
These runs measure the lowest-overhead downstream delegation path:
|
|
169
|
+
|
|
170
|
+
```text
|
|
171
|
+
CDC::SolidQueue::DownstreamProcessor
|
|
172
|
+
-> :direct runtime branch
|
|
173
|
+
-> BenchmarkProcessor#process(event)
|
|
174
|
+
```
|
|
175
|
+
|
|
176
|
+
They do not measure Solid Queue enqueueing, Active Job serialization,
|
|
177
|
+
PostgreSQL CDC, pgoutput parsing or decoding, `cdc-concurrent`,
|
|
178
|
+
`cdc-parallel`, real application processor work, network I/O, or database I/O.
|
|
179
|
+
The result means the direct downstream adapter can dispatch about 6.1M to 6.3M
|
|
180
|
+
prebuilt synthetic events per second on that machine, making the adapter layer
|
|
181
|
+
negligible compared with real persistence, CDC source, and processor costs.
|
|
182
|
+
|
|
183
|
+
Batch mode example:
|
|
184
|
+
|
|
185
|
+
```text
|
|
186
|
+
mode=downstream_batch events=100000000 elapsed=... rate=... events/s
|
|
187
|
+
```
|
|
188
|
+
|
|
189
|
+
Batch mode measures one more layer: batch deserialization plus `process_many`
|
|
190
|
+
dispatch through the downstream adapter. When a downstream runtime such as
|
|
191
|
+
`cdc-concurrent` or `cdc-parallel` is configured, that batch is handed to the
|
|
192
|
+
runtime pool in one call instead of event-by-event.
|
|
193
|
+
|
|
116
194
|
## MVP Checkpoint Rule
|
|
117
195
|
|
|
118
196
|
A checkpoint advances after the Solid Queue job is durably inserted. Job execution success is handled by Solid Queue retry semantics.
|
|
@@ -27,6 +27,8 @@ module CDC
|
|
|
27
27
|
private
|
|
28
28
|
|
|
29
29
|
def position_for(event)
|
|
30
|
+
return event.map { |item| position_for(item) }.compact.last if event.is_a?(Array)
|
|
31
|
+
|
|
30
32
|
payload = EventSerializer.dump(event)
|
|
31
33
|
payload['source_position'] || payload['commit_lsn'] || payload.dig('metadata', 'wal_end_lsn')
|
|
32
34
|
rescue SerializationError
|
|
@@ -12,8 +12,11 @@ module CDC
|
|
|
12
12
|
SUPPORTED_SOURCE = :postgresql
|
|
13
13
|
# Supported ordering scopes for serialized CDC events.
|
|
14
14
|
ORDERING_KEYS = %i[identity primary_key relation transaction global none].freeze
|
|
15
|
+
# Supported downstream execution runtimes for processor jobs.
|
|
16
|
+
DOWNSTREAM_RUNTIMES = %i[concurrent parallel direct].freeze
|
|
15
17
|
|
|
16
|
-
attr_accessor :processor_job, :queue, :preserve_order, :ordering_key, :postgresql, :checkpoint
|
|
18
|
+
attr_accessor :processor_job, :queue, :preserve_order, :ordering_key, :postgresql, :checkpoint,
|
|
19
|
+
:downstream_processor, :downstream_runtime, :downstream_options, :batch_size
|
|
17
20
|
|
|
18
21
|
# Build a configuration with safe defaults.
|
|
19
22
|
def initialize
|
|
@@ -23,6 +26,10 @@ module CDC
|
|
|
23
26
|
@ordering_key = :identity
|
|
24
27
|
@postgresql = {}
|
|
25
28
|
@checkpoint = Checkpoint.new
|
|
29
|
+
@downstream_processor = nil
|
|
30
|
+
@downstream_runtime = :concurrent
|
|
31
|
+
@downstream_options = {}
|
|
32
|
+
@batch_size = 1
|
|
26
33
|
end
|
|
27
34
|
|
|
28
35
|
# Validate this configuration.
|
|
@@ -37,6 +44,8 @@ module CDC
|
|
|
37
44
|
validate_ordering_key!
|
|
38
45
|
validate_postgresql!
|
|
39
46
|
validate_checkpoint!
|
|
47
|
+
validate_downstream!
|
|
48
|
+
validate_batch_size!
|
|
40
49
|
true
|
|
41
50
|
end
|
|
42
51
|
# rubocop:enable Naming/PredicateMethod
|
|
@@ -81,6 +90,22 @@ module CDC
|
|
|
81
90
|
|
|
82
91
|
raise ConfigurationError, 'checkpoint must respond to advance'
|
|
83
92
|
end
|
|
93
|
+
|
|
94
|
+
def validate_downstream!
|
|
95
|
+
unless DOWNSTREAM_RUNTIMES.include?(@downstream_runtime)
|
|
96
|
+
raise ConfigurationError, "downstream_runtime must be one of: #{DOWNSTREAM_RUNTIMES.join(', ')}"
|
|
97
|
+
end
|
|
98
|
+
|
|
99
|
+
return if @downstream_processor.nil? || @downstream_processor.respond_to?(:process)
|
|
100
|
+
|
|
101
|
+
raise ConfigurationError, 'downstream_processor must respond to process'
|
|
102
|
+
end
|
|
103
|
+
|
|
104
|
+
def validate_batch_size!
|
|
105
|
+
return if @batch_size.is_a?(Integer) && @batch_size.positive?
|
|
106
|
+
|
|
107
|
+
raise ConfigurationError, 'batch_size must be a positive Integer'
|
|
108
|
+
end
|
|
84
109
|
end
|
|
85
110
|
end
|
|
86
111
|
end
|
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module CDC
|
|
4
|
+
module SolidQueue
|
|
5
|
+
# Delegates processor-job work to CDC downstream runtime primitives.
|
|
6
|
+
class DownstreamProcessor
|
|
7
|
+
# @return [Configuration]
|
|
8
|
+
attr_reader :configuration
|
|
9
|
+
|
|
10
|
+
# @param configuration [Configuration]
|
|
11
|
+
def initialize(configuration)
|
|
12
|
+
@configuration = configuration
|
|
13
|
+
end
|
|
14
|
+
|
|
15
|
+
# Process one normalized CDC work item.
|
|
16
|
+
#
|
|
17
|
+
# @param item [Object, Array<Object>]
|
|
18
|
+
# @return [Object]
|
|
19
|
+
def process(item)
|
|
20
|
+
return process_many(item) if item.is_a?(Array)
|
|
21
|
+
|
|
22
|
+
process_one(item)
|
|
23
|
+
end
|
|
24
|
+
|
|
25
|
+
# Process many normalized CDC work items.
|
|
26
|
+
#
|
|
27
|
+
# @param items [Array<Object>]
|
|
28
|
+
# @return [Object]
|
|
29
|
+
def process_many(items)
|
|
30
|
+
case configuration.downstream_runtime
|
|
31
|
+
when :direct
|
|
32
|
+
process_many_direct(items)
|
|
33
|
+
when :concurrent
|
|
34
|
+
process_with_runtime(concurrent_runtime, items)
|
|
35
|
+
when :parallel
|
|
36
|
+
process_with_runtime(parallel_runtime, items)
|
|
37
|
+
else
|
|
38
|
+
raise ConfigurationError, "unsupported downstream_runtime: #{configuration.downstream_runtime.inspect}"
|
|
39
|
+
end
|
|
40
|
+
end
|
|
41
|
+
|
|
42
|
+
private
|
|
43
|
+
|
|
44
|
+
def processor
|
|
45
|
+
configuration.downstream_processor || raise(ConfigurationError, 'downstream_processor is required')
|
|
46
|
+
end
|
|
47
|
+
|
|
48
|
+
def process_one(item)
|
|
49
|
+
case configuration.downstream_runtime
|
|
50
|
+
when :direct
|
|
51
|
+
processor.process(item)
|
|
52
|
+
when :concurrent
|
|
53
|
+
unwrap_single_result(process_with_runtime(concurrent_runtime, [item]))
|
|
54
|
+
when :parallel
|
|
55
|
+
unwrap_single_result(process_with_runtime(parallel_runtime, [item]))
|
|
56
|
+
else
|
|
57
|
+
raise ConfigurationError, "unsupported downstream_runtime: #{configuration.downstream_runtime.inspect}"
|
|
58
|
+
end
|
|
59
|
+
end
|
|
60
|
+
|
|
61
|
+
def process_many_direct(items)
|
|
62
|
+
return processor.process_many(items) if processor.respond_to?(:process_many)
|
|
63
|
+
|
|
64
|
+
items.map { |item| processor.process(item) }
|
|
65
|
+
end
|
|
66
|
+
private :process_many_direct
|
|
67
|
+
|
|
68
|
+
def process_with_runtime(runtime, items)
|
|
69
|
+
runtime.process_many(items)
|
|
70
|
+
ensure
|
|
71
|
+
runtime.shutdown
|
|
72
|
+
end
|
|
73
|
+
|
|
74
|
+
def unwrap_single_result(result)
|
|
75
|
+
result.is_a?(Array) && result.length == 1 ? result.first : result
|
|
76
|
+
end
|
|
77
|
+
private :unwrap_single_result
|
|
78
|
+
|
|
79
|
+
def concurrent_runtime
|
|
80
|
+
require_runtime('cdc/concurrent', 'cdc-concurrent') unless defined?(CDC::Concurrent::Runtime)
|
|
81
|
+
CDC::Concurrent::Runtime.new(processor:, **configuration.downstream_options)
|
|
82
|
+
rescue LoadError => e
|
|
83
|
+
raise ConfigurationError, "cdc-concurrent is required for downstream_runtime :concurrent: #{e.message}"
|
|
84
|
+
end
|
|
85
|
+
|
|
86
|
+
def parallel_runtime
|
|
87
|
+
require_runtime('cdc/parallel', 'cdc-parallel') unless defined?(CDC::Parallel::Runtime)
|
|
88
|
+
CDC::Parallel::Runtime.new(processor:, **configuration.downstream_options)
|
|
89
|
+
rescue LoadError => e
|
|
90
|
+
raise ConfigurationError, "cdc-parallel is required for downstream_runtime :parallel: #{e.message}"
|
|
91
|
+
end
|
|
92
|
+
|
|
93
|
+
def require_runtime(feature, gem_name)
|
|
94
|
+
require feature
|
|
95
|
+
rescue LoadError
|
|
96
|
+
raise LoadError, "install #{gem_name} and require #{feature}"
|
|
97
|
+
end
|
|
98
|
+
end
|
|
99
|
+
end
|
|
100
|
+
end
|
|
@@ -15,10 +15,10 @@ module CDC
|
|
|
15
15
|
|
|
16
16
|
# Enqueue one CDC event.
|
|
17
17
|
#
|
|
18
|
-
# @param event [Object, Hash]
|
|
18
|
+
# @param event [Object, Hash, Array<Object>]
|
|
19
19
|
# @return [Object] Active Job return value
|
|
20
20
|
def enqueue(event)
|
|
21
|
-
payload = EventSerializer.dump(event)
|
|
21
|
+
payload = payload_for(event)
|
|
22
22
|
payload = EventSerializer.with_enqueue_metadata(payload, enqueue_metadata(payload))
|
|
23
23
|
job = configuration.processor_job
|
|
24
24
|
return async_job(job).perform_later(payload) if job.respond_to?(:perform_later)
|
|
@@ -39,14 +39,25 @@ module CDC
|
|
|
39
39
|
'queue' => configuration.queue,
|
|
40
40
|
'preserve_order' => configuration.preserve_order,
|
|
41
41
|
'ordering_key' => configuration.ordering_key,
|
|
42
|
-
'ordering_value' => ordering_value(payload)
|
|
42
|
+
'ordering_value' => ordering_value(payload),
|
|
43
|
+
'batch_size' => configuration.batch_size
|
|
43
44
|
}
|
|
44
45
|
end
|
|
45
46
|
|
|
46
47
|
def ordering_value(payload)
|
|
47
48
|
return nil unless configuration.preserve_order
|
|
48
49
|
|
|
49
|
-
|
|
50
|
+
if payload.is_a?(Array)
|
|
51
|
+
payload.map { |event| EventSerializer.ordering_value(event, configuration.ordering_key) }
|
|
52
|
+
else
|
|
53
|
+
EventSerializer.ordering_value(payload, configuration.ordering_key)
|
|
54
|
+
end
|
|
55
|
+
end
|
|
56
|
+
|
|
57
|
+
def payload_for(event)
|
|
58
|
+
return EventSerializer.dump_batch(event) if event.is_a?(Array)
|
|
59
|
+
|
|
60
|
+
EventSerializer.dump(event)
|
|
50
61
|
end
|
|
51
62
|
end
|
|
52
63
|
end
|
|
@@ -6,9 +6,18 @@ module CDC
|
|
|
6
6
|
#
|
|
7
7
|
# Payloads are plain hashes so Active Job can serialize them without needing
|
|
8
8
|
# to load the original event object in the queue database.
|
|
9
|
+
# rubocop:disable Metrics/ClassLength
|
|
9
10
|
class EventSerializer
|
|
10
11
|
# Reserved payload key for cdc-solid-queue enqueue metadata.
|
|
11
12
|
INTERNAL_METADATA_KEY = '_cdc_solid_queue'
|
|
13
|
+
# Lookup table for ordering value extraction by ordering key.
|
|
14
|
+
ORDERING_VALUE_FETCHERS = {
|
|
15
|
+
identity: ->(payload) { payload['identity'] || payload['primary_key'] },
|
|
16
|
+
primary_key: ->(payload) { payload['identity'] || payload['primary_key'] },
|
|
17
|
+
relation: ->(payload) { [payload['namespace'] || payload['schema'], payload['entity'] || payload['table']] },
|
|
18
|
+
transaction: ->(payload) { payload['transaction_id'] },
|
|
19
|
+
global: ->(payload) { payload['source_position'] || payload['commit_lsn'] }
|
|
20
|
+
}.freeze
|
|
12
21
|
|
|
13
22
|
# Serialize an event-like object.
|
|
14
23
|
#
|
|
@@ -27,6 +36,16 @@ module CDC
|
|
|
27
36
|
normalize_hash(payload)
|
|
28
37
|
end
|
|
29
38
|
|
|
39
|
+
# Serialize a batch of event-like objects.
|
|
40
|
+
#
|
|
41
|
+
# @param events [Array<Object>]
|
|
42
|
+
# @return [Array<Hash>]
|
|
43
|
+
def self.dump_batch(events)
|
|
44
|
+
raise SerializationError, 'events must be an Array' unless events.is_a?(Array)
|
|
45
|
+
|
|
46
|
+
events.map { |event| dump(event) }
|
|
47
|
+
end
|
|
48
|
+
|
|
30
49
|
# Load a serialized event payload.
|
|
31
50
|
#
|
|
32
51
|
# @param payload [Hash]
|
|
@@ -38,11 +57,23 @@ module CDC
|
|
|
38
57
|
strip_internal_metadata(normalize_hash(payload))
|
|
39
58
|
end
|
|
40
59
|
|
|
60
|
+
# Load a batch of serialized event payloads.
|
|
61
|
+
#
|
|
62
|
+
# @param payloads [Array<Hash>]
|
|
63
|
+
# @return [Array<Hash>]
|
|
64
|
+
def self.load_batch(payloads)
|
|
65
|
+
raise SerializationError, 'payloads must be an Array' unless payloads.is_a?(Array)
|
|
66
|
+
|
|
67
|
+
payloads.map { |payload| load(payload) }
|
|
68
|
+
end
|
|
69
|
+
|
|
41
70
|
# Load a serialized event payload into a CDC event when possible.
|
|
42
71
|
#
|
|
43
72
|
# @param payload [Hash]
|
|
44
73
|
# @return [CDC::Core::ChangeEvent, Hash]
|
|
45
74
|
def self.load_event(payload)
|
|
75
|
+
return load_batch(payload).map { |item| load_event(item) } if payload.is_a?(Array)
|
|
76
|
+
|
|
46
77
|
normalized = load(payload)
|
|
47
78
|
return normalized unless change_event_payload?(normalized)
|
|
48
79
|
|
|
@@ -55,6 +86,12 @@ module CDC
|
|
|
55
86
|
# @param metadata [Hash]
|
|
56
87
|
# @return [Hash]
|
|
57
88
|
def self.with_enqueue_metadata(payload, metadata)
|
|
89
|
+
if payload.is_a?(Array)
|
|
90
|
+
return payload.each_with_index.map do |child, index|
|
|
91
|
+
with_enqueue_metadata(child, metadata_for_batch_item(metadata, index))
|
|
92
|
+
end
|
|
93
|
+
end
|
|
94
|
+
|
|
58
95
|
normalized = normalize_hash(payload)
|
|
59
96
|
normalized.merge(INTERNAL_METADATA_KEY => normalize_hash(metadata))
|
|
60
97
|
end
|
|
@@ -64,6 +101,8 @@ module CDC
|
|
|
64
101
|
# @param payload [Hash]
|
|
65
102
|
# @return [Hash]
|
|
66
103
|
def self.enqueue_metadata(payload)
|
|
104
|
+
return enqueue_metadata_for_batch(payload) if payload.is_a?(Array)
|
|
105
|
+
|
|
67
106
|
normalized = normalize_hash(payload)
|
|
68
107
|
metadata = normalized[INTERNAL_METADATA_KEY]
|
|
69
108
|
metadata.is_a?(Hash) ? metadata : {}
|
|
@@ -75,19 +114,13 @@ module CDC
|
|
|
75
114
|
# @param key [Symbol]
|
|
76
115
|
# @return [Object, nil]
|
|
77
116
|
def self.ordering_value(payload, key)
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
normalized['transaction_id']
|
|
86
|
-
when :global
|
|
87
|
-
normalized['source_position'] || normalized['commit_lsn']
|
|
88
|
-
when :none
|
|
89
|
-
nil
|
|
90
|
-
end
|
|
117
|
+
return payload.map { |item| ordering_value(item, key) } if payload.is_a?(Array)
|
|
118
|
+
return nil if key == :none
|
|
119
|
+
|
|
120
|
+
fetcher = ORDERING_VALUE_FETCHERS[key]
|
|
121
|
+
return nil unless fetcher
|
|
122
|
+
|
|
123
|
+
fetcher.call(load(payload))
|
|
91
124
|
end
|
|
92
125
|
|
|
93
126
|
# Normalize hash keys to strings recursively.
|
|
@@ -138,6 +171,25 @@ module CDC
|
|
|
138
171
|
payload.key?('operation') && payload.key?('schema') && payload.key?('table')
|
|
139
172
|
end
|
|
140
173
|
private_class_method :change_event_payload?
|
|
174
|
+
|
|
175
|
+
def self.enqueue_metadata_for_batch(payloads)
|
|
176
|
+
payloads.each_with_index.map do |payload, index|
|
|
177
|
+
enqueue_metadata(payload).merge(
|
|
178
|
+
'batch_size' => payloads.length,
|
|
179
|
+
'batch_index' => index
|
|
180
|
+
)
|
|
181
|
+
end
|
|
182
|
+
end
|
|
183
|
+
private_class_method :enqueue_metadata_for_batch
|
|
184
|
+
|
|
185
|
+
def self.metadata_for_batch_item(metadata, index)
|
|
186
|
+
normalize_hash(metadata).merge(
|
|
187
|
+
'batch_size' => normalize_hash(metadata).fetch('batch_size'),
|
|
188
|
+
'batch_index' => index
|
|
189
|
+
)
|
|
190
|
+
end
|
|
191
|
+
private_class_method :metadata_for_batch_item
|
|
141
192
|
end
|
|
193
|
+
# rubocop:enable Metrics/ClassLength
|
|
142
194
|
end
|
|
143
195
|
end
|
|
@@ -17,15 +17,22 @@ module CDC
|
|
|
17
17
|
|
|
18
18
|
# Active Job entrypoint.
|
|
19
19
|
#
|
|
20
|
-
# @param payload [Hash]
|
|
20
|
+
# @param payload [Hash, Array<Hash>]
|
|
21
21
|
# @return [Object] process return value
|
|
22
22
|
def perform(payload)
|
|
23
|
-
|
|
23
|
+
event = EventSerializer.load_event(payload)
|
|
24
|
+
if SolidQueue.configuration.downstream_processor
|
|
25
|
+
return DownstreamProcessor.new(SolidQueue.configuration).process(event)
|
|
26
|
+
end
|
|
27
|
+
|
|
28
|
+
return process_many(event) if event.is_a?(Array) && respond_to?(:process_many)
|
|
29
|
+
|
|
30
|
+
process(event)
|
|
24
31
|
end
|
|
25
32
|
|
|
26
33
|
# Process a normalized CDC event payload.
|
|
27
34
|
#
|
|
28
|
-
# @param event [Hash]
|
|
35
|
+
# @param event [Hash, Array<Hash>]
|
|
29
36
|
# @raise [NotImplementedError] when the including job does not override it
|
|
30
37
|
def process(event)
|
|
31
38
|
raise NotImplementedError, "#{self.class} must implement #process"
|
|
@@ -21,17 +21,31 @@ module CDC
|
|
|
21
21
|
#
|
|
22
22
|
# @return [Integer] number of enqueued events
|
|
23
23
|
def start
|
|
24
|
+
# @type var batch: Array[untyped]
|
|
25
|
+
batch = []
|
|
24
26
|
count = 0
|
|
27
|
+
|
|
25
28
|
@stream.each do |event|
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
+
batch << event
|
|
30
|
+
next unless batch.length >= @enqueuer.configuration.batch_size
|
|
31
|
+
|
|
32
|
+
count += flush_batch(batch)
|
|
33
|
+
batch = []
|
|
29
34
|
end
|
|
30
|
-
|
|
35
|
+
|
|
36
|
+
count + flush_batch(batch)
|
|
31
37
|
end
|
|
32
38
|
|
|
33
39
|
private
|
|
34
40
|
|
|
41
|
+
def flush_batch(batch)
|
|
42
|
+
return 0 if batch.empty?
|
|
43
|
+
|
|
44
|
+
result = @enqueuer.enqueue(batch.length == 1 ? batch.first : batch.dup)
|
|
45
|
+
checkpoint(batch, result)
|
|
46
|
+
batch.length
|
|
47
|
+
end
|
|
48
|
+
|
|
35
49
|
def checkpoint(event, result)
|
|
36
50
|
store = @enqueuer.configuration.checkpoint
|
|
37
51
|
store&.advance(event, result)
|
data/lib/cdc/solid_queue.rb
CHANGED
|
@@ -6,6 +6,7 @@ require_relative 'solid_queue/event_serializer'
|
|
|
6
6
|
require_relative 'solid_queue/checkpoint'
|
|
7
7
|
require_relative 'solid_queue/configuration'
|
|
8
8
|
require_relative 'solid_queue/enqueuer'
|
|
9
|
+
require_relative 'solid_queue/downstream_processor'
|
|
9
10
|
require_relative 'solid_queue/processor_job'
|
|
10
11
|
require_relative 'solid_queue/postgresql_stream'
|
|
11
12
|
require_relative 'solid_queue/runner'
|
data/sig/cdc/solid_queue.rbs
CHANGED
|
@@ -32,6 +32,22 @@ module Pgoutput
|
|
|
32
32
|
end
|
|
33
33
|
|
|
34
34
|
module CDC
|
|
35
|
+
module Concurrent
|
|
36
|
+
class Runtime
|
|
37
|
+
def initialize: (processor: untyped, **untyped options) -> void
|
|
38
|
+
def process: (untyped item) -> untyped
|
|
39
|
+
def shutdown: () -> untyped
|
|
40
|
+
end
|
|
41
|
+
end
|
|
42
|
+
|
|
43
|
+
module Parallel
|
|
44
|
+
class Runtime
|
|
45
|
+
def initialize: (processor: untyped, **untyped options) -> void
|
|
46
|
+
def process: (untyped item) -> untyped
|
|
47
|
+
def shutdown: () -> untyped
|
|
48
|
+
end
|
|
49
|
+
end
|
|
50
|
+
|
|
35
51
|
module SolidQueue
|
|
36
52
|
VERSION: String
|
|
37
53
|
|
|
@@ -54,6 +70,7 @@ module CDC
|
|
|
54
70
|
class Configuration
|
|
55
71
|
SUPPORTED_SOURCE: Symbol
|
|
56
72
|
ORDERING_KEYS: Array[Symbol]
|
|
73
|
+
DOWNSTREAM_RUNTIMES: Array[Symbol]
|
|
57
74
|
|
|
58
75
|
attr_accessor processor_job: untyped
|
|
59
76
|
attr_accessor queue: String
|
|
@@ -61,6 +78,10 @@ module CDC
|
|
|
61
78
|
attr_accessor ordering_key: Symbol
|
|
62
79
|
attr_accessor postgresql: Hash[Symbol, untyped]
|
|
63
80
|
attr_accessor checkpoint: untyped
|
|
81
|
+
attr_accessor downstream_processor: untyped
|
|
82
|
+
attr_accessor downstream_runtime: Symbol
|
|
83
|
+
attr_accessor downstream_options: Hash[Symbol, untyped]
|
|
84
|
+
attr_accessor batch_size: Integer
|
|
64
85
|
|
|
65
86
|
def initialize: () -> void
|
|
66
87
|
def validate!: () -> true
|
|
@@ -73,17 +94,22 @@ module CDC
|
|
|
73
94
|
def validate_ordering_key!: () -> nil
|
|
74
95
|
def validate_postgresql!: () -> nil
|
|
75
96
|
def validate_checkpoint!: () -> nil
|
|
97
|
+
def validate_downstream!: () -> nil
|
|
98
|
+
def validate_batch_size!: () -> nil
|
|
76
99
|
end
|
|
77
100
|
|
|
78
101
|
class EventSerializer
|
|
79
102
|
INTERNAL_METADATA_KEY: String
|
|
103
|
+
ORDERING_VALUE_FETCHERS: Hash[Symbol, Proc]
|
|
80
104
|
|
|
81
105
|
def self.dump: (untyped event) -> Hash[String, untyped]
|
|
106
|
+
def self.dump_batch: (::Array[untyped] events) -> ::Array[Hash[String, untyped]]
|
|
82
107
|
def self.load: (Hash[untyped, untyped] payload) -> Hash[String, untyped]
|
|
83
|
-
def self.
|
|
84
|
-
def self.
|
|
85
|
-
def self.
|
|
86
|
-
def self.
|
|
108
|
+
def self.load_batch: (::Array[Hash[untyped, untyped]] payloads) -> ::Array[Hash[String, untyped]]
|
|
109
|
+
def self.load_event: (Hash[untyped, untyped] | ::Array[Hash[untyped, untyped]]) -> untyped
|
|
110
|
+
def self.with_enqueue_metadata: (Hash[untyped, untyped] | ::Array[Hash[untyped, untyped]], Hash[untyped, untyped]) -> untyped
|
|
111
|
+
def self.enqueue_metadata: (Hash[untyped, untyped] | ::Array[Hash[untyped, untyped]]) -> untyped
|
|
112
|
+
def self.ordering_value: (Hash[untyped, untyped] | ::Array[Hash[untyped, untyped]], Symbol) -> untyped
|
|
87
113
|
|
|
88
114
|
private
|
|
89
115
|
|
|
@@ -91,6 +117,8 @@ module CDC
|
|
|
91
117
|
def self.build_change_event: (Hash[String, untyped] normalized) -> untyped
|
|
92
118
|
def self.strip_internal_metadata: (Hash[String, untyped] payload) -> Hash[String, untyped]
|
|
93
119
|
def self.change_event_payload?: (Hash[String, untyped] payload) -> bool
|
|
120
|
+
def self.enqueue_metadata_for_batch: (::Array[Hash[String, untyped]] payloads) -> ::Array[Hash[String, untyped]]
|
|
121
|
+
def self.metadata_for_batch_item: (Hash[untyped, untyped] metadata, Integer index) -> Hash[String, untyped]
|
|
94
122
|
end
|
|
95
123
|
|
|
96
124
|
class Checkpoint
|
|
@@ -114,12 +142,33 @@ module CDC
|
|
|
114
142
|
def async_job: (untyped job) -> untyped
|
|
115
143
|
def enqueue_metadata: (Hash[untyped, untyped] payload) -> Hash[String, untyped]
|
|
116
144
|
def ordering_value: (Hash[untyped, untyped] payload) -> untyped
|
|
145
|
+
def payload_for: (untyped event) -> untyped
|
|
146
|
+
end
|
|
147
|
+
|
|
148
|
+
class DownstreamProcessor
|
|
149
|
+
attr_reader configuration: Configuration
|
|
150
|
+
def initialize: (Configuration configuration) -> void
|
|
151
|
+
def process: (untyped item) -> untyped
|
|
152
|
+
def process_many: (::Array[untyped] items) -> untyped
|
|
153
|
+
|
|
154
|
+
private
|
|
155
|
+
|
|
156
|
+
def processor: () -> untyped
|
|
157
|
+
def process_one: (untyped item) -> untyped
|
|
158
|
+
def process_many_direct: (::Array[untyped] items) -> untyped
|
|
159
|
+
def process_with_runtime: (untyped runtime, ::Array[untyped] items) -> untyped
|
|
160
|
+
def unwrap_single_result: (untyped result) -> untyped
|
|
161
|
+
def concurrent_runtime: () -> untyped
|
|
162
|
+
def parallel_runtime: () -> untyped
|
|
163
|
+
def require_runtime: (String feature, String gem_name) -> untyped
|
|
117
164
|
end
|
|
118
165
|
|
|
119
166
|
module ProcessorJob
|
|
120
167
|
def self.included: (untyped base) -> void
|
|
121
168
|
def perform: (Hash[untyped, untyped] payload) -> untyped
|
|
169
|
+
| (::Array[Hash[untyped, untyped]]) -> untyped
|
|
122
170
|
def process: (Hash[String, untyped] event) -> untyped
|
|
171
|
+
def process_many: (::Array[untyped] events) -> untyped
|
|
123
172
|
end
|
|
124
173
|
|
|
125
174
|
class Runner
|
|
@@ -129,6 +178,7 @@ module CDC
|
|
|
129
178
|
private
|
|
130
179
|
|
|
131
180
|
def checkpoint: (untyped event, untyped result) -> untyped
|
|
181
|
+
def flush_batch: (::Array[untyped] batch) -> Integer
|
|
132
182
|
end
|
|
133
183
|
|
|
134
184
|
class PostgresqlStream
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: cdc-solid-queue
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.1.2
|
|
4
|
+
version: 0.3.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Ken C. Demanawa
|
|
@@ -93,6 +93,7 @@ files:
|
|
|
93
93
|
- lib/cdc/solid_queue/checkpoint.rb
|
|
94
94
|
- lib/cdc/solid_queue/cli.rb
|
|
95
95
|
- lib/cdc/solid_queue/configuration.rb
|
|
96
|
+
- lib/cdc/solid_queue/downstream_processor.rb
|
|
96
97
|
- lib/cdc/solid_queue/enqueuer.rb
|
|
97
98
|
- lib/cdc/solid_queue/error.rb
|
|
98
99
|
- lib/cdc/solid_queue/event_serializer.rb
|