concurrent_pipeline 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,363 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "concurrent/edge/erlang_actor"
4
+
5
+ require_relative "../changeset"
6
+ require_relative "../read_only_store"
7
+
8
+ module ConcurrentPipeline
9
+ module Processors
10
+ class ActorProcessor
11
# Envelope passed between actors: a message type tag plus an arbitrary payload.
Message = Struct.new(:type, :payload) do
  # Human-readable form; to_s simply mirrors inspect.
  def to_s(...)
    inspect(...)
  end

  # Shows the type and the payload's class (not its value, which may be large).
  def inspect(...)
    "<Message #{type} (#{payload.class}) >"
  end
end
19
+
20
# Convenience builders for Message envelopes.
# NOTE(review): Ms appears unused in this file — likely leftover scaffolding;
# candidate for removal once confirmed unused elsewhere.
Ms = Module.new do
  # Module-level builder: Ms.g(:type, payload).
  def self.g(...)
    Message.new(...)
  end

  # Mixin builder for includers: msg(:type, payload).
  def msg(...)
    Message.new(...)
  end
end

# Shorthand constructor used throughout as Msg.(:type, payload).
Msg = Message.method(:new)
31
+
32
# One-shot entry point: build a processor and run it to completion.
def self.call(...)
  new(...).call
end

attr_reader :store, :pipelineables, :registry, :stream

# @param store         writable store the pipelines mutate via changesets
# @param pipelineables classes/objects that can build pipelines
# @param registry      type registry used to (de)serialize records
# @param stream        event stream for progress notifications
def initialize(store:, pipelineables:, registry:, stream:)
  @store = store
  @pipelineables = pipelineables
  @registry = registry
  @stream = stream
end
43
+
44
# Mixin that turns a plain class into an ErlangActor-backed message handler.
# A class `extend`s PipeActor, declares handlers with `on :type { |msg| ... }`
# plus an optional `default { |msg| ... }` fallback, and is started via
# `.spawn`, which wraps a fresh instance in an actor that handles each
# received message with `respond`.
module PipeActor
  module InstanceMethods
    # The actor environment for the message currently being handled
    # (assigned by the spawn loop before every respond call).
    attr_accessor :ctx

    # Dispatches msg to the matching `on` handler, falling back to the
    # `default` handler. Any raised error is logged and the actor is
    # terminated with :error.
    def respond(msg)
      handlers = self.class.on_blocks
      if handlers.key?(msg.type)
        instance_exec(msg, &handlers[msg.type])
      else
        instance_exec(msg, &self.class.default_block)
      end
    rescue => e
      Log.warn("error: #{e.class}:#{e.message}\n---\n#{e.backtrace.join("\n")}")
      terminate :error
    end

    # Delegate to the current actor context.
    def reply(...)
      ctx.reply(...)
    end

    def terminate(...)
      ctx.terminate(...)
    end
  end

  def self.extended(base)
    base.include(InstanceMethods)
  end

  # Spawns an actor wrapping a new instance of the extending class.
  # The receive loop keeps running (keep: true) until the instance
  # terminates itself.
  def spawn(...)
    handler = new(...)
    Concurrent::ErlangActor.spawn(type: :on_pool) do
      receive(keep: true) do |msg|
        handler.ctx = self
        handler.respond(msg)
      end
    end
  end

  # type => handler block registry for this class.
  def on_blocks
    @on_blocks ||= {}
  end

  # Fallback handler registered via `default`.
  attr_reader :default_block

  private

  # DSL: register a handler for messages of `type`.
  def on(type, &block)
    on_blocks[type] = block
  end

  # DSL: register the fallback handler.
  def default(&block)
    @default_block = block
  end
end
99
+
100
# Bounded pool of one-shot worker actors. `process` blocks until every
# queued body has run, while the inner Pool actor caps how many run at once.
class ActorPool
  attr_reader :pool

  def initialize(concurrency = 10000)
    @pool = Pool.spawn(concurrency)
  end

  # Queues each callable in `bodies` and waits for all of them to finish.
  # This can be blocking because it is called by perform, which is already
  # being called by an actor — but the pool must still limit concurrency
  # across all actors, hence the shared Pool.
  def process(bodies)
    bodies.map { |body|
      pool.ask(Processors::ActorProcessor::Message.new(:queue, body))
    }.map { _1.terminated.value! }
  end

  class Pool
    extend Processors::ActorProcessor::PipeActor

    attr_reader :concurrency, :queue, :processing_count

    def initialize(concurrency = 10000)
      @concurrency = concurrency
      @queue = []
      @processing_count = 0
    end

    # Enqueue a body (wrapped in a waiting actor) and reply with its pid so
    # callers can await its termination.
    on :queue do |msg|
      pid = spawn_queued_actor(msg.payload)
      queue << pid
      try_process
      reply(pid)
    end

    # A worker finished: free its slot and start the next queued worker.
    on :finished do |msg|
      @processing_count -= 1
      try_process
    end

    private

    # Start the next queued worker unless the concurrency cap is reached.
    def try_process
      return if queue.empty?
      return if processing_count >= concurrency

      @processing_count += 1
      queue.shift.tell(ctx.pid)
    end

    # Spawns a one-shot actor that runs `body` when told this pool's pid.
    # FIX: the :finished notification is now sent in an `ensure`, so a body
    # that raises can no longer permanently consume a concurrency slot and
    # stall the whole pool.
    def spawn_queued_actor(body)
      Concurrent::ErlangActor.spawn(type: :on_pool) do
        receive do |sender|
          begin
            body.call
          ensure
            sender.tell(Processors::ActorProcessor::Message.new(:finished, nil))
          end
        end
      end
    end
  end
end
165
+
166
# Actor that accumulates changesets from finished pipelines and flushes
# them into the store in batches (driven by :flush_queue from the Ticker).
class Changeset
  extend PipeActor

  attr_reader :dispatch, :pipelines, :store

  def initialize(dispatch:, store:)
    @dispatch = dispatch
    @store = store
    @pipelines = []
  end

  # A pipeline finished; hold onto it until the next flush.
  on :changeset do |msg|
    pipelines << msg.payload
  end

  # Apply every queued changeset at once. A truthy diff means data changed
  # and the scheduler must requeue; otherwise these pipelines are done.
  on :flush_queue do |msg|
    next unless pipelines.any?

    ids = pipelines.map(&:id)
    diffed = store.apply(pipelines.map(&:changeset))
    if diffed
      dispatch.tell(Msg.(:pipelines_updated, ids))
    else
      dispatch.tell(Msg.(:pipelines_processed, ids))
    end

    @pipelines = []
  end
end
193
+
194
# Actor that decides which pipelines to enqueue next and detects overall
# completion. All messages mark their payload ids as processed; :requeue
# additionally rebuilds the pipeline set and enqueues runnable ones.
class Scheduler
  extend PipeActor

  attr_reader :dispatch, :status, :store, :pipelineables, :stream

  def initialize(dispatch:, store:, pipelineables:, stream:)
    @dispatch = dispatch
    @store = store
    @pipelineables = pipelineables
    @stream = stream
    @status = {}
    @unlimited_pool = ActorPool.new
    @pools = {}
  end

  # One shared (effectively unbounded) pool, plus a dedicated bounded pool
  # per pipelineable that declares a concurrency limit.
  def pool_for(pipelineable)
    @pools[pipelineable] ||=
      if pipelineable.concurrency
        ActorPool.new(pipelineable.concurrency)
      else
        @unlimited_pool
      end
  end

  # Handles both :requeue and :pipelines_processed — pipeline ids are
  # marked processed for either message type.
  default do |msg|
    ids = msg.payload || []
    ids.each { |pipeline_id| status[pipeline_id] = :processed }

    case msg.type
    when :requeue
      reader = store.reader

      pipelineables
        .map { _1.build_pipelines(store: reader, stream: stream, pool: pool_for(_1)) }
        .flatten
        .each do |candidate|
          # Enqueue anything runnable that isn't already queued.
          if status[candidate.id] != :queued && candidate.should_perform?
            Log.debug("enqueuing: #{candidate.id}")
            status[candidate.id] = :queued
            dispatch.tell(Msg.(:enqueue, candidate))
          end
        end
    end

    # Once every known pipeline is processed, the whole run is complete.
    if status.values.all? { _1 == :processed }
      dispatch.tell(Msg.(:all_pipelines_processed))
    end
    reply :ok
  end
end
247
+
248
# Emits a :tick to dispatch every 100ms, forever. The infinite loop
# deliberately occupies this actor — its only job is to drive the clock
# that triggers changeset flushes.
class Ticker
  extend PipeActor

  attr_reader :dispatch

  def initialize(dispatch)
    @dispatch = dispatch
  end

  on :start do |msg|
    loop do
      sleep 0.1
      dispatch.tell(Msg.(:tick))
    end
  end
end
263
+
264
# Central router: receives every event and forwards it to the appropriate
# collaborator. Terminates the run on :all_pipelines_processed (:ok) or
# on :error.
class Dispatch
  extend PipeActor

  attr_reader :work, :changeset, :scheduler

  # Collaborators are late-bound: they need dispatch's pid to be spawned,
  # so they are delivered here after construction.
  on :init do |msg|
    @work = msg.payload[:work]
    @changeset = msg.payload[:changeset]
    @scheduler = msg.payload[:scheduler]
    reply :ok
  end

  # Clock tick: ask the changeset actor to flush its batch.
  on :tick do |msg|
    changeset.tell(Msg.(:flush_queue))
  end

  # Scheduler chose a pipeline to run: hand it to the worker.
  on :enqueue do |msg|
    work.tell(Msg.(:process, msg.payload))
  end

  # A pipeline raised: report and shut the whole run down.
  on :error do |msg|
    warn "error: #{msg.payload.class}:#{msg.payload.message}\n---\n#{msg.payload.backtrace.join("\n")}"
    terminate :error
  end

  on :all_pipelines_processed do |msg|
    terminate :ok
  end

  # Store changed after a flush: the scheduler must re-evaluate.
  on :pipelines_updated do |msg|
    scheduler.tell(Msg.(:requeue, msg.payload))
  end

  on :pipelines_processed do |msg|
    scheduler.tell(Msg.(:pipelines_processed, msg.payload))
  end

  # A pipeline produced a changeset: queue it for the next flush.
  on :changeset do |msg|
    changeset.tell(Msg.(:changeset, msg.payload))
  end

  default do |msg|
    Log.debug("unknown message: #{msg.inspect}")
  end
end
309
+
310
# Actor that performs a single pipeline inside a freshly spawned worker
# actor, reporting the resulting changeset (or error) back to dispatch.
class Work
  extend PipeActor

  attr_reader :dispatch

  def initialize(dispatch)
    @dispatch = dispatch
  end

  on :process do |msg|
    worker = Concurrent::ErlangActor.spawn(type: :on_pool) do
      # The told array below destructures into (pipeline, dispatcher);
      # pipeline is the same object as msg.payload.
      receive do |pipeline, dispatcher|
        Log.debug("starting perform: #{pipeline.class}: #{pipeline.id}")
        pipeline.perform
        Log.debug("finished perform: #{pipeline.class}: #{pipeline.id}")
        dispatcher.tell(Msg.(:changeset, pipeline))
      rescue => e
        dispatcher.tell(Msg.(:error, e))
      end
    end

    worker.tell([msg.payload, dispatch])
  end
end
334
+
335
# Wires up the actor graph and blocks until the Dispatch actor terminates:
# Ticker drives flushes; Scheduler enqueues work; Work performs pipelines;
# Changeset applies their results — all coordinated through Dispatch.
def call
  dispatch = Dispatch.spawn
  ticker = Ticker.spawn(dispatch)
  work = Work.spawn(dispatch)
  changeset = Changeset.spawn(dispatch: dispatch, store: store)
  scheduler = Scheduler.spawn(
    dispatch: dispatch,
    store: store,
    pipelineables: pipelineables,
    stream: stream
  )

  # Hand dispatch its collaborators (they all needed dispatch's pid first).
  dispatch.tell(Msg.(
    :init,
    work: work,
    changeset: changeset,
    scheduler: scheduler
  ))

  Log.debug("triggering initial queue")

  ticker.tell(Msg.(:start))
  scheduler.tell(Msg.(:requeue))

  # Block until the run finishes (:ok) or fails (:error).
  dispatch.terminated.result
end
361
+ end
362
+ end
363
+ end
@@ -0,0 +1,156 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "tmpdir"
4
+
5
+ require_relative "processors/actor_processor"
6
+ require_relative "registry"
7
+ require_relative "store"
8
+ require_relative "stores/yaml"
9
+
10
+ module ConcurrentPipeline
11
+ class Producer
12
# Pub/sub hub for pipeline events. Handlers are registered per event type;
# types with no handler fall back to the :default handler, which logs a
# warning (and can itself be replaced via `on(:default)`).
class Stream
  attr_reader :receivers

  def initialize
    @receivers = {
      default: -> (type, *) { Log.warn("No stream handler for type: #{type.inspect}") },
    }
  end

  # Register (or replace) the handler for `type`.
  def on(type, &block)
    receivers[type] = block
  end

  # Deliver payload to the handler registered for `type`.
  # FIX: the :default handler was previously unreachable — push warned
  # inline and skipped missing handlers via `&.`. Unknown types are now
  # routed to receivers[:default], which also receives the type (matching
  # its `(type, *)` arity).
  def push(type, payload)
    handler = receivers[type]
    if handler
      handler.call(payload)
    else
      receivers[:default].call(type, payload)
    end
  end
end
30
+
31
# @param data   initial data hash (mutually exclusive with store:)
# @param store  an existing store, or a reader wrapping one
# @param stream optional per-instance event stream (overrides class stream)
# @param dir    working directory (defaults lazily to a temp dir)
# @param initialization_block optional block given the first changeset
def initialize(data: nil, store: nil, stream: nil, dir: nil, &initialization_block)
  raise ArgumentError.new("provide data or store but not both") if data && store
  unless data || store || initialization_block
    raise ArgumentError.new("must provide initial data, a store, or a block")
  end

  @dir = dir
  @data = data
  # If given a reader, unwrap it back to the underlying writable store.
  @store = store&.reader? ? store.store : store
  @initialization_block = initialization_block
  @stream = stream
end
40
+
41
# Applies the optional initialization changeset, runs the actor processor
# to completion, and returns true iff every PipelineStep succeeded.
def call(&block)
  initial_changeset = store.changeset
  @initialization_block&.call(initial_changeset)
  store.apply(initial_changeset)

  Processors::ActorProcessor.call(
    store: store,
    pipelineables: pipelineables,
    registry: registry,
    stream: stream
  )

  store.reader.all(:PipelineStep).all?(&:success?)
end
55
+
56
# Full snapshot of the current store data.
def data
  store.reader.to_h
end

# The writable store, lazily built from the class-configured backend.
def store
  @store ||= self.class.store.build_writer(data: @data || {}, dir: dir, registry: registry)
end

# Instance-level stream wins over the class-level one.
def stream
  @stream || self.class.stream
end

def versions
  self.class.store.versions(dir: dir, registry: registry)
end

def history
  self.class.store.history(dir: dir, registry: registry)
end

# Working directory; defaults to a fresh temp dir on first use.
def dir
  @dir ||= Dir.mktmpdir
end

private

def registry
  self.class.registry
end

def pipelineables
  self.class.pipelineables
end
89
+
90
+
91
class << self
  # DSL: configure the store backend (pass a class) or read the current
  # one. Defaults to Stores::Yaml.
  # FIX: a bare read (`store` with no argument) used to execute
  # `@store = nil || Stores::Yaml`, silently discarding a custom backend
  # configured earlier. Reads are now side-effect free.
  def store(klass = nil)
    @store = klass if klass
    @store ||= Stores::Yaml
  end

  # Pipelines registered via the `pipeline` DSL.
  def pipelineables
    @pipelineables ||= []
  end

  # Registry shared by all models/pipelines; PipelineStep is built in.
  def registry
    @registry ||= (
      Registry
        .new
        .tap { _1.register(:PipelineStep, Pipeline::PipelineStep) }
    )
  end

  # DSL: register a model — either an existing class (optionally renamed
  # via as:) or one defined inline from a block.
  def model(klass_or_symbol, as: nil, &block)
    if klass_or_symbol.is_a?(Class)
      raise ArgumentError.new("Cannot provide both a class and a block") if block
      as ||= klass_or_symbol.name.split("::").last.to_sym
      registry.register(as, klass_or_symbol)
    elsif klass_or_symbol.is_a?(Symbol)
      registry.register(klass_or_symbol, Class.new do
        extend Model
        instance_eval(&block)
      end)
    else
      raise ArgumentError.new("Must provide either a class or a symbol")
    end
  end

  # Namespace for pipeline classes defined inline via the `pipeline` DSL.
  module CustomPipelines
  end

  # DSL: configure the stream (with a block) or read the configured one.
  def stream(&block)
    return @stream unless block
    @stream = Stream.new.tap { _1.instance_exec(&block) }
  end

  # DSL: add a pipeline — an existing class, or one defined inline from a
  # block and named after the symbol (or auto-numbered when anonymous).
  def pipeline(klass_or_symbol = nil, **opts, &block)
    pipelineable =
      if klass_or_symbol.is_a?(Class)
        raise ArgumentError.new("Cannot provide both a class and a block") if block
        klass_or_symbol
      elsif klass_or_symbol.is_a?(Symbol) || klass_or_symbol.nil?
        klass_or_symbol ||= "Pipeline#{pipelineables.count}"
        pipeline_class = Class.new(Pipeline, &block)
        class_name = klass_or_symbol.to_s.split("_").collect(&:capitalize).join
        CustomPipelines.const_set(class_name, pipeline_class)
        pipeline_class
      else
        raise ArgumentError.new("Must provide either a class or a symbol")
      end

    # Forward extra options to class-level DSL methods, e.g. concurrency: 2.
    opts.each do |meth, args|
      pipelineable.public_send(meth, *Array(args))
    end

    pipelineables << pipelineable
  end
end
155
+ end
156
+ end
@@ -0,0 +1,22 @@
1
# frozen_string_literal: true

module ConcurrentPipeline
  # Read-only facade over a store: exposes query methods only, hiding
  # create/update.
  # NOTE(review): the class name "ReadyOnlyStore" looks like a typo for
  # "ReadOnlyStore" (the file is read_only_store); renaming would break
  # existing references, so it is only flagged here.
  class ReadyOnlyStore
    attr_reader :store

    def initialize(store)
      @store = store
    end

    def find(...)
      store.find(...)
    end

    def all(...)
      store.all(...)
    end

    def everything(...)
      store.everything(...)
    end
  end
end
@@ -0,0 +1,36 @@
1
# frozen_string_literal: true

module ConcurrentPipeline
  # Bidirectional mapping between type symbols and model classes.
  class Registry
    # Instantiates the class registered under `type` with `attributes`.
    # Raises KeyError for an unknown type.
    def build(type, attributes)
      lookup.fetch(type).new(attributes)
    end

    # Registers `klass` under `type`; duplicate registrations raise.
    def register(type, klass)
      if lookup.key?(type)
        raise <<~TXT
          Duplicate type: #{type} for class #{klass.name}. Use the `as:` \
          option to avoid this collision, eg `model(MyModel, as: :MyModel).
        TXT
      end

      lookup[type] = klass
    end

    # Accepts either a type symbol or a registered class and returns the
    # type symbol; raises ArgumentError when neither is known.
    def type_for(type)
      return type if lookup.key?(type)

      reversed = lookup.invert
      return reversed[type] if reversed.key?(type)

      raise ArgumentError, "Unknown type: #{type}"
    end

    private

    # Backing hash, lazily initialized.
    def lookup
      @lookup ||= {}
    end
  end
end
@@ -0,0 +1,55 @@
1
# frozen_string_literal: true

require "open3"

module ConcurrentPipeline
  # Thin wrapper around Open3 for running shell commands while streaming
  # stdout/stderr lines to an optional block.
  class Shell
    Error = Class.new(StandardError)

    class << self
      Result = Struct.new(:command, :success, :stdout, :stderr, keyword_init: true) do
        def success?
          success
        end
      end

      # Like .run, but raises Shell::Error on failure.
      # FIX: previously raised a bare RuntimeError, leaving the dedicated
      # Error class unused; Error < StandardError so existing rescues
      # still catch it. Returns only stdout (just because).
      def run!(...)
        run(...)
          .tap { raise Error, "command failed: \n#{_1.inspect}" unless _1.success? }
          .stdout
      end

      # Runs `command`, yielding each output line as (:stdout, line) or
      # (:stderr, line) when a block is given. Both pipes are drained on
      # dedicated threads to avoid deadlock on full pipe buffers.
      # @return [Result]
      def run(command)
        Open3.popen3(command) do |_in, stdout, stderr, wait_thr|
          process_stdout = []
          stdout_thr = Thread.new do
            while line = stdout.gets&.chomp
              yield(:stdout, line) if block_given?
              process_stdout << line
            end
          end

          process_stderr = []
          stderr_thr = Thread.new do
            while line = stderr.gets&.chomp
              yield(:stderr, line) if block_given?
              process_stderr << line
            end
          end

          [
            stderr_thr,
            stdout_thr,
          ].each(&:join)

          Result.new(
            command: command,
            success: wait_thr.value.success?,
            stdout: process_stdout.join("\n"),
            stderr: process_stderr.join("\n"),
          )
        end
      end
    end
  end
end
@@ -0,0 +1,27 @@
1
# frozen_string_literal: true

module ConcurrentPipeline
  # Facade pairing read access (db) with write access (changeset): reads
  # go to the db, writes are recorded on the changeset.
  class Store
    attr_reader :db, :changeset

    def initialize(db:, changeset:)
      @changeset = changeset
      @db = db
    end

    def find(...)
      db.find(...)
    end

    # FIX: previously delegated to db.find, so bulk reads performed a
    # single-record lookup. Now correctly delegates to db.all.
    def all(...)
      db.all(...)
    end

    def create(...)
      changeset.create(...)
    end

    def update(...)
      changeset.update(...)
    end
  end
end
@@ -0,0 +1,24 @@
1
# frozen_string_literal: true

require "securerandom"
require "yaml"

module ConcurrentPipeline
  module Stores
    class Versioned
      # Value object pairing a data snapshot with the registry that
      # interprets it.
      # FIX: Struct.new was missing its `do`, so `include Yaml::QueryMethods`
      # leaked into the class body and the file's trailing `end`s no longer
      # balanced (one extra `end` at file scope). The query methods belong
      # on the Version value object; with the block restored, every `end`
      # pairs up. TODO(review): confirm against the intended design.
      Version = Struct.new(:data, :registry, keyword_init: true) do
        include Yaml::QueryMethods
      end

      attr_reader :data, :registry

      def initialize(data:, registry:)
        @data = data
        @registry = registry
      end

      # TODO(review): not yet implemented — returns nil.
      def versions

      end
    end
  end
end