concurrent_pipeline 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,363 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "concurrent/edge/erlang_actor"
4
+
5
+ require_relative "../changeset"
6
+ require_relative "../read_only_store"
7
+
8
+ module ConcurrentPipeline
9
+ module Processors
10
+ class ActorProcessor
11
+ Message = Struct.new(:type, :payload) do
12
+ def to_s(...)
13
+ inspect(...)
14
+ end
15
+ def inspect(...)
16
+ "<Message #{type} (#{payload.class}) >"
17
+ end
18
+ end
19
+
20
+ Ms = Module.new do
21
+ def self.g(...)
22
+ Message.new(...)
23
+ end
24
+
25
+ def msg(...)
26
+ Message.new(...)
27
+ end
28
+ end
29
+
30
+ Msg = Message.method(:new) #->(*args, **opts) { Message.new(*args, **opts) }
31
+
32
+ def self.call(...)
33
+ new(...).call
34
+ end
35
+
36
+ attr_reader :store, :pipelineables, :registry, :stream
37
+ def initialize(store:, pipelineables:, registry:, stream:)
38
+ @store = store
39
+ @pipelineables = pipelineables
40
+ @registry = registry
41
+ @stream = stream
42
+ end
43
+
44
      # Mini-framework for declaring actors with a message-handler DSL.
      #
      # Classes `extend PipeActor` and declare handlers with:
      #   on :some_type do |msg| ... end   # handler for msg.type == :some_type
      #   default do |msg| ... end         # fallback for any other type
      #
      # `SomeClass.spawn(...)` builds a plain instance and hosts it inside a
      # Concurrent::ErlangActor; every received message is dispatched to the
      # matching handler via #respond.
      module PipeActor
        module InstanceMethods
          # Actor environment for the message currently being handled; set by
          # the receive loop in .spawn immediately before each dispatch.
          attr_accessor :ctx

          # Route msg to the `on` block registered for msg.type, falling back
          # to the `default` block. Any raised error is logged and terminates
          # the actor with :error.
          def respond(msg)
            if self.class.on_blocks.key?(msg.type)
              instance_exec(msg, &self.class.on_blocks[msg.type])
            else
              instance_exec(msg, &self.class.default_block)
            end
          rescue => e
            Log.warn("error: #{e.class}:#{e.message}\n---\n#{e.backtrace.join("\n")}")
            terminate :error
          end

          # Reply to the sender of the current message (delegates to the actor ctx).
          def reply(...)
            ctx.reply(...)
          end

          # Stop this actor (delegates to the actor ctx).
          def terminate(...)
            ctx.terminate(...)
          end
        end

        def self.extended(base)
          base.include(InstanceMethods)
        end

        # Build an instance (arguments go to #initialize) and host it in a
        # pooled ErlangActor whose receive loop (keep: true => loop forever)
        # hands each message to the instance.
        def spawn(...)
          instance = self.new(...)
          Concurrent::ErlangActor.spawn(type: :on_pool) do
            receive(keep: true) do |msg|
              # `self` here is the actor environment; expose it to handlers.
              instance.ctx = self
              instance.respond(msg)
            end
          end
        end

        # type => handler block, populated by the `on` DSL.
        def on_blocks
          @on_blocks ||= {}
        end

        def default_block
          @default_block
        end

        private

        # DSL: register a handler for messages of the given type.
        def on(type, &block)
          on_blocks[type] = block
        end

        # DSL: register the fallback handler.
        def default(&block)
          @default_block = block
        end
      end
99
+
100
      # Bounds how many queued work bodies run at once. Work is queued by
      # asking the inner Pool actor; callers block on each spawned actor's
      # termination.
      class ActorPool
        attr_reader :pool

        def initialize(concurrency = 10000)
          @pool = Pool.spawn(concurrency)
        end

        # Run every callable in `bodies` through the pool and block until all
        # have finished (re-raising any actor failure via value!).
        def process(bodies)
          # This can be blocking because it is called by perform
          # which is already being called by an actor, so blocking is ok.
          # However, we still want the pool to be limited in size across
          # all actors.
          bodies.map { |body|
            pool.ask(
              Processors::ActorProcessor::Message.new(
                :queue,
                body
              )
            )
          }.map { _1.terminated.value! }
        end

        # Actor that tracks in-flight work and releases queued actors only
        # while under the concurrency limit.
        class Pool
          extend Processors::ActorProcessor::PipeActor

          attr_reader :concurrency, :queue, :processing_count

          def initialize(concurrency= 10000)
            @concurrency = concurrency
            @queue = []
            @processing_count = 0
          end

          # Wrap the body in a waiting actor, queue it, and reply with its
          # pid so the caller can await its termination.
          on :queue do |msg|
            pid = spawn_queued_actor(msg.payload)
            queue << pid
            try_process
            reply(pid)
          end

          # A queued actor finished: free a slot and maybe release the next.
          on :finished do |msg|
            @processing_count -= 1
            try_process()
          end

          private

          # Release the next queued actor unless idle or at the limit.
          # Called once per :queue and once per :finished, so at most one
          # actor starts per event.
          def try_process
            return if queue.empty?
            return if processing_count >= concurrency

            @processing_count += 1
            pid = queue.shift
            pid.tell(ctx.pid)
          end

          # Actor that waits to be told the pool's pid, runs the body, then
          # notifies the pool it finished.
          # NOTE(review): body.call has no rescue — an exception here would
          # kill this actor without sending :finished, permanently consuming
          # a pool slot. Confirm whether failures are meant to surface only
          # through `terminated.value!` in ActorPool#process.
          def spawn_queued_actor(body)
            Concurrent::ErlangActor.spawn(type: :on_pool) do
              receive do |sender|
                # begin
                body.call

                sender.tell(Processors::ActorProcessor::Message.new(:finished, nil))
              end
            end
          end
        end
      end
165
+
166
+ class Changeset
167
+ extend PipeActor
168
+
169
+ attr_reader :dispatch, :pipelines, :store
170
+ def initialize(dispatch:, store:)
171
+ @dispatch = dispatch
172
+ @store = store
173
+ @pipelines = []
174
+ end
175
+
176
+ on :changeset do |msg|
177
+ pipelines << msg.payload
178
+ end
179
+
180
+ on :flush_queue do |msg|
181
+ next unless pipelines.any?
182
+
183
+ diffed = store.apply(pipelines.map(&:changeset))
184
+ if diffed
185
+ dispatch.tell(Msg.(:pipelines_updated, pipelines.map(&:id)))
186
+ else
187
+ dispatch.tell(Msg.(:pipelines_processed, pipelines.map(&:id)))
188
+ end
189
+
190
+ @pipelines = []
191
+ end
192
+ end
193
+
194
      # Actor that decides which pipelines are runnable and enqueues them,
      # tracking per-pipeline status until everything is processed.
      class Scheduler
        extend PipeActor

        attr_reader :dispatch, :status, :store, :pipelineables, :stream

        def initialize(dispatch:, store:, pipelineables:, stream:)
          @dispatch = dispatch
          @store = store
          @pipelineables = pipelineables
          @stream = stream
          # pipeline id => :queued | :processed
          @status = {}
          # Shared pool for pipelineables that declare no concurrency limit.
          @unlimited_pool = ActorPool.new
          @pools = {}
        end

        # One dedicated pool per pipelineable with a concurrency limit;
        # everything else shares the unlimited pool. Memoized per pipelineable.
        def pool_for(pipelineable)
          @pools[pipelineable] ||= (
            if pipelineable.concurrency
              ActorPool.new(pipelineable.concurrency)
            else
              @unlimited_pool
            end
          )
        end

        # Handles both :requeue and :pipelines_processed (no `on` handlers
        # are registered, so every message lands here).
        default do |msg|
          # we update pipeline_ids on both messages.
          pipeline_ids = msg.payload || []
          pipeline_ids.each do |pipeline_id|
            status[pipeline_id] = :processed
          end

          case msg.type
          when :requeue
            reader = store.reader

            # Rebuild pipelines against the current store snapshot and
            # enqueue any that still want to run and are not already queued.
            pipelineables
              .map { _1.build_pipelines(store: reader, stream: stream, pool: pool_for(_1)) }
              .flatten
              .each do |c|
                if status[c.id] != :queued && c.should_perform?
                  Log.debug("enqueuing: #{c.id}")
                  status[c.id] = :queued
                  dispatch.tell(Msg.(:enqueue, c))
                end
              end
          end

          # Once every known pipeline has been processed, the run is done.
          if status.values.all? { _1 == :processed }
            dispatch.tell(Msg.(:all_pipelines_processed))
          end
          reply :ok
        end
      end
247
+
248
      # Emits a :tick to the dispatcher roughly every 100ms, forever. The
      # blocking loop is acceptable because it runs inside its own actor.
      class Ticker
        extend PipeActor

        attr_reader :dispatch

        def initialize(dispatch)
          @dispatch = dispatch
        end

        on :start do |msg|
          loop do
            sleep 0.1
            dispatch.tell(Msg.(:tick))
          end
        end
      end
263
+
264
      # Central router: the other actors talk to Dispatch, which forwards
      # events to the right collaborator. Terminates the whole run on
      # :error or :all_pipelines_processed.
      class Dispatch
        extend PipeActor

        attr_reader :work, :changeset, :scheduler

        # Late-bound wiring: collaborators are spawned after Dispatch and
        # injected here before any other message arrives.
        on :init do |msg|
          @work = msg.payload[:work]
          @changeset = msg.payload[:changeset]
          @scheduler = msg.payload[:scheduler]
          reply :ok
        end

        # Periodic flush of buffered changesets.
        on :tick do |msg|
          changeset.tell(Msg.(:flush_queue))
        end

        on :enqueue do |msg|
          work.tell(Msg.(:process, msg.payload))
        end

        # A pipeline raised: print it and stop the whole run with :error.
        on :error do |msg|
          warn "error: #{msg.payload.class}:#{msg.payload.message}\n---\n#{msg.payload.backtrace.join("\n")}"
          terminate :error
        end

        on :all_pipelines_processed do |msg|
          terminate :ok
        end

        # Store changed: the scheduler must re-evaluate what can run.
        on :pipelines_updated do |msg|
          scheduler.tell(Msg.(:requeue, msg.payload))
        end

        on :pipelines_processed do |msg|
          scheduler.tell(Msg.(:pipelines_processed, msg.payload))
        end

        # A pipeline finished: hand its changeset to the batching actor.
        on :changeset do |msg|
          changeset.tell(Msg.(:changeset, msg.payload))
        end

        default do |msg|
          Log.debug("unknown message: #{msg.inspect}")
        end
      end
309
+
310
+ class Work
311
+ extend PipeActor
312
+
313
+ attr_reader :dispatch
314
+ def initialize(dispatch)
315
+ @dispatch = dispatch
316
+ end
317
+
318
+ on :process do |msg|
319
+ a = Concurrent::ErlangActor.spawn(type: :on_pool) do
320
+ receive do |pipeline, dispatch|
321
+ pipeline = msg.payload
322
+ Log.debug("starting perform: #{pipeline.class}: #{pipeline.id}")
323
+ pipeline.perform
324
+ Log.debug("finished perform: #{pipeline.class}: #{pipeline.id}")
325
+ dispatch.tell(Msg.(:changeset, pipeline))
326
+ rescue => e
327
+ dispatch.tell(Msg.(:error, e))
328
+ end
329
+ end
330
+
331
+ a.tell([msg.payload, dispatch])
332
+ end
333
+ end
334
+
335
      # Wire up the actor graph, start the ticker and the initial scheduling
      # pass, then block until Dispatch terminates.
      #
      # @return the dispatcher's termination result (:ok or :error)
      def call
        dispatch = Dispatch.spawn
        ticker = Ticker.spawn(dispatch)
        work = Work.spawn(dispatch)
        changeset = Changeset.spawn(dispatch: dispatch, store: store)
        scheduler = Scheduler.spawn(
          dispatch: dispatch,
          store: store,
          pipelineables: pipelineables,
          stream: stream
        )

        # Dispatch must know its collaborators before any ticks arrive.
        dispatch.tell(Msg.(
          :init,
          work: work,
          changeset: changeset,
          scheduler: scheduler
        ))

        Log.debug("triggering initial queue")

        ticker.tell(Msg.(:start))
        scheduler.tell(Msg.(:requeue))

        dispatch.terminated.result
      end
361
+ end
362
+ end
363
+ end
@@ -0,0 +1,156 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "tmpdir"
4
+
5
+ require_relative "processors/actor_processor"
6
+ require_relative "registry"
7
+ require_relative "store"
8
+ require_relative "stores/yaml"
9
+
10
+ module ConcurrentPipeline
11
+ class Producer
12
+ class Stream
13
+ attr_reader :receivers
14
+ def initialize
15
+ @receivers = {
16
+ default: -> (type, *) { Log.warn("No stream handler for type: #{type.inspect}") },
17
+ }
18
+ end
19
+
20
+ def on(type, &block)
21
+ receivers[type] = block
22
+ end
23
+
24
+ def push(type, payload)
25
+ receivers[type]
26
+ .tap { Log.warn("No stream handler for type: #{type.inspect}") if _1.nil? }
27
+ &.call(payload)
28
+ end
29
+ end
30
+
31
+ def initialize(data: nil, store: nil, stream: nil, dir: nil, &initialization_block)
32
+ raise ArgumentError.new("provide data or store but not both") if data && store
33
+ raise ArgumentError.new("must provide initial data, a store, or a block") unless data || store || initialization_block
34
+ @dir = dir
35
+ @data = data
36
+ @store = store&.reader? ? store.store : store
37
+ @initialization_block = initialization_block
38
+ @stream = stream
39
+ end
40
+
41
    # Run the full pipeline: apply the initialization changeset (if any),
    # process everything via the actor processor, then report success.
    #
    # NOTE(review): the &block parameter is accepted but never used here —
    # confirm whether callers rely on passing one.
    #
    # @return [Boolean] true when every PipelineStep record is successful
    def call(&block)
      changeset = self.store.changeset
      @initialization_block&.call(changeset)
      store.apply(changeset)

      Processors::ActorProcessor.call(
        store: store,
        pipelineables: pipelineables,
        registry: registry,
        stream: stream
      )

      store.reader.all(:PipelineStep).all?(&:success?)
    end
55
+
56
    # Snapshot of the current store contents as a Hash.
    def data
      store.reader.to_h
    end

    # Lazily build the writable store from initial data when one was not
    # injected via the constructor.
    def store
      @store ||= self.class.store.build_writer(data: @data || {}, dir: dir, registry: registry)
    end

    # Instance-level stream wins over the class-level default.
    def stream
      @stream || self.class.stream
    end

    def versions
      self.class.store.versions(dir: dir, registry: registry)
    end

    def history
      self.class.store.history(dir: dir, registry: registry)
    end

    # Working directory for store files; a fresh temp dir when none given.
    def dir
      @dir ||= Dir.mktmpdir
    end
79
+
80
    private

    # Registry and pipelineables are class-level configuration shared by
    # every instance.
    def registry
      self.class.registry
    end

    def pipelineables
      self.class.pipelineables
    end
89
+
90
+
91
+ class << self
92
+ def store(klass = nil)
93
+ @store = klass || Stores::Yaml
94
+ @store
95
+ end
96
+
97
      # Pipeline definitions registered via .pipeline, in registration order.
      def pipelineables
        @pipelineables ||= []
      end

      # Lazily built model registry; PipelineStep is always pre-registered
      # because the processor queries it to determine overall success.
      def registry
        @registry ||= (
          Registry
            .new
            .tap { _1.register(:PipelineStep, Pipeline::PipelineStep) }
        )
      end
108
+
109
+ def model(klass_or_symbol, as: nil, &block)
110
+ if klass_or_symbol.is_a?(Class)
111
+ raise ArgumentError.new("Cannot provide both a class and a block") if block
112
+ as ||= klass_or_symbol.name.split("::").last.to_sym
113
+ registry.register(as, klass_or_symbol)
114
+ elsif klass_or_symbol.is_a?(Symbol)
115
+ registry.register(klass_or_symbol, Class.new do
116
+ extend Model
117
+ instance_eval(&block)
118
+ end)
119
+ else
120
+ raise ArgumentError.new("Must provide either a class or a symbol")
121
+ end
122
+ end
123
+
124
      # Namespace that receives dynamically created Pipeline subclasses so
      # they get a proper constant name (useful in logs/inspect).
      module CustomPipelines
      end

      # With a block: build and memoize the class-level Stream (the block is
      # instance_exec'd so it can call Stream#on directly). Without a block:
      # return the configured stream (or nil).
      def stream(&block)
        return @stream unless block
        @stream = Stream.new.tap { _1.instance_exec(&block) }
      end
131
+
132
      # Register a pipelineable. Accepts:
      #   * a Pipeline class, used as-is (no block allowed), or
      #   * a symbol (or nothing) plus a block: a new Pipeline subclass is
      #     created from the block and named under CustomPipelines.
      # Remaining keyword options are forwarded as class-level DSL calls,
      # e.g. `pipeline(Foo, concurrency: 2)` calls `Foo.concurrency(2)`.
      def pipeline(klass_or_symbol = nil, **opts, &block)
        pipelineable = (
          if klass_or_symbol.is_a?(Class)
            raise ArgumentError.new("Cannot provide both a class and a block") if block
            klass_or_symbol
          elsif klass_or_symbol.is_a?(Symbol) || klass_or_symbol.nil?
            # Default name: Pipeline0, Pipeline1, ... based on count so far.
            klass_or_symbol ||= "Pipeline#{pipelineables.count}"
            pipeline_class = Class.new(Pipeline, &block)
            # :my_pipeline -> "MyPipeline"
            class_name = klass_or_symbol.to_s.split("_").collect(&:capitalize).join
            CustomPipelines.const_set(class_name, pipeline_class)
            pipeline_class
          else
            raise ArgumentError.new("Must provide either a class or a symbol")
          end
        )

        opts.each do |meth, args|
          pipelineable.public_send(meth, *Array(args))
        end

        pipelineables << pipelineable
      end
154
+ end
155
+ end
156
+ end
@@ -0,0 +1,22 @@
1
+ # frozen_string_literal: true
2
+
3
module ConcurrentPipeline
  # Read-only facade over a store: exposes only the query API, hiding any
  # mutating interface the wrapped store may have.
  # NOTE(review): the class name "ReadyOnlyStore" looks like a typo for
  # ReadOnlyStore (the require path is read_only_store); renaming would
  # break existing references, so the original name is preserved.
  class ReadyOnlyStore
    attr_reader :store

    def initialize(store)
      @store = store
    end

    # Pure pass-through delegation for the query API.
    %i[find all everything].each do |query|
      define_method(query) do |*args, **opts, &blk|
        store.public_send(query, *args, **opts, &blk)
      end
    end
  end
end
@@ -0,0 +1,36 @@
1
+ # frozen_string_literal: true
2
+
3
module ConcurrentPipeline
  # Maps symbolic type names to model classes and back.
  class Registry
    # Instantiate the class registered under `type` with the given
    # attributes. Raises KeyError for unknown types (Hash#fetch).
    def build(type, attributes)
      lookup.fetch(type).new(attributes)
    end

    # Register `klass` under `type`. Duplicate registrations are an error.
    # Fixed: the example in the message previously lacked its closing
    # backtick.
    def register(type, klass)
      if lookup.key?(type)
        raise <<~TXT
          Duplicate type: #{type} for class #{klass.name}. Use the `as:` \
          option to avoid this collision, eg `model(MyModel, as: :MyModel)`.
        TXT
      end

      lookup[type] = klass
    end

    # Normalize either a type symbol or a registered class to the type
    # symbol; raises ArgumentError when neither is known.
    def type_for(type)
      return type if lookup.key?(type)

      reverse_lookup = lookup.invert

      return reverse_lookup[type] if reverse_lookup.key?(type)

      raise ArgumentError, "Unknown type: #{type}"
    end

    private

    # type => class storage, created on first use.
    def lookup
      @lookup ||= {}
    end
  end
end
@@ -0,0 +1,55 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "open3"
4
+
5
module ConcurrentPipeline
  # Thin wrapper around Open3 that captures stdout/stderr line by line
  # (optionally yielding each line as it arrives) and returns a Result.
  class Shell
    Error = Class.new(StandardError)

    class << self
      Result = Struct.new(:command, :success, :stdout, :stderr, keyword_init: true) do
        def success?
          success
        end
      end

      # Like .run but raises on failure and returns only stdout.
      # Fixed: the Error class above was defined but never raised — run!
      # raised a bare RuntimeError; it now raises Shell::Error (still a
      # StandardError, so generic rescues keep working).
      def run!(...)
        # only returns stdout just because.
        run(...)
          .tap { raise Error, "command failed: \n#{_1.inspect}" unless _1.success? }
          .stdout
      end

      # Execute `command`, streaming each output line to the optional block
      # as (:stdout|:stderr, line). Returns a Result with the full output.
      def run(command)
        Open3.popen3(command) do |_in, stdout, stderr, wait_thr|
          process_stdout = []
          stdout_thr = Thread.new do
            # gets returns nil at EOF, ending the loop.
            while line = stdout.gets&.chomp
              yield(:stdout, line) if block_given?
              process_stdout << line
            end
          end

          process_stderr = []
          stderr_thr = Thread.new do
            while line = stderr.gets&.chomp
              yield(:stderr, line) if block_given?
              process_stderr << line
            end
          end

          # Drain both streams fully before reading the exit status.
          [
            stderr_thr,
            stdout_thr,
          ].each(&:join)

          Result.new(
            command: command,
            success: wait_thr.value.success?,
            stdout: process_stdout.join("\n"),
            stderr: process_stderr.join("\n"),
          )
        end
      end
    end
  end
end
@@ -0,0 +1,27 @@
1
+ # frozen_string_literal: true
2
+
3
module ConcurrentPipeline
  # Facade combining a read-only db with a changeset for writes: queries go
  # to the db, mutations are recorded on the changeset.
  class Store
    attr_reader :db, :changeset

    def initialize(db:, changeset:)
      @changeset = changeset
      @db = db
    end

    def find(...)
      db.find(...)
    end

    # Fixed: previously delegated to db.find, so Store#all performed a
    # single-record lookup instead of returning the collection.
    def all(...)
      db.all(...)
    end

    def create(...)
      changeset.create(...)
    end

    def update(...)
      changeset.update(...)
    end
  end
end
@@ -0,0 +1,24 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "securerandom"
4
+ require "yaml"
5
+
6
module ConcurrentPipeline
  module Stores
    # NOTE(review): this file looks unfinished — Versioned includes
    # Yaml::QueryMethods (which must be loaded before this file), while the
    # accessors and #initialize below are defined on the Stores module
    # itself rather than inside Versioned, and #versions has an empty body.
    # Confirm the intended structure before relying on this code.
    class Versioned
      # Snapshot pairing raw data with the registry used to materialize it.
      Version = Struct.new(:data, :registry, keyword_init: true)
      include Yaml::QueryMethods
    end

    attr_reader :data, :registry
    def initialize(data:, registry:)
      @data = data
      @registry = registry
    end

    # TODO(review): no implementation — returns nil.
    def versions

    end
  end
end