concurrent_pipeline 0.1.0 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Rakefile CHANGED
@@ -6,11 +6,13 @@ require "rake/testtask"
 Rake::TestTask.new(:test) do |t|
   t.libs << "test"
   t.libs << "lib"
-  t.test_files = FileList["test/**/test_*.rb"]
+  t.test_files = FileList["test/**/*_test.rb"]
+  t.verbose = true
+  t.warning = false
 end

 require "rubocop/rake_task"

 RuboCop::RakeTask.new

-task default: %i[test rubocop]
+task default: %i[test]
@@ -30,5 +30,7 @@ Gem::Specification.new do |spec|
   spec.executables = spec.files.grep(%r{\Aexe/}) { |f| File.basename(f) }
   spec.require_paths = ["lib"]

-  spec.add_dependency("concurrent-ruby-edge")
+  spec.add_dependency("zeitwerk")
+  spec.add_dependency("yaml")
+  spec.add_dependency("async")
 end
@@ -1,214 +1,27 @@
-require "time"
+require "securerandom"

 module ConcurrentPipeline
   class Pipeline
+    def self.define(&block)
+      schema = Pipelines::Schema.new
+      schema.instance_exec(&block)

-    # {
-    #   type: PipelineStep,
-    #   pipeline_id: [MyPipeline, :vhost, 1],
-    #   name: {string},
-    #   result: nil | :success | :failure,
-    #   completed_at: nil | {timestamp},
-    #   sequence: 3
-    # }
-
-    class PipelineStep
-      extend Model
-
-      attribute :id
-      attribute :pipeline_id
-      attribute :name
-      attribute :result
-      attribute :completed_at
-      attribute :sequence
-      attribute :error_message
-
-      def success?
-        result == :success
-      end
-    end
-
-    class Wrapper
-      attr_reader :pipeline, :pool
-      def initialize(pipeline:, pool:)
-        @pipeline = pipeline
-        @pool = pool
-      end
-
-      def id
-        pipeline_id = (
-          if pipeline.class.target_type
-            pipeline.target.id
-          end
-        )
-
-        [pipeline.class.name, pipeline_id].compact.join("__")
-      end
-
-      def perform
-        if pipeline_steps.empty?
-          create_pipeline_steps
-        else
-          pipeline_steps
-            .reject(&:completed_at)
-            .group_by(&:sequence)
-            .values
-            .first
-            .map { |step|
-              wrapper = self
-              -> () do
-                begin
-                  wrapper.pipeline.public_send(step.name)
-                  wrapper.changeset.update(
-                    step,
-                    completed_at: Time.now.iso8601,
-                    result: :success
-                  )
-                rescue => e
-                  wrapper.changeset.update(
-                    step,
-                    completed_at: Time.now.iso8601,
-                    result: :failure,
-                    error: {class: e.class, message: e.message, backtrace: e.backtrace}
-                  )
-                end
-              end
-            }
-            .then { pool.process(_1) }
-        end
-      end
-
-      def should_perform?
-        ready? && !done?
-      end
-
-      def create_pipeline_steps
-        sequence = (
-          if pipeline.respond_to?(:steps)
-            pipeline.steps
-          else
-            [:perform]
-          end
-        )
-
-        sequence.each_with_index do |sub_seq, i|
-          Array(sub_seq).each do |step_name|
-            changeset.create(
-              PipelineStep,
-              pipeline_id: id,
-              name: step_name,
-              sequence: i
-            )
-          end
-        end
-      end
-
-      def pipeline_steps
-        @pipeline_steps ||= (
-          store
-            .all(PipelineStep)
-            .select { _1.pipeline_id == id }
-            .sort_by(&:sequence)
-        )
-      end
-
-      def ready?
-        if pipeline.respond_to?(:ready?)
-          pipeline.ready?
-        else
-          true
-        end
-      end
-
-      def done?
-        if pipeline.respond_to?(:done?)
-          pipeline.done?
-        else
-          !pipeline_steps.empty? && pipeline_steps.all?(&:completed_at)
-        end
-      end
-
-      def store
-        pipeline.store
-      end
-
-      def changeset
-        pipeline.changeset
-      end
-
-      def stream(type, payload)
-        pipeline.stream.push(type, payload)
-      end
+      new(schema)
     end

-    class << self
-      attr_reader(:target_type)
-
-      def build_pipelines(store:, stream:, pool:)
-        if target_type
-          store.all(target_type).map { |record|
-            Wrapper.new(
-              pipeline: new(
-                target: record,
-                store: store,
-                changeset: store.changeset,
-                stream: stream
-              ),
-              pool: pool
-            )
-          }
-        else
-          Wrapper.new(
-            pipeline: new(
-              target: nil,
-              store: store,
-              changeset: store.changeset,
-              stream: stream
-            ),
-            pool: pool
-          )
-        end
-      end
-
-      def each(type, as: nil)
-        @target_type = type
-        define_method(as) { target } if as
-        define_method(:record) { target }
-      end
-
-      def ready(...)
-        define_method(:ready?, ...)
-      end
-
-      def done(...)
-        define_method(:done?, ...)
-      end
-
-      def perform(...)
-        steps(:perform)
-        define_method(:perform, ...)
-      end
-
-      def steps(*sequence)
-        define_method(:steps) { sequence }
-      end
-
-      def concurrency(size = nil)
-        @concurrency = size if size
-        @concurrency
-      end
+    attr_reader :schema, :processor
+    def initialize(schema)
+      @schema = schema
+      @processor = nil
     end

-    attr_reader :target, :store, :changeset
-    def initialize(target:, store:, changeset:, stream:)
-      @target = target
-      @store = store
-      @changeset = changeset
-      @stream = stream
+    def process(store)
+      @processor = schema.build_processor(store)
+      @processor.call
     end

-    def stream(type, payload)
-      @stream.push(type, payload)
+    def errors
+      @processor&.errors || []
     end
   end
 end
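
Taken together, the rewritten Pipeline reduces to a small lifecycle: `define` instance-execs its block against a `Pipelines::Schema` (added later in this diff), `process` asks that schema to build a processor for a given store and runs it, and `errors` surfaces whatever the processor collected. A minimal sketch of that surface, where the `:task` record name and the store are assumptions for illustration:

pipeline = ConcurrentPipeline::Pipeline.define do
  processor :sync                # schema DSL, shown further down
  process(:task) do |task|       # :task is an assumed record name
    # ... work on each task record ...
  end
end

succeeded = pipeline.process(store)   # store: a ConcurrentPipeline::Store
pipeline.errors.each { |e| warn e.message } unless succeeded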
@@ -0,0 +1,92 @@
+require "async"
+require "async/semaphore"
+
+module ConcurrentPipeline
+  module Pipelines
+    module Processors
+      class Asynchronous
+        def self.call(...)
+          new(...).call
+        end
+
+        attr_reader(:store, :producers, :locker, :concurrency, :enqueue_seconds, :errors)
+        def initialize(store:, producers:, concurrency: 5, enqueue_seconds: 0.1)
+          @store = store
+          @producers = producers
+          @concurrency = concurrency
+          @enqueue_seconds = enqueue_seconds
+          @locker = Locker.new
+          @errors = []
+        end
+
+        def call
+          Async { |task|
+            semaphore = Async::Semaphore.new(concurrency)
+            active_tasks = []
+            result = true
+
+            loop do
+              # Set result to false if any task has failed
+              if errors.any?
+                result = false
+                break
+              end
+
+              # Clean up finished tasks
+              active_tasks.reject!(&:finished?)
+
+              # Try to enqueue more work (only if no failure)
+              enqueued_any = enqueue_all(semaphore, active_tasks, task)
+
+              # Stop when nothing is being processed AND nothing new was enqueued
+              break if active_tasks.empty? && !enqueued_any
+
+              # Yield to allow other tasks to progress
+              sleep(enqueue_seconds)
+            end
+
+            result # Return false if there was a failure, true otherwise
+          }.wait # Wait for the async block to complete and return its value
+        end
+
+        def enqueue_all(semaphore, active_tasks, parent_task)
+          enqueued_any = false
+
+          producers.each do |producer|
+            producer.records(store).each do |record|
+              next if locker.locked?(producer:, record:)
+
+              enqueued_any = true
+              locker.lock(producer:, record:)
+
+              # Spawn async task
+              new_task = parent_task.async do
+                begin
+                  # Exit early if another task has already failed
+                  next if errors.any?
+
+                  semaphore.acquire do
+                    begin
+                      store.transaction do
+                        producer.call(record)
+                      end
+                    rescue => e
+                      # Append error to array to prevent async gem from logging it
+                      errors << e
+                    end
+                  end
+                ensure
+                  locker.unlock(producer:, record:)
+                end
+              end
+
+              active_tasks << new_task
+            end
+          end
+
+          enqueued_any
+        end
+      end
+    end
+  end
+end
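
The asynchronous processor is a polling loop: it repeatedly asks each producer for matching records, locks and spawns an Async task per record (bounded by the semaphore), and exits once no tasks are active and nothing new was enqueued; the first collected error stops further enqueueing. A self-contained sketch of driving it directly, where the Stub classes are hypothetical stand-ins for the store/producer/record interfaces it expects:

require "concurrent_pipeline"

# Hypothetical record type; Locker keys locks on record.class.record_name and record.id.
StubRecord = Struct.new(:id) do
  def self.record_name
    :stub
  end
end

# Hypothetical producer: yields one record, then reports no more work,
# which is what ends the processor's polling loop.
class StubProducer
  def initialize
    @done = false
  end

  def records(_store)
    @done ? [] : [StubRecord.new(1)]
  end

  def call(record)
    @done = true
    puts "processed #{record.id}"
  end
end

# Hypothetical store: the processor wraps every producer call in a transaction.
class StubStore
  def transaction
    yield
  end
end

ok = ConcurrentPipeline::Pipelines::Processors::Asynchronous.call(
  store: StubStore.new,
  producers: [StubProducer.new],
  concurrency: 2,
  enqueue_seconds: 0.01
)
puts ok # => true; false if any producer raised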
@@ -0,0 +1,28 @@
+require "securerandom"
+require "async"
+require "async/semaphore"
+
+module ConcurrentPipeline
+  module Pipelines
+    module Processors
+      class Locker
+        attr_reader(:locks)
+        def initialize
+          @locks = {}
+        end
+
+        def locked?(producer:, record:)
+          locks.key?([producer, record.class.record_name, record.id])
+        end
+
+        def lock(producer:, record:)
+          locks[[producer, record.class.record_name, record.id]] = true
+        end
+
+        def unlock(producer:, record:)
+          locks.delete([producer, record.class.record_name, record.id])
+        end
+      end
+    end
+  end
+end
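
Note that lock identity is the `[producer, record_name, id]` triple rather than object identity, so a re-fetched copy of the same record still counts as locked while the original is in flight. A small illustration with a hypothetical record type:

Rec = Struct.new(:id) do
  def self.record_name
    :rec
  end
end

locker = ConcurrentPipeline::Pipelines::Processors::Locker.new
producer = Object.new

locker.lock(producer: producer, record: Rec.new(1))
locker.locked?(producer: producer, record: Rec.new(1)) # => true (fresh instance, same key)
locker.locked?(producer: producer, record: Rec.new(2)) # => false (different id)
locker.unlock(producer: producer, record: Rec.new(1))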
@@ -0,0 +1,50 @@
+module ConcurrentPipeline
+  module Pipelines
+    module Processors
+      class Synchronous
+        def self.call(...)
+          new(...).call
+        end
+
+        attr_reader(:store, :producers, :locker, :errors)
+        def initialize(store:, producers:)
+          @store = store
+          @producers = producers
+          @locker = Locker.new
+          @errors = []
+        end
+
+        def call
+          while(enqueue_all) do end
+          errors.empty?
+        end
+
+        def enqueue_all
+          enqueued_any = false
+
+          producers.each do |producer|
+            producer.records(store).each do |record|
+              next if locker.locked?(producer:, record:)
+
+              enqueued_any = true
+              locker.lock(producer:, record:)
+
+              begin
+                store.transaction do
+                  producer.call(record)
+                end
+              rescue => e
+                errors << e
+                return false
+              ensure
+                locker.unlock(producer:, record:)
+              end
+            end
+          end
+
+          enqueued_any
+        end
+      end
+    end
+  end
+end
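
One behavioral difference from the asynchronous processor is worth noting: here the rescue returns false immediately, so a single failing producer aborts the current pass, whereas the asynchronous loop merely stops enqueueing new work and lets already-spawned tasks finish. In both cases success ultimately means the errors array stayed empty.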
@@ -0,0 +1,56 @@
+require "securerandom"
+
+module ConcurrentPipeline
+  module Pipelines
+    class Schema
+      PROCESSORS = {
+        sync: Processors::Synchronous,
+        async: Processors::Asynchronous,
+      }
+
+      Producer = Struct.new(:query, :block) do
+        def call(*a, **p)
+          instance_exec(*a, **p, &block)
+        end
+
+        def shell
+          Shell
+        end
+
+        def records(store)
+          if query.is_a?(Proc)
+            query.call
+          else
+            # Query is a hash with record_name and filters
+            store.where(query[:record_name], **query[:filters])
+          end
+        end
+      end
+
+      def producers
+        @producers ||= []
+      end
+
+      def processor(type, **attrs)
+        @processor = {type:, attrs:}
+      end
+
+      def build_processor(store)
+        PROCESSORS
+          .fetch(@processor.fetch(:type))
+          .new(store:, producers:, **@processor.fetch(:attrs))
+      end
+
+      def process(query_or_record_name, **filters, &block)
+        if query_or_record_name.is_a?(Proc)
+          # Lambda-based query (current behavior)
+          producers << Producer.new(query: query_or_record_name, block:)
+        else
+          # Record name with filters
+          query = { record_name: query_or_record_name, filters: }
+          producers << Producer.new(query:, block:)
+        end
+      end
+    end
+  end
+end
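
The schema is the user-facing DSL: `processor` selects and configures one of the PROCESSORS (extra keywords are forwarded to its constructor), and `process` registers a producer whose query is either a record name plus filters, resolved through `store.where`, or a bare proc. A hedged end-to-end sketch, where `:task`, its `status` attribute, and `some_record_source` are assumptions:

pipeline = ConcurrentPipeline::Pipeline.define do
  processor :async, concurrency: 3

  # Hash-style query: every :task record whose status is "pending".
  process(:task, status: "pending") do |task|
    # ... do the work; the block should update the record so it stops
    # matching the query, otherwise it will be re-enqueued on every pass ...
  end

  # Proc-style query: any zero-argument callable returning records.
  process(-> { some_record_source }) do |record|
    # ...
  end
end

Because both processors keep polling until `enqueue_all` finds nothing, termination depends on each producer's query eventually returning no unlocked records.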
@@ -1,27 +1,102 @@
-# frozen_string_literal: true
+require "securerandom"

 module ConcurrentPipeline
   class Store
-    attr_reader :db, :changeset
-    def initialize(db:, changeset:)
-      @changeset = changeset
-      @db = db
+    def self.define(&block)
+      schema = Stores::Schema.new
+      schema.instance_exec(&block)
+
+      klass = Class.new(Store) do
+        define_method(:schema) { schema }
+      end
+
+
+      klass.new(schema.storage)
+    end
+
+    attr_reader :storage
+    def initialize(storage)
+      @storage = storage
+    end
+
+    def transaction(&block)
+      ensure_writable
+
+      if storage.in_transaction?
+        raise "Nested transactions are not supported"
+      end
+
+      storage.transaction(&block)
+
+      nil
+    end
+
+    def create(record_name, **attrs)
+      ensure_writable
+
+      storage.create(
+        name: record_name,
+        attrs: { id: SecureRandom.uuid }.merge(attrs)
+      )
+
+      nil
+    end
+
+    def update(record, **attrs)
+      ensure_writable
+
+      # Create a temporary record to apply and validate setter methods
+      temp_record = record.class.new(record.attributes)
+
+      # Apply attributes using setter methods (will raise NoMethodError if attribute doesn't exist)
+      attrs.each do |key, value|
+        temp_record.public_send("#{key}=", value)
+      end
+
+      storage.update(
+        name: record.class.record_name,
+        id: record.id,
+        attrs: temp_record.attributes
+      )
+
+      nil
+    end
+
+    def all(record_name)
+      storage
+        .all(name: record_name)
+        .map { schema.build(record_name, attrs: _1) }
     end

-    def find(...)
-      db.find(...)
+    def where(record_name, **filters)
+      records = all(record_name)
+
+      return records if filters.empty?
+
+      records.select do |record|
+        filters.all? do |key, value|
+          attr_value = record.public_send(key)
+          if value.respond_to?(:call)
+            value.call(attr_value)
+          else
+            attr_value == value
+          end
+        end
+      end
     end

-    def all(...)
-      db.find(...)
+    def versions
+      storage.versions.map { self.class.new(_1) }
    end

-    def create(...)
-      changeset.create(...)
+    def restore
+      self.class.new(storage.restore)
     end

-    def update(...)
-      changeset.update(...)
+    def ensure_writable
+      unless storage.writeable?
+        raise "Unwritable storage: Must 'restore' it before you can write to it"
+      end
     end
   end
 end
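
Two details of the new Store are worth calling out: writes go through `ensure_writable` (and, in the processors, a storage transaction), and `where` accepts either plain values, compared with `==`, or callables, invoked with the attribute's value. For example, with hypothetical :task records that have status/retries attributes:

store.where(:task, status: "pending")          # equality match
store.where(:task, retries: ->(n) { n > 3 })   # predicate match

store.transaction do
  store.create(:task, title: "ship v1")        # id is filled in via SecureRandom.uuid
end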
@@ -0,0 +1,47 @@
+module ConcurrentPipeline
+  module Stores
+    class Schema
+      class Record
+        class << self
+          def attribute(name, **options)
+            attributes << name
+            attribute_defaults[name] = options[:default] if options.key?(:default)
+
+            define_method(name) do
+              attributes[name]
+            end
+
+            define_method("#{name}=") do |value|
+              @attributes[name] = value
+            end
+          end
+
+          def attributes
+            @attributes ||= []
+          end
+
+          def attribute_defaults
+            @attribute_defaults ||= {}
+          end
+
+          def inherited(mod)
+            mod.attribute(:id)
+          end
+        end
+
+        attr_reader :attributes
+        def initialize(attributes = {})
+          # Apply defaults for missing attributes
+          defaults = self.class.attribute_defaults
+          @attributes = self.class.attributes.each_with_object({}) do |attr_name, hash|
+            if attributes.key?(attr_name)
+              hash[attr_name] = attributes[attr_name]
+            elsif defaults.key?(attr_name)
+              hash[attr_name] = defaults[attr_name]
+            end
+          end
+        end
+      end
+    end
+  end
+end
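
Records are plain attribute bags: `attribute` defines a reader/writer pair backed by the `@attributes` hash, `inherited` gives every record class an `:id`, and defaults apply only to keys absent at initialization. A hypothetical subclass to illustrate:

class Task < ConcurrentPipeline::Stores::Schema::Record
  attribute :title
  attribute :status, default: "pending"
end

task = Task.new(id: "abc", title: "write docs")
task.status         # => "pending" (default applied)
task.status = "done"
task.attributes     # => {id: "abc", title: "write docs", status: "done"}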
@@ -0,0 +1,35 @@
+module ConcurrentPipeline
+  module Stores
+    class Schema
+      STORAGE = {
+        yaml: Storage::Yaml
+      }
+
+      def build(name, attrs:)
+        records.fetch(name).new(attrs)
+      end
+
+      def storage(type = nil, **attrs)
+        @storage = STORAGE.fetch(type).new(**attrs) if type
+        @storage
+      end
+
+      def record(name, &)
+        records[name] = Class.new(Record) do
+          define_singleton_method(:name) { "PipelineRecord.#{name}" }
+          define_singleton_method(:record_name) { name }
+
+          class_exec(&)
+
+          define_method(:inspect) do
+            "#<#{self.class.name} #{attributes.inspect[0..100]}>"
+          end
+        end
+      end
+
+      def records
+        @records ||= {}
+      end
+    end
+  end
+end
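
Finally, the store-side schema ties it together: `storage` selects a backend from STORAGE (only `:yaml` here), and `record` builds a named Record subclass whose body is the attribute DSL above. A hedged sketch; `Storage::Yaml`'s constructor arguments are not part of this diff, so `path:` is an assumption:

store = ConcurrentPipeline::Store.define do
  storage :yaml, path: "db.yml"   # assumed keyword argument

  record :task do
    attribute :title
    attribute :status, default: "pending"
  end
end

store.transaction do
  store.create(:task, title: "ship v1")
end
pending = store.where(:task, status: "pending")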