dipa 0.1.0.pre.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (33) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE.txt +21 -0
  3. data/README.md +126 -0
  4. data/Rakefile +10 -0
  5. data/app/jobs/dipa/application_job.rb +6 -0
  6. data/app/jobs/dipa/service_job.rb +9 -0
  7. data/app/models/concerns/models/dipa/dumpable.rb +16 -0
  8. data/app/models/concerns/models/dipa/loadable.rb +17 -0
  9. data/app/models/concerns/models/dipa/state_attribute_handling.rb +38 -0
  10. data/app/models/dipa/agent.rb +67 -0
  11. data/app/models/dipa/application_record.rb +7 -0
  12. data/app/models/dipa/coordinator.rb +81 -0
  13. data/app/models/modules/models/dipa/status_constants.rb +24 -0
  14. data/app/services/dipa/agent_services/coordinator_state_service.rb +14 -0
  15. data/app/services/dipa/agent_services/post_processing_service.rb +16 -0
  16. data/app/services/dipa/agent_services/processing_service.rb +13 -0
  17. data/app/services/dipa/agent_services/start_processing_service.rb +18 -0
  18. data/app/services/dipa/application_service.rb +9 -0
  19. data/app/services/dipa/coordinator_services/create_agents_service.rb +23 -0
  20. data/app/services/dipa/coordinator_services/start_processing_service.rb +18 -0
  21. data/app/validators/dipa/date_validator.rb +26 -0
  22. data/db/migrate/20220102132652_create_dipa_coordinators.rb +20 -0
  23. data/db/migrate/20220106183616_create_dipa_agents.rb +19 -0
  24. data/lib/dipa/engine.rb +49 -0
  25. data/lib/dipa/errors.rb +12 -0
  26. data/lib/dipa/processor/base.rb +141 -0
  27. data/lib/dipa/processor/each.rb +11 -0
  28. data/lib/dipa/processor/map.rb +8 -0
  29. data/lib/dipa/version.rb +5 -0
  30. data/lib/dipa.rb +37 -0
  31. data/lib/tasks/auto_annotate_models.rake +62 -0
  32. data/lib/tasks/dipa_tasks.rake +5 -0
  33. metadata +184 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 6403004fa28c6c0ae9f9cc40f9da8f5b812ed5ea4feb94bd00210785dd4bb950
4
+ data.tar.gz: 5d485e3096f6b835520844bf6564354eb07640159f4fc7579566df78c8c7c999
5
+ SHA512:
6
+ metadata.gz: 4903b379f5772adf30f2791c74e6355a3a753bb8808a3d8bf655790e7d601e925806d110ec3561993979b3f3aee9a19c5fade0a7d2b17afc7d856e3b5709fecf
7
+ data.tar.gz: 4022abee161ffd77df8a8a11f9712285689287b369d994334c0bc82ca0f6615f087d1f9aeafa184536b09e399a74921e26d15e670274e98ae836d7aff033c1fd
data/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2021 Merten Falk
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,126 @@
1
+ ![status-badge](https://ci.codeberg.org/api/badges/empunkt/dipa/status.svg)
2
+
3
+ # Dipa
4
+
5
+ This gem provides an API for parallel processing like the [parallel
6
+ gem](https://github.com/grosser/parallel) but distributed and scalable over
7
+ different machines. All this with minimum configuration and minimum dependencies
8
+ to specific technologies and using the rails ecosystem.
9
+
10
+ Dipa provides a rails engine which depends on
11
+ [ActiveJob](https://guides.rubyonrails.org/active_job_basics.html) and
12
+ [ActiveStorage](https://guides.rubyonrails.org/active_storage_overview.html).
13
+ You can use whatever backend you like for any of these components and configure
14
+ them for your specific use case.
15
+
16
+ The purpose of this gem is to distribute load-heavy and long-running processing
17
+ of large datasets over multiple processes or machines using
18
+ [ActiveJob](https://guides.rubyonrails.org/active_job_basics.html).
19
+
20
+ ## Installation
21
+
22
+ Before you install Dipa make sure
23
+ [ActiveJob](https://guides.rubyonrails.org/active_job_basics.html) and
24
+ [ActiveStorage](https://guides.rubyonrails.org/active_storage_overview.html) are
25
+ installed and configured properly.
26
+
27
+ Add this line to your application's Gemfile:
28
+
29
+ ```ruby
30
+ gem 'dipa'
31
+ ```
32
+
33
+ And then execute:
34
+ ```bash
35
+ $ bundle install
36
+ ```
37
+
38
+ Or install it yourself as:
39
+ ```bash
40
+ $ gem install dipa
41
+ ```
42
+
43
+ Install Dipa migrations
44
+ ```bash
45
+ bundle exec rake dipa:install:migrations
46
+ bundle exec rake db:migrate
47
+ ```
48
+
49
+ ## Configuration
50
+
51
+ Dipa can be configured in the application config.
52
+
53
+ ```ruby
54
+ config.dipa.agent_queue = :default_queue_for_dipa_agent_jobs
55
+ config.dipa.coordinator_queue = :default_queue_for_coordinator_queue_jobs
56
+ ```
57
+
58
+ If not configured `config.active_job.default_queue_name` or `:default` will be
59
+ used.
60
+
61
+ ## Usage
62
+
63
+ Minimum example:
64
+ ```ruby
65
+ Dipa.map(1..100).with('Integer', :sqrt)
66
+ ```
67
+
68
+ More realistic examples:
69
+ ```ruby
70
+ Dipa.map(large_dataset, options: options).with('ProcessorClassName', :processor_class_method)
71
+ Dipa.each(large_dataset, options: options).with('ProcessorClassName', :processor_class_method)
72
+ ```
73
+
74
+ `Dipa.map` returns an `Array` of the processed items. The result is in the same order as the input (`large_dataset`).
75
+
76
+ `Dipa.each` returns `large_dataset.to_a`.
77
+
78
+ `large_dataset` must be an `Enumerable`.
79
+
80
+ `options` is a hash passed via the `options:` keyword. The following keys are allowed:
81
+
82
+ - `agent_queue:` [Symbol] Defaults to `config.dipa.agent_queue`.
83
+ - `coordinator_queue:` [Symbol] Defaults to `config.dipa.coordinator_queue`.
84
+ - `async:` [true|false] Defaults to `false`. Usually no need to change, but
85
+ could be useful for `Dipa.each` if you have an alternative way to monitor your
86
+ jobs.
87
+ - `keep_data:` [true|false] Defaults to `false`. Useful for debugging. After
88
+ processing all `Dipa::Agent` and `Dipa::Coordinator` records and the
89
+ associated ActiveStorage data will be removed. If you don't want that to
90
+ happen, set this to `true`.
91
+
92
+ `ProcessorClassName` must be a `Class` or a `String`. Defines the class which
93
+ provides the processor method.
94
+
95
+ `:processor_class_method` must be a `Symbol` or a `String`. Defines the method
96
+ which is used to process each single element of `large_dataset`. MUST be a class
97
+ method. MUST accept exactly one element as its argument.
98
+
99
+ ## TODO
100
+
101
+ [TODO.md](TODO.md)
102
+
103
+ ## Development
104
+
105
+ After checking out the repo, run `bin/setup` to install dependencies. Then, run
106
+ `bundle exec rspec` to run the tests. You can also run `bin/console` for an
107
+ interactive prompt that will allow you to experiment.
108
+
109
+ ## Contributing
110
+
111
+ Bug reports and pull requests are welcome on Codeberg at
112
+ https://codeberg.org/empunkt/dipa. This project is intended to be a safe,
113
+ welcoming space for collaboration, and contributors are expected to adhere to
114
+ the [code of
115
+ conduct](https://codeberg.org/empunkt/dipa/src/branch/main/CODE_OF_CONDUCT.md).
116
+
117
+ ## License
118
+
119
+ The gem is available as open source under the terms of the
120
+ [MIT License](https://opensource.org/licenses/MIT).
121
+
122
+ ## Code of Conduct
123
+
124
+ Everyone interacting in the Dipa project's codebases, issue trackers, chat rooms
125
+ and mailing lists is expected to follow the
126
+ [code of conduct](https://codeberg.org/empunkt/dipa/src/branch/main/CODE_OF_CONDUCT.md).
data/Rakefile ADDED
@@ -0,0 +1,10 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'bundler/setup'
4
+
5
+ APP_RAKEFILE = File.expand_path('spec/dummy/Rakefile', __dir__) # absolute path of the Rakefile under spec/dummy
6
+ load 'rails/tasks/engine.rake'
7
+ # Presumably Rails' code statistics tasks (judging by the file name).
8
+ load 'rails/tasks/statistics.rake'
9
+ # Bundler's tasks for building and releasing the gem.
10
+ require 'bundler/gem_tasks'
@@ -0,0 +1,6 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Dipa
4
+ class ApplicationJob < ActiveJob::Base # common base class of the engine's jobs
5
+ end
6
+ end
@@ -0,0 +1,9 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Dipa
4
+ class ServiceJob < ApplicationJob # generic job that runs a service class identified by its name
5
+ def perform(service_class_name:, args: [], kwargs: {}) # service_class_name: String naming a class that responds to .call
6
+ service_class_name.constantize.call(*args, **kwargs) # resolve the constant, then forward positional and keyword arguments
7
+ end
8
+ end
9
+ end
@@ -0,0 +1,16 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Models
4
+ module Dipa
5
+ module Dumpable # concern: store arbitrary Ruby data as a file attachment
6
+ extend ActiveSupport::Concern
7
+ # Marshals `data` and attaches it through the attachment accessor named by `attacher` (e.g. :source_dump).
8
+ def dump_to_file(data:, attacher:)
9
+ io = StringIO.new(Marshal.dump(data), 'rb') # read-only binary IO over the marshalled bytes
10
+ filename = "#{attacher}.dat" # file is named after the attachment, e.g. "source_dump.dat"
11
+
12
+ public_send(attacher).attach(io: io, filename: filename)
13
+ end
14
+ end
15
+ end
16
+ end
@@ -0,0 +1,17 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Models
4
+ module Dipa
5
+ module Loadable # concern: read back data stored as a marshalled file attachment
6
+ extend ActiveSupport::Concern
7
+ # Returns the unmarshalled content of the attachment named by `attacher`, or nil when nothing is attached.
8
+ def load_from_file(attacher:)
9
+ return unless public_send(attacher).attached?
10
+ # NOTE(review): Marshal.load can instantiate arbitrary objects; this is only safe while the stored files are trusted.
11
+ Marshal.load( # rubocop:disable Security/MarshalLoad
12
+ public_send(attacher).download
13
+ )
14
+ end
15
+ end
16
+ end
17
+ end
@@ -0,0 +1,38 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Models
4
+ module Dipa
5
+ module StateAttributeHandling
6
+ extend ActiveSupport::Concern
7
+
8
+ include Modules::Models::Dipa::StatusConstants
9
+
10
+ included do
11
+ attribute :state, :string, default: INITIALIZED_STATE
12
+
13
+ validates :state, presence: true, inclusion: { in: STATES }
14
+ if Rails.version >= '7'
15
+ validates :state, comparison: { equal_to: INITIALIZED_STATE },
16
+ on: :create
17
+ else
18
+ validates :state, inclusion: { in: [INITIALIZED_STATE] },
19
+ on: :create
20
+ end
21
+
22
+ STATES.each do |state_value|
23
+ define_method("#{state_value}?".to_sym) do
24
+ state == state_value
25
+ end
26
+ end
27
+ end
28
+
29
+ def started!
30
+ update!(started_at: Time.zone.now, state: PROCESSING_STATE)
31
+ end
32
+
33
+ def finished!
34
+ update!(finished_at: Time.zone.now, state: PROCESSED_STATE)
35
+ end
36
+ end
37
+ end
38
+ end
@@ -0,0 +1,67 @@
1
+ # frozen_string_literal: true
2
+
3
+ # == Schema Information
4
+ #
5
+ # Table name: dipa_agents
6
+ #
7
+ # id :bigint not null, primary key
8
+ # finished_at :datetime
9
+ # index :integer not null
10
+ # started_at :datetime
11
+ # state :string(255) not null
12
+ # created_at :datetime not null
13
+ # updated_at :datetime not null
14
+ # dipa_coordinator_id :bigint not null
15
+ #
16
+ # Indexes
17
+ #
18
+ # index_dipa_agents_on_dipa_coordinator_id (dipa_coordinator_id)
19
+ #
20
+ # Foreign Keys
21
+ #
22
+ # fk_rails_... (dipa_coordinator_id => dipa_coordinators.id)
23
+ #
24
+
25
+ module Dipa
26
+ class Agent < ApplicationRecord
27
+ include Models::Dipa::Dumpable
28
+ include Models::Dipa::Loadable
29
+ include Models::Dipa::StateAttributeHandling
30
+
31
+ # validation and default for `state` attribute is included by
32
+ # Models::Dipa::StateAttributeHandling
33
+ # `index` is the item's position in the coordinator's source; the coordinator orders results by it.
34
+ validates :index, numericality: { only_integer: true }
35
+
36
+ validates :started_at, 'dipa/date' => true, allow_nil: true
37
+ validates :finished_at, 'dipa/date' => true, allow_nil: true
38
+
39
+ belongs_to :coordinator, inverse_of: :agents,
40
+ foreign_key: :dipa_coordinator_id
41
+ # source_dump: marshalled input item; result_dump: marshalled return value of the processor.
42
+ has_one_attached :source_dump
43
+ has_one_attached :result_dump
44
+ # Returns the unmarshalled processor result, or nil unless the agent is in the processed state.
45
+ def result
46
+ return unless processed?
47
+
48
+ load_from_file(attacher: :result_dump)
49
+ end
50
+ # Returns the unmarshalled input item, or nil when no source dump is attached.
51
+ def source
52
+ load_from_file(attacher: :source_dump)
53
+ end
54
+ # Calls the coordinator's processor class method with `source`, stores the return value and marks the agent processed.
55
+ def process!
56
+ processor_result = coordinator.processor_class_name.constantize
57
+ .public_send(
58
+ coordinator.processor_method_name,
59
+ source
60
+ )
61
+
62
+ dump_to_file(data: processor_result, attacher: :result_dump)
63
+ # NOTE(review): nothing is rescued in this method, so if the processor raises, the agent is left in the processing state.
64
+ finished!
65
+ end
66
+ end
67
+ end
@@ -0,0 +1,7 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Dipa
4
+ class ApplicationRecord < ActiveRecord::Base # abstract base class of the engine's models
5
+ self.abstract_class = true
6
+ end
7
+ end
@@ -0,0 +1,81 @@
1
+ # frozen_string_literal: true
2
+
3
+ # == Schema Information
4
+ #
5
+ # Table name: dipa_coordinators
6
+ #
7
+ # id :bigint not null, primary key
8
+ # agent_queue :string(255) not null
9
+ # coordinator_queue :string(255) not null
10
+ # finished_at :datetime
11
+ # keep_data :boolean default(FALSE), not null
12
+ # processor_class_name :string(255) not null
13
+ # processor_method_name :string(255) not null
14
+ # size :integer not null
15
+ # started_at :datetime
16
+ # state :string(255) not null
17
+ # want_result :boolean default(TRUE), not null
18
+ # created_at :datetime not null
19
+ # updated_at :datetime not null
20
+ #
21
+
22
+ module Dipa
23
+ class Coordinator < ApplicationRecord
24
+ include Models::Dipa::Dumpable
25
+ include Models::Dipa::Loadable
26
+ include Models::Dipa::StateAttributeHandling
27
+
28
+ # validation and default for `state` attribute is included by
29
+ # Models::Dipa::StateAttributeHandling
30
+
31
+ attribute :keep_data, :boolean, default: false
32
+ attribute :want_result, :boolean, default: true
33
+
34
+ validates :agent_queue, presence: true
35
+ validates :coordinator_queue, presence: true
36
+ # `size` is the number of source items; it is compared with the agent count to tell that all agents exist.
37
+ validates :size, numericality: { only_integer: true }
38
+
39
+ validates :started_at, 'dipa/date' => true, allow_nil: true
40
+ validates :finished_at, 'dipa/date' => true, allow_nil: true
41
+
42
+ validates :processor_class_name, presence: true
43
+ validates :processor_method_name, presence: true
44
+
45
+ validates :keep_data, inclusion: [true, false]
46
+ validates :want_result, inclusion: [true, false]
47
+
48
+ has_many :agents, dependent: :destroy, inverse_of: :coordinator,
49
+ foreign_key: :dipa_coordinator_id
50
+ # Marshalled copy of the complete source collection.
51
+ has_one_attached :source_dump
52
+ # Returns the agents' results in index order, or nil unless the coordinator is in the processed state.
53
+ def result
54
+ return unless processed?
55
+
56
+ _result_from_agents
57
+ end
58
+ # Returns the unmarshalled source collection, or nil when no source dump is attached.
59
+ def source
60
+ load_from_file(attacher: :source_dump)
61
+ end
62
+ # True once `size` agents exist and every one of them is in the processed state.
63
+ def all_agents_created_and_processed?
64
+ _all_agents_created? && _all_agents_processed?
65
+ end
66
+
67
+ private
68
+ # Collects each agent's result, ordered by agent index, with the result_dump attachments preloaded.
69
+ def _result_from_agents
70
+ agents.with_attached_result_dump.order(:index).map(&:result)
71
+ end
72
+
73
+ def _all_agents_processed?
74
+ agents.all?(&:processed?)
75
+ end
76
+
77
+ def _all_agents_created?
78
+ agents.length == size
79
+ end
80
+ end
81
+ end
@@ -0,0 +1,24 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Modules
4
+ module Models
5
+ module Dipa
6
+ module StatusConstants # string values of the `state` attribute
7
+ ABORTED_STATE = 'aborted'
8
+ INITIALIZED_STATE = 'initialized'
9
+ PROCESSED_STATE = 'processed'
10
+ PROCESSING_STATE = 'processing'
11
+ PROCESSING_FAILED_STATE = 'processing_failed'
12
+ TIMEOUT_STATE = 'timed_out'
13
+ STATES = [ # every allowed state; drives the inclusion validation and the generated `<state>?` predicates
14
+ ABORTED_STATE,
15
+ INITIALIZED_STATE,
16
+ PROCESSED_STATE,
17
+ PROCESSING_STATE,
18
+ PROCESSING_FAILED_STATE,
19
+ TIMEOUT_STATE
20
+ ].freeze
21
+ end
22
+ end
23
+ end
24
+ end
@@ -0,0 +1,14 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Dipa
4
+ module AgentServices
5
+ class CoordinatorStateService < ApplicationService # finishes an agent's coordinator once all of its agents are processed
6
+ def call(agent:)
7
+ return if agent.coordinator.processed?
8
+ return unless agent.coordinator.all_agents_created_and_processed?
9
+ # Every expected agent exists and is processed, so the coordinator is done.
10
+ agent.coordinator.finished!
11
+ end
12
+ end
13
+ end
14
+ end
@@ -0,0 +1,16 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Dipa
4
+ module AgentServices
5
+ class PostProcessingService < ApplicationService
6
+ def call(agent:)
7
+ Dipa::ServiceJob.set(queue_as: agent.coordinator.agent_queue)
8
+ .perform_later(
9
+ service_class_name:
10
+ 'Dipa::AgentServices::CoordinatorStateService',
11
+ kwargs: { agent: agent }
12
+ )
13
+ end
14
+ end
15
+ end
16
+ end
@@ -0,0 +1,13 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Dipa
4
+ module AgentServices
5
+ class ProcessingService < ApplicationService # processes one agent, then triggers its post-processing
6
+ def call(agent:)
7
+ agent.process!
8
+ # Reached only when process! returned without raising.
9
+ Dipa::AgentServices::PostProcessingService.call(agent: agent)
10
+ end
11
+ end
12
+ end
13
+ end
@@ -0,0 +1,18 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Dipa
4
+ module AgentServices
5
+ class StartProcessingService < ApplicationService
6
+ def call(agent:)
7
+ agent.started!
8
+
9
+ Dipa::ServiceJob.set(queue_as: agent.coordinator.agent_queue)
10
+ .perform_later(
11
+ service_class_name:
12
+ 'Dipa::AgentServices::ProcessingService',
13
+ kwargs: { agent: agent }
14
+ )
15
+ end
16
+ end
17
+ end
18
+ end
@@ -0,0 +1,9 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Dipa
4
+ class ApplicationService # base class of the services; adds a class-level `.call` shortcut
5
+ def self.call(...) # builds a new instance and forwards all arguments to its #call
6
+ new.call(...)
7
+ end
8
+ end
9
+ end
@@ -0,0 +1,23 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Dipa
4
+ module CoordinatorServices
5
+ class CreateAgentsService < ApplicationService
6
+ def call(coordinator:)
7
+ coordinator.source.each_with_index do |item, i|
8
+ _create_agent(coordinator: coordinator, item: item, index: i)
9
+ end
10
+ end
11
+
12
+ private
13
+
14
+ def _create_agent(coordinator:, item:, index:)
15
+ agent = coordinator.agents.create!(index: index)
16
+
17
+ agent.dump_to_file(data: item, attacher: :source_dump)
18
+
19
+ Dipa::AgentServices::StartProcessingService.call(agent: agent)
20
+ end
21
+ end
22
+ end
23
+ end
@@ -0,0 +1,18 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Dipa
4
+ module CoordinatorServices
5
+ class StartProcessingService < ApplicationService
6
+ def call(coordinator:)
7
+ coordinator.started!
8
+
9
+ ServiceJob.set(queue_as: coordinator.coordinator_queue)
10
+ .perform_later(
11
+ service_class_name:
12
+ 'Dipa::CoordinatorServices::CreateAgentsService',
13
+ kwargs: { coordinator: coordinator }
14
+ )
15
+ end
16
+ end
17
+ end
18
+ end
@@ -0,0 +1,26 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Dipa
4
+ class DateValidator < ActiveModel::EachValidator # referenced in the models as `'dipa/date' => true`
5
+ def validate_each(record, attribute, value)
6
+ return if self.class.valid?(value: value)
7
+ # Adds an :invalid error; a custom `message:` option replaces the default text.
8
+ record.errors.add(
9
+ attribute,
10
+ :invalid,
11
+ message: (options[:message] || 'is not valid Date')
12
+ )
13
+ end
14
+ # True when `value` is not blank and its string form can be parsed by Date.parse.
15
+ def self.valid?(value:)
16
+ return false if value.blank?
17
+
18
+ begin
19
+ Date.parse(value.to_s)
20
+ true
21
+ rescue ArgumentError
22
+ false
23
+ end
24
+ end
25
+ end
26
+ end
@@ -0,0 +1,20 @@
1
+ # frozen_string_literal: true
2
+
3
+ class CreateDipaCoordinators < ActiveRecord::Migration[6.0]
4
+ def change # rubocop:disable Metrics/MethodLength
5
+ create_table :dipa_coordinators do |t| # table behind Dipa::Coordinator
6
+ t.boolean :keep_data, default: false, null: false
7
+ t.boolean :want_result, default: true, null: false
8
+ t.datetime :finished_at
9
+ t.datetime :started_at
10
+ t.integer :size, null: false
11
+ t.string :agent_queue, null: false
12
+ t.string :coordinator_queue, null: false
13
+ t.string :processor_class_name, null: false
14
+ t.string :processor_method_name, null: false
15
+ t.string :state, null: false
16
+
17
+ t.timestamps
18
+ end
19
+ end
20
+ end
@@ -0,0 +1,19 @@
1
+ # frozen_string_literal: true
2
+
3
+ class CreateDipaAgents < ActiveRecord::Migration[6.0]
4
+ def change # rubocop:disable Metrics/MethodLength
5
+ create_table :dipa_agents do |t| # table behind Dipa::Agent
6
+ t.datetime :finished_at
7
+ t.datetime :started_at
8
+ t.integer :index, null: false
9
+ t.string :state, null: false
10
+
11
+ t.timestamps
12
+ end
13
+ # Adds the mandatory, indexed dipa_coordinator_id reference with a foreign key constraint.
14
+ add_belongs_to(
15
+ :dipa_agents, :dipa_coordinator,
16
+ foreign_key: true, index: true, null: false
17
+ )
18
+ end
19
+ end
@@ -0,0 +1,49 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'socket'
4
+ require 'rails'
5
+ require 'active_job/railtie'
6
+ require 'active_record/railtie'
7
+ require 'active_storage/engine'
8
+
9
+ module Dipa
10
+ class Engine < ::Rails::Engine
11
+ isolate_namespace Dipa
12
+
13
+ config.eager_load_namespaces << Dipa
14
+
15
+ config.generators do |g|
16
+ g.test_framework :rspec
17
+ g.api_only = true
18
+ end
19
+ # Settings container read by the initializers below, e.g. config.dipa.agent_queue.
20
+ config.dipa = ActiveSupport::OrderedOptions.new
21
+ # Queue names: config.dipa value, else ActiveJob's default queue name, else :default; stored on Dipa as symbols.
22
+ initializer 'dipa.queue_names' do
23
+ config.after_initialize do |app|
24
+ Dipa.agent_queue = (
25
+ app.config.dipa.agent_queue ||
26
+ app.config.active_job.default_queue_name ||
27
+ :default
28
+ ).to_sym
29
+ Dipa.coordinator_queue = (
30
+ app.config.dipa.coordinator_queue ||
31
+ app.config.active_job.default_queue_name ||
32
+ :default
33
+ ).to_sym
34
+ end
35
+ end
36
+ # Timeouts: config.dipa value, else the Dipa::DEFAULT_* constant; stored on Dipa as integers.
37
+ initializer 'dipa.timeouts' do
38
+ config.after_initialize do |app|
39
+ Dipa.agent_timeout = (
40
+ app.config.dipa.agent_timeout || Dipa::DEFAULT_AGENT_TIMEOUT
41
+ ).to_i
42
+ Dipa.coordinator_timeout = (
43
+ app.config.dipa.coordinator_timeout ||
44
+ Dipa::DEFAULT_COORDINATOR_TIMEOUT
45
+ ).to_i
46
+ end
47
+ end
48
+ end
49
+ end
@@ -0,0 +1,12 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Dipa
4
+ class Error < StandardError; end # common ancestor of all errors defined here
5
+ # The first four are raised by Processor::Base depending on the coordinator's final state.
6
+ class AbortedError < Error; end
7
+ class ProcessingFailedError < Error; end
8
+ class TimeoutError < Error; end
9
+ class UnknownProcessingStateError < Error; end
10
+ class UnknownProcessorClassError < Error; end # processor class name could not be resolved
11
+ class UnknownProcessorMethodError < Error; end # processor class does not respond to the given method
12
+ end
@@ -0,0 +1,141 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Dipa
4
+ module Processor
5
+ class Base
6
+ SYNC_MODE_WAIT_CYCLE_SECONDS = 2
7
+
8
+ DEFAULT_OPTIONS = {
9
+ # queue names
10
+ agent_queue: Dipa.agent_queue,
11
+ coordinator_queue: Dipa.coordinator_queue,
12
+ # timeouts
13
+ agent_timeout: Dipa.agent_timeout,
14
+ coordinator_timeout: Dipa.coordinator_timeout,
15
+ # misc
16
+ async: false,
17
+ keep_data: false,
18
+ want_result: true
19
+ }.freeze
20
+ OVERRIDE_OPTIONS = {}.freeze
21
+
22
+ def with(processor_class, processor_method)
23
+ _validate_processor_arguments(processor_class: processor_class.to_s,
24
+ processor_method: processor_method.to_s)
25
+
26
+ _prepare_coordinator(processor_class: processor_class.to_s,
27
+ processor_method: processor_method.to_s)
28
+
29
+ _start_process
30
+
31
+ return if _async?
32
+
33
+ _wait_for_it
34
+ end
35
+
36
+ private
37
+
38
+ attr_reader :_source, :_raw_opts, :_coordinator
39
+
40
+ def initialize(source, options: {})
41
+ options.assert_valid_keys(*DEFAULT_OPTIONS.keys)
42
+
43
+ @_source = source
44
+ @_raw_opts = options
45
+ end
46
+
47
+ def _validate_processor_arguments(processor_class:, processor_method:)
48
+ return if processor_class.constantize.respond_to?(processor_method)
49
+
50
+ raise Dipa::UnknownProcessorMethodError,
51
+ "Method .#{processor_method} does not exist on processor class " \
52
+ "#{processor_class}"
53
+ rescue NameError => e
54
+ raise Dipa::UnknownProcessorClassError, e.original_message
55
+ end
56
+
57
+ def _wait_for_it
58
+ sleep(SYNC_MODE_WAIT_CYCLE_SECONDS) while _wait_for_it?
59
+
60
+ return _result if _coordinator.processed?
61
+
62
+ # must be an error then
63
+ _raise_error
64
+ end
65
+
66
+ def _wait_for_it?
67
+ _coordinator.reload
68
+
69
+ _coordinator.initialized? || _coordinator.processing?
70
+ end
71
+
72
+ def _raise_error
73
+ raise Dipa::AbortedError if _coordinator.aborted?
74
+ raise Dipa::ProcessingFailedError if _coordinator.processing_failed?
75
+ raise Dipa::TimeoutError if _coordinator.timed_out?
76
+
77
+ raise Dipa::UnknownProcessingStateError
78
+ end
79
+
80
+ def _result
81
+ result = _fetch_result
82
+
83
+ _maybe_cleanup
84
+
85
+ result
86
+ end
87
+
88
+ def _maybe_cleanup
89
+ _coordinator.destroy! unless _keep_data?
90
+ end
91
+
92
+ def _fetch_result
93
+ _want_result? ? _coordinator.result : _coordinator.source
94
+ end
95
+
96
+ def _start_process
97
+ Dipa::CoordinatorServices::StartProcessingService.call(
98
+ coordinator: _coordinator
99
+ )
100
+ end
101
+
102
+ def _prepare_coordinator(processor_class:, processor_method:)
103
+ @_coordinator = Dipa::Coordinator.create!(
104
+ agent_queue: _agent_queue,
105
+ coordinator_queue: _coordinator_queue,
106
+ keep_data: _keep_data?,
107
+ processor_class_name: processor_class,
108
+ processor_method_name: processor_method,
109
+ size: _source.to_a.size,
110
+ want_result: _want_result?
111
+ )
112
+
113
+ _coordinator.dump_to_file(data: _source.to_a, attacher: :source_dump)
114
+ end
115
+
116
+ def _agent_queue
117
+ _option(option: :agent_queue)
118
+ end
119
+
120
+ def _coordinator_queue
121
+ _option(option: :coordinator_queue)
122
+ end
123
+
124
+ def _async?
125
+ _option(option: :async)
126
+ end
127
+
128
+ def _keep_data?
129
+ _option(option: :keep_data)
130
+ end
131
+
132
+ def _want_result?
133
+ _option(option: :want_result)
134
+ end
135
+
136
+ def _option(option:)
137
+ OVERRIDE_OPTIONS[option] || _raw_opts[option] || DEFAULT_OPTIONS[option]
138
+ end
139
+ end
140
+ end
141
+ end
@@ -0,0 +1,11 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Dipa
4
+ module Processor
5
+ class Each < Base # processor built by Dipa.each
6
+ OVERRIDE_OPTIONS = { # intended to pin want_result to false for each-style processing
7
+ want_result: false
8
+ }.freeze
9
+ end
10
+ end
11
+ end
@@ -0,0 +1,8 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Dipa
4
+ module Processor
5
+ class Map < Base # processor built by Dipa.map; adds nothing to Base
6
+ end
7
+ end
8
+ end
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Dipa
4
+ VERSION = '0.1.0.pre.1' # gem version string
5
+ end
data/lib/dipa.rb ADDED
@@ -0,0 +1,37 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'active_support'
4
+
5
+ require 'dipa/version'
6
+ require 'dipa/engine'
7
+ require 'dipa/errors'
8
+
9
+ module Dipa
10
+ extend ActiveSupport::Autoload
11
+
12
+ DEFAULT_AGENT_TIMEOUT = 0
13
+ DEFAULT_COORDINATOR_TIMEOUT = 0
14
+ # Module-level settings; the engine's initializers assign them after application initialization.
15
+ # rubocop:disable ThreadSafety/ClassAndModuleAttributes
16
+ mattr_accessor :agent_queue
17
+ mattr_accessor :agent_timeout, default: DEFAULT_AGENT_TIMEOUT
18
+ mattr_accessor :coordinator_queue
19
+ mattr_accessor :coordinator_timeout, default: DEFAULT_COORDINATOR_TIMEOUT
20
+ # rubocop:enable ThreadSafety/ClassAndModuleAttributes
21
+ # Builds a Map processor for `source`; call `.with(processor_class, processor_method)` on it to run. `options` is a keyword argument.
22
+ def self.map(source, options: {})
23
+ Dipa::Processor::Map.new(source, options: options)
24
+ end
25
+ # Builds an Each processor for `source`; used the same way as the processor returned by `map`.
26
+ def self.each(source, options: {})
27
+ Dipa::Processor::Each.new(source, options: options)
28
+ end
29
+
30
+ module Processor
31
+ extend ActiveSupport::Autoload
32
+
33
+ autoload :Base
34
+ autoload :Each
35
+ autoload :Map
36
+ end
37
+ end
@@ -0,0 +1,62 @@
1
+ # frozen_string_literal: true
2
+
3
+ # NOTE: only doing this in development as some production environments (Heroku)
4
+ # NOTE: are sensitive to local FS writes, and besides -- it's just not proper
5
+ # NOTE: to have a dev-mode tool do its thing in production.
6
+ if Rails.env.development?
7
+ require 'annotate'
8
+ desc 'annotate'
9
+ task set_annotation_options: :environment do # rubocop:disable Metrics/BlockLength
10
+ # You can override any of these by setting an environment variable of the
11
+ # same name.
12
+ Annotate.set_defaults(
13
+ 'active_admin' => 'false',
14
+ 'additional_file_patterns' => [],
15
+ 'routes' => 'false',
16
+ 'models' => 'true',
17
+ 'position_in_routes' => 'before',
18
+ 'position_in_class' => 'before',
19
+ 'position_in_test' => 'before',
20
+ 'position_in_fixture' => 'before',
21
+ 'position_in_factory' => 'before',
22
+ 'position_in_serializer' => 'before',
23
+ 'show_foreign_keys' => 'true',
24
+ 'show_complete_foreign_keys' => 'false',
25
+ 'show_indexes' => 'true',
26
+ 'simple_indexes' => 'false',
27
+ 'model_dir' => 'app/models',
28
+ 'root_dir' => '',
29
+ 'include_version' => 'false',
30
+ 'require' => '',
31
+ 'exclude_tests' => 'true',
32
+ 'exclude_fixtures' => 'true',
33
+ 'exclude_factories' => 'false',
34
+ 'exclude_serializers' => 'false',
35
+ 'exclude_scaffolds' => 'true',
36
+ 'exclude_controllers' => 'true',
37
+ 'exclude_helpers' => 'true',
38
+ 'exclude_sti_subclasses' => 'false',
39
+ 'ignore_model_sub_dir' => 'false',
40
+ 'ignore_columns' => nil,
41
+ 'ignore_routes' => nil,
42
+ 'ignore_unknown_models' => 'false',
43
+ 'hide_limit_column_types' => 'integer,bigint,boolean',
44
+ 'hide_default_column_types' => 'json,jsonb,hstore',
45
+ 'skip_on_db_migrate' => 'false',
46
+ 'format_bare' => 'true',
47
+ 'format_rdoc' => 'false',
48
+ 'format_yard' => 'false',
49
+ 'format_markdown' => 'false',
50
+ 'sort' => 'true',
51
+ 'force' => 'false',
52
+ 'frozen' => 'false',
53
+ 'classified_sort' => 'true',
54
+ 'trace' => 'false',
55
+ 'wrapper_open' => nil,
56
+ 'wrapper_close' => nil,
57
+ 'with_comment' => 'true'
58
+ )
59
+ end
60
+
61
+ Annotate.load_tasks
62
+ end
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+ # desc "Explaining what the task does"
3
+ # task :dipa do
4
+ # # Task goes here
5
+ # end
metadata ADDED
@@ -0,0 +1,184 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: dipa
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0.pre.1
5
+ platform: ruby
6
+ authors:
7
+ - Merten Falk
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2022-03-05 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: activejob
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">"
18
+ - !ruby/object:Gem::Version
19
+ version: 6.0.0
20
+ - - "<"
21
+ - !ruby/object:Gem::Version
22
+ version: 8.0.0
23
+ type: :runtime
24
+ prerelease: false
25
+ version_requirements: !ruby/object:Gem::Requirement
26
+ requirements:
27
+ - - ">"
28
+ - !ruby/object:Gem::Version
29
+ version: 6.0.0
30
+ - - "<"
31
+ - !ruby/object:Gem::Version
32
+ version: 8.0.0
33
+ - !ruby/object:Gem::Dependency
34
+ name: activerecord
35
+ requirement: !ruby/object:Gem::Requirement
36
+ requirements:
37
+ - - ">"
38
+ - !ruby/object:Gem::Version
39
+ version: 6.0.0
40
+ - - "<"
41
+ - !ruby/object:Gem::Version
42
+ version: 8.0.0
43
+ type: :runtime
44
+ prerelease: false
45
+ version_requirements: !ruby/object:Gem::Requirement
46
+ requirements:
47
+ - - ">"
48
+ - !ruby/object:Gem::Version
49
+ version: 6.0.0
50
+ - - "<"
51
+ - !ruby/object:Gem::Version
52
+ version: 8.0.0
53
+ - !ruby/object:Gem::Dependency
54
+ name: activestorage
55
+ requirement: !ruby/object:Gem::Requirement
56
+ requirements:
57
+ - - ">"
58
+ - !ruby/object:Gem::Version
59
+ version: 6.0.0
60
+ - - "<"
61
+ - !ruby/object:Gem::Version
62
+ version: 8.0.0
63
+ type: :runtime
64
+ prerelease: false
65
+ version_requirements: !ruby/object:Gem::Requirement
66
+ requirements:
67
+ - - ">"
68
+ - !ruby/object:Gem::Version
69
+ version: 6.0.0
70
+ - - "<"
71
+ - !ruby/object:Gem::Version
72
+ version: 8.0.0
73
+ - !ruby/object:Gem::Dependency
74
+ name: activesupport
75
+ requirement: !ruby/object:Gem::Requirement
76
+ requirements:
77
+ - - ">"
78
+ - !ruby/object:Gem::Version
79
+ version: 6.0.0
80
+ - - "<"
81
+ - !ruby/object:Gem::Version
82
+ version: 8.0.0
83
+ type: :runtime
84
+ prerelease: false
85
+ version_requirements: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - ">"
88
+ - !ruby/object:Gem::Version
89
+ version: 6.0.0
90
+ - - "<"
91
+ - !ruby/object:Gem::Version
92
+ version: 8.0.0
93
+ - !ruby/object:Gem::Dependency
94
+ name: rake
95
+ requirement: !ruby/object:Gem::Requirement
96
+ requirements:
97
+ - - "~>"
98
+ - !ruby/object:Gem::Version
99
+ version: '13.0'
100
+ type: :runtime
101
+ prerelease: false
102
+ version_requirements: !ruby/object:Gem::Requirement
103
+ requirements:
104
+ - - "~>"
105
+ - !ruby/object:Gem::Version
106
+ version: '13.0'
107
+ description: |
108
+ This gem provides an API for parallel processing like the [parallel
109
+ gem](https://github.com/grosser/parallel) but distributed and scalable over
110
+ different machines. All this with minimum configuration and minimum
111
+ dependencies to specific technologies and using the rails ecosystem.
112
+
113
+ Dipa provides a rails engine which depends on
114
+ [ActiveJob](https://guides.rubyonrails.org/active_job_basics.html) and
115
+ [ActiveStorage](https://guides.rubyonrails.org/active_storage_overview.html).
116
+ You can use whatever backend you like for any of this components and
117
+ configure them for your specific usecase.
118
+
119
+ The purpose of this gem is to distribute load heavy and long running
120
+ processing of large datasets over multiple processes or machines using
121
+ [ActiveJob](https://guides.rubyonrails.org/active_job_basics.html).
122
+ email:
123
+ - empunkt@mailbox.org
124
+ executables: []
125
+ extensions: []
126
+ extra_rdoc_files: []
127
+ files:
128
+ - LICENSE.txt
129
+ - README.md
130
+ - Rakefile
131
+ - app/jobs/dipa/application_job.rb
132
+ - app/jobs/dipa/service_job.rb
133
+ - app/models/concerns/models/dipa/dumpable.rb
134
+ - app/models/concerns/models/dipa/loadable.rb
135
+ - app/models/concerns/models/dipa/state_attribute_handling.rb
136
+ - app/models/dipa/agent.rb
137
+ - app/models/dipa/application_record.rb
138
+ - app/models/dipa/coordinator.rb
139
+ - app/models/modules/models/dipa/status_constants.rb
140
+ - app/services/dipa/agent_services/coordinator_state_service.rb
141
+ - app/services/dipa/agent_services/post_processing_service.rb
142
+ - app/services/dipa/agent_services/processing_service.rb
143
+ - app/services/dipa/agent_services/start_processing_service.rb
144
+ - app/services/dipa/application_service.rb
145
+ - app/services/dipa/coordinator_services/create_agents_service.rb
146
+ - app/services/dipa/coordinator_services/start_processing_service.rb
147
+ - app/validators/dipa/date_validator.rb
148
+ - db/migrate/20220102132652_create_dipa_coordinators.rb
149
+ - db/migrate/20220106183616_create_dipa_agents.rb
150
+ - lib/dipa.rb
151
+ - lib/dipa/engine.rb
152
+ - lib/dipa/errors.rb
153
+ - lib/dipa/processor/base.rb
154
+ - lib/dipa/processor/each.rb
155
+ - lib/dipa/processor/map.rb
156
+ - lib/dipa/version.rb
157
+ - lib/tasks/auto_annotate_models.rake
158
+ - lib/tasks/dipa_tasks.rake
159
+ homepage: https://codeberg.org/empunkt/dipa
160
+ licenses:
161
+ - MIT
162
+ metadata:
163
+ rubygems_mfa_required: 'true'
164
+ post_install_message:
165
+ rdoc_options: []
166
+ require_paths:
167
+ - lib
168
+ required_ruby_version: !ruby/object:Gem::Requirement
169
+ requirements:
170
+ - - ">="
171
+ - !ruby/object:Gem::Version
172
+ version: 2.7.0
173
+ required_rubygems_version: !ruby/object:Gem::Requirement
174
+ requirements:
175
+ - - ">"
176
+ - !ruby/object:Gem::Version
177
+ version: 1.3.1
178
+ requirements: []
179
+ rubygems_version: 3.3.7
180
+ signing_key:
181
+ specification_version: 4
182
+ summary: Rails engine that provides an API to execute code in parallel and distributed
183
+ using the rails ecosystem.
184
+ test_files: []