attr-gather 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.github/workflows/deploy.yml +15 -0
- data/.github/workflows/doc.yml +25 -0
- data/.github/workflows/ruby.yml +20 -0
- data/.gitignore +12 -0
- data/.ignore +1 -0
- data/.rspec +3 -0
- data/.rubocop.yml +38 -0
- data/.ruby-version +1 -0
- data/.travis.yml +7 -0
- data/.vim/coc-settings.json +12 -0
- data/.vim/install.sh +38 -0
- data/.yardopts +4 -0
- data/CODE_OF_CONDUCT.md +74 -0
- data/Gemfile +19 -0
- data/Gemfile.lock +147 -0
- data/LICENSE.txt +21 -0
- data/README.md +63 -0
- data/Rakefile +21 -0
- data/attr-gather.gemspec +38 -0
- data/bin/console +15 -0
- data/bin/setup +8 -0
- data/bin/solargraph +29 -0
- data/examples/post_enhancer.rb +119 -0
- data/examples/post_enhancer.svg +55 -0
- data/lib/attr-gather.rb +3 -0
- data/lib/attr/gather.rb +16 -0
- data/lib/attr/gather/aggregators.rb +31 -0
- data/lib/attr/gather/aggregators/base.rb +38 -0
- data/lib/attr/gather/aggregators/deep_merge.rb +50 -0
- data/lib/attr/gather/aggregators/shallow_merge.rb +40 -0
- data/lib/attr/gather/concerns/identifiable.rb +24 -0
- data/lib/attr/gather/concerns/registrable.rb +50 -0
- data/lib/attr/gather/filters.rb +34 -0
- data/lib/attr/gather/filters/base.rb +20 -0
- data/lib/attr/gather/filters/contract.rb +60 -0
- data/lib/attr/gather/filters/filtering.rb +27 -0
- data/lib/attr/gather/filters/noop.rb +14 -0
- data/lib/attr/gather/filters/result.rb +23 -0
- data/lib/attr/gather/version.rb +7 -0
- data/lib/attr/gather/workflow.rb +29 -0
- data/lib/attr/gather/workflow/async_task_executor.rb +17 -0
- data/lib/attr/gather/workflow/callable.rb +84 -0
- data/lib/attr/gather/workflow/dot_serializer.rb +46 -0
- data/lib/attr/gather/workflow/dsl.rb +184 -0
- data/lib/attr/gather/workflow/graphable.rb +50 -0
- data/lib/attr/gather/workflow/task.rb +29 -0
- data/lib/attr/gather/workflow/task_execution_result.rb +58 -0
- data/lib/attr/gather/workflow/task_executor.rb +31 -0
- data/lib/attr/gather/workflow/task_graph.rb +107 -0
- metadata +150 -0
@@ -0,0 +1,46 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'tempfile'
|
4
|
+
|
5
|
+
module Attr
  module Gather
    module Workflow
      # Serializes a task graph into Graphviz DOT source.
      #
      # The graph must respond to +tsort+ (an ordered list of tasks) and
      # +to_h+ (a hash keyed by task). Each task must respond to +name+ and
      # +depends_on?+.
      #
      # @api private
      class DotSerializer
        # @param task_graph [#tsort, #to_h] the graph to serialize
        def initialize(task_graph)
          @task_graph = task_graph
        end

        # Builds the DOT document for the graph, one edge per line.
        #
        # @return [String] a `digraph TaskGraph { ... }` document
        def to_dot
          edges = @task_graph.tsort.flat_map { |task| serialize_row(task) }
          joined_lines = edges.map { |edge| " #{edge}" }.join("\n").strip

          <<~DOT
            digraph TaskGraph {
              #{joined_lines}
            }
          DOT
        end

        # Renders the graph to a temporary SVG file with `dot` and opens that
        # file with `xdg-open`.
        #
        # Both commands are passed as argument vectors, so the temporary path
        # is never parsed by a shell (it may contain spaces or metacharacters
        # depending on the configured temp directory).
        #
        # @return [String] whatever `xdg-open` wrote to standard output
        def preview
          Tempfile.open(['task-graph-preview', '.svg']) do |tf|
            IO.popen(['dot', '-Tsvg', '-o', tf.path], 'w') { |dot| dot.write(to_dot) }
            IO.popen(['xdg-open', tf.path], &:read)
          end
        end

        private

        # Builds one `task -> dependant;` edge for every task that depends on
        # the given task.
        #
        # @param task [#name] the task the edges start from
        # @return [Array<String>]
        def serialize_row(task)
          all_dependants_for_task(task).map do |dependant|
            "#{task.name} -> #{dependant.name};"
          end
        end

        # Selects the tasks in the graph that depend on the given task.
        #
        # @param input_task [#name]
        # @return [Array]
        def all_dependants_for_task(input_task)
          @task_graph.to_h.keys.select { |task| task.depends_on?(input_task) }
        end
      end
    end
  end
end
|
@@ -0,0 +1,184 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Attr
  module Gather
    module Workflow
      # DSL for configuring a workflow
      #
      # @api public
      module DSL
        # Sentinel used to tell "no argument given" apart from an explicit nil
        #
        # @api private
        Undefined = Object.new.freeze

        # Defines a task with name and options
        #
        # @param task_name [Symbol] the name of the task
        # @param opts [Hash] extra keyword options forwarded to the Task
        #
        # @example
        #   class EnhanceUserProfile
        #     extend Attr::Gather::Workflow
        #
        #     # ...
        #
        #     task :fetch_database_info do |t|
        #       t.depends_on = []
        #     end
        #
        #     task :fetch_avatar_info do |t|
        #       t.depends_on = [:fetch_gravatar_info]
        #     end
        #   end
        #
        # Calling `task` with a block will yield a task object which you can
        # configure like a PORO. The block is optional: a task that is fully
        # described by `opts` can be declared without one. Tasks will be
        # registered for execution in the workflow.
        #
        # @yield [Attr::Gather::Workflow::Task] A task to configure
        #
        # @return [self]
        #
        # @api public
        def task(task_name, opts = EMPTY_HASH)
          task = Task.new(name: task_name, **opts)
          # Yielding without a block would raise LocalJumpError
          yield task if block_given?
          tasks << task
          self
        end

        # Defines a container for task dependencies
        #
        # Using a container makes it easy to re-use workflows with different
        # data sources. Say one workflow was required to use a legacy DB, and
        # one wanted to use a new DB. Using a container makes it easy to
        # configure that dependency.
        #
        # @example
        #   LegacySystem = Dry::Container.new.tap do |c|
        #     c.register(:database) { Sequel.connect('sqlite://legacy.db') }
        #   end
        #
        #   class EnhanceUserProfile
        #     extend Attr::Gather::Workflow
        #
        #     container LegacySystem
        #   end
        #
        # @param cont [Dry::Container] the Dry::Container to use
        #
        # @return [Dry::Container, nil] the configured container, if any
        #
        # @note For more information, check out {https://dry-rb.org/gems/dry-container}
        #
        # @api public
        def container(cont = nil)
          @container = cont if cont
          @container
        end

        # Configures the result aggregator
        #
        # Aggregators make it possible to build custom logic about
        # how results should be "merged" together. For example,
        # you could build an aggregator that prioritizes the
        # values of some tasks over others.
        #
        # When called without an aggregator before one has been configured,
        # the default aggregator is stored and returned.
        #
        # @example
        #   class EnhanceUserProfile
        #     extend Attr::Gather::Workflow
        #
        #     aggregator :deep_merge
        #   end
        #
        # @param agg [#call] the aggregator to use
        # @param opts [Hash] extra options passed along when resolving `agg`
        #
        # @return [Object] the configured aggregator
        #
        # @api public
        def aggregator(agg = nil, opts = EMPTY_HASH)
          if agg.nil? && !defined?(@aggregator)
            @aggregator = Aggregators.default
            return @aggregator
          end

          @aggregator = Aggregators.resolve(agg, filter: filter, **opts) if agg
          @aggregator
        end

        # Defines a filter for filtering out invalid values
        #
        # When aggregating data from many sources, it is hard to reason about
        # all the ways invalid data will be returned. For example, if you are
        # pulling data from a spreadsheet, there will often be typos, etc.
        #
        # Defining a filter allows you to declaratively state what is valid.
        # attr-gather will use this definition to automatically filter out
        # invalid values, so they never make it into your system.
        #
        # Filtering happens during each step of the workflow, which means that
        # every Task will receive validated input that you can rely on.
        #
        # @example
        #   class UserContract < Dry::Validation::Contract
        #     params do
        #       optional(:id).filled(:integer)
        #       optional(:email).filled(:str?, format?: /@/)
        #     end
        #   end
        #
        #   class EnhanceUserProfile
        #     extend Attr::Gather::Workflow
        #
        #     # Any of the key/value pairs that had validation errors will be
        #     # filtered from the output.
        #     filter :contract, UserContract.new
        #   end
        #
        # @param filt [Symbol] the name of the filter to use
        # @param args [Array<Object>] arguments for initializing the filter
        #
        # @return [Object] the configured filter
        #
        # @api public
        def filter(filt = Undefined, *args)
          # Compare by identity so a custom `==` on `filt` cannot
          # impersonate the sentinel.
          if Undefined.equal?(filt)
            @filter = Filters.default unless defined?(@filter)
          else
            @filter = Filters.resolve(filt, *args)
          end

          @filter
        end

        # Defines a filter for filtering invalid values with an inline contract
        #
        # This serves as a convenience method for defining a contract filter.
        # When a block is given it is used as the body of an anonymous
        # contract class; otherwise `arg` is used as the contract.
        #
        # @example
        #
        #   class EnhanceUserProfile
        #     extend Attr::Gather::Workflow
        #
        #     # Any of the key/value pairs that had validation errors will be
        #     # filtered from the output.
        #     filter_with_contract do
        #       params do
        #         required(:name).filled(:string)
        #         required(:age).value(:integer)
        #       end
        #
        #       rule(:age) do
        #         key.failure('must be greater than 18') if value < 18
        #       end
        #     end
        #   end
        #
        # @param arg [Dry::Validation::Contract, nil] a ready-made contract,
        #   used only when no block is given
        #
        # @return [Object] the contract filter that was configured
        # @see https://dry-rb.org/gems/dry-validation
        #
        # @api public
        def filter_with_contract(arg = nil, &blk)
          contract = block_given? ? build_inline_contract_filter(&blk) : arg
          @filter = Filters.resolve(:contract, contract)
        end

        private

        # Builds an instance of an anonymous contract class whose body is the
        # given block.
        def build_inline_contract_filter(&blk)
          contract_klass = Class.new(Dry::Validation::Contract, &blk)
          contract_klass.new
        end
      end
    end
  end
end
|
@@ -0,0 +1,50 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Attr
  module Gather
    module Workflow
      # Mixin exposing a workflow's task graph and its graphviz rendering
      #
      # Including this module extends the host with {ClassMethods} and
      # includes {InstanceMethods} into it.
      #
      # @api public
      module Graphable
        # Graph helpers available on the including class
        module ClassMethods
          # The graph of tasks, created on first access and then reused
          #
          # @return [TaskGraph] the graph
          #
          # @api private
          def tasks
            @tasks || (@tasks = TaskGraph.new)
          end

          # Asks the task graph for its graphviz representation
          #
          # @param preview [Boolean] forwarded to the task graph; requests a
          #   preview image of the Workflow
          #
          # @api public
          def to_dot(preview: true)
            graph = tasks
            graph.to_dot(preview: preview)
          end
        end

        # Graph helpers available on instances of the including class
        module InstanceMethods
          # Delegates to the class-level +to_dot+
          #
          # @param preview [Boolean] show a preview image of the Workflow
          #
          # @api public
          def to_dot(preview: true)
            workflow_class = self.class
            workflow_class.to_dot(preview: preview)
          end
        end

        # Hook wiring both helper modules into the including class
        def self.included(base)
          base.extend(ClassMethods)
          base.include(InstanceMethods)
        end
      end
    end
  end
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Attr
  module Gather
    module Workflow
      # A named unit of work together with the names of the tasks it
      # depends on.
      #
      # @api private
      class Task
        attr_accessor :depends_on, :name

        # @param name [Object] identifier of the task
        # @param depends_on [Array] names of the tasks this one depends on
        def initialize(name:, depends_on: [])
          @depends_on = depends_on
          @name = name
        end

        # Whether the other task's name is listed in this task's dependencies.
        #
        # @param other_task [#name]
        # @return [Boolean]
        def depends_on?(other_task)
          required_name = other_task.name
          depends_on.include?(required_name)
        end

        # Whether none of the given tasks is a dependency of this task.
        #
        # @param task_list [Enumerable<#name>] tasks still waiting to run
        # @return [Boolean]
        def fullfilled_given_remaining_tasks?(task_list)
          !task_list.any? { |pending| depends_on?(pending) }
        end

        # @return [Hash] the task's name and dependency names
        def as_json
          {
            name: name,
            depends_on: depends_on
          }
        end
      end
    end
  end
end
|
@@ -0,0 +1,58 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Attr
  module Gather
    module Workflow
      # A wrapper containing information and results of a task execution
      #
      # @!attribute [r] started_at
      #   @return [Time] time at which the wrapper was created
      #
      # @!attribute [r] task
      #   @return [Attr::Gather::Workflow::Task] task that was run
      #
      # @!attribute [r] result
      #   @return [Concurrent::Promise] the result promise of the task
      #
      # @!attribute [r] uuid
      #   @return [String] identifier generated when the wrapper is created
      #
      # @api public
      class TaskExecutionResult
        include Concerns::Identifiable

        attr_reader :task, :result, :started_at, :uuid

        # @param task [Attr::Gather::Workflow::Task] task that was run
        # @param result [Concurrent::Promise] promise holding the task's result
        def initialize(task, result)
          @task = task
          @result = result
          @started_at = Time.now
          @uuid = SecureRandom.uuid
        end

        # @!attribute [r] state
        #   @return [:unscheduled, :pending, :processing, :rejected, :fulfilled]
        def state
          result.state
        end

        # Extracts the result, this is an unsafe operation that blocks the
        # operation, and returns either the value or an exception.
        #
        # @note For more information, check out {https://ruby-concurrency.github.io/concurrent-ruby/1.1.5/Concurrent/Concern/Obligation.html#value!-instance_method}
        def value!
          result.value!
        end

        # Represents the TaskExecutionResult as a hash
        #
        # @return [Hash]
        def as_json
          # The value is read before the hash is built, so `state` is
          # sampled after the value has been obtained.
          resolved_value = result.value

          {
            started_at: started_at,
            task: task.as_json,
            state: state,
            value: resolved_value
          }
        end
      end
    end
  end
end
|
@@ -0,0 +1,31 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'concurrent'
|
4
|
+
require 'attr/gather/workflow/task_execution_result'
|
5
|
+
|
6
|
+
module Attr
  module Gather
    module Workflow
      # Runs every task of a batch against the same input.
      #
      # @api private
      class TaskExecutor
        attr_reader :batch, :container, :executor

        # @param batch [Enumerable<#name>] the tasks to run
        # @param container [#resolve] resolves a task name to a callable
        def initialize(batch, container:)
          @executor = :immediate
          @container = container
          @batch = batch
        end

        # Resolves each task's callable from the container, starts a promise
        # that calls it with the input, and wraps task and promise together.
        #
        # @param input [Object] value handed to every task callable
        # @return [Array<TaskExecutionResult>] one entry per task in the batch
        def call(input)
          batch.map do |task|
            # Resolve outside the promise so lookup failures surface here
            handler = container.resolve(task.name)
            promise = Concurrent::Promise.execute(executor: executor) { handler.call(input) }

            TaskExecutionResult.new(task, promise)
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,107 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'tsort'
|
4
|
+
require 'attr/gather/workflow/dot_serializer'
|
5
|
+
|
6
|
+
module Attr
  module Gather
    module Workflow
      # Dependency graph of tasks, stored as a hash that maps each task to
      # the list of registered tasks it depends on. Topological ordering is
      # provided by TSort.
      #
      # @api private
      class TaskGraph
        class UnfinishableError < StandardError; end
        class InvalidTaskDepedencyError < StandardError; end

        include TSort

        attr_reader :tasks_hash

        # @param tasks [Enumerable] tasks to insert, in order; every task must
        #   come after the tasks it depends on
        def initialize(tasks: [])
          @tasks_hash = {}
          tasks.each { |t| self << t }
        end

        # Registers a task in the graph.
        #
        # @param task [#name, #depends_on, #depends_on?]
        # @return [Array] the registered tasks the new task depends on
        # @raise [InvalidTaskDepedencyError] when a dependency name does not
        #   match any registered task
        def <<(task)
          validate_for_insert!(task)

          # Link the new task into any registered task that depends on it
          tasks_hash.each do |registered, dependencies|
            next unless registered.depends_on?(task)

            dependencies << task
            dependencies.uniq!
          end

          tasks_hash[task] = all_dependencies_for_task(task)
        end

        # Leading tasks of the sorted order that depend on no registered task.
        #
        # @return [Array]
        def runnable_tasks
          everything = registered_tasks

          tsort.take_while do |task|
            task.fullfilled_given_remaining_tasks?(everything)
          end
        end

        # Yields successive batches of tasks; each batch only contains tasks
        # that do not depend on any task that has not been yielded yet.
        #
        # @yield [Array] the next batch
        # @return [Enumerator] when no block is given
        # @raise [UnfinishableError] when tasks remain but none can be batched
        def each_batch
          return enum_for(:each_batch) unless block_given?

          to_execute = tsort

          until to_execute.empty?
            batch = to_execute.take_while do |task|
              task.fullfilled_given_remaining_tasks?(to_execute)
            end

            to_execute -= batch

            # An empty batch with work left would otherwise loop forever
            validate_finishable!(batch, to_execute)

            yield batch
          end
        end

        alias to_a tsort

        # @return [Hash] each task mapped to the tasks it depends on
        def to_h
          tasks_hash
        end

        # @param preview [Boolean] when true, ask the serializer for a preview
        #   instead of the DOT source
        def to_dot(preview: false)
          serializer = DotSerializer.new(self)
          preview ? serializer.preview : serializer.to_dot
        end

        private

        # TSort hook: the children of a node are its dependencies
        def tsort_each_child(node, &blk)
          to_h[node].each(&blk)
        end

        # TSort hook: the nodes are the registered tasks
        def tsort_each_node(&blk)
          to_h.each_key(&blk)
        end

        def validate_finishable!(batch, to_execute)
          return unless batch.empty? && !to_execute.empty?

          # TODO: statically verify this
          raise UnfinishableError, 'task dependencies are not fulfillable'
        end

        # Raises unless every dependency of the task is already registered.
        # The message keeps its original wording and additionally names the
        # dependencies that could not be found.
        def validate_for_insert!(task)
          missing = missing_dependency_names(task)
          return if missing.empty?

          raise InvalidTaskDepedencyError,
                "could not find a matching task for #{task.name} " \
                "(missing dependencies: #{missing.join(', ')})"
        end

        def all_dependencies_for_task(input_task)
          registered_tasks.select { |task| input_task.depends_on?(task) }
        end

        def registered_tasks
          tasks_hash.keys
        end

        def depended_on_tasks_exist?(task)
          missing_dependency_names(task).empty?
        end

        # Dependency names of the task that match no registered task. The
        # list of registered names is built once, not once per dependency.
        def missing_dependency_names(task)
          known_names = registered_tasks.map(&:name)
          task.depends_on.reject { |name| known_names.include?(name) }
        end
      end
    end
  end
end
|