attr-gather 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51) hide show
  1. checksums.yaml +7 -0
  2. data/.github/workflows/deploy.yml +15 -0
  3. data/.github/workflows/doc.yml +25 -0
  4. data/.github/workflows/ruby.yml +20 -0
  5. data/.gitignore +12 -0
  6. data/.ignore +1 -0
  7. data/.rspec +3 -0
  8. data/.rubocop.yml +38 -0
  9. data/.ruby-version +1 -0
  10. data/.travis.yml +7 -0
  11. data/.vim/coc-settings.json +12 -0
  12. data/.vim/install.sh +38 -0
  13. data/.yardopts +4 -0
  14. data/CODE_OF_CONDUCT.md +74 -0
  15. data/Gemfile +19 -0
  16. data/Gemfile.lock +147 -0
  17. data/LICENSE.txt +21 -0
  18. data/README.md +63 -0
  19. data/Rakefile +21 -0
  20. data/attr-gather.gemspec +38 -0
  21. data/bin/console +15 -0
  22. data/bin/setup +8 -0
  23. data/bin/solargraph +29 -0
  24. data/examples/post_enhancer.rb +119 -0
  25. data/examples/post_enhancer.svg +55 -0
  26. data/lib/attr-gather.rb +3 -0
  27. data/lib/attr/gather.rb +16 -0
  28. data/lib/attr/gather/aggregators.rb +31 -0
  29. data/lib/attr/gather/aggregators/base.rb +38 -0
  30. data/lib/attr/gather/aggregators/deep_merge.rb +50 -0
  31. data/lib/attr/gather/aggregators/shallow_merge.rb +40 -0
  32. data/lib/attr/gather/concerns/identifiable.rb +24 -0
  33. data/lib/attr/gather/concerns/registrable.rb +50 -0
  34. data/lib/attr/gather/filters.rb +34 -0
  35. data/lib/attr/gather/filters/base.rb +20 -0
  36. data/lib/attr/gather/filters/contract.rb +60 -0
  37. data/lib/attr/gather/filters/filtering.rb +27 -0
  38. data/lib/attr/gather/filters/noop.rb +14 -0
  39. data/lib/attr/gather/filters/result.rb +23 -0
  40. data/lib/attr/gather/version.rb +7 -0
  41. data/lib/attr/gather/workflow.rb +29 -0
  42. data/lib/attr/gather/workflow/async_task_executor.rb +17 -0
  43. data/lib/attr/gather/workflow/callable.rb +84 -0
  44. data/lib/attr/gather/workflow/dot_serializer.rb +46 -0
  45. data/lib/attr/gather/workflow/dsl.rb +184 -0
  46. data/lib/attr/gather/workflow/graphable.rb +50 -0
  47. data/lib/attr/gather/workflow/task.rb +29 -0
  48. data/lib/attr/gather/workflow/task_execution_result.rb +58 -0
  49. data/lib/attr/gather/workflow/task_executor.rb +31 -0
  50. data/lib/attr/gather/workflow/task_graph.rb +107 -0
  51. metadata +150 -0
@@ -0,0 +1,46 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'tempfile'
4
+
5
module Attr
  module Gather
    module Workflow
      # Serializes a task graph into Graphviz DOT source.
      #
      # Every edge points from a task to a task that depends on it.
      #
      # @api private
      class DotSerializer
        # @param task_graph [#tsort, #to_h] graph whose nodes respond to
        #   +name+ and +depends_on?+
        def initialize(task_graph)
          @task_graph = task_graph
        end

        # Builds the DOT source for the graph.
        #
        # Only edges are emitted, so a task that has neither dependencies nor
        # dependants does not show up in the output.
        #
        # @return [String] a +digraph TaskGraph+ definition
        def to_dot
          edges = @task_graph.tsort.flat_map { |task| serialize_row(task) }
          joined_lines = edges.map { |edge| "  #{edge}" }.join("\n").strip

          <<~DOT
            digraph TaskGraph {
              #{joined_lines}
            }
          DOT
        end

        # Renders the graph to a temporary SVG file with +dot+ and opens the
        # file with +xdg-open+. Both executables must be available.
        #
        # @return [String] whatever +xdg-open+ wrote to standard output
        def preview
          Tempfile.open(['task-graph-preview', '.svg']) do |tf|
            # Commands are passed as argument arrays so they never go through
            # a shell; a temp path containing spaces or shell metacharacters
            # therefore cannot break or alter the command.
            IO.popen(['dot', '-Tsvg', '-o', tf.path], 'w') do |dot|
              dot.write(to_dot)
            end
            IO.popen(['xdg-open', tf.path], &:read)
          end
        end

        private

        # @param task [#name] the task the edges start from
        # @return [Array<String>] one "task -> dependant;" line per dependant
        def serialize_row(task)
          all_dependants_for_task(task).map do |dependant|
            "#{task.name} -> #{dependant.name};"
          end
        end

        # @param input_task [#name]
        # @return [Array] tasks in the graph that depend on +input_task+
        def all_dependants_for_task(input_task)
          @task_graph.to_h.keys.select { |task| task.depends_on?(input_task) }
        end
      end
    end
  end
end
@@ -0,0 +1,184 @@
1
+ # frozen_string_literal: true
2
+
3
module Attr
  module Gather
    module Workflow
      # DSL for configuring a workflow
      #
      # @api public
      module DSL
        # Sentinel that lets {#filter} tell "no argument given" apart from an
        # explicit +nil+.
        #
        # @api private
        Undefined = Object.new.freeze

        # Defines a task with name and options
        #
        # @param task_name [Symbol] the name of the task
        # @param opts [Hash] extra keyword options forwarded to the Task
        #   constructor
        #
        # @example
        #   class EnhanceUserProfile
        #     extend Attr::Gather::Workflow
        #
        #     # ...
        #
        #     task :fetch_database_info do |t|
        #       t.depends_on = []
        #     end
        #
        #     task :fetch_avatar_info do |t|
        #       t.depends_on = [:fetch_database_info]
        #     end
        #   end
        #
        # Calling `task` will yield a task object which you can configure like
        # a PORO. Tasks will be registered for execution in the workflow. The
        # block is optional: a task that is fully described by +opts+ can be
        # declared without one.
        #
        # @yield [Attr::Gather::Workflow::Task] A task to configure
        #
        # @return [self]
        #
        # @api public
        def task(task_name, opts = EMPTY_HASH)
          task = Task.new(name: task_name, **opts)
          # Configure before registering, so the task's dependencies are
          # already set when it is appended to the task collection.
          yield task if block_given?
          tasks << task
          self
        end

        # Defines a container for task dependencies
        #
        # Using a container makes it easy to re-use workflows with different
        # data sources. Say one workflow was required to use a legacy DB, and
        # one wanted to use a new DB. Using a container makes it easy to
        # configure that dependency.
        #
        # @example
        #   LegacySystem = Dry::Container.new.tap do |c|
        #     c.register(:database) { Sequel.connect('sqlite://legacy.db') }
        #   end
        #
        #   class EnhanceUserProfile
        #     extend Attr::Gather::Workflow
        #
        #     container LegacySystem
        #   end
        #
        # @param cont [Dry::Container] the Dry::Container to use
        #
        # @return [Dry::Container, nil] the configured container, if any
        #
        # @note For more information, check out {https://dry-rb.org/gems/dry-container}
        #
        # @api public
        def container(cont = nil)
          @container = cont if cont
          @container
        end

        # Configures the result aggregator
        #
        # Aggregators make it possible to build custom logic about
        # how results should be "merged" together. For example,
        # you could build an aggregator that prioritizes the
        # values of some tasks over others.
        #
        # Called without an aggregator, it returns the one configured so far,
        # falling back to +Aggregators.default+ the first time.
        #
        # @example
        #   class EnhanceUserProfile
        #     extend Attr::Gather::Workflow
        #
        #     aggregator :deep_merge
        #   end
        #
        # @param agg [#call] the aggregator to use
        # @param opts [Hash] extra options forwarded to +Aggregators.resolve+
        #
        # @return [Object] the configured aggregator
        #
        # @api public
        def aggregator(agg = nil, opts = EMPTY_HASH)
          if agg.nil? && !defined?(@aggregator)
            @aggregator = Aggregators.default
            return @aggregator
          end

          # The current filter is handed to the aggregator when it is resolved.
          @aggregator = Aggregators.resolve(agg, filter: filter, **opts) if agg
          @aggregator
        end

        # Defines a filter for filtering out invalid values
        #
        # When aggregating data from many sources, it is hard to reason about
        # all the ways invalid data will be returned. For example, if you are
        # pulling data from a spreadsheet, there will often be typos, etc.
        #
        # Defining a filter allows you to declaratively state what is valid.
        # attr-gather will use this definition to automatically filter out
        # invalid values, so they never make it into your system.
        #
        # Filtering happens during each step of the workflow, which means that
        # every Task will receive validated input that you can rely on.
        #
        # @example
        #   class UserContract < Dry::Validation::Contract
        #     params do
        #       optional(:id).filled(:integer)
        #       optional(:email).filled(:str?, format?: /@/)
        #     end
        #   end
        #
        #   class EnhanceUserProfile
        #     extend Attr::Gather::Workflow
        #
        #     # Any of the key/value pairs that had validation errors will be
        #     # filtered from the output.
        #     filter :contract, UserContract.new
        #   end
        #
        # @param filt [Symbol] the name of the filter to use
        # @param args [Array<Object>] arguments for initializing the filter
        #
        # @return [Object] the configured filter
        #
        # @api public
        def filter(filt = Undefined, *args)
          if filt == Undefined && !defined?(@filter)
            @filter = Filters.default
          elsif filt != Undefined
            @filter = Filters.resolve(filt, *args)
          end

          @filter
        end

        # Defines a filter for filtering invalid values with an inline contract
        #
        # This serves as a convenience method for defining a contract filter.
        # When a block is given it becomes the body of an anonymous
        # Dry::Validation::Contract subclass; otherwise +arg+ is used as the
        # contract.
        #
        # @example
        #
        #   class EnhanceUserProfile
        #     extend Attr::Gather::Workflow
        #
        #     # Any of the key/value pairs that had validation errors will be
        #     # filtered from the output.
        #     filter_with_contract do
        #       params do
        #         required(:name).filled(:string)
        #         required(:age).value(:integer)
        #       end
        #
        #       rule(:age) do
        #         key.failure('must be greater than 18') if value < 18
        #       end
        #     end
        #   end
        #
        # @param arg [Dry::Validation::Contract, nil] a ready-made contract,
        #   used only when no block is given
        #
        # @return [Object] the filter returned by
        #   +Filters.resolve(:contract, contract)+
        # @see https://dry-rb.org/gems/dry-validation
        #
        # @api public
        def filter_with_contract(arg = nil, &blk)
          contract = block_given? ? build_inline_contract_filter(&blk) : arg
          @filter = Filters.resolve(:contract, contract)
        end

        private

        # Builds an instance of an anonymous contract class whose body is the
        # given block.
        def build_inline_contract_filter(&blk)
          contract_klass = Class.new(Dry::Validation::Contract, &blk)
          contract_klass.new
        end
      end
    end
  end
end
@@ -0,0 +1,50 @@
1
+ # frozen_string_literal: true
2
+
3
module Attr
  module Gather
    module Workflow
      # Module containing graph functionality
      #
      # Including it extends the host class with {ClassMethods} and mixes
      # {InstanceMethods} into it.
      #
      # @api public
      module Graphable
        # Class methods for graph functionality
        module ClassMethods
          # Returns the graph of tasks, creating an empty one on first use
          #
          # @return [TaskGraph] the graph
          #
          # @api private
          def tasks
            @tasks ||= TaskGraph.new
          end

          # Returns a graphviz visualization of the workflow
          #
          # Delegates to the task graph's +to_dot+. Note that +preview+
          # defaults to +true+ here.
          #
          # @param preview [Boolean] show a preview image of the Workflow
          #
          # @api public
          def to_dot(preview: true)
            tasks.to_dot(preview: preview)
          end
        end

        # Instance methods for graph functionality
        module InstanceMethods
          # Returns a graphviz visualization of the workflow
          #
          # Delegates to the class-level +to_dot+.
          #
          # @param preview [Boolean] show a preview image of the Workflow
          #
          # @api public
          def to_dot(preview: true)
            self.class.to_dot(preview: preview)
          end
        end

        # Hook that wires both method sets into the including class.
        #
        # @param klass [Class] the class including Graphable
        def self.included(klass)
          klass.extend(ClassMethods)
          klass.include(InstanceMethods)
        end
      end
    end
  end
end
@@ -0,0 +1,29 @@
1
+ # frozen_string_literal: true
2
+
3
module Attr
  module Gather
    module Workflow
      # A named unit of work together with the names of the tasks it
      # depends on.
      #
      # @api private
      class Task
        attr_accessor :name
        attr_reader :depends_on

        # @param name [Symbol] the name of the task
        # @param depends_on [Array<Symbol>, Symbol, nil] names of the tasks
        #   this task depends on
        def initialize(name:, depends_on: [])
          @name = name
          self.depends_on = depends_on
        end

        # Sets the dependency names.
        #
        # A single name or +nil+ is wrapped with +Array()+, so the reader
        # always returns something that answers +include?+. An Array is stored
        # as-is (same object).
        #
        # @param dependencies [Array<Symbol>, Symbol, nil]
        def depends_on=(dependencies)
          @depends_on = Array(dependencies)
        end

        # @param other_task [#name]
        # @return [Boolean] whether +other_task+'s name is among this task's
        #   dependencies
        def depends_on?(other_task)
          depends_on.include?(other_task.name)
        end

        # @param task_list [Enumerable<#name>] tasks that have not run yet
        # @return [Boolean] true when none of the tasks in +task_list+ is a
        #   dependency of this task
        def fullfilled_given_remaining_tasks?(task_list)
          task_list.none? { |list_task| depends_on?(list_task) }
        end

        # @return [Hash] the task's name and dependency names
        def as_json
          { name: name, depends_on: depends_on }
        end
      end
    end
  end
end
@@ -0,0 +1,58 @@
1
+ # frozen_string_literal: true
2
+
3
require 'securerandom'

module Attr
  module Gather
    module Workflow
      # A wrapper containing information and results of a task execution
      #
      # @!attribute [r] started_at
      #   @return [Time] time at which this result wrapper was created
      #
      # @!attribute [r] task
      #   @return [Attr::Gather::Workflow::Task] task that was run
      #
      # @!attribute [r] result
      #   @return [Concurrent::Promise] the result promise of the task
      #
      # @!attribute [r] uuid
      #   @return [String] identifier generated with SecureRandom.uuid
      #
      # @api public
      class TaskExecutionResult
        include Concerns::Identifiable

        attr_reader :task, :result, :started_at, :uuid

        # @param task [Attr::Gather::Workflow::Task] task that was run
        # @param result [Concurrent::Promise] promise holding the task's result
        def initialize(task, result)
          @started_at = Time.now
          @uuid = SecureRandom.uuid
          @task = task
          @result = result
        end

        # @!attribute [r] state
        #   @return [:unscheduled, :pending, :processing, :rejected, :fulfilled]
        def state
          result.state
        end

        # Extracts the result, this is an unsafe operation that blocks the
        # operation, and returns either the value or an exception.
        #
        # @note For more information, check out {https://ruby-concurrency.github.io/concurrent-ruby/1.1.5/Concurrent/Concern/Obligation.html#value!-instance_method}
        def value!
          result.value!
        end

        # Represents the TaskExecutionResult as a hash
        #
        # @return [Hash]
        def as_json
          # The value is read before the hash is built, so +state+ below is
          # sampled afterwards. NOTE(review): presumably +result.value+ waits
          # for the promise to settle, which would make that state final --
          # confirm against concurrent-ruby before reordering.
          value = result.value

          { started_at: started_at,
            task: task.as_json,
            state: state,
            value: value }
        end
      end
    end
  end
end
@@ -0,0 +1,31 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'concurrent'
4
+ require 'attr/gather/workflow/task_execution_result'
5
+
6
module Attr
  module Gather
    module Workflow
      # Runs every task of a batch against the same input, wrapping each run
      # in a promise.
      #
      # @api private
      class TaskExecutor
        attr_reader :batch, :container, :executor

        # @param batch [Enumerable<#name>] the tasks to run
        # @param container [#resolve] resolves a task name to a callable
        # @param executor [Object] executor option passed to
        #   Concurrent::Promise.execute; defaults to +:immediate+
        def initialize(batch, container:, executor: :immediate)
          @batch = batch
          @container = container
          @executor = executor
        end

        # Starts every task in the batch with the given input.
        #
        # @param input [Object] argument passed to each task callable
        # @return [Array<TaskExecutionResult>] one result per task, in batch
        #   order
        def call(input)
          batch.map do |task|
            # Resolve outside the promise block so a missing registration
            # surfaces here rather than inside the promise.
            task_proc = container.resolve(task.name)
            result = Concurrent::Promise.execute(executor: executor) do
              task_proc.call(input)
            end
            TaskExecutionResult.new(task, result)
          end
        end
      end
    end
  end
end
@@ -0,0 +1,107 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'tsort'
4
+ require 'attr/gather/workflow/dot_serializer'
5
+
6
module Attr
  module Gather
    module Workflow
      # Dependency graph of tasks.
      #
      # Internally a hash mapping each task to the array of registered tasks
      # it depends on; topological ordering comes from TSort.
      #
      # @api private
      class TaskGraph
        # Raised while batching when tasks remain but none of them can run.
        class UnfinishableError < StandardError; end

        # Raised when a task names a dependency that is not registered.
        class InvalidTaskDepedencyError < StandardError; end

        include TSort

        attr_reader :tasks_hash

        # @param tasks [Enumerable] tasks to register, dependencies first
        def initialize(tasks: [])
          @tasks_hash = {}
          tasks.each { |t| self << t }
        end

        # Registers a task in the graph.
        #
        # Returns the graph itself so that insertions can be chained
        # (+graph << a << b+) without touching a task's dependency list.
        #
        # @param task [#name, #depends_on, #depends_on?]
        # @return [self]
        # @raise [InvalidTaskDepedencyError] when one of the task's
        #   dependencies has not been registered yet
        def <<(task)
          validate_for_insert!(task)

          # Link already-registered tasks that name the new task as a
          # dependency.
          registered_tasks.each do |t|
            tasks_hash[t] << task if t.depends_on?(task)
            tasks_hash[t].uniq!
          end

          tasks_hash[task] = all_dependencies_for_task(task)
          self
        end

        # @return [Array] the leading tasks of the topological order that
        #   depend on no registered task
        def runnable_tasks
          tsort.take_while do |task|
            task.fullfilled_given_remaining_tasks?(registered_tasks)
          end
        end

        # Yields the tasks in batches that can be executed one after another.
        #
        # Each batch is the leading run of the remaining topological order
        # whose members depend on none of the tasks still waiting.
        #
        # @yield [Array] the next batch of tasks
        # @return [Enumerator] when no block is given
        # @raise [UnfinishableError] when tasks remain but the batch is empty
        def each_batch
          return enum_for(:each_batch) unless block_given?

          to_execute = tsort

          until to_execute.empty?
            batch = to_execute.take_while do |task|
              task.fullfilled_given_remaining_tasks?(to_execute)
            end

            to_execute -= batch

            validate_finishable!(batch, to_execute)

            yield batch
          end
        end

        alias to_a tsort

        # @return [Hash] each task mapped to the tasks it depends on
        def to_h
          tasks_hash
        end

        # @param preview [Boolean] open a rendered preview instead of
        #   returning the DOT source
        # @return [String] the DOT source, or the result of the serializer's
        #   +preview+ when +preview+ is true
        def to_dot(preview: false)
          serializer = DotSerializer.new(self)
          preview ? serializer.preview : serializer.to_dot
        end

        private

        # TSort hook: the children of a node are its dependencies.
        def tsort_each_child(node, &blk)
          to_h[node].each(&blk)
        end

        # TSort hook: iterates over every registered task.
        def tsort_each_node(&blk)
          to_h.each_key(&blk)
        end

        def validate_finishable!(batch, to_execute)
          return unless batch.empty? && !to_execute.empty?

          # TODO: statically verify this
          raise UnfinishableError, 'task dependencies are not fulfillable'
        end

        def validate_for_insert!(task)
          missing = missing_dependencies_for(task)
          return if missing.empty?

          raise InvalidTaskDepedencyError,
                "could not find a matching task for #{task.name} " \
                "(missing dependencies: #{missing.join(', ')})"
        end

        def all_dependencies_for_task(input_task)
          registered_tasks.select { |task| input_task.depends_on?(task) }
        end

        def registered_tasks
          tasks_hash.keys
        end

        def depended_on_tasks_exist?(task)
          missing_dependencies_for(task).empty?
        end

        # Dependency names of +task+ that match no registered task. The list
        # of registered names is built once instead of once per dependency.
        def missing_dependencies_for(task)
          known_names = registered_tasks.map(&:name)
          task.depends_on.reject { |name| known_names.include?(name) }
        end
      end
    end
  end
end