burner 1.6.0.pre.alpha → 1.9.0.pre.alpha

Files changed (47)
  1. checksums.yaml +4 -4
  2. data/.tool-versions +1 -0
  3. data/CHANGELOG.md +37 -1
  4. data/README.md +60 -2
  5. data/lib/burner/data.rb +46 -0
  6. data/lib/burner/job.rb +2 -10
  7. data/lib/burner/job_set.rb +64 -0
  8. data/lib/burner/job_with_register.rb +8 -1
  9. data/lib/burner/jobs.rb +7 -0
  10. data/lib/burner/library.rb +7 -0
  11. data/lib/burner/library/collection/arrays_to_objects.rb +1 -1
  12. data/lib/burner/library/collection/coalesce.rb +14 -9
  13. data/lib/burner/library/collection/concatenate.rb +1 -1
  14. data/lib/burner/library/collection/graph.rb +1 -1
  15. data/lib/burner/library/collection/group.rb +15 -11
  16. data/lib/burner/library/collection/nested_aggregate.rb +1 -1
  17. data/lib/burner/library/collection/number.rb +51 -0
  18. data/lib/burner/library/collection/objects_to_arrays.rb +1 -1
  19. data/lib/burner/library/collection/pivot.rb +150 -0
  20. data/lib/burner/library/collection/shift.rb +1 -1
  21. data/lib/burner/library/collection/transform.rb +1 -1
  22. data/lib/burner/library/collection/unpivot.rb +1 -1
  23. data/lib/burner/library/collection/validate.rb +1 -1
  24. data/lib/burner/library/collection/values.rb +1 -1
  25. data/lib/burner/library/collection/zip.rb +1 -1
  26. data/lib/burner/library/compress/row_reader.rb +1 -1
  27. data/lib/burner/library/deserialize/yaml.rb +1 -1
  28. data/lib/burner/library/echo.rb +1 -1
  29. data/lib/burner/library/io/exist.rb +1 -1
  30. data/lib/burner/library/io/open_file_base.rb +1 -1
  31. data/lib/burner/library/io/row_reader.rb +1 -1
  32. data/lib/burner/library/io/write.rb +1 -1
  33. data/lib/burner/library/param/base.rb +29 -0
  34. data/lib/burner/library/param/from_register.rb +30 -0
  35. data/lib/burner/library/param/to_register.rb +28 -0
  36. data/lib/burner/library/serialize/csv.rb +1 -1
  37. data/lib/burner/library/sleep.rb +1 -1
  38. data/lib/burner/library/value/copy.rb +1 -1
  39. data/lib/burner/library/value/nest.rb +37 -0
  40. data/lib/burner/library/value/static.rb +1 -1
  41. data/lib/burner/library/value/transform.rb +38 -0
  42. data/lib/burner/payload.rb +39 -15
  43. data/lib/burner/pipeline.rb +6 -34
  44. data/lib/burner/util.rb +1 -0
  45. data/lib/burner/util/keyable.rb +23 -0
  46. data/lib/burner/version.rb +1 -1
  47. metadata +16 -5
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: a5d81ad3d936aefde8cb52a299a1b28d8baf8ca40cfefee478a1c1e525578aec
- data.tar.gz: f69f993fa1a51a4c5ed6e88a7225a1646c7ef47a38369b09680532854e961f84
+ metadata.gz: eca471b05b356ad1e96f71c0173673109622e14dc7d7d46bdab0beccc1e03396
+ data.tar.gz: 55f81d7080c1f0eb4a4cc1d51df7534376306362a1d97a8369f3fd85c0107a3b
  SHA512:
- metadata.gz: 4c670ad08031fc43df3aa1b995828ce507aa213e294c3ac4cfdef6a8d187a53224ad31bb44a959bb72cc220f06e9094dea4af6301bee5fea3d375c241bd0b493
- data.tar.gz: 11ef5e19a6160c0a9e943de848beb2b03ffa35bb5c8401eab9884cdddb5d4e43d2bd31ee4bc2f36479b28f7409afe32d7941f969a87f28743a50fceb7ec4342c
+ metadata.gz: bbd76fedb82b0499f7f8913e5530a1c35e2d4fc613ef11e95fb8ad6c759a28a7f4737d5ae29a71e0409dbd8f45b03de5b96dc22b84e3b85384cb256d1742eb9c
+ data.tar.gz: 311f81afb55fdbf5548c9394ec1ee30e020833f205e4e1c53fef87630c2e77d0b6cbd6639689efaa8956ae7bb188555f35ffb1dca7c08c2cedb6881f959cd508
data/.tool-versions ADDED
@@ -0,0 +1 @@
+ ruby 2.6.6
data/CHANGELOG.md CHANGED
@@ -1,5 +1,41 @@
+ # 1.9.0 (TBD)
 
- # 1.6.0 (TBD)
+ Added Jobs:
+
+ * b/collection/pivot
+ # 1.8.0 (March 31st, 2021)
+
+ Added Jobs:
+
+ * b/param/from_register
+ * b/param/to_register
+
+ Other:
+
+ * Payload#param was added to access a param key's value.
+ * Payload#update_param was added to update a param key's value.
+
+ Internal Notes:
+
+ Payload#register and Payload#params data stores have been internally consolidated while still maintaining the same public API surface area.
+
+ # 1.7.0 (January 22nd, 2021)
+
+ Added Jobs:
+
+ * b/collection/number
+ * b/value/nest
+ * b/value/transform
+
+ Enhanced Jobs:
+
+ * b/collection/coalesce and b/collection/group now support the notion of case- and type-insensitivity (insensitive option).
+
+ Changes:
+
+ * Job names derived from Burner::Job are now optional. Pipelines themselves can now handle jobs without names.
+
+ # 1.6.0 (December 22nd, 2020)
 
  Additions:
 
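To make the 1.8.0 param additions above concrete, here is a minimal sketch of a pipeline that round-trips a value between params and registers. The pipeline shape follows the README examples; the specific param and register names (greeting, copied_greeting, message) are hypothetical.

````ruby
require 'burner'

pipeline = {
  jobs: [
    # Hypothetical names: copy the :greeting param into the 'message' register...
    { type: 'b/param/to_register', param_key: :greeting, register: 'message' },
    # ...then copy the register back out to a different param key.
    { type: 'b/param/from_register', param_key: :copied_greeting, register: 'message' }
  ]
}

payload = Burner::Payload.new(params: { greeting: 'hello' })

Burner::Pipeline.make(pipeline).execute(payload: payload)

# Payload#param is the 1.8.0 accessor noted in the changelog above.
payload.param(:copied_greeting) # => "hello"
````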
data/README.md CHANGED
@@ -93,6 +93,54 @@ Some notes:
  * Jobs can be re-used (just like the output_id and output_value jobs).
  * If steps is nil then all jobs will execute in their declared order.
 
+ ### Omitting Job Names and Steps
+
+ Job names are optional, but steps can only correspond to named jobs. This means that if steps is declared, anonymous jobs have no way of being executed. Here is the same pipeline as above, but without job names and steps:
+
+ ````ruby
+ pipeline = {
+   jobs: [
+     {
+       type: 'b/io/read',
+       path: '{input_file}'
+     },
+     {
+       type: 'b/echo',
+       message: 'The job id is: {__id}'
+     },
+     {
+       type: 'b/echo',
+       message: 'The current value is: {__default_register}'
+     },
+     {
+       type: 'b/deserialize/json'
+     },
+     {
+       type: 'b/serialize/yaml'
+     },
+     {
+       type: 'b/echo',
+       message: 'The current value is: {__default_register}'
+     },
+     {
+       type: 'b/io/write',
+       path: '{output_file}'
+     }
+   ]
+ }
+
+ params = {
+   input_file: 'input.json',
+   output_file: 'output.yaml'
+ }
+
+ payload = Burner::Payload.new(params: params)
+
+ Burner::Pipeline.make(pipeline).execute(payload: payload)
+ ````
+
+ Like everything in software, there are trade-offs between the two equivalent pipelines above. The former (with steps and job names) has fewer jobs but is more verbose. The latter (without steps and job names) has more jobs but reads terser. Names can also aid in self-documenting your code/configuration, so it may be a good idea to enforce that names are used.
+
  ### Capturing Feedback / Output
 
  By default, output will be emitted to `$stdout`. You can add or change listeners by passing in optional values into Pipeline#execute. For example, say we wanted to capture the output from our json-to-yaml example:
@@ -216,12 +264,15 @@ This library only ships with very basic, rudimentary jobs that are meant to just
  #### Collection
 
  * **b/collection/arrays_to_objects** [mappings, register]: Convert an array of arrays to an array of objects.
- * **b/collection/coalesce** [register, grouped_register, key_mappings, keys, separator]: Merge two datasets together based on the key values of one dataset (array) with a grouped dataset (hash).
+ * **b/collection/coalesce** [grouped_register, insensitive, key_mappings, keys, register, separator]: Merge two datasets together based on the key values of one dataset (array) with a grouped dataset (hash). If insensitive (false by default) is true then each key's value will be converted/coerced to a lowercase string.
  * **b/collection/concatenate** [from_registers, to_register]: Concatenate each from_register's value and place the newly concatenated array into the to_register. Note: this does not do any deep copying, so assume all objects are shallow-copied.
  * **b/collection/graph** [config, key, register]: Use [Hashematics](https://github.com/bluemarblepayroll/hashematics) to turn a flat array of objects into a deeply nested object tree.
- * **b/collection/group** [keys, register, separator]: Take a register's value (an array of objects) and group the objects by the specified keys.
+ * **b/collection/group** [insensitive, keys, register, separator]: Take a register's value (an array of objects) and group the objects by the specified keys. If insensitive (false by default) is true then each key's value will be converted/coerced to a lowercase string.
  * **b/collection/nested_aggregate** [register, key_mappings, key, separator]: Traverse a set of objects, resolving the key's value for each object, optionally copying down key_mappings to the child records, then merging all the inner records together.
+ * **b/collection/number** [key, register, separator, start_at]: Iterate over a set of records and sequence them (set the specified key to a sequential index value).
  * **b/collection/objects_to_arrays** [mappings, register]: Convert an array of objects to an array of arrays.
+ * **b/collection/pivot** [unique_keys, insensitive, other_keys, pivot_key, pivot_value_key, register, separator]: Take an array of objects and pivot a key into multiple keys. It essentially takes all the values for a key and creates N keys (one per value). Under the hood it uses HashMath's [Record](https://github.com/bluemarblepayroll/hash_math#record-the-hash-prototype) and [Table](https://github.com/bluemarblepayroll/hash_math#table-the-double-hash-hash-of-hashes) classes.
  * **b/collection/shift** [amount, register]: Remove the first N elements from an array.
  * **b/collection/transform** [attributes, exclusive, separator, register]: Iterate over all objects and transform each key per the attribute transformer specifications. If exclusive is set to false then the current object will be overridden/merged. Separator can also be set for key path support. This job uses [Realize](https://github.com/bluemarblepayroll/realize), which provides its own extendable value-transformation pipeline. If an attribute is not set with `explicit: true` then it will automatically start from the key's value in the record. If `explicit: true` is set, then it will start from the record itself.
  * **b/collection/unpivot** [pivot_set, register]: Take an array of objects and unpivot specific sets of keys into rows. Under the hood it uses [HashMath's Unpivot class](https://github.com/bluemarblepayroll/hash_math#unpivot-hash-key-coalescence-and-row-extrapolation).
@@ -248,6 +299,11 @@ By default all jobs will use the `Burner::Disks::Local` disk for its persistence
  * **b/io/row_reader** [data_key, disk, ignore_blank_path, ignore_file_not_found, path_key, register, separator]: Iterate over an array of objects, extract a filepath from a key in each object, and attempt to load the file's content for each record. The file's content will be stored at the specified data_key. By default, missing paths or files will be treated as hard errors. If you wish to ignore these then pass in true for ignore_blank_path and/or ignore_file_not_found.
  * **b/io/write** [binary, disk, path, register, supress_side_effect]: Write to a local file. The path parameter can be interpolated using `Payload#params`. If the contents are binary, pass in `binary: true` to open it up in binary+write mode. By default, written files are also logged as WrittenFile instances to the Payload#side_effects array. You can pass in supress_side_effect: true to disable this behavior.
 
+ #### Parameters
+
+ * **b/param/from_register** [param_key, register]: Copy the value of a register to a param key.
+ * **b/param/to_register** [param_key, register]: Copy the value of a param key to a register.
+
  #### Serialization
 
  * **b/serialize/csv** [byte_order_mark, register]: Take an array of arrays and create a CSV. You can optionally pre-pend a byte order mark; see Burner::Modeling::ByteOrderMark for acceptable options.
@@ -257,7 +313,9 @@ By default all jobs will use the `Burner::Disks::Local` disk for its persistence
  #### Value
 
  * **b/value/copy** [from_register, to_register]: Copy from_register's value into the to_register. Note: this does not do any deep copying, so assume all objects are shallow-copied.
+ * **b/value/nest** [key, register]: Nest the current value within a new outer hash, using the specified key as the new hash's key for the existing value.
  * **b/value/static** [register, value]: Set the value to any arbitrary value.
+ * **b/value/transform** [register, separator, transformers]: Transform the current value of the register through a Realize::Pipeline. This transforms the entire value, as opposed to the b/collection/transform job, which iterates over each row/record in a dataset and transforms each one.
 
  #### General
 
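Since b/collection/pivot is the headline addition in this diff, a hedged sketch may help. The semantics are inferred from the option list above; the record, field, and register names ('patients', 'field', 'value') are hypothetical.

````ruby
require 'burner'

pipeline = {
  jobs: [
    {
      type: 'b/collection/pivot',
      register: 'patients',
      unique_keys: %w[patient_id], # identifies which rows collapse into one record
      pivot_key: 'field',          # each distinct value of this key becomes a new key
      pivot_value_key: 'value'     # supplies the value for the newly created key
    }
  ]
}

rows = [
  { 'patient_id' => 1, 'field' => 'first_name', 'value' => 'Bozo' },
  { 'patient_id' => 1, 'field' => 'last_name',  'value' => 'Clown' }
]

payload = Burner::Payload.new(registers: { 'patients' => rows })

Burner::Pipeline.make(pipeline).execute(payload: payload)

# Expected shape, if the inferred semantics hold:
# payload['patients'] # => [{ 'patient_id' => 1, 'first_name' => 'Bozo', 'last_name' => 'Clown' }]
````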
data/lib/burner/data.rb ADDED
@@ -0,0 +1,46 @@
+ # frozen_string_literal: true
+
+ #
+ # Copyright (c) 2020-present, Blue Marble Payroll, LLC
+ #
+ # This source code is licensed under the MIT license found in the
+ # LICENSE file in the root directory of this source tree.
+ #
+
+ module Burner
+   # Defines a key-value pair data store for our library. It is basically a composite
+   # object around a hash with indifferent key typing.
+   class Data
+     extend Forwardable
+
+     def_delegators :internal_hash, :transform_keys
+
+     def initialize(hash = {})
+       @internal_hash = {}
+
+       (hash || {}).each { |k, v| self[k] = v }
+     end
+
+     def []=(key, value)
+       internal_hash[key.to_s] = value
+     end
+
+     def [](key)
+       internal_hash[key.to_s]
+     end
+
+     def to_h
+       internal_hash
+     end
+
+     def ==(other)
+       other.instance_of?(self.class) &&
+         to_h == other.to_h
+     end
+     alias eql? ==
+
+     private
+
+     attr_reader :internal_hash
+   end
+ end
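The indifferent key typing above is easiest to see in use. This snippet only exercises methods defined in the class itself:

````ruby
data = Burner::Data.new(id: 1)

# Keys are coerced with #to_s on both read and write, so symbols and
# strings are interchangeable.
data[:id]  # => 1
data['id'] # => 1

data['name'] = 'burner'
data[:name] # => "burner"

data.to_h # => { "id" => 1, "name" => "burner" }
````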
data/lib/burner/job.rb CHANGED
@@ -18,9 +18,7 @@ module Burner
 
    attr_reader :name
 
-   def initialize(name:)
-     raise ArgumentError, 'name is required' if name.to_s.empty?
-
+   def initialize(name: '')
      @name = name.to_s
    end
 
@@ -48,15 +46,9 @@ module Burner
    protected
 
    def job_string_template(expression, output, payload)
-     templatable_params = payload.params
-                                 .merge(__id: output.id)
-                                 .merge(templatable_register_values(payload))
+     templatable_params = payload.params_and_registers_hash.merge(__id: output.id)
 
      Util::StringTemplate.instance.evaluate(expression, templatable_params)
    end
-
-   def templatable_register_values(payload)
-     payload.registers.transform_keys { |key| "__#{key}_register" }
-   end
  end
 end
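For context, job_string_template is what powers the `{...}` expressions in the README examples. Based on the removed helper above (which exposed each register as `__<name>_register`) and the consolidated Payload#params_and_registers_hash, templates should keep working as in this hedged illustration:

````ruby
# Hypothetical echo job configuration: '{input_file}' is a param, '{__id}' is
# the job id merged in above, and '{__default_register}' follows the
# __<name>_register convention from the removed helper.
{
  type: 'b/echo',
  message: 'job={__id} input={input_file} value={__default_register}'
}
````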
data/lib/burner/job_set.rb ADDED
@@ -0,0 +1,64 @@
+ # frozen_string_literal: true
+
+ #
+ # Copyright (c) 2020-present, Blue Marble Payroll, LLC
+ #
+ # This source code is licensed under the MIT license found in the
+ # LICENSE file in the root directory of this source tree.
+ #
+
+ require_relative 'jobs'
+
+ module Burner
+   # This class understands how jobs fit together as a cohesive unit. It does not know how to
+   # use them, but it knows how to group them together in a logical manner following some simple
+   # rules, such as:
+   # - Jobs in a set should have unique names (unless the name is blank)
+   # - Subsets of jobs can be extracted, by name, in constant time.
+   class JobSet
+     class DuplicateJobNameError < StandardError; end
+     class JobNotFoundError < StandardError; end
+
+     def initialize(jobs = [])
+       @jobs = Jobs.array(jobs).freeze
+
+       assert_unique_job_names
+     end
+
+     def jobs(names = nil)
+       return @jobs unless names
+
+       Array(names).map do |name|
+         job = named_jobs_by_name[name.to_s]
+
+         raise JobNotFoundError, "#{name} was not declared as a job" unless job
+
+         job
+       end
+     end
+
+     private
+
+     def named_jobs_by_name
+       @named_jobs_by_name ||= named_jobs.each_with_object({}) { |job, memo| memo[job.name] = job }
+     end
+
+     def named_jobs
+       @named_jobs ||= @jobs.reject { |job| job.name == '' }
+     end
+
+     def assert_unique_job_names
+       unique_job_names = Set.new
+
+       named_jobs.each do |job|
+         if unique_job_names.include?(job.name)
+           raise DuplicateJobNameError, "job with name: #{job.name} already declared"
+         end
+
+         unique_job_names << job.name
+       end
+
+       nil
+     end
+   end
+ end
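The contract above is small enough to show end to end. This sketch only uses constructs visible in the class, assuming Jobs.array accepts the same hash configurations pipelines use:

````ruby
job_set = Burner::JobSet.new(
  [
    { name: 'read', type: 'b/io/read', path: 'input.json' },
    { type: 'b/echo', message: 'anonymous jobs are allowed' },
    { name: 'write', type: 'b/io/write', path: 'output.json' }
  ]
)

job_set.jobs           # => all three jobs, in declared order
job_set.jobs(%w[read]) # => constant-time lookup of the named subset
job_set.jobs(%w[nope]) # raises Burner::JobSet::JobNotFoundError

# Two jobs sharing a non-blank name raise Burner::JobSet::DuplicateJobNameError
# at construction time.
````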
data/lib/burner/job_with_register.rb CHANGED
@@ -15,10 +15,17 @@ module Burner
  class JobWithRegister < Job
    attr_reader :register
 
-   def initialize(name:, register: DEFAULT_REGISTER)
+   def initialize(name: '', register: DEFAULT_REGISTER)
      super(name: name)
 
      @register = register.to_s
    end
+
+   protected
+
+   # Helper method that knows how to ensure the register is an array.
+   def ensure_array(payload)
+     payload[register] = array(payload[register])
+   end
  end
 end
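A hedged sketch of what ensure_array does for a subclass's #perform; the array helper is assumed to come from Burner::Util::Arrayable, which wraps a hash as a single element instead of splitting it into pairs:

````ruby
payload = Burner::Payload.new
payload['patients'] = { 'id' => 1 }

# After a job calls ensure_array(payload) on the 'patients' register:
# payload['patients'] # => [{ 'id' => 1 }]
# A nil register would likewise become [].
````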
data/lib/burner/jobs.rb CHANGED
@@ -28,7 +28,9 @@ module Burner
  register 'b/collection/graph', Library::Collection::Graph
  register 'b/collection/group', Library::Collection::Group
  register 'b/collection/nested_aggregate', Library::Collection::NestedAggregate
+ register 'b/collection/number', Library::Collection::Number
  register 'b/collection/objects_to_arrays', Library::Collection::ObjectsToArrays
+ register 'b/collection/pivot', Library::Collection::Pivot
  register 'b/collection/shift', Library::Collection::Shift
  register 'b/collection/transform', Library::Collection::Transform
  register 'b/collection/unpivot', Library::Collection::Unpivot
@@ -47,11 +49,16 @@ module Burner
  register 'b/io/row_reader', Library::IO::RowReader
  register 'b/io/write', Library::IO::Write
 
+ register 'b/param/from_register', Library::Param::FromRegister
+ register 'b/param/to_register', Library::Param::ToRegister
+
  register 'b/serialize/csv', Library::Serialize::Csv
  register 'b/serialize/json', Library::Serialize::Json
  register 'b/serialize/yaml', Library::Serialize::Yaml
 
  register 'b/value/copy', Library::Value::Copy
+ register 'b/value/nest', Library::Value::Nest
  register 'b/value/static', Library::Value::Static
+ register 'b/value/transform', Library::Value::Transform
  end
 end
data/lib/burner/library.rb CHANGED
@@ -19,7 +19,9 @@ require_relative 'library/collection/concatenate'
  require_relative 'library/collection/graph'
  require_relative 'library/collection/group'
  require_relative 'library/collection/nested_aggregate'
+ require_relative 'library/collection/number'
  require_relative 'library/collection/objects_to_arrays'
+ require_relative 'library/collection/pivot'
  require_relative 'library/collection/shift'
  require_relative 'library/collection/transform'
  require_relative 'library/collection/unpivot'
@@ -38,9 +40,14 @@ require_relative 'library/io/read'
  require_relative 'library/io/row_reader'
  require_relative 'library/io/write'
 
+ require_relative 'library/param/from_register'
+ require_relative 'library/param/to_register'
+
  require_relative 'library/serialize/csv'
  require_relative 'library/serialize/json'
  require_relative 'library/serialize/yaml'
 
  require_relative 'library/value/copy'
+ require_relative 'library/value/nest'
  require_relative 'library/value/static'
+ require_relative 'library/value/transform'
data/lib/burner/library/collection/arrays_to_objects.rb CHANGED
@@ -55,7 +55,7 @@ module Burner
  class ArraysToObjects < JobWithRegister
    attr_reader :mappings
 
-   def initialize(name:, mappings: [], register: DEFAULT_REGISTER)
+   def initialize(mappings: [], name: '', register: DEFAULT_REGISTER)
      super(name: name, register: register)
 
      @mappings = Modeling::KeyIndexMapping.array(mappings)
data/lib/burner/library/collection/coalesce.rb CHANGED
@@ -18,19 +18,27 @@ module Burner
  # Expected Payload[register] input: array of objects.
  # Payload[register] output: array of objects.
  class Coalesce < JobWithRegister
-   attr_reader :grouped_register, :key_mappings, :keys, :resolver
+   include Util::Keyable
+
+   attr_reader :grouped_register,
+               :insensitive,
+               :key_mappings,
+               :keys,
+               :resolver
 
    def initialize(
-     name:,
      grouped_register:,
+     insensitive: false,
      key_mappings: [],
      keys: [],
+     name: '',
      register: DEFAULT_REGISTER,
      separator: ''
    )
      super(name: name, register: register)
 
      @grouped_register = grouped_register.to_s
+     @insensitive = insensitive || false
      @key_mappings = Modeling::KeyMapping.array(key_mappings)
      @keys = Array(keys)
      @resolver = Objectable.resolver(separator: separator.to_s)
@@ -41,13 +49,14 @@
    end
 
    def perform(output, payload)
-     payload[register] = array(payload[register])
-     count = payload[register].length
+     ensure_array(payload)
+
+     count = payload[register].length
 
      output.detail("Coalescing based on key(s): #{keys} for #{count} record(s)")
 
      payload[register].each do |record|
-       key = make_key(record)
+       key = make_key(record, keys, resolver, insensitive)
        lookup = find_lookup(payload, key)
 
        key_mappings.each do |key_mapping|
@@ -63,10 +72,6 @@
    def find_lookup(payload, key)
      (payload[grouped_register] || {})[key] || {}
    end
-
-   def make_key(record)
-     keys.map { |key| resolver.get(record, key) }
-   end
 
  end
 end
data/lib/burner/library/collection/concatenate.rb CHANGED
@@ -18,7 +18,7 @@ module Burner
  class Concatenate < Job
    attr_reader :from_registers, :to_register
 
-   def initialize(name:, from_registers: [], to_register: DEFAULT_REGISTER)
+   def initialize(from_registers: [], name: '', to_register: DEFAULT_REGISTER)
      super(name: name)
 
      @from_registers = Array(from_registers)