micdrop 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,512 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "date"
4
+ require "forwardable"
5
+ require "json"
6
+
7
+ module Micdrop
8
+ class ItemContext # rubocop:disable Metrics/ClassLength
9
+ extend Forwardable
10
+
11
+ @@registered_lookups = {}
12
+
13
+ def initialize(record_context, value)
14
+ @record_context = record_context
15
+ @value = value
16
+ @original_value = value
17
+ end
18
+
19
+ attr_reader :record_context, :original_value
20
+ attr_accessor :value
21
+
22
+ ##
23
+ # Register a lookup, allowing it to be used in subsequent migrations
24
+ def self.register_lookup(name, lookup)
25
+ @@registered_lookups[name] = lookup
26
+ end
27
+
28
+ ##
29
+ # Directly update the current value
30
+ def update(value)
31
+ @value = value
32
+ self
33
+ end
34
+
35
+ ##
36
+ # Use plain Ruby code to modify this Item.
37
+ def convert(proc_or_symbol = nil, &block)
38
+ proc_or_symbol = method(proc_or_symbol) if proc_or_symbol.is_a? Symbol
39
+ @value = proc_or_symbol.call(@value) unless proc_or_symbol.nil?
40
+ @value = block.call(@value) unless block.nil?
41
+ self
42
+ end
43
+
44
+ ##
45
+ # Run a predefined pipline on this Item.
46
+ def apply(pipeline)
47
+ instance_eval(&pipeline) unless pipeline.nil?
48
+ self
49
+ end
50
+
51
+ ##
52
+ # Treat the current Item as a Record, allowing child objects to be Taken.
53
+ def enter(&block)
54
+ ctx = SubRecordContext.new(self, @record_context)
55
+ ctx.instance_eval(&block) unless block.nil?
56
+ ctx
57
+ end
58
+
59
+ ##
60
+ # Alias for scope.enter.take
61
+ def take(name, put: nil, convert: nil, apply: nil, &block)
62
+ scope.enter.take(name, put: put, convert: convert, apply: apply, &block)
63
+ end
64
+
65
+ ##
66
+ # Alias for scope.enter.take_dig
67
+ def take_dig(*keys, put: nil, convert: nil, apply: nil, &block)
68
+ scope.enter.take_dig(*keys, put: put, convert: convert, apply: apply, &block)
69
+ end
70
+
71
+ ##
72
+ # Alias for scope.enter.try_take
73
+ def try_take(name, put: nil, convert: nil, apply: nil, &block)
74
+ scope.enter.try_take(name, put: put, convert: convert, apply: apply, &block)
75
+ end
76
+
77
+ ##
78
+ # Create a new item context with the same value as exists currently. Allows operations in a
79
+ # scope that will not affect the value in the current scope.
80
+ def scope(&block)
81
+ ctx = ItemContext.new(@record_context, @value)
82
+ ctx.apply block unless block.nil?
83
+ ctx
84
+ end
85
+
86
+ ##
87
+ # Similar to Take, but replaces the current value in the current scope
88
+ #
89
+ # Can be used to take slices of arrays as well
90
+ def extract(name)
91
+ return self if @value.nil?
92
+
93
+ @value = @value[name]
94
+ self
95
+ end
96
+
97
+ ##
98
+ # Extract using the :dig method instead of :[]
99
+ def extract_dig(*keys)
100
+ return self if @value.nil?
101
+
102
+ @value = @value.dig(*keys)
103
+ self
104
+ end
105
+
106
+ ### record context passthru ###
107
+
108
+ ##
109
+ # Put the current value in the output record.
110
+ #
111
+ # Normally takes a single argument: the name to put the value under. However, a two-argument
112
+ # (name, value) form is also supported.
113
+ def put(*args)
114
+ if args.length == 1
115
+ @record_context.put args.first, @value
116
+ else
117
+ @record_context.put(*args)
118
+ end
119
+ self
120
+ end
121
+
122
+ ##
123
+ # Put the value into a nested structure.
124
+ #
125
+ # This is the opposite of :dig, allowing you to build up structure on the fly without manually
126
+ # constructing arrays and hashes. (This does require the collector to be a simple array or hash.)
127
+ def put_bury(*keys)
128
+ @record_context.put_bury keys, @value
129
+ end
130
+
131
+ def_delegators :@record_context, :static, :index, :collect_format_string, :collect_list, :stop, :skip, :flush
132
+
133
+ ### Debug transformers ###
134
+
135
+ ##
136
+ # Debug tool to print the current value to the console
137
+ def dump(prefix = nil)
138
+ puts prefix unless prefix.nil?
139
+ puts @value
140
+ puts "\n"
141
+ self
142
+ end
143
+
144
+ ### Basic parse/format transformers ###
145
+
146
+ ##
147
+ # Parse a value to an integer
148
+ def parse_int(base = 10)
149
+ return self if @value.nil?
150
+
151
+ @value = @value.to_i(base)
152
+ self
153
+ end
154
+
155
+ ##
156
+ # Parse a value to a float
157
+ def parse_float
158
+ return self if @value.nil?
159
+
160
+ @value = @value.to_f
161
+ self
162
+ end
163
+
164
+ ##
165
+ # Parse a date using a given format string
166
+ def parse_date(format = "%Y-%m-%d", zero_date: false)
167
+ if zero_date
168
+ zero = make_zero_date format
169
+ @value = nil if @value == zero
170
+ end
171
+ @value = ::Date.strptime(@value, format) unless @value.nil?
172
+ self
173
+ end
174
+
175
+ ##
176
+ # Parse a datetime using a given format string
177
+ def parse_datetime(format = "%Y-%m-%d %H:%M:%S", zero_date: false)
178
+ if zero_date
179
+ zero = make_zero_date format
180
+ @value = nil if @value == zero
181
+ end
182
+ @value = ::DateTime.strptime(@value, format) unless @value.nil?
183
+ self
184
+ end
185
+
186
+ ##
187
+ # Format a date using a given format string
188
+ def format_date(format = "%Y-%m-%d", zero_date: false)
189
+ if @value.nil? && zero_date
190
+ @value = make_zero_date format
191
+ elsif !@value.nil?
192
+ @value = @value.strftime(format)
193
+ end
194
+ self
195
+ end
196
+
197
+ ##
198
+ # Format a datetime using a given format string
199
+ def format_datetime(format = "%Y-%m-%d %H:%M:%S", zero_date: false)
200
+ if @value.nil? && zero_date
201
+ @value = make_zero_date format
202
+ elsif !@value.nil?
203
+ @value = @value.strftime(format)
204
+ end
205
+ self
206
+ end
207
+
208
+ ##
209
+ # Parse a value into a boolean using a list of common values for true or false
210
+ def parse_boolean(true_values = [1, "1", "true", "True", "TRUE", "yes", "Yes", "YES", "on", "On", "ON", "Y", "y"],
211
+ false_values = [0, "0", "false", "False", "FALSE", "no", "No", "NO", "off", "Off", "OFF", "N",
212
+ "n", ""])
213
+ if true_values.include? @value
214
+ @value = true
215
+ elsif false_values.include? @value
216
+ @value = false
217
+ elsif @value.nil?
218
+ nil
219
+ else
220
+ raise ValueError("Unrecognized value: {repr(value)}")
221
+ end
222
+ self
223
+ end
224
+
225
+ ##
226
+ # Format a boolean as a string
227
+ def format_boolean(true_value = "Yes", false_value = "No")
228
+ if @value.nil?
229
+ nil
230
+ elsif @value
231
+ @value = true_value
232
+ else
233
+ @value = false_value
234
+ end
235
+ self
236
+ end
237
+
238
+ ##
239
+ # Format the value into a string using sprintf-style formatting, or using `to_s` if no
240
+ # template is provided.
241
+ def format_string(template = nil)
242
+ return self if @value.nil?
243
+
244
+ @value = if template.nil?
245
+ @value.to_s
246
+ else
247
+ template % @value
248
+ end
249
+ self
250
+ end
251
+
252
+ ### Common operations ###
253
+
254
+ ##
255
+ # Lookup the value in a hash
256
+ #
257
+ # pass_if_not_found, if true, will cause the value to pass through the lookup unchanged if no
258
+ # match is found. If false, the value will instead be set to nil.
259
+ #
260
+ # apply_if_not_found, if provided, will be passed to an apply call if no match is found
261
+ def lookup(mapping, pass_if_not_found: false, warn_if_not_found: nil, apply_if_not_found: nil)
262
+ return self if @value.nil?
263
+
264
+ if mapping.is_a? Symbol
265
+ mapping = @@registered_lookups.fetch mapping do |key|
266
+ raise PipelineError, "No lookup '#{key}' found"
267
+ end
268
+ end
269
+
270
+ warn_if_not_found = true if warn_if_not_found.nil? && apply_if_not_found.nil?
271
+ @value = mapping.fetch @value do |v|
272
+ warn format "Value %s not found in lookup", v if warn_if_not_found
273
+ if !apply_if_not_found.nil?
274
+ apply apply_if_not_found
275
+ value
276
+ elsif pass_if_not_found
277
+ v
278
+ end
279
+ end
280
+ self
281
+ end
282
+
283
+ ##
284
+ # Perform a string replacement or regex replacement on the current value
285
+ def string_replace(find, replace)
286
+ @value = @value.gsub find, replace unless value.nil?
287
+ self
288
+ end
289
+
290
+ ##
291
+ # Strip whitespace from a string
292
+ def strip
293
+ @value = @value.strip unless value.nil?
294
+ self
295
+ end
296
+
297
+ ##
298
+ # Treats empty strings as nil
299
+ def empty_to_nil
300
+ @value = nil if @value == ""
301
+ self
302
+ end
303
+
304
+ ##
305
+ # Provide a default value if the current value is nill
306
+ def default(default_value)
307
+ @value = default_value if @value.nil?
308
+ self
309
+ end
310
+
311
+ ### String encoding & binary functions ###
312
+
313
+ ##
314
+ # Unpack binary data (using String.unpack)
315
+ def unpack(template, offset: 0)
316
+ @value = value.unpack template, offset: offset unless @value.nil?
317
+ self
318
+ end
319
+
320
+ ##
321
+ # Pack binary data (using Array.pack)
322
+ def pack(template)
323
+ @value = value.pack template unless @value.nil?
324
+ self
325
+ end
326
+
327
+ ##
328
+ # Re-encode a string in the given encoding
329
+ #
330
+ # Takes the same args and options as String.encode
331
+ def encode(*encoding, **options)
332
+ @value = @value.encode(*encoding, **options) unless value.nil?
333
+ self
334
+ end
335
+
336
+ ##
337
+ # Change the encoding of the current string without transcoding
338
+ def force_encoding(encoding)
339
+ @value = @value.force_encoding(encoding) unless value.nil?
340
+ self
341
+ end
342
+
343
+ ### String (de)structuring ###
344
+
345
+ ##
346
+ # Split a string according to a delimeter.
347
+ #
348
+ # Accepts an optional block in the record context of the newly created list of values.
349
+ def split(delimiter, &block)
350
+ return self if @value.nil?
351
+
352
+ @value = @value.split(delimiter)
353
+ enter(&block) unless block.nil?
354
+ self
355
+ end
356
+
357
+ ##
358
+ # Join a list into a string
359
+ def join(delimiter)
360
+ @value = @value.join(delimiter) unless @value.nil?
361
+ self
362
+ end
363
+
364
+ ##
365
+ # Split a string into a set of key/value pairs (as a hash) according to a set of delimiters.
366
+ #
367
+ # Accepts an optional block in the record context of the newly created hash of values.
368
+ def split_kv(kv_delimiter, item_delimiter = "\n", &block)
369
+ return self if @value.nil?
370
+
371
+ kv = {}
372
+ @value.each_line(item_delimiter, chomp: true) do |item|
373
+ k, v = item.split(kv_delimiter, 2)
374
+ kv[k] = v
375
+ end
376
+ @value = kv
377
+ enter(&block) unless block.nil?
378
+ self
379
+ end
380
+
381
+ ##
382
+ # Join a hash into a string
383
+ def join_kv(kv_delimiter, item_delimiter = "\n")
384
+ return self if @value.nil?
385
+
386
+ string = ""
387
+ @value.each_pair do |k, v|
388
+ string += item_delimiter if string != ""
389
+ string += k.to_s + kv_delimiter + v
390
+ end
391
+ @value = string
392
+ self
393
+ end
394
+
395
+ ### List operations ###
396
+
397
+ ##
398
+ # Filter for the first non-nil value in a list
399
+ def coalesce
400
+ return self if @value.nil?
401
+
402
+ @value = @value.compact.first
403
+ self
404
+ end
405
+
406
+ ##
407
+ # Filter out all nil values from a list
408
+ def compact
409
+ return self if @value.nil?
410
+
411
+ @value = @value.compact
412
+ self
413
+ end
414
+
415
+ ##
416
+ # Filter out values from a list based on a predicate
417
+ def filter(&predicate)
418
+ return self if @value.nil?
419
+
420
+ @value = @value.filter(&predicate)
421
+ self
422
+ end
423
+
424
+ ##
425
+ # Map the values in an array using a block
426
+ def map(&block)
427
+ return self if @value.nil?
428
+
429
+ @value = @value.map(&block)
430
+ self
431
+ end
432
+
433
+ ##
434
+ # Alternate version of map that takes a pipeline block which will be executed in an item context.
435
+ #
436
+ # This allows transforming individual items in a list using all of the micdrop operation methods.
437
+ def map_apply(&block)
438
+ return self if @value.nil?
439
+
440
+ rec_ctx = SubRecordContext.new self, @record_context
441
+ @value = @value.map do |v|
442
+ item_ctx = ItemContext.new rec_ctx, v
443
+ item_ctx.apply(block).value
444
+ end
445
+ self
446
+ end
447
+
448
+ ##
449
+ # Iterate an array or array-like object and run a block in the subrecord context of each
450
+ #
451
+ # Optionally flush and/or reset after each iteration. This is used to import multiple sink
452
+ # records from a single source record, such as, for example, a source record that contians a
453
+ # JSON list of multiple items.
454
+ def each_subrecord(flush: false, reset: false, &block)
455
+ rec_ctx = SubRecordContext.new self, @record_context
456
+ @value.each do |v|
457
+ item_ctx = ItemContext.new rec_ctx, v
458
+ ctx = SubRecordContext.new item_ctx, rec_ctx
459
+ ctx.instance_eval(&block)
460
+ @record_context.flush reset: false if flush
461
+ @record_context.reset if reset
462
+ end
463
+ end
464
+
465
+ ### Advanced parsing/formatting ###
466
+
467
+ ##
468
+ # Parse a string as JSON
469
+ #
470
+ # If a block is provided, it will act as a record context where object properties can be taken.
471
+ def parse_json(&block)
472
+ return self if @value.nil?
473
+
474
+ @value = JSON.parse @value
475
+ enter(&block) unless block.nil?
476
+ self
477
+ end
478
+
479
+ ##
480
+ # Format the data as a JSON string
481
+ def format_json
482
+ return self if @value.nil?
483
+
484
+ @value = JSON.generate @value
485
+ self
486
+ end
487
+
488
+ ##
489
+ # Perform a regular expression match, setting the current value to the match data
490
+ #
491
+ # If a block is provided, it will act as a record context where captured groups can be taken.
492
+ def regex(pattern, &block)
493
+ return self if @value.nil?
494
+
495
+ v = pattern.match @value
496
+ warn format "%s does not match %s", pattern.inspect, @value.inspect if v.nil?
497
+ @value = v
498
+ enter(&block) unless block.nil?
499
+ self
500
+ end
501
+
502
+ private
503
+
504
+ ##
505
+ # Make a "zero date" (e.g. 000-00-000) in the given format.
506
+ #
507
+ # Some systems store dates in this format as a representation of an empty or null date.
508
+ def make_zero_date(format)
509
+ ::DateTime.new(2000, 2, 2, 2, 2, 2).strftime(format).gsub!("2", "0")
510
+ end
511
+ end
512
+ end
@@ -0,0 +1,195 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "forwardable"
4
+
5
+ # Record context classes for Micdrop
6
+ module Micdrop
7
+ ##
8
+ # Common functions for all record contexts
9
+ class RecordContext
10
+ attr_reader :record
11
+
12
+ ##
13
+ # `take` extracts a single item from a record (e.g. a column from a row) and allows it to be
14
+ # operated upon. This is one of the most common operations you will use. It takes the following
15
+ # additional options:
16
+ #
17
+ # * `put` specifies where the taken value will go in the sink, after all transformations are
18
+ # applied.
19
+ # * `convert` takes a proc or function that will be called on the taken value. The new value
20
+ # will be the return value of the function.
21
+ # * `apply` takes a proc or function that will be used as a pipeline to transform the value.
22
+ # See `ItemContext` for details. (Passing a block also has the same effect.)
23
+ def take(name, put: nil, convert: nil, apply: nil, &block)
24
+ value = @record.nil? ? nil : @record[name]
25
+ process_item_helper(value, put, convert, apply, block)
26
+ end
27
+
28
+ # Take a value using the :dig method instead of :[]
29
+ def take_dig(*keys, put: nil, convert: nil, apply: nil, &block)
30
+ value = @record&.dig(*keys)
31
+ process_item_helper(value, put, convert, apply, block)
32
+ end
33
+
34
+ ##
35
+ # Take a value if possible, or take nil otherwise
36
+ def try_take(name, put: nil, convert: nil, apply: nil, &block)
37
+ value = @record&.fetch(name, nil)
38
+ process_item_helper(value, put, convert, apply, block)
39
+ end
40
+
41
+ ##
42
+ # A combined take/put shorthand, for migrations where many of the column names are the same
43
+ def passthru(*names)
44
+ names.each do
45
+ take(name, put: name)
46
+ end
47
+ end
48
+
49
+ ##
50
+ # `static` is a variant of `take` that, instead of actually taking data from the source record,
51
+ # allows you to specify your own value. This is usually used to supply values which are not
52
+ # provided in the source, but required in the sink.
53
+ def static(value, put: nil, convert: nil, apply: nil, &block)
54
+ process_item_helper(value, put, convert, apply, block)
55
+ end
56
+
57
+ ##
58
+ # `index` is a special form of `take` which takes the record index rather than an actual value
59
+ # from the record. You can use this as a unique identifier if the source does not have an
60
+ # explicit identifier.
61
+ def index(put: nil, convert: nil, apply: nil, &block)
62
+ process_item_helper(loop_index, put, convert, apply, block)
63
+ end
64
+
65
+ ##
66
+ # Create a new list record which collections multiple `take`s into a single list.
67
+ #
68
+ # Accepts all the same arguments as `take`. Then taken value will be a list of all constituent
69
+ # taken values. This is often used to join or concatenate items in the source in some way.
70
+ def collect_list(*items, put: nil, convert: nil, apply: nil, &block)
71
+ value = items.map(&:value)
72
+ process_item_helper(value, put, convert, apply, block)
73
+ end
74
+
75
+ ##
76
+ # Create a new key/value record which collections multiple `take`s into a single hash.
77
+ #
78
+ # Accepts all the same arguments as `take`. Then taken value will be a list of all constituent
79
+ # taken values. This is often used to join or concatenate items in the source in some way.
80
+ def collect_kv(hash, put: nil, convert: nil, apply: nil, &block)
81
+ value = hash.transform_values(&:value)
82
+ process_item_helper(value, put, convert, apply, block)
83
+ end
84
+
85
+ ##
86
+ # collect multiple values into a format string
87
+ def collect_format_string(template, *items, put: nil, convert: nil, apply: nil, &block)
88
+ value = format template, *items.map(&:value)
89
+ process_item_helper(value, put, convert, apply, block)
90
+ end
91
+
92
+ # TODO: collect_hash (not sure what the signature of it should be?)
93
+
94
+ ##
95
+ # Skip the current record. This is similar to a plain-ruby `next` statement.
96
+ def skip
97
+ raise Skip
98
+ end
99
+
100
+ ##
101
+ # Stop processing values from the source. This is similar to a plain-ruby `break` statement.
102
+ def stop
103
+ raise Stop
104
+ end
105
+
106
+ private
107
+
108
+ def process_item_helper(value, put, convert, apply, block)
109
+ ctx = ItemContext.new(self, value)
110
+ ctx.convert(convert) unless convert.nil?
111
+ ctx.apply(apply) unless apply.nil?
112
+ ctx.apply(block) unless block.nil?
113
+ self.put(put, ctx.value) unless put.nil?
114
+ ctx
115
+ end
116
+ end
117
+
118
+ ##
119
+ # Record context for root-level
120
+ class RootRecordContext < RecordContext
121
+ def initialize(source, sink, loop_item, loop_index = nil)
122
+ @source = source
123
+ @sink = sink
124
+ @loop_item = loop_item
125
+ @record = loop_item
126
+ @loop_index = loop_index
127
+ reset
128
+ end
129
+
130
+ attr_reader :source, :sink, :loop_item, :loop_index, :collector
131
+
132
+ ##
133
+ # Put a value in the sink.
134
+ #
135
+ # You typically won't use this directly.
136
+ def put(name, value)
137
+ @collector[name] = value
138
+ @dirty = true
139
+ end
140
+
141
+ ##
142
+ # Put a value in the sink, using nested keys. (This is an inverse of :dig)
143
+ #
144
+ # You typically won't use this directly.
145
+ def put_bury(keys, value)
146
+ sb = StructureBuilder.new @collector
147
+ sb.bury value, *keys
148
+ @dirty = true
149
+ end
150
+
151
+ ##
152
+ # Flush all currently put values to the sink, optionally resetting as well.
153
+ def flush(reset: true)
154
+ return unless @dirty
155
+
156
+ @sink << @collector
157
+ self.reset if reset
158
+ end
159
+
160
+ ##
161
+ # Debug tool to print the current sink collector to the console
162
+ def dump_collector(prefix = nil)
163
+ puts prefix unless prefix.nil?
164
+ puts @collector.inspect
165
+ puts "\n"
166
+ self
167
+ end
168
+
169
+ ##
170
+ # Clear the collection of currently-put values.
171
+ def reset
172
+ @dirty = false
173
+ @collector = if @sink.respond_to? :make_collector
174
+ @sink.make_collector
175
+ else
176
+ {}
177
+ end
178
+ end
179
+ end
180
+
181
+ ##
182
+ # Record context for sub-records
183
+ class SubRecordContext < RecordContext
184
+ extend Forwardable
185
+
186
+ def initialize(item_context, parent_record_context)
187
+ @item_context = item_context
188
+ @parent_record_context = parent_record_context
189
+ @record = item_context.value
190
+ end
191
+
192
+ def_delegators :@parent_record_context, :source, :sink, :loop_item, :loop_index, :collector, :put, :flush,
193
+ :dump_collector, :reset
194
+ end
195
+ end
@@ -0,0 +1,7 @@
1
module Micdrop
  # Raised by RecordContext#skip to abandon the current record.
  class Skip < Exception; end # rubocop:disable Lint/InheritException

  # Raised by RecordContext#stop to stop processing values from the source.
  class Stop < Exception; end # rubocop:disable Lint/InheritException
end