micdrop 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,54 @@
1
+ # frozen_string_literal: true
2
+
3
+ $LOAD_PATH.unshift File.expand_path("../lib", __dir__)
4
+ require "micdrop"
5
+ require "sequel"
6
+ require "micdrop/ext/sequel"
7
+
8
# Example import: load people from a directory of JSON files into a SQLite table.
DB = Sequel.sqlite "test.db"

# Create the destination data structure.
# In a real import script the destination tables would normally already exist.
#
# `create_table!` drops any :people table left behind by an earlier run before
# creating it. test.db is a file on disk, so with plain `create_table` the
# second run of this example fails because the table already exists.
# Never do this against a real destination database.
DB.create_table! :people do
  primary_key :id
  String :f_name
  String :l_name
  String :addr1
  String :addr2
  String :city
  String :state
  String :zip
  String :_tmp_id # Add a temporary column for storing the old system IDs
end

# Now start the migration

# Our source will iterate over all the JSON files matching the glob, relative to this script's directory
source = Micdrop::FilesSource.new(__dir__, glob: "data/json/*.json")
# Each migrated record is inserted as a new row of :people
sink = Micdrop::Ext::Sequel::InsertSink.new DB[:people]

Micdrop.migrate source, sink do
  # The files source exposes the basename and content as takeable items
  take :basename, put: :_tmp_id
  take(:content).parse_json do
    # parse_json accepts a block that will enter the sub-record
    take "residency" do
      # The pattern expects two lines: "<street> [<unit>]" then "<city>, <ST> <zip>".
      # The unit (Apt/Suite/Unit/#) is optional; the zip may carry a -NNNN suffix.
      regex(/(?<street>.+?) ?(?<unit>(?:Apt\.?|Suite|Unit|#) [#0-9a-zA-Z-]+)?\n(?<city>.+?), (?<state>[A-Z]{2}) (?<zip>\d{5}(?:-\d{4})?)/) do
        # regex also enters a sub-record
        take :street, put: :addr1
        take :unit, put: :addr2
        take :city, put: :city
        take :state, put: :state
        take :zip, put: :zip
      end
    end
    take "name" do
      split " " do
        # split can enter a sub-record as well
        take 0, put: :f_name
        take 1, put: :l_name
      end
    end
  end
end
@@ -0,0 +1,23 @@
1
+ # frozen_string_literal: true
2
+
3
module Micdrop
  ##
  # Raised when something goes wrong on the sink side of the migration
  class SinkError < StandardError; end

  ##
  # Raised when something goes wrong on the source side of the migration
  class SourceError < StandardError; end

  ##
  # Raised when the current data value prevents a conversion operation from working
  class ValueError < StandardError; end

  ##
  # Raised when the data pipeline itself is at fault
  class PipelineError < StandardError; end
end
@@ -0,0 +1,121 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "sequel"
4
+
5
+ module Micdrop
6
+ module Ext
7
+ module Sequel
8
##
# Sink that only ever adds rows: every record pushed into it is written
# through the dataset's +insert+, never through an update.
class InsertSink
  # dataset:: object that receives the +insert+ calls
  def initialize(dataset)
    @dataset = dataset
  end

  # Write one collected record, handing its entries to +insert+ as keyword
  # arguments. Returns whatever +insert+ returns.
  def <<(collector)
    row = collector
    @dataset.insert(**row)
  end
end
19
+
20
##
# A sink which will always issue an update statement
#
# The rows to update are selected by the values the pushed record holds for
# the key column(s); every value of the record is then written with +update+.
class UpdateSink
  # dataset::     dataset to update; must respond to +where+ and +update+
  # key_columns:: a single Symbol, or an object responding to +each+ that
  #               yields the key column names
  #
  # Raises ArgumentError when +key_columns+ is neither. Such a value used to
  # be stored as nil and only blew up later, inside #<<, with a NoMethodError.
  def initialize(dataset, key_columns)
    @dataset = dataset
    @key_columns = normalize_key_columns(key_columns)
  end

  # Update the rows whose key column(s) equal the record's values.
  # Returns whatever the dataset's +update+ returns.
  def <<(collector)
    dataset = @dataset
    # Narrow the dataset one key column at a time
    @key_columns.each do |col|
      dataset = dataset.where(col => collector[col])
    end
    dataset.update(**collector)
  end

  private

  # Wrap a lone Symbol in an Array, pass collections through, reject the rest.
  def normalize_key_columns(key_columns)
    return [key_columns] if key_columns.is_a? Symbol
    return key_columns if key_columns.respond_to? :each

    raise ArgumentError,
          format("key_columns must be a Symbol or respond to #each, got %s", key_columns.inspect)
  end
end
41
+
42
##
# A sink which will update an item if it exists, or insert it otherwise
#
# A record is looked up by the values it holds for the key column(s). No match
# means the record is inserted; exactly one match means the row is updated
# according to the configured update actions; more than one match is an error.
#
# Supported update actions (per column via +update_actions+, otherwise
# +default_update_action+):
# * :coalesce         - use the new value unless it is nil
# * :overwrite_nulls  - use the new value only when the stored value is nil
# * :always_overwrite - always use the new value
# * :keep_existing    - always keep the stored value
# * :append / :prepend           - join stored and new value with a space
# * :append_line / :prepend_line - join stored and new value with a newline
# * :add              - stored value + new value
class InsertUpdateSink
  # dataset::               dataset to write to; must respond to +where+,
  #                         +limit+, +all+, +insert+ and +update+
  # key_columns::           a single Symbol, or an object responding to +each+
  #                         that yields the key column names
  # update_actions::        Hash of column => action overriding the default
  # default_update_action:: action for columns without an explicit entry
  # match_empty_key::       stored only, see the note below
  #
  # Raises ArgumentError when +key_columns+ is neither a Symbol nor each-able.
  # Such a value used to be stored as nil and only failed later, inside #<<.
  def initialize(dataset, key_columns, update_actions: {}, default_update_action: :coalesce,
                 match_empty_key: false)
    @dataset = dataset
    @key_columns = normalize_key_columns(key_columns)
    @update_actions = update_actions
    @default_update_action = default_update_action
    # NOTE(review): this flag is stored but nothing in this class reads it;
    # confirm the intended behaviour before relying on it.
    @match_empty_key = match_empty_key
  end

  # Insert the record, or update the single row matching its key column(s).
  #
  # Raises Micdrop::SinkError when more than one row matches the key, or when
  # an unknown update action is configured for a column being merged.
  def <<(collector)
    dataset = @dataset
    @key_columns.each do |col|
      dataset = dataset.where(col => collector[col])
    end
    # Two rows are enough to tell a unique match from an ambiguous key
    existing = dataset.limit(2).all
    if existing.size > 1
      raise Micdrop::SinkError, "Key column(s) of this InsertUpdateSink are not unique"
    elsif existing.empty?
      dataset.insert(**collector)
    else
      dataset.update(**update_merge(existing.first, collector))
    end
  end

  private

  # Wrap a lone Symbol in an Array, pass collections through, reject the rest.
  def normalize_key_columns(key_columns)
    return [key_columns] if key_columns.is_a? Symbol
    return key_columns if key_columns.respond_to? :each

    raise ArgumentError,
          format("key_columns must be a Symbol or respond to #each, got %s", key_columns.inspect)
  end

  # Build the Hash of values to write when the row already exists.
  def update_merge(existing, collector)
    if @update_actions.empty?
      # If we don't have per-column actions, we can take shortcuts for some actions types
      return collector if @default_update_action == :always_overwrite
      return collector.compact if @default_update_action == :coalesce
    end
    # Otherwise merge according to the rules specified
    existing.merge(collector) do |key, oldval, newval|
      resolve(@update_actions.fetch(key, @default_update_action), oldval, newval)
    end
  end

  # Apply a single update action to the stored and the incoming value.
  def resolve(action, oldval, newval)
    case action
    when :coalesce then newval.nil? ? oldval : newval
    when :overwrite_nulls then oldval.nil? ? newval : oldval
    when :always_overwrite then newval
    when :keep_existing then oldval
    when :append then combine(oldval, newval) { |o, n| format("%s %s", o, n) }
    when :append_line then combine(oldval, newval) { |o, n| format("%s\n%s", o, n) }
    when :prepend then combine(oldval, newval) { |o, n| format("%s %s", n, o) }
    when :prepend_line then combine(oldval, newval) { |o, n| format("%s\n%s", n, o) }
    when :add then combine(oldval, newval) { |o, n| o + n }
    else
      # Without this branch an unknown action evaluated to nil and silently
      # overwrote the column with NULL.
      raise Micdrop::SinkError, format("Unknown update action %s", action.inspect)
    end
  end

  # Yield both values when both are present; otherwise return the non-nil one
  # unchanged, so a missing side neither adds a stray separator nor raises.
  def combine(oldval, newval)
    return newval if oldval.nil?
    return oldval if newval.nil?

    yield oldval, newval
  end
end
98
+ end
99
+ end
100
+
101
##
# Sequel-specific extensions for ItemContext
class ItemContext
  # Replace the current value with the +val_col+ of the row in +dataset+ whose
  # +key_col+ equals the current value. Always returns self.
  #
  # When no value is found:
  # * a warning is printed if +warn_if_not_found+ is truthy; when that option is
  #   left nil, a warning is printed only if +apply_if_not_found+ is nil as well
  # * if +apply_if_not_found+ is given it is handed to +apply+
  # * otherwise the value becomes nil, unless +pass_if_not_found+ is set, in
  #   which case the value is left untouched
  def db_lookup(dataset, key_col, val_col, pass_if_not_found: false, warn_if_not_found: nil, apply_if_not_found: nil)
    # TODO: allow registering db_lookups like we do normal lookups
    match = dataset.where(key_col => @value).get(val_col)
    unless match.nil?
      @value = match
      return self
    end

    should_warn = warn_if_not_found.nil? ? apply_if_not_found.nil? : warn_if_not_found
    warn format("Value %s not found in db_lookup", @value) if should_warn

    if apply_if_not_found.nil?
      @value = nil unless pass_if_not_found
    else
      apply apply_if_not_found
    end
    self
  end
end
121
+ end
@@ -0,0 +1,73 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Micdrop
4
##
# Takes a list of files, directory, or glob pattern as a source
#
# Each file is yielded as a FilesSourceRecord built from the directory joined
# with the file's name. Such a record offers the following items to `take`:
# * :content (The full content of the file, read only when you `take` it)
# * :stream (A file IO stream)
# * :filename (The path the record was opened with, e.g. files/x.json)
# * :basename (The basename of that path, e.g. x.json)
# * :path (The absolute path to the file, e.g. /data/migration/files/x.json)
# * Anything returned by File.stat (:ctime, :mtime, :size, etc...)
class FilesSource
  # dir::         directory that every file name is resolved against
  # files::       explicit list of file names; when nil the list is discovered
  # glob::        pattern (relative to +dir+) used for discovery; when nil as
  #               well, every entry of +dir+ is used
  # binary_mode:: passed on to each record
  # file_opts::   any further keywords, passed on to each record
  def initialize(dir, files: nil, glob: nil, binary_mode: false, **file_opts)
    @dir = dir
    @binary_mode = binary_mode
    @file_opts = file_opts
    @files = files.nil? ? discover(dir, glob) : files
  end

  # Yield the listed name and a FilesSourceRecord for every regular file.
  # Entries that are not files are reported with a warning and skipped.
  # Without a block an Enumerator is returned.
  def each_pair
    return enum_for :each_pair unless block_given?

    @files.each do |filename|
      path = File.join(@dir, filename)
      if File.file? path
        yield filename, FilesSourceRecord.new(path, @binary_mode, @file_opts)
      else
        warn format("%s is not a file and will be skipped", path)
      end
    end
  end

  private

  # List the entries of +dir+, restricted to +glob+ when one is given.
  def discover(dir, glob)
    return Dir.children(dir) if glob.nil?

    Dir.glob(glob, flags: File::FNM_EXTGLOB, base: dir)
  end
end
44
+
45
##
# Wrapper object to expose files as a source item
#
# Items are read with #[]:
# * :content / :contents - the whole file, read anew on every access
# * :stream   - a freshly opened IO on the file
# * :path     - the absolute path of the file
# * :basename - the last component of the file name
# * :filename - the name the record was created with
# * any other key is sent to the file's File::Stat (:size, :mtime, ...)
class FilesSourceRecord
  # filename::    path of the file to expose
  # binary_mode:: open the file with "rb" instead of "r" when truthy
  # file_opts::   Hash of extra keyword options handed to File.open
  def initialize(filename, binary_mode, file_opts)
    @filename = filename
    @binary_mode = binary_mode
    @file_opts = file_opts
    @stat = nil
  end

  # Return the item named by +key+.
  #
  # Raises NoMethodError (a NameError) when +key+ is not one of the named items
  # and is not a public method of File::Stat.
  def [](key)
    case key
    when :contents, :content
      File.open(@filename, open_mode, **@file_opts, &:read)
    when :stream
      # Returned still open; closing it is up to the caller
      File.open(@filename, open_mode, **@file_opts)
    when :path
      File.absolute_path(@filename)
    when :basename
      File.basename(@filename)
    when :filename
      @filename
    else
      # Stat the file once and reuse the result for later lookups
      @stat ||= File.stat(@filename)
      # public_send respects method visibility. The former method(key).call
      # also reached private methods, so a key such as :exit or :puts ran the
      # Kernel method of that name instead of failing as an unknown item.
      @stat.public_send(key)
    end
  end

  private

  # Mode string for File.open, depending on the binary_mode flag.
  def open_mode
    @binary_mode ? "rb" : "r"
  end
end
73
+ end