micdrop 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 4e2a6458190e5d2bc6537824586b135a8d6a9a0c24ab55d9b4de5db9197b5c88
-  data.tar.gz: 869d3c01befdb76e074c339caf9288a3be3734365fe5d3b4b9d6cf0e772cdc91
+  metadata.gz: 8e567ed6bfc0a336d28ec7f7cee0d3bd987109b83d1ac634071a79d875002824
+  data.tar.gz: 0bd90f322d2201b782b3cca29213ffd8ba4078f671ce3a04302e9404ec7a14f7
 SHA512:
-  metadata.gz: 182e84bd143229048a783333aabaa4fdbf7ad26020b75c06ae6692e142ce223aad0262c60e3260916df96b1dc5432623b22e9e5ab40c2115fe4cb0b8aee1792d
-  data.tar.gz: 6b81a5ef76726c2738e09a8fdb6f117f8e1df2d7c0cb3c699eb0fc65efd7ba3584ebdb16791dcad4d8e9612d94eacd4a7a5860e59972b1bd7112ba858dcdb00b
+  metadata.gz: ee163fb3d7be2fd634e465196fd5053622d9b46246cf5177a75c00bbbdf7cba99c60d7fae661648c652e8a8f500580f3342b7f4fb99764330960c3fecbd62c86
+  data.tar.gz: d081c87bcb894e3718290794892be8e48283507615cdeee2b6b47206fb3497f1e45f26d32894b58f7b0cc312031f75c9cd48aca540e3fd35d4d912117cfa75c9
data/README.md CHANGED
@@ -446,3 +446,15 @@ Micdrop.migrate source, sink do
 end
 ```
 
+If needed, you can also use the `before_flush` or `after_flush` hooks to run actions immediately before or after the flush. Both take the same form:
+
+```ruby
+Micdrop.migrate source, sink do
+  after_flush do |record, collected|
+    # `record` is the RootRecordContext; `collected` is the hash of `put` values.
+    # For example, you could do something like this if the sink is a Sequel InsertSink:
+    puts "Inserted ID #{record.sink.insert_id} with data #{collected.inspect}"
+  end
+  # Then do your normal migration operations here
+end
+```
examples/data/catalog.xml ADDED
@@ -0,0 +1,46 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<catalog>
+  <product id="P001">
+    <name>Laptop Pro 15</name>
+    <category>Electronics</category>
+    <price currency="USD">1299.99</price>
+    <inStock>true</inStock>
+    <quantity>45</quantity>
+    <specifications>
+      <processor>Intel Core i7-13700H</processor>
+      <ram>16GB DDR5</ram>
+      <storage>512GB NVMe SSD</storage>
+      <display>15.6" FHD IPS</display>
+      <graphics>NVIDIA RTX 4060</graphics>
+    </specifications>
+    <images>
+      <image type="thumbnail">laptop-thumb.jpg</image>
+      <image type="main">laptop-main.jpg</image>
+    </images>
+    <ratings>
+      <average>4.5</average>
+      <count>127</count>
+    </ratings>
+  </product>
+  <product id="P002">
+    <name>Wireless Mouse</name>
+    <category>Accessories</category>
+    <price currency="USD">29.99</price>
+    <inStock>true</inStock>
+    <quantity>230</quantity>
+    <specifications>
+      <connectivity>Bluetooth 5.0</connectivity>
+      <battery>2x AA</battery>
+      <dpi>1600</dpi>
+      <buttons>6</buttons>
+    </specifications>
+    <images>
+      <image type="thumbnail">mouse-thumb.jpg</image>
+      <image type="main">mouse-main.jpg</image>
+    </images>
+    <ratings>
+      <average>4.7</average>
+      <count>89</count>
+    </ratings>
+  </product>
+</catalog>
examples/data/readme.md CHANGED
@@ -2,4 +2,5 @@
 
 Sources:
 * <https://github.com/datablist/sample-csv-files>
-* JSON made with [Faker](https://faker.readthedocs.io/en/master/)
+* JSON made with [Faker](https://faker.readthedocs.io/en/master/)
+* <https://jsontotable.org/blog/xml/sample-xml-files>
examples/xml_to_sql.rb ADDED
@@ -0,0 +1,73 @@
+# frozen_string_literal: true
+
+$LOAD_PATH.unshift File.expand_path("../lib", __dir__)
+require "micdrop"
+require "sequel"
+require "micdrop/ext/sequel"
+require "micdrop/ext/nokogiri"
+
+DB = Sequel.sqlite "test.db"
+
+# Create the destination data structure.
+# Obviously in a real import script, these would probably already exist.
+
+DB.create_table :products do
+  String :code, primary_key: true
+  String :name
+  String :category
+  BigDecimal :price, size: [6, 2]
+  Integer :stock
+end
+
+DB.create_table :product_specs do
+  String :code
+  String :key
+  String :value
+  primary_key %i[code key]
+end
+
+# Now start the migration
+document = Nokogiri::XML.parse File.open File.join(__dir__, "data/catalog.xml")
+
+# Our source will iterate over the <product> elements in the XML document
+source = document.css("product")
+sink = Micdrop::Ext::Sequel::InsertSink.new DB[:products]
+
+Micdrop.migrate source, sink do
+  # Each <product> element exposes its attributes as takeable items
+  take "id", put: :code
+  at_css("name").take_content put: :name
+  at_css("category").take_content put: :category
+  at_css("price").take_content do
+    parse_float
+    put :price
+  end
+  at_css("quantity").take_content put: :stock
+end
+
+# Then over the individual specs
+source = document.css("product")
+sink = Micdrop::Ext::Sequel::InsertSink.new DB[:product_specs]
+
+Micdrop.migrate source, sink do
+  # Take the product code first so each spec row can reference it
+  code = take "id"
+  css("specifications > *").each_subrecord(flush: true, reset: true) do
+    code.put :code
+    take_node_name do
+      lookup({
+        "battery" => "Battery",
+        "buttons" => "Button Count",
+        "connectivity" => "Connectivity",
+        "display" => "Screen",
+        "dpi" => "Screen DPI",
+        "graphics" => "GPU",
+        "processor" => "CPU",
+        "ram" => "Memory",
+        "storage" => "Storage"
+      })
+      put :key
+    end
+    take_content.put :value
+  end
+end
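For reference, a sketch of the rows the second migration would produce for product P001, given the catalog data and the node-name lookup above (the hash form is illustrative; the sink receives the collected values):

```ruby
# Illustrative only: collected values inserted into product_specs for P001
[
  { code: "P001", key: "CPU",     value: "Intel Core i7-13700H" },
  { code: "P001", key: "Memory",  value: "16GB DDR5" },
  { code: "P001", key: "Storage", value: "512GB NVMe SSD" },
  { code: "P001", key: "Screen",  value: "15.6\" FHD IPS" },
  { code: "P001", key: "GPU",     value: "NVIDIA RTX 4060" }
]
```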
lib/micdrop/ext/microfocus.rb ADDED
@@ -0,0 +1,240 @@
+require "date"
+require "forwardable"
+
+module Micdrop
+  module Ext
+    ##
+    # A simple parser to extract data from a "Micro Focus File with Header (DAT)" file.
+    #
+    # Based on this spec: https://www.microfocus.com/documentation/server-express/sx20books/fhfile.htm
+    #
+    # This format comes from old COBOL programs, and each file is conceptually similar to an SQL
+    # database table. Unlike SQL though, these DAT files lack type information; each row is raw
+    # binary and must be unpacked.
+    #
+    # This does not implement the full spec, and is not well tested, but "works on my machine".
+    module Microfocus
+      ##
+      # A header value that appears at the beginning of each record to determine the record type
+      module RecordType
+        DUPLICATE_SYSTEM = 0b0001
+        DELETED = 0b0010
+        SYSTEM = 0b0011
+        NORMAL = 0b0100
+        REDUCED = 0b0101
+        POINTER = 0b0110
+        POINTER_REF = 0b0111
+        REDUCED_POINTER_REF = 0b1000
+      end
+
+      ##
+      # Flag indicating how records are organized in the file
+      module RecordOrganization
+        SEQUENTIAL = 1
+        INDEXED = 2
+        RELATIVE = 3
+      end
+
+      ##
+      # Representation of a single record within a file
+      class Record
+        extend Forwardable
+
+        def initialize(type, body, unpack_spec: nil, unpack_mapping: nil)
+          @type = type
+          @body = body
+          @fields = nil
+          unpack unpack_spec, unpack_mapping unless unpack_spec.nil?
+        end
+
+        attr_reader :type, :body, :fields
+
+        def_delegators :@fields, :[], :each
+
+        private
+
+        def unpack(spec, mapping = nil)
+          fields = @body.unpack spec
+          fields = if mapping.nil?
+                     fields
+                   else
+                     mapping.transform_values { |value| fields[value] }
+                   end
+          @fields = fields.freeze
+        end
+      end
+
+      ##
+      # Read a Micro Focus data file
+      class MicroFocusReader
+        def initialize(data_file, unpack_spec: nil, unpack_mapping: nil)
+          @data_file = data_file
+          @unpack_spec = unpack_spec
+          @unpack_mapping = unpack_mapping
+          read_data_header
+        end
+
+        attr_reader :creation_time, :compression, :index_type, :variable_length, :min_length, :max_length, :index_version
+
+        def long_records?
+          @long_records
+        end
+
+        def sequential?
+          @organization == RecordOrganization::SEQUENTIAL
+        end
+
+        def indexed?
+          @organization == RecordOrganization::INDEXED
+        end
+
+        def relative?
+          @organization == RecordOrganization::RELATIVE
+        end
+
+        def each
+          return enum_for :each unless block_given?
+
+          yield read_record until @data_file.eof?
+        end
+
+        private
+
+        def read_data_header
+          parse_data_file_header @data_file.read(128)
+        end
+
+        def read_record
+          header = @data_file.read(@long_records ? 4 : 2)
+          type = header.unpack1("C") >> 4
+          length = header.unpack1(@long_records ? "N" : "n") & (@long_records ? 0xFFFFFFF : 0xFFF)
+          body = @data_file.read length
+          scan_padding
+          Record.new type, body, unpack_spec: @unpack_spec, unpack_mapping: @unpack_mapping
+        end
+
+        ##
+        # Parse the first four bytes of the header, which are used to determine the record size
+        def parse_data_file_header(data)
+          # The first 4 bits are the record type, which must be SYSTEM
+          type = data.unpack1("C") >> 4
+          raise StandardError, "This file does not have a valid header" unless type == RecordType::SYSTEM
+
+          # The next 12 bits (or 28 bits, depending on the max record size) are the header record size
+          length = data.unpack1("n") & 0xFFF
+          if length == 126
+            # Header data is 126 bytes, max record length is less than 4095 bytes
+            @long_records = false
+          elsif length == 0
+            # Header data is 124 bytes, max record length is 4095 bytes or greater
+            length = data.unpack1("N") & 0xFFFFFFF
+            raise StandardError, "Invalid header record length" unless length == 124
+
+            @long_records = true
+          else
+            raise StandardError, "Invalid header record length"
+          end
+
+          # Regardless of the listed header length, the actual header data is always at the same byte offsets
+          (
+            @db_seq,
+            integrity, # The specs say this integrity flag is 3 bytes, not 2, but I think the spec must be wrong
+            creation_time,
+            special62,
+            @organization,
+            @compression,
+            @index_type,
+            variable_length,
+            @min_length,
+            @max_length,
+            @index_version
+          ) = data.unpack "x4 n n A14 x14 n x C x C x C x C x5 N N x46 N"
+
+          # Check integrity
+          raise StandardError, "Integrity flag non-zero; file is corrupt" if integrity != 0
+          raise StandardError, "Bytes 36-37 not equal to 62; file is corrupt" if special62 != 62
+
+          # Type-cast some of the header values
+          @creation_time = DateTime.strptime creation_time[0..11], "%y%m%d%H%M%S"
+          @variable_length = !!variable_length.nil?
+        end
+
+        ##
+        # Scan forward to the next non-null byte
+        def scan_padding
+          # TODO: This is a work-around because it seems I don't have align_cursor working correctly yet
+          return if @data_file.eof?
+
+          return if @data_file.eof? until @data_file.readbyte.positive?
+          @data_file.seek(-1, :CUR)
+        end
+
+        ##
+        # Aligns the file cursor to the next address which is a multiple of the data alignment value
+        #
+        # Automatically detects the alignment from the index type if not provided
+        #
+        # Index formats 1 and 2 have no alignment, 3 and 4 are aligned to 4 bytes, and 8 is aligned to 8 bytes
+        def align_cursor
+          alignment = if @index_type < 3
+                        return # alignment of 1, so we don't need to do anything
+                      elsif @index_type < 5
+                        4
+                      else
+                        8
+                      end
+
+          offset = @data_file.tell % alignment
+          @data_file.seek(alignment - offset, :CUR) unless offset.zero?
+        end
+      end
+
+      ##
+      # This is the main entrypoint to read a file, and its output is usable as a source.
+      #
+      # `unpack_spec` is an optional spec, as would be passed to `String#unpack`, to extract the
+      # individual columns from the record. You may also provide an `unpack_mapping` which maps more
+      # human-readable column names to column indexes.
+      def self.read_microfocus_file(filename, unpack_spec: nil, unpack_mapping: nil)
+        File.open filename, "rb" do |file|
+          reader = MicroFocusReader.new file, unpack_spec: unpack_spec, unpack_mapping: unpack_mapping
+          reader.each.entries
+        end
+      end
+    end
+  end
+
+  ##
+  # Extend ItemContext with parse_microfocus
+  class ItemContext
+    ##
+    # Parse the value as a Micro Focus DAT file
+    #
+    # If a block is provided, it will act as a record context where the parsed records can be taken.
+    #
+    # If include_header is true, the value will be a hash containing both the header information
+    # and the actual records.
+    def parse_microfocus(include_header: false, unpack_spec: nil, unpack_mapping: nil, &block)
+      return self if @value.nil?
+
+      reader = Micdrop::Ext::Microfocus::MicroFocusReader.new @value, unpack_spec: unpack_spec,
+                                                              unpack_mapping: unpack_mapping
+      @value = if include_header
+                 {
+                   creation_time: reader.creation_time,
+                   compression: reader.compression,
+                   index_type: reader.index_type,
+                   variable_length: reader.variable_length,
+                   min_length: reader.min_length,
+                   max_length: reader.max_length,
+                   index_version: reader.index_version,
+                   records: reader.each.entries
+                 }
+               else
+                 reader.each.entries
+               end
+      enter(&block) unless block.nil?
+      self
+    end
+  end
+end
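As a usage sketch of the entrypoint above (the file name, record layout, and field names are hypothetical; the keyword arguments are the ones defined by `read_microfocus_file`):

```ruby
require "micdrop/ext/microfocus"

# Hypothetical layout: an 8-byte code, a 30-byte name, and a 32-bit big-endian balance
records = Micdrop::Ext::Microfocus.read_microfocus_file(
  "customers.dat",                                 # hypothetical file
  unpack_spec: "A8 A30 N",                         # as passed to String#unpack
  unpack_mapping: { code: 0, name: 1, balance: 2 } # readable names => unpacked column indexes
)

# Each entry is a Record; #[] is delegated to the unpacked fields hash
records.each { |rec| puts "#{rec[:code]}: #{rec[:name]}" }
```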
lib/micdrop/ext/nokogiri.rb ADDED
@@ -0,0 +1,180 @@
+# frozen_string_literal: true
+
+require "nokogiri"
+
+module Micdrop
+  ##
+  # Extend ItemContext with HTML/XML functions
+  class ItemContext
+    ##
+    # Alias for scope.enter.take_content
+    def take_content(put: nil, convert: nil, apply: nil, &block)
+      scope.enter.take_content(put: put, convert: convert, apply: apply, &block)
+    end
+
+    ##
+    # Alias for scope.enter.take_node_name
+    def take_node_name(put: nil, convert: nil, apply: nil, &block)
+      scope.enter.take_node_name(put: put, convert: convert, apply: apply, &block)
+    end
+
+    ##
+    # Parse HTML and enter a sub-record context for the root node
+    def parse_html(&block)
+      doc = @value.nil? ? nil : ::Nokogiri::HTML.parse(@value)
+      nokogiri_node_subrecord_helper(doc, block)
+    end
+
+    ##
+    # Parse HTML5 and enter a sub-record context for the root node
+    def parse_html5(&block)
+      doc = @value.nil? ? nil : ::Nokogiri::HTML5.parse(@value)
+      nokogiri_node_subrecord_helper(doc, block)
+    end
+
+    ##
+    # Parse XML and enter a sub-record context for the root node
+    def parse_xml(&block)
+      doc = @value.nil? ? nil : ::Nokogiri::XML.parse(@value)
+      nokogiri_node_subrecord_helper(doc, block)
+    end
+
+    ##
+    # Parse an HTML fragment and enter a sub-record context for the root node
+    def parse_html_fragment(&block)
+      doc = @value.nil? ? nil : ::Nokogiri::HTML.fragment(@value)
+      nokogiri_node_subrecord_helper(doc, block)
+    end
+
+    ##
+    # Parse an HTML5 fragment and enter a sub-record context for the root node
+    def parse_html5_fragment(&block)
+      doc = @value.nil? ? nil : ::Nokogiri::HTML5.fragment(@value)
+      nokogiri_node_subrecord_helper(doc, block)
+    end
+
+    ##
+    # Parse an XML fragment and enter a sub-record context for the root node
+    def parse_xml_fragment(&block)
+      doc = @value.nil? ? nil : ::Nokogiri::XML.fragment(@value)
+      nokogiri_node_subrecord_helper(doc, block)
+    end
+
+    ##
+    # Decode an HTML entity-encoded string to plain text
+    def decode_html
+      return self if @value.nil?
+
+      frag = ::Nokogiri::HTML.fragment @value
+      @value = frag.content
+      self
+    end
+
+    ##
+    # Encode a string using HTML entities
+    def encode_html(nl2br: false)
+      return self if @value.nil?
+
+      frag = ::Nokogiri::HTML.fragment ""
+      frag.content = @value
+      @value = frag.to_s
+      @value = @value.gsub "\n", "<br/>" if nl2br
+      self
+    end
+
+    ##
+    # Decode an HTML5 entity-encoded string to plain text
+    def decode_html5
+      return self if @value.nil?
+
+      frag = ::Nokogiri::HTML5.fragment @value
+      @value = frag.content
+      self
+    end
+
+    ##
+    # Encode a string using HTML5 entities
+    def encode_html5(nl2br: false)
+      return self if @value.nil?
+
+      frag = ::Nokogiri::HTML5.fragment ""
+      frag.content = @value
+      @value = frag.to_s
+      @value = @value.gsub "\n", "<br/>" if nl2br
+      self
+    end
+
+    ##
+    # Decode an XML entity-encoded string to plain text
+    def decode_xml
+      return self if @value.nil?
+
+      frag = ::Nokogiri::XML.fragment @value
+      @value = frag.content
+      self
+    end
+
+    ##
+    # Encode a string using XML entities
+    def encode_xml
+      return self if @value.nil?
+
+      frag = ::Nokogiri::XML.fragment ""
+      frag.content = @value
+      @value = frag.to_s
+      self
+    end
+
+    private
+
+    def nokogiri_node_subrecord_helper(node, block)
+      item_ctx = ItemContext.new @record_context, node
+      subrec_ctx = SubRecordContext.new item_ctx, @record_context
+      subrec_ctx.instance_eval(&block) unless block.nil?
+      subrec_ctx
+    end
+  end
+
+  ##
+  # Extend RecordContext with HTML/XML functions
+  class RecordContext
+    ##
+    # Take the text content of the XML or HTML node
+    def take_content(put: nil, convert: nil, apply: nil, &block)
+      value = @record&.content
+      process_item_helper(value, put, convert, apply, block)
+    end
+
+    ##
+    # Take the node name of the XML or HTML node
+    def take_node_name(put: nil, convert: nil, apply: nil, &block)
+      value = @record&.node_name
+      process_item_helper(value, put, convert, apply, block)
+    end
+
+    def xpath(*args, &block)
+      nokogiri_node_subrecord_helper(@record.xpath(*args), block)
+    end
+
+    def at_xpath(*args, &block)
+      nokogiri_node_subrecord_helper(@record.at_xpath(*args), block)
+    end
+
+    def css(*args, &block)
+      nokogiri_node_subrecord_helper(@record.css(*args), block)
+    end
+
+    def at_css(*args, &block)
+      nokogiri_node_subrecord_helper(@record.at_css(*args), block)
+    end
+
+    private
+
+    def nokogiri_node_subrecord_helper(node, block)
+      item_ctx = ItemContext.new self, node
+      subrec_ctx = SubRecordContext.new item_ctx, self
+      subrec_ctx.instance_eval(&block) unless block.nil?
+      subrec_ctx
+    end
+  end
+end
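A sketch of how these helpers combine inside a migration (the `"description"` field is hypothetical; `decode_html` and the block form of `take` are shown above and in the examples):

```ruby
Micdrop.migrate source, sink do
  # Hypothetical source field holding entity-encoded HTML
  take "description" do
    decode_html
    put :description_plain
  end
end
```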
lib/micdrop/ext/sequel.rb CHANGED
@@ -12,8 +12,10 @@ module Micdrop
           @dataset = dataset
         end
 
+        attr_reader :insert_id
+
         def <<(collector)
-          @dataset.insert(**collector)
+          @insert_id = @dataset.insert(**collector)
         end
       end
 
@@ -56,6 +58,8 @@ module Micdrop
           @match_empty_key = match_empty_key
         end
 
+        attr_reader :insert_id, :was_insert
+
        def <<(collector)
           dataset = @dataset
           @key_columns.each do |col|
@@ -65,9 +69,12 @@ module Micdrop
           if existing.count > 1
             raise Micdrop::SinkError, "Key column(s) of this InsertUpdateSink are not unique"
           elsif existing.empty?
-            dataset.insert(**collector)
+            @insert_id = dataset.insert(**collector)
+            @was_insert = true
           else
             dataset.update(**update_merge(existing.first, collector))
+            @insert_id = nil
+            @was_insert = false
           end
         end
 
@@ -101,10 +108,15 @@ module Micdrop
   ##
   # Sequel-specific extensions for ItemContext
   class ItemContext
-    def db_lookup(dataset, key_col, val_col, pass_if_not_found: false, warn_if_not_found: nil, apply_if_not_found: nil)
+    def db_lookup(dataset, key_col, val_col = nil, pass_if_not_found: false, warn_if_not_found: nil,
+                  apply_if_not_found: nil)
       # TODO: allow registering db_lookups like we do normal lookups
       warn_if_not_found = true if warn_if_not_found.nil? && apply_if_not_found.nil?
-      found = dataset.where(key_col => @value).get(val_col)
+      found = if val_col.nil?
+                dataset.where(key_col => @value).first
+              else
+                dataset.where(key_col => @value).get(val_col)
+              end
       if found.nil?
         warn format "Value %s not found in db_lookup", @value if warn_if_not_found
         if !apply_if_not_found.nil?
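With `val_col` now optional, `db_lookup` can resolve either a single column or the whole matching row. A sketch, assuming (as with `lookup`) that the matched result replaces the current item value; the table and column names are hypothetical:

```ruby
Micdrop.migrate source, sink do
  take "country_name" do
    # With val_col given, the value becomes the matched :id column;
    # omit it to get the whole row as a hash.
    db_lookup DB[:countries], :name, :id
    put :country_id
  end
end
```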
lib/micdrop/item_context.rb CHANGED
@@ -251,6 +251,13 @@ module Micdrop
 
     ### Common operations ###
 
+    def send(*args)
+      return self if @value.nil?
+
+      @value = @value.send(*args)
+      self
+    end
+
     ##
     # Lookup the value in a hash
     #
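The new `send` shorthand forwards a method call to the underlying value, passing `nil` values through untouched, so one-off transformations don't need a dedicated helper. A sketch with hypothetical field names:

```ruby
Micdrop.migrate source, sink do
  take "name" do
    send :strip  # forwards String#strip to the value; nil passes through
    send :upcase
    put :name
  end
end
```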
@@ -38,6 +38,18 @@ module Micdrop
       process_item_helper(value, put, convert, apply, block)
     end
 
+    ##
+    # Take the entire record as a single item
+    def take_whole(put: nil, convert: nil, apply: nil, &block)
+      process_item_helper(record, put, convert, apply, block)
+    end
+
+    ##
+    # Alias for take_whole.each_subrecord
+    def each_subrecord(flush: false, reset: false, &block)
+      take_whole.each_subrecord(flush: flush, reset: reset, &block)
+    end
+
     ##
     # A combined take/put shorthand, for migrations where many of the column names are the same
     def passthru(*names)
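A sketch of the new record-level shorthand (field names hypothetical):

```ruby
Micdrop.migrate source, sink do
  # Equivalent to take_whole.each_subrecord(flush: true, reset: true)
  each_subrecord(flush: true, reset: true) do
    take "id", put: :id
  end
end
```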
@@ -89,8 +101,6 @@ module Micdrop
       process_item_helper(value, put, convert, apply, block)
     end
 
-    # TODO: collect_hash (not sure what the signature of it should be?)
-
     ##
     # Skip the current record. This is similar to a plain-ruby `next` statement.
     def skip
@@ -124,6 +134,8 @@ module Micdrop
       @loop_item = loop_item
       @record = loop_item
       @loop_index = loop_index
+      @before_flush = nil
+      @after_flush = nil
       reset
     end
 
@@ -153,7 +165,9 @@ module Micdrop
     def flush(reset: true)
       return unless @dirty
 
+      @before_flush&.call self, @collector
       @sink << @collector
+      @after_flush&.call self, @collector
       self.reset if reset
     end
 
@@ -176,6 +190,22 @@ module Micdrop
         {}
       end
     end
+
+    ##
+    # Allows specifying a hook which will run before flush. The block will receive the record and the collector.
+    #
+    # Note that this must be called *before* any manual flush occurs to have any effect.
+    def before_flush(&block)
+      @before_flush = block
+    end
+
+    ##
+    # Allows specifying a hook which will run after flush. The block will receive the record and the collector.
+    #
+    # Note that this must be called *before* any manual flush occurs to have any effect.
+    def after_flush(&block)
+      @after_flush = block
+    end
   end
 
   ##
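A sketch of `before_flush` used as a last-chance validation of the collector (the field name and error are illustrative):

```ruby
Micdrop.migrate source, sink do
  before_flush do |record, collected|
    # Illustrative: refuse to flush rows missing a :code value
    raise Micdrop::SinkError, "missing code" if collected[:code].nil?
  end
  take "id", put: :code
end
```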
lib/micdrop/version.rb CHANGED
@@ -1,5 +1,5 @@
 # frozen_string_literal: true
 
 module Micdrop
-  VERSION = "0.1.0"
+  VERSION = "0.2.0"
 end
metadata CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: micdrop
 version: !ruby/object:Gem::Version
-  version: 0.1.0
+  version: 0.2.0
 platform: ruby
 authors:
 - Dominick Johnson
@@ -27,6 +27,7 @@ files:
 - Rakefile
 - TODO.md
 - examples/csvs_to_sql.rb
+- examples/data/catalog.xml
 - examples/data/customers-100.csv
 - examples/data/json/1.json
 - examples/data/json/2.json
@@ -42,8 +43,11 @@ files:
 - examples/data/people-100.csv
 - examples/data/readme.md
 - examples/json_files_to_sql.rb
+- examples/xml_to_sql.rb
 - lib/micdrop.rb
 - lib/micdrop/errors.rb
+- lib/micdrop/ext/microfocus.rb
+- lib/micdrop/ext/nokogiri.rb
 - lib/micdrop/ext/sequel.rb
 - lib/micdrop/files_source.rb
 - lib/micdrop/item_context.rb