fixed_width_file_parser 0.1.6 → 0.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 6073402332a1ca5be01c75ac84b18ffda2786894
4
- data.tar.gz: 6ab237b87f96ea029e5713ab95685234511a1d1e
3
+ metadata.gz: a18ff4c3a7378536cc1b8a24dcc46397e9d2153b
4
+ data.tar.gz: faf99597df832a4e34870111334bd980c485a06e
5
5
  SHA512:
6
- metadata.gz: 5b875e3099e5cb8361553154374e4424713868778f0d4be0b7e36053d500981cee4cf777f30062341db53693961826414592052a973461a975c11e16a77736c0
7
- data.tar.gz: 0f6fd9a8ea774929fc322f2dee59e18fadb4772c3fafb316c411687fca99b6a8bae323db5f87652800eee6b0c053533b552d2ed285181b64bc21306ff2486b16
6
+ metadata.gz: 8fbaad05477bd91aff65e0d41cdace701bd20d71bf57e4c24f85a89d36d8774f3f6891df85290d6f989a34985158397adaaf9d423b44dd507520414218693f09
7
+ data.tar.gz: de10641a19c7d39f04dc8cb988b927a109a19f62a99b099eedf2f8d41b0b3fb7d6fe18ec538e55e125e5a7b2190690ff61467f871e56a57f3a6d22a736cae7bd
@@ -1,3 +1,3 @@
1
1
  module FixedWidthFileParser
2
- VERSION = "0.1.6"
2
+ VERSION = "0.1.7"
3
3
  end
@@ -77,4 +77,63 @@ module FixedWidthFileParser
77
77
 
78
78
  file.close
79
79
  end
80
+
81
+ def self.parse_in_batches(filepath, fields, options = {})
82
+ # Set options, or use default
83
+ batch_size = options.fetch(:batch_size, 1000)
84
+ force_utf8_encoding = options.fetch(:force_utf8_encoding, true)
85
+
86
+ # Verify `filepath` is a String
87
+ unless filepath.is_a?(String)
88
+ raise '`filepath` must be a String'
89
+ end
90
+
91
+ # Verify `fields` is an array
92
+ if fields.is_a?(Array)
93
+ # Verify fields is not empty
94
+ if fields.empty?
95
+ raise '`fields` must contain at least 1 item'
96
+ end
97
+ else
98
+ raise '`fields` must be an Array'
99
+ end
100
+
101
+ # Verify each field has a `name` and `position`
102
+ unless fields.all? { |item| item.key?(:name) && item.key?(:position) }
103
+ raise 'Each field hash must include a `name` and a `position`'
104
+ end
105
+
106
+ # Verify that each `position` is either a Range or an Integer
107
+ unless fields.all? { |item| item[:position].is_a?(Range) || item[:position].is_a?(Integer) }
108
+ raise "Each field's `position` must be a Range or an Integer"
109
+ end
110
+
111
+ GC.start
112
+
113
+ File.open(filepath) do |file|
114
+ file.lazy.drop(1).each_slice(batch_size) do |lines|
115
+ lines.each do |line|
116
+ # If the current line is blank, skip to the next line
117
+ # chomp to remove "\n" and "\r\n"
118
+ next if line.chomp.empty?
119
+
120
+ # Force UTF8 encoding if force_utf8_encoding is true (defaults to true)
121
+ if force_utf8_encoding
122
+ # Handle UTF Invalid Byte Sequence Errors
123
+ # e.g. https://robots.thoughtbot.com/fight-back-utf-8-invalid-byte-sequences
124
+ line = line.encode('UTF-8', 'binary', invalid: :replace, undef: :replace, replace: '')
125
+ end
126
+
127
+ line_fields = {}
128
+ fields.each do |field|
129
+ line_fields[field[:name].to_sym] = line[ field[:position] ].nil? ? nil : line[ field[:position] ].strip
130
+ end
131
+
132
+ yield(line_fields)
133
+ end
134
+
135
+ GC.start
136
+ end
137
+ end
138
+ end
80
139
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fixed_width_file_parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.6
4
+ version: 0.1.7
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jim Smith