fixed_width_file_parser 0.1.6 → 0.1.7

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 6073402332a1ca5be01c75ac84b18ffda2786894
4
- data.tar.gz: 6ab237b87f96ea029e5713ab95685234511a1d1e
3
+ metadata.gz: a18ff4c3a7378536cc1b8a24dcc46397e9d2153b
4
+ data.tar.gz: faf99597df832a4e34870111334bd980c485a06e
5
5
  SHA512:
6
- metadata.gz: 5b875e3099e5cb8361553154374e4424713868778f0d4be0b7e36053d500981cee4cf777f30062341db53693961826414592052a973461a975c11e16a77736c0
7
- data.tar.gz: 0f6fd9a8ea774929fc322f2dee59e18fadb4772c3fafb316c411687fca99b6a8bae323db5f87652800eee6b0c053533b552d2ed285181b64bc21306ff2486b16
6
+ metadata.gz: 8fbaad05477bd91aff65e0d41cdace701bd20d71bf57e4c24f85a89d36d8774f3f6891df85290d6f989a34985158397adaaf9d423b44dd507520414218693f09
7
+ data.tar.gz: de10641a19c7d39f04dc8cb988b927a109a19f62a99b099eedf2f8d41b0b3fb7d6fe18ec538e55e125e5a7b2190690ff61467f871e56a57f3a6d22a736cae7bd
@@ -1,3 +1,3 @@
1
1
  module FixedWidthFileParser
2
- VERSION = "0.1.6"
2
+ VERSION = "0.1.7"
3
3
  end
@@ -77,4 +77,63 @@ module FixedWidthFileParser
77
77
 
78
78
  file.close
79
79
  end
80
+
81
+ def self.parse_in_batches(filepath, fields, options = {})
82
+ # Set options, or use default
83
+ batch_size = options.fetch(:batch_size, 1000)
84
+ force_utf8_encoding = options.fetch(:force_utf8_encoding, true)
85
+
86
+ # Verify `filepath` is a String
87
+ unless filepath.is_a?(String)
88
+ raise '`filepath` must be a String'
89
+ end
90
+
91
+ # Verify `fields` is an array
92
+ if fields.is_a?(Array)
93
+ # Verify fields is not empty
94
+ if fields.empty?
95
+ raise '`fields` must contain at least 1 item'
96
+ end
97
+ else
98
+ raise '`fields` must be an Array'
99
+ end
100
+
101
+ # Verify each field has a `name` and `position`
102
+ unless fields.all? { |item| item.key?(:name) && item.key?(:position) }
103
+ raise 'Each field hash must include a `name` and a `position`'
104
+ end
105
+
106
+ # Verify that each `position` is either a Range or an Integer
107
+ unless fields.all? { |item| item[:position].is_a?(Range) || item[:position].is_a?(Integer) }
108
+ raise "Each field's `position` must be a Range or an Integer"
109
+ end
110
+
111
+ GC.start
112
+
113
+ File.open(filepath) do |file|
114
+ file.lazy.drop(1).each_slice(batch_size) do |lines|
115
+ lines.each do |line|
116
+ # If the current line is blank, skip to the next line
117
+ # chomp to remove "\n" and "\r\n"
118
+ next if line.chomp.empty?
119
+
120
+ # Force UTF8 encoding if force_utf8_encoding is true (defaults to true)
121
+ if force_utf8_encoding
122
+ # Handle UTF Invalid Byte Sequence Errors
123
+ # e.g. https://robots.thoughtbot.com/fight-back-utf-8-invalid-byte-sequences
124
+ line = line.encode('UTF-8', 'binary', invalid: :replace, undef: :replace, replace: '')
125
+ end
126
+
127
+ line_fields = {}
128
+ fields.each do |field|
129
+ line_fields[field[:name].to_sym] = line[ field[:position] ].nil? ? nil : line[ field[:position] ].strip
130
+ end
131
+
132
+ yield(line_fields)
133
+ end
134
+
135
+ GC.start
136
+ end
137
+ end
138
+ end
80
139
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fixed_width_file_parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.6
4
+ version: 0.1.7
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jim Smith