fixed_width_file_parser 0.1.4 → 0.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +11 -1
- data/lib/fixed_width_file_parser.rb +11 -1
- data/lib/fixed_width_file_parser/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 976efbc15675621fa210269b957a18779104e541
|
4
|
+
data.tar.gz: b7839de06cc4b2596421c79988fe353510c199a8
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: de67c84d35c1c8b4927032ac2902ecab46a62f9952f241e33f3e542880222caff2b00854f1b400b7c192bf0f4cc9c68227a0e5effd0bbdafb3b287d110e1c1ce
|
7
|
+
data.tar.gz: 3295067dec03f267224e8781356a2f9403884cfe03cb95156d553862cf2ef1cd573f9bf63ba411888bb598cb08d7b63b37ae36a8b0d7eb835bac8a353e2644cd
|
data/README.md
CHANGED
@@ -31,14 +31,24 @@ fields = [
|
|
31
31
|
{ name: 'middle_initial', position: 11 },
|
32
32
|
{ name: 'last_name', position: 12..25 }
|
33
33
|
]
|
34
|
+
options = {}
|
34
35
|
|
35
|
-
FixedWidthFileParser.parse(filepath, fields) do |row|
|
36
|
+
FixedWidthFileParser.parse(filepath, fields, options) do |row|
|
36
37
|
puts row[:first_name]
|
37
38
|
puts row[:middle_initial]
|
38
39
|
puts row[:last_name]
|
39
40
|
end
|
40
41
|
```
|
41
42
|
|
43
|
+
### Tips
|
44
|
+
If the last field in your fixed-width file has a variable width, give it an open-ended range such as `position: 20..-1`. An end value of `-1` reads through to the end of the line.
|
45
|
+
|
46
|
+
## Options
|
47
|
+
|Name|Default Value|Description|
|
48
|
+
|---|---|---|
|
49
|
+
|force_utf8_encoding|true|Force UTF-8 encoding on each line before it is parsed. This avoids the `invalid byte sequence in UTF-8` errors raised when slicing a string that contains invalid UTF-8 byte sequences. For more information, see this [article](https://robots.thoughtbot.com/fight-back-utf-8-invalid-byte-sequences).|
|
50
|
+
|
51
|
+
|
42
52
|
## Development
|
43
53
|
|
44
54
|
After checking out the repo, run `bin/setup` to install dependencies. Then, run `bin/console` for an interactive prompt that will allow you to experiment.
|
data/lib/fixed_width_file_parser.rb
CHANGED
@@ -19,7 +19,10 @@ module FixedWidthFileParser
|
|
19
19
|
# puts row
|
20
20
|
# end
|
21
21
|
|
22
|
-
def self.parse(filepath, fields)
|
22
|
+
def self.parse(filepath, fields, options = {})
|
23
|
+
# Set options, or use default
|
24
|
+
force_utf8_encoding = options.fetch(:force_utf8_encoding, true)
|
25
|
+
|
23
26
|
# Verify `filepath` is a String
|
24
27
|
unless filepath.is_a?(String)
|
25
28
|
raise '`filepath` must be a String'
|
@@ -53,6 +56,13 @@ module FixedWidthFileParser
|
|
53
56
|
# chomp to remove "\n" and "\r\n"
|
54
57
|
next if line.chomp.empty?
|
55
58
|
|
59
|
+
# Force UTF8 encoding if force_utf8_encoding is true (defaults to true)
|
60
|
+
if force_utf8_encoding
|
61
|
+
# Handle UTF Invalid Byte Sequence Errors
|
62
|
+
# e.g. https://robots.thoughtbot.com/fight-back-utf-8-invalid-byte-sequences
|
63
|
+
line = line.encode('UTF-8', 'binary', invalid: :replace, undef: :replace, replace: '')
|
64
|
+
end
|
65
|
+
|
56
66
|
line_fields = {}
|
57
67
|
fields.each do |field|
|
58
68
|
line_fields[field[:name].to_sym] = line[ field[:position] ].nil? ? nil : line[ field[:position] ].strip
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fixed_width_file_parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.4
|
4
|
+
version: 0.1.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jim Smith
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-07-
|
11
|
+
date: 2016-07-28 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|