fixed_width_file_parser 0.1.4 → 0.1.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +11 -1
- data/lib/fixed_width_file_parser.rb +11 -1
- data/lib/fixed_width_file_parser/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 976efbc15675621fa210269b957a18779104e541
|
4
|
+
data.tar.gz: b7839de06cc4b2596421c79988fe353510c199a8
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: de67c84d35c1c8b4927032ac2902ecab46a62f9952f241e33f3e542880222caff2b00854f1b400b7c192bf0f4cc9c68227a0e5effd0bbdafb3b287d110e1c1ce
|
7
|
+
data.tar.gz: 3295067dec03f267224e8781356a2f9403884cfe03cb95156d553862cf2ef1cd573f9bf63ba411888bb598cb08d7b63b37ae36a8b0d7eb835bac8a353e2644cd
|
data/README.md
CHANGED
@@ -31,14 +31,24 @@ fields = [
|
|
31
31
|
{ name: 'middle_initial', position: 11 },
|
32
32
|
{ name: 'last_name', position: 12..25 }
|
33
33
|
]
|
34
|
+
options = {}
|
34
35
|
|
35
|
-
FixedWidthFileParser.parse(filepath, fields) do |row|
|
36
|
+
FixedWidthFileParser.parse(filepath, fields, options) do |row|
|
36
37
|
puts row[:first_name]
|
37
38
|
puts row[:middle_initial]
|
38
39
|
puts row[:last_name]
|
39
40
|
end
|
40
41
|
```
|
41
42
|
|
43
|
+
### Tips
|
44
|
+
If the last field in a fixed-width file is a variable-width field, give it an open-ended position such as `position: 20..-1`. Setting the end of the range to `-1` reads through to the end of that line.
|
45
|
+
|
46
|
+
## Options
|
47
|
+
|Name|Default Value|Description|
|
48
|
+
|---|---|---|
|
49
|
+
|force_utf8_encoding|true|Force UTF-8 encoding on each line before it is parsed, dropping any bytes that cannot be converted. This avoids the `invalid byte sequence in UTF-8` errors raised when splitting a string that contains invalid UTF-8 byte sequences. For more information, see this [article](https://robots.thoughtbot.com/fight-back-utf-8-invalid-byte-sequences).|
|
50
|
+
|
51
|
+
|
42
52
|
## Development
|
43
53
|
|
44
54
|
After checking out the repo, run `bin/setup` to install dependencies. Then, run `bin/console` for an interactive prompt that will allow you to experiment.
|
data/lib/fixed_width_file_parser.rb
CHANGED
@@ -19,7 +19,10 @@ module FixedWidthFileParser
|
|
19
19
|
# puts row
|
20
20
|
# end
|
21
21
|
|
22
|
-
def self.parse(filepath, fields)
|
22
|
+
def self.parse(filepath, fields, options = {})
|
23
|
+
# Set options, or use default
|
24
|
+
force_utf8_encoding = options.fetch(:force_utf8_encoding, true)
|
25
|
+
|
23
26
|
# Verify `filepath` is a String
|
24
27
|
unless filepath.is_a?(String)
|
25
28
|
raise '`filepath` must be a String'
|
@@ -53,6 +56,13 @@ module FixedWidthFileParser
|
|
53
56
|
# chomp to remove "\n" and "\r\n"
|
54
57
|
next if line.chomp.empty?
|
55
58
|
|
59
|
+
# Force UTF8 encoding if force_utf8_encoding is true (defaults to true)
|
60
|
+
if force_utf8_encoding
|
61
|
+
# Handle UTF Invalid Byte Sequence Errors
|
62
|
+
# e.g. https://robots.thoughtbot.com/fight-back-utf-8-invalid-byte-sequences
|
63
|
+
line = line.encode('UTF-8', 'binary', invalid: :replace, undef: :replace, replace: '')
|
64
|
+
end
|
65
|
+
|
56
66
|
line_fields = {}
|
57
67
|
fields.each do |field|
|
58
68
|
line_fields[field[:name].to_sym] = line[ field[:position] ].nil? ? nil : line[ field[:position] ].strip
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fixed_width_file_parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.4
|
4
|
+
version: 0.1.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jim Smith
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-07-
|
11
|
+
date: 2016-07-28 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|