fixed_width_file_parser 0.1.4 → 0.1.5

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 5d247b1668e893c35afb637941e3ca01f1949a82
4
- data.tar.gz: 8aed0fef1f981f2ffd7e6a257bf798f21af63e6c
3
+ metadata.gz: 976efbc15675621fa210269b957a18779104e541
4
+ data.tar.gz: b7839de06cc4b2596421c79988fe353510c199a8
5
5
  SHA512:
6
- metadata.gz: 29f528e67ff45166d99ad6295f1715efddc44dfe1caa18440b444fe212dc76c3ec06b599b9da4658f1439508c0f7ea7b1617120cff69aac09c0d72a235a9bf75
7
- data.tar.gz: 58d1bed56e38186e6881aba7f62fe4801b06c2c839074a002440df32c8ffd75b61091f8118276c342f698bb283295750fccff5cc20882aa8964168d6661d2ded
6
+ metadata.gz: de67c84d35c1c8b4927032ac2902ecab46a62f9952f241e33f3e542880222caff2b00854f1b400b7c192bf0f4cc9c68227a0e5effd0bbdafb3b287d110e1c1ce
7
+ data.tar.gz: 3295067dec03f267224e8781356a2f9403884cfe03cb95156d553862cf2ef1cd573f9bf63ba411888bb598cb08d7b63b37ae36a8b0d7eb835bac8a353e2644cd
data/README.md CHANGED
@@ -31,14 +31,24 @@ fields = [
31
31
  { name: 'middle_initial', position: 11 },
32
32
  { name: 'last_name', position: 12..25 }
33
33
  ]
34
+ options = {}
34
35
 
35
- FixedWidthFileParser.parse(filepath, fields) do |row|
36
+ FixedWidthFileParser.parse(filepath, fields, options) do |row|
36
37
  puts row[:first_name]
37
38
  puts row[:middle_initial]
38
39
  puts row[:last_name]
39
40
  end
40
41
  ```
41
42
 
43
+ ### Tips
44
+ If the last field of your fixed width file is variable width, give it an open-ended range such as `position: 20..-1`. An end of `-1` reads through to the end of the line.
45
+
46
+ ## Options
47
+ |Name|Default Value|Description|
48
+ |---|---|---|
49
+ |force_utf8_encoding|true|Force UTF-8 encoding on each line being parsed. This avoids the `invalid byte sequence in UTF-8` errors raised when splitting a string that contains invalid UTF-8 bytes. For more information, see this [article](https://robots.thoughtbot.com/fight-back-utf-8-invalid-byte-sequences).|
50
+
51
+
42
52
  ## Development
43
53
 
44
54
  After checking out the repo, run `bin/setup` to install dependencies. Then, run `bin/console` for an interactive prompt that will allow you to experiment.
@@ -19,7 +19,10 @@ module FixedWidthFileParser
19
19
  # puts row
20
20
  # end
21
21
 
22
- def self.parse(filepath, fields)
22
+ def self.parse(filepath, fields, options = {})
23
+ # Set options, or use default
24
+ force_utf8_encoding = options.fetch(:force_utf8_encoding, true)
25
+
23
26
  # Verify `filepath` is a String
24
27
  unless filepath.is_a?(String)
25
28
  raise '`filepath` must be a String'
@@ -53,6 +56,13 @@ module FixedWidthFileParser
53
56
  # chomp to remove "\n" and "\r\n"
54
57
  next if line.chomp.empty?
55
58
 
59
+ # Force UTF8 encoding if force_utf8_encoding is true (defaults to true)
60
+ if force_utf8_encoding
61
+ # Handle UTF Invalid Byte Sequence Errors
62
+ # e.g. https://robots.thoughtbot.com/fight-back-utf-8-invalid-byte-sequences
63
+ line = line.encode('UTF-8', 'binary', invalid: :replace, undef: :replace, replace: '')
64
+ end
65
+
56
66
  line_fields = {}
57
67
  fields.each do |field|
58
68
  line_fields[field[:name].to_sym] = line[ field[:position] ].nil? ? nil : line[ field[:position] ].strip
@@ -1,3 +1,3 @@
1
1
  module FixedWidthFileParser
2
- VERSION = "0.1.4"
2
+ VERSION = "0.1.5"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fixed_width_file_parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.4
4
+ version: 0.1.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jim Smith
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2016-07-26 00:00:00.000000000 Z
11
+ date: 2016-07-28 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler