fixed_width_file_parser 0.1.7 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.travis.yml +4 -0
- data/Gemfile +1 -0
- data/README.md +17 -5
- data/Rakefile +2 -1
- data/bin/console +4 -3
- data/fixed_width_file_parser.gemspec +11 -9
- data/lib/fixed_width_file_parser/version.rb +2 -1
- data/lib/fixed_width_file_parser.rb +19 -83
- data/lib/tasks/rspec.rake +1 -0
- metadata +17 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 59078b301af3aef44e9855d8d06a4a0d554a7ec4
|
4
|
+
data.tar.gz: fa56e8c90146151dba6d45f1925f10e68409feff
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 37ea2200ebea71049b61aca3ceaebfa6f8890893ed49dd98a9401b1893aa122209e12386c12c70ab2a71c02c0f1a88f30c2f137e07c2e3078bdc72c995759e9c
|
7
|
+
data.tar.gz: 0bc5c285b39f7182b94964359403eda95f03c96a286001a849a323425ea2bae159ec3e8b5fb70f2e0bf8894643bf8b4de9ef969b6cf7dbbdc35e7fc1dbe56a5a
|
data/.travis.yml
CHANGED
data/Gemfile
CHANGED
data/README.md
CHANGED
@@ -14,11 +14,15 @@ gem 'fixed_width_file_parser'
|
|
14
14
|
|
15
15
|
And then execute:
|
16
16
|
|
17
|
-
|
17
|
+
```
|
18
|
+
bundle
|
19
|
+
```
|
18
20
|
|
19
21
|
Or install it yourself as:
|
20
22
|
|
21
|
-
|
23
|
+
```
|
24
|
+
gem install fixed_width_file_parser
|
25
|
+
```
|
22
26
|
|
23
27
|
## Usage
|
24
28
|
|
@@ -31,9 +35,8 @@ fields = [
|
|
31
35
|
{ name: 'middle_initial', position: 11 },
|
32
36
|
{ name: 'last_name', position: 12..25 }
|
33
37
|
]
|
34
|
-
options = {}
|
35
38
|
|
36
|
-
FixedWidthFileParser.parse(filepath, fields
|
39
|
+
FixedWidthFileParser.parse(filepath, fields) do |row|
|
37
40
|
puts row[:first_name]
|
38
41
|
puts row[:middle_initial]
|
39
42
|
puts row[:last_name]
|
@@ -41,7 +44,16 @@ end
|
|
41
44
|
```
|
42
45
|
|
43
46
|
### Tips
|
44
|
-
If you need to parse a fixed width file that has the last field set as a variable width field, you can set the position similar to `position:
|
47
|
+
If you need to parse a fixed width file that has the last field set as a variable width field, you can set the position similar to `position: 12..-1`. Setting the end of the range as `-1` will read to the end of that line.
|
48
|
+
|
49
|
+
```ruby
|
50
|
+
filepath = 'path/to/file.txt'
|
51
|
+
fields = [
|
52
|
+
{ name: 'first_name', position: 0..10 },
|
53
|
+
{ name: 'middle_initial', position: 11 },
|
54
|
+
{ name: 'last_name', position: 12..-1 }
|
55
|
+
]
|
56
|
+
```
|
45
57
|
|
46
58
|
## Options
|
47
59
|
|Name|Default Value|Description|
|
data/Rakefile
CHANGED
data/bin/console
CHANGED
@@ -1,7 +1,8 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
|
+
# frozen_string_literal: true
|
2
3
|
|
3
|
-
require
|
4
|
-
require
|
4
|
+
require 'bundler/setup'
|
5
|
+
require 'fixed_width_file_parser'
|
5
6
|
|
6
7
|
# You can add fixtures and/or initialization code here to make experimenting
|
7
8
|
# with your gem easier. You can also use a different console, if you like.
|
@@ -10,5 +11,5 @@ require "fixed_width_file_parser"
|
|
10
11
|
# require "pry"
|
11
12
|
# Pry.start
|
12
13
|
|
13
|
-
require
|
14
|
+
require 'irb'
|
14
15
|
IRB.start
|
@@ -1,23 +1,25 @@
|
|
1
1
|
# coding: utf-8
|
2
|
+
# frozen_string_literal: true
|
2
3
|
lib = File.expand_path('../lib', __FILE__)
|
3
4
|
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
5
|
require 'fixed_width_file_parser/version'
|
5
6
|
|
6
7
|
Gem::Specification.new do |spec|
|
7
|
-
spec.name =
|
8
|
+
spec.name = 'fixed_width_file_parser'
|
8
9
|
spec.version = FixedWidthFileParser::VERSION
|
9
|
-
spec.authors = [
|
10
|
-
spec.email = [
|
10
|
+
spec.authors = ['Jim Smith']
|
11
|
+
spec.email = ['jim@jimsmithdesign.com']
|
11
12
|
|
12
|
-
spec.summary =
|
13
|
-
spec.homepage =
|
13
|
+
spec.summary = 'Parse fixed width files easily and efficiently.'
|
14
|
+
spec.homepage = 'https://github.com/elevatorup/fixed_width_file_parser'
|
14
15
|
|
15
16
|
spec.files = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
|
16
|
-
spec.bindir =
|
17
|
+
spec.bindir = 'exe'
|
17
18
|
spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
|
18
|
-
spec.require_paths = [
|
19
|
+
spec.require_paths = ['lib']
|
19
20
|
|
20
|
-
spec.add_development_dependency
|
21
|
-
spec.add_development_dependency
|
21
|
+
spec.add_development_dependency 'bundler', '~> 1.8'
|
22
|
+
spec.add_development_dependency 'rake', '~> 10.0'
|
22
23
|
spec.add_development_dependency 'rspec'
|
24
|
+
spec.add_development_dependency 'codeclimate-test-reporter'
|
23
25
|
end
|
@@ -1,4 +1,5 @@
|
|
1
|
-
|
1
|
+
# frozen_string_literal: true
|
2
|
+
require 'fixed_width_file_parser/version'
|
2
3
|
|
3
4
|
module FixedWidthFileParser
|
4
5
|
# Parse a fixed width file, yielding the proper data for each line based on the fields passed in
|
@@ -8,32 +9,28 @@ module FixedWidthFileParser
|
|
8
9
|
# @yield [Hash] Yields a hash object based on the fields provided.
|
9
10
|
#
|
10
11
|
# @example
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
12
|
+
# filepath = 'path/to/file'
|
13
|
+
# fields = [
|
14
|
+
# { name: 'first_name', position: 0..10 },
|
15
|
+
# { name: 'middle_initial', position: 11 },
|
16
|
+
# { name: 'last_name', position: 12..25 }
|
17
|
+
# ]
|
18
|
+
#
|
19
|
+
# FixedWidthFileParser.parse(filepath, fields) do |row|
|
20
|
+
# puts row
|
21
|
+
# end
|
21
22
|
|
22
23
|
def self.parse(filepath, fields, options = {})
|
23
24
|
# Set options, or use default
|
24
25
|
force_utf8_encoding = options.fetch(:force_utf8_encoding, true)
|
25
26
|
|
26
27
|
# Verify `filepath` is a String
|
27
|
-
unless filepath.is_a?(String)
|
28
|
-
raise '`filepath` must be a String'
|
29
|
-
end
|
28
|
+
raise '`filepath` must be a String' unless filepath.is_a?(String)
|
30
29
|
|
31
30
|
# Verify `fields` is an array
|
32
31
|
if fields.is_a?(Array)
|
33
32
|
# Verify fields is not empty
|
34
|
-
if fields.empty?
|
35
|
-
raise '`fields` must contain at least 1 item'
|
36
|
-
end
|
33
|
+
raise '`fields` must contain at least 1 item' if fields.empty?
|
37
34
|
else
|
38
35
|
raise '`fields` must be an Array'
|
39
36
|
end
|
@@ -52,22 +49,20 @@ module FixedWidthFileParser
|
|
52
49
|
|
53
50
|
file = File.open(filepath)
|
54
51
|
|
55
|
-
|
52
|
+
until file.eof?
|
56
53
|
line = file.readline
|
57
54
|
# If the current line is blank, skip to the next line
|
58
55
|
# chomp to remove "\n" and "\r\n"
|
59
56
|
next if line.chomp.empty?
|
60
57
|
|
61
58
|
# Force UTF8 encoding if force_utf8_encoding is true (defaults to true)
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
line = line.encode('UTF-8', 'binary', invalid: :replace, undef: :replace, replace: '')
|
66
|
-
end
|
59
|
+
# Handle UTF Invalid Byte Sequence Errors
|
60
|
+
# e.g. https://robots.thoughtbot.com/fight-back-utf-8-invalid-byte-sequences
|
61
|
+
line = line.encode('UTF-8', 'binary', invalid: :replace, undef: :replace, replace: '') if force_utf8_encoding
|
67
62
|
|
68
63
|
line_fields = {}
|
69
64
|
fields.each do |field|
|
70
|
-
line_fields[field[:name].to_sym] = line[
|
65
|
+
line_fields[field[:name].to_sym] = line[field[:position]].nil? ? nil : line[field[:position]].strip
|
71
66
|
end
|
72
67
|
|
73
68
|
yield(line_fields)
|
@@ -77,63 +72,4 @@ module FixedWidthFileParser
|
|
77
72
|
|
78
73
|
file.close
|
79
74
|
end
|
80
|
-
|
81
|
-
def self.parse_in_batches(filepath, fields, options = {})
|
82
|
-
# Set options, or use default
|
83
|
-
batch_size = options.fetch(:batch_size, 1000)
|
84
|
-
force_utf8_encoding = options.fetch(:force_utf8_encoding, true)
|
85
|
-
|
86
|
-
# Verify `filepath` is a String
|
87
|
-
unless filepath.is_a?(String)
|
88
|
-
raise '`filepath` must be a String'
|
89
|
-
end
|
90
|
-
|
91
|
-
# Verify `fields` is an array
|
92
|
-
if fields.is_a?(Array)
|
93
|
-
# Verify fields is not empty
|
94
|
-
if fields.empty?
|
95
|
-
raise '`fields` must contain at least 1 item'
|
96
|
-
end
|
97
|
-
else
|
98
|
-
raise '`fields` must be an Array'
|
99
|
-
end
|
100
|
-
|
101
|
-
# Verify each field has a `name` and `position`
|
102
|
-
unless fields.all? { |item| item.key?(:name) && item.key?(:position) }
|
103
|
-
raise 'Each field hash must include a `name` and a `position`'
|
104
|
-
end
|
105
|
-
|
106
|
-
# Verify that each `position` is either a Range or an Integer
|
107
|
-
unless fields.all? { |item| item[:position].is_a?(Range) || item[:position].is_a?(Integer) }
|
108
|
-
raise "Each field's `position` must be a Range or an Integer"
|
109
|
-
end
|
110
|
-
|
111
|
-
GC.start
|
112
|
-
|
113
|
-
File.open(filepath) do |file|
|
114
|
-
file.lazy.drop(1).each_slice(batch_size) do |lines|
|
115
|
-
lines.each do |line|
|
116
|
-
# If the current line is blank, skip to the next line
|
117
|
-
# chomp to remove "\n" and "\r\n"
|
118
|
-
next if line.chomp.empty?
|
119
|
-
|
120
|
-
# Force UTF8 encoding if force_utf8_encoding is true (defaults to true)
|
121
|
-
if force_utf8_encoding
|
122
|
-
# Handle UTF Invalid Byte Sequence Errors
|
123
|
-
# e.g. https://robots.thoughtbot.com/fight-back-utf-8-invalid-byte-sequences
|
124
|
-
line = line.encode('UTF-8', 'binary', invalid: :replace, undef: :replace, replace: '')
|
125
|
-
end
|
126
|
-
|
127
|
-
line_fields = {}
|
128
|
-
fields.each do |field|
|
129
|
-
line_fields[field[:name].to_sym] = line[ field[:position] ].nil? ? nil : line[ field[:position] ].strip
|
130
|
-
end
|
131
|
-
|
132
|
-
yield(line_fields)
|
133
|
-
end
|
134
|
-
|
135
|
-
GC.start
|
136
|
-
end
|
137
|
-
end
|
138
|
-
end
|
139
75
|
end
|
data/lib/tasks/rspec.rake
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fixed_width_file_parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 1.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jim Smith
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-
|
11
|
+
date: 2016-10-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -52,6 +52,20 @@ dependencies:
|
|
52
52
|
- - ">="
|
53
53
|
- !ruby/object:Gem::Version
|
54
54
|
version: '0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: codeclimate-test-reporter
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ">="
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
55
69
|
description:
|
56
70
|
email:
|
57
71
|
- jim@jimsmithdesign.com
|
@@ -90,7 +104,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
90
104
|
version: '0'
|
91
105
|
requirements: []
|
92
106
|
rubyforge_project:
|
93
|
-
rubygems_version: 2.4.
|
107
|
+
rubygems_version: 2.4.6
|
94
108
|
signing_key:
|
95
109
|
specification_version: 4
|
96
110
|
summary: Parse fixed width files easily and efficiently.
|