fixed_width_file_parser 0.1.7 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.travis.yml +4 -0
- data/Gemfile +1 -0
- data/README.md +17 -5
- data/Rakefile +2 -1
- data/bin/console +4 -3
- data/fixed_width_file_parser.gemspec +11 -9
- data/lib/fixed_width_file_parser/version.rb +2 -1
- data/lib/fixed_width_file_parser.rb +19 -83
- data/lib/tasks/rspec.rake +1 -0
- metadata +17 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 59078b301af3aef44e9855d8d06a4a0d554a7ec4
|
4
|
+
data.tar.gz: fa56e8c90146151dba6d45f1925f10e68409feff
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 37ea2200ebea71049b61aca3ceaebfa6f8890893ed49dd98a9401b1893aa122209e12386c12c70ab2a71c02c0f1a88f30c2f137e07c2e3078bdc72c995759e9c
|
7
|
+
data.tar.gz: 0bc5c285b39f7182b94964359403eda95f03c96a286001a849a323425ea2bae159ec3e8b5fb70f2e0bf8894643bf8b4de9ef969b6cf7dbbdc35e7fc1dbe56a5a
|
data/.travis.yml
CHANGED
data/Gemfile
CHANGED
data/README.md
CHANGED
@@ -14,11 +14,15 @@ gem 'fixed_width_file_parser'
|
|
14
14
|
|
15
15
|
And then execute:
|
16
16
|
|
17
|
-
|
17
|
+
```
|
18
|
+
bundle
|
19
|
+
```
|
18
20
|
|
19
21
|
Or install it yourself as:
|
20
22
|
|
21
|
-
|
23
|
+
```
|
24
|
+
gem install fixed_width_file_parser
|
25
|
+
```
|
22
26
|
|
23
27
|
## Usage
|
24
28
|
|
@@ -31,9 +35,8 @@ fields = [
|
|
31
35
|
{ name: 'middle_initial', position: 11 },
|
32
36
|
{ name: 'last_name', position: 12..25 }
|
33
37
|
]
|
34
|
-
options = {}
|
35
38
|
|
36
|
-
FixedWidthFileParser.parse(filepath, fields
|
39
|
+
FixedWidthFileParser.parse(filepath, fields) do |row|
|
37
40
|
puts row[:first_name]
|
38
41
|
puts row[:middle_initial]
|
39
42
|
puts row[:last_name]
|
@@ -41,7 +44,16 @@ end
|
|
41
44
|
```
|
42
45
|
|
43
46
|
### Tips
|
44
|
-
If you need to parse a fixed width file that has the last field set as a variable width field, you can set the position similar to `position:
|
47
|
+
If the last field in your fixed width file has a variable width, give it an open-ended range such as `position: 12..-1`. Using `-1` as the end of the range reads through to the end of that line.
|
48
|
+
|
49
|
+
```ruby
|
50
|
+
filepath = 'path/to/file.txt'
|
51
|
+
fields = [
|
52
|
+
{ name: 'first_name', position: 0..10 },
|
53
|
+
{ name: 'middle_initial', position: 11 },
|
54
|
+
{ name: 'last_name', position: 12..-1 }
|
55
|
+
]
|
56
|
+
```
|
45
57
|
|
46
58
|
## Options
|
47
59
|
|Name|Default Value|Description|
|
data/Rakefile
CHANGED
data/bin/console
CHANGED
@@ -1,7 +1,8 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
|
+
# frozen_string_literal: true
|
2
3
|
|
3
|
-
require
|
4
|
-
require
|
4
|
+
require 'bundler/setup'
|
5
|
+
require 'fixed_width_file_parser'
|
5
6
|
|
6
7
|
# You can add fixtures and/or initialization code here to make experimenting
|
7
8
|
# with your gem easier. You can also use a different console, if you like.
|
@@ -10,5 +11,5 @@ require "fixed_width_file_parser"
|
|
10
11
|
# require "pry"
|
11
12
|
# Pry.start
|
12
13
|
|
13
|
-
require
|
14
|
+
require 'irb'
|
14
15
|
IRB.start
|
@@ -1,23 +1,25 @@
|
|
1
1
|
# coding: utf-8
|
2
|
+
# frozen_string_literal: true
|
2
3
|
lib = File.expand_path('../lib', __FILE__)
|
3
4
|
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
5
|
require 'fixed_width_file_parser/version'
|
5
6
|
|
6
7
|
Gem::Specification.new do |spec|
|
7
|
-
spec.name =
|
8
|
+
spec.name = 'fixed_width_file_parser'
|
8
9
|
spec.version = FixedWidthFileParser::VERSION
|
9
|
-
spec.authors = [
|
10
|
-
spec.email = [
|
10
|
+
spec.authors = ['Jim Smith']
|
11
|
+
spec.email = ['jim@jimsmithdesign.com']
|
11
12
|
|
12
|
-
spec.summary =
|
13
|
-
spec.homepage =
|
13
|
+
spec.summary = 'Parse fixed width files easily and efficiently.'
|
14
|
+
spec.homepage = 'https://github.com/elevatorup/fixed_width_file_parser'
|
14
15
|
|
15
16
|
spec.files = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
|
16
|
-
spec.bindir =
|
17
|
+
spec.bindir = 'exe'
|
17
18
|
spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
|
18
|
-
spec.require_paths = [
|
19
|
+
spec.require_paths = ['lib']
|
19
20
|
|
20
|
-
spec.add_development_dependency
|
21
|
-
spec.add_development_dependency
|
21
|
+
spec.add_development_dependency 'bundler', '~> 1.8'
|
22
|
+
spec.add_development_dependency 'rake', '~> 10.0'
|
22
23
|
spec.add_development_dependency 'rspec'
|
24
|
+
spec.add_development_dependency 'codeclimate-test-reporter'
|
23
25
|
end
|
@@ -1,4 +1,5 @@
|
|
1
|
-
|
1
|
+
# frozen_string_literal: true
|
2
|
+
require 'fixed_width_file_parser/version'
|
2
3
|
|
3
4
|
module FixedWidthFileParser
|
4
5
|
# Parse a fixed width file, yielding the proper data for each line based on the fields passed in
|
@@ -8,32 +9,28 @@ module FixedWidthFileParser
|
|
8
9
|
# @yield [Hash] Yields a hash object based on the fields provided.
|
9
10
|
#
|
10
11
|
# @example
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
12
|
+
# filepath = 'path/to/file'
|
13
|
+
# fields = [
|
14
|
+
# { name: 'first_name', position: 0..10 },
|
15
|
+
# { name: 'middle_initial', position: 11 },
|
16
|
+
# { name: 'last_name', position: 12..25 }
|
17
|
+
# ]
|
18
|
+
#
|
19
|
+
# FixedWidthFileParser.parse(filepath, fields) do |row|
|
20
|
+
# puts row
|
21
|
+
# end
|
21
22
|
|
22
23
|
def self.parse(filepath, fields, options = {})
|
23
24
|
# Set options, or use default
|
24
25
|
force_utf8_encoding = options.fetch(:force_utf8_encoding, true)
|
25
26
|
|
26
27
|
# Verify `filepath` is a String
|
27
|
-
unless filepath.is_a?(String)
|
28
|
-
raise '`filepath` must be a String'
|
29
|
-
end
|
28
|
+
raise '`filepath` must be a String' unless filepath.is_a?(String)
|
30
29
|
|
31
30
|
# Verify `fields` is an array
|
32
31
|
if fields.is_a?(Array)
|
33
32
|
# Verify fields is not empty
|
34
|
-
if fields.empty?
|
35
|
-
raise '`fields` must contain at least 1 item'
|
36
|
-
end
|
33
|
+
raise '`fields` must contain at least 1 item' if fields.empty?
|
37
34
|
else
|
38
35
|
raise '`fields` must be an Array'
|
39
36
|
end
|
@@ -52,22 +49,20 @@ module FixedWidthFileParser
|
|
52
49
|
|
53
50
|
file = File.open(filepath)
|
54
51
|
|
55
|
-
|
52
|
+
until file.eof?
|
56
53
|
line = file.readline
|
57
54
|
# If the current line is blank, skip to the next line
|
58
55
|
# chomp to remove "\n" and "\r\n"
|
59
56
|
next if line.chomp.empty?
|
60
57
|
|
61
58
|
# Force UTF8 encoding if force_utf8_encoding is true (defaults to true)
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
line = line.encode('UTF-8', 'binary', invalid: :replace, undef: :replace, replace: '')
|
66
|
-
end
|
59
|
+
# Handle UTF Invalid Byte Sequence Errors
|
60
|
+
# e.g. https://robots.thoughtbot.com/fight-back-utf-8-invalid-byte-sequences
|
61
|
+
line = line.encode('UTF-8', 'binary', invalid: :replace, undef: :replace, replace: '') if force_utf8_encoding
|
67
62
|
|
68
63
|
line_fields = {}
|
69
64
|
fields.each do |field|
|
70
|
-
line_fields[field[:name].to_sym] = line[
|
65
|
+
line_fields[field[:name].to_sym] = line[field[:position]].nil? ? nil : line[field[:position]].strip
|
71
66
|
end
|
72
67
|
|
73
68
|
yield(line_fields)
|
@@ -77,63 +72,4 @@ module FixedWidthFileParser
|
|
77
72
|
|
78
73
|
file.close
|
79
74
|
end
|
80
|
-
|
81
|
-
def self.parse_in_batches(filepath, fields, options = {})
|
82
|
-
# Set options, or use default
|
83
|
-
batch_size = options.fetch(:batch_size, 1000)
|
84
|
-
force_utf8_encoding = options.fetch(:force_utf8_encoding, true)
|
85
|
-
|
86
|
-
# Verify `filepath` is a String
|
87
|
-
unless filepath.is_a?(String)
|
88
|
-
raise '`filepath` must be a String'
|
89
|
-
end
|
90
|
-
|
91
|
-
# Verify `fields` is an array
|
92
|
-
if fields.is_a?(Array)
|
93
|
-
# Verify fields is not empty
|
94
|
-
if fields.empty?
|
95
|
-
raise '`fields` must contain at least 1 item'
|
96
|
-
end
|
97
|
-
else
|
98
|
-
raise '`fields` must be an Array'
|
99
|
-
end
|
100
|
-
|
101
|
-
# Verify each field has a `name` and `position`
|
102
|
-
unless fields.all? { |item| item.key?(:name) && item.key?(:position) }
|
103
|
-
raise 'Each field hash must include a `name` and a `position`'
|
104
|
-
end
|
105
|
-
|
106
|
-
# Verify that each `position` is either a Range or an Integer
|
107
|
-
unless fields.all? { |item| item[:position].is_a?(Range) || item[:position].is_a?(Integer) }
|
108
|
-
raise "Each field's `position` must be a Range or an Integer"
|
109
|
-
end
|
110
|
-
|
111
|
-
GC.start
|
112
|
-
|
113
|
-
File.open(filepath) do |file|
|
114
|
-
file.lazy.drop(1).each_slice(batch_size) do |lines|
|
115
|
-
lines.each do |line|
|
116
|
-
# If the current line is blank, skip to the next line
|
117
|
-
# chomp to remove "\n" and "\r\n"
|
118
|
-
next if line.chomp.empty?
|
119
|
-
|
120
|
-
# Force UTF8 encoding if force_utf8_encoding is true (defaults to true)
|
121
|
-
if force_utf8_encoding
|
122
|
-
# Handle UTF Invalid Byte Sequence Errors
|
123
|
-
# e.g. https://robots.thoughtbot.com/fight-back-utf-8-invalid-byte-sequences
|
124
|
-
line = line.encode('UTF-8', 'binary', invalid: :replace, undef: :replace, replace: '')
|
125
|
-
end
|
126
|
-
|
127
|
-
line_fields = {}
|
128
|
-
fields.each do |field|
|
129
|
-
line_fields[field[:name].to_sym] = line[ field[:position] ].nil? ? nil : line[ field[:position] ].strip
|
130
|
-
end
|
131
|
-
|
132
|
-
yield(line_fields)
|
133
|
-
end
|
134
|
-
|
135
|
-
GC.start
|
136
|
-
end
|
137
|
-
end
|
138
|
-
end
|
139
75
|
end
|
data/lib/tasks/rspec.rake
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fixed_width_file_parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 1.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jim Smith
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-
|
11
|
+
date: 2016-10-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -52,6 +52,20 @@ dependencies:
|
|
52
52
|
- - ">="
|
53
53
|
- !ruby/object:Gem::Version
|
54
54
|
version: '0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: codeclimate-test-reporter
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ">="
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
55
69
|
description:
|
56
70
|
email:
|
57
71
|
- jim@jimsmithdesign.com
|
@@ -90,7 +104,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
90
104
|
version: '0'
|
91
105
|
requirements: []
|
92
106
|
rubyforge_project:
|
93
|
-
rubygems_version: 2.4.
|
107
|
+
rubygems_version: 2.4.6
|
94
108
|
signing_key:
|
95
109
|
specification_version: 4
|
96
110
|
summary: Parse fixed width files easily and efficiently.
|