fixed_width_parser 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.rspec +4 -0
- data/.rvmrc +2 -0
- data/Gemfile +13 -0
- data/Gemfile.lock +52 -0
- data/Guardfile +11 -0
- data/README.rdoc +0 -0
- data/Rakefile +1 -0
- data/fixed_width_parser.gemspec +30 -0
- data/lib/fixed_width_parser.rb +84 -0
- data/lib/fixed_width_parser/version.rb +3 -0
- data/spec/data/test.txt +8 -0
- data/spec/lib/fixed_width_parser_spec.rb +242 -0
- data/spec/spec_helper.rb +10 -0
- metadata +109 -0
data/.rspec
ADDED
data/.rvmrc
ADDED
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
@@ -0,0 +1,52 @@
|
|
1
|
+
PATH
|
2
|
+
remote: .
|
3
|
+
specs:
|
4
|
+
fixed_width_parser (1.0.0)
|
5
|
+
|
6
|
+
GEM
|
7
|
+
remote: http://rubygems.org/
|
8
|
+
specs:
|
9
|
+
columnize (0.3.4)
|
10
|
+
diff-lcs (1.1.3)
|
11
|
+
gem-dandy (0.2.1)
|
12
|
+
trollop (= 1.16.2)
|
13
|
+
growl (1.0.3)
|
14
|
+
guard (0.6.3)
|
15
|
+
thor (~> 0.14.6)
|
16
|
+
guard-rspec (0.4.5)
|
17
|
+
guard (>= 0.4.0)
|
18
|
+
guard-shell (0.1.1)
|
19
|
+
guard (>= 0.2.0)
|
20
|
+
linecache (0.46)
|
21
|
+
rbx-require-relative (> 0.0.4)
|
22
|
+
rb-fsevent (0.4.3.1)
|
23
|
+
rbx-require-relative (0.0.5)
|
24
|
+
rspec (2.6.0)
|
25
|
+
rspec-core (~> 2.6.0)
|
26
|
+
rspec-expectations (~> 2.6.0)
|
27
|
+
rspec-mocks (~> 2.6.0)
|
28
|
+
rspec-core (2.6.4)
|
29
|
+
rspec-expectations (2.6.0)
|
30
|
+
diff-lcs (~> 1.1.2)
|
31
|
+
rspec-mocks (2.6.0)
|
32
|
+
ruby-debug (0.10.4)
|
33
|
+
columnize (>= 0.1)
|
34
|
+
ruby-debug-base (~> 0.10.4.0)
|
35
|
+
ruby-debug-base (0.10.4)
|
36
|
+
linecache (>= 0.3)
|
37
|
+
thor (0.14.6)
|
38
|
+
trollop (1.16.2)
|
39
|
+
|
40
|
+
PLATFORMS
|
41
|
+
ruby
|
42
|
+
|
43
|
+
DEPENDENCIES
|
44
|
+
fixed_width_parser!
|
45
|
+
gem-dandy
|
46
|
+
growl
|
47
|
+
guard
|
48
|
+
guard-rspec
|
49
|
+
guard-shell
|
50
|
+
rb-fsevent
|
51
|
+
rspec
|
52
|
+
ruby-debug
|
data/Guardfile
ADDED
@@ -0,0 +1,11 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
# guard 'shell' do
|
4
|
+
# watch( 'fixed_width_parser.rb' ) { |m| `ruby fixed_width_parser.rb` }
|
5
|
+
# end
|
6
|
+
|
7
|
+
# Options handed to the 'rspec' guard, kept in one place for readability.
rspec_guard_options = {
  :version        => 2,
  :all_on_start   => true,
  :all_after_pass => false,
  :cli            => '--color --format doc'
}

guard( 'rspec', rspec_guard_options ) do
  # Any spec file.
  watch( %r{^spec/.+_spec\.rb$} )

  # A library file maps to the spec with the same relative path under spec/lib.
  watch( %r{^lib/(.+)\.rb$} ) do |match|
    "spec/lib/#{match[1]}_spec.rb"
  end

  # The spec helper maps to the whole spec directory.
  watch( 'spec/spec_helper.rb' ) do
    "spec"
  end
end
|
data/README.rdoc
ADDED
File without changes
|
data/Rakefile
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
require "bundler/gem_tasks"
|
@@ -0,0 +1,30 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
$:.push File.expand_path("../lib", __FILE__)
|
3
|
+
require "fixed_width_parser/version"
|
4
|
+
|
5
|
+
# Gem specification for fixed_width_parser.
#
# The summary and description were previously empty strings, which left the
# published gem without any searchable metadata; they now describe what the
# library does. The homepage is still unknown and is left empty on purpose.
Gem::Specification.new do |s|
  s.name        = "fixed_width_parser"
  s.version     = FixedWidthParser::VERSION
  s.authors     = ["C. Jason Harrelson"]
  s.email       = ["jason@lookforwardenterprises.com"]
  s.homepage    = ""
  s.summary     = %q{Parse fixed-width text lines and files into fields}
  s.description = %q{Splits fixed-width text lines, or every non-empty line of a file, into an array of fields or a hash of named fields, with optional stripping of trailing whitespace.}

  s.rubyforge_project = "fixed_width_parser"

  # The packaged file lists are taken from git, so the gem must be built from
  # a git checkout.
  s.files         = `git ls-files`.split("\n")
  s.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  s.executables   = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
  s.require_paths = ["lib"]

  # specify any dependencies here; for example:
  %w(
    gem-dandy
    rspec
  ).each do |development_dependency|
    s.add_development_dependency development_dependency
  end

  # s.add_runtime_dependency "rest-client"
end
|
@@ -0,0 +1,84 @@
|
|
1
|
+
# Parses fixed-width text: a line is cut into consecutive fields whose widths
# are given by a format.
#
# Two format flavours are supported:
#
# * an Array of Integer widths, e.g. [10, 8, 8, 14] (.parse / .foreach), which
#   produces an Array of field strings;
# * a "hash-like" Array of [name, width] pairs, e.g. [['a', 10], [:b, 8]]
#   (.parse_named / .foreach_named), which produces a Hash keyed by the
#   stringified names.
#
# Every method accepts an options Hash; :rstrip => true strips trailing
# whitespace from each field.
module FixedWidthParser

  # Parses every non-empty line of a file with .parse.
  #
  # filepath - path of the file to read.
  # formats  - Array of Integer field widths.
  # options  - Hash of options (:rstrip).
  #
  # Yields one Array of field strings per non-empty line. Returns an
  # Enumerator over the same values when no block is given.
  # Raises RuntimeError (from .parse) for an invalid format or a line that
  # does not fit the format.
  def self.foreach( filepath, formats, options={} )
    return enum_for( :foreach, filepath, formats, options ) unless block_given?

    each_data_line( filepath ) { |line| yield parse( line, formats, options ) }
  end

  # Parses every non-empty line of a file with .parse_named.
  #
  # filepath - path of the file to read.
  # formats  - Array of [name, Integer width] pairs.
  # options  - Hash of options (:rstrip).
  #
  # Yields one Hash (name string => field string) per non-empty line. Returns
  # an Enumerator over the same values when no block is given.
  # Raises RuntimeError (from .parse_named) for an invalid format or a line
  # that does not fit the format.
  def self.foreach_named( filepath, formats, options={} )
    return enum_for( :foreach_named, filepath, formats, options ) unless block_given?

    each_data_line( filepath ) { |line| yield parse_named( line, formats, options ) }
  end

  # Splits one line into an Array of field strings.
  #
  # line    - String to parse; a trailing line terminator is ignored and the
  #           string itself is never modified.
  # formats - Array of Integer field widths.
  # options - Hash of options (:rstrip).
  #
  # Raises RuntimeError when formats is not an Array of Integers or when the
  # line does not fit the format.
  def self.parse( line, formats, options={} )
    unless formats.is_a?( Array ) && formats.all? { |length| length.is_a?( Integer ) }
      raise 'Invalid format: expected an array of integers'
    end

    split_fields( line, formats, options )
  end

  # Splits one line into a Hash of name string => field string.
  #
  # line    - String to parse; a trailing line terminator is ignored and the
  #           string itself is never modified.
  # formats - Array of [name, Integer width] pairs; names are converted with
  #           to_s, so symbols and strings are interchangeable.
  # options - Hash of options (:rstrip).
  #
  # Raises RuntimeError when formats is not an Array of [name, width] pairs
  # (a Hash or a plain Array of Integers is rejected) or when the line does
  # not fit the format.
  def self.parse_named( line, formats, options={} )
    raise 'Invalid format: expected a hash-like array' unless named_format?( formats )

    names   = formats.map { |name, _length| name.to_s }
    lengths = formats.map { |_name, length| length }

    Hash[names.zip( split_fields( line, lengths, options ) )]
  end

  # Internal helpers. They are deliberately left publicly callable: the bare
  # `private` that used to precede them never applied to `def self.` methods,
  # so existing callers may depend on reaching them directly.

  # Converts field widths into the zero-based character Range of each field,
  # e.g. [10, 8] => [0..9, 10..17].
  def self.calculate_ranges( formats )
    offset = 0

    formats.map do |length|
      first   = offset
      offset += length
      first..( offset - 1 )
    end
  end

  # Builds a Regexp with one capture group of exactly `length` characters per
  # field, anchored at both ends, e.g. [10, 8] => /^(.{10})(.{8})$/.
  def self.generate_regex( formats )
    groups = formats.map { |length| "(.{#{length}})" }.join

    Regexp.new( "^#{groups}$" )
  end

  # True when formats is an Array whose every entry is a [name, Integer] pair.
  def self.named_format?( formats )
    return false unless formats.is_a?( Array )

    formats.all? do |entry|
      entry.is_a?( Array ) && entry.size == 2 && entry.last.is_a?( Integer )
    end
  end

  # Yields each line of the file with its line terminator removed, skipping
  # lines that are empty after that removal.
  def self.each_data_line( filepath )
    File.open( filepath, 'r' ) do |file|
      file.each_line do |raw_line|
        line = raw_line.chomp
        yield line unless line.empty?
      end
    end
  end

  # Cuts a line into fields of the given widths, honouring options[:rstrip].
  # Raises RuntimeError when the line does not fit the widths.
  def self.split_fields( line, lengths, options )
    # Non-destructive chomp: the caller's string may be frozen or reused.
    text  = line.chomp
    match = generate_regex( lengths ).match( text )

    unless match
      raise "Invalid line: #{text.inspect} does not match format #{lengths.inspect}"
    end

    fields = match.captures
    options[:rstrip] ? fields.map { |field| field.rstrip } : fields
  end

end
|
data/spec/data/test.txt
ADDED
@@ -0,0 +1,242 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
# Specs for FixedWidthParser.
#
# Fixture fields are padded with ljust so that every expected width is
# explicit (10, 8, 8 and 14 characters), and expected records are built with
# Hash[] rather than the comma-separated hash literal, which only Ruby 1.8
# accepts.
describe FixedWidthParser do

  let( :fixed_width_file ) do
    File.join File.dirname( __FILE__ ), '..', 'data', 'test.txt'
  end

  let( :lengths_format ) { [10,8,8,14] }

  let( :named_lengths_format ) do
    [
      ['a', 10],
      [:b,  8 ],
      [:c,  8 ],
      [:d,  14]
    ]
  end

  # Field values without and with their fixed-width padding.
  let( :stripped_fields ) { ['DOCUMENT', '00014438', 'P', 'PLAT'] }

  let( :padded_fields ) do
    stripped_fields.zip( lengths_format ).map { |text, width| text.ljust( width ) }
  end

  # The same values keyed by the stringified names of named_lengths_format.
  let( :field_names )     { ['a', 'b', 'c', 'd'] }
  let( :padded_record )   { Hash[field_names.zip( padded_fields )] }
  let( :stripped_record ) { Hash[field_names.zip( stripped_fields )] }

  # One full 40-character line.
  let( :line ) { padded_fields.join }

  context 'yielding the correct lines' do

    context '.foreach' do

      context 'when given a valid format' do

        context 'when rstrip is false' do

          let( :yielded_lines ) do
            collected = []
            FixedWidthParser.foreach( fixed_width_file,
                                      lengths_format ) do |fields|
              collected << fields
            end
            collected
          end

          it "should yield the correct number of lines" do
            yielded_lines.size.should == 7
          end

          it "should yield each line correctly parsed" do
            yielded_lines[0].should == padded_fields
          end

        end

        context 'when rstrip is true' do

          let( :yielded_lines ) do
            collected = []
            FixedWidthParser.foreach( fixed_width_file,
                                      lengths_format, :rstrip => true ) do |fields|
              collected << fields
            end
            collected
          end

          it "should yield the correct number of lines" do
            yielded_lines.size.should == 7
          end

          it "should yield each line correctly parsed" do
            yielded_lines[0].should == stripped_fields
          end

        end

      end

      context 'when given an invalid format' do

        it "should raise an exception when the format is not an array of integers" do
          lambda { FixedWidthParser.foreach( fixed_width_file, {} ) { |l| } }.should(
            raise_error( RuntimeError, 'Invalid format: expected an array of integers' )
          )
        end

      end

    end

    context '.foreach_named' do

      context 'when given a valid format' do

        context 'when rstrip is false' do

          let( :yielded_lines ) do
            collected = []
            FixedWidthParser.foreach_named( fixed_width_file,
                                            named_lengths_format ) do |record|
              collected << record
            end
            collected
          end

          it "should yield the correct number of lines" do
            yielded_lines.size.should == 7
          end

          it "should yield each line correctly parsed" do
            yielded_lines[0].should == padded_record
          end

        end

        context 'when rstrip is true' do

          let( :yielded_lines ) do
            collected = []
            FixedWidthParser.foreach_named( fixed_width_file,
                                            named_lengths_format, :rstrip => true ) do |record|
              collected << record
            end
            collected
          end

          # Left pending exactly as before.
          xit "should yield the correct number of lines" do
            yielded_lines.size.should == 7
          end

          it "should yield each line correctly parsed" do
            yielded_lines[0].should == stripped_record
          end

        end

      end

      context 'when given an invalid format' do

        # Left pending exactly as before.
        xit "should raise an exception when the format is an array of integers" do
          lambda { FixedWidthParser.foreach_named( fixed_width_file, lengths_format ) { |l| } }.should(
            raise_error( RuntimeError, 'Invalid format: expected a hash-like array' )
          )
        end

        it "should raise an exception when the format is a hash" do
          lambda { FixedWidthParser.foreach_named( fixed_width_file, {} ) { |l| } }.should(
            raise_error( RuntimeError, 'Invalid format: expected a hash-like array' )
          )
        end

      end

    end

  end

  context '.parse' do

    context 'when rstrip is false' do

      subject { FixedWidthParser.parse( line, lengths_format ) }

      it { should == padded_fields }

    end

    context 'when rstrip is true' do

      subject { FixedWidthParser.parse( line, lengths_format, :rstrip => true ) }

      it { should == stripped_fields }

    end

  end

  context '.parse_named' do

    context 'when rstrip is false' do

      subject { FixedWidthParser.parse_named( line, named_lengths_format ) }

      it { should == padded_record }

    end

    context 'when rstrip is true' do

      subject { FixedWidthParser.parse_named( line, named_lengths_format, :rstrip => true ) }

      it { should == stripped_record }

    end

  end

  context 'calculating ranges from lengths' do

    it "should return the correct ranges" do
      FixedWidthParser.send( :calculate_ranges, lengths_format ).should == [(0..9), (10..17), (18..25), (26..39)]
    end

  end

  context 'generating a regex from lengths' do

    it "should return the correct regex" do
      FixedWidthParser.send( :generate_regex, lengths_format ).should == /^(.{10})(.{8})(.{8})(.{14})$/
    end

  end

end
|
data/spec/spec_helper.rb
ADDED
metadata
ADDED
@@ -0,0 +1,109 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: fixed_width_parser
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
hash: 23
|
5
|
+
prerelease:
|
6
|
+
segments:
|
7
|
+
- 1
|
8
|
+
- 0
|
9
|
+
- 0
|
10
|
+
version: 1.0.0
|
11
|
+
platform: ruby
|
12
|
+
authors:
|
13
|
+
- C. Jason Harrelson
|
14
|
+
autorequire:
|
15
|
+
bindir: bin
|
16
|
+
cert_chain: []
|
17
|
+
|
18
|
+
date: 2011-11-08 00:00:00 -06:00
|
19
|
+
default_executable:
|
20
|
+
dependencies:
|
21
|
+
- !ruby/object:Gem::Dependency
|
22
|
+
name: gem-dandy
|
23
|
+
prerelease: false
|
24
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ">="
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
hash: 3
|
30
|
+
segments:
|
31
|
+
- 0
|
32
|
+
version: "0"
|
33
|
+
type: :development
|
34
|
+
version_requirements: *id001
|
35
|
+
- !ruby/object:Gem::Dependency
|
36
|
+
name: rspec
|
37
|
+
prerelease: false
|
38
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
39
|
+
none: false
|
40
|
+
requirements:
|
41
|
+
- - ">="
|
42
|
+
- !ruby/object:Gem::Version
|
43
|
+
hash: 3
|
44
|
+
segments:
|
45
|
+
- 0
|
46
|
+
version: "0"
|
47
|
+
type: :development
|
48
|
+
version_requirements: *id002
|
49
|
+
description: ""
|
50
|
+
email:
|
51
|
+
- jason@lookforwardenterprises.com
|
52
|
+
executables: []
|
53
|
+
|
54
|
+
extensions: []
|
55
|
+
|
56
|
+
extra_rdoc_files: []
|
57
|
+
|
58
|
+
files:
|
59
|
+
- .rspec
|
60
|
+
- .rvmrc
|
61
|
+
- Gemfile
|
62
|
+
- Gemfile.lock
|
63
|
+
- Guardfile
|
64
|
+
- README.rdoc
|
65
|
+
- Rakefile
|
66
|
+
- fixed_width_parser.gemspec
|
67
|
+
- lib/fixed_width_parser.rb
|
68
|
+
- lib/fixed_width_parser/version.rb
|
69
|
+
- spec/data/test.txt
|
70
|
+
- spec/lib/fixed_width_parser_spec.rb
|
71
|
+
- spec/spec_helper.rb
|
72
|
+
has_rdoc: true
|
73
|
+
homepage: ""
|
74
|
+
licenses: []
|
75
|
+
|
76
|
+
post_install_message:
|
77
|
+
rdoc_options: []
|
78
|
+
|
79
|
+
require_paths:
|
80
|
+
- lib
|
81
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
82
|
+
none: false
|
83
|
+
requirements:
|
84
|
+
- - ">="
|
85
|
+
- !ruby/object:Gem::Version
|
86
|
+
hash: 3
|
87
|
+
segments:
|
88
|
+
- 0
|
89
|
+
version: "0"
|
90
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
91
|
+
none: false
|
92
|
+
requirements:
|
93
|
+
- - ">="
|
94
|
+
- !ruby/object:Gem::Version
|
95
|
+
hash: 3
|
96
|
+
segments:
|
97
|
+
- 0
|
98
|
+
version: "0"
|
99
|
+
requirements: []
|
100
|
+
|
101
|
+
rubyforge_project: fixed_width_parser
|
102
|
+
rubygems_version: 1.6.0
|
103
|
+
signing_key:
|
104
|
+
specification_version: 3
|
105
|
+
summary: ""
|
106
|
+
test_files:
|
107
|
+
- spec/data/test.txt
|
108
|
+
- spec/lib/fixed_width_parser_spec.rb
|
109
|
+
- spec/spec_helper.rb
|