masticate 0.4.0 → 0.4.1
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/masticate/datify.rb +1 -3
- data/lib/masticate/mender.rb +16 -0
- data/lib/masticate/version.rb +1 -1
- data/masticate.gemspec +4 -4
- data/spec/data/buried_delims_in_input.psv +4 -0
- data/spec/data/delims_untangled.csv +4 -0
- data/spec/lib/mender_spec.rb +12 -0
- metadata +24 -20
data/lib/masticate/datify.rb
CHANGED
@@ -1,5 +1,4 @@
|
|
1
1
|
# convert date columns to numerics
|
2
|
-
require "csv"
|
3
2
|
|
4
3
|
class Masticate::Datify < Masticate::Base
|
5
4
|
def configure(opts)
|
@@ -20,8 +19,7 @@ class Masticate::Datify < Masticate::Base
|
|
20
19
|
@index = row.index(@field) or raise "Unable to find column '#{@field}'"
|
21
20
|
end
|
22
21
|
elsif row
|
23
|
-
|
24
|
-
row[@index] = ts.to_i rescue nil
|
22
|
+
row[@index] = DateTime.strptime(row[@index], @format).to_time.to_i rescue nil
|
25
23
|
end
|
26
24
|
row
|
27
25
|
end
|
data/lib/masticate/mender.rb
CHANGED
@@ -10,6 +10,7 @@ class Masticate::Mender < Masticate::Base
|
|
10
10
|
@inlined = opts[:inlined]
|
11
11
|
@snip = opts[:snip]
|
12
12
|
@dejunk = opts[:dejunk]
|
13
|
+
@buried = opts[:buried]
|
13
14
|
|
14
15
|
@expected_field_count = nil
|
15
16
|
@holding = ''
|
@@ -47,6 +48,15 @@ class Masticate::Mender < Masticate::Base
|
|
47
48
|
else
|
48
49
|
raise "Do not understand snip instruction [#{@snip.inspect}]"
|
49
50
|
end
|
51
|
+
|
52
|
+
if @buried
|
53
|
+
if @buried.is_a?(Fixnum) || @buried =~ /^\d+/
|
54
|
+
@buried_index = @buried.to_i
|
55
|
+
else
|
56
|
+
@buried_index = row.index(@buried) or raise "Unable to find column '#{@buried}'"
|
57
|
+
end
|
58
|
+
end
|
59
|
+
|
50
60
|
@expected_field_count = @headers.count
|
51
61
|
row = @headers
|
52
62
|
elsif row
|
@@ -65,6 +75,12 @@ class Masticate::Mender < Masticate::Base
|
|
65
75
|
@holding = ''
|
66
76
|
end
|
67
77
|
|
78
|
+
if @buried && (row.count > @expected_field_count)
|
79
|
+
# buried delimiter
|
80
|
+
# take the N+1th field and merge it onto the Nth field, moving up the remaining fields
|
81
|
+
row[@buried_index] += row.delete_at(@buried_index + 1)
|
82
|
+
end
|
83
|
+
|
68
84
|
if @dejunk && row && row.select {|s| s && !s.strip.empty?}.count <= 2
|
69
85
|
# junky row, suppress output
|
70
86
|
nil
|
data/lib/masticate/version.rb
CHANGED
data/masticate.gemspec
CHANGED
@@ -16,8 +16,8 @@ Gem::Specification.new do |gem|
|
|
16
16
|
gem.require_paths = ["lib"]
|
17
17
|
gem.version = Masticate::VERSION
|
18
18
|
|
19
|
-
gem.add_development_dependency "rake"
|
20
|
-
gem.add_development_dependency "rspec"
|
21
|
-
gem.add_development_dependency "guard-rspec"
|
22
|
-
gem.add_development_dependency "ruby_gntp"
|
19
|
+
gem.add_development_dependency "rake"
|
20
|
+
gem.add_development_dependency "rspec"
|
21
|
+
gem.add_development_dependency "guard-rspec"
|
22
|
+
gem.add_development_dependency "ruby_gntp"
|
23
23
|
end
|
data/spec/lib/mender_spec.rb
CHANGED
@@ -51,4 +51,16 @@ describe "mending" do
|
|
51
51
|
results[:output_count].should == 4
|
52
52
|
output.should == correct_output
|
53
53
|
end
|
54
|
+
|
55
|
+
it "should consolidate fields with embedded delimiters" do
|
56
|
+
filename = File.dirname(__FILE__) + "/../data/buried_delims_in_input.psv"
|
57
|
+
tmp = Tempfile.new('mending')
|
58
|
+
results = Masticate.mend(filename, :col_sep => '|', :buried => 'Comment', :output => tmp)
|
59
|
+
output = File.read(tmp)
|
60
|
+
correct_output = File.read(File.dirname(__FILE__) + "/../data/delims_untangled.csv")
|
61
|
+
|
62
|
+
results[:input_count].should == 4
|
63
|
+
results[:output_count].should == 4
|
64
|
+
output.should == correct_output
|
65
|
+
end
|
54
66
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: masticate
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.0
|
4
|
+
version: 0.4.1
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,72 +9,72 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-
|
12
|
+
date: 2012-09-27 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rake
|
16
16
|
requirement: !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
|
-
- -
|
19
|
+
- - ! '>='
|
20
20
|
- !ruby/object:Gem::Version
|
21
|
-
version: 0
|
21
|
+
version: '0'
|
22
22
|
type: :development
|
23
23
|
prerelease: false
|
24
24
|
version_requirements: !ruby/object:Gem::Requirement
|
25
25
|
none: false
|
26
26
|
requirements:
|
27
|
-
- -
|
27
|
+
- - ! '>='
|
28
28
|
- !ruby/object:Gem::Version
|
29
|
-
version: 0
|
29
|
+
version: '0'
|
30
30
|
- !ruby/object:Gem::Dependency
|
31
31
|
name: rspec
|
32
32
|
requirement: !ruby/object:Gem::Requirement
|
33
33
|
none: false
|
34
34
|
requirements:
|
35
|
-
- -
|
35
|
+
- - ! '>='
|
36
36
|
- !ruby/object:Gem::Version
|
37
|
-
version:
|
37
|
+
version: '0'
|
38
38
|
type: :development
|
39
39
|
prerelease: false
|
40
40
|
version_requirements: !ruby/object:Gem::Requirement
|
41
41
|
none: false
|
42
42
|
requirements:
|
43
|
-
- -
|
43
|
+
- - ! '>='
|
44
44
|
- !ruby/object:Gem::Version
|
45
|
-
version:
|
45
|
+
version: '0'
|
46
46
|
- !ruby/object:Gem::Dependency
|
47
47
|
name: guard-rspec
|
48
48
|
requirement: !ruby/object:Gem::Requirement
|
49
49
|
none: false
|
50
50
|
requirements:
|
51
|
-
- -
|
51
|
+
- - ! '>='
|
52
52
|
- !ruby/object:Gem::Version
|
53
|
-
version: 0
|
53
|
+
version: '0'
|
54
54
|
type: :development
|
55
55
|
prerelease: false
|
56
56
|
version_requirements: !ruby/object:Gem::Requirement
|
57
57
|
none: false
|
58
58
|
requirements:
|
59
|
-
- -
|
59
|
+
- - ! '>='
|
60
60
|
- !ruby/object:Gem::Version
|
61
|
-
version: 0
|
61
|
+
version: '0'
|
62
62
|
- !ruby/object:Gem::Dependency
|
63
63
|
name: ruby_gntp
|
64
64
|
requirement: !ruby/object:Gem::Requirement
|
65
65
|
none: false
|
66
66
|
requirements:
|
67
|
-
- -
|
67
|
+
- - ! '>='
|
68
68
|
- !ruby/object:Gem::Version
|
69
|
-
version: 0
|
69
|
+
version: '0'
|
70
70
|
type: :development
|
71
71
|
prerelease: false
|
72
72
|
version_requirements: !ruby/object:Gem::Requirement
|
73
73
|
none: false
|
74
74
|
requirements:
|
75
|
-
- -
|
75
|
+
- - ! '>='
|
76
76
|
- !ruby/object:Gem::Version
|
77
|
-
version: 0
|
77
|
+
version: '0'
|
78
78
|
description: Data file crunching
|
79
79
|
email:
|
80
80
|
- jmay@pobox.com
|
@@ -111,6 +111,7 @@ files:
|
|
111
111
|
- spec/data/badnums.csv
|
112
112
|
- spec/data/badnums_fixed.csv
|
113
113
|
- spec/data/broken_psv.txt
|
114
|
+
- spec/data/buried_delims_in_input.psv
|
114
115
|
- spec/data/concat_result.txt
|
115
116
|
- spec/data/cookery_input.psv
|
116
117
|
- spec/data/cookery_result.csv
|
@@ -118,6 +119,7 @@ files:
|
|
118
119
|
- spec/data/cooking_result.csv
|
119
120
|
- spec/data/datify_input.csv
|
120
121
|
- spec/data/datify_result.csv
|
122
|
+
- spec/data/delims_untangled.csv
|
121
123
|
- spec/data/downcase_results.csv
|
122
124
|
- spec/data/events.csv
|
123
125
|
- spec/data/events_reduced.csv
|
@@ -166,7 +168,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
166
168
|
version: '0'
|
167
169
|
segments:
|
168
170
|
- 0
|
169
|
-
hash:
|
171
|
+
hash: 2222163119454515723
|
170
172
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
171
173
|
none: false
|
172
174
|
requirements:
|
@@ -175,7 +177,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
175
177
|
version: '0'
|
176
178
|
segments:
|
177
179
|
- 0
|
178
|
-
hash:
|
180
|
+
hash: 2222163119454515723
|
179
181
|
requirements: []
|
180
182
|
rubyforge_project: masticate
|
181
183
|
rubygems_version: 1.8.24
|
@@ -186,6 +188,7 @@ test_files:
|
|
186
188
|
- spec/data/badnums.csv
|
187
189
|
- spec/data/badnums_fixed.csv
|
188
190
|
- spec/data/broken_psv.txt
|
191
|
+
- spec/data/buried_delims_in_input.psv
|
189
192
|
- spec/data/concat_result.txt
|
190
193
|
- spec/data/cookery_input.psv
|
191
194
|
- spec/data/cookery_result.csv
|
@@ -193,6 +196,7 @@ test_files:
|
|
193
196
|
- spec/data/cooking_result.csv
|
194
197
|
- spec/data/datify_input.csv
|
195
198
|
- spec/data/datify_result.csv
|
199
|
+
- spec/data/delims_untangled.csv
|
196
200
|
- spec/data/downcase_results.csv
|
197
201
|
- spec/data/events.csv
|
198
202
|
- spec/data/events_reduced.csv
|