masticate 0.3 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.travis.yml +8 -0
- data/Rakefile +1 -1
- data/lib/masticate/base.rb +8 -2
- data/lib/masticate/concat.rb +1 -1
- data/lib/masticate/cook.rb +10 -2
- data/lib/masticate/sniffer.rb +7 -2
- data/lib/masticate/version.rb +1 -1
- data/masticate.gemspec +1 -0
- data/spec/data/cookery_input.psv +8 -0
- data/spec/data/cookery_result.csv +5 -0
- data/spec/data/quoted_csv_data.txt +1 -0
- data/spec/data/recipe_cookery.txt +6 -0
- data/spec/data/recipe_mend.txt +2 -0
- data/spec/lib/cook_spec.rb +11 -0
- data/spec/lib/csvify_spec.rb +1 -1
- data/spec/lib/mender_spec.rb +4 -4
- data/spec/lib/plucker_spec.rb +2 -2
- metadata +26 -8
data/.travis.yml
ADDED
data/Rakefile
CHANGED
data/lib/masticate/base.rb
CHANGED
data/lib/masticate/concat.rb
CHANGED
data/lib/masticate/cook.rb
CHANGED
@@ -18,7 +18,8 @@ class Masticate::Cook < Masticate::Base
|
|
18
18
|
recipe = File.read(recipefile).lines
|
19
19
|
standard_options(opts)
|
20
20
|
|
21
|
-
|
21
|
+
# ignore blank lines in recipe file
|
22
|
+
steps = recipe.grep(/\S/).map do |step|
|
22
23
|
argv = Shellwords.split(step)
|
23
24
|
masticator = Masticate::MyOptionParser.new
|
24
25
|
command, options = masticator.parse(argv)
|
@@ -38,9 +39,16 @@ class Masticate::Cook < Masticate::Base
|
|
38
39
|
emit(row) if row
|
39
40
|
end
|
40
41
|
end
|
42
|
+
more_rows = []
|
41
43
|
steps.each do |step|
|
42
|
-
|
44
|
+
if more_rows.any?
|
45
|
+
more_rows = more_rows.map {|row| step.crunch(row)}
|
46
|
+
else
|
47
|
+
step.crunch(nil) {|row| more_rows << row}
|
48
|
+
end
|
43
49
|
end
|
50
|
+
more_rows.each {|row| emit(row)}
|
51
|
+
# step.crunch(nil) {|row| emit(row)}
|
44
52
|
|
45
53
|
@output.close if opts[:output]
|
46
54
|
|
data/lib/masticate/sniffer.rb
CHANGED
@@ -25,7 +25,8 @@ class Masticate::Sniffer < Masticate::Base
|
|
25
25
|
def find_col_sep
|
26
26
|
@delimstats = {}
|
27
27
|
with_input do |input|
|
28
|
-
|
28
|
+
lines = 10.times.map{get}.compact
|
29
|
+
lines.each do |line|
|
29
30
|
@line1 = line unless @line1
|
30
31
|
|
31
32
|
CandidateDelimiters.each do |delim|
|
@@ -67,8 +68,12 @@ class Masticate::Sniffer < Masticate::Base
|
|
67
68
|
end
|
68
69
|
|
69
70
|
def stats
|
71
|
+
counts = Hash.new(0)
|
70
72
|
with_input do |input|
|
71
|
-
|
73
|
+
while line = get
|
74
|
+
counts[CSV.parse_line(line, :col_sep => col_sep, :quote_char => quote_char || "\0").count] += 1
|
75
|
+
end
|
72
76
|
end
|
77
|
+
counts
|
73
78
|
end
|
74
79
|
end
|
data/lib/masticate/version.rb
CHANGED
data/masticate.gemspec
CHANGED
@@ -16,6 +16,7 @@ Gem::Specification.new do |gem|
|
|
16
16
|
gem.require_paths = ["lib"]
|
17
17
|
gem.version = Masticate::VERSION
|
18
18
|
|
19
|
+
gem.add_development_dependency "rake", "~> 0.9.2"
|
19
20
|
gem.add_development_dependency "rspec", "~> 2.9.0"
|
20
21
|
gem.add_development_dependency "guard-rspec", "~> 0.7.0"
|
21
22
|
gem.add_development_dependency "ruby_gntp", "~> 0.3.4"
|
@@ -0,0 +1,8 @@
|
|
1
|
+
DTTM|AuditCode|AuditDesc|AuditEventTypeID|AuditByID
|
2
|
+
3/12/2012 11:20:32AM|EXCEPTADD|Exception Added|10|9,123.00
|
3
|
+
3/13/2012 8:12:39AM|EXCEPTADD|Exception Added|10|9,234.00
|
4
|
+
3/13/2012 8:13:25AM|EXCEPTADD|Exception Added|10|9,123.00
|
5
|
+
3/12/2012 11:20:32AM|EXCEPTADD|Exception Added|10|9,345.00
|
6
|
+
3/12/2012 11:20:32AM|EXCEPTADD|Exception Added|10|9,234.00
|
7
|
+
3/12/2012 11:20:32AM|EXCEPTADD|Exception Added|10|9,678.00
|
8
|
+
3/12/2012 11:20:32AM|1
|
@@ -98,3 +98,4 @@ site,ibex,unit,face,doctor,hospid,usrorder,dteorder,usrsend,dtesend,usrdone,dted
|
|
98
98
|
1,20120106100014,2044817,L,504,15550,504,201201061011,504,201201061011,0,201201061049,L,"594,856"
|
99
99
|
1,20120106100014,2044818,X,504,15550,504,201201061011,504,201201061011,0,201201061038,X,"594,855"
|
100
100
|
1,20120106100014,2044816,L,504,15550,504,201201061011,504,201201061011,0,201201061049,L,"594,860"
|
101
|
+
|
data/spec/data/recipe_mend.txt
CHANGED
data/spec/lib/cook_spec.rb
CHANGED
@@ -24,4 +24,15 @@ describe "cooking up a recipe" do
|
|
24
24
|
|
25
25
|
output.should == correct_output
|
26
26
|
end
|
27
|
+
|
28
|
+
it "should do correct datify & gsub in recipe" do
|
29
|
+
input = File.dirname(__FILE__) + "/../data/cookery_input.psv"
|
30
|
+
recipe = File.dirname(__FILE__) + "/../data/recipe_cookery.txt"
|
31
|
+
tmp = Tempfile.new('cooked')
|
32
|
+
results = Masticate.cook(input, :col_sep => '|', :output => tmp, :recipe => recipe)
|
33
|
+
output = File.read(tmp)
|
34
|
+
correct_output = File.read(File.dirname(__FILE__) + "/../data/cookery_result.csv")
|
35
|
+
|
36
|
+
output.should == correct_output
|
37
|
+
end
|
27
38
|
end
|
data/spec/lib/csvify_spec.rb
CHANGED
data/spec/lib/mender_spec.rb
CHANGED
@@ -7,14 +7,14 @@ describe "mending" do
|
|
7
7
|
it "should merge lines when delimiter counts don't match'" do
|
8
8
|
filename = File.dirname(__FILE__) + "/../data/broken_psv.txt"
|
9
9
|
results = Masticate.mend(filename, :col_sep => '|', :output => "/dev/null")
|
10
|
-
results[:input_count].should ==
|
10
|
+
results[:input_count].should == 6
|
11
11
|
results[:output_count].should == 5
|
12
12
|
end
|
13
13
|
|
14
14
|
it "should strip trailer records" do
|
15
15
|
filename = File.dirname(__FILE__) + "/../data/junk_trailer.txt"
|
16
16
|
results = Masticate.mend(filename, :col_sep => '|', :output => "/dev/null", :dejunk => true)
|
17
|
-
results[:input_count].should ==
|
17
|
+
results[:input_count].should == 7
|
18
18
|
results[:output_count].should == 5
|
19
19
|
results[:headers].should == ['COL1', 'COL 2', 'Col 3', 'col-4', 'col5', 'col6']
|
20
20
|
end
|
@@ -22,7 +22,7 @@ describe "mending" do
|
|
22
22
|
it "should snip head fields" do
|
23
23
|
filename = File.dirname(__FILE__) + "/../data/junk_header.csv"
|
24
24
|
results = Masticate.mend(filename, :col_sep => ',', :snip => 1, :output => "/dev/null")
|
25
|
-
results[:input_count].should ==
|
25
|
+
results[:input_count].should == 5
|
26
26
|
results[:output_count].should == 5
|
27
27
|
results[:headers].should == %w(hospid usrorder dteorder usrsend dtesend usrdone dtedone department)
|
28
28
|
end
|
@@ -34,7 +34,7 @@ describe "mending" do
|
|
34
34
|
output = File.read(tmp)
|
35
35
|
correct_output = File.read(File.dirname(__FILE__) + "/../data/inlined_headers.csv.output")
|
36
36
|
|
37
|
-
results[:input_count].should ==
|
37
|
+
results[:input_count].should == 10
|
38
38
|
results[:output_count].should == 11
|
39
39
|
# results[:field_counts].should == {11 => 11}
|
40
40
|
output.should == correct_output
|
data/spec/lib/plucker_spec.rb
CHANGED
@@ -12,7 +12,7 @@ describe "plucker" do
|
|
12
12
|
correct_output = File.read(File.dirname(__FILE__) + "/../data/namedcols.csv.output")
|
13
13
|
tmp.unlink
|
14
14
|
|
15
|
-
results[:input_count].should ==
|
15
|
+
results[:input_count].should == 4
|
16
16
|
output.should == correct_output
|
17
17
|
end
|
18
18
|
|
@@ -24,7 +24,7 @@ describe "plucker" do
|
|
24
24
|
correct_output = File.read(File.dirname(__FILE__) + "/../data/namedcols.csv.output")
|
25
25
|
tmp.unlink
|
26
26
|
|
27
|
-
results[:input_count].should ==
|
27
|
+
results[:input_count].should == 4
|
28
28
|
output.should == correct_output
|
29
29
|
end
|
30
30
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: masticate
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 0.3.1
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,22 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-05-
|
12
|
+
date: 2012-05-30 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: rake
|
16
|
+
requirement: &2153051960 !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ~>
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: 0.9.2
|
22
|
+
type: :development
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: *2153051960
|
14
25
|
- !ruby/object:Gem::Dependency
|
15
26
|
name: rspec
|
16
|
-
requirement: &
|
27
|
+
requirement: &2153051460 !ruby/object:Gem::Requirement
|
17
28
|
none: false
|
18
29
|
requirements:
|
19
30
|
- - ~>
|
@@ -21,10 +32,10 @@ dependencies:
|
|
21
32
|
version: 2.9.0
|
22
33
|
type: :development
|
23
34
|
prerelease: false
|
24
|
-
version_requirements: *
|
35
|
+
version_requirements: *2153051460
|
25
36
|
- !ruby/object:Gem::Dependency
|
26
37
|
name: guard-rspec
|
27
|
-
requirement: &
|
38
|
+
requirement: &2153051000 !ruby/object:Gem::Requirement
|
28
39
|
none: false
|
29
40
|
requirements:
|
30
41
|
- - ~>
|
@@ -32,10 +43,10 @@ dependencies:
|
|
32
43
|
version: 0.7.0
|
33
44
|
type: :development
|
34
45
|
prerelease: false
|
35
|
-
version_requirements: *
|
46
|
+
version_requirements: *2153051000
|
36
47
|
- !ruby/object:Gem::Dependency
|
37
48
|
name: ruby_gntp
|
38
|
-
requirement: &
|
49
|
+
requirement: &2153050540 !ruby/object:Gem::Requirement
|
39
50
|
none: false
|
40
51
|
requirements:
|
41
52
|
- - ~>
|
@@ -43,7 +54,7 @@ dependencies:
|
|
43
54
|
version: 0.3.4
|
44
55
|
type: :development
|
45
56
|
prerelease: false
|
46
|
-
version_requirements: *
|
57
|
+
version_requirements: *2153050540
|
47
58
|
description: Data file crunching
|
48
59
|
email:
|
49
60
|
- jmay@pobox.com
|
@@ -53,6 +64,7 @@ extensions: []
|
|
53
64
|
extra_rdoc_files: []
|
54
65
|
files:
|
55
66
|
- .gitignore
|
67
|
+
- .travis.yml
|
56
68
|
- Gemfile
|
57
69
|
- Guardfile
|
58
70
|
- LICENSE
|
@@ -79,6 +91,8 @@ files:
|
|
79
91
|
- spec/data/badnums_fixed.csv
|
80
92
|
- spec/data/broken_psv.txt
|
81
93
|
- spec/data/concat_result.txt
|
94
|
+
- spec/data/cookery_input.psv
|
95
|
+
- spec/data/cookery_result.csv
|
82
96
|
- spec/data/cooking_mend_result.csv
|
83
97
|
- spec/data/cooking_result.csv
|
84
98
|
- spec/data/datify_input.csv
|
@@ -96,6 +110,7 @@ files:
|
|
96
110
|
- spec/data/pipe_data.txt
|
97
111
|
- spec/data/quoted_csv_data.txt
|
98
112
|
- spec/data/recipe.txt
|
113
|
+
- spec/data/recipe_cookery.txt
|
99
114
|
- spec/data/recipe_mend.txt
|
100
115
|
- spec/data/relabel_result.csv
|
101
116
|
- spec/data/tabbed_data.txt
|
@@ -141,6 +156,8 @@ test_files:
|
|
141
156
|
- spec/data/badnums_fixed.csv
|
142
157
|
- spec/data/broken_psv.txt
|
143
158
|
- spec/data/concat_result.txt
|
159
|
+
- spec/data/cookery_input.psv
|
160
|
+
- spec/data/cookery_result.csv
|
144
161
|
- spec/data/cooking_mend_result.csv
|
145
162
|
- spec/data/cooking_result.csv
|
146
163
|
- spec/data/datify_input.csv
|
@@ -158,6 +175,7 @@ test_files:
|
|
158
175
|
- spec/data/pipe_data.txt
|
159
176
|
- spec/data/quoted_csv_data.txt
|
160
177
|
- spec/data/recipe.txt
|
178
|
+
- spec/data/recipe_cookery.txt
|
161
179
|
- spec/data/recipe_mend.txt
|
162
180
|
- spec/data/relabel_result.csv
|
163
181
|
- spec/data/tabbed_data.txt
|