masticate 0.2.3 → 0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/masticate/base.rb +10 -2
- data/lib/masticate/cook.rb +0 -1
- data/lib/masticate/exclude.rb +13 -1
- data/lib/masticate/mender.rb +67 -3
- data/lib/masticate/myoptparse.rb +3 -1
- data/lib/masticate/version.rb +1 -1
- data/spec/data/cooking_mend_result.csv +4 -0
- data/spec/data/datify_input.csv +11 -0
- data/spec/data/datify_result.csv +11 -0
- data/spec/data/recipe_mend.txt +3 -0
- data/spec/lib/cook_spec.rb +11 -0
- data/spec/lib/datify_spec.rb +7 -8
- data/spec/lib/exclude_spec.rb +10 -0
- data/spec/lib/mender_spec.rb +1 -1
- metadata +16 -10
- data/spec/data/datify_input.txt +0 -20
data/lib/masticate/base.rb
CHANGED
@@ -60,7 +60,14 @@ class Masticate::Base
|
|
60
60
|
@output_count = 0
|
61
61
|
with_input do |input|
|
62
62
|
while line = get
|
63
|
-
row = CSV.parse_line(line, csv_options)
|
63
|
+
row = CSV.parse_line(line, csv_options) #.map {|s| s && s.strip}
|
64
|
+
if row
|
65
|
+
row = row.map {|s| s && s.strip}
|
66
|
+
end
|
67
|
+
# row2 = row.map {|s| s && s.strip}
|
68
|
+
# if row2.nil?
|
69
|
+
# puts "**** ROW IS [#{row.inspect}]"
|
70
|
+
# end
|
64
71
|
output = crunch(row)
|
65
72
|
emit(output) if output
|
66
73
|
end
|
@@ -70,7 +77,8 @@ class Masticate::Base
|
|
70
77
|
|
71
78
|
{
|
72
79
|
:input_count => input_count,
|
73
|
-
:output_count => @output_count
|
80
|
+
:output_count => @output_count,
|
81
|
+
:headers => @headers
|
74
82
|
}
|
75
83
|
end
|
76
84
|
end
|
data/lib/masticate/cook.rb
CHANGED
data/lib/masticate/exclude.rb
CHANGED
@@ -19,7 +19,19 @@ class Masticate::Exclude < Masticate::Base
|
|
19
19
|
def crunch(row)
|
20
20
|
if !@headers
|
21
21
|
@headers = row
|
22
|
-
|
22
|
+
f = @field
|
23
|
+
@index =
|
24
|
+
case f
|
25
|
+
when Fixnum, /^\d+$/
|
26
|
+
f = f.to_i
|
27
|
+
if f > row.count
|
28
|
+
raise "Cannot pluck column #{f}, there are only #{row.count} fields"
|
29
|
+
else
|
30
|
+
f-1
|
31
|
+
end
|
32
|
+
else
|
33
|
+
row.index(f) or raise "Unable to find column '#{f}' in headers"
|
34
|
+
end
|
23
35
|
row
|
24
36
|
elsif row
|
25
37
|
if row[@index] == @value
|
data/lib/masticate/mender.rb
CHANGED
@@ -4,13 +4,77 @@
|
|
4
4
|
# (due to a newline embedded in a field). Glue those two lines into a single line in the output.
|
5
5
|
|
6
6
|
class Masticate::Mender < Masticate::Base
|
7
|
-
|
7
|
+
# Reads the mender-specific settings out of +opts+ and resets the per-run
# state used while rows are being stitched back together.
#
# opts - options object indexed with :inlined, :snip and :dejunk; it is also
#        handed unchanged to standard_options.
def configure(opts)
  standard_options(opts)

  @inlined, @snip, @dejunk = opts[:inlined], opts[:snip], opts[:dejunk]

  # Neither is known until the header row has been seen.
  @expected_field_count = nil
  @holding = []
end
|
12
17
|
|
18
|
+
# attr_reader :col_sep
|
19
|
+
|
20
|
+
# def initialize(filename)
|
21
|
+
# @filename = filename
|
22
|
+
# end
|
23
|
+
|
13
24
|
# Entry point for mending: passes +opts+ straight through to execute and
# returns whatever execute returns.
def mend(opts)
  execute(opts)
end
|
27
|
+
|
28
|
+
# Processes one parsed input row and returns the row to output, or nil when
# nothing should be output for this input row.
#
# Three modes, selected by state set up in configure:
#
# * @inlined - every row carries its own copy of the headers in its first
#   half and the values in its second half. The first row's header half is
#   remembered and passed to emit; every later row's header half must match
#   it exactly. The value half is returned.
# * no @headers yet - the first non-nil row is trusted as the header row.
#   An Integer @snip drops that many leading header cells.
# * otherwise - cells are accumulated in @holding until at least
#   @expected_field_count cells are available, then returned as one row.
#   With @dejunk set, a completed row with two or fewer non-blank cells is
#   suppressed.
#
# row - Array of cell strings (cells may be nil), or nil.
#
# Raises RuntimeError on an inlined header mismatch, on a String @snip
# (not implemented), or on an unrecognised @snip value.
def crunch(row)
  if @inlined
    if row
      # Index of the last header cell: headers occupy the first half of the row.
      ncells = row.count / 2 - 1
      if !@headers
        @headers = row[0..ncells]
        @expected_field_count = @headers.count
        emit(@headers)
      elsif row[0..ncells] != @headers
        raise "Header mismatch on line #{@input_count}\n Expected: #{@headers.join(',')}\n Found: #{row[0..ncells].join(',')}"
      end
      row = row[ncells + 1, @expected_field_count]
    end
  elsif !@headers
    # A nil row cannot serve as the header row; skip it and keep waiting
    # instead of failing on nil.count / nil.shift below.
    return nil if row.nil?

    # trust the first row
    @headers = row
    case @snip
    when Integer
      # Integer rather than Fixnum: the Fixnum constant was removed in
      # Ruby 3.2, and Integer matches the same values on older Rubies.
      @headers.shift(@snip)
    when String
      raise "TODO: snip named header. Multiple?"
    when nil
      # do nothing
    else
      raise "Do not understand snip instruction [#{@snip.inspect}]"
    end
    @expected_field_count = @headers.count
    row = @headers
  elsif row
    @holding += row
    if @holding.count < @expected_field_count
      # incomplete row; do not emit anything
      row = nil
    else
      row = @holding
      @holding = []
    end

    if @dejunk && row && row.select { |s| s && !s.strip.empty? }.count <= 2
      # junky row, suppress output
      nil
    else
      row
    end
  end
end
|
76
|
+
|
77
|
+
def old_mend(opts)
|
14
78
|
@output = opts[:output] ? File.open(opts[:output], "w") : $stdout
|
15
79
|
@col_sep = opts[:col_sep] || ','
|
16
80
|
@quote_char = opts[:quote_char] || "\0"
|
data/lib/masticate/myoptparse.rb
CHANGED
@@ -91,7 +91,8 @@ class Masticate::MyOptionParser
|
|
91
91
|
'maxrows' => Masticate::MaxRows,
|
92
92
|
'relabel' => Masticate::Relabel,
|
93
93
|
'pluck' => Masticate::Plucker,
|
94
|
-
'exclude' => Masticate::Exclude
|
94
|
+
'exclude' => Masticate::Exclude,
|
95
|
+
'mend' => Masticate::Mender
|
95
96
|
}
|
96
97
|
|
97
98
|
klass = klasses[command]
|
@@ -165,6 +166,7 @@ EOT
|
|
165
166
|
* masticate #{command} (#{options.keys.join(', ')})
|
166
167
|
Lines in input: #{results[:input_count]}
|
167
168
|
Lines in output: #{results[:output_count]}
|
169
|
+
Headers: #{results[:headers].inspect}
|
168
170
|
EOT
|
169
171
|
if results[:field_counts]
|
170
172
|
$stderr.puts " Field counts: #{results[:field_counts].inspect}"
|
data/lib/masticate/version.rb
CHANGED
@@ -0,0 +1,11 @@
|
|
1
|
+
LAST_NAME,FIRST_NAME,MIDDLE_INIT,DEPT #,Empl #,timestamp,Term Date,Status,R_NAME,SEX,BIRTHDATE
|
2
|
+
WASHINGTON,GEORGE,D,824,9556,09/10/2005 4:23:16PM,07/01/2006,TM,Surgical House Staff,M,09/23/1975
|
3
|
+
JEFFERSON,TOM,,621,8052,07/23/2001 7:23:11AM,01/28/2011,TM,Telemetry,F,12/24/1976
|
4
|
+
ADAMS,JOHN,,655,8834,09/22/2003 01:23:45PM,,WA,6 East,F,08/07/1978
|
5
|
+
ADAMS,JOHN QUINCY,A,209,8637,02/24/2003 02:34:00AM,12/02/2007,TM,Imaging Svcs - MRI,F,11/03/1966
|
6
|
+
HAMILTON,ANDREW,,278,10065,01/09/2007 02:34:00AM,11/16/2007,TM,Information Technology,M,09/16/1968
|
7
|
+
MADISON,JAMES,F,672,10720,01/05/2009 02:34:00AM,02/16/2010,TM,Rehab Svcs - Outpatients,F,04/15/1985
|
8
|
+
FRANKLIN,BENJAMIN,R,674,8340,05/01/2002 02:34:00AM,09/01/2003,TM,"Rehab Svcs, xyz",F,03/15/1973
|
9
|
+
LINCOLN,ABRAHAM,M,634,11340,05/02/2011 02:34:00AM,,PN,Sibley Ambulatory Surgery Ctr,F,07/11/1960
|
10
|
+
MONROE,JAMES,L,614,10757,02/16/2009 02:34:00AM,,RF,Labor & Delivery,F,11/06/1983
|
11
|
+
REVERE,PAUL,B,424,8568,11/18/2002 02:34:00AM,06/27/2006,TM,Laundry & Linen,M,12/31/1976
|
@@ -0,0 +1,11 @@
|
|
1
|
+
LAST_NAME,FIRST_NAME,MIDDLE_INIT,DEPT #,Empl #,timestamp,Term Date,Status,R_NAME,SEX,BIRTHDATE
|
2
|
+
WASHINGTON,GEORGE,D,824,9556,1126369396,07/01/2006,TM,Surgical House Staff,M,09/23/1975
|
3
|
+
JEFFERSON,TOM,,621,8052,995872991,01/28/2011,TM,Telemetry,F,12/24/1976
|
4
|
+
ADAMS,JOHN,,655,8834,1064237025,,WA,6 East,F,08/07/1978
|
5
|
+
ADAMS,JOHN QUINCY,A,209,8637,1046054040,12/02/2007,TM,Imaging Svcs - MRI,F,11/03/1966
|
6
|
+
HAMILTON,ANDREW,,278,10065,1168310040,11/16/2007,TM,Information Technology,M,09/16/1968
|
7
|
+
MADISON,JAMES,F,672,10720,1231122840,02/16/2010,TM,Rehab Svcs - Outpatients,F,04/15/1985
|
8
|
+
FRANKLIN,BENJAMIN,R,674,8340,1020220440,09/01/2003,TM,"Rehab Svcs, xyz",F,03/15/1973
|
9
|
+
LINCOLN,ABRAHAM,M,634,11340,1304303640,,PN,Sibley Ambulatory Surgery Ctr,F,07/11/1960
|
10
|
+
MONROE,JAMES,L,614,10757,1234751640,,RF,Labor & Delivery,F,11/06/1983
|
11
|
+
REVERE,PAUL,B,424,8568,1037586840,06/27/2006,TM,Laundry & Linen,M,12/31/1976
|
data/spec/lib/cook_spec.rb
CHANGED
@@ -13,4 +13,15 @@ describe "cooking up a recipe" do
|
|
13
13
|
|
14
14
|
output.should == correct_output
|
15
15
|
end
|
16
|
+
|
17
|
+
# Cooks broken_psv.txt (pipe-separated) with the recipe_mend.txt recipe and
# compares the generated file against the stored cooking_mend_result.csv.
it "should allow mend in recipe" do
  here = File.dirname(__FILE__)
  source_path = here + "/../data/broken_psv.txt"
  recipe_path = here + "/../data/recipe_mend.txt"
  scratch = Tempfile.new('cooked')

  Masticate.cook(source_path, :col_sep => '|', :output => scratch, :recipe => recipe_path)

  actual = File.read(scratch)
  expected = File.read(here + "/../data/cooking_mend_result.csv")
  actual.should == expected
end
|
16
27
|
end
|
data/spec/lib/datify_spec.rb
CHANGED
@@ -2,15 +2,14 @@
|
|
2
2
|
|
3
3
|
require "spec_helper"
|
4
4
|
|
5
|
-
describe "
|
6
|
-
it "should
|
7
|
-
filename = File.dirname(__FILE__) + "/../data/
|
8
|
-
tmp = Tempfile.new('
|
9
|
-
results = Masticate.
|
5
|
+
# Runs Masticate.datify over datify_input.csv, converting the 'timestamp'
# column using the given strptime-style format, and checks the generated
# file against the stored datify_result.csv.
describe "datification" do
  it "should transform dates" do
    filename = File.dirname(__FILE__) + "/../data/datify_input.csv"
    tmp = Tempfile.new('datify')
    results = Masticate.datify(filename, :output => tmp, :field => 'timestamp', :format => '%m/%d/%Y %H:%M:%S%p')
    output = File.read(tmp)
    tmp.unlink

    correct_output = File.read(File.dirname(__FILE__) + "/../data/datify_result.csv")

    # Without this expectation the example can never fail: the expected
    # file was read but never compared with the actual output.
    output.should == correct_output
  end
end
|
data/spec/lib/exclude_spec.rb
CHANGED
@@ -12,4 +12,14 @@ describe "exclude" do
|
|
12
12
|
|
13
13
|
output.should == correct_output
|
14
14
|
end
|
15
|
+
|
16
|
+
# Same check as excluding by header name, but the column is identified by
# the integer 1 passed as :field; rows whose value in that column equals ''
# are expected to be missing from the output (compared with
# exclude_results.csv).
it "should be able to exclude rows by number as well as name" do
  here = File.dirname(__FILE__)
  scratch = Tempfile.new('exclude')

  Masticate.exclude(here + "/../data/exclude_input.csv", :output => scratch, :field => 1, :value => '')

  actual = File.read(scratch)
  expected = File.read(here + "/../data/exclude_results.csv")
  actual.should == expected
end
|
15
25
|
end
|
data/spec/lib/mender_spec.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: masticate
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: '0.3'
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-
|
12
|
+
date: 2012-05-11 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rspec
|
16
|
-
requirement: &
|
16
|
+
requirement: &2152079220 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ~>
|
@@ -21,10 +21,10 @@ dependencies:
|
|
21
21
|
version: 2.9.0
|
22
22
|
type: :development
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *2152079220
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: guard-rspec
|
27
|
-
requirement: &
|
27
|
+
requirement: &2152076120 !ruby/object:Gem::Requirement
|
28
28
|
none: false
|
29
29
|
requirements:
|
30
30
|
- - ~>
|
@@ -32,10 +32,10 @@ dependencies:
|
|
32
32
|
version: 0.7.0
|
33
33
|
type: :development
|
34
34
|
prerelease: false
|
35
|
-
version_requirements: *
|
35
|
+
version_requirements: *2152076120
|
36
36
|
- !ruby/object:Gem::Dependency
|
37
37
|
name: ruby_gntp
|
38
|
-
requirement: &
|
38
|
+
requirement: &2152074480 !ruby/object:Gem::Requirement
|
39
39
|
none: false
|
40
40
|
requirements:
|
41
41
|
- - ~>
|
@@ -43,7 +43,7 @@ dependencies:
|
|
43
43
|
version: 0.3.4
|
44
44
|
type: :development
|
45
45
|
prerelease: false
|
46
|
-
version_requirements: *
|
46
|
+
version_requirements: *2152074480
|
47
47
|
description: Data file crunching
|
48
48
|
email:
|
49
49
|
- jmay@pobox.com
|
@@ -79,8 +79,10 @@ files:
|
|
79
79
|
- spec/data/badnums_fixed.csv
|
80
80
|
- spec/data/broken_psv.txt
|
81
81
|
- spec/data/concat_result.txt
|
82
|
+
- spec/data/cooking_mend_result.csv
|
82
83
|
- spec/data/cooking_result.csv
|
83
|
-
- spec/data/datify_input.
|
84
|
+
- spec/data/datify_input.csv
|
85
|
+
- spec/data/datify_result.csv
|
84
86
|
- spec/data/events.csv
|
85
87
|
- spec/data/events_reduced.csv
|
86
88
|
- spec/data/exclude_input.csv
|
@@ -94,6 +96,7 @@ files:
|
|
94
96
|
- spec/data/pipe_data.txt
|
95
97
|
- spec/data/quoted_csv_data.txt
|
96
98
|
- spec/data/recipe.txt
|
99
|
+
- spec/data/recipe_mend.txt
|
97
100
|
- spec/data/relabel_result.csv
|
98
101
|
- spec/data/tabbed_data.txt
|
99
102
|
- spec/data/tilde_data.txt
|
@@ -138,8 +141,10 @@ test_files:
|
|
138
141
|
- spec/data/badnums_fixed.csv
|
139
142
|
- spec/data/broken_psv.txt
|
140
143
|
- spec/data/concat_result.txt
|
144
|
+
- spec/data/cooking_mend_result.csv
|
141
145
|
- spec/data/cooking_result.csv
|
142
|
-
- spec/data/datify_input.
|
146
|
+
- spec/data/datify_input.csv
|
147
|
+
- spec/data/datify_result.csv
|
143
148
|
- spec/data/events.csv
|
144
149
|
- spec/data/events_reduced.csv
|
145
150
|
- spec/data/exclude_input.csv
|
@@ -153,6 +158,7 @@ test_files:
|
|
153
158
|
- spec/data/pipe_data.txt
|
154
159
|
- spec/data/quoted_csv_data.txt
|
155
160
|
- spec/data/recipe.txt
|
161
|
+
- spec/data/recipe_mend.txt
|
156
162
|
- spec/data/relabel_result.csv
|
157
163
|
- spec/data/tabbed_data.txt
|
158
164
|
- spec/data/tilde_data.txt
|
data/spec/data/datify_input.txt
DELETED
@@ -1,20 +0,0 @@
|
|
1
|
-
ChangeDTTM|AuditEventID|AuditEventCode|AuditEventTypeID|AuditEventName|AuditEventDesc|AuditByID
|
2
|
-
12/31/2011 3:41:54PM|57,314,552.00|LOGGEDOUT|30|Logged out|Logged out|4,426.00
|
3
|
-
1/8/2012 9:21:23AM|57,486,988.00|LOGGEDOUT|30|Logged out|Logged out|4,573.00
|
4
|
-
1/8/2012 9:21:24AM|57,486,989.00|LOGGEDOUT|30|Logged out|Logged out|4,573.00
|
5
|
-
2/19/2012 6:08:59PM|58,597,521.00|LOGGEDOUT|30|Logged out|Logged out|4,573.00
|
6
|
-
2/19/2012 7:02:50PM|58,597,816.00|LOGGEDOUT|30|Logged out|Logged out|4,573.00
|
7
|
-
2/19/2012 7:02:50PM|58,597,817.00|LOGGEDOUT|30|Logged out|Logged out|4,573.00
|
8
|
-
2/4/2012 9:29:20AM|58,201,259.00|LOGGEDOUT|30|Logged out|Logged out|4,573.00
|
9
|
-
2/4/2012 12:19:30AM|58,196,973.00|LOGGEDOUT|30|Logged out|Logged out|4,573.00
|
10
|
-
2/4/2012 12:20:53AM|58,196,975.00|LOGGEDOUT|30|Logged out|Logged out|4,573.00
|
11
|
-
2/3/2012 10:44:25PM|58,196,507.00|LOGGEDOUT|30|Logged out|Logged out|4,573.00
|
12
|
-
1/20/2012 8:01:07PM|57,833,496.00|LOGGEDOUT|30|Logged out|Logged out|4,573.00
|
13
|
-
1/22/2012 4:20:48AM|57,845,095.00|LOGGEDOUT|30|Logged out|Logged out|4,573.00
|
14
|
-
1/22/2012 4:20:49AM|57,845,096.00|LOGGEDOUT|30|Logged out|Logged out|4,573.00
|
15
|
-
1/14/2012 7:50:40AM|57,661,613.00|LOGGEDOUT|30|Logged out|Logged out|5,019.00
|
16
|
-
2/26/2012 7:48:07AM|58,776,243.00|LOGGEDOUT|30|Logged out|Logged out|5,019.00
|
17
|
-
2/23/2012 7:47:39AM|58,699,535.00|LOGGEDOUT|30|Logged out|Logged out|5,019.00
|
18
|
-
2/23/2012 7:39:56AM|58,699,387.00|LOGGEDOUT|30|Logged out|Logged out|5,019.00
|
19
|
-
2/23/2012 7:39:56AM|58,699,388.00|LOGGEDOUT|30|Logged out|Logged out|5,019.00
|
20
|
-
2/15/2012 3:15:06PM|58,502,037.00|LOGGEDOUT|30|Logged out|Logged out|5,019.00
|