masticate 0.2.3 → 0.3

Sign up to get free protection for your applications and to get access to all the features.
@@ -60,7 +60,14 @@ class Masticate::Base
60
60
  @output_count = 0
61
61
  with_input do |input|
62
62
  while line = get
63
- row = CSV.parse_line(line, csv_options)
63
+ row = CSV.parse_line(line, csv_options) #.map {|s| s && s.strip}
64
+ if row
65
+ row = row.map {|s| s && s.strip}
66
+ end
67
+ # row2 = row.map {|s| s && s.strip}
68
+ # if row2.nil?
69
+ # puts "**** ROW IS [#{row.inspect}]"
70
+ # end
64
71
  output = crunch(row)
65
72
  emit(output) if output
66
73
  end
@@ -70,7 +77,8 @@ class Masticate::Base
70
77
 
71
78
  {
72
79
  :input_count => input_count,
73
- :output_count => @output_count
80
+ :output_count => @output_count,
81
+ :headers => @headers
74
82
  }
75
83
  end
76
84
  end
@@ -19,7 +19,6 @@ class Masticate::Cook < Masticate::Base
19
19
  standard_options(opts)
20
20
 
21
21
  steps = recipe.map do |step|
22
- # puts step
23
22
  argv = Shellwords.split(step)
24
23
  masticator = Masticate::MyOptionParser.new
25
24
  command, options = masticator.parse(argv)
@@ -19,7 +19,19 @@ class Masticate::Exclude < Masticate::Base
19
19
  def crunch(row)
20
20
  if !@headers
21
21
  @headers = row
22
- @index = @headers.index(@field) or raise "Unable to find column '#{@field}' in headers"
22
+ f = @field
23
+ @index =
24
+ case f
25
+ when Fixnum, /^\d+$/
26
+ f = f.to_i
27
+ if f > row.count
28
+ raise "Cannot pluck column #{f}, there are only #{row.count} fields"
29
+ else
30
+ f-1
31
+ end
32
+ else
33
+ row.index(f) or raise "Unable to find column '#{f}' in headers"
34
+ end
23
35
  row
24
36
  elsif row
25
37
  if row[@index] == @value
@@ -4,13 +4,77 @@
4
4
  # (due to a newline embedded in a field). Glue those two lines into a single line in the output.
5
5
 
6
6
  class Masticate::Mender < Masticate::Base
7
- attr_reader :col_sep
7
# Reads the mender's settings out of +opts+ and resets the row-assembly state.
#
# opts - options hash; :inlined, :snip and :dejunk are read here, and the
#        whole hash is first handed to standard_options.
#
# Returns the (empty) holding buffer, as the last assignment's value.
def configure(opts)
  standard_options(opts)

  # Mode switches consulted by crunch.
  @inlined, @snip, @dejunk = opts[:inlined], opts[:snip], opts[:dejunk]

  # Unknown until crunch has seen the header row.
  @expected_field_count = nil
  # Cells collected so far for a row that is still being assembled.
  @holding = []
end
12
17
 
18
+ # attr_reader :col_sep
19
+
20
+ # def initialize(filename)
21
+ # @filename = filename
22
+ # end
23
+
13
24
  def mend(opts)
25
+ execute(opts)
26
+ end
27
+
28
# Mends a single parsed row and returns the row to emit, or nil when nothing
# should be emitted for this input.
#
# Behaviour depends on instance state:
#
# * @inlined - the first half of every row (row.count / 2 cells) is treated
#   as header labels and the cells after it as values. The labels of the
#   first row become @headers and are emitted directly; every later row must
#   repeat the same labels or a RuntimeError is raised. The value cells are
#   returned.
# * no @headers yet - the first non-nil row is trusted as the header row. An
#   Integer @snip drops that many leading headers; a String @snip is not
#   implemented yet and raises; any other non-nil @snip raises.
# * otherwise - cells are accumulated in @holding until at least
#   @expected_field_count cells are available, then returned as one row.
#   With @dejunk set, a completed row with two or fewer non-blank cells is
#   suppressed.
#
# row - Array of cells, or nil.
#
# Raises RuntimeError on a header mismatch or an unsupported @snip value.
def crunch(row)
  if @inlined
    return nil unless row

    ncells = row.count / 2 - 1
    labels = row[0..ncells]
    if !@headers
      @headers = labels
      @expected_field_count = @headers.count
      emit(@headers)
    elsif labels != @headers
      raise "Header mismatch on line #{@input_count}\n Expected: #{@headers.join(',')}\n Found: #{labels.join(',')}"
    end
    row[ncells + 1, @expected_field_count]
  elsif !@headers
    # A nil row cannot serve as the header row; skip it instead of failing
    # later with NoMethodError on nil.
    return nil unless row

    # trust the first row
    @headers = row
    case @snip
    when Integer
      # Integer rather than Fixnum: Fixnum was deprecated in Ruby 2.4 and
      # removed in 3.2, while Integer matches on old and new Rubies alike.
      @headers.shift(@snip)
    when String
      raise "TODO: snip named header. Multiple?"
    when nil
      # do nothing
    else
      raise "Do not understand snip instruction [#{@snip.inspect}]"
    end
    @expected_field_count = @headers.count
    @headers
  elsif row
    @holding += row
    # incomplete row; do not emit anything
    return nil if @holding.count < @expected_field_count

    row = @holding
    @holding = []

    if @dejunk && row.count { |s| s && !s.strip.empty? } <= 2
      # junky row, suppress output
      nil
    else
      row
    end
  end
end
76
+
77
+ def old_mend(opts)
14
78
  @output = opts[:output] ? File.open(opts[:output], "w") : $stdout
15
79
  @col_sep = opts[:col_sep] || ','
16
80
  @quote_char = opts[:quote_char] || "\0"
@@ -91,7 +91,8 @@ class Masticate::MyOptionParser
91
91
  'maxrows' => Masticate::MaxRows,
92
92
  'relabel' => Masticate::Relabel,
93
93
  'pluck' => Masticate::Plucker,
94
- 'exclude' => Masticate::Exclude
94
+ 'exclude' => Masticate::Exclude,
95
+ 'mend' => Masticate::Mender
95
96
  }
96
97
 
97
98
  klass = klasses[command]
@@ -165,6 +166,7 @@ EOT
165
166
  * masticate #{command} (#{options.keys.join(', ')})
166
167
  Lines in input: #{results[:input_count]}
167
168
  Lines in output: #{results[:output_count]}
169
+ Headers: #{results[:headers].inspect}
168
170
  EOT
169
171
  if results[:field_counts]
170
172
  $stderr.puts " Field counts: #{results[:field_counts].inspect}"
@@ -1,3 +1,3 @@
1
1
  module Masticate
2
- VERSION = "0.2.3"
2
+ VERSION = "0.3"
3
3
  end
@@ -0,0 +1,4 @@
1
+ Col 3
2
+ data
3
+ this long row
4
+ data
@@ -0,0 +1,11 @@
1
+ LAST_NAME,FIRST_NAME,MIDDLE_INIT,DEPT #,Empl #,timestamp,Term Date,Status,R_NAME,SEX,BIRTHDATE
2
+ WASHINGTON,GEORGE,D,824,9556,09/10/2005 4:23:16PM,07/01/2006,TM,Surgical House Staff,M,09/23/1975
3
+ JEFFERSON,TOM,,621,8052,07/23/2001 7:23:11AM,01/28/2011,TM,Telemetry,F,12/24/1976
4
+ ADAMS,JOHN,,655,8834,09/22/2003 01:23:45PM,,WA,6 East,F,08/07/1978
5
+ ADAMS,JOHN QUINCY,A,209,8637,02/24/2003 02:34:00AM,12/02/2007,TM,Imaging Svcs - MRI,F,11/03/1966
6
+ HAMILTON,ANDREW,,278,10065,01/09/2007 02:34:00AM,11/16/2007,TM,Information Technology,M,09/16/1968
7
+ MADISON,JAMES,F,672,10720,01/05/2009 02:34:00AM,02/16/2010,TM,Rehab Svcs - Outpatients,F,04/15/1985
8
+ FRANKLIN,BENJAMIN,R,674,8340,05/01/2002 02:34:00AM,09/01/2003,TM,"Rehab Svcs, xyz",F,03/15/1973
9
+ LINCOLN,ABRAHAM,M,634,11340,05/02/2011 02:34:00AM,,PN,Sibley Ambulatory Surgery Ctr,F,07/11/1960
10
+ MONROE,JAMES,L,614,10757,02/16/2009 02:34:00AM,,RF,Labor & Delivery,F,11/06/1983
11
+ REVERE,PAUL,B,424,8568,11/18/2002 02:34:00AM,06/27/2006,TM,Laundry & Linen,M,12/31/1976
@@ -0,0 +1,11 @@
1
+ LAST_NAME,FIRST_NAME,MIDDLE_INIT,DEPT #,Empl #,timestamp,Term Date,Status,R_NAME,SEX,BIRTHDATE
2
+ WASHINGTON,GEORGE,D,824,9556,1126369396,07/01/2006,TM,Surgical House Staff,M,09/23/1975
3
+ JEFFERSON,TOM,,621,8052,995872991,01/28/2011,TM,Telemetry,F,12/24/1976
4
+ ADAMS,JOHN,,655,8834,1064237025,,WA,6 East,F,08/07/1978
5
+ ADAMS,JOHN QUINCY,A,209,8637,1046054040,12/02/2007,TM,Imaging Svcs - MRI,F,11/03/1966
6
+ HAMILTON,ANDREW,,278,10065,1168310040,11/16/2007,TM,Information Technology,M,09/16/1968
7
+ MADISON,JAMES,F,672,10720,1231122840,02/16/2010,TM,Rehab Svcs - Outpatients,F,04/15/1985
8
+ FRANKLIN,BENJAMIN,R,674,8340,1020220440,09/01/2003,TM,"Rehab Svcs, xyz",F,03/15/1973
9
+ LINCOLN,ABRAHAM,M,634,11340,1304303640,,PN,Sibley Ambulatory Surgery Ctr,F,07/11/1960
10
+ MONROE,JAMES,L,614,10757,1234751640,,RF,Labor & Delivery,F,11/06/1983
11
+ REVERE,PAUL,B,424,8568,1037586840,06/27/2006,TM,Laundry & Linen,M,12/31/1976
@@ -0,0 +1,3 @@
1
+ mend
2
+ exclude --field 1 --value 'data2'
3
+ pluck --fields 3
@@ -13,4 +13,15 @@ describe "cooking up a recipe" do
13
13
 
14
14
  output.should == correct_output
15
15
  end
16
+
17
# Cooks the broken pipe-separated fixture with the mend recipe and compares
# the produced file with the stored expected CSV.
it "should allow mend in recipe" do
  here = File.dirname(__FILE__)
  cooked = Tempfile.new('cooked')

  results = Masticate.cook(
    here + "/../data/broken_psv.txt",
    :col_sep => '|',
    :output => cooked,
    :recipe => here + "/../data/recipe_mend.txt"
  )

  actual = File.read(cooked)
  expected = File.read(here + "/../data/cooking_mend_result.csv")

  actual.should == expected
end
16
27
  end
@@ -2,15 +2,14 @@
2
2
 
3
3
  require "spec_helper"
4
4
 
5
describe "datification" do
  # Converts the 'timestamp' column of the input fixture with the given
  # strptime-style format and checks the result against the stored fixture.
  it "should transform dates" do
    filename = File.dirname(__FILE__) + "/../data/datify_input.csv"
    tmp = Tempfile.new('datify')
    Masticate.datify(filename, :output => tmp, :field => 'timestamp', :format => '%m/%d/%Y %H:%M:%S%p')
    output = File.read(tmp)
    tmp.unlink

    correct_output = File.read(File.dirname(__FILE__) + "/../data/datify_result.csv")

    # Without this comparison the example passes no matter what datify writes.
    # NOTE(review): the expected values look like epoch seconds; confirm they
    # do not depend on the time zone of the machine running the spec.
    output.should == correct_output
  end
end
@@ -12,4 +12,14 @@ describe "exclude" do
12
12
 
13
13
  output.should == correct_output
14
14
  end
15
+
16
# Same exclusion as by name, but the field is given as the number 1 instead
# of a header label; rows whose value in that field is '' are excluded.
it "should be able to exclude rows by number as well as name" do
  here = File.dirname(__FILE__)
  scratch = Tempfile.new('exclude')

  results = Masticate.exclude(
    here + "/../data/exclude_input.csv",
    :output => scratch,
    :field => 1,
    :value => ''
  )

  actual = File.read(scratch)
  expected = File.read(here + "/../data/exclude_results.csv")

  actual.should == expected
end
15
25
  end
@@ -36,7 +36,7 @@ describe "mending" do
36
36
 
37
37
  results[:input_count].should == 11
38
38
  results[:output_count].should == 11
39
- results[:field_counts].should == {11 => 11}
39
+ # results[:field_counts].should == {11 => 11}
40
40
  output.should == correct_output
41
41
  end
42
42
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: masticate
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.3
4
+ version: '0.3'
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-04-25 00:00:00.000000000 Z
12
+ date: 2012-05-11 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rspec
16
- requirement: &2156995160 !ruby/object:Gem::Requirement
16
+ requirement: &2152079220 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ~>
@@ -21,10 +21,10 @@ dependencies:
21
21
  version: 2.9.0
22
22
  type: :development
23
23
  prerelease: false
24
- version_requirements: *2156995160
24
+ version_requirements: *2152079220
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: guard-rspec
27
- requirement: &2156994180 !ruby/object:Gem::Requirement
27
+ requirement: &2152076120 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ~>
@@ -32,10 +32,10 @@ dependencies:
32
32
  version: 0.7.0
33
33
  type: :development
34
34
  prerelease: false
35
- version_requirements: *2156994180
35
+ version_requirements: *2152076120
36
36
  - !ruby/object:Gem::Dependency
37
37
  name: ruby_gntp
38
- requirement: &2156992960 !ruby/object:Gem::Requirement
38
+ requirement: &2152074480 !ruby/object:Gem::Requirement
39
39
  none: false
40
40
  requirements:
41
41
  - - ~>
@@ -43,7 +43,7 @@ dependencies:
43
43
  version: 0.3.4
44
44
  type: :development
45
45
  prerelease: false
46
- version_requirements: *2156992960
46
+ version_requirements: *2152074480
47
47
  description: Data file crunching
48
48
  email:
49
49
  - jmay@pobox.com
@@ -79,8 +79,10 @@ files:
79
79
  - spec/data/badnums_fixed.csv
80
80
  - spec/data/broken_psv.txt
81
81
  - spec/data/concat_result.txt
82
+ - spec/data/cooking_mend_result.csv
82
83
  - spec/data/cooking_result.csv
83
- - spec/data/datify_input.txt
84
+ - spec/data/datify_input.csv
85
+ - spec/data/datify_result.csv
84
86
  - spec/data/events.csv
85
87
  - spec/data/events_reduced.csv
86
88
  - spec/data/exclude_input.csv
@@ -94,6 +96,7 @@ files:
94
96
  - spec/data/pipe_data.txt
95
97
  - spec/data/quoted_csv_data.txt
96
98
  - spec/data/recipe.txt
99
+ - spec/data/recipe_mend.txt
97
100
  - spec/data/relabel_result.csv
98
101
  - spec/data/tabbed_data.txt
99
102
  - spec/data/tilde_data.txt
@@ -138,8 +141,10 @@ test_files:
138
141
  - spec/data/badnums_fixed.csv
139
142
  - spec/data/broken_psv.txt
140
143
  - spec/data/concat_result.txt
144
+ - spec/data/cooking_mend_result.csv
141
145
  - spec/data/cooking_result.csv
142
- - spec/data/datify_input.txt
146
+ - spec/data/datify_input.csv
147
+ - spec/data/datify_result.csv
143
148
  - spec/data/events.csv
144
149
  - spec/data/events_reduced.csv
145
150
  - spec/data/exclude_input.csv
@@ -153,6 +158,7 @@ test_files:
153
158
  - spec/data/pipe_data.txt
154
159
  - spec/data/quoted_csv_data.txt
155
160
  - spec/data/recipe.txt
161
+ - spec/data/recipe_mend.txt
156
162
  - spec/data/relabel_result.csv
157
163
  - spec/data/tabbed_data.txt
158
164
  - spec/data/tilde_data.txt
@@ -1,20 +0,0 @@
1
- ChangeDTTM|AuditEventID|AuditEventCode|AuditEventTypeID|AuditEventName|AuditEventDesc|AuditByID
2
- 12/31/2011 3:41:54PM|57,314,552.00|LOGGEDOUT|30|Logged out|Logged out|4,426.00
3
- 1/8/2012 9:21:23AM|57,486,988.00|LOGGEDOUT|30|Logged out|Logged out|4,573.00
4
- 1/8/2012 9:21:24AM|57,486,989.00|LOGGEDOUT|30|Logged out|Logged out|4,573.00
5
- 2/19/2012 6:08:59PM|58,597,521.00|LOGGEDOUT|30|Logged out|Logged out|4,573.00
6
- 2/19/2012 7:02:50PM|58,597,816.00|LOGGEDOUT|30|Logged out|Logged out|4,573.00
7
- 2/19/2012 7:02:50PM|58,597,817.00|LOGGEDOUT|30|Logged out|Logged out|4,573.00
8
- 2/4/2012 9:29:20AM|58,201,259.00|LOGGEDOUT|30|Logged out|Logged out|4,573.00
9
- 2/4/2012 12:19:30AM|58,196,973.00|LOGGEDOUT|30|Logged out|Logged out|4,573.00
10
- 2/4/2012 12:20:53AM|58,196,975.00|LOGGEDOUT|30|Logged out|Logged out|4,573.00
11
- 2/3/2012 10:44:25PM|58,196,507.00|LOGGEDOUT|30|Logged out|Logged out|4,573.00
12
- 1/20/2012 8:01:07PM|57,833,496.00|LOGGEDOUT|30|Logged out|Logged out|4,573.00
13
- 1/22/2012 4:20:48AM|57,845,095.00|LOGGEDOUT|30|Logged out|Logged out|4,573.00
14
- 1/22/2012 4:20:49AM|57,845,096.00|LOGGEDOUT|30|Logged out|Logged out|4,573.00
15
- 1/14/2012 7:50:40AM|57,661,613.00|LOGGEDOUT|30|Logged out|Logged out|5,019.00
16
- 2/26/2012 7:48:07AM|58,776,243.00|LOGGEDOUT|30|Logged out|Logged out|5,019.00
17
- 2/23/2012 7:47:39AM|58,699,535.00|LOGGEDOUT|30|Logged out|Logged out|5,019.00
18
- 2/23/2012 7:39:56AM|58,699,387.00|LOGGEDOUT|30|Logged out|Logged out|5,019.00
19
- 2/23/2012 7:39:56AM|58,699,388.00|LOGGEDOUT|30|Logged out|Logged out|5,019.00
20
- 2/15/2012 3:15:06PM|58,502,037.00|LOGGEDOUT|30|Logged out|Logged out|5,019.00