masticate 0.2.3 → 0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -60,7 +60,14 @@ class Masticate::Base
60
60
  @output_count = 0
61
61
  with_input do |input|
62
62
  while line = get
63
- row = CSV.parse_line(line, csv_options)
63
+ row = CSV.parse_line(line, csv_options) #.map {|s| s && s.strip}
64
+ if row
65
+ row = row.map {|s| s && s.strip}
66
+ end
67
+ # row2 = row.map {|s| s && s.strip}
68
+ # if row2.nil?
69
+ # puts "**** ROW IS [#{row.inspect}]"
70
+ # end
64
71
  output = crunch(row)
65
72
  emit(output) if output
66
73
  end
@@ -70,7 +77,8 @@ class Masticate::Base
70
77
 
71
78
  {
72
79
  :input_count => input_count,
73
- :output_count => @output_count
80
+ :output_count => @output_count,
81
+ :headers => @headers
74
82
  }
75
83
  end
76
84
  end
@@ -19,7 +19,6 @@ class Masticate::Cook < Masticate::Base
19
19
  standard_options(opts)
20
20
 
21
21
  steps = recipe.map do |step|
22
- # puts step
23
22
  argv = Shellwords.split(step)
24
23
  masticator = Masticate::MyOptionParser.new
25
24
  command, options = masticator.parse(argv)
@@ -19,7 +19,19 @@ class Masticate::Exclude < Masticate::Base
19
19
  def crunch(row)
20
20
  if !@headers
21
21
  @headers = row
22
- @index = @headers.index(@field) or raise "Unable to find column '#{@field}' in headers"
22
+ f = @field
23
+ @index =
24
+ case f
25
+ when Fixnum, /^\d+$/
26
+ f = f.to_i
27
+ if f > row.count
28
+ raise "Cannot pluck column #{f}, there are only #{row.count} fields"
29
+ else
30
+ f-1
31
+ end
32
+ else
33
+ row.index(f) or raise "Unable to find column '#{f}' in headers"
34
+ end
23
35
  row
24
36
  elsif row
25
37
  if row[@index] == @value
@@ -4,13 +4,77 @@
4
4
  # (due to a newline embedded in a field). Glue those two lines into a single line in the output.
5
5
 
6
6
  class Masticate::Mender < Masticate::Base
7
- attr_reader :col_sep
7
+ def configure(opts)
8
+ standard_options(opts)
8
9
 
9
- def initialize(filename)
10
- @filename = filename
10
+ @inlined = opts[:inlined]
11
+ @snip = opts[:snip]
12
+ @dejunk = opts[:dejunk]
13
+
14
+ @expected_field_count = nil
15
+ @holding = []
11
16
  end
12
17
 
18
+ # attr_reader :col_sep
19
+
20
+ # def initialize(filename)
21
+ # @filename = filename
22
+ # end
23
+
13
24
  def mend(opts)
25
+ execute(opts)
26
+ end
27
+
28
+ def crunch(row)
29
+ if @inlined
30
+ if row
31
+ ncells = row.count/2-1
32
+ if !@headers
33
+ @headers = row[0..ncells]
34
+ @expected_field_count = @headers.count
35
+ emit(@headers)
36
+ else
37
+ if row[0..ncells] != @headers
38
+ raise "Header mismatch on line #{@input_count}\n Expected: #{@headers.join(',')}\n Found: #{row[0..ncells].join(',')}"
39
+ end
40
+ end
41
+ row = row[ncells+1, @expected_field_count]
42
+ end
43
+ elsif !@headers
44
+ # trust the first row
45
+ @headers = row
46
+ case @snip
47
+ when Fixnum
48
+ @headers.shift(@snip)
49
+ when String
50
+ raise "TODO: snip named header. Multiple?"
51
+ when nil
52
+ # do nothing
53
+ else
54
+ raise "Do not understand snip instruction [#{@snip.inspect}]"
55
+ end
56
+ @expected_field_count = @headers.count
57
+ row = @headers
58
+ elsif row
59
+ @holding += row
60
+ if @holding.count < @expected_field_count
61
+ # incomplete row; do not emit anything
62
+ row = nil
63
+ else
64
+ row = @holding
65
+ @holding = []
66
+ end
67
+
68
+ if @dejunk && row && row.select {|s| s && !s.strip.empty?}.count <= 2
69
+ # junky row, suppress output
70
+ nil
71
+ else
72
+ row
73
+ end
74
+ end
75
+ end
76
+
77
+ def old_mend(opts)
14
78
  @output = opts[:output] ? File.open(opts[:output], "w") : $stdout
15
79
  @col_sep = opts[:col_sep] || ','
16
80
  @quote_char = opts[:quote_char] || "\0"
@@ -91,7 +91,8 @@ class Masticate::MyOptionParser
91
91
  'maxrows' => Masticate::MaxRows,
92
92
  'relabel' => Masticate::Relabel,
93
93
  'pluck' => Masticate::Plucker,
94
- 'exclude' => Masticate::Exclude
94
+ 'exclude' => Masticate::Exclude,
95
+ 'mend' => Masticate::Mender
95
96
  }
96
97
 
97
98
  klass = klasses[command]
@@ -165,6 +166,7 @@ EOT
165
166
  * masticate #{command} (#{options.keys.join(', ')})
166
167
  Lines in input: #{results[:input_count]}
167
168
  Lines in output: #{results[:output_count]}
169
+ Headers: #{results[:headers].inspect}
168
170
  EOT
169
171
  if results[:field_counts]
170
172
  $stderr.puts " Field counts: #{results[:field_counts].inspect}"
@@ -1,3 +1,3 @@
1
1
  module Masticate
2
- VERSION = "0.2.3"
2
+ VERSION = "0.3"
3
3
  end
@@ -0,0 +1,4 @@
1
+ Col 3
2
+ data
3
+ this long row
4
+ data
@@ -0,0 +1,11 @@
1
+ LAST_NAME,FIRST_NAME,MIDDLE_INIT,DEPT #,Empl #,timestamp,Term Date,Status,R_NAME,SEX,BIRTHDATE
2
+ WASHINGTON,GEORGE,D,824,9556,09/10/2005 4:23:16PM,07/01/2006,TM,Surgical House Staff,M,09/23/1975
3
+ JEFFERSON,TOM,,621,8052,07/23/2001 7:23:11AM,01/28/2011,TM,Telemetry,F,12/24/1976
4
+ ADAMS,JOHN,,655,8834,09/22/2003 01:23:45PM,,WA,6 East,F,08/07/1978
5
+ ADAMS,JOHN QUINCY,A,209,8637,02/24/2003 02:34:00AM,12/02/2007,TM,Imaging Svcs - MRI,F,11/03/1966
6
+ HAMILTON,ANDREW,,278,10065,01/09/2007 02:34:00AM,11/16/2007,TM,Information Technology,M,09/16/1968
7
+ MADISON,JAMES,F,672,10720,01/05/2009 02:34:00AM,02/16/2010,TM,Rehab Svcs - Outpatients,F,04/15/1985
8
+ FRANKLIN,BENJAMIN,R,674,8340,05/01/2002 02:34:00AM,09/01/2003,TM,"Rehab Svcs, xyz",F,03/15/1973
9
+ LINCOLN,ABRAHAM,M,634,11340,05/02/2011 02:34:00AM,,PN,Sibley Ambulatory Surgery Ctr,F,07/11/1960
10
+ MONROE,JAMES,L,614,10757,02/16/2009 02:34:00AM,,RF,Labor & Delivery,F,11/06/1983
11
+ REVERE,PAUL,B,424,8568,11/18/2002 02:34:00AM,06/27/2006,TM,Laundry & Linen,M,12/31/1976
@@ -0,0 +1,11 @@
1
+ LAST_NAME,FIRST_NAME,MIDDLE_INIT,DEPT #,Empl #,timestamp,Term Date,Status,R_NAME,SEX,BIRTHDATE
2
+ WASHINGTON,GEORGE,D,824,9556,1126369396,07/01/2006,TM,Surgical House Staff,M,09/23/1975
3
+ JEFFERSON,TOM,,621,8052,995872991,01/28/2011,TM,Telemetry,F,12/24/1976
4
+ ADAMS,JOHN,,655,8834,1064237025,,WA,6 East,F,08/07/1978
5
+ ADAMS,JOHN QUINCY,A,209,8637,1046054040,12/02/2007,TM,Imaging Svcs - MRI,F,11/03/1966
6
+ HAMILTON,ANDREW,,278,10065,1168310040,11/16/2007,TM,Information Technology,M,09/16/1968
7
+ MADISON,JAMES,F,672,10720,1231122840,02/16/2010,TM,Rehab Svcs - Outpatients,F,04/15/1985
8
+ FRANKLIN,BENJAMIN,R,674,8340,1020220440,09/01/2003,TM,"Rehab Svcs, xyz",F,03/15/1973
9
+ LINCOLN,ABRAHAM,M,634,11340,1304303640,,PN,Sibley Ambulatory Surgery Ctr,F,07/11/1960
10
+ MONROE,JAMES,L,614,10757,1234751640,,RF,Labor & Delivery,F,11/06/1983
11
+ REVERE,PAUL,B,424,8568,1037586840,06/27/2006,TM,Laundry & Linen,M,12/31/1976
@@ -0,0 +1,3 @@
1
+ mend
2
+ exclude --field 1 --value 'data2'
3
+ pluck --fields 3
@@ -13,4 +13,15 @@ describe "cooking up a recipe" do
13
13
 
14
14
  output.should == correct_output
15
15
  end
16
+
17
+ it "should allow mend in recipe" do
18
+ input = File.dirname(__FILE__) + "/../data/broken_psv.txt"
19
+ recipe = File.dirname(__FILE__) + "/../data/recipe_mend.txt"
20
+ tmp = Tempfile.new('cooked')
21
+ results = Masticate.cook(input, :col_sep => '|', :output => tmp, :recipe => recipe)
22
+ output = File.read(tmp)
23
+ correct_output = File.read(File.dirname(__FILE__) + "/../data/cooking_mend_result.csv")
24
+
25
+ output.should == correct_output
26
+ end
16
27
  end
@@ -2,15 +2,14 @@
2
2
 
3
3
  require "spec_helper"
4
4
 
5
- describe "csvification" do
6
- it "should convert pipes to standard commas" do
7
- filename = File.dirname(__FILE__) + "/../data/pipe_data.txt"
8
- tmp = Tempfile.new('csvify')
9
- results = Masticate.csvify(filename, :output => tmp, :col_sep => '|')
5
+ describe "datification" do
6
+ it "should transform dates" do
7
+ filename = File.dirname(__FILE__) + "/../data/datify_input.csv"
8
+ tmp = Tempfile.new('datify')
9
+ results = Masticate.datify(filename, :output => tmp, :field => 'timestamp', :format => '%m/%d/%Y %H:%M:%S%p')
10
10
  output = File.read(tmp)
11
11
  tmp.unlink
12
- output.lines.count.should == 5
13
- results[:input_count].should == 6
14
- results[:output_count].should == 5
12
+
13
+ correct_output = File.read(File.dirname(__FILE__) + "/../data/datify_result.csv")
15
14
  end
16
15
  end
@@ -12,4 +12,14 @@ describe "exclude" do
12
12
 
13
13
  output.should == correct_output
14
14
  end
15
+
16
+ it "should be able to exclude rows by number as well as name" do
17
+ filename = File.dirname(__FILE__) + "/../data/exclude_input.csv"
18
+ tmp = Tempfile.new('exclude')
19
+ results = Masticate.exclude(filename, :output => tmp, :field => 1, :value => '')
20
+ output = File.read(tmp)
21
+ correct_output = File.read(File.dirname(__FILE__) + "/../data/exclude_results.csv")
22
+
23
+ output.should == correct_output
24
+ end
15
25
  end
@@ -36,7 +36,7 @@ describe "mending" do
36
36
 
37
37
  results[:input_count].should == 11
38
38
  results[:output_count].should == 11
39
- results[:field_counts].should == {11 => 11}
39
+ # results[:field_counts].should == {11 => 11}
40
40
  output.should == correct_output
41
41
  end
42
42
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: masticate
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.3
4
+ version: '0.3'
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-04-25 00:00:00.000000000 Z
12
+ date: 2012-05-11 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rspec
16
- requirement: &2156995160 !ruby/object:Gem::Requirement
16
+ requirement: &2152079220 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ~>
@@ -21,10 +21,10 @@ dependencies:
21
21
  version: 2.9.0
22
22
  type: :development
23
23
  prerelease: false
24
- version_requirements: *2156995160
24
+ version_requirements: *2152079220
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: guard-rspec
27
- requirement: &2156994180 !ruby/object:Gem::Requirement
27
+ requirement: &2152076120 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ~>
@@ -32,10 +32,10 @@ dependencies:
32
32
  version: 0.7.0
33
33
  type: :development
34
34
  prerelease: false
35
- version_requirements: *2156994180
35
+ version_requirements: *2152076120
36
36
  - !ruby/object:Gem::Dependency
37
37
  name: ruby_gntp
38
- requirement: &2156992960 !ruby/object:Gem::Requirement
38
+ requirement: &2152074480 !ruby/object:Gem::Requirement
39
39
  none: false
40
40
  requirements:
41
41
  - - ~>
@@ -43,7 +43,7 @@ dependencies:
43
43
  version: 0.3.4
44
44
  type: :development
45
45
  prerelease: false
46
- version_requirements: *2156992960
46
+ version_requirements: *2152074480
47
47
  description: Data file crunching
48
48
  email:
49
49
  - jmay@pobox.com
@@ -79,8 +79,10 @@ files:
79
79
  - spec/data/badnums_fixed.csv
80
80
  - spec/data/broken_psv.txt
81
81
  - spec/data/concat_result.txt
82
+ - spec/data/cooking_mend_result.csv
82
83
  - spec/data/cooking_result.csv
83
- - spec/data/datify_input.txt
84
+ - spec/data/datify_input.csv
85
+ - spec/data/datify_result.csv
84
86
  - spec/data/events.csv
85
87
  - spec/data/events_reduced.csv
86
88
  - spec/data/exclude_input.csv
@@ -94,6 +96,7 @@ files:
94
96
  - spec/data/pipe_data.txt
95
97
  - spec/data/quoted_csv_data.txt
96
98
  - spec/data/recipe.txt
99
+ - spec/data/recipe_mend.txt
97
100
  - spec/data/relabel_result.csv
98
101
  - spec/data/tabbed_data.txt
99
102
  - spec/data/tilde_data.txt
@@ -138,8 +141,10 @@ test_files:
138
141
  - spec/data/badnums_fixed.csv
139
142
  - spec/data/broken_psv.txt
140
143
  - spec/data/concat_result.txt
144
+ - spec/data/cooking_mend_result.csv
141
145
  - spec/data/cooking_result.csv
142
- - spec/data/datify_input.txt
146
+ - spec/data/datify_input.csv
147
+ - spec/data/datify_result.csv
143
148
  - spec/data/events.csv
144
149
  - spec/data/events_reduced.csv
145
150
  - spec/data/exclude_input.csv
@@ -153,6 +158,7 @@ test_files:
153
158
  - spec/data/pipe_data.txt
154
159
  - spec/data/quoted_csv_data.txt
155
160
  - spec/data/recipe.txt
161
+ - spec/data/recipe_mend.txt
156
162
  - spec/data/relabel_result.csv
157
163
  - spec/data/tabbed_data.txt
158
164
  - spec/data/tilde_data.txt
@@ -1,20 +0,0 @@
1
- ChangeDTTM|AuditEventID|AuditEventCode|AuditEventTypeID|AuditEventName|AuditEventDesc|AuditByID
2
- 12/31/2011 3:41:54PM|57,314,552.00|LOGGEDOUT|30|Logged out|Logged out|4,426.00
3
- 1/8/2012 9:21:23AM|57,486,988.00|LOGGEDOUT|30|Logged out|Logged out|4,573.00
4
- 1/8/2012 9:21:24AM|57,486,989.00|LOGGEDOUT|30|Logged out|Logged out|4,573.00
5
- 2/19/2012 6:08:59PM|58,597,521.00|LOGGEDOUT|30|Logged out|Logged out|4,573.00
6
- 2/19/2012 7:02:50PM|58,597,816.00|LOGGEDOUT|30|Logged out|Logged out|4,573.00
7
- 2/19/2012 7:02:50PM|58,597,817.00|LOGGEDOUT|30|Logged out|Logged out|4,573.00
8
- 2/4/2012 9:29:20AM|58,201,259.00|LOGGEDOUT|30|Logged out|Logged out|4,573.00
9
- 2/4/2012 12:19:30AM|58,196,973.00|LOGGEDOUT|30|Logged out|Logged out|4,573.00
10
- 2/4/2012 12:20:53AM|58,196,975.00|LOGGEDOUT|30|Logged out|Logged out|4,573.00
11
- 2/3/2012 10:44:25PM|58,196,507.00|LOGGEDOUT|30|Logged out|Logged out|4,573.00
12
- 1/20/2012 8:01:07PM|57,833,496.00|LOGGEDOUT|30|Logged out|Logged out|4,573.00
13
- 1/22/2012 4:20:48AM|57,845,095.00|LOGGEDOUT|30|Logged out|Logged out|4,573.00
14
- 1/22/2012 4:20:49AM|57,845,096.00|LOGGEDOUT|30|Logged out|Logged out|4,573.00
15
- 1/14/2012 7:50:40AM|57,661,613.00|LOGGEDOUT|30|Logged out|Logged out|5,019.00
16
- 2/26/2012 7:48:07AM|58,776,243.00|LOGGEDOUT|30|Logged out|Logged out|5,019.00
17
- 2/23/2012 7:47:39AM|58,699,535.00|LOGGEDOUT|30|Logged out|Logged out|5,019.00
18
- 2/23/2012 7:39:56AM|58,699,387.00|LOGGEDOUT|30|Logged out|Logged out|5,019.00
19
- 2/23/2012 7:39:56AM|58,699,388.00|LOGGEDOUT|30|Logged out|Logged out|5,019.00
20
- 2/15/2012 3:15:06PM|58,502,037.00|LOGGEDOUT|30|Logged out|Logged out|5,019.00