masticate 0.3 → 0.3.1

Sign up to get free protection for your applications and to get access to all the features.
data/.travis.yml ADDED
@@ -0,0 +1,8 @@
1
+ language: ruby
2
+ rvm:
3
+ - 1.9.2
4
+ - 1.9.3
5
+ - jruby-19mode # JRuby in 1.9 mode
6
+ - rbx-19mode
7
+ # uncomment this line if your project needs to run something other than `rake`:
8
+ # script: bundle exec rspec spec
data/Rakefile CHANGED
@@ -5,4 +5,4 @@ require 'rspec/core/rake_task'
5
5
 
6
6
  RSpec::Core::RakeTask.new(:spec)
7
7
 
8
- # task :default => :spec
8
+ task :default => :spec
@@ -25,8 +25,14 @@ class Masticate::Base
25
25
 
26
26
  def get
27
27
  line = @input.gets
28
- @input_count += 1
29
- line && line.chomp
28
+ return nil if line.nil?
29
+ line.chomp!
30
+ if line.empty?
31
+ get
32
+ else
33
+ @input_count += 1
34
+ line
35
+ end
30
36
  end
31
37
 
32
38
  def emit(line)
@@ -15,7 +15,7 @@ class Masticate::Concat #< Masticate::Base
15
15
  file1, *rest = @filenames
16
16
  system "cat #{file1} #{redirect}"
17
17
  rest.each do |file|
18
- system "tail +2 #{file} #{redirect}"
18
+ system "tail -n +2 #{file} | sed '/^$/d' #{redirect}"
19
19
  end
20
20
  end
21
21
  end
@@ -18,7 +18,8 @@ class Masticate::Cook < Masticate::Base
18
18
  recipe = File.read(recipefile).lines
19
19
  standard_options(opts)
20
20
 
21
- steps = recipe.map do |step|
21
+ # ignore blank lines in recipe file
22
+ steps = recipe.grep(/\S/).map do |step|
22
23
  argv = Shellwords.split(step)
23
24
  masticator = Masticate::MyOptionParser.new
24
25
  command, options = masticator.parse(argv)
@@ -38,9 +39,16 @@ class Masticate::Cook < Masticate::Base
38
39
  emit(row) if row
39
40
  end
40
41
  end
42
+ more_rows = []
41
43
  steps.each do |step|
42
- step.crunch(nil) {|row| emit(row)}
44
+ if more_rows.any?
45
+ more_rows = more_rows.map {|row| step.crunch(row)}
46
+ else
47
+ step.crunch(nil) {|row| more_rows << row}
48
+ end
43
49
  end
50
+ more_rows.each {|row| emit(row)}
51
+ # step.crunch(nil) {|row| emit(row)}
44
52
 
45
53
  @output.close if opts[:output]
46
54
 
@@ -25,7 +25,8 @@ class Masticate::Sniffer < Masticate::Base
25
25
  def find_col_sep
26
26
  @delimstats = {}
27
27
  with_input do |input|
28
- input.lines.take(10).each do |line|
28
+ lines = 10.times.map{get}.compact
29
+ lines.each do |line|
29
30
  @line1 = line unless @line1
30
31
 
31
32
  CandidateDelimiters.each do |delim|
@@ -67,8 +68,12 @@ class Masticate::Sniffer < Masticate::Base
67
68
  end
68
69
 
69
70
  def stats
71
+ counts = Hash.new(0)
70
72
  with_input do |input|
71
- input.lines.each_with_object(Hash.new(0)) {|line, counts| counts[CSV.parse_line(line, :col_sep => col_sep, :quote_char => quote_char || "\0").count] += 1}
73
+ while line = get
74
+ counts[CSV.parse_line(line, :col_sep => col_sep, :quote_char => quote_char || "\0").count] += 1
75
+ end
72
76
  end
77
+ counts
73
78
  end
74
79
  end
@@ -1,3 +1,3 @@
1
1
  module Masticate
2
- VERSION = "0.3"
2
+ VERSION = "0.3.1"
3
3
  end
data/masticate.gemspec CHANGED
@@ -16,6 +16,7 @@ Gem::Specification.new do |gem|
16
16
  gem.require_paths = ["lib"]
17
17
  gem.version = Masticate::VERSION
18
18
 
19
+ gem.add_development_dependency "rake", "~> 0.9.2"
19
20
  gem.add_development_dependency "rspec", "~> 2.9.0"
20
21
  gem.add_development_dependency "guard-rspec", "~> 0.7.0"
21
22
  gem.add_development_dependency "ruby_gntp", "~> 0.3.4"
@@ -0,0 +1,8 @@
1
+ DTTM|AuditCode|AuditDesc|AuditEventTypeID|AuditByID
2
+ 3/12/2012 11:20:32AM|EXCEPTADD|Exception Added|10|9,123.00
3
+ 3/13/2012 8:12:39AM|EXCEPTADD|Exception Added|10|9,234.00
4
+ 3/13/2012 8:13:25AM|EXCEPTADD|Exception Added|10|9,123.00
5
+ 3/12/2012 11:20:32AM|EXCEPTADD|Exception Added|10|9,345.00
6
+ 3/12/2012 11:20:32AM|EXCEPTADD|Exception Added|10|9,234.00
7
+ 3/12/2012 11:20:32AM|EXCEPTADD|Exception Added|10|9,678.00
8
+ 3/12/2012 11:20:32AM|1
@@ -0,0 +1,5 @@
1
+ id,ts,desc
2
+ 9123,1331626405,Exception Added
3
+ 9234,1331626359,Exception Added
4
+ 9345,1331551232,Exception Added
5
+ 9678,1331551232,Exception Added
@@ -98,3 +98,4 @@ site,ibex,unit,face,doctor,hospid,usrorder,dteorder,usrsend,dtesend,usrdone,dted
98
98
  1,20120106100014,2044817,L,504,15550,504,201201061011,504,201201061011,0,201201061049,L,"594,856"
99
99
  1,20120106100014,2044818,X,504,15550,504,201201061011,504,201201061011,0,201201061038,X,"594,855"
100
100
  1,20120106100014,2044816,L,504,15550,504,201201061011,504,201201061011,0,201201061049,L,"594,860"
101
+
@@ -0,0 +1,6 @@
1
+ exclude --field AuditCode --value '1'
2
+ pluck --fields AuditByID,DTTM,AuditDesc
3
+ datify --field DTTM --format "%m/%d/%Y %H:%M:%S%p"
4
+ maxrows --by AuditByID --max DTTM
5
+ gsub --field AuditByID --from ',|(.00$)' --to ''
6
+ relabel --fields id,ts,desc
@@ -1,3 +1,5 @@
1
1
  mend
2
2
  exclude --field 1 --value 'data2'
3
3
  pluck --fields 3
4
+
5
+
@@ -24,4 +24,15 @@ describe "cooking up a recipe" do
24
24
 
25
25
  output.should == correct_output
26
26
  end
27
+
28
+ it "should do correct datify & gsub in recipe" do
29
+ input = File.dirname(__FILE__) + "/../data/cookery_input.psv"
30
+ recipe = File.dirname(__FILE__) + "/../data/recipe_cookery.txt"
31
+ tmp = Tempfile.new('cooked')
32
+ results = Masticate.cook(input, :col_sep => '|', :output => tmp, :recipe => recipe)
33
+ output = File.read(tmp)
34
+ correct_output = File.read(File.dirname(__FILE__) + "/../data/cookery_result.csv")
35
+
36
+ output.should == correct_output
37
+ end
27
38
  end
@@ -10,7 +10,7 @@ describe "csvification" do
10
10
  output = File.read(tmp)
11
11
  tmp.unlink
12
12
  output.lines.count.should == 5
13
- results[:input_count].should == 6
13
+ results[:input_count].should == 5
14
14
  results[:output_count].should == 5
15
15
  end
16
16
  end
@@ -7,14 +7,14 @@ describe "mending" do
7
7
  it "should merge lines when delimiter counts don't match'" do
8
8
  filename = File.dirname(__FILE__) + "/../data/broken_psv.txt"
9
9
  results = Masticate.mend(filename, :col_sep => '|', :output => "/dev/null")
10
- results[:input_count].should == 7
10
+ results[:input_count].should == 6
11
11
  results[:output_count].should == 5
12
12
  end
13
13
 
14
14
  it "should strip trailer records" do
15
15
  filename = File.dirname(__FILE__) + "/../data/junk_trailer.txt"
16
16
  results = Masticate.mend(filename, :col_sep => '|', :output => "/dev/null", :dejunk => true)
17
- results[:input_count].should == 10
17
+ results[:input_count].should == 7
18
18
  results[:output_count].should == 5
19
19
  results[:headers].should == ['COL1', 'COL 2', 'Col 3', 'col-4', 'col5', 'col6']
20
20
  end
@@ -22,7 +22,7 @@ describe "mending" do
22
22
  it "should snip head fields" do
23
23
  filename = File.dirname(__FILE__) + "/../data/junk_header.csv"
24
24
  results = Masticate.mend(filename, :col_sep => ',', :snip => 1, :output => "/dev/null")
25
- results[:input_count].should == 6
25
+ results[:input_count].should == 5
26
26
  results[:output_count].should == 5
27
27
  results[:headers].should == %w(hospid usrorder dteorder usrsend dtesend usrdone dtedone department)
28
28
  end
@@ -34,7 +34,7 @@ describe "mending" do
34
34
  output = File.read(tmp)
35
35
  correct_output = File.read(File.dirname(__FILE__) + "/../data/inlined_headers.csv.output")
36
36
 
37
- results[:input_count].should == 11
37
+ results[:input_count].should == 10
38
38
  results[:output_count].should == 11
39
39
  # results[:field_counts].should == {11 => 11}
40
40
  output.should == correct_output
@@ -12,7 +12,7 @@ describe "plucker" do
12
12
  correct_output = File.read(File.dirname(__FILE__) + "/../data/namedcols.csv.output")
13
13
  tmp.unlink
14
14
 
15
- results[:input_count].should == 5
15
+ results[:input_count].should == 4
16
16
  output.should == correct_output
17
17
  end
18
18
 
@@ -24,7 +24,7 @@ describe "plucker" do
24
24
  correct_output = File.read(File.dirname(__FILE__) + "/../data/namedcols.csv.output")
25
25
  tmp.unlink
26
26
 
27
- results[:input_count].should == 5
27
+ results[:input_count].should == 4
28
28
  output.should == correct_output
29
29
  end
30
30
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: masticate
3
3
  version: !ruby/object:Gem::Version
4
- version: '0.3'
4
+ version: 0.3.1
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,22 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-05-11 00:00:00.000000000 Z
12
+ date: 2012-05-30 00:00:00.000000000 Z
13
13
  dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: rake
16
+ requirement: &2153051960 !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ~>
20
+ - !ruby/object:Gem::Version
21
+ version: 0.9.2
22
+ type: :development
23
+ prerelease: false
24
+ version_requirements: *2153051960
14
25
  - !ruby/object:Gem::Dependency
15
26
  name: rspec
16
- requirement: &2152079220 !ruby/object:Gem::Requirement
27
+ requirement: &2153051460 !ruby/object:Gem::Requirement
17
28
  none: false
18
29
  requirements:
19
30
  - - ~>
@@ -21,10 +32,10 @@ dependencies:
21
32
  version: 2.9.0
22
33
  type: :development
23
34
  prerelease: false
24
- version_requirements: *2152079220
35
+ version_requirements: *2153051460
25
36
  - !ruby/object:Gem::Dependency
26
37
  name: guard-rspec
27
- requirement: &2152076120 !ruby/object:Gem::Requirement
38
+ requirement: &2153051000 !ruby/object:Gem::Requirement
28
39
  none: false
29
40
  requirements:
30
41
  - - ~>
@@ -32,10 +43,10 @@ dependencies:
32
43
  version: 0.7.0
33
44
  type: :development
34
45
  prerelease: false
35
- version_requirements: *2152076120
46
+ version_requirements: *2153051000
36
47
  - !ruby/object:Gem::Dependency
37
48
  name: ruby_gntp
38
- requirement: &2152074480 !ruby/object:Gem::Requirement
49
+ requirement: &2153050540 !ruby/object:Gem::Requirement
39
50
  none: false
40
51
  requirements:
41
52
  - - ~>
@@ -43,7 +54,7 @@ dependencies:
43
54
  version: 0.3.4
44
55
  type: :development
45
56
  prerelease: false
46
- version_requirements: *2152074480
57
+ version_requirements: *2153050540
47
58
  description: Data file crunching
48
59
  email:
49
60
  - jmay@pobox.com
@@ -53,6 +64,7 @@ extensions: []
53
64
  extra_rdoc_files: []
54
65
  files:
55
66
  - .gitignore
67
+ - .travis.yml
56
68
  - Gemfile
57
69
  - Guardfile
58
70
  - LICENSE
@@ -79,6 +91,8 @@ files:
79
91
  - spec/data/badnums_fixed.csv
80
92
  - spec/data/broken_psv.txt
81
93
  - spec/data/concat_result.txt
94
+ - spec/data/cookery_input.psv
95
+ - spec/data/cookery_result.csv
82
96
  - spec/data/cooking_mend_result.csv
83
97
  - spec/data/cooking_result.csv
84
98
  - spec/data/datify_input.csv
@@ -96,6 +110,7 @@ files:
96
110
  - spec/data/pipe_data.txt
97
111
  - spec/data/quoted_csv_data.txt
98
112
  - spec/data/recipe.txt
113
+ - spec/data/recipe_cookery.txt
99
114
  - spec/data/recipe_mend.txt
100
115
  - spec/data/relabel_result.csv
101
116
  - spec/data/tabbed_data.txt
@@ -141,6 +156,8 @@ test_files:
141
156
  - spec/data/badnums_fixed.csv
142
157
  - spec/data/broken_psv.txt
143
158
  - spec/data/concat_result.txt
159
+ - spec/data/cookery_input.psv
160
+ - spec/data/cookery_result.csv
144
161
  - spec/data/cooking_mend_result.csv
145
162
  - spec/data/cooking_result.csv
146
163
  - spec/data/datify_input.csv
@@ -158,6 +175,7 @@ test_files:
158
175
  - spec/data/pipe_data.txt
159
176
  - spec/data/quoted_csv_data.txt
160
177
  - spec/data/recipe.txt
178
+ - spec/data/recipe_cookery.txt
161
179
  - spec/data/recipe_mend.txt
162
180
  - spec/data/relabel_result.csv
163
181
  - spec/data/tabbed_data.txt