masticate 0.3 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.travis.yml ADDED
@@ -0,0 +1,8 @@
1
+ language: ruby
2
+ rvm:
3
+ - 1.9.2
4
+ - 1.9.3
5
+ - jruby-19mode # JRuby in 1.9 mode
6
+ - rbx-19mode
7
+ # uncomment this line if your project needs to run something other than `rake`:
8
+ # script: bundle exec rspec spec
data/Rakefile CHANGED
@@ -5,4 +5,4 @@ require 'rspec/core/rake_task'
5
5
 
6
6
  RSpec::Core::RakeTask.new(:spec)
7
7
 
8
- # task :default => :spec
8
+ task :default => :spec
@@ -25,8 +25,14 @@ class Masticate::Base
25
25
 
26
26
  def get
27
27
  line = @input.gets
28
- @input_count += 1
29
- line && line.chomp
28
+ return nil if line.nil?
29
+ line.chomp!
30
+ if line.empty?
31
+ get
32
+ else
33
+ @input_count += 1
34
+ line
35
+ end
30
36
  end
31
37
 
32
38
  def emit(line)
@@ -15,7 +15,7 @@ class Masticate::Concat #< Masticate::Base
15
15
  file1, *rest = @filenames
16
16
  system "cat #{file1} #{redirect}"
17
17
  rest.each do |file|
18
- system "tail +2 #{file} #{redirect}"
18
+ system "tail -n +2 #{file} | sed '/^$/d' #{redirect}"
19
19
  end
20
20
  end
21
21
  end
@@ -18,7 +18,8 @@ class Masticate::Cook < Masticate::Base
18
18
  recipe = File.read(recipefile).lines
19
19
  standard_options(opts)
20
20
 
21
- steps = recipe.map do |step|
21
+ # ignore blank lines in recipe file
22
+ steps = recipe.grep(/\S/).map do |step|
22
23
  argv = Shellwords.split(step)
23
24
  masticator = Masticate::MyOptionParser.new
24
25
  command, options = masticator.parse(argv)
@@ -38,9 +39,16 @@ class Masticate::Cook < Masticate::Base
38
39
  emit(row) if row
39
40
  end
40
41
  end
42
+ more_rows = []
41
43
  steps.each do |step|
42
- step.crunch(nil) {|row| emit(row)}
44
+ if more_rows.any?
45
+ more_rows = more_rows.map {|row| step.crunch(row)}
46
+ else
47
+ step.crunch(nil) {|row| more_rows << row}
48
+ end
43
49
  end
50
+ more_rows.each {|row| emit(row)}
51
+ # step.crunch(nil) {|row| emit(row)}
44
52
 
45
53
  @output.close if opts[:output]
46
54
 
@@ -25,7 +25,8 @@ class Masticate::Sniffer < Masticate::Base
25
25
  def find_col_sep
26
26
  @delimstats = {}
27
27
  with_input do |input|
28
- input.lines.take(10).each do |line|
28
+ lines = 10.times.map{get}.compact
29
+ lines.each do |line|
29
30
  @line1 = line unless @line1
30
31
 
31
32
  CandidateDelimiters.each do |delim|
@@ -67,8 +68,12 @@ class Masticate::Sniffer < Masticate::Base
67
68
  end
68
69
 
69
70
  def stats
71
+ counts = Hash.new(0)
70
72
  with_input do |input|
71
- input.lines.each_with_object(Hash.new(0)) {|line, counts| counts[CSV.parse_line(line, :col_sep => col_sep, :quote_char => quote_char || "\0").count] += 1}
73
+ while line = get
74
+ counts[CSV.parse_line(line, :col_sep => col_sep, :quote_char => quote_char || "\0").count] += 1
75
+ end
72
76
  end
77
+ counts
73
78
  end
74
79
  end
@@ -1,3 +1,3 @@
1
1
  module Masticate
2
- VERSION = "0.3"
2
+ VERSION = "0.3.1"
3
3
  end
data/masticate.gemspec CHANGED
@@ -16,6 +16,7 @@ Gem::Specification.new do |gem|
16
16
  gem.require_paths = ["lib"]
17
17
  gem.version = Masticate::VERSION
18
18
 
19
+ gem.add_development_dependency "rake", "~> 0.9.2"
19
20
  gem.add_development_dependency "rspec", "~> 2.9.0"
20
21
  gem.add_development_dependency "guard-rspec", "~> 0.7.0"
21
22
  gem.add_development_dependency "ruby_gntp", "~> 0.3.4"
@@ -0,0 +1,8 @@
1
+ DTTM|AuditCode|AuditDesc|AuditEventTypeID|AuditByID
2
+ 3/12/2012 11:20:32AM|EXCEPTADD|Exception Added|10|9,123.00
3
+ 3/13/2012 8:12:39AM|EXCEPTADD|Exception Added|10|9,234.00
4
+ 3/13/2012 8:13:25AM|EXCEPTADD|Exception Added|10|9,123.00
5
+ 3/12/2012 11:20:32AM|EXCEPTADD|Exception Added|10|9,345.00
6
+ 3/12/2012 11:20:32AM|EXCEPTADD|Exception Added|10|9,234.00
7
+ 3/12/2012 11:20:32AM|EXCEPTADD|Exception Added|10|9,678.00
8
+ 3/12/2012 11:20:32AM|1
@@ -0,0 +1,5 @@
1
+ id,ts,desc
2
+ 9123,1331626405,Exception Added
3
+ 9234,1331626359,Exception Added
4
+ 9345,1331551232,Exception Added
5
+ 9678,1331551232,Exception Added
@@ -98,3 +98,4 @@ site,ibex,unit,face,doctor,hospid,usrorder,dteorder,usrsend,dtesend,usrdone,dted
98
98
  1,20120106100014,2044817,L,504,15550,504,201201061011,504,201201061011,0,201201061049,L,"594,856"
99
99
  1,20120106100014,2044818,X,504,15550,504,201201061011,504,201201061011,0,201201061038,X,"594,855"
100
100
  1,20120106100014,2044816,L,504,15550,504,201201061011,504,201201061011,0,201201061049,L,"594,860"
101
+
@@ -0,0 +1,6 @@
1
+ exclude --field AuditCode --value '1'
2
+ pluck --fields AuditByID,DTTM,AuditDesc
3
+ datify --field DTTM --format "%m/%d/%Y %H:%M:%S%p"
4
+ maxrows --by AuditByID --max DTTM
5
+ gsub --field AuditByID --from ',|(.00$)' --to ''
6
+ relabel --fields id,ts,desc
@@ -1,3 +1,5 @@
1
1
  mend
2
2
  exclude --field 1 --value 'data2'
3
3
  pluck --fields 3
4
+
5
+
@@ -24,4 +24,15 @@ describe "cooking up a recipe" do
24
24
 
25
25
  output.should == correct_output
26
26
  end
27
+
28
+ it "should do correct datify & gsub in recipe" do
29
+ input = File.dirname(__FILE__) + "/../data/cookery_input.psv"
30
+ recipe = File.dirname(__FILE__) + "/../data/recipe_cookery.txt"
31
+ tmp = Tempfile.new('cooked')
32
+ results = Masticate.cook(input, :col_sep => '|', :output => tmp, :recipe => recipe)
33
+ output = File.read(tmp)
34
+ correct_output = File.read(File.dirname(__FILE__) + "/../data/cookery_result.csv")
35
+
36
+ output.should == correct_output
37
+ end
27
38
  end
@@ -10,7 +10,7 @@ describe "csvification" do
10
10
  output = File.read(tmp)
11
11
  tmp.unlink
12
12
  output.lines.count.should == 5
13
- results[:input_count].should == 6
13
+ results[:input_count].should == 5
14
14
  results[:output_count].should == 5
15
15
  end
16
16
  end
@@ -7,14 +7,14 @@ describe "mending" do
7
7
  it "should merge lines when delimiter counts don't match'" do
8
8
  filename = File.dirname(__FILE__) + "/../data/broken_psv.txt"
9
9
  results = Masticate.mend(filename, :col_sep => '|', :output => "/dev/null")
10
- results[:input_count].should == 7
10
+ results[:input_count].should == 6
11
11
  results[:output_count].should == 5
12
12
  end
13
13
 
14
14
  it "should strip trailer records" do
15
15
  filename = File.dirname(__FILE__) + "/../data/junk_trailer.txt"
16
16
  results = Masticate.mend(filename, :col_sep => '|', :output => "/dev/null", :dejunk => true)
17
- results[:input_count].should == 10
17
+ results[:input_count].should == 7
18
18
  results[:output_count].should == 5
19
19
  results[:headers].should == ['COL1', 'COL 2', 'Col 3', 'col-4', 'col5', 'col6']
20
20
  end
@@ -22,7 +22,7 @@ describe "mending" do
22
22
  it "should snip head fields" do
23
23
  filename = File.dirname(__FILE__) + "/../data/junk_header.csv"
24
24
  results = Masticate.mend(filename, :col_sep => ',', :snip => 1, :output => "/dev/null")
25
- results[:input_count].should == 6
25
+ results[:input_count].should == 5
26
26
  results[:output_count].should == 5
27
27
  results[:headers].should == %w(hospid usrorder dteorder usrsend dtesend usrdone dtedone department)
28
28
  end
@@ -34,7 +34,7 @@ describe "mending" do
34
34
  output = File.read(tmp)
35
35
  correct_output = File.read(File.dirname(__FILE__) + "/../data/inlined_headers.csv.output")
36
36
 
37
- results[:input_count].should == 11
37
+ results[:input_count].should == 10
38
38
  results[:output_count].should == 11
39
39
  # results[:field_counts].should == {11 => 11}
40
40
  output.should == correct_output
@@ -12,7 +12,7 @@ describe "plucker" do
12
12
  correct_output = File.read(File.dirname(__FILE__) + "/../data/namedcols.csv.output")
13
13
  tmp.unlink
14
14
 
15
- results[:input_count].should == 5
15
+ results[:input_count].should == 4
16
16
  output.should == correct_output
17
17
  end
18
18
 
@@ -24,7 +24,7 @@ describe "plucker" do
24
24
  correct_output = File.read(File.dirname(__FILE__) + "/../data/namedcols.csv.output")
25
25
  tmp.unlink
26
26
 
27
- results[:input_count].should == 5
27
+ results[:input_count].should == 4
28
28
  output.should == correct_output
29
29
  end
30
30
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: masticate
3
3
  version: !ruby/object:Gem::Version
4
- version: '0.3'
4
+ version: 0.3.1
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,22 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-05-11 00:00:00.000000000 Z
12
+ date: 2012-05-30 00:00:00.000000000 Z
13
13
  dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: rake
16
+ requirement: &2153051960 !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ~>
20
+ - !ruby/object:Gem::Version
21
+ version: 0.9.2
22
+ type: :development
23
+ prerelease: false
24
+ version_requirements: *2153051960
14
25
  - !ruby/object:Gem::Dependency
15
26
  name: rspec
16
- requirement: &2152079220 !ruby/object:Gem::Requirement
27
+ requirement: &2153051460 !ruby/object:Gem::Requirement
17
28
  none: false
18
29
  requirements:
19
30
  - - ~>
@@ -21,10 +32,10 @@ dependencies:
21
32
  version: 2.9.0
22
33
  type: :development
23
34
  prerelease: false
24
- version_requirements: *2152079220
35
+ version_requirements: *2153051460
25
36
  - !ruby/object:Gem::Dependency
26
37
  name: guard-rspec
27
- requirement: &2152076120 !ruby/object:Gem::Requirement
38
+ requirement: &2153051000 !ruby/object:Gem::Requirement
28
39
  none: false
29
40
  requirements:
30
41
  - - ~>
@@ -32,10 +43,10 @@ dependencies:
32
43
  version: 0.7.0
33
44
  type: :development
34
45
  prerelease: false
35
- version_requirements: *2152076120
46
+ version_requirements: *2153051000
36
47
  - !ruby/object:Gem::Dependency
37
48
  name: ruby_gntp
38
- requirement: &2152074480 !ruby/object:Gem::Requirement
49
+ requirement: &2153050540 !ruby/object:Gem::Requirement
39
50
  none: false
40
51
  requirements:
41
52
  - - ~>
@@ -43,7 +54,7 @@ dependencies:
43
54
  version: 0.3.4
44
55
  type: :development
45
56
  prerelease: false
46
- version_requirements: *2152074480
57
+ version_requirements: *2153050540
47
58
  description: Data file crunching
48
59
  email:
49
60
  - jmay@pobox.com
@@ -53,6 +64,7 @@ extensions: []
53
64
  extra_rdoc_files: []
54
65
  files:
55
66
  - .gitignore
67
+ - .travis.yml
56
68
  - Gemfile
57
69
  - Guardfile
58
70
  - LICENSE
@@ -79,6 +91,8 @@ files:
79
91
  - spec/data/badnums_fixed.csv
80
92
  - spec/data/broken_psv.txt
81
93
  - spec/data/concat_result.txt
94
+ - spec/data/cookery_input.psv
95
+ - spec/data/cookery_result.csv
82
96
  - spec/data/cooking_mend_result.csv
83
97
  - spec/data/cooking_result.csv
84
98
  - spec/data/datify_input.csv
@@ -96,6 +110,7 @@ files:
96
110
  - spec/data/pipe_data.txt
97
111
  - spec/data/quoted_csv_data.txt
98
112
  - spec/data/recipe.txt
113
+ - spec/data/recipe_cookery.txt
99
114
  - spec/data/recipe_mend.txt
100
115
  - spec/data/relabel_result.csv
101
116
  - spec/data/tabbed_data.txt
@@ -141,6 +156,8 @@ test_files:
141
156
  - spec/data/badnums_fixed.csv
142
157
  - spec/data/broken_psv.txt
143
158
  - spec/data/concat_result.txt
159
+ - spec/data/cookery_input.psv
160
+ - spec/data/cookery_result.csv
144
161
  - spec/data/cooking_mend_result.csv
145
162
  - spec/data/cooking_result.csv
146
163
  - spec/data/datify_input.csv
@@ -158,6 +175,7 @@ test_files:
158
175
  - spec/data/pipe_data.txt
159
176
  - spec/data/quoted_csv_data.txt
160
177
  - spec/data/recipe.txt
178
+ - spec/data/recipe_cookery.txt
161
179
  - spec/data/recipe_mend.txt
162
180
  - spec/data/relabel_result.csv
163
181
  - spec/data/tabbed_data.txt