masticate 0.2.1 → 0.2.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -6,7 +6,7 @@ class Masticate::Base
6
6
 
7
7
  def initialize(args)
8
8
  case args
9
- when String
9
+ when String, nil
10
10
  @filename = args
11
11
  when Hash
12
12
  configure(args)
@@ -13,7 +13,10 @@ class Masticate::Csvify < Masticate::Base
13
13
  with_input do |input|
14
14
  while line = get
15
15
  row = CSV.parse_line(line, csv_options)
16
- emit(row.to_csv) if row
16
+ if row
17
+ row = row.map(&:strip)
18
+ emit(row)
19
+ end
17
20
  end
18
21
  end
19
22
  @output.close if opts[:output]
@@ -14,7 +14,11 @@ class Masticate::Datify < Masticate::Base
14
14
 
15
15
  def crunch(row)
16
16
  if !@index
17
- @index = row.index(@field) or raise "Unable to find column '#{@field}'"
17
+ if @field.is_a?(Fixnum) || @field =~ /^\d+/
18
+ @index = @field.to_i
19
+ else
20
+ @index = row.index(@field) or raise "Unable to find column '#{@field}'"
21
+ end
18
22
  elsif row
19
23
  ts = DateTime.strptime(row[@index], @format).to_time
20
24
  row[@index] = ts.to_i rescue nil
@@ -0,0 +1,32 @@
1
+ # exclude rows based on field = value
2
+
3
+ class Masticate::Exclude < Masticate::Base
4
+ def configure(opts)
5
+ standard_options(opts)
6
+
7
+ @field = opts[:field] or raise "missing field to exclude"
8
+ @value = opts[:value] or raise "missing value to exclude"
9
+
10
+ # row-loading automatically strips leading & trailing whitespace and converts blanks to nils,
11
+ # so when looking for blanks need to compare to nil instead of ''
12
+ @value = nil if @value.empty?
13
+ end
14
+
15
+ def exclude(opts)
16
+ execute(opts)
17
+ end
18
+
19
+ def crunch(row)
20
+ if !@headers
21
+ @headers = row
22
+ @index = @headers.index(@field) or raise "Unable to find column '#{@field}' in headers"
23
+ row
24
+ elsif row
25
+ if row[@index] == @value
26
+ # exclude
27
+ else
28
+ row
29
+ end
30
+ end
31
+ end
32
+ end
@@ -1,5 +1,4 @@
1
- # extract subset of columns from CSV
2
- require "csv"
1
+ # apply regex transformation to a field
3
2
 
4
3
  class Masticate::Gsubber < Masticate::Base
5
4
  def configure(opts)
@@ -37,6 +37,10 @@ class Masticate::MyOptionParser
37
37
  @options[:field] = f
38
38
  end
39
39
 
40
+ opts.on("--value VALUE", String, "(*exclude* only) Value to compare field to for exclusion") do |s|
41
+ @options[:value] = s
42
+ end
43
+
40
44
  opts.on("--snip DIRECTIVE", String, "Specify header fields to snip: first N, or by name") do |f|
41
45
  @options[:snip] = f.to_i
42
46
  end
@@ -85,7 +89,8 @@ class Masticate::MyOptionParser
85
89
  'datify' => Masticate::Datify,
86
90
  'maxrows' => Masticate::MaxRows,
87
91
  'relabel' => Masticate::Relabel,
88
- 'pluck' => Masticate::Plucker
92
+ 'pluck' => Masticate::Plucker,
93
+ 'exclude' => Masticate::Exclude
89
94
  }
90
95
 
91
96
  klass = klasses[command]
@@ -145,6 +150,10 @@ EOT
145
150
  results = Masticate.cook(filename, options)
146
151
  logmessage(command, options, results)
147
152
 
153
+ when 'exclude'
154
+ results = Masticate.exclude(filename, options)
155
+ logmessage(command, options, results)
156
+
148
157
  else
149
158
  raise "unknown command #{command}"
150
159
  end
@@ -17,16 +17,15 @@ class Masticate::Plucker < Masticate::Base
17
17
  @headers = row
18
18
  @indexes = @fields.map do |f|
19
19
  case f
20
- when String
21
- row.index(f) or raise "Unable to find column '#{f}'"
22
- when Fixnum
20
+ when Fixnum, /^\d+/
21
+ f = f.to_i
23
22
  if f > row.count
24
23
  raise "Cannot pluck column #{f}, there are only #{row.count} fields"
25
24
  else
26
25
  f-1
27
26
  end
28
27
  else
29
- raise "Invalid field descriptor '#{f}'"
28
+ row.index(f) or raise "Unable to find column '#{f}'"
30
29
  end
31
30
  end
32
31
  @indexes.map {|i| row[i]}
@@ -1,3 +1,3 @@
1
1
  module Masticate
2
- VERSION = "0.2.1"
2
+ VERSION = "0.2.2"
3
3
  end
data/lib/masticate.rb CHANGED
@@ -14,6 +14,8 @@ require_relative "masticate/gsubber"
14
14
  require_relative "masticate/max_rows"
15
15
  require_relative "masticate/concat"
16
16
  require_relative "masticate/relabel"
17
+ require_relative "masticate/exclude"
18
+
17
19
  require_relative "masticate/cook"
18
20
 
19
21
  module Masticate
@@ -53,6 +55,10 @@ module Masticate
53
55
  Relabel.new(filename).relabel(opts)
54
56
  end
55
57
 
58
+ def self.exclude(filename, opts)
59
+ Exclude.new(filename).exclude(opts)
60
+ end
61
+
56
62
  def self.cook(filename, opts)
57
63
  Cook.new(filename).cook(opts)
58
64
  end
@@ -0,0 +1,7 @@
1
+ ID,DATE,VALUE
2
+ ,,594787
3
+ 1,201201060826,594823
4
+ 2,201201060521,594790
5
+ 7,201201060429,594780
6
+ ,201201060446,594786
7
+ 8,201201061011,594857
@@ -0,0 +1,5 @@
1
+ ID,DATE,VALUE
2
+ 1,201201060826,594823
3
+ 2,201201060521,594790
4
+ 7,201201060429,594780
5
+ 8,201201061011,594857
@@ -0,0 +1,15 @@
1
+ # spec for row exclusion
2
+
3
+ require "spec_helper"
4
+
5
+ describe "exclude" do
6
+ it "should be able to ignore rows with blank fields" do
7
+ filename = File.dirname(__FILE__) + "/../data/exclude_input.csv"
8
+ tmp = Tempfile.new('exclude')
9
+ results = Masticate.exclude(filename, :output => tmp, :field => 'ID', :value => '')
10
+ output = File.read(tmp)
11
+ correct_output = File.read(File.dirname(__FILE__) + "/../data/exclude_results.csv")
12
+
13
+ output.should == correct_output
14
+ end
15
+ end
@@ -7,7 +7,7 @@ describe "plucker" do
7
7
  it "should pull named columns" do
8
8
  filename = File.dirname(__FILE__) + "/../data/namedcols.csv"
9
9
  tmp = Tempfile.new('plucker')
10
- results = Masticate.pluck(filename, :output => tmp, :fields => ['three', 'five'])
10
+ results = Masticate.pluck(filename, :output => tmp, :fields => ['three', '5'])
11
11
  output = File.read(tmp)
12
12
  correct_output = File.read(File.dirname(__FILE__) + "/../data/namedcols.csv.output")
13
13
  tmp.unlink
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: masticate
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.1
4
+ version: 0.2.2
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-04-23 00:00:00.000000000 Z
12
+ date: 2012-04-25 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rspec
16
- requirement: &2157087600 !ruby/object:Gem::Requirement
16
+ requirement: &2160837420 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ~>
@@ -21,10 +21,10 @@ dependencies:
21
21
  version: 2.9.0
22
22
  type: :development
23
23
  prerelease: false
24
- version_requirements: *2157087600
24
+ version_requirements: *2160837420
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: guard-rspec
27
- requirement: &2157087060 !ruby/object:Gem::Requirement
27
+ requirement: &2160836340 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ~>
@@ -32,10 +32,10 @@ dependencies:
32
32
  version: 0.7.0
33
33
  type: :development
34
34
  prerelease: false
35
- version_requirements: *2157087060
35
+ version_requirements: *2160836340
36
36
  - !ruby/object:Gem::Dependency
37
37
  name: ruby_gntp
38
- requirement: &2157086580 !ruby/object:Gem::Requirement
38
+ requirement: &2160827420 !ruby/object:Gem::Requirement
39
39
  none: false
40
40
  requirements:
41
41
  - - ~>
@@ -43,7 +43,7 @@ dependencies:
43
43
  version: 0.3.4
44
44
  type: :development
45
45
  prerelease: false
46
- version_requirements: *2157086580
46
+ version_requirements: *2160827420
47
47
  description: Data file crunching
48
48
  email:
49
49
  - jmay@pobox.com
@@ -65,6 +65,7 @@ files:
65
65
  - lib/masticate/cook.rb
66
66
  - lib/masticate/csvify.rb
67
67
  - lib/masticate/datify.rb
68
+ - lib/masticate/exclude.rb
68
69
  - lib/masticate/gsubber.rb
69
70
  - lib/masticate/max_rows.rb
70
71
  - lib/masticate/mender.rb
@@ -82,6 +83,8 @@ files:
82
83
  - spec/data/datify_input.txt
83
84
  - spec/data/events.csv
84
85
  - spec/data/events_reduced.csv
86
+ - spec/data/exclude_input.csv
87
+ - spec/data/exclude_results.csv
85
88
  - spec/data/inlined_headers.csv
86
89
  - spec/data/inlined_headers.csv.output
87
90
  - spec/data/junk_header.csv
@@ -98,6 +101,7 @@ files:
98
101
  - spec/lib/cook_spec.rb
99
102
  - spec/lib/csvify_spec.rb
100
103
  - spec/lib/datify_spec.rb
104
+ - spec/lib/exclude_spec.rb
101
105
  - spec/lib/gsub_spec.rb
102
106
  - spec/lib/maxrow_spec.rb
103
107
  - spec/lib/mender_spec.rb
@@ -138,6 +142,8 @@ test_files:
138
142
  - spec/data/datify_input.txt
139
143
  - spec/data/events.csv
140
144
  - spec/data/events_reduced.csv
145
+ - spec/data/exclude_input.csv
146
+ - spec/data/exclude_results.csv
141
147
  - spec/data/inlined_headers.csv
142
148
  - spec/data/inlined_headers.csv.output
143
149
  - spec/data/junk_header.csv
@@ -154,6 +160,7 @@ test_files:
154
160
  - spec/lib/cook_spec.rb
155
161
  - spec/lib/csvify_spec.rb
156
162
  - spec/lib/datify_spec.rb
163
+ - spec/lib/exclude_spec.rb
157
164
  - spec/lib/gsub_spec.rb
158
165
  - spec/lib/maxrow_spec.rb
159
166
  - spec/lib/mender_spec.rb