masticate 0.2.1 → 0.2.2

This diff shows the changes between two publicly available versions of this package, as released to one of the supported public registries. It is provided for informational purposes only.
@@ -6,7 +6,7 @@ class Masticate::Base
6
6
 
7
7
  def initialize(args)
8
8
  case args
9
- when String
9
+ when String, nil
10
10
  @filename = args
11
11
  when Hash
12
12
  configure(args)
@@ -13,7 +13,10 @@ class Masticate::Csvify < Masticate::Base
13
13
  with_input do |input|
14
14
  while line = get
15
15
  row = CSV.parse_line(line, csv_options)
16
- emit(row.to_csv) if row
16
+ if row
17
+ row = row.map(&:strip)
18
+ emit(row)
19
+ end
17
20
  end
18
21
  end
19
22
  @output.close if opts[:output]
@@ -14,7 +14,11 @@ class Masticate::Datify < Masticate::Base
14
14
 
15
15
  def crunch(row)
16
16
  if !@index
17
- @index = row.index(@field) or raise "Unable to find column '#{@field}'"
17
+ if @field.is_a?(Fixnum) || @field =~ /^\d+/
18
+ @index = @field.to_i
19
+ else
20
+ @index = row.index(@field) or raise "Unable to find column '#{@field}'"
21
+ end
18
22
  elsif row
19
23
  ts = DateTime.strptime(row[@index], @format).to_time
20
24
  row[@index] = ts.to_i rescue nil
@@ -0,0 +1,32 @@
1
+ # exclude rows based on field = value
2
+
3
+ class Masticate::Exclude < Masticate::Base
4
+ def configure(opts)
5
+ standard_options(opts)
6
+
7
+ @field = opts[:field] or raise "missing field to exclude"
8
+ @value = opts[:value] or raise "missing value to exclude"
9
+
10
+ # row-loading automatically strips leading & trailing whitespace and converts blanks to nils,
11
+ # so when looking for blanks need to compare to nil instead of ''
12
+ @value = nil if @value.empty?
13
+ end
14
+
15
+ def exclude(opts)
16
+ execute(opts)
17
+ end
18
+
19
+ def crunch(row)
20
+ if !@headers
21
+ @headers = row
22
+ @index = @headers.index(@field) or raise "Unable to find column '#{@field}' in headers"
23
+ row
24
+ elsif row
25
+ if row[@index] == @value
26
+ # exclude
27
+ else
28
+ row
29
+ end
30
+ end
31
+ end
32
+ end
@@ -1,5 +1,4 @@
1
- # extract subset of columns from CSV
2
- require "csv"
1
+ # apply regex transformation to a field
3
2
 
4
3
  class Masticate::Gsubber < Masticate::Base
5
4
  def configure(opts)
@@ -37,6 +37,10 @@ class Masticate::MyOptionParser
37
37
  @options[:field] = f
38
38
  end
39
39
 
40
+ opts.on("--value VALUE", String, "(*exclude* only) Value to compare field to for exclusion") do |s|
41
+ @options[:value] = s
42
+ end
43
+
40
44
  opts.on("--snip DIRECTIVE", String, "Specify header fields to snip: first N, or by name") do |f|
41
45
  @options[:snip] = f.to_i
42
46
  end
@@ -85,7 +89,8 @@ class Masticate::MyOptionParser
85
89
  'datify' => Masticate::Datify,
86
90
  'maxrows' => Masticate::MaxRows,
87
91
  'relabel' => Masticate::Relabel,
88
- 'pluck' => Masticate::Plucker
92
+ 'pluck' => Masticate::Plucker,
93
+ 'exclude' => Masticate::Exclude
89
94
  }
90
95
 
91
96
  klass = klasses[command]
@@ -145,6 +150,10 @@ EOT
145
150
  results = Masticate.cook(filename, options)
146
151
  logmessage(command, options, results)
147
152
 
153
+ when 'exclude'
154
+ results = Masticate.exclude(filename, options)
155
+ logmessage(command, options, results)
156
+
148
157
  else
149
158
  raise "unknown command #{command}"
150
159
  end
@@ -17,16 +17,15 @@ class Masticate::Plucker < Masticate::Base
17
17
  @headers = row
18
18
  @indexes = @fields.map do |f|
19
19
  case f
20
- when String
21
- row.index(f) or raise "Unable to find column '#{f}'"
22
- when Fixnum
20
+ when Fixnum, /^\d+/
21
+ f = f.to_i
23
22
  if f > row.count
24
23
  raise "Cannot pluck column #{f}, there are only #{row.count} fields"
25
24
  else
26
25
  f-1
27
26
  end
28
27
  else
29
- raise "Invalid field descriptor '#{f}'"
28
+ row.index(f) or raise "Unable to find column '#{f}'"
30
29
  end
31
30
  end
32
31
  @indexes.map {|i| row[i]}
@@ -1,3 +1,3 @@
1
1
  module Masticate
2
- VERSION = "0.2.1"
2
+ VERSION = "0.2.2"
3
3
  end
data/lib/masticate.rb CHANGED
@@ -14,6 +14,8 @@ require_relative "masticate/gsubber"
14
14
  require_relative "masticate/max_rows"
15
15
  require_relative "masticate/concat"
16
16
  require_relative "masticate/relabel"
17
+ require_relative "masticate/exclude"
18
+
17
19
  require_relative "masticate/cook"
18
20
 
19
21
  module Masticate
@@ -53,6 +55,10 @@ module Masticate
53
55
  Relabel.new(filename).relabel(opts)
54
56
  end
55
57
 
58
+ def self.exclude(filename, opts)
59
+ Exclude.new(filename).exclude(opts)
60
+ end
61
+
56
62
  def self.cook(filename, opts)
57
63
  Cook.new(filename).cook(opts)
58
64
  end
@@ -0,0 +1,7 @@
1
+ ID,DATE,VALUE
2
+ ,,594787
3
+ 1,201201060826,594823
4
+ 2,201201060521,594790
5
+ 7,201201060429,594780
6
+ ,201201060446,594786
7
+ 8,201201061011,594857
@@ -0,0 +1,5 @@
1
+ ID,DATE,VALUE
2
+ 1,201201060826,594823
3
+ 2,201201060521,594790
4
+ 7,201201060429,594780
5
+ 8,201201061011,594857
@@ -0,0 +1,15 @@
1
+ # spec for row exclusion
2
+
3
+ require "spec_helper"
4
+
5
+ describe "exclude" do
6
+ it "should be able to ignore rows with blank fields" do
7
+ filename = File.dirname(__FILE__) + "/../data/exclude_input.csv"
8
+ tmp = Tempfile.new('exclude')
9
+ results = Masticate.exclude(filename, :output => tmp, :field => 'ID', :value => '')
10
+ output = File.read(tmp)
11
+ correct_output = File.read(File.dirname(__FILE__) + "/../data/exclude_results.csv")
12
+
13
+ output.should == correct_output
14
+ end
15
+ end
@@ -7,7 +7,7 @@ describe "plucker" do
7
7
  it "should pull named columns" do
8
8
  filename = File.dirname(__FILE__) + "/../data/namedcols.csv"
9
9
  tmp = Tempfile.new('plucker')
10
- results = Masticate.pluck(filename, :output => tmp, :fields => ['three', 'five'])
10
+ results = Masticate.pluck(filename, :output => tmp, :fields => ['three', '5'])
11
11
  output = File.read(tmp)
12
12
  correct_output = File.read(File.dirname(__FILE__) + "/../data/namedcols.csv.output")
13
13
  tmp.unlink
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: masticate
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.1
4
+ version: 0.2.2
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-04-23 00:00:00.000000000 Z
12
+ date: 2012-04-25 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rspec
16
- requirement: &2157087600 !ruby/object:Gem::Requirement
16
+ requirement: &2160837420 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ~>
@@ -21,10 +21,10 @@ dependencies:
21
21
  version: 2.9.0
22
22
  type: :development
23
23
  prerelease: false
24
- version_requirements: *2157087600
24
+ version_requirements: *2160837420
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: guard-rspec
27
- requirement: &2157087060 !ruby/object:Gem::Requirement
27
+ requirement: &2160836340 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ~>
@@ -32,10 +32,10 @@ dependencies:
32
32
  version: 0.7.0
33
33
  type: :development
34
34
  prerelease: false
35
- version_requirements: *2157087060
35
+ version_requirements: *2160836340
36
36
  - !ruby/object:Gem::Dependency
37
37
  name: ruby_gntp
38
- requirement: &2157086580 !ruby/object:Gem::Requirement
38
+ requirement: &2160827420 !ruby/object:Gem::Requirement
39
39
  none: false
40
40
  requirements:
41
41
  - - ~>
@@ -43,7 +43,7 @@ dependencies:
43
43
  version: 0.3.4
44
44
  type: :development
45
45
  prerelease: false
46
- version_requirements: *2157086580
46
+ version_requirements: *2160827420
47
47
  description: Data file crunching
48
48
  email:
49
49
  - jmay@pobox.com
@@ -65,6 +65,7 @@ files:
65
65
  - lib/masticate/cook.rb
66
66
  - lib/masticate/csvify.rb
67
67
  - lib/masticate/datify.rb
68
+ - lib/masticate/exclude.rb
68
69
  - lib/masticate/gsubber.rb
69
70
  - lib/masticate/max_rows.rb
70
71
  - lib/masticate/mender.rb
@@ -82,6 +83,8 @@ files:
82
83
  - spec/data/datify_input.txt
83
84
  - spec/data/events.csv
84
85
  - spec/data/events_reduced.csv
86
+ - spec/data/exclude_input.csv
87
+ - spec/data/exclude_results.csv
85
88
  - spec/data/inlined_headers.csv
86
89
  - spec/data/inlined_headers.csv.output
87
90
  - spec/data/junk_header.csv
@@ -98,6 +101,7 @@ files:
98
101
  - spec/lib/cook_spec.rb
99
102
  - spec/lib/csvify_spec.rb
100
103
  - spec/lib/datify_spec.rb
104
+ - spec/lib/exclude_spec.rb
101
105
  - spec/lib/gsub_spec.rb
102
106
  - spec/lib/maxrow_spec.rb
103
107
  - spec/lib/mender_spec.rb
@@ -138,6 +142,8 @@ test_files:
138
142
  - spec/data/datify_input.txt
139
143
  - spec/data/events.csv
140
144
  - spec/data/events_reduced.csv
145
+ - spec/data/exclude_input.csv
146
+ - spec/data/exclude_results.csv
141
147
  - spec/data/inlined_headers.csv
142
148
  - spec/data/inlined_headers.csv.output
143
149
  - spec/data/junk_header.csv
@@ -154,6 +160,7 @@ test_files:
154
160
  - spec/lib/cook_spec.rb
155
161
  - spec/lib/csvify_spec.rb
156
162
  - spec/lib/datify_spec.rb
163
+ - spec/lib/exclude_spec.rb
157
164
  - spec/lib/gsub_spec.rb
158
165
  - spec/lib/maxrow_spec.rb
159
166
  - spec/lib/mender_spec.rb