masticate 0.2.1 → 0.2.2
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/masticate/base.rb +1 -1
- data/lib/masticate/csvify.rb +4 -1
- data/lib/masticate/datify.rb +5 -1
- data/lib/masticate/exclude.rb +32 -0
- data/lib/masticate/gsubber.rb +1 -2
- data/lib/masticate/myoptparse.rb +10 -1
- data/lib/masticate/plucker.rb +3 -4
- data/lib/masticate/version.rb +1 -1
- data/lib/masticate.rb +6 -0
- data/spec/data/exclude_input.csv +7 -0
- data/spec/data/exclude_results.csv +5 -0
- data/spec/lib/exclude_spec.rb +15 -0
- data/spec/lib/plucker_spec.rb +1 -1
- metadata +15 -8
data/lib/masticate/base.rb
CHANGED
data/lib/masticate/csvify.rb
CHANGED
@@ -13,7 +13,10 @@ class Masticate::Csvify < Masticate::Base
|
|
13
13
|
with_input do |input|
|
14
14
|
while line = get
|
15
15
|
row = CSV.parse_line(line, csv_options)
|
16
|
-
|
16
|
+
if row
|
17
|
+
row = row.map(&:strip)
|
18
|
+
emit(row)
|
19
|
+
end
|
17
20
|
end
|
18
21
|
end
|
19
22
|
@output.close if opts[:output]
|
data/lib/masticate/datify.rb
CHANGED
@@ -14,7 +14,11 @@ class Masticate::Datify < Masticate::Base
|
|
14
14
|
|
15
15
|
def crunch(row)
|
16
16
|
if !@index
|
17
|
-
@
|
17
|
+
if @field.is_a?(Fixnum) || @field =~ /^\d+/
|
18
|
+
@index = @field.to_i
|
19
|
+
else
|
20
|
+
@index = row.index(@field) or raise "Unable to find column '#{@field}'"
|
21
|
+
end
|
18
22
|
elsif row
|
19
23
|
ts = DateTime.strptime(row[@index], @format).to_time
|
20
24
|
row[@index] = ts.to_i rescue nil
|
@@ -0,0 +1,32 @@
|
|
1
|
+
# exclude rows based on field = value
|
2
|
+
|
3
|
+
class Masticate::Exclude < Masticate::Base
|
4
|
+
def configure(opts)
|
5
|
+
standard_options(opts)
|
6
|
+
|
7
|
+
@field = opts[:field] or raise "missing field to exclude"
|
8
|
+
@value = opts[:value] or raise "missing value to exclude"
|
9
|
+
|
10
|
+
# row-loading automatically strips leading & trailing whitespace and converts blanks to nils,
|
11
|
+
# so when looking for blanks need to compare to nil instead of ''
|
12
|
+
@value = nil if @value.empty?
|
13
|
+
end
|
14
|
+
|
15
|
+
def exclude(opts)
|
16
|
+
execute(opts)
|
17
|
+
end
|
18
|
+
|
19
|
+
def crunch(row)
|
20
|
+
if !@headers
|
21
|
+
@headers = row
|
22
|
+
@index = @headers.index(@field) or raise "Unable to find column '#{@field}' in headers"
|
23
|
+
row
|
24
|
+
elsif row
|
25
|
+
if row[@index] == @value
|
26
|
+
# exclude
|
27
|
+
else
|
28
|
+
row
|
29
|
+
end
|
30
|
+
end
|
31
|
+
end
|
32
|
+
end
|
data/lib/masticate/gsubber.rb
CHANGED
data/lib/masticate/myoptparse.rb
CHANGED
@@ -37,6 +37,10 @@ class Masticate::MyOptionParser
|
|
37
37
|
@options[:field] = f
|
38
38
|
end
|
39
39
|
|
40
|
+
opts.on("--value VALUE", String, "(*exclude* only) Value to compare field to for exclusion") do |s|
|
41
|
+
@options[:value] = s
|
42
|
+
end
|
43
|
+
|
40
44
|
opts.on("--snip DIRECTIVE", String, "Specify header fields to snip: first N, or by name") do |f|
|
41
45
|
@options[:snip] = f.to_i
|
42
46
|
end
|
@@ -85,7 +89,8 @@ class Masticate::MyOptionParser
|
|
85
89
|
'datify' => Masticate::Datify,
|
86
90
|
'maxrows' => Masticate::MaxRows,
|
87
91
|
'relabel' => Masticate::Relabel,
|
88
|
-
'pluck' => Masticate::Plucker
|
92
|
+
'pluck' => Masticate::Plucker,
|
93
|
+
'exclude' => Masticate::Exclude
|
89
94
|
}
|
90
95
|
|
91
96
|
klass = klasses[command]
|
@@ -145,6 +150,10 @@ EOT
|
|
145
150
|
results = Masticate.cook(filename, options)
|
146
151
|
logmessage(command, options, results)
|
147
152
|
|
153
|
+
when 'exclude'
|
154
|
+
results = Masticate.exclude(filename, options)
|
155
|
+
logmessage(command, options, results)
|
156
|
+
|
148
157
|
else
|
149
158
|
raise "unknown command #{command}"
|
150
159
|
end
|
data/lib/masticate/plucker.rb
CHANGED
@@ -17,16 +17,15 @@ class Masticate::Plucker < Masticate::Base
|
|
17
17
|
@headers = row
|
18
18
|
@indexes = @fields.map do |f|
|
19
19
|
case f
|
20
|
-
when
|
21
|
-
|
22
|
-
when Fixnum
|
20
|
+
when Fixnum, /^\d+/
|
21
|
+
f = f.to_i
|
23
22
|
if f > row.count
|
24
23
|
raise "Cannot pluck column #{f}, there are only #{row.count} fields"
|
25
24
|
else
|
26
25
|
f-1
|
27
26
|
end
|
28
27
|
else
|
29
|
-
raise "
|
28
|
+
row.index(f) or raise "Unable to find column '#{f}'"
|
30
29
|
end
|
31
30
|
end
|
32
31
|
@indexes.map {|i| row[i]}
|
data/lib/masticate/version.rb
CHANGED
data/lib/masticate.rb
CHANGED
@@ -14,6 +14,8 @@ require_relative "masticate/gsubber"
|
|
14
14
|
require_relative "masticate/max_rows"
|
15
15
|
require_relative "masticate/concat"
|
16
16
|
require_relative "masticate/relabel"
|
17
|
+
require_relative "masticate/exclude"
|
18
|
+
|
17
19
|
require_relative "masticate/cook"
|
18
20
|
|
19
21
|
module Masticate
|
@@ -53,6 +55,10 @@ module Masticate
|
|
53
55
|
Relabel.new(filename).relabel(opts)
|
54
56
|
end
|
55
57
|
|
58
|
+
def self.exclude(filename, opts)
|
59
|
+
Exclude.new(filename).exclude(opts)
|
60
|
+
end
|
61
|
+
|
56
62
|
def self.cook(filename, opts)
|
57
63
|
Cook.new(filename).cook(opts)
|
58
64
|
end
|
@@ -0,0 +1,15 @@
|
|
1
|
+
# spec for row exclusion
|
2
|
+
|
3
|
+
require "spec_helper"
|
4
|
+
|
5
|
+
describe "exclude" do
|
6
|
+
it "should be able to ignore rows with blank fields" do
|
7
|
+
filename = File.dirname(__FILE__) + "/../data/exclude_input.csv"
|
8
|
+
tmp = Tempfile.new('exclude')
|
9
|
+
results = Masticate.exclude(filename, :output => tmp, :field => 'ID', :value => '')
|
10
|
+
output = File.read(tmp)
|
11
|
+
correct_output = File.read(File.dirname(__FILE__) + "/../data/exclude_results.csv")
|
12
|
+
|
13
|
+
output.should == correct_output
|
14
|
+
end
|
15
|
+
end
|
data/spec/lib/plucker_spec.rb
CHANGED
@@ -7,7 +7,7 @@ describe "plucker" do
|
|
7
7
|
it "should pull named columns" do
|
8
8
|
filename = File.dirname(__FILE__) + "/../data/namedcols.csv"
|
9
9
|
tmp = Tempfile.new('plucker')
|
10
|
-
results = Masticate.pluck(filename, :output => tmp, :fields => ['three', '
|
10
|
+
results = Masticate.pluck(filename, :output => tmp, :fields => ['three', '5'])
|
11
11
|
output = File.read(tmp)
|
12
12
|
correct_output = File.read(File.dirname(__FILE__) + "/../data/namedcols.csv.output")
|
13
13
|
tmp.unlink
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: masticate
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.1
|
4
|
+
version: 0.2.2
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-04-
|
12
|
+
date: 2012-04-25 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rspec
|
16
|
-
requirement: &
|
16
|
+
requirement: &2160837420 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ~>
|
@@ -21,10 +21,10 @@ dependencies:
|
|
21
21
|
version: 2.9.0
|
22
22
|
type: :development
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *2160837420
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: guard-rspec
|
27
|
-
requirement: &
|
27
|
+
requirement: &2160836340 !ruby/object:Gem::Requirement
|
28
28
|
none: false
|
29
29
|
requirements:
|
30
30
|
- - ~>
|
@@ -32,10 +32,10 @@ dependencies:
|
|
32
32
|
version: 0.7.0
|
33
33
|
type: :development
|
34
34
|
prerelease: false
|
35
|
-
version_requirements: *
|
35
|
+
version_requirements: *2160836340
|
36
36
|
- !ruby/object:Gem::Dependency
|
37
37
|
name: ruby_gntp
|
38
|
-
requirement: &
|
38
|
+
requirement: &2160827420 !ruby/object:Gem::Requirement
|
39
39
|
none: false
|
40
40
|
requirements:
|
41
41
|
- - ~>
|
@@ -43,7 +43,7 @@ dependencies:
|
|
43
43
|
version: 0.3.4
|
44
44
|
type: :development
|
45
45
|
prerelease: false
|
46
|
-
version_requirements: *
|
46
|
+
version_requirements: *2160827420
|
47
47
|
description: Data file crunching
|
48
48
|
email:
|
49
49
|
- jmay@pobox.com
|
@@ -65,6 +65,7 @@ files:
|
|
65
65
|
- lib/masticate/cook.rb
|
66
66
|
- lib/masticate/csvify.rb
|
67
67
|
- lib/masticate/datify.rb
|
68
|
+
- lib/masticate/exclude.rb
|
68
69
|
- lib/masticate/gsubber.rb
|
69
70
|
- lib/masticate/max_rows.rb
|
70
71
|
- lib/masticate/mender.rb
|
@@ -82,6 +83,8 @@ files:
|
|
82
83
|
- spec/data/datify_input.txt
|
83
84
|
- spec/data/events.csv
|
84
85
|
- spec/data/events_reduced.csv
|
86
|
+
- spec/data/exclude_input.csv
|
87
|
+
- spec/data/exclude_results.csv
|
85
88
|
- spec/data/inlined_headers.csv
|
86
89
|
- spec/data/inlined_headers.csv.output
|
87
90
|
- spec/data/junk_header.csv
|
@@ -98,6 +101,7 @@ files:
|
|
98
101
|
- spec/lib/cook_spec.rb
|
99
102
|
- spec/lib/csvify_spec.rb
|
100
103
|
- spec/lib/datify_spec.rb
|
104
|
+
- spec/lib/exclude_spec.rb
|
101
105
|
- spec/lib/gsub_spec.rb
|
102
106
|
- spec/lib/maxrow_spec.rb
|
103
107
|
- spec/lib/mender_spec.rb
|
@@ -138,6 +142,8 @@ test_files:
|
|
138
142
|
- spec/data/datify_input.txt
|
139
143
|
- spec/data/events.csv
|
140
144
|
- spec/data/events_reduced.csv
|
145
|
+
- spec/data/exclude_input.csv
|
146
|
+
- spec/data/exclude_results.csv
|
141
147
|
- spec/data/inlined_headers.csv
|
142
148
|
- spec/data/inlined_headers.csv.output
|
143
149
|
- spec/data/junk_header.csv
|
@@ -154,6 +160,7 @@ test_files:
|
|
154
160
|
- spec/lib/cook_spec.rb
|
155
161
|
- spec/lib/csvify_spec.rb
|
156
162
|
- spec/lib/datify_spec.rb
|
163
|
+
- spec/lib/exclude_spec.rb
|
157
164
|
- spec/lib/gsub_spec.rb
|
158
165
|
- spec/lib/maxrow_spec.rb
|
159
166
|
- spec/lib/mender_spec.rb
|