masticate 0.2.1 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/masticate/base.rb +1 -1
- data/lib/masticate/csvify.rb +4 -1
- data/lib/masticate/datify.rb +5 -1
- data/lib/masticate/exclude.rb +32 -0
- data/lib/masticate/gsubber.rb +1 -2
- data/lib/masticate/myoptparse.rb +10 -1
- data/lib/masticate/plucker.rb +3 -4
- data/lib/masticate/version.rb +1 -1
- data/lib/masticate.rb +6 -0
- data/spec/data/exclude_input.csv +7 -0
- data/spec/data/exclude_results.csv +5 -0
- data/spec/lib/exclude_spec.rb +15 -0
- data/spec/lib/plucker_spec.rb +1 -1
- metadata +15 -8
data/lib/masticate/base.rb
CHANGED
data/lib/masticate/csvify.rb
CHANGED
@@ -13,7 +13,10 @@ class Masticate::Csvify < Masticate::Base
|
|
13
13
|
with_input do |input|
|
14
14
|
while line = get
|
15
15
|
row = CSV.parse_line(line, csv_options)
|
16
|
-
|
16
|
+
if row
|
17
|
+
row = row.map(&:strip)
|
18
|
+
emit(row)
|
19
|
+
end
|
17
20
|
end
|
18
21
|
end
|
19
22
|
@output.close if opts[:output]
|
data/lib/masticate/datify.rb
CHANGED
@@ -14,7 +14,11 @@ class Masticate::Datify < Masticate::Base
|
|
14
14
|
|
15
15
|
def crunch(row)
|
16
16
|
if !@index
|
17
|
-
@
|
17
|
+
if @field.is_a?(Fixnum) || @field =~ /^\d+/
|
18
|
+
@index = @field.to_i
|
19
|
+
else
|
20
|
+
@index = row.index(@field) or raise "Unable to find column '#{@field}'"
|
21
|
+
end
|
18
22
|
elsif row
|
19
23
|
ts = DateTime.strptime(row[@index], @format).to_time
|
20
24
|
row[@index] = ts.to_i rescue nil
|
data/lib/masticate/exclude.rb
ADDED
@@ -0,0 +1,32 @@
|
|
1
|
+
# exclude rows based on field = value
|
2
|
+
|
3
|
+
class Masticate::Exclude < Masticate::Base
|
4
|
+
def configure(opts)
|
5
|
+
standard_options(opts)
|
6
|
+
|
7
|
+
@field = opts[:field] or raise "missing field to exclude"
|
8
|
+
@value = opts[:value] or raise "missing value to exclude"
|
9
|
+
|
10
|
+
# row-loading automatically strips leading & trailing whitespace and converts blanks to nils,
|
11
|
+
# so when looking for blanks need to compare to nil instead of ''
|
12
|
+
@value = nil if @value.empty?
|
13
|
+
end
|
14
|
+
|
15
|
+
def exclude(opts)
|
16
|
+
execute(opts)
|
17
|
+
end
|
18
|
+
|
19
|
+
def crunch(row)
|
20
|
+
if !@headers
|
21
|
+
@headers = row
|
22
|
+
@index = @headers.index(@field) or raise "Unable to find column '#{@field}' in headers"
|
23
|
+
row
|
24
|
+
elsif row
|
25
|
+
if row[@index] == @value
|
26
|
+
# exclude
|
27
|
+
else
|
28
|
+
row
|
29
|
+
end
|
30
|
+
end
|
31
|
+
end
|
32
|
+
end
|
data/lib/masticate/gsubber.rb
CHANGED
data/lib/masticate/myoptparse.rb
CHANGED
@@ -37,6 +37,10 @@ class Masticate::MyOptionParser
|
|
37
37
|
@options[:field] = f
|
38
38
|
end
|
39
39
|
|
40
|
+
opts.on("--value VALUE", String, "(*exclude* only) Value to compare field to for exclusion") do |s|
|
41
|
+
@options[:value] = s
|
42
|
+
end
|
43
|
+
|
40
44
|
opts.on("--snip DIRECTIVE", String, "Specify header fields to snip: first N, or by name") do |f|
|
41
45
|
@options[:snip] = f.to_i
|
42
46
|
end
|
@@ -85,7 +89,8 @@ class Masticate::MyOptionParser
|
|
85
89
|
'datify' => Masticate::Datify,
|
86
90
|
'maxrows' => Masticate::MaxRows,
|
87
91
|
'relabel' => Masticate::Relabel,
|
88
|
-
'pluck' => Masticate::Plucker
|
92
|
+
'pluck' => Masticate::Plucker,
|
93
|
+
'exclude' => Masticate::Exclude
|
89
94
|
}
|
90
95
|
|
91
96
|
klass = klasses[command]
|
@@ -145,6 +150,10 @@ EOT
|
|
145
150
|
results = Masticate.cook(filename, options)
|
146
151
|
logmessage(command, options, results)
|
147
152
|
|
153
|
+
when 'exclude'
|
154
|
+
results = Masticate.exclude(filename, options)
|
155
|
+
logmessage(command, options, results)
|
156
|
+
|
148
157
|
else
|
149
158
|
raise "unknown command #{command}"
|
150
159
|
end
|
data/lib/masticate/plucker.rb
CHANGED
@@ -17,16 +17,15 @@ class Masticate::Plucker < Masticate::Base
|
|
17
17
|
@headers = row
|
18
18
|
@indexes = @fields.map do |f|
|
19
19
|
case f
|
20
|
-
when
|
21
|
-
|
22
|
-
when Fixnum
|
20
|
+
when Fixnum, /^\d+/
|
21
|
+
f = f.to_i
|
23
22
|
if f > row.count
|
24
23
|
raise "Cannot pluck column #{f}, there are only #{row.count} fields"
|
25
24
|
else
|
26
25
|
f-1
|
27
26
|
end
|
28
27
|
else
|
29
|
-
raise "
|
28
|
+
row.index(f) or raise "Unable to find column '#{f}'"
|
30
29
|
end
|
31
30
|
end
|
32
31
|
@indexes.map {|i| row[i]}
|
data/lib/masticate/version.rb
CHANGED
data/lib/masticate.rb
CHANGED
@@ -14,6 +14,8 @@ require_relative "masticate/gsubber"
|
|
14
14
|
require_relative "masticate/max_rows"
|
15
15
|
require_relative "masticate/concat"
|
16
16
|
require_relative "masticate/relabel"
|
17
|
+
require_relative "masticate/exclude"
|
18
|
+
|
17
19
|
require_relative "masticate/cook"
|
18
20
|
|
19
21
|
module Masticate
|
@@ -53,6 +55,10 @@ module Masticate
|
|
53
55
|
Relabel.new(filename).relabel(opts)
|
54
56
|
end
|
55
57
|
|
58
|
+
def self.exclude(filename, opts)
|
59
|
+
Exclude.new(filename).exclude(opts)
|
60
|
+
end
|
61
|
+
|
56
62
|
def self.cook(filename, opts)
|
57
63
|
Cook.new(filename).cook(opts)
|
58
64
|
end
|
data/spec/lib/exclude_spec.rb
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
# spec for row exclusion
|
2
|
+
|
3
|
+
require "spec_helper"
|
4
|
+
|
5
|
+
describe "exclude" do
|
6
|
+
it "should be able to ignore rows with blank fields" do
|
7
|
+
filename = File.dirname(__FILE__) + "/../data/exclude_input.csv"
|
8
|
+
tmp = Tempfile.new('exclude')
|
9
|
+
results = Masticate.exclude(filename, :output => tmp, :field => 'ID', :value => '')
|
10
|
+
output = File.read(tmp)
|
11
|
+
correct_output = File.read(File.dirname(__FILE__) + "/../data/exclude_results.csv")
|
12
|
+
|
13
|
+
output.should == correct_output
|
14
|
+
end
|
15
|
+
end
|
data/spec/lib/plucker_spec.rb
CHANGED
@@ -7,7 +7,7 @@ describe "plucker" do
|
|
7
7
|
it "should pull named columns" do
|
8
8
|
filename = File.dirname(__FILE__) + "/../data/namedcols.csv"
|
9
9
|
tmp = Tempfile.new('plucker')
|
10
|
-
results = Masticate.pluck(filename, :output => tmp, :fields => ['three', '
|
10
|
+
results = Masticate.pluck(filename, :output => tmp, :fields => ['three', '5'])
|
11
11
|
output = File.read(tmp)
|
12
12
|
correct_output = File.read(File.dirname(__FILE__) + "/../data/namedcols.csv.output")
|
13
13
|
tmp.unlink
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: masticate
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.1
|
4
|
+
version: 0.2.2
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-04-
|
12
|
+
date: 2012-04-25 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rspec
|
16
|
-
requirement: &
|
16
|
+
requirement: &2160837420 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ~>
|
@@ -21,10 +21,10 @@ dependencies:
|
|
21
21
|
version: 2.9.0
|
22
22
|
type: :development
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *2160837420
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: guard-rspec
|
27
|
-
requirement: &
|
27
|
+
requirement: &2160836340 !ruby/object:Gem::Requirement
|
28
28
|
none: false
|
29
29
|
requirements:
|
30
30
|
- - ~>
|
@@ -32,10 +32,10 @@ dependencies:
|
|
32
32
|
version: 0.7.0
|
33
33
|
type: :development
|
34
34
|
prerelease: false
|
35
|
-
version_requirements: *
|
35
|
+
version_requirements: *2160836340
|
36
36
|
- !ruby/object:Gem::Dependency
|
37
37
|
name: ruby_gntp
|
38
|
-
requirement: &
|
38
|
+
requirement: &2160827420 !ruby/object:Gem::Requirement
|
39
39
|
none: false
|
40
40
|
requirements:
|
41
41
|
- - ~>
|
@@ -43,7 +43,7 @@ dependencies:
|
|
43
43
|
version: 0.3.4
|
44
44
|
type: :development
|
45
45
|
prerelease: false
|
46
|
-
version_requirements: *
|
46
|
+
version_requirements: *2160827420
|
47
47
|
description: Data file crunching
|
48
48
|
email:
|
49
49
|
- jmay@pobox.com
|
@@ -65,6 +65,7 @@ files:
|
|
65
65
|
- lib/masticate/cook.rb
|
66
66
|
- lib/masticate/csvify.rb
|
67
67
|
- lib/masticate/datify.rb
|
68
|
+
- lib/masticate/exclude.rb
|
68
69
|
- lib/masticate/gsubber.rb
|
69
70
|
- lib/masticate/max_rows.rb
|
70
71
|
- lib/masticate/mender.rb
|
@@ -82,6 +83,8 @@ files:
|
|
82
83
|
- spec/data/datify_input.txt
|
83
84
|
- spec/data/events.csv
|
84
85
|
- spec/data/events_reduced.csv
|
86
|
+
- spec/data/exclude_input.csv
|
87
|
+
- spec/data/exclude_results.csv
|
85
88
|
- spec/data/inlined_headers.csv
|
86
89
|
- spec/data/inlined_headers.csv.output
|
87
90
|
- spec/data/junk_header.csv
|
@@ -98,6 +101,7 @@ files:
|
|
98
101
|
- spec/lib/cook_spec.rb
|
99
102
|
- spec/lib/csvify_spec.rb
|
100
103
|
- spec/lib/datify_spec.rb
|
104
|
+
- spec/lib/exclude_spec.rb
|
101
105
|
- spec/lib/gsub_spec.rb
|
102
106
|
- spec/lib/maxrow_spec.rb
|
103
107
|
- spec/lib/mender_spec.rb
|
@@ -138,6 +142,8 @@ test_files:
|
|
138
142
|
- spec/data/datify_input.txt
|
139
143
|
- spec/data/events.csv
|
140
144
|
- spec/data/events_reduced.csv
|
145
|
+
- spec/data/exclude_input.csv
|
146
|
+
- spec/data/exclude_results.csv
|
141
147
|
- spec/data/inlined_headers.csv
|
142
148
|
- spec/data/inlined_headers.csv.output
|
143
149
|
- spec/data/junk_header.csv
|
@@ -154,6 +160,7 @@ test_files:
|
|
154
160
|
- spec/lib/cook_spec.rb
|
155
161
|
- spec/lib/csvify_spec.rb
|
156
162
|
- spec/lib/datify_spec.rb
|
163
|
+
- spec/lib/exclude_spec.rb
|
157
164
|
- spec/lib/gsub_spec.rb
|
158
165
|
- spec/lib/maxrow_spec.rb
|
159
166
|
- spec/lib/mender_spec.rb
|