masticate 0.1.5 → 0.2
Sign up to get free protection for your applications and to get access to all the features.
- data/bin/masticate +3 -114
- data/lib/masticate/base.rb +13 -2
- data/lib/masticate/concat.rb +21 -0
- data/lib/masticate/cook.rb +48 -0
- data/lib/masticate/gsubber.rb +23 -6
- data/lib/masticate/max_rows.rb +35 -5
- data/lib/masticate/myoptparse.rb +163 -0
- data/lib/masticate/plucker.rb +33 -3
- data/lib/masticate/relabel.rb +44 -0
- data/lib/masticate/sniffer.rb +1 -1
- data/lib/masticate/version.rb +1 -1
- data/lib/masticate.rb +18 -0
- data/spec/data/concat_result.txt +108 -0
- data/spec/data/cooking_result.csv +41 -0
- data/spec/data/recipe.txt +4 -0
- data/spec/data/relabel_result.csv +4 -0
- data/spec/data/tilde_data.txt +5 -0
- data/spec/lib/concat_spec.rb +17 -0
- data/spec/lib/cook_spec.rb +16 -0
- data/spec/lib/gsub_spec.rb +0 -1
- data/spec/lib/relabel_spec.rb +15 -0
- data/spec/lib/sniffer_spec.rb +7 -0
- data/spec/spec_helper.rb +1 -0
- metadata +28 -8
data/bin/masticate
CHANGED
@@ -1,119 +1,8 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
|
3
3
|
require_relative "../lib/masticate"
|
4
|
-
require "optparse"
|
5
4
|
|
6
|
-
|
5
|
+
masticator = Masticate::MyOptionParser.new
|
6
|
+
command, options, filenames = masticator.parse
|
7
7
|
|
8
|
-
options
|
9
|
-
OptionParser.new do |opts|
|
10
|
-
opts.banner = "Usage: example.rb [options]"
|
11
|
-
|
12
|
-
opts.on("--format FORMAT", String, "Specify format") do |v|
|
13
|
-
options[:format] = v
|
14
|
-
end
|
15
|
-
|
16
|
-
opts.on("--delim DELIMITER", String, "Specify field delimiter (character or TAB; default is ',')") do |v|
|
17
|
-
options[:col_sep] = v
|
18
|
-
options[:col_sep] = "\t" if options[:col_sep] == "TAB"
|
19
|
-
end
|
20
|
-
|
21
|
-
opts.on("--quote QUOTE-CHAR", String, "Specify character used for quoting fields (optional; default is no quoting)") do |char|
|
22
|
-
options[:quote_char] = char
|
23
|
-
end
|
24
|
-
|
25
|
-
opts.on("--stats", "(for *sniff*) collect & display input stats") do
|
26
|
-
options[:stats] = true
|
27
|
-
end
|
28
|
-
|
29
|
-
opts.on("--fields LIST", Array, "Specify fields to select") do |list|
|
30
|
-
options[:fields] = list
|
31
|
-
end
|
32
|
-
|
33
|
-
opts.on("--field FIELD", String, "Specify field to convert") do |f|
|
34
|
-
options[:field] = f
|
35
|
-
end
|
36
|
-
|
37
|
-
opts.on("--snip DIRECTIVE", String, "Specify header fields to snip: first N, or by name") do |f|
|
38
|
-
options[:snip] = f.to_i
|
39
|
-
end
|
40
|
-
|
41
|
-
opts.on("--from REGEXP", String, "Regular expression for gsub conversion") do |s|
|
42
|
-
options[:from] = s
|
43
|
-
end
|
44
|
-
|
45
|
-
opts.on("--to STRING", String, "Result string for gsub conversion") do |s|
|
46
|
-
options[:to] = s
|
47
|
-
end
|
48
|
-
|
49
|
-
opts.on("--inlined", "(for *mend* only) Source file has headers inlined on each line") do |b|
|
50
|
-
options[:inlined] = true
|
51
|
-
end
|
52
|
-
|
53
|
-
opts.on("--dejunk", "(for *mend* only) Expunge junk lines from source") do |b|
|
54
|
-
options[:dejunk] = true
|
55
|
-
end
|
56
|
-
|
57
|
-
opts.on("--by FIELD", String, "(for *maxrows* only) Field to group by") do |f|
|
58
|
-
options[:by] = f
|
59
|
-
end
|
60
|
-
|
61
|
-
opts.on("--max FIELD", String, "(for *maxrows* only) Field to find max value for") do |f|
|
62
|
-
options[:max] = f
|
63
|
-
end
|
64
|
-
end.parse!
|
65
|
-
|
66
|
-
filename = ARGV.shift # use stdin if no filename provided
|
67
|
-
|
68
|
-
def logmessage(command, options, results)
|
69
|
-
$stderr.puts <<-EOT
|
70
|
-
* masticate #{command} (#{options.keys.join(', ')})
|
71
|
-
Lines in input: #{results[:input_count]}
|
72
|
-
Lines in output: #{results[:output_count]}
|
73
|
-
EOT
|
74
|
-
if results[:field_counts]
|
75
|
-
$stderr.puts " Field counts: #{results[:field_counts].inspect}"
|
76
|
-
end
|
77
|
-
end
|
78
|
-
|
79
|
-
case command
|
80
|
-
when 'sniff'
|
81
|
-
results = Masticate.sniff(filename, options)
|
82
|
-
col_sep = results[:col_sep]
|
83
|
-
col_sep = "TAB" if col_sep == "\t"
|
84
|
-
quote_char = results[:quote_char] || "NONE"
|
85
|
-
$stderr.puts <<-EOT
|
86
|
-
Processing complete.
|
87
|
-
Input delimiter: #{col_sep}
|
88
|
-
Quote char: #{quote_char}
|
89
|
-
Field counts: #{results[:field_counts].inspect}
|
90
|
-
Headers: #{results[:headers].join(',')}
|
91
|
-
EOT
|
92
|
-
|
93
|
-
when 'mend'
|
94
|
-
results = Masticate.mend(filename, options)
|
95
|
-
logmessage(command, options, results)
|
96
|
-
|
97
|
-
when 'csvify'
|
98
|
-
results = Masticate.csvify(filename, options)
|
99
|
-
logmessage(command, options, results)
|
100
|
-
|
101
|
-
when 'pluck'
|
102
|
-
results = Masticate.pluck(filename, options)
|
103
|
-
logmessage(command, options, results)
|
104
|
-
|
105
|
-
when 'datify'
|
106
|
-
results = Masticate.datify(filename, options)
|
107
|
-
logmessage(command, options, results)
|
108
|
-
|
109
|
-
when 'gsub'
|
110
|
-
results = Masticate.gsub(filename, options)
|
111
|
-
logmessage(command, options, results)
|
112
|
-
|
113
|
-
when 'maxrows'
|
114
|
-
results = Masticate.maxrows(filename, options)
|
115
|
-
logmessage(command, options, results)
|
116
|
-
|
117
|
-
else
|
118
|
-
raise "unknown command #{command}"
|
119
|
-
end
|
8
|
+
masticator.execute(command, options, filenames)
|
data/lib/masticate/base.rb
CHANGED
@@ -4,8 +4,15 @@ class Masticate::Base
|
|
4
4
|
attr_reader :input_count, :output_count
|
5
5
|
attr_reader :csv_options
|
6
6
|
|
7
|
-
def initialize(
|
8
|
-
|
7
|
+
# Builds a processor either for a named input or as a pre-configured step.
#
# @param args [String, nil, Hash]
#   * String - path (or URL) of the input to read.
#   * nil    - no input was named. The command-line front end passes
#              `filenames.first`, which is nil when no file is given, so nil
#              must be accepted here rather than rejected.
#              NOTE(review): presumably with_input then falls back to stdin,
#              as the 0.1.x script documented - confirm.
#   * Hash   - option hash; handed to the subclass's #configure so the
#              object can be used as a row-at-a-time step.
# @raise [RuntimeError] for any other argument type
def initialize(args)
  case args
  when String, nil
    @filename = args
  when Hash
    configure(args)
  else
    raise "invalid initialization: #{args}"
  end
end
|
10
17
|
|
11
18
|
def with_input
|
@@ -40,4 +47,8 @@ class Masticate::Base
|
|
40
47
|
@csv_options[:quote_char] = opts[:quote_char] || "\0"
|
41
48
|
end
|
42
49
|
end
|
50
|
+
|
51
|
+
# def crunch(row)
|
52
|
+
# # noop
|
53
|
+
# end
|
43
54
|
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
# concatenate input files:
|
2
|
+
# * assuming that each input file has a single header line
|
3
|
+
# * writing a single header line to the output (just use the header line from the first file)
|
4
|
+
# * trusting that all the files have the same format (no validation)
|
5
|
+
|
6
|
+
module Masticate
  # Concatenates several text files that each start with one header line.
  #
  # The first file is copied verbatim, header included; every later file is
  # copied without its first line, so the result carries a single header row.
  # The files are not parsed or validated. This class does not inherit from
  # Masticate::Base.
  class Concat
    # @param filenames [Array<String>] input paths, in output order
    def initialize(filenames)
      @filenames = filenames
    end

    # Writes the concatenation to opts[:output], replacing any existing file,
    # or to stdout when no output is named.
    #
    # Lines are copied in-process instead of shelling out to `cat`/`tail`, so
    # paths containing spaces or shell metacharacters are handled literally.
    #
    # @param opts [Hash] only :output is read
    # @return [Array<String>] the filenames after the first
    def concat(opts)
      first, *rest = @filenames
      with_output(opts[:output]) do |out|
        copy_lines(first, out, 0) if first
        rest.each { |path| copy_lines(path, out, 1) }
      end
    end

    private

    # Yields the destination stream: a freshly truncated file, or stdout.
    def with_output(path)
      return yield($stdout) unless path

      File.open(path, "w") { |file| yield file }
    end

    # Copies +path+ to +out+ line by line, dropping the first +skip+ lines.
    def copy_lines(path, out, skip)
      File.foreach(path).with_index do |line, index|
        out.write(line) if index >= skip
      end
    end
  end
end
|
@@ -0,0 +1,48 @@
|
|
1
|
+
# cook up a recipe
|
2
|
+
# * single file as input
|
3
|
+
# * recipe from a file
|
4
|
+
# * multiple steps
|
5
|
+
# * single output
|
6
|
+
|
7
|
+
require "shellwords"
|
8
|
+
|
9
|
+
module Masticate
  # Runs a multi-step recipe over a single input, producing a single output.
  #
  # Each line of the recipe file is one masticate command line (command name
  # followed by its options). Every input row is parsed once and then passed
  # through each step's #crunch in recipe order.
  class Cook < Masticate::Base
    # @param filename [String, nil] input to read
    def initialize(filename)
      @filename = filename
    end

    # @param opts [Hash] standard options plus :recipe (path of the recipe file)
    # @return [Hash] :input_count and :output_count
    # @raise [RuntimeError] when opts[:recipe] is missing
    def cook(opts)
      recipefile = opts[:recipe] or raise "missing recipe for cook"
      steps = load_steps(recipefile)
      standard_options(opts)

      @output_count = 0
      with_input do
        while (line = get)
          row = CSV.parse_line(line, csv_options)
          # Each step receives the previous step's result (which may be nil).
          row = steps.inject(row) { |current, step| step.crunch(current) }
          emit(row.to_csv) if row
        end
      end
      @output.close if opts[:output]

      {
        :input_count => @input_count,
        :output_count => @output_count
      }
    end

    private

    # Turns the recipe file into configured step objects, one per non-blank
    # line. Blank lines are skipped: they would otherwise parse to a nil
    # command and fail inside MyOptionParser#prepare.
    def load_steps(recipefile)
      command_lines = File.readlines(recipefile).map { |text| Shellwords.split(text) }
      command_lines.reject(&:empty?).map do |argv|
        parser = Masticate::MyOptionParser.new
        command, options = parser.parse(argv)
        parser.prepare(command, options)
      end
    end
  end
end
|
data/lib/masticate/gsubber.rb
CHANGED
@@ -2,13 +2,21 @@
|
|
2
2
|
require "csv"
|
3
3
|
|
4
4
|
class Masticate::Gsubber < Masticate::Base
|
5
|
-
def
|
5
|
+
def configure(opts)
|
6
6
|
standard_options(opts)
|
7
7
|
|
8
|
-
field = opts[:field] or raise "missing field to gsub"
|
9
|
-
from = Regexp.new(opts[:from]) or raise "Invalid regex '#{opts[:from]}' for conversion"
|
10
|
-
to = opts[:to] or raise "missing 'to' string for gsub"
|
8
|
+
@field = opts[:field] or raise "missing field to gsub"
|
9
|
+
@from = Regexp.new(opts[:from]) or raise "Invalid regex '#{opts[:from]}' for conversion"
|
10
|
+
@to = opts[:to] or raise "missing 'to' string for gsub"
|
11
|
+
end
|
12
|
+
|
13
|
+
def set_headers(row)
|
14
|
+
@headers = row
|
15
|
+
@index = @headers.index(@field) or raise "Unable to find column '#{@field}' in headers"
|
16
|
+
end
|
11
17
|
|
18
|
+
def gsub(opts)
|
19
|
+
configure(opts)
|
12
20
|
@output_count = 0
|
13
21
|
headers = nil
|
14
22
|
with_input do |input|
|
@@ -16,11 +24,11 @@ class Masticate::Gsubber < Masticate::Base
|
|
16
24
|
row = CSV.parse_line(line, csv_options)
|
17
25
|
if !headers
|
18
26
|
headers = row
|
19
|
-
index = headers.index(field) or raise "Unable to find column '#{field}' in headers"
|
27
|
+
index = headers.index(@field) or raise "Unable to find column '#{@field}' in headers"
|
20
28
|
emit(line)
|
21
29
|
else
|
22
30
|
oldval = row[index]
|
23
|
-
newval = oldval.gsub(from, to)
|
31
|
+
newval = oldval.gsub(@from, @to)
|
24
32
|
row[index] = newval
|
25
33
|
emit(row.to_csv)
|
26
34
|
end
|
@@ -33,4 +41,13 @@ class Masticate::Gsubber < Masticate::Base
|
|
33
41
|
:output_count => @output_count
|
34
42
|
}
|
35
43
|
end
|
44
|
+
|
45
|
+
# Row-at-a-time form of the gsub conversion, for use as a recipe step.
#
# The first row received is taken as the header row: it is passed to
# set_headers and returned untouched. On every later row, the configured
# column has @from replaced with @to, in place.
#
# A nil cell (what CSV parsing yields for an empty unquoted field) is left
# as nil instead of raising NoMethodError.
#
# @param row [Array<String, nil>] parsed fields of one line
# @return [Array<String, nil>] the same array
def crunch(row)
  if !@headers
    set_headers(row)
  else
    cell = row[@index]
    row[@index] = cell.gsub(@from, @to) if cell
  end
  row
end
|
36
53
|
end
|
data/lib/masticate/max_rows.rb
CHANGED
@@ -2,11 +2,15 @@
|
|
2
2
|
require "csv"
|
3
3
|
|
4
4
|
class Masticate::MaxRows < Masticate::Base
|
5
|
-
def
|
5
|
+
def configure(opts)
|
6
6
|
standard_options(opts)
|
7
7
|
|
8
|
-
groupby = opts[:by] or raise "missing field to group by"
|
9
|
-
maxon = opts[:max] or raise "missing field to max on"
|
8
|
+
@groupby = opts[:by] or raise "missing field to group by"
|
9
|
+
@maxon = opts[:max] or raise "missing field to max on"
|
10
|
+
end
|
11
|
+
|
12
|
+
def maxrows(opts)
|
13
|
+
configure(opts)
|
10
14
|
|
11
15
|
@output_count = 0
|
12
16
|
headers = nil
|
@@ -16,8 +20,8 @@ class Masticate::MaxRows < Masticate::Base
|
|
16
20
|
row = CSV.parse_line(line, csv_options)
|
17
21
|
if !headers
|
18
22
|
headers = row
|
19
|
-
index_by = headers.index(groupby) or raise "Unable to find column '#{groupby}'"
|
20
|
-
index_max = headers.index(maxon) or raise "Unable to find column '#{maxon}'"
|
23
|
+
index_by = headers.index(@groupby) or raise "Unable to find column '#{@groupby}'"
|
24
|
+
index_max = headers.index(@maxon) or raise "Unable to find column '#{@maxon}'"
|
21
25
|
emit(line)
|
22
26
|
else
|
23
27
|
key = row[index_by]
|
@@ -45,4 +49,30 @@ class Masticate::MaxRows < Masticate::Base
|
|
45
49
|
:output_count => @output_count
|
46
50
|
}
|
47
51
|
end
|
52
|
+
|
53
|
+
# Row-at-a-time form of maxrows, for use as a recipe step.
#
# * First row: taken as the header; the group-by and max columns are located
#   and the header is returned.
# * nil row: flush signal; the best row kept for each key is emitted and nil
#   is returned, so a caller that emits truthy results does not mistake the
#   accumulator for a row.
# * Any other row: kept if it is the first for its key or its max-column
#   value compares greater than the kept row's. Returns the row when it was
#   kept, nil otherwise.
#
# Values are compared exactly as parsed; no numeric conversion is done here.
#
# @param row [Array, nil]
# @return [Array, nil]
# @raise [RuntimeError] when a configured column is not in the header
def crunch(row)
  if !@headers
    @headers = row
    @index_by = row.index(@groupby) or raise "Unable to find column '#{@groupby}'"
    @index_max = row.index(@maxon) or raise "Unable to find column '#{@maxon}'"
    @accum = {}
    row
  elsif row.nil?
    # output the accumulated results
    @accum.each_value { |best| emit(best.to_csv) }
    nil
  else
    key = row[@index_by]
    best = @accum[key]
    if best.nil? || row[@index_max] > best[@index_max]
      @accum[key] = row
    end
  end
end
|
48
78
|
end
|
@@ -0,0 +1,163 @@
|
|
1
|
+
require "optparse"
|
2
|
+
|
3
|
+
module Masticate
  # Command-line front end: parses `masticate <command> [options] [files]`
  # and either runs the command (#execute) or builds a configured step
  # object for use in a recipe (#prepare).
  class MyOptionParser
    attr_reader :command, :options

    # Commands usable as recipe steps, mapped to the name of their class
    # under Masticate. Names are resolved lazily because this file is loaded
    # before the step classes are defined.
    STEP_CLASS_NAMES = {
      'gsub' => :Gsubber,
      'datify' => :Datify,
      'maxrows' => :MaxRows,
      'relabel' => :Relabel,
      'pluck' => :Plucker
    }.freeze

    # Commands that take one input file and whose results are summarised
    # through #logmessage.
    LOGGED_COMMANDS = %w[mend csvify pluck datify gsub maxrows cook].freeze

    def initialize
      @options = {}
      @parser = OptionParser.new do |opts|
        opts.banner = "Usage: masticate [command] [options]"

        opts.on("--output FILENAME", String, "Redirect output from stdout to file") do |f|
          @options[:output] = f
        end

        opts.on("--format FORMAT", String, "Specify format") do |v|
          @options[:format] = v
        end

        opts.on("--delim DELIMITER", String, "Specify field delimiter (character or TAB; default is ',')") do |v|
          @options[:col_sep] = (v == "TAB" ? "\t" : v)
        end

        opts.on("--quote QUOTE-CHAR", String, "Specify character used for quoting fields (optional; default is no quoting)") do |char|
          @options[:quote_char] = char
        end

        opts.on("--stats", "(for *sniff*) collect & display input stats") do
          @options[:stats] = true
        end

        opts.on("--fields LIST", Array, "Specify fields to select") do |list|
          @options[:fields] = list
        end

        opts.on("--field FIELD", String, "Specify field to convert") do |f|
          @options[:field] = f
        end

        opts.on("--snip DIRECTIVE", String, "Specify header fields to snip: first N, or by name") do |f|
          @options[:snip] = f.to_i
        end

        opts.on("--from REGEXP", String, "Regular expression for gsub conversion") do |s|
          @options[:from] = s
        end

        # if I specify String here, then a blank string '' is considered invalid and triggers an exception.
        opts.on("--to STRING", "Result string for gsub conversion") do |s|
          @options[:to] = s
        end

        opts.on("--inlined", "(for *mend* only) Source file has headers inlined on each line") do
          @options[:inlined] = true
        end

        opts.on("--dejunk", "(for *mend* only) Expunge junk lines from source") do
          @options[:dejunk] = true
        end

        opts.on("--by FIELD", String, "(for *maxrows* only) Field to group by") do |f|
          @options[:by] = f
        end

        opts.on("--max FIELD", String, "(for *maxrows* only) Field to find max value for") do |f|
          @options[:max] = f
        end

        opts.on("--recipe FILENAME", String, "(*cook* only) Recipe file") do |f|
          @options[:recipe] = f
        end
      end
    end

    # Splits an argument vector into command, options and filenames.
    #
    # The command name is shifted off +argv+ (so +argv+ is modified); the
    # remaining arguments are parsed without further modifying +argv+, which
    # therefore still contains the option flags afterwards.
    #
    # @param argv [Array<String>]
    # @return [Array] [command, options hash, leftover filenames]
    def parse(argv = ARGV)
      @command = argv.shift
      filenames = @parser.parse(argv)
      [@command, @options, filenames]
    end

    # Builds the configured step object for a recipe line.
    #
    # @raise [RuntimeError] when +command+ cannot be used as a recipe step
    def prepare(command, options)
      class_name = STEP_CLASS_NAMES[command] or raise "unknown command #{command}"
      Masticate.const_get(class_name).new(options)
    end

    # Runs +command+ against the parsed filenames.
    #
    # @param filenames [Array<String>, nil] as returned by #parse; nil or
    #   empty means no input file was named
    # @raise [RuntimeError] for an unknown command
    def execute(command, options, filenames = nil)
      filenames = Array(filenames)
      filename = filenames.first

      case command
      when 'sniff'
        report_sniff(Masticate.sniff(filename, options))
      when *LOGGED_COMMANDS
        logmessage(command, options, Masticate.public_send(command, filename, options))
      when 'concat'
        # Use the parsed filenames, not ARGV: ARGV still holds option flags.
        Masticate.concat(filenames, options)
      when 'relabel'
        Masticate.relabel(filename, options)
      else
        raise "unknown command #{command}"
      end
    end

    # Writes a summary of a finished command to stderr.
    def logmessage(command, options, results)
      $stderr.puts <<-EOT
* masticate #{command} (#{options.keys.join(', ')})
Lines in input: #{results[:input_count]}
Lines in output: #{results[:output_count]}
      EOT
      if results[:field_counts]
        $stderr.puts " Field counts: #{results[:field_counts].inspect}"
      end
    end

    private

    # Writes the findings of a sniff run to stderr.
    def report_sniff(results)
      col_sep = results[:col_sep]
      col_sep = "TAB" if col_sep == "\t"
      quote_char = results[:quote_char] || "NONE"
      $stderr.puts <<-EOT
Processing complete.
Input delimiter: #{col_sep}
Quote char: #{quote_char}
Field counts: #{results[:field_counts].inspect}
Headers: #{results[:headers].join(',')}
      EOT
    end
  end
end
|
data/lib/masticate/plucker.rb
CHANGED
@@ -2,10 +2,17 @@
|
|
2
2
|
require "csv"
|
3
3
|
|
4
4
|
class Masticate::Plucker < Masticate::Base
|
5
|
-
def
|
5
|
+
def configure(opts)
|
6
6
|
standard_options(opts)
|
7
7
|
|
8
|
-
fields = opts[:fields] or raise "missing fields to pluck"
|
8
|
+
@fields = opts[:fields] or raise "missing fields to pluck"
|
9
|
+
end
|
10
|
+
|
11
|
+
def pluck(opts)
|
12
|
+
configure(opts)
|
13
|
+
# standard_options(opts)
|
14
|
+
#
|
15
|
+
# fields = opts[:fields] or raise "missing fields to pluck"
|
9
16
|
|
10
17
|
@output_count = 0
|
11
18
|
headers = nil
|
@@ -14,7 +21,7 @@ class Masticate::Plucker < Masticate::Base
|
|
14
21
|
row = CSV.parse_line(line, csv_options)
|
15
22
|
if !headers
|
16
23
|
headers = row
|
17
|
-
indexes = fields.map do |f|
|
24
|
+
indexes = @fields.map do |f|
|
18
25
|
case f
|
19
26
|
when String
|
20
27
|
headers.index(f) or raise "Unable to find column '#{f}'"
|
@@ -41,4 +48,27 @@ class Masticate::Plucker < Masticate::Base
|
|
41
48
|
:output_count => @output_count
|
42
49
|
}
|
43
50
|
end
|
51
|
+
|
52
|
+
# Row-at-a-time form of pluck, for use as a recipe step.
#
# The first row received is taken as the header row and is used to turn
# each configured field into a column index: a String is looked up by name,
# an Integer is a 1-based column position. Every row, header included, is
# then reduced to the selected columns in the configured order.
#
# Integer is matched rather than Fixnum, which no longer exists in current
# Ruby versions.
#
# @param row [Array] parsed fields of one line
# @return [Array] the selected fields
# @raise [RuntimeError] for an unknown column name, a position beyond the
#   header width, or a field that is neither String nor Integer
def crunch(row)
  if !@headers
    @headers = row
    @indexes = @fields.map do |f|
      case f
      when String
        row.index(f) or raise "Unable to find column '#{f}'"
      when Integer
        if f > row.count
          raise "Cannot pluck column #{f}, there are only #{row.count} fields"
        end
        f - 1
      else
        raise "Invalid field descriptor '#{f}'"
      end
    end
  end

  # output is just the selected columns
  row.values_at(*@indexes)
end
|
44
74
|
end
|
@@ -0,0 +1,44 @@
|
|
1
|
+
# relabel a single input file
|
2
|
+
# * assuming that input file has a single header line
|
3
|
+
# * assuming that input file is in valid CSV format (no validation)
|
4
|
+
|
5
|
+
module Masticate
  # Replaces the header line of a single input with new field names.
  #
  # The input is assumed to have exactly one header line; data rows are
  # re-emitted as CSV without further validation.
  class Relabel < Masticate::Base
    # @param opts [Hash] standard options plus :fields (the new header names)
    # @raise [RuntimeError] when opts[:fields] is missing
    def configure(opts)
      standard_options(opts)

      @fields = opts[:fields] or raise "missing fieldnames for relabel"
    end

    # Streams the input to the output with the first line replaced by the
    # configured field names.
    #
    # @return [Hash] :input_count and :output_count, like the other commands
    def relabel(opts)
      configure(opts)

      @output_count = 0
      header_written = false
      with_input do
        while (line = get)
          if header_written
            emit(CSV.parse_line(line, csv_options).to_csv)
          else
            # The original header line is dropped in favour of @fields.
            emit(@fields.to_csv)
            header_written = true
          end
        end
      end
      @output.close if opts[:output]

      {
        :input_count => @input_count,
        :output_count => @output_count
      }
    end

    # Row-at-a-time form: the first row is swapped for the new field names,
    # every later row is returned as given.
    def crunch(row)
      return row if @headers

      @headers = @fields
    end
  end
end
|
data/lib/masticate/sniffer.rb
CHANGED
data/lib/masticate/version.rb
CHANGED
data/lib/masticate.rb
CHANGED
@@ -1,7 +1,10 @@
|
|
1
1
|
require "open-uri"
|
2
|
+
require "csv"
|
2
3
|
|
3
4
|
require_relative "masticate/version"
|
4
5
|
require_relative "masticate/base"
|
6
|
+
require_relative "masticate/myoptparse"
|
7
|
+
|
5
8
|
require_relative "masticate/sniffer"
|
6
9
|
require_relative "masticate/mender"
|
7
10
|
require_relative "masticate/csvify"
|
@@ -9,6 +12,9 @@ require_relative "masticate/plucker"
|
|
9
12
|
require_relative "masticate/datify"
|
10
13
|
require_relative "masticate/gsubber"
|
11
14
|
require_relative "masticate/max_rows"
|
15
|
+
require_relative "masticate/concat"
|
16
|
+
require_relative "masticate/relabel"
|
17
|
+
require_relative "masticate/cook"
|
12
18
|
|
13
19
|
module Masticate
|
14
20
|
def self.sniff(filename, opts = {})
|
@@ -38,4 +44,16 @@ module Masticate
|
|
38
44
|
def self.maxrows(filename, opts)
|
39
45
|
MaxRows.new(filename).maxrows(opts)
|
40
46
|
end
|
47
|
+
|
48
|
+
def self.concat(filenames, opts)
|
49
|
+
Concat.new(filenames).concat(opts)
|
50
|
+
end
|
51
|
+
|
52
|
+
def self.relabel(filename, opts)
|
53
|
+
Relabel.new(filename).relabel(opts)
|
54
|
+
end
|
55
|
+
|
56
|
+
def self.cook(filename, opts)
|
57
|
+
Cook.new(filename).cook(opts)
|
58
|
+
end
|
41
59
|
end
|
@@ -0,0 +1,108 @@
|
|
1
|
+
COL1 COL 2 Col 3 col-4 col5 col6
|
2
|
+
data data data d a t a data data
|
3
|
+
data data data d a t a data data
|
4
|
+
data data data d a t a data data
|
5
|
+
data data data d a t a data data
|
6
|
+
data| data |data |d a t a|data|data
|
7
|
+
data| data |data |d a t a|data|data
|
8
|
+
data| data |data |d a t a|data,data|data
|
9
|
+
data| data |data "more data" |d a t a|data|data
|
10
|
+
1,20120106003230,2044272,L,407,15267,407,201201060140,407,201201060140,0,201201060309,L,"594,756"
|
11
|
+
1,20120106003230,2044277,X,407,15267,381,201201060222,381,201201060222,0,201201060647,X,"594,761"
|
12
|
+
1,20120106003230,2044309,L,407,15267,407,201201060311,407,201201060311,0,201201060339,L,"594,766"
|
13
|
+
1,20120106003230,,Q,407,15267,407,201201060514,108,201201060515,108,201201060515,SEC,"594,787"
|
14
|
+
1,20120106024355,,Q,407,15267,407,201201060309,90,201201060316,90,201201060316,IV,"594,764"
|
15
|
+
1,20120106024355,2044306,L,407,15267,407,201201060309,407,201201060309,0,201201060345,L,"594,763"
|
16
|
+
1,20120106024355,2044308,X,407,15267,407,201201060310,407,201201060310,0,201201060556,X,"594,765"
|
17
|
+
1,20120106024355,2044307,L,407,15267,407,201201060309,407,201201060309,0,201201060333,L,"594,762"
|
18
|
+
1,20120106024355,,Q,407,15267,407,201201060520,108,201201060522,108,201201060522,SEC,"594,789"
|
19
|
+
1,20120106024355,2044579,L,407,15267,68,201201060826,68,201201060826,0,201201071149,L,"594,823"
|
20
|
+
1,20120106032719,2044345,L,407,15267,407,201201060348,407,201201060348,0,201201060442,L,"594,775"
|
21
|
+
1,20120106032719,2044344,L,407,15267,407,201201060348,407,201201060348,0,201201060442,L,"594,777"
|
22
|
+
1,20120106032719,2044343,L,407,15267,407,201201060348,407,201201060348,0,201201060428,L,"594,773"
|
23
|
+
1,20120106032719,,Q,407,15267,407,201201060348,426,201201060408,426,201201060408,IV,"594,774"
|
24
|
+
1,20120106032719,,Q,407,15267,407,201201060348,426,201201060634,426,201201060634,URINE,"594,776"
|
25
|
+
1,20120106032719,2044386,L,407,15267,407,201201060445,407,201201060445,0,201201060519,L,"594,785"
|
26
|
+
1,20120106032719,2044401,X,407,15267,407,201201060521,407,201201060521,0,201201060646,X,"594,790"
|
27
|
+
1,20120106033235,,Q,407,15267,407,201201060347,74,201201060353,74,201201060353,IV,"594,769"
|
28
|
+
1,20120106033235,2044349,L,407,15267,407,201201060347,74,201201060353,0,201201060443,L,"594,771"
|
29
|
+
1,20120106033235,2044350,L,407,15267,407,201201060347,74,201201060353,0,201201060434,URINE,"594,770"
|
30
|
+
1,20120106033235,2044347,L,407,15267,407,201201060347,74,201201060353,0,201201060428,L,"594,768"
|
31
|
+
1,20120106033235,2044348,L,407,15267,407,201201060347,74,201201060353,0,201201060443,L,"594,772"
|
32
|
+
1,20120106033235,2044372,X,407,15267,407,201201060429,407,201201060429,0,201201060649,X,"594,780"
|
33
|
+
1,20120106035346,,Q,407,15267,407,201201060446,426,201201060448,426,201201060448,N,"594,786"
|
34
|
+
1,20120106041426,2044383,L,407,15267,407,201201060445,407,201201060445,0,201201060657,L,"594,784"
|
35
|
+
1,20120106041426,2044384,L,407,15267,407,201201060445,407,201201060445,0,201201060657,L,"594,782"
|
36
|
+
1,20120106041426,2044382,L,407,15267,407,201201060445,407,201201060445,0,201201060522,L,"594,781"
|
37
|
+
1,20120106041426,,Q,407,15267,407,201201060445,381,201201060452,381,201201060452,IV,"594,783"
|
38
|
+
1,20120106043025,2044400,X,407,15267,407,201201060515,407,201201060515,0,201201060554,X,"594,788"
|
39
|
+
1,20120106045326,2044411,R,407,15267,407,201201060535,407,201201060535,0,201201060630,RS,"594,791"
|
40
|
+
1,20120106045326,,Q,407,15267,407,201201060535,108,201201060540,108,201201060540,SEC,"594,794"
|
41
|
+
1,20120106045326,2044412,R,407,15267,407,201201060535,407,201201060535,0,201201060629,RS,"594,795"
|
42
|
+
1,20120106045326,2044413,X,407,15267,407,201201060536,407,201201060536,0,201201060649,X,"594,796"
|
43
|
+
1,20120106045326,,Q,407,15267,407,201201060535,108,201201060541,108,201201060541,SEC,"594,792"
|
44
|
+
1,20120106045326,2044410,R,407,15267,407,201201060535,407,201201060535,0,201201060628,RS,"594,793"
|
45
|
+
1,20120106052714,2044421,L,407,15267,407,201201060544,407,201201060544,0,201201060605,L,"594,797"
|
46
|
+
1,20120106052714,,Q,407,15267,407,201201060544,90,201201060545,90,201201060545,IV,"594,799"
|
47
|
+
1,20120106052714,,Q,407,15267,407,201201060544,90,201201060545,90,201201060545,N,"594,800"
|
48
|
+
1,20120106052714,2044422,L,407,15267,407,201201060544,407,201201060544,0,201201060621,L,"594,801"
|
49
|
+
1,20120106052714,2044423,L,407,15267,407,201201060544,407,201201060544,0,201201060727,L,"594,798"
|
50
|
+
1,20120106052714,2044424,L,407,15267,407,201201060551,407,201201060551,0,201201060714,L,"594,802"
|
51
|
+
1,20120106070243,2044439,L,504,15550,504,201201060721,504,201201060721,0,201201060753,L,"594,803"
|
52
|
+
1,20120106070243,2044440,L,504,15550,504,201201060721,504,201201060721,0,201201060748,L,"594,807"
|
53
|
+
1,20120106070243,2044441,L,504,15550,504,201201060721,504,201201060721,0,201201060748,L,"594,806"
|
54
|
+
1,20120106070243,,Q,504,15550,504,201201060721,155,201201060735,155,201201060735,IV,"594,805"
|
55
|
+
1,20120106070243,,Q,504,15550,504,201201060806,155,201201060813,155,201201060813,N,"594,820"
|
56
|
+
1,20120106070243,2044524,L,504,15550,504,201201060806,504,201201060806,0,201201061004,L,"594,816"
|
57
|
+
1,20120106070243,,Q,504,15550,504,201201060807,195,201201060813,195,201201060813,SEC,"594,822"
|
58
|
+
1,20120106070243,2044522,L,504,15550,504,201201060806,504,201201060806,0,201201060959,L,"594,819"
|
59
|
+
1,20120106070243,,Q,504,15550,504,201201060807,195,201201060811,195,201201060811,SEC,"594,821"
|
60
|
+
1,20120106070243,,Q,504,15550,504,201201060806,155,201201060813,155,201201060813,N,"594,818"
|
61
|
+
1,20120106070243,,Q,504,15550,504,201201060910,155,201201060916,155,201201060916,N,"594,831"
|
62
|
+
1,20120106070243,2044716,X,504,15550,504,201201060928,504,201201060928,0,201201060953,X,"594,834"
|
63
|
+
1,20120106073142,2044480,X,504,15550,504,201201060757,504,201201060757,0,201201060819,X,"594,815"
|
64
|
+
1,20120106073757,2044475,L,504,15550,504,201201060749,155,201201060755,0,201201060925,URINE,"594,810"
|
65
|
+
1,20120106073757,2044466,L,504,15550,504,201201060749,504,201201060749,0,201201060827,L,"594,808"
|
66
|
+
1,20120106073757,2044470,X,504,15550,504,201201060749,504,201201060749,0,201201060818,X,"594,809"
|
67
|
+
1,20120106073757,2044467,L,504,15550,504,201201060749,504,201201060749,0,201201060826,L,"594,813"
|
68
|
+
1,20120106073757,2044468,L,504,15550,504,201201060749,504,201201060749,0,201201060839,L,"594,811"
|
69
|
+
1,20120106073757,2044469,L,504,15550,504,201201060749,504,201201060749,0,201201060825,L,"594,814"
|
70
|
+
1,20120106073757,,Q,504,15550,504,201201060749,155,201201060755,155,201201060755,IV,"594,812"
|
71
|
+
1,20120106073757,,Q,504,15550,504,201201060911,76,201201060933,76,201201060933,IV,"594,832"
|
72
|
+
1,20120106073757,,Q,504,15550,504,201201060928,34,201201060934,34,201201060934,SEC,"594,833"
|
73
|
+
1,20120106073757,,Q,504,15550,504,201201061022,155,201201061108,155,201201061108,IV,"594,862"
|
74
|
+
1,20120106073757,,Q,504,15550,504,201201061019,155,201201061025,155,201201061025,IV,"594,861"
|
75
|
+
1,20120106073757,,Q,504,15550,504,201201061131,195,201201061133,195,201201061133,SEC,"594,896"
|
76
|
+
1,20120106073757,,Q,504,15550,504,201201061131,195,201201061133,195,201201061133,SEC,"594,895"
|
77
|
+
1,20120106073757,2045028,X,504,15550,504,201201061131,504,201201061131,0,201201061209,X,"594,898"
|
78
|
+
1,20120106073757,2045029,X,504,15550,504,201201061131,504,201201061131,0,201201061345,X,"594,897"
|
79
|
+
1,20120106073757,,Q,504,15550,504,201201061131,155,201201061223,155,201201061223,N,"594,894"
|
80
|
+
1,20120106084347,2044639,X,504,15550,76,201201060850,76,201201060850,0,201201060931,X,"594,828"
|
81
|
+
1,20120106084720,2044670,X,55,4644,55,201201060909,55,201201060909,0,201201060934,X,"594,829"
|
82
|
+
1,20120106084720,,Q,55,4644,55,201201060910,66,201201060914,66,201201060914,N,"594,830"
|
83
|
+
1,20120106085558,2044755,L,55,4644,55,201201060949,55,201201060949,0,201201061018,L,"594,846"
|
84
|
+
1,20120106085558,2044756,L,55,4644,55,201201060949,55,201201060949,0,201201061038,L,"594,851"
|
85
|
+
1,20120106085558,2044793,L,55,4644,55,201201060949,76,201201061003,0,201201061239,URINE,"594,848"
|
86
|
+
1,20120106085558,,Q,55,4644,55,201201060949,76,201201061003,76,201201061003,IV,"594,850"
|
87
|
+
1,20120106085558,,Q,55,4644,55,201201060949,76,201201061003,76,201201061003,IV,"594,847"
|
88
|
+
1,20120106085558,2044757,L,55,4644,55,201201060949,55,201201060949,0,201201061040,L,"594,849"
|
89
|
+
1,20120106085558,2044843,L,55,4644,55,201201061033,55,201201061033,0,201201071505,L,"594,864"
|
90
|
+
1,20120106085558,2044841,X,55,4644,55,201201061032,55,201201061032,0,201201061136,X,"594,863"
|
91
|
+
1,20120106085558,2044844,L,55,4644,55,201201061033,55,201201061033,0,201201061119,L,"594,865"
|
92
|
+
1,20120106085558,,Q,55,4644,55,201201061228,195,201201061240,195,201201061240,SEC,"594,961"
|
93
|
+
1,20120106091726,2044741,L,504,15550,504,201201060942,504,201201060942,0,201201061024,L,"594,839"
|
94
|
+
1,20120106091726,2044745,X,504,15550,504,201201060942,504,201201060942,0,201201061016,X,"594,835"
|
95
|
+
1,20120106091726,2044746,L,504,15550,504,201201060942,504,201201060942,0,201201061107,L,"594,842"
|
96
|
+
1,20120106091726,2044740,L,504,15550,504,201201060942,504,201201060942,0,201201061017,L,"594,836"
|
97
|
+
1,20120106091726,2044744,L,504,15550,504,201201060942,504,201201060942,0,201201061024,L,"594,838"
|
98
|
+
1,20120106091726,2044742,L,504,15550,504,201201060942,504,201201060942,0,201201061016,L,"594,841"
|
99
|
+
1,20120106091726,,Q,504,15550,504,201201060942,66,201201060944,66,201201060944,IV,"594,837"
|
100
|
+
1,20120106091726,2044743,L,504,15550,504,201201060942,504,201201060942,0,201201061016,L,"594,840"
|
101
|
+
1,20120106095129,2044814,X,55,4644,55,201201061010,55,201201061010,0,201201061037,X,"594,853"
|
102
|
+
1,20120106100014,,Q,504,15550,504,201201061011,885,201201061037,885,201201061037,IV,"594,857"
|
103
|
+
1,20120106100014,,Q,504,15550,504,201201061011,885,201201061037,885,201201061037,N,"594,858"
|
104
|
+
1,20120106100014,,Q,504,15550,504,201201061011,885,201201061037,885,201201061037,N,"594,859"
|
105
|
+
1,20120106100014,2044815,L,504,15550,504,201201061011,504,201201061011,0,201201061023,L,"594,854"
|
106
|
+
1,20120106100014,2044817,L,504,15550,504,201201061011,504,201201061011,0,201201061049,L,"594,856"
|
107
|
+
1,20120106100014,2044818,X,504,15550,504,201201061011,504,201201061011,0,201201061038,X,"594,855"
|
108
|
+
1,20120106100014,2044816,L,504,15550,504,201201061011,504,201201061011,0,201201061049,L,"594,860"
|
@@ -0,0 +1,41 @@
|
|
1
|
+
two,eight,fourteen
|
2
|
+
20120106003230,201201060140,594756
|
3
|
+
20120106003230,201201060222,594761
|
4
|
+
20120106003230,201201060311,594766
|
5
|
+
20120106003230,201201060514,594787
|
6
|
+
20120106024355,201201060309,594764
|
7
|
+
20120106024355,201201060310,594765
|
8
|
+
20120106024355,201201060520,594789
|
9
|
+
20120106024355,201201060826,594823
|
10
|
+
20120106032719,201201060348,594775
|
11
|
+
20120106032719,201201060445,594785
|
12
|
+
20120106032719,201201060521,594790
|
13
|
+
20120106033235,201201060347,594769
|
14
|
+
20120106033235,201201060429,594780
|
15
|
+
20120106035346,201201060446,594786
|
16
|
+
20120106041426,201201060445,594784
|
17
|
+
20120106043025,201201060515,594788
|
18
|
+
20120106045326,201201060535,594791
|
19
|
+
20120106045326,201201060536,594796
|
20
|
+
20120106052714,201201060544,594797
|
21
|
+
20120106052714,201201060551,594802
|
22
|
+
20120106070243,201201060721,594803
|
23
|
+
20120106070243,201201060806,594820
|
24
|
+
20120106070243,201201060807,594822
|
25
|
+
20120106070243,201201060910,594831
|
26
|
+
20120106070243,201201060928,594834
|
27
|
+
20120106073142,201201060757,594815
|
28
|
+
20120106073757,201201060749,594810
|
29
|
+
20120106073757,201201060911,594832
|
30
|
+
20120106073757,201201060928,594833
|
31
|
+
20120106073757,201201061022,594862
|
32
|
+
20120106073757,201201061131,594896
|
33
|
+
20120106084347,201201060850,594828
|
34
|
+
20120106084720,201201060909,594829
|
35
|
+
20120106084720,201201060910,594830
|
36
|
+
20120106085558,201201060949,594846
|
37
|
+
20120106085558,201201061033,594864
|
38
|
+
20120106085558,201201061228,594961
|
39
|
+
20120106091726,201201060942,594839
|
40
|
+
20120106095129,201201061010,594853
|
41
|
+
20120106100014,201201061011,594857
|
@@ -0,0 +1,17 @@
|
|
1
|
+
# spec for file concatenation
|
2
|
+
|
3
|
+
require "spec_helper"
|
4
|
+
|
5
|
+
describe "concatenation" do
|
6
|
+
it "should leave just one header row in the result" do
|
7
|
+
file1 = File.dirname(__FILE__) + "/../data/tabbed_data.txt"
|
8
|
+
file2 = File.dirname(__FILE__) + "/../data/pipe_data.txt"
|
9
|
+
file3 = File.dirname(__FILE__) + "/../data/quoted_csv_data.txt"
|
10
|
+
tmp = Tempfile.new('concat')
|
11
|
+
results = Masticate.concat([file1, file2, file3], :output => tmp.path)
|
12
|
+
output = File.read(tmp)
|
13
|
+
tmp.unlink
|
14
|
+
correct_output = File.read(File.dirname(__FILE__) + "/../data/concat_result.txt")
|
15
|
+
output.should == correct_output
|
16
|
+
end
|
17
|
+
end
|
@@ -0,0 +1,16 @@
|
|
1
|
+
# spec for cookery
|
2
|
+
|
3
|
+
require "spec_helper"
|
4
|
+
|
5
|
+
describe "cooking up a recipe" do
|
6
|
+
it "should handle multiple steps" do
|
7
|
+
input = File.dirname(__FILE__) + "/../data/quoted_csv_data.txt"
|
8
|
+
recipe = File.dirname(__FILE__) + "/../data/recipe.txt"
|
9
|
+
tmp = Tempfile.new('cooked')
|
10
|
+
results = Masticate.cook(input, :output => tmp, :recipe => recipe)
|
11
|
+
output = File.read(tmp)
|
12
|
+
correct_output = File.read(File.dirname(__FILE__) + "/../data/cooking_result.csv")
|
13
|
+
|
14
|
+
output.should == correct_output
|
15
|
+
end
|
16
|
+
end
|
data/spec/lib/gsub_spec.rb
CHANGED
@@ -0,0 +1,15 @@
|
|
1
|
+
# spec for relabeling
|
2
|
+
|
3
|
+
require "spec_helper"
|
4
|
+
|
5
|
+
describe "relabeling" do
|
6
|
+
it "result should be same as original" do
|
7
|
+
input = File.dirname(__FILE__) + "/../data/namedcols.csv"
|
8
|
+
tmp = Tempfile.new('relabel')
|
9
|
+
results = Masticate.relabel(input, :fields => %w{happy birth day to you}, :output => tmp.path)
|
10
|
+
output = File.read(tmp)
|
11
|
+
tmp.unlink
|
12
|
+
correct_output = File.read(File.dirname(__FILE__) + "/../data/relabel_result.csv")
|
13
|
+
output.should == correct_output
|
14
|
+
end
|
15
|
+
end
|
data/spec/lib/sniffer_spec.rb
CHANGED
@@ -31,4 +31,11 @@ describe "delimiter sniffing" do
|
|
31
31
|
results[:quote_char].should == '"'
|
32
32
|
results[:field_counts].should == {14 => 100}
|
33
33
|
end
|
34
|
+
|
35
|
+
it "should find tilde delimiter" do
|
36
|
+
filename = File.dirname(__FILE__) + "/../data/tilde_data.txt"
|
37
|
+
results = Masticate.sniff(filename, :stats => true)
|
38
|
+
results[:col_sep].should == '~'
|
39
|
+
results[:field_counts].should == {6 => 5}
|
40
|
+
end
|
34
41
|
end
|
data/spec/spec_helper.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: masticate
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.5
|
4
|
+
version: '0.2'
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-04-
|
12
|
+
date: 2012-04-23 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rspec
|
16
|
-
requirement: &
|
16
|
+
requirement: &2153649040 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ~>
|
@@ -21,10 +21,10 @@ dependencies:
|
|
21
21
|
version: 2.9.0
|
22
22
|
type: :development
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *2153649040
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: guard-rspec
|
27
|
-
requirement: &
|
27
|
+
requirement: &2153648360 !ruby/object:Gem::Requirement
|
28
28
|
none: false
|
29
29
|
requirements:
|
30
30
|
- - ~>
|
@@ -32,10 +32,10 @@ dependencies:
|
|
32
32
|
version: 0.7.0
|
33
33
|
type: :development
|
34
34
|
prerelease: false
|
35
|
-
version_requirements: *
|
35
|
+
version_requirements: *2153648360
|
36
36
|
- !ruby/object:Gem::Dependency
|
37
37
|
name: ruby_gntp
|
38
|
-
requirement: &
|
38
|
+
requirement: &2153647700 !ruby/object:Gem::Requirement
|
39
39
|
none: false
|
40
40
|
requirements:
|
41
41
|
- - ~>
|
@@ -43,7 +43,7 @@ dependencies:
|
|
43
43
|
version: 0.3.4
|
44
44
|
type: :development
|
45
45
|
prerelease: false
|
46
|
-
version_requirements: *
|
46
|
+
version_requirements: *2153647700
|
47
47
|
description: Data file crunching
|
48
48
|
email:
|
49
49
|
- jmay@pobox.com
|
@@ -61,18 +61,24 @@ files:
|
|
61
61
|
- bin/masticate
|
62
62
|
- lib/masticate.rb
|
63
63
|
- lib/masticate/base.rb
|
64
|
+
- lib/masticate/concat.rb
|
65
|
+
- lib/masticate/cook.rb
|
64
66
|
- lib/masticate/csvify.rb
|
65
67
|
- lib/masticate/datify.rb
|
66
68
|
- lib/masticate/gsubber.rb
|
67
69
|
- lib/masticate/max_rows.rb
|
68
70
|
- lib/masticate/mender.rb
|
71
|
+
- lib/masticate/myoptparse.rb
|
69
72
|
- lib/masticate/plucker.rb
|
73
|
+
- lib/masticate/relabel.rb
|
70
74
|
- lib/masticate/sniffer.rb
|
71
75
|
- lib/masticate/version.rb
|
72
76
|
- masticate.gemspec
|
73
77
|
- spec/data/badnums.csv
|
74
78
|
- spec/data/badnums_fixed.csv
|
75
79
|
- spec/data/broken_psv.txt
|
80
|
+
- spec/data/concat_result.txt
|
81
|
+
- spec/data/cooking_result.csv
|
76
82
|
- spec/data/events.csv
|
77
83
|
- spec/data/events_reduced.csv
|
78
84
|
- spec/data/inlined_headers.csv
|
@@ -83,12 +89,18 @@ files:
|
|
83
89
|
- spec/data/namedcols.csv.output
|
84
90
|
- spec/data/pipe_data.txt
|
85
91
|
- spec/data/quoted_csv_data.txt
|
92
|
+
- spec/data/recipe.txt
|
93
|
+
- spec/data/relabel_result.csv
|
86
94
|
- spec/data/tabbed_data.txt
|
95
|
+
- spec/data/tilde_data.txt
|
96
|
+
- spec/lib/concat_spec.rb
|
97
|
+
- spec/lib/cook_spec.rb
|
87
98
|
- spec/lib/csvify_spec.rb
|
88
99
|
- spec/lib/gsub_spec.rb
|
89
100
|
- spec/lib/maxrow_spec.rb
|
90
101
|
- spec/lib/mender_spec.rb
|
91
102
|
- spec/lib/plucker_spec.rb
|
103
|
+
- spec/lib/relabel_spec.rb
|
92
104
|
- spec/lib/sniffer_spec.rb
|
93
105
|
- spec/spec_helper.rb
|
94
106
|
homepage: ''
|
@@ -119,6 +131,8 @@ test_files:
|
|
119
131
|
- spec/data/badnums.csv
|
120
132
|
- spec/data/badnums_fixed.csv
|
121
133
|
- spec/data/broken_psv.txt
|
134
|
+
- spec/data/concat_result.txt
|
135
|
+
- spec/data/cooking_result.csv
|
122
136
|
- spec/data/events.csv
|
123
137
|
- spec/data/events_reduced.csv
|
124
138
|
- spec/data/inlined_headers.csv
|
@@ -129,12 +143,18 @@ test_files:
|
|
129
143
|
- spec/data/namedcols.csv.output
|
130
144
|
- spec/data/pipe_data.txt
|
131
145
|
- spec/data/quoted_csv_data.txt
|
146
|
+
- spec/data/recipe.txt
|
147
|
+
- spec/data/relabel_result.csv
|
132
148
|
- spec/data/tabbed_data.txt
|
149
|
+
- spec/data/tilde_data.txt
|
150
|
+
- spec/lib/concat_spec.rb
|
151
|
+
- spec/lib/cook_spec.rb
|
133
152
|
- spec/lib/csvify_spec.rb
|
134
153
|
- spec/lib/gsub_spec.rb
|
135
154
|
- spec/lib/maxrow_spec.rb
|
136
155
|
- spec/lib/mender_spec.rb
|
137
156
|
- spec/lib/plucker_spec.rb
|
157
|
+
- spec/lib/relabel_spec.rb
|
138
158
|
- spec/lib/sniffer_spec.rb
|
139
159
|
- spec/spec_helper.rb
|
140
160
|
has_rdoc:
|