masticate 0.1.5 → 0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/bin/masticate +3 -114
- data/lib/masticate/base.rb +13 -2
- data/lib/masticate/concat.rb +21 -0
- data/lib/masticate/cook.rb +48 -0
- data/lib/masticate/gsubber.rb +23 -6
- data/lib/masticate/max_rows.rb +35 -5
- data/lib/masticate/myoptparse.rb +163 -0
- data/lib/masticate/plucker.rb +33 -3
- data/lib/masticate/relabel.rb +44 -0
- data/lib/masticate/sniffer.rb +1 -1
- data/lib/masticate/version.rb +1 -1
- data/lib/masticate.rb +18 -0
- data/spec/data/concat_result.txt +108 -0
- data/spec/data/cooking_result.csv +41 -0
- data/spec/data/recipe.txt +4 -0
- data/spec/data/relabel_result.csv +4 -0
- data/spec/data/tilde_data.txt +5 -0
- data/spec/lib/concat_spec.rb +17 -0
- data/spec/lib/cook_spec.rb +16 -0
- data/spec/lib/gsub_spec.rb +0 -1
- data/spec/lib/relabel_spec.rb +15 -0
- data/spec/lib/sniffer_spec.rb +7 -0
- data/spec/spec_helper.rb +1 -0
- metadata +28 -8
data/bin/masticate
CHANGED
@@ -1,119 +1,8 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
|
3
3
|
require_relative "../lib/masticate"
|
4
|
-
require "optparse"
|
5
4
|
|
6
|
-
|
5
|
+
masticator = Masticate::MyOptionParser.new
|
6
|
+
command, options, filenames = masticator.parse
|
7
7
|
|
8
|
-
options
|
9
|
-
OptionParser.new do |opts|
|
10
|
-
opts.banner = "Usage: example.rb [options]"
|
11
|
-
|
12
|
-
opts.on("--format FORMAT", String, "Specify format") do |v|
|
13
|
-
options[:format] = v
|
14
|
-
end
|
15
|
-
|
16
|
-
opts.on("--delim DELIMITER", String, "Specify field delimiter (character or TAB; default is ',')") do |v|
|
17
|
-
options[:col_sep] = v
|
18
|
-
options[:col_sep] = "\t" if options[:col_sep] == "TAB"
|
19
|
-
end
|
20
|
-
|
21
|
-
opts.on("--quote QUOTE-CHAR", String, "Specify character used for quoting fields (optional; default is no quoting)") do |char|
|
22
|
-
options[:quote_char] = char
|
23
|
-
end
|
24
|
-
|
25
|
-
opts.on("--stats", "(for *sniff*) collect & display input stats") do
|
26
|
-
options[:stats] = true
|
27
|
-
end
|
28
|
-
|
29
|
-
opts.on("--fields LIST", Array, "Specify fields to select") do |list|
|
30
|
-
options[:fields] = list
|
31
|
-
end
|
32
|
-
|
33
|
-
opts.on("--field FIELD", String, "Specify field to convert") do |f|
|
34
|
-
options[:field] = f
|
35
|
-
end
|
36
|
-
|
37
|
-
opts.on("--snip DIRECTIVE", String, "Specify header fields to snip: first N, or by name") do |f|
|
38
|
-
options[:snip] = f.to_i
|
39
|
-
end
|
40
|
-
|
41
|
-
opts.on("--from REGEXP", String, "Regular expression for gsub conversion") do |s|
|
42
|
-
options[:from] = s
|
43
|
-
end
|
44
|
-
|
45
|
-
opts.on("--to STRING", String, "Result string for gsub conversion") do |s|
|
46
|
-
options[:to] = s
|
47
|
-
end
|
48
|
-
|
49
|
-
opts.on("--inlined", "(for *mend* only) Source file has headers inlined on each line") do |b|
|
50
|
-
options[:inlined] = true
|
51
|
-
end
|
52
|
-
|
53
|
-
opts.on("--dejunk", "(for *mend* only) Expunge junk lines from source") do |b|
|
54
|
-
options[:dejunk] = true
|
55
|
-
end
|
56
|
-
|
57
|
-
opts.on("--by FIELD", String, "(for *maxrows* only) Field to group by") do |f|
|
58
|
-
options[:by] = f
|
59
|
-
end
|
60
|
-
|
61
|
-
opts.on("--max FIELD", String, "(for *maxrows* only) Field to find max value for") do |f|
|
62
|
-
options[:max] = f
|
63
|
-
end
|
64
|
-
end.parse!
|
65
|
-
|
66
|
-
filename = ARGV.shift # use stdin if no filename provided
|
67
|
-
|
68
|
-
def logmessage(command, options, results)
|
69
|
-
$stderr.puts <<-EOT
|
70
|
-
* masticate #{command} (#{options.keys.join(', ')})
|
71
|
-
Lines in input: #{results[:input_count]}
|
72
|
-
Lines in output: #{results[:output_count]}
|
73
|
-
EOT
|
74
|
-
if results[:field_counts]
|
75
|
-
$stderr.puts " Field counts: #{results[:field_counts].inspect}"
|
76
|
-
end
|
77
|
-
end
|
78
|
-
|
79
|
-
case command
|
80
|
-
when 'sniff'
|
81
|
-
results = Masticate.sniff(filename, options)
|
82
|
-
col_sep = results[:col_sep]
|
83
|
-
col_sep = "TAB" if col_sep == "\t"
|
84
|
-
quote_char = results[:quote_char] || "NONE"
|
85
|
-
$stderr.puts <<-EOT
|
86
|
-
Processing complete.
|
87
|
-
Input delimiter: #{col_sep}
|
88
|
-
Quote char: #{quote_char}
|
89
|
-
Field counts: #{results[:field_counts].inspect}
|
90
|
-
Headers: #{results[:headers].join(',')}
|
91
|
-
EOT
|
92
|
-
|
93
|
-
when 'mend'
|
94
|
-
results = Masticate.mend(filename, options)
|
95
|
-
logmessage(command, options, results)
|
96
|
-
|
97
|
-
when 'csvify'
|
98
|
-
results = Masticate.csvify(filename, options)
|
99
|
-
logmessage(command, options, results)
|
100
|
-
|
101
|
-
when 'pluck'
|
102
|
-
results = Masticate.pluck(filename, options)
|
103
|
-
logmessage(command, options, results)
|
104
|
-
|
105
|
-
when 'datify'
|
106
|
-
results = Masticate.datify(filename, options)
|
107
|
-
logmessage(command, options, results)
|
108
|
-
|
109
|
-
when 'gsub'
|
110
|
-
results = Masticate.gsub(filename, options)
|
111
|
-
logmessage(command, options, results)
|
112
|
-
|
113
|
-
when 'maxrows'
|
114
|
-
results = Masticate.maxrows(filename, options)
|
115
|
-
logmessage(command, options, results)
|
116
|
-
|
117
|
-
else
|
118
|
-
raise "unknown command #{command}"
|
119
|
-
end
|
8
|
+
masticator.execute(command, options, filenames)
|
data/lib/masticate/base.rb
CHANGED
@@ -4,8 +4,15 @@ class Masticate::Base
|
|
4
4
|
attr_reader :input_count, :output_count
|
5
5
|
attr_reader :csv_options
|
6
6
|
|
7
|
-
def initialize(
|
8
|
-
|
7
|
+
# Set up a masticator from either an input filename or an options hash.
#
# args - String: path of the input file, stored in @filename.
#        nil:    no input file was named; @filename is left nil.
#                NOTE(review): the previous CLI documented a missing filename
#                as "use stdin" - presumably #with_input handles a nil
#                @filename that way; confirm.
#        Hash:   option hash handed straight to #configure (this is how
#                recipe steps are built via `klass.new(options)`).
#
# Raises RuntimeError for any other argument type.
def initialize(args)
  case args
  when String, nil
    @filename = args
  when Hash
    configure(args)
  else
    raise "invalid initialization: #{args}"
  end
end
|
10
17
|
|
11
18
|
def with_input
|
@@ -40,4 +47,8 @@ class Masticate::Base
|
|
40
47
|
@csv_options[:quote_char] = opts[:quote_char] || "\0"
|
41
48
|
end
|
42
49
|
end
|
50
|
+
|
51
|
+
# def crunch(row)
|
52
|
+
# # noop
|
53
|
+
# end
|
43
54
|
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
# concatenate input files:
|
2
|
+
# * assuming that each input file has a single header line
|
3
|
+
# * writing a single header line to the output (just use the header line from the first file)
|
4
|
+
# * assuming that all the files have the same format (no validation)
|
5
|
+
|
6
|
+
module Masticate
  # Concatenates several input files into one output, keeping only the header
  # line of the first file. Deliberately not a Masticate::Base subclass (the
  # inheritance was commented out in the original).
  class Concat
    # filenames - Array of input paths, in the order they should appear.
    def initialize(filenames)
      @filenames = filenames
    end

    # Write the first file in full, then every other file minus its first line.
    #
    # opts - Hash; opts[:output] names the destination file, which is
    #        overwritten. Without it the result goes to $stdout.
    #
    # The copy is done in Ruby rather than by shelling out to cat/tail, so
    # filenames containing spaces or shell metacharacters are safe and the
    # obsolete `tail +2` syntax is no longer needed.
    #
    # Returns the Array of files after the first (same as the original's
    # trailing `rest.each`).
    def concat(opts)
      destination = opts[:output]
      first, *rest = @filenames
      out = destination ? File.open(destination, "wb") : $stdout
      begin
        append(first, out, 0) if first
        rest.each { |file| append(file, out, 1) }
      ensure
        # never close $stdout, only a file we opened ourselves
        out.close if destination
      end
    end

    private

    # Copy +path+ to +out+, dropping the first +skip+ lines.
    def append(path, out, skip)
      File.open(path, "rb") do |input|
        input.each_line.with_index do |line, lineno|
          out.write(line) if lineno >= skip
        end
      end
    end
  end
end
|
@@ -0,0 +1,48 @@
|
|
1
|
+
# cook up a recipe
|
2
|
+
# * single file as input
|
3
|
+
# * recipe from a file
|
4
|
+
# * multiple steps
|
5
|
+
# * single output
|
6
|
+
|
7
|
+
require "shellwords"
|
8
|
+
|
9
|
+
# Runs every row of a single input file through the steps listed in a recipe
# file and writes a single output.
class Masticate::Cook < Masticate::Base
  # filename - path of the input file.
  def initialize(filename)
    @filename = filename
  end

  # Apply the recipe named by opts[:recipe] to the input.
  #
  # Each non-blank recipe line is split like a shell command line, parsed by
  # Masticate::MyOptionParser and turned into a step object via #prepare.
  # Every input row is handed to each step's #crunch in recipe order; a row
  # that is nil after the last step is not emitted.
  #
  # Returns a Hash with :input_count and :output_count.
  # Raises RuntimeError when opts[:recipe] is missing.
  def cook(opts)
    recipefile = opts[:recipe] or raise "missing recipe for cook"
    recipe = File.read(recipefile)
    standard_options(opts)

    steps = build_steps(recipe)

    @output_count = 0
    begin
      with_input do |_input|
        while line = get
          row = CSV.parse_line(line, csv_options)
          steps.each { |step| row = step.crunch(row) }
          emit(row.to_csv) if row
        end
      end
    ensure
      # close the output even when a step raises part-way through
      @output.close if opts[:output] && @output
    end

    {
      :input_count => @input_count,
      :output_count => @output_count
    }
  end

  private

  # Turn recipe text into step objects, one per non-blank line. Blank lines
  # (e.g. a trailing newline at the end of the recipe) are skipped; they would
  # otherwise parse to a nil command.
  def build_steps(recipe)
    argvs = recipe.lines.map { |text| Shellwords.split(text) }
    argvs.reject(&:empty?).map do |argv|
      masticator = Masticate::MyOptionParser.new
      command, options = masticator.parse(argv)
      masticator.prepare(command, options)
    end
  end
end
|
data/lib/masticate/gsubber.rb
CHANGED
@@ -2,13 +2,21 @@
|
|
2
2
|
require "csv"
|
3
3
|
|
4
4
|
class Masticate::Gsubber < Masticate::Base
|
5
|
-
def
|
5
|
+
def configure(opts)
|
6
6
|
standard_options(opts)
|
7
7
|
|
8
|
-
field = opts[:field] or raise "missing field to gsub"
|
9
|
-
from = Regexp.new(opts[:from]) or raise "Invalid regex '#{opts[:from]}' for conversion"
|
10
|
-
to = opts[:to] or raise "missing 'to' string for gsub"
|
8
|
+
@field = opts[:field] or raise "missing field to gsub"
|
9
|
+
@from = Regexp.new(opts[:from]) or raise "Invalid regex '#{opts[:from]}' for conversion"
|
10
|
+
@to = opts[:to] or raise "missing 'to' string for gsub"
|
11
|
+
end
|
12
|
+
|
13
|
+
# Remember the header row and locate the column named by @field.
#
# row - Array of header names.
#
# Returns the zero-based position of @field, also stored in @index.
# Raises RuntimeError when the header row has no such column.
def set_headers(row)
  @headers = row
  @index = row.index(@field)
  raise "Unable to find column '#{@field}' in headers" if @index.nil?

  @index
end
|
11
17
|
|
18
|
+
def gsub(opts)
|
19
|
+
configure(opts)
|
12
20
|
@output_count = 0
|
13
21
|
headers = nil
|
14
22
|
with_input do |input|
|
@@ -16,11 +24,11 @@ class Masticate::Gsubber < Masticate::Base
|
|
16
24
|
row = CSV.parse_line(line, csv_options)
|
17
25
|
if !headers
|
18
26
|
headers = row
|
19
|
-
index = headers.index(field) or raise "Unable to find column '#{field}' in headers"
|
27
|
+
index = headers.index(@field) or raise "Unable to find column '#{@field}' in headers"
|
20
28
|
emit(line)
|
21
29
|
else
|
22
30
|
oldval = row[index]
|
23
|
-
newval = oldval.gsub(from, to)
|
31
|
+
newval = oldval.gsub(@from, @to)
|
24
32
|
row[index] = newval
|
25
33
|
emit(row.to_csv)
|
26
34
|
end
|
@@ -33,4 +41,13 @@ class Masticate::Gsubber < Masticate::Base
|
|
33
41
|
:output_count => @output_count
|
34
42
|
}
|
35
43
|
end
|
44
|
+
|
45
|
+
# Recipe-step entry point: apply the configured substitution to one row.
#
# The first row seen is taken as the header row and only used to locate the
# target column. In every later row the target cell is rewritten in place
# with String#gsub(@from, @to).
#
# row - Array of cell values, or nil when an earlier step produced no row.
#
# Returns the (possibly modified) row; nil is passed through untouched.
def crunch(row)
  return row if row.nil?

  if @headers
    cell = row[@index]
    # an empty CSV field parses as nil; leave it nil so it is written back
    # as an empty field instead of raising NoMethodError
    row[@index] = cell.gsub(@from, @to) unless cell.nil?
  else
    set_headers(row)
  end
  row
end
|
36
53
|
end
|
data/lib/masticate/max_rows.rb
CHANGED
@@ -2,11 +2,15 @@
|
|
2
2
|
require "csv"
|
3
3
|
|
4
4
|
class Masticate::MaxRows < Masticate::Base
|
5
|
-
def
|
5
|
+
def configure(opts)
|
6
6
|
standard_options(opts)
|
7
7
|
|
8
|
-
groupby = opts[:by] or raise "missing field to group by"
|
9
|
-
maxon = opts[:max] or raise "missing field to max on"
|
8
|
+
@groupby = opts[:by] or raise "missing field to group by"
|
9
|
+
@maxon = opts[:max] or raise "missing field to max on"
|
10
|
+
end
|
11
|
+
|
12
|
+
def maxrows(opts)
|
13
|
+
configure(opts)
|
10
14
|
|
11
15
|
@output_count = 0
|
12
16
|
headers = nil
|
@@ -16,8 +20,8 @@ class Masticate::MaxRows < Masticate::Base
|
|
16
20
|
row = CSV.parse_line(line, csv_options)
|
17
21
|
if !headers
|
18
22
|
headers = row
|
19
|
-
index_by = headers.index(groupby) or raise "Unable to find column '#{groupby}'"
|
20
|
-
index_max = headers.index(maxon) or raise "Unable to find column '#{maxon}'"
|
23
|
+
index_by = headers.index(@groupby) or raise "Unable to find column '#{@groupby}'"
|
24
|
+
index_max = headers.index(@maxon) or raise "Unable to find column '#{@maxon}'"
|
21
25
|
emit(line)
|
22
26
|
else
|
23
27
|
key = row[index_by]
|
@@ -45,4 +49,30 @@ class Masticate::MaxRows < Masticate::Base
|
|
45
49
|
:output_count => @output_count
|
46
50
|
}
|
47
51
|
end
|
52
|
+
|
53
|
+
# Recipe-step entry point: track, per group, the row with the largest value
# in the @maxon column.
#
# * First call: the row is the header row; column positions are resolved,
#   the accumulator is reset and the header row is returned.
# * nil row (after the header): every accumulated row is emitted and nil is
#   returned, so the caller has nothing further to write.
# * Data row: returned when it is the first row of its group or beats the
#   group's current maximum (it then becomes the stored row); otherwise nil.
#   Scores are compared with `>` exactly as read, i.e. as strings when they
#   come straight from CSV parsing.
#
# Raises RuntimeError when a configured column is not in the header row.
def crunch(row)
  if !@headers
    @headers = row
    @index_by = row.index(@groupby) or raise "Unable to find column '#{@groupby}'"
    @index_max = row.index(@maxon) or raise "Unable to find column '#{@maxon}'"
    @accum = {}
    row
  elsif row.nil?
    # output the accumulated results
    @accum.each_value { |best| emit(best.to_csv) }
    nil
  else
    key = row[@index_by]
    best = @accum[key]
    if !best
      @accum[key] = row
    elsif row[@index_max] > best[@index_max]
      @accum[key] = row
    end
  end
end
|
48
78
|
end
|
@@ -0,0 +1,163 @@
|
|
1
|
+
require "optparse"
|
2
|
+
|
3
|
+
module Masticate
  # Command-line front end: parses `masticate COMMAND [options] [files]`,
  # builds recipe steps, and dispatches commands to the Masticate module.
  class MyOptionParser
    attr_reader :command, :options

    # Recipe command name => name of the Masticate class that implements it.
    # Kept as symbols and resolved at call time, so this file does not need
    # those classes to be defined when it is loaded.
    STEP_CLASSES = {
      'gsub' => :Gsubber,
      'datify' => :Datify,
      'maxrows' => :MaxRows,
      'relabel' => :Relabel,
      'pluck' => :Plucker
    }.freeze

    # Build the OptionParser; every recognised switch stores into @options.
    def initialize
      @options = {}
      @parser = OptionParser.new do |opts|
        opts.banner = "Usage: masticate [command] [options]"

        opts.on("--output FILENAME", String, "Redirect output from stdout to file") do |f|
          @options[:output] = f
        end

        opts.on("--format FORMAT", String, "Specify format") do |v|
          @options[:format] = v
        end

        opts.on("--delim DELIMITER", String, "Specify field delimiter (character or TAB; default is ',')") do |v|
          # the literal word TAB stands for a tab character
          @options[:col_sep] = (v == "TAB" ? "\t" : v)
        end

        opts.on("--quote QUOTE-CHAR", String, "Specify character used for quoting fields (optional; default is no quoting)") do |char|
          @options[:quote_char] = char
        end

        opts.on("--stats", "(for *sniff*) collect & display input stats") do
          @options[:stats] = true
        end

        opts.on("--fields LIST", Array, "Specify fields to select") do |list|
          @options[:fields] = list
        end

        opts.on("--field FIELD", String, "Specify field to convert") do |f|
          @options[:field] = f
        end

        opts.on("--snip DIRECTIVE", String, "Specify header fields to snip: first N, or by name") do |f|
          @options[:snip] = f.to_i
        end

        opts.on("--from REGEXP", String, "Regular expression for gsub conversion") do |s|
          @options[:from] = s
        end

        # if I specify String here, then a blank string '' is considered invalid and triggers an exception.
        opts.on("--to STRING", "Result string for gsub conversion") do |s|
          @options[:to] = s
        end

        opts.on("--inlined", "(for *mend* only) Source file has headers inlined on each line") do
          @options[:inlined] = true
        end

        opts.on("--dejunk", "(for *mend* only) Expunge junk lines from source") do
          @options[:dejunk] = true
        end

        opts.on("--by FIELD", String, "(for *maxrows* only) Field to group by") do |f|
          @options[:by] = f
        end

        opts.on("--max FIELD", String, "(for *maxrows* only) Field to find max value for") do |f|
          @options[:max] = f
        end

        opts.on("--recipe FILENAME", String, "(*cook* only) Recipe file") do |f|
          @options[:recipe] = f
        end
      end
    end

    # Parse an argument vector.
    #
    # argv - Array of Strings (defaults to ARGV). Its first element is
    #        removed and taken as the command; the rest is left untouched.
    #
    # Returns [command, options, filenames], where filenames are the
    # non-option arguments.
    def parse(argv = ARGV)
      @command = argv.shift
      filenames = @parser.parse(argv)
      [@command, @options, filenames]
    end

    # Build the step object for one recipe line.
    #
    # Returns a new instance of the class registered for +command+,
    # initialised with +options+.
    # Raises RuntimeError when +command+ cannot be used as a recipe step.
    def prepare(command, options)
      class_name = STEP_CLASSES[command]
      raise "unknown command #{command}" unless class_name

      Masticate.const_get(class_name).new(options)
    end

    # Run one command from the command line.
    #
    # filenames - Array of input paths (may be nil or empty). All commands
    #             except concat use only the first one.
    #
    # NOTE(review): the heredoc bodies below are reproduced flush-left, as
    # they appear in this view; confirm the intended leading whitespace.
    #
    # Raises RuntimeError for an unknown command.
    def execute(command, options, filenames = nil)
      filenames = Array(filenames)
      filename = filenames.first

      case command
      when 'sniff'
        results = Masticate.sniff(filename, options)
        col_sep = results[:col_sep]
        col_sep = "TAB" if col_sep == "\t"
        quote_char = results[:quote_char] || "NONE"
        $stderr.puts <<-EOT
Processing complete.
Input delimiter: #{col_sep}
Quote char: #{quote_char}
Field counts: #{results[:field_counts].inspect}
Headers: #{results[:headers].join(',')}
        EOT

      when 'mend', 'csvify', 'pluck', 'datify', 'gsub', 'maxrows', 'cook'
        # the command name is one of the literals above, so dispatching by
        # name is safe
        results = Masticate.public_send(command, filename, options)
        logmessage(command, options, results)

      when 'concat'
        # use the parsed filenames: ARGV still contains the option switches
        Masticate.concat(filenames, options)

      when 'relabel'
        Masticate.relabel(filename, options)

      else
        raise "unknown command #{command}"
      end
    end

    # Report line counts (and field counts, when present) on stderr.
    def logmessage(command, options, results)
      $stderr.puts <<-EOT
* masticate #{command} (#{options.keys.join(', ')})
Lines in input: #{results[:input_count]}
Lines in output: #{results[:output_count]}
      EOT
      if results[:field_counts]
        $stderr.puts " Field counts: #{results[:field_counts].inspect}"
      end
    end
  end
end
|
data/lib/masticate/plucker.rb
CHANGED
@@ -2,10 +2,17 @@
|
|
2
2
|
require "csv"
|
3
3
|
|
4
4
|
class Masticate::Plucker < Masticate::Base
|
5
|
-
def
|
5
|
+
def configure(opts)
|
6
6
|
standard_options(opts)
|
7
7
|
|
8
|
-
fields = opts[:fields] or raise "missing fields to pluck"
|
8
|
+
@fields = opts[:fields] or raise "missing fields to pluck"
|
9
|
+
end
|
10
|
+
|
11
|
+
def pluck(opts)
|
12
|
+
configure(opts)
|
13
|
+
# standard_options(opts)
|
14
|
+
#
|
15
|
+
# fields = opts[:fields] or raise "missing fields to pluck"
|
9
16
|
|
10
17
|
@output_count = 0
|
11
18
|
headers = nil
|
@@ -14,7 +21,7 @@ class Masticate::Plucker < Masticate::Base
|
|
14
21
|
row = CSV.parse_line(line, csv_options)
|
15
22
|
if !headers
|
16
23
|
headers = row
|
17
|
-
indexes = fields.map do |f|
|
24
|
+
indexes = @fields.map do |f|
|
18
25
|
case f
|
19
26
|
when String
|
20
27
|
headers.index(f) or raise "Unable to find column '#{f}'"
|
@@ -41,4 +48,27 @@ class Masticate::Plucker < Masticate::Base
|
|
41
48
|
:output_count => @output_count
|
42
49
|
}
|
43
50
|
end
|
51
|
+
|
52
|
+
# Recipe-step entry point: reduce a row to the configured columns.
#
# The first row seen is taken as the header row and used to resolve each
# entry of @fields to a zero-based column position: a String is looked up by
# name, an Integer is treated as a 1-based column number. The header row
# itself is reduced the same way as every later row.
#
# row - Array of cell values.
#
# Returns an Array holding the selected cells, in @fields order.
# Raises RuntimeError for an unknown column name, a column number beyond the
# header width, or a field descriptor of any other type.
def crunch(row)
  unless @headers
    @headers = row
    @indexes = @fields.map do |f|
      case f
      when String
        row.index(f) or raise "Unable to find column '#{f}'"
      when Integer # Fixnum no longer exists in current Ruby
        if f > row.count
          raise "Cannot pluck column #{f}, there are only #{row.count} fields"
        end

        f - 1
      else
        raise "Invalid field descriptor '#{f}'"
      end
    end
  end

  # output is just the selected columns
  @indexes.map { |i| row[i] }
end
|
44
74
|
end
|
@@ -0,0 +1,44 @@
|
|
1
|
+
# relabel a single input file
|
2
|
+
# * assuming that input file has a single header line
|
3
|
+
# * assuming that input file is in valid CSV format (no validation)
|
4
|
+
|
5
|
+
# Replaces the header line of a single input file with a new list of field
# names; all other lines are re-emitted as CSV.
class Masticate::Relabel < Masticate::Base
  # Read the options this command needs.
  #
  # Raises RuntimeError when opts[:fields] is missing.
  def configure(opts)
    standard_options(opts)

    @fields = opts[:fields] or raise "missing fieldnames for relabel"
  end

  # Relabel the whole input: the first line is replaced by @fields, every
  # later line is parsed and written back out.
  #
  # Returns a Hash with :input_count and :output_count, in the same shape the
  # cook and gsub commands return.
  def relabel(opts)
    configure(opts)

    @output_count = 0
    header_done = false
    with_input do |_input|
      while line = get
        row = CSV.parse_line(line, csv_options)
        if header_done
          emit(row.to_csv)
        else
          # the original header line is discarded in favour of the new names
          emit(@fields.to_csv)
          header_done = true
        end
      end
    end
    @output.close if opts[:output]

    {
      :input_count => @input_count,
      :output_count => @output_count
    }
  end

  # Recipe-step entry point: the first row seen is replaced by the new field
  # names; every later row is returned unchanged.
  def crunch(row)
    return row if @headers

    @headers = @fields
  end
end
|
data/lib/masticate/sniffer.rb
CHANGED
data/lib/masticate/version.rb
CHANGED
data/lib/masticate.rb
CHANGED
@@ -1,7 +1,10 @@
|
|
1
1
|
require "open-uri"
|
2
|
+
require "csv"
|
2
3
|
|
3
4
|
require_relative "masticate/version"
|
4
5
|
require_relative "masticate/base"
|
6
|
+
require_relative "masticate/myoptparse"
|
7
|
+
|
5
8
|
require_relative "masticate/sniffer"
|
6
9
|
require_relative "masticate/mender"
|
7
10
|
require_relative "masticate/csvify"
|
@@ -9,6 +12,9 @@ require_relative "masticate/plucker"
|
|
9
12
|
require_relative "masticate/datify"
|
10
13
|
require_relative "masticate/gsubber"
|
11
14
|
require_relative "masticate/max_rows"
|
15
|
+
require_relative "masticate/concat"
|
16
|
+
require_relative "masticate/relabel"
|
17
|
+
require_relative "masticate/cook"
|
12
18
|
|
13
19
|
module Masticate
|
14
20
|
def self.sniff(filename, opts = {})
|
@@ -38,4 +44,16 @@ module Masticate
|
|
38
44
|
def self.maxrows(filename, opts)
|
39
45
|
MaxRows.new(filename).maxrows(opts)
|
40
46
|
end
|
47
|
+
|
48
|
+
# Convenience entry point: build a Concat for +filenames+ and run it with
# +opts+. Returns whatever Concat#concat returns.
def self.concat(filenames, opts)
  concatenator = Concat.new(filenames)
  concatenator.concat(opts)
end
|
51
|
+
|
52
|
+
# Convenience entry point: build a Relabel for +filename+ and run it with
# +opts+. Returns whatever Relabel#relabel returns.
def self.relabel(filename, opts)
  relabeler = Relabel.new(filename)
  relabeler.relabel(opts)
end
|
55
|
+
|
56
|
+
# Convenience entry point: build a Cook for +filename+ and run it with
# +opts+. Returns whatever Cook#cook returns.
def self.cook(filename, opts)
  chef = Cook.new(filename)
  chef.cook(opts)
end
|
41
59
|
end
|
@@ -0,0 +1,108 @@
|
|
1
|
+
COL1 COL 2 Col 3 col-4 col5 col6
|
2
|
+
data data data d a t a data data
|
3
|
+
data data data d a t a data data
|
4
|
+
data data data d a t a data data
|
5
|
+
data data data d a t a data data
|
6
|
+
data| data |data |d a t a|data|data
|
7
|
+
data| data |data |d a t a|data|data
|
8
|
+
data| data |data |d a t a|data,data|data
|
9
|
+
data| data |data "more data" |d a t a|data|data
|
10
|
+
1,20120106003230,2044272,L,407,15267,407,201201060140,407,201201060140,0,201201060309,L,"594,756"
|
11
|
+
1,20120106003230,2044277,X,407,15267,381,201201060222,381,201201060222,0,201201060647,X,"594,761"
|
12
|
+
1,20120106003230,2044309,L,407,15267,407,201201060311,407,201201060311,0,201201060339,L,"594,766"
|
13
|
+
1,20120106003230,,Q,407,15267,407,201201060514,108,201201060515,108,201201060515,SEC,"594,787"
|
14
|
+
1,20120106024355,,Q,407,15267,407,201201060309,90,201201060316,90,201201060316,IV,"594,764"
|
15
|
+
1,20120106024355,2044306,L,407,15267,407,201201060309,407,201201060309,0,201201060345,L,"594,763"
|
16
|
+
1,20120106024355,2044308,X,407,15267,407,201201060310,407,201201060310,0,201201060556,X,"594,765"
|
17
|
+
1,20120106024355,2044307,L,407,15267,407,201201060309,407,201201060309,0,201201060333,L,"594,762"
|
18
|
+
1,20120106024355,,Q,407,15267,407,201201060520,108,201201060522,108,201201060522,SEC,"594,789"
|
19
|
+
1,20120106024355,2044579,L,407,15267,68,201201060826,68,201201060826,0,201201071149,L,"594,823"
|
20
|
+
1,20120106032719,2044345,L,407,15267,407,201201060348,407,201201060348,0,201201060442,L,"594,775"
|
21
|
+
1,20120106032719,2044344,L,407,15267,407,201201060348,407,201201060348,0,201201060442,L,"594,777"
|
22
|
+
1,20120106032719,2044343,L,407,15267,407,201201060348,407,201201060348,0,201201060428,L,"594,773"
|
23
|
+
1,20120106032719,,Q,407,15267,407,201201060348,426,201201060408,426,201201060408,IV,"594,774"
|
24
|
+
1,20120106032719,,Q,407,15267,407,201201060348,426,201201060634,426,201201060634,URINE,"594,776"
|
25
|
+
1,20120106032719,2044386,L,407,15267,407,201201060445,407,201201060445,0,201201060519,L,"594,785"
|
26
|
+
1,20120106032719,2044401,X,407,15267,407,201201060521,407,201201060521,0,201201060646,X,"594,790"
|
27
|
+
1,20120106033235,,Q,407,15267,407,201201060347,74,201201060353,74,201201060353,IV,"594,769"
|
28
|
+
1,20120106033235,2044349,L,407,15267,407,201201060347,74,201201060353,0,201201060443,L,"594,771"
|
29
|
+
1,20120106033235,2044350,L,407,15267,407,201201060347,74,201201060353,0,201201060434,URINE,"594,770"
|
30
|
+
1,20120106033235,2044347,L,407,15267,407,201201060347,74,201201060353,0,201201060428,L,"594,768"
|
31
|
+
1,20120106033235,2044348,L,407,15267,407,201201060347,74,201201060353,0,201201060443,L,"594,772"
|
32
|
+
1,20120106033235,2044372,X,407,15267,407,201201060429,407,201201060429,0,201201060649,X,"594,780"
|
33
|
+
1,20120106035346,,Q,407,15267,407,201201060446,426,201201060448,426,201201060448,N,"594,786"
|
34
|
+
1,20120106041426,2044383,L,407,15267,407,201201060445,407,201201060445,0,201201060657,L,"594,784"
|
35
|
+
1,20120106041426,2044384,L,407,15267,407,201201060445,407,201201060445,0,201201060657,L,"594,782"
|
36
|
+
1,20120106041426,2044382,L,407,15267,407,201201060445,407,201201060445,0,201201060522,L,"594,781"
|
37
|
+
1,20120106041426,,Q,407,15267,407,201201060445,381,201201060452,381,201201060452,IV,"594,783"
|
38
|
+
1,20120106043025,2044400,X,407,15267,407,201201060515,407,201201060515,0,201201060554,X,"594,788"
|
39
|
+
1,20120106045326,2044411,R,407,15267,407,201201060535,407,201201060535,0,201201060630,RS,"594,791"
|
40
|
+
1,20120106045326,,Q,407,15267,407,201201060535,108,201201060540,108,201201060540,SEC,"594,794"
|
41
|
+
1,20120106045326,2044412,R,407,15267,407,201201060535,407,201201060535,0,201201060629,RS,"594,795"
|
42
|
+
1,20120106045326,2044413,X,407,15267,407,201201060536,407,201201060536,0,201201060649,X,"594,796"
|
43
|
+
1,20120106045326,,Q,407,15267,407,201201060535,108,201201060541,108,201201060541,SEC,"594,792"
|
44
|
+
1,20120106045326,2044410,R,407,15267,407,201201060535,407,201201060535,0,201201060628,RS,"594,793"
|
45
|
+
1,20120106052714,2044421,L,407,15267,407,201201060544,407,201201060544,0,201201060605,L,"594,797"
|
46
|
+
1,20120106052714,,Q,407,15267,407,201201060544,90,201201060545,90,201201060545,IV,"594,799"
|
47
|
+
1,20120106052714,,Q,407,15267,407,201201060544,90,201201060545,90,201201060545,N,"594,800"
|
48
|
+
1,20120106052714,2044422,L,407,15267,407,201201060544,407,201201060544,0,201201060621,L,"594,801"
|
49
|
+
1,20120106052714,2044423,L,407,15267,407,201201060544,407,201201060544,0,201201060727,L,"594,798"
|
50
|
+
1,20120106052714,2044424,L,407,15267,407,201201060551,407,201201060551,0,201201060714,L,"594,802"
|
51
|
+
1,20120106070243,2044439,L,504,15550,504,201201060721,504,201201060721,0,201201060753,L,"594,803"
|
52
|
+
1,20120106070243,2044440,L,504,15550,504,201201060721,504,201201060721,0,201201060748,L,"594,807"
|
53
|
+
1,20120106070243,2044441,L,504,15550,504,201201060721,504,201201060721,0,201201060748,L,"594,806"
|
54
|
+
1,20120106070243,,Q,504,15550,504,201201060721,155,201201060735,155,201201060735,IV,"594,805"
|
55
|
+
1,20120106070243,,Q,504,15550,504,201201060806,155,201201060813,155,201201060813,N,"594,820"
|
56
|
+
1,20120106070243,2044524,L,504,15550,504,201201060806,504,201201060806,0,201201061004,L,"594,816"
|
57
|
+
1,20120106070243,,Q,504,15550,504,201201060807,195,201201060813,195,201201060813,SEC,"594,822"
|
58
|
+
1,20120106070243,2044522,L,504,15550,504,201201060806,504,201201060806,0,201201060959,L,"594,819"
|
59
|
+
1,20120106070243,,Q,504,15550,504,201201060807,195,201201060811,195,201201060811,SEC,"594,821"
|
60
|
+
1,20120106070243,,Q,504,15550,504,201201060806,155,201201060813,155,201201060813,N,"594,818"
|
61
|
+
1,20120106070243,,Q,504,15550,504,201201060910,155,201201060916,155,201201060916,N,"594,831"
|
62
|
+
1,20120106070243,2044716,X,504,15550,504,201201060928,504,201201060928,0,201201060953,X,"594,834"
|
63
|
+
1,20120106073142,2044480,X,504,15550,504,201201060757,504,201201060757,0,201201060819,X,"594,815"
|
64
|
+
1,20120106073757,2044475,L,504,15550,504,201201060749,155,201201060755,0,201201060925,URINE,"594,810"
|
65
|
+
1,20120106073757,2044466,L,504,15550,504,201201060749,504,201201060749,0,201201060827,L,"594,808"
|
66
|
+
1,20120106073757,2044470,X,504,15550,504,201201060749,504,201201060749,0,201201060818,X,"594,809"
|
67
|
+
1,20120106073757,2044467,L,504,15550,504,201201060749,504,201201060749,0,201201060826,L,"594,813"
|
68
|
+
1,20120106073757,2044468,L,504,15550,504,201201060749,504,201201060749,0,201201060839,L,"594,811"
|
69
|
+
1,20120106073757,2044469,L,504,15550,504,201201060749,504,201201060749,0,201201060825,L,"594,814"
|
70
|
+
1,20120106073757,,Q,504,15550,504,201201060749,155,201201060755,155,201201060755,IV,"594,812"
|
71
|
+
1,20120106073757,,Q,504,15550,504,201201060911,76,201201060933,76,201201060933,IV,"594,832"
|
72
|
+
1,20120106073757,,Q,504,15550,504,201201060928,34,201201060934,34,201201060934,SEC,"594,833"
|
73
|
+
1,20120106073757,,Q,504,15550,504,201201061022,155,201201061108,155,201201061108,IV,"594,862"
|
74
|
+
1,20120106073757,,Q,504,15550,504,201201061019,155,201201061025,155,201201061025,IV,"594,861"
|
75
|
+
1,20120106073757,,Q,504,15550,504,201201061131,195,201201061133,195,201201061133,SEC,"594,896"
|
76
|
+
1,20120106073757,,Q,504,15550,504,201201061131,195,201201061133,195,201201061133,SEC,"594,895"
|
77
|
+
1,20120106073757,2045028,X,504,15550,504,201201061131,504,201201061131,0,201201061209,X,"594,898"
|
78
|
+
1,20120106073757,2045029,X,504,15550,504,201201061131,504,201201061131,0,201201061345,X,"594,897"
|
79
|
+
1,20120106073757,,Q,504,15550,504,201201061131,155,201201061223,155,201201061223,N,"594,894"
|
80
|
+
1,20120106084347,2044639,X,504,15550,76,201201060850,76,201201060850,0,201201060931,X,"594,828"
|
81
|
+
1,20120106084720,2044670,X,55,4644,55,201201060909,55,201201060909,0,201201060934,X,"594,829"
|
82
|
+
1,20120106084720,,Q,55,4644,55,201201060910,66,201201060914,66,201201060914,N,"594,830"
|
83
|
+
1,20120106085558,2044755,L,55,4644,55,201201060949,55,201201060949,0,201201061018,L,"594,846"
|
84
|
+
1,20120106085558,2044756,L,55,4644,55,201201060949,55,201201060949,0,201201061038,L,"594,851"
|
85
|
+
1,20120106085558,2044793,L,55,4644,55,201201060949,76,201201061003,0,201201061239,URINE,"594,848"
|
86
|
+
1,20120106085558,,Q,55,4644,55,201201060949,76,201201061003,76,201201061003,IV,"594,850"
|
87
|
+
1,20120106085558,,Q,55,4644,55,201201060949,76,201201061003,76,201201061003,IV,"594,847"
|
88
|
+
1,20120106085558,2044757,L,55,4644,55,201201060949,55,201201060949,0,201201061040,L,"594,849"
|
89
|
+
1,20120106085558,2044843,L,55,4644,55,201201061033,55,201201061033,0,201201071505,L,"594,864"
|
90
|
+
1,20120106085558,2044841,X,55,4644,55,201201061032,55,201201061032,0,201201061136,X,"594,863"
|
91
|
+
1,20120106085558,2044844,L,55,4644,55,201201061033,55,201201061033,0,201201061119,L,"594,865"
|
92
|
+
1,20120106085558,,Q,55,4644,55,201201061228,195,201201061240,195,201201061240,SEC,"594,961"
|
93
|
+
1,20120106091726,2044741,L,504,15550,504,201201060942,504,201201060942,0,201201061024,L,"594,839"
|
94
|
+
1,20120106091726,2044745,X,504,15550,504,201201060942,504,201201060942,0,201201061016,X,"594,835"
|
95
|
+
1,20120106091726,2044746,L,504,15550,504,201201060942,504,201201060942,0,201201061107,L,"594,842"
|
96
|
+
1,20120106091726,2044740,L,504,15550,504,201201060942,504,201201060942,0,201201061017,L,"594,836"
|
97
|
+
1,20120106091726,2044744,L,504,15550,504,201201060942,504,201201060942,0,201201061024,L,"594,838"
|
98
|
+
1,20120106091726,2044742,L,504,15550,504,201201060942,504,201201060942,0,201201061016,L,"594,841"
|
99
|
+
1,20120106091726,,Q,504,15550,504,201201060942,66,201201060944,66,201201060944,IV,"594,837"
|
100
|
+
1,20120106091726,2044743,L,504,15550,504,201201060942,504,201201060942,0,201201061016,L,"594,840"
|
101
|
+
1,20120106095129,2044814,X,55,4644,55,201201061010,55,201201061010,0,201201061037,X,"594,853"
|
102
|
+
1,20120106100014,,Q,504,15550,504,201201061011,885,201201061037,885,201201061037,IV,"594,857"
|
103
|
+
1,20120106100014,,Q,504,15550,504,201201061011,885,201201061037,885,201201061037,N,"594,858"
|
104
|
+
1,20120106100014,,Q,504,15550,504,201201061011,885,201201061037,885,201201061037,N,"594,859"
|
105
|
+
1,20120106100014,2044815,L,504,15550,504,201201061011,504,201201061011,0,201201061023,L,"594,854"
|
106
|
+
1,20120106100014,2044817,L,504,15550,504,201201061011,504,201201061011,0,201201061049,L,"594,856"
|
107
|
+
1,20120106100014,2044818,X,504,15550,504,201201061011,504,201201061011,0,201201061038,X,"594,855"
|
108
|
+
1,20120106100014,2044816,L,504,15550,504,201201061011,504,201201061011,0,201201061049,L,"594,860"
|
@@ -0,0 +1,41 @@
|
|
1
|
+
two,eight,fourteen
|
2
|
+
20120106003230,201201060140,594756
|
3
|
+
20120106003230,201201060222,594761
|
4
|
+
20120106003230,201201060311,594766
|
5
|
+
20120106003230,201201060514,594787
|
6
|
+
20120106024355,201201060309,594764
|
7
|
+
20120106024355,201201060310,594765
|
8
|
+
20120106024355,201201060520,594789
|
9
|
+
20120106024355,201201060826,594823
|
10
|
+
20120106032719,201201060348,594775
|
11
|
+
20120106032719,201201060445,594785
|
12
|
+
20120106032719,201201060521,594790
|
13
|
+
20120106033235,201201060347,594769
|
14
|
+
20120106033235,201201060429,594780
|
15
|
+
20120106035346,201201060446,594786
|
16
|
+
20120106041426,201201060445,594784
|
17
|
+
20120106043025,201201060515,594788
|
18
|
+
20120106045326,201201060535,594791
|
19
|
+
20120106045326,201201060536,594796
|
20
|
+
20120106052714,201201060544,594797
|
21
|
+
20120106052714,201201060551,594802
|
22
|
+
20120106070243,201201060721,594803
|
23
|
+
20120106070243,201201060806,594820
|
24
|
+
20120106070243,201201060807,594822
|
25
|
+
20120106070243,201201060910,594831
|
26
|
+
20120106070243,201201060928,594834
|
27
|
+
20120106073142,201201060757,594815
|
28
|
+
20120106073757,201201060749,594810
|
29
|
+
20120106073757,201201060911,594832
|
30
|
+
20120106073757,201201060928,594833
|
31
|
+
20120106073757,201201061022,594862
|
32
|
+
20120106073757,201201061131,594896
|
33
|
+
20120106084347,201201060850,594828
|
34
|
+
20120106084720,201201060909,594829
|
35
|
+
20120106084720,201201060910,594830
|
36
|
+
20120106085558,201201060949,594846
|
37
|
+
20120106085558,201201061033,594864
|
38
|
+
20120106085558,201201061228,594961
|
39
|
+
20120106091726,201201060942,594839
|
40
|
+
20120106095129,201201061010,594853
|
41
|
+
20120106100014,201201061011,594857
|
@@ -0,0 +1,17 @@
|
|
1
|
+
# spec for file concatenation
|
2
|
+
|
3
|
+
require "spec_helper"
|
4
|
+
|
5
|
+
describe "concatenation" do
|
6
|
+
it "should leave just one header row in the result" do
|
7
|
+
file1 = File.dirname(__FILE__) + "/../data/tabbed_data.txt"
|
8
|
+
file2 = File.dirname(__FILE__) + "/../data/pipe_data.txt"
|
9
|
+
file3 = File.dirname(__FILE__) + "/../data/quoted_csv_data.txt"
|
10
|
+
tmp = Tempfile.new('concat')
|
11
|
+
results = Masticate.concat([file1, file2, file3], :output => tmp.path)
|
12
|
+
output = File.read(tmp)
|
13
|
+
tmp.unlink
|
14
|
+
correct_output = File.read(File.dirname(__FILE__) + "/../data/concat_result.txt")
|
15
|
+
output.should == correct_output
|
16
|
+
end
|
17
|
+
end
|
@@ -0,0 +1,16 @@
|
|
1
|
+
# spec for cookery
|
2
|
+
|
3
|
+
require "spec_helper"
|
4
|
+
|
5
|
+
describe "cooking up a recipe" do
|
6
|
+
it "should handle multiple steps" do
|
7
|
+
input = File.dirname(__FILE__) + "/../data/quoted_csv_data.txt"
|
8
|
+
recipe = File.dirname(__FILE__) + "/../data/recipe.txt"
|
9
|
+
tmp = Tempfile.new('cooked')
|
10
|
+
results = Masticate.cook(input, :output => tmp, :recipe => recipe)
|
11
|
+
output = File.read(tmp)
|
12
|
+
correct_output = File.read(File.dirname(__FILE__) + "/../data/cooking_result.csv")
|
13
|
+
|
14
|
+
output.should == correct_output
|
15
|
+
end
|
16
|
+
end
|
data/spec/lib/gsub_spec.rb
CHANGED
@@ -0,0 +1,15 @@
|
|
1
|
+
# spec for relabeling
|
2
|
+
|
3
|
+
require "spec_helper"
|
4
|
+
|
5
|
+
describe "relabeling" do
|
6
|
+
it "result should be same as original" do
|
7
|
+
input = File.dirname(__FILE__) + "/../data/namedcols.csv"
|
8
|
+
tmp = Tempfile.new('relabel')
|
9
|
+
results = Masticate.relabel(input, :fields => %w{happy birth day to you}, :output => tmp.path)
|
10
|
+
output = File.read(tmp)
|
11
|
+
tmp.unlink
|
12
|
+
correct_output = File.read(File.dirname(__FILE__) + "/../data/relabel_result.csv")
|
13
|
+
output.should == correct_output
|
14
|
+
end
|
15
|
+
end
|
data/spec/lib/sniffer_spec.rb
CHANGED
@@ -31,4 +31,11 @@ describe "delimiter sniffing" do
|
|
31
31
|
results[:quote_char].should == '"'
|
32
32
|
results[:field_counts].should == {14 => 100}
|
33
33
|
end
|
34
|
+
|
35
|
+
it "should find tilde delimiter" do
|
36
|
+
filename = File.dirname(__FILE__) + "/../data/tilde_data.txt"
|
37
|
+
results = Masticate.sniff(filename, :stats => true)
|
38
|
+
results[:col_sep].should == '~'
|
39
|
+
results[:field_counts].should == {6 => 5}
|
40
|
+
end
|
34
41
|
end
|
data/spec/spec_helper.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: masticate
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.5
|
4
|
+
version: '0.2'
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-04-
|
12
|
+
date: 2012-04-23 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rspec
|
16
|
-
requirement: &
|
16
|
+
requirement: &2153649040 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ~>
|
@@ -21,10 +21,10 @@ dependencies:
|
|
21
21
|
version: 2.9.0
|
22
22
|
type: :development
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *2153649040
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: guard-rspec
|
27
|
-
requirement: &
|
27
|
+
requirement: &2153648360 !ruby/object:Gem::Requirement
|
28
28
|
none: false
|
29
29
|
requirements:
|
30
30
|
- - ~>
|
@@ -32,10 +32,10 @@ dependencies:
|
|
32
32
|
version: 0.7.0
|
33
33
|
type: :development
|
34
34
|
prerelease: false
|
35
|
-
version_requirements: *
|
35
|
+
version_requirements: *2153648360
|
36
36
|
- !ruby/object:Gem::Dependency
|
37
37
|
name: ruby_gntp
|
38
|
-
requirement: &
|
38
|
+
requirement: &2153647700 !ruby/object:Gem::Requirement
|
39
39
|
none: false
|
40
40
|
requirements:
|
41
41
|
- - ~>
|
@@ -43,7 +43,7 @@ dependencies:
|
|
43
43
|
version: 0.3.4
|
44
44
|
type: :development
|
45
45
|
prerelease: false
|
46
|
-
version_requirements: *
|
46
|
+
version_requirements: *2153647700
|
47
47
|
description: Data file crunching
|
48
48
|
email:
|
49
49
|
- jmay@pobox.com
|
@@ -61,18 +61,24 @@ files:
|
|
61
61
|
- bin/masticate
|
62
62
|
- lib/masticate.rb
|
63
63
|
- lib/masticate/base.rb
|
64
|
+
- lib/masticate/concat.rb
|
65
|
+
- lib/masticate/cook.rb
|
64
66
|
- lib/masticate/csvify.rb
|
65
67
|
- lib/masticate/datify.rb
|
66
68
|
- lib/masticate/gsubber.rb
|
67
69
|
- lib/masticate/max_rows.rb
|
68
70
|
- lib/masticate/mender.rb
|
71
|
+
- lib/masticate/myoptparse.rb
|
69
72
|
- lib/masticate/plucker.rb
|
73
|
+
- lib/masticate/relabel.rb
|
70
74
|
- lib/masticate/sniffer.rb
|
71
75
|
- lib/masticate/version.rb
|
72
76
|
- masticate.gemspec
|
73
77
|
- spec/data/badnums.csv
|
74
78
|
- spec/data/badnums_fixed.csv
|
75
79
|
- spec/data/broken_psv.txt
|
80
|
+
- spec/data/concat_result.txt
|
81
|
+
- spec/data/cooking_result.csv
|
76
82
|
- spec/data/events.csv
|
77
83
|
- spec/data/events_reduced.csv
|
78
84
|
- spec/data/inlined_headers.csv
|
@@ -83,12 +89,18 @@ files:
|
|
83
89
|
- spec/data/namedcols.csv.output
|
84
90
|
- spec/data/pipe_data.txt
|
85
91
|
- spec/data/quoted_csv_data.txt
|
92
|
+
- spec/data/recipe.txt
|
93
|
+
- spec/data/relabel_result.csv
|
86
94
|
- spec/data/tabbed_data.txt
|
95
|
+
- spec/data/tilde_data.txt
|
96
|
+
- spec/lib/concat_spec.rb
|
97
|
+
- spec/lib/cook_spec.rb
|
87
98
|
- spec/lib/csvify_spec.rb
|
88
99
|
- spec/lib/gsub_spec.rb
|
89
100
|
- spec/lib/maxrow_spec.rb
|
90
101
|
- spec/lib/mender_spec.rb
|
91
102
|
- spec/lib/plucker_spec.rb
|
103
|
+
- spec/lib/relabel_spec.rb
|
92
104
|
- spec/lib/sniffer_spec.rb
|
93
105
|
- spec/spec_helper.rb
|
94
106
|
homepage: ''
|
@@ -119,6 +131,8 @@ test_files:
|
|
119
131
|
- spec/data/badnums.csv
|
120
132
|
- spec/data/badnums_fixed.csv
|
121
133
|
- spec/data/broken_psv.txt
|
134
|
+
- spec/data/concat_result.txt
|
135
|
+
- spec/data/cooking_result.csv
|
122
136
|
- spec/data/events.csv
|
123
137
|
- spec/data/events_reduced.csv
|
124
138
|
- spec/data/inlined_headers.csv
|
@@ -129,12 +143,18 @@ test_files:
|
|
129
143
|
- spec/data/namedcols.csv.output
|
130
144
|
- spec/data/pipe_data.txt
|
131
145
|
- spec/data/quoted_csv_data.txt
|
146
|
+
- spec/data/recipe.txt
|
147
|
+
- spec/data/relabel_result.csv
|
132
148
|
- spec/data/tabbed_data.txt
|
149
|
+
- spec/data/tilde_data.txt
|
150
|
+
- spec/lib/concat_spec.rb
|
151
|
+
- spec/lib/cook_spec.rb
|
133
152
|
- spec/lib/csvify_spec.rb
|
134
153
|
- spec/lib/gsub_spec.rb
|
135
154
|
- spec/lib/maxrow_spec.rb
|
136
155
|
- spec/lib/mender_spec.rb
|
137
156
|
- spec/lib/plucker_spec.rb
|
157
|
+
- spec/lib/relabel_spec.rb
|
138
158
|
- spec/lib/sniffer_spec.rb
|
139
159
|
- spec/spec_helper.rb
|
140
160
|
has_rdoc:
|