masticate 0.5.1 → 0.6.0
Sign up to get free protection for your applications and to get access to all the features.
- data/bin/masticate +1 -4
- data/lib/masticate.rb +1 -1
- data/lib/masticate/command.rb +153 -0
- data/lib/masticate/version.rb +1 -1
- data/spec/spec_helper.rb +0 -31
- metadata +5 -4
data/bin/masticate
CHANGED
data/lib/masticate.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
require "open-uri"
|
2
2
|
require "csv"
|
3
3
|
|
4
|
-
%w{version base
|
4
|
+
%w{version base sniffer mender csvify plucker datify gsubber max_rows concat relabel exclude transform include cook command myoptparse}.each do |f|
|
5
5
|
require_relative "masticate/#{f}"
|
6
6
|
end
|
7
7
|
|
@@ -0,0 +1,153 @@
|
|
1
|
+
require 'thor'
|
2
|
+
require 'pp'
|
3
|
+
|
4
|
+
# Thor-based command-line front end for Masticate.
#
# Each command hands the parsed command-line options (see #params) to the
# Masticate module method of the same name. Most commands then print a short
# summary of the returned results hash to $stderr.
class Masticate::Command < Thor

  no_tasks do
    # Builds the options hash passed to the Masticate.* methods.
    #
    # Returns a copy of the parsed Thor options with an extra :col_sep entry
    # derived from the :delim option.
    def params
      opts = options.dup
      col_sep = opts[:delim]
      # NOTE(review): a literal tab is mapped to the string "TAB" here; confirm
      # the downstream code expects "TAB" rather than the reverse mapping.
      col_sep = "TAB" if col_sep == "\t"
      opts[:col_sep] = col_sep
      opts
    end

    # Runs the Masticate method named after the calling command and logs the
    # results.
    #
    # filename - input file name forwarded to Masticate (may be nil).
    def execute(filename = nil)
      # The command name is recovered from the caller's backtrace frame.
      # Older Rubies end the frame with "in `csvify'", Ruby 3.4+ with
      # "in 'Masticate::Command#csvify'"; accept both forms and keep only the
      # bare method name.
      method = caller[0][/[`']([^`']+)'\z/, 1].split(/[#.]/).last
      results = Masticate.send(method, filename, params)
      logmessage(method, params, results)
    end

    # Prints a summary of a finished command to $stderr.
    #
    # command - name of the command that was run.
    # options - options hash; only its keys are reported.
    # results - hash read for :input_count, :output_count, :headers and,
    #           when present, :field_counts.
    def logmessage(command, options, results)
      # Heredoc bodies are kept flush-left so no leading whitespace is emitted.
      $stderr.puts <<-EOT
* masticate #{command} (#{options.keys.join(', ')})
Lines in input: #{results[:input_count]}
Lines in output: #{results[:output_count]}
Headers: #{results[:headers].inspect}
      EOT
      if results[:field_counts]
        $stderr.puts " Field counts: #{results[:field_counts].inspect}"
      end
    end
  end

  desc "sniff", "determine structure of source file"
  # method_option takes the option name first and its settings (:type, :desc)
  # as a separate hash.
  method_option :stats, :type => :boolean, :desc => "Read entire file, report distribution of field counts"
  # Reports the delimiter, quote character, field counts and headers that
  # Masticate.sniff returns for the input.
  def sniff(filename = nil)
    results = Masticate.sniff(filename, params)
    col_sep = results[:col_sep]
    # Show a readable token instead of an invisible tab character.
    col_sep = "TAB" if col_sep == "\t"
    quote_char = results[:quote_char] || "NONE"
    $stderr.puts <<-EOT
Processing complete.
Input delimiter: #{col_sep}
Quote char: #{quote_char}
Field counts: #{results[:field_counts].inspect}
Headers: #{results[:headers].join(',')}
    EOT
  end

  desc "mend", "repair source file format"
  method_option :delim, :desc => "field delimiter (default is comma)"
  method_option :quote, :desc => "delimiter escape string (default is double-quotes when delim is comma, otherwise none)"
  method_option :inlined, :type => :boolean, :desc => "Source records repeat header fields on every line"
  method_option :snip, :type => :numeric, :desc => "Ignore first N fields of each row"
  method_option :dejunk, :type => :boolean, :desc => "Ignore short lines"
  method_option :buried, :type => :numeric, :desc => "Remove embedded delimiters in field N"
  # Calls Masticate.mend directly (rather than via #execute) and logs the results.
  def mend(filename = nil)
    results = Masticate.mend(filename, params)
    logmessage(__method__, params, results)
  end

  desc "csvify", "convert input to standard CSV"
  method_option :delim, :desc => "field delimiter (default is comma)"
  method_option :quote, :desc => "delimiter escape string (default is double-quotes when delim is comma, otherwise none)"
  def csvify(filename = nil)
    execute(filename)
  end

  desc 'pluck', "ignore all but the specified columns"
  method_option :delim, :desc => "field delimiter (default is comma)"
  method_option :quote, :desc => "delimiter escape string (default is double-quotes when delim is comma, otherwise none)"
  method_option :fields, :required => true, :desc => "field names to extract"
  def pluck(filename = nil)
    execute(filename)
  end

  desc "datify", "parsed named field as formatted time/datestamp"
  method_option :delim, :desc => "field delimiter (default is comma)"
  method_option :quote, :desc => "delimiter escape string (default is double-quotes when delim is comma, otherwise none)"
  method_option :field, :required => true, :desc => "Fieldname to interpret as a date/time"
  method_option :format, :required => true, :desc => "strptime format string"
  def datify(filename = nil)
    execute(filename)
  end

  desc "gsub", "applied substitution rule to named field"
  method_option :delim, :desc => "field delimiter (default is comma)"
  method_option :quote, :desc => "delimiter escape string (default is double-quotes when delim is comma, otherwise none)"
  method_option :field, :required => true, :desc => "field to convert"
  method_option :from, :required => true, :desc => "regexp to apply to original value"
  method_option :to, :required => true, :desc => "string to replace capture with"
  def gsub(filename = nil)
    execute(filename)
  end

  desc "maxrows", "compute SUM(max-field) GROUP BY(by-field)"
  method_option :delim, :desc => "field delimiter (default is comma)"
  method_option :quote, :desc => "delimiter escape string (default is double-quotes when delim is comma, otherwise none)"
  method_option :max, :required => true, :desc => "field to sum"
  method_option :by, :required => true, :desc => "field to aggregate over"
  def maxrows(filename = nil)
    execute(filename)
  end

  desc "concat", "concatenate multiple input files, ignoring header lines on all but first file"
  method_option :delim, :desc => "field delimiter (default is comma)"
  method_option :quote, :desc => "delimiter escape string (default is double-quotes when delim is comma, otherwise none)"
  # Forwards every positional argument to Masticate.concat; no summary is logged.
  def concat(*args)
    Masticate.concat(args, params)
  end

  desc "relabel", "replace header line in output"
  method_option :delim, :desc => "field delimiter (default is comma)"
  method_option :quote, :desc => "delimiter escape string (default is double-quotes when delim is comma, otherwise none)"
  method_option :fields, :required => true, :desc => "list of field names to use in output"
  # Calls Masticate.relabel directly; no summary is logged.
  def relabel(filename = nil)
    Masticate.relabel(filename, params)
  end

  desc "exclude", "ignore input lines that match criteria"
  method_option :delim, :desc => "field delimiter (default is comma)"
  method_option :quote, :desc => "delimiter escape string (default is double-quotes when delim is comma, otherwise none)"
  method_option :field, :required => true, :desc => "field to check for exclusion"
  method_option :value, :required => true, :desc => "value to compare with for exclusion"
  def exclude(filename = nil)
    execute(filename)
  end

  desc "include", "ignore all input lines *except* those that match criteria"
  method_option :delim, :desc => "field delimiter (default is comma)"
  method_option :quote, :desc => "delimiter escape string (default is double-quotes when delim is comma, otherwise none)"
  method_option :field, :required => true, :desc => "field to check for inclusion"
  method_option :value, :required => true, :desc => "value to compare with for inclusion"
  def include(filename = nil)
    execute(filename)
  end

  desc "transform", "apply transformation rule to named field"
  method_option :delim, :desc => "field delimiter (default is comma)"
  method_option :quote, :desc => "delimiter escape string (default is double-quotes when delim is comma, otherwise none)"
  method_option :rule, :required => true, :desc => "valid values are {upcase, downcase}"
  def transform(filename = nil)
    execute(filename)
  end

  desc "cook", "apply conversion recipe to input records"
  method_option :delim, :desc => "field delimiter (default is comma)"
  method_option :quote, :desc => "delimiter escape string (default is double-quotes when delim is comma, otherwise none)"
  method_option :recipe, :required => true, :desc => "filename containing recipe"
  def cook(filename = nil)
    execute(filename)
  end
end
|
data/lib/masticate/version.rb
CHANGED
data/spec/spec_helper.rb
CHANGED
@@ -1,35 +1,4 @@
|
|
1
|
-
# require File.expand_path("../../config/environment", __FILE__)
|
2
|
-
# require 'rspec/rails'
|
3
1
|
require 'rspec/autorun'
|
4
|
-
# require 'capybara/rspec'
|
5
2
|
require "tempfile"
|
6
3
|
|
7
|
-
# Requires supporting ruby files with custom matchers and macros, etc,
|
8
|
-
# in spec/support/ and its subdirectories.
|
9
|
-
# Dir[Rails.root.join("spec/support/**/*.rb")].each {|f| require f}
|
10
|
-
|
11
4
|
require File.expand_path('../../lib/masticate', __FILE__)
|
12
|
-
|
13
|
-
RSpec.configure do |config|
|
14
|
-
# ## Mock Framework
|
15
|
-
#
|
16
|
-
# If you prefer to use mocha, flexmock or RR, uncomment the appropriate line:
|
17
|
-
#
|
18
|
-
# config.mock_with :mocha
|
19
|
-
# config.mock_with :flexmock
|
20
|
-
# config.mock_with :rr
|
21
|
-
|
22
|
-
# Remove this line if you're not using ActiveRecord or ActiveRecord fixtures
|
23
|
-
# config.fixture_path = "#{::Rails.root}/spec/fixtures"
|
24
|
-
|
25
|
-
# If you're not using ActiveRecord, or you'd prefer not to run each of your
|
26
|
-
# examples within a transaction, remove the following line or assign false
|
27
|
-
# instead of true.
|
28
|
-
# config.use_transactional_fixtures = true
|
29
|
-
# config.use_instantiated_fixtures = false
|
30
|
-
|
31
|
-
# If true, the base class of anonymous controllers will be inferred
|
32
|
-
# automatically. This will be the default behavior in future versions of
|
33
|
-
# rspec-rails.
|
34
|
-
# config.infer_base_class_for_anonymous_controllers = false
|
35
|
-
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: masticate
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.1
|
4
|
+
version: 0.6.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-
|
12
|
+
date: 2012-11-14 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rake
|
@@ -93,6 +93,7 @@ files:
|
|
93
93
|
- bin/masticate
|
94
94
|
- lib/masticate.rb
|
95
95
|
- lib/masticate/base.rb
|
96
|
+
- lib/masticate/command.rb
|
96
97
|
- lib/masticate/concat.rb
|
97
98
|
- lib/masticate/cook.rb
|
98
99
|
- lib/masticate/csvify.rb
|
@@ -172,7 +173,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
172
173
|
version: '0'
|
173
174
|
segments:
|
174
175
|
- 0
|
175
|
-
hash:
|
176
|
+
hash: 2907326929962451974
|
176
177
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
177
178
|
none: false
|
178
179
|
requirements:
|
@@ -181,7 +182,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
181
182
|
version: '0'
|
182
183
|
segments:
|
183
184
|
- 0
|
184
|
-
hash:
|
185
|
+
hash: 2907326929962451974
|
185
186
|
requirements: []
|
186
187
|
rubyforge_project: masticate
|
187
188
|
rubygems_version: 1.8.24
|