masticate 0.5.1 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/bin/masticate +1 -4
- data/lib/masticate.rb +1 -1
- data/lib/masticate/command.rb +153 -0
- data/lib/masticate/version.rb +1 -1
- data/spec/spec_helper.rb +0 -31
- metadata +5 -4
data/bin/masticate
CHANGED
data/lib/masticate.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
require "open-uri"
|
2
2
|
require "csv"
|
3
3
|
|
4
|
-
%w{version base
|
4
|
+
%w{version base sniffer mender csvify plucker datify gsubber max_rows concat relabel exclude transform include cook command myoptparse}.each do |f|
|
5
5
|
require_relative "masticate/#{f}"
|
6
6
|
end
|
7
7
|
|
@@ -0,0 +1,153 @@
|
|
1
|
+
require 'thor'
|
2
|
+
require 'pp'
|
3
|
+
|
4
|
+
class Masticate::Command < Thor
|
5
|
+
|
6
|
+
no_tasks do
|
7
|
+
def params
|
8
|
+
opts = options.dup
|
9
|
+
col_sep = opts[:delim]
|
10
|
+
col_sep = "TAB" if col_sep == "\t"
|
11
|
+
opts[:col_sep] = col_sep
|
12
|
+
opts
|
13
|
+
end
|
14
|
+
|
15
|
+
def execute(filename = nil)
|
16
|
+
method = caller[0][/`.*'/][1..-2]
|
17
|
+
results = Masticate.send(method, filename, params)
|
18
|
+
logmessage(method, params, results)
|
19
|
+
end
|
20
|
+
|
21
|
+
def logmessage(command, options, results)
|
22
|
+
$stderr.puts <<-EOT
|
23
|
+
* masticate #{command} (#{options.keys.join(', ')})
|
24
|
+
Lines in input: #{results[:input_count]}
|
25
|
+
Lines in output: #{results[:output_count]}
|
26
|
+
Headers: #{results[:headers].inspect}
|
27
|
+
EOT
|
28
|
+
if results[:field_counts]
|
29
|
+
$stderr.puts " Field counts: #{results[:field_counts].inspect}"
|
30
|
+
end
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
34
|
+
desc "sniff", "determine structure of source file"
|
35
|
+
method_option :stats => :boolean, :desc => "Read entire file, report distribution of field counts"
|
36
|
+
def sniff
|
37
|
+
results = Masticate.sniff(filename, params)
|
38
|
+
col_sep = results[:col_sep]
|
39
|
+
col_sep = "TAB" if col_sep == "\t"
|
40
|
+
quote_char = results[:quote_char] || "NONE"
|
41
|
+
$stderr.puts <<-EOT
|
42
|
+
Processing complete.
|
43
|
+
Input delimiter: #{col_sep}
|
44
|
+
Quote char: #{quote_char}
|
45
|
+
Field counts: #{results[:field_counts].inspect}
|
46
|
+
Headers: #{results[:headers].join(',')}
|
47
|
+
EOT
|
48
|
+
end
|
49
|
+
|
50
|
+
desc "mend", "repair source file format"
|
51
|
+
method_option :delim, :desc => "field delimiter (default is comma)"
|
52
|
+
method_option :quote, :desc => "delimiter escape string (default is double-quotes when delim is comma, otherwise none)"
|
53
|
+
method_option :inlined => :boolean, :desc => "Source records repeat header fields on every line"
|
54
|
+
method_option :snip => :numeric, :desc => "Ignore first N fields of each row"
|
55
|
+
method_option :dejunk => :boolean, :desc => "Ignore short lines"
|
56
|
+
method_option :buried => :numeric, :desc => "Remove embedded delimiters in field N"
|
57
|
+
def mend(filename = nil)
|
58
|
+
results = Masticate.mend(filename, params)
|
59
|
+
logmessage(__method__, params, results)
|
60
|
+
end
|
61
|
+
|
62
|
+
desc "csvify", "convert input to standard CSV"
|
63
|
+
method_option :delim, :desc => "field delimiter (default is comma)"
|
64
|
+
method_option :quote, :desc => "delimiter escape string (default is double-quotes when delim is comma, otherwise none)"
|
65
|
+
def csvify(filename = nil)
|
66
|
+
execute(filename)
|
67
|
+
end
|
68
|
+
|
69
|
+
desc 'pluck', "ignore all but the specified columns"
|
70
|
+
method_option :delim, :desc => "field delimiter (default is comma)"
|
71
|
+
method_option :quote, :desc => "delimiter escape string (default is double-quotes when delim is comma, otherwise none)"
|
72
|
+
method_option :fields, :required => true, :desc => "field names to extract"
|
73
|
+
def pluck(filename = nil)
|
74
|
+
execute(filename)
|
75
|
+
end
|
76
|
+
|
77
|
+
desc "datify", "parsed named field as formatted time/datestamp"
|
78
|
+
method_option :delim, :desc => "field delimiter (default is comma)"
|
79
|
+
method_option :quote, :desc => "delimiter escape string (default is double-quotes when delim is comma, otherwise none)"
|
80
|
+
method_option :field, :required => true, :desc => "Fieldname to interpret as a date/time"
|
81
|
+
method_option :format, :required => true, :desc => "strptime format string"
|
82
|
+
def datify(filename = nil)
|
83
|
+
execute(filename)
|
84
|
+
end
|
85
|
+
|
86
|
+
desc "gsub", "applied substitution rule to named field"
|
87
|
+
method_option :delim, :desc => "field delimiter (default is comma)"
|
88
|
+
method_option :quote, :desc => "delimiter escape string (default is double-quotes when delim is comma, otherwise none)"
|
89
|
+
method_option :field, :required => true, :desc => "field to convert"
|
90
|
+
method_option :from, :required => true, :desc => "regexp to apply to original value"
|
91
|
+
method_option :to, :required => true, :desc => "string to replace capture with"
|
92
|
+
def gsub(filename = nil)
|
93
|
+
execute(filename)
|
94
|
+
end
|
95
|
+
|
96
|
+
desc "maxrows", "compute SUM(max-field) GROUP BY(by-field)"
|
97
|
+
method_option :delim, :desc => "field delimiter (default is comma)"
|
98
|
+
method_option :quote, :desc => "delimiter escape string (default is double-quotes when delim is comma, otherwise none)"
|
99
|
+
method_option :max, :required => true, :desc => "field to sum"
|
100
|
+
method_option :by, :required => true, :desc => "field to aggregate over"
|
101
|
+
def maxrows(filename = nil)
|
102
|
+
execute(filename)
|
103
|
+
end
|
104
|
+
|
105
|
+
desc "concat", "concatenate multiple input files, ignoring header lines on all but first file"
|
106
|
+
method_option :delim, :desc => "field delimiter (default is comma)"
|
107
|
+
method_option :quote, :desc => "delimiter escape string (default is double-quotes when delim is comma, otherwise none)"
|
108
|
+
def concat(*args)
|
109
|
+
Masticate.concat(args, params)
|
110
|
+
end
|
111
|
+
|
112
|
+
desc "relabel", "replace header line in output"
|
113
|
+
method_option :delim, :desc => "field delimiter (default is comma)"
|
114
|
+
method_option :quote, :desc => "delimiter escape string (default is double-quotes when delim is comma, otherwise none)"
|
115
|
+
method_option :fields, :required => true, :desc => "list of field names to use in output"
|
116
|
+
def relabel(filename = nil)
|
117
|
+
Masticate.relabel(filename, params)
|
118
|
+
end
|
119
|
+
|
120
|
+
desc "exclude", "ignore input lines that match criteria"
|
121
|
+
method_option :delim, :desc => "field delimiter (default is comma)"
|
122
|
+
method_option :quote, :desc => "delimiter escape string (default is double-quotes when delim is comma, otherwise none)"
|
123
|
+
method_option :field, :required => true, :desc => "field to check for exclusion"
|
124
|
+
method_option :value, :required => true, :desc => "value to compare with for exclusion"
|
125
|
+
def exclude(filename = nil)
|
126
|
+
execute(filename)
|
127
|
+
end
|
128
|
+
|
129
|
+
desc "include", "ignore all input lines *except* those that match criteria"
|
130
|
+
method_option :delim, :desc => "field delimiter (default is comma)"
|
131
|
+
method_option :quote, :desc => "delimiter escape string (default is double-quotes when delim is comma, otherwise none)"
|
132
|
+
method_option :field, :required => true, :desc => "field to check for inclusion"
|
133
|
+
method_option :value, :required => true, :desc => "value to compare with for inclusion"
|
134
|
+
def include(filename = nil)
|
135
|
+
execute(filename)
|
136
|
+
end
|
137
|
+
|
138
|
+
desc "transform", "apply transformation rule to named field"
|
139
|
+
method_option :delim, :desc => "field delimiter (default is comma)"
|
140
|
+
method_option :quote, :desc => "delimiter escape string (default is double-quotes when delim is comma, otherwise none)"
|
141
|
+
method_option :rule, :required => true, :desc => "valid values are {upcase, downcase}"
|
142
|
+
def transform(filename = nil)
|
143
|
+
execute(filename)
|
144
|
+
end
|
145
|
+
|
146
|
+
desc "cook", "apply conversion recipe to input records"
|
147
|
+
method_option :delim, :desc => "field delimiter (default is comma)"
|
148
|
+
method_option :quote, :desc => "delimiter escape string (default is double-quotes when delim is comma, otherwise none)"
|
149
|
+
method_option :recipe, :required => true, :desc => "filename containing recipe"
|
150
|
+
def cook(filename = nil)
|
151
|
+
execute(filename)
|
152
|
+
end
|
153
|
+
end
|
data/lib/masticate/version.rb
CHANGED
data/spec/spec_helper.rb
CHANGED
@@ -1,35 +1,4 @@
|
|
1
|
-
# require File.expand_path("../../config/environment", __FILE__)
|
2
|
-
# require 'rspec/rails'
|
3
1
|
require 'rspec/autorun'
|
4
|
-
# require 'capybara/rspec'
|
5
2
|
require "tempfile"
|
6
3
|
|
7
|
-
# Requires supporting ruby files with custom matchers and macros, etc,
|
8
|
-
# in spec/support/ and its subdirectories.
|
9
|
-
# Dir[Rails.root.join("spec/support/**/*.rb")].each {|f| require f}
|
10
|
-
|
11
4
|
require File.expand_path('../../lib/masticate', __FILE__)
|
12
|
-
|
13
|
-
RSpec.configure do |config|
|
14
|
-
# ## Mock Framework
|
15
|
-
#
|
16
|
-
# If you prefer to use mocha, flexmock or RR, uncomment the appropriate line:
|
17
|
-
#
|
18
|
-
# config.mock_with :mocha
|
19
|
-
# config.mock_with :flexmock
|
20
|
-
# config.mock_with :rr
|
21
|
-
|
22
|
-
# Remove this line if you're not using ActiveRecord or ActiveRecord fixtures
|
23
|
-
# config.fixture_path = "#{::Rails.root}/spec/fixtures"
|
24
|
-
|
25
|
-
# If you're not using ActiveRecord, or you'd prefer not to run each of your
|
26
|
-
# examples within a transaction, remove the following line or assign false
|
27
|
-
# instead of true.
|
28
|
-
# config.use_transactional_fixtures = true
|
29
|
-
# config.use_instantiated_fixtures = false
|
30
|
-
|
31
|
-
# If true, the base class of anonymous controllers will be inferred
|
32
|
-
# automatically. This will be the default behavior in future versions of
|
33
|
-
# rspec-rails.
|
34
|
-
# config.infer_base_class_for_anonymous_controllers = false
|
35
|
-
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: masticate
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.1
|
4
|
+
version: 0.6.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-
|
12
|
+
date: 2012-11-14 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rake
|
@@ -93,6 +93,7 @@ files:
|
|
93
93
|
- bin/masticate
|
94
94
|
- lib/masticate.rb
|
95
95
|
- lib/masticate/base.rb
|
96
|
+
- lib/masticate/command.rb
|
96
97
|
- lib/masticate/concat.rb
|
97
98
|
- lib/masticate/cook.rb
|
98
99
|
- lib/masticate/csvify.rb
|
@@ -172,7 +173,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
172
173
|
version: '0'
|
173
174
|
segments:
|
174
175
|
- 0
|
175
|
-
hash:
|
176
|
+
hash: 2907326929962451974
|
176
177
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
177
178
|
none: false
|
178
179
|
requirements:
|
@@ -181,7 +182,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
181
182
|
version: '0'
|
182
183
|
segments:
|
183
184
|
- 0
|
184
|
-
hash:
|
185
|
+
hash: 2907326929962451974
|
185
186
|
requirements: []
|
186
187
|
rubyforge_project: masticate
|
187
188
|
rubygems_version: 1.8.24
|