masticate 0.5.1 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/bin/masticate CHANGED
@@ -2,7 +2,4 @@
2
2
 
3
3
  require_relative "../lib/masticate"
4
4
 
5
- masticator = Masticate::MyOptionParser.new
6
- command, options, filenames = masticator.parse
7
-
8
- masticator.execute(command, options, filenames)
5
+ Masticate::Command.start
data/lib/masticate.rb CHANGED
@@ -1,7 +1,7 @@
1
1
  require "open-uri"
2
2
  require "csv"
3
3
 
4
- %w{version base myoptparse sniffer mender csvify plucker datify gsubber max_rows concat relabel exclude transform include cook}.each do |f|
4
+ %w{version base sniffer mender csvify plucker datify gsubber max_rows concat relabel exclude transform include cook command myoptparse}.each do |f|
5
5
  require_relative "masticate/#{f}"
6
6
  end
7
7
 
@@ -0,0 +1,153 @@
1
+ require 'thor'
2
+ require 'pp'
3
+
4
+ class Masticate::Command < Thor
5
+
6
+ no_tasks do
7
+ def params
8
+ opts = options.dup
9
+ col_sep = opts[:delim]
10
+ col_sep = "TAB" if col_sep == "\t"
11
+ opts[:col_sep] = col_sep
12
+ opts
13
+ end
14
+
15
+ def execute(filename = nil)
16
+ method = caller[0][/`.*'/][1..-2]
17
+ results = Masticate.send(method, filename, params)
18
+ logmessage(method, params, results)
19
+ end
20
+
21
+ def logmessage(command, options, results)
22
+ $stderr.puts <<-EOT
23
+ * masticate #{command} (#{options.keys.join(', ')})
24
+ Lines in input: #{results[:input_count]}
25
+ Lines in output: #{results[:output_count]}
26
+ Headers: #{results[:headers].inspect}
27
+ EOT
28
+ if results[:field_counts]
29
+ $stderr.puts " Field counts: #{results[:field_counts].inspect}"
30
+ end
31
+ end
32
+ end
33
+
34
+ desc "sniff", "determine structure of source file"
35
+ method_option :stats => :boolean, :desc => "Read entire file, report distribution of field counts"
36
+ def sniff
37
+ results = Masticate.sniff(filename, params)
38
+ col_sep = results[:col_sep]
39
+ col_sep = "TAB" if col_sep == "\t"
40
+ quote_char = results[:quote_char] || "NONE"
41
+ $stderr.puts <<-EOT
42
+ Processing complete.
43
+ Input delimiter: #{col_sep}
44
+ Quote char: #{quote_char}
45
+ Field counts: #{results[:field_counts].inspect}
46
+ Headers: #{results[:headers].join(',')}
47
+ EOT
48
+ end
49
+
50
+ desc "mend", "repair source file format"
51
+ method_option :delim, :desc => "field delimiter (default is comma)"
52
+ method_option :quote, :desc => "delimiter escape string (default is double-quotes when delim is comma, otherwise none)"
53
+ method_option :inlined => :boolean, :desc => "Source records repeat header fields on every line"
54
+ method_option :snip => :numeric, :desc => "Ignore first N fields of each row"
55
+ method_option :dejunk => :boolean, :desc => "Ignore short lines"
56
+ method_option :buried => :numeric, :desc => "Remove embedded delimiters in field N"
57
+ def mend(filename = nil)
58
+ results = Masticate.mend(filename, params)
59
+ logmessage(__method__, params, results)
60
+ end
61
+
62
+ desc "csvify", "convert input to standard CSV"
63
+ method_option :delim, :desc => "field delimiter (default is comma)"
64
+ method_option :quote, :desc => "delimiter escape string (default is double-quotes when delim is comma, otherwise none)"
65
+ def csvify(filename = nil)
66
+ execute(filename)
67
+ end
68
+
69
+ desc 'pluck', "ignore all but the specified columns"
70
+ method_option :delim, :desc => "field delimiter (default is comma)"
71
+ method_option :quote, :desc => "delimiter escape string (default is double-quotes when delim is comma, otherwise none)"
72
+ method_option :fields, :required => true, :desc => "field names to extract"
73
+ def pluck(filename = nil)
74
+ execute(filename)
75
+ end
76
+
77
+ desc "datify", "parsed named field as formatted time/datestamp"
78
+ method_option :delim, :desc => "field delimiter (default is comma)"
79
+ method_option :quote, :desc => "delimiter escape string (default is double-quotes when delim is comma, otherwise none)"
80
+ method_option :field, :required => true, :desc => "Fieldname to interpret as a date/time"
81
+ method_option :format, :required => true, :desc => "strptime format string"
82
+ def datify(filename = nil)
83
+ execute(filename)
84
+ end
85
+
86
+ desc "gsub", "applied substitution rule to named field"
87
+ method_option :delim, :desc => "field delimiter (default is comma)"
88
+ method_option :quote, :desc => "delimiter escape string (default is double-quotes when delim is comma, otherwise none)"
89
+ method_option :field, :required => true, :desc => "field to convert"
90
+ method_option :from, :required => true, :desc => "regexp to apply to original value"
91
+ method_option :to, :required => true, :desc => "string to replace capture with"
92
+ def gsub(filename = nil)
93
+ execute(filename)
94
+ end
95
+
96
+ desc "maxrows", "compute SUM(max-field) GROUP BY(by-field)"
97
+ method_option :delim, :desc => "field delimiter (default is comma)"
98
+ method_option :quote, :desc => "delimiter escape string (default is double-quotes when delim is comma, otherwise none)"
99
+ method_option :max, :required => true, :desc => "field to sum"
100
+ method_option :by, :required => true, :desc => "field to aggregate over"
101
+ def maxrows(filename = nil)
102
+ execute(filename)
103
+ end
104
+
105
+ desc "concat", "concatenate multiple input files, ignoring header lines on all but first file"
106
+ method_option :delim, :desc => "field delimiter (default is comma)"
107
+ method_option :quote, :desc => "delimiter escape string (default is double-quotes when delim is comma, otherwise none)"
108
+ def concat(*args)
109
+ Masticate.concat(args, params)
110
+ end
111
+
112
+ desc "relabel", "replace header line in output"
113
+ method_option :delim, :desc => "field delimiter (default is comma)"
114
+ method_option :quote, :desc => "delimiter escape string (default is double-quotes when delim is comma, otherwise none)"
115
+ method_option :fields, :required => true, :desc => "list of field names to use in output"
116
+ def relabel(filename = nil)
117
+ Masticate.relabel(filename, params)
118
+ end
119
+
120
+ desc "exclude", "ignore input lines that match criteria"
121
+ method_option :delim, :desc => "field delimiter (default is comma)"
122
+ method_option :quote, :desc => "delimiter escape string (default is double-quotes when delim is comma, otherwise none)"
123
+ method_option :field, :required => true, :desc => "field to check for exclusion"
124
+ method_option :value, :required => true, :desc => "value to compare with for exclusion"
125
+ def exclude(filename = nil)
126
+ execute(filename)
127
+ end
128
+
129
+ desc "include", "ignore all input lines *except* those that match criteria"
130
+ method_option :delim, :desc => "field delimiter (default is comma)"
131
+ method_option :quote, :desc => "delimiter escape string (default is double-quotes when delim is comma, otherwise none)"
132
+ method_option :field, :required => true, :desc => "field to check for inclusion"
133
+ method_option :value, :required => true, :desc => "value to compare with for inclusion"
134
+ def include(filename = nil)
135
+ execute(filename)
136
+ end
137
+
138
+ desc "transform", "apply transformation rule to named field"
139
+ method_option :delim, :desc => "field delimiter (default is comma)"
140
+ method_option :quote, :desc => "delimiter escape string (default is double-quotes when delim is comma, otherwise none)"
141
+ method_option :rule, :required => true, :desc => "valid values are {upcase, downcase}"
142
+ def transform(filename = nil)
143
+ execute(filename)
144
+ end
145
+
146
+ desc "cook", "apply conversion recipe to input records"
147
+ method_option :delim, :desc => "field delimiter (default is comma)"
148
+ method_option :quote, :desc => "delimiter escape string (default is double-quotes when delim is comma, otherwise none)"
149
+ method_option :recipe, :required => true, :desc => "filename containing recipe"
150
+ def cook(filename = nil)
151
+ execute(filename)
152
+ end
153
+ end
@@ -1,3 +1,3 @@
1
1
  module Masticate
2
- VERSION = "0.5.1"
2
+ VERSION = "0.6.0"
3
3
  end
data/spec/spec_helper.rb CHANGED
@@ -1,35 +1,4 @@
1
- # require File.expand_path("../../config/environment", __FILE__)
2
- # require 'rspec/rails'
3
1
  require 'rspec/autorun'
4
- # require 'capybara/rspec'
5
2
  require "tempfile"
6
3
 
7
- # Requires supporting ruby files with custom matchers and macros, etc,
8
- # in spec/support/ and its subdirectories.
9
- # Dir[Rails.root.join("spec/support/**/*.rb")].each {|f| require f}
10
-
11
4
  require File.expand_path('../../lib/masticate', __FILE__)
12
-
13
- RSpec.configure do |config|
14
- # ## Mock Framework
15
- #
16
- # If you prefer to use mocha, flexmock or RR, uncomment the appropriate line:
17
- #
18
- # config.mock_with :mocha
19
- # config.mock_with :flexmock
20
- # config.mock_with :rr
21
-
22
- # Remove this line if you're not using ActiveRecord or ActiveRecord fixtures
23
- # config.fixture_path = "#{::Rails.root}/spec/fixtures"
24
-
25
- # If you're not using ActiveRecord, or you'd prefer not to run each of your
26
- # examples within a transaction, remove the following line or assign false
27
- # instead of true.
28
- # config.use_transactional_fixtures = true
29
- # config.use_instantiated_fixtures = false
30
-
31
- # If true, the base class of anonymous controllers will be inferred
32
- # automatically. This will be the default behavior in future versions of
33
- # rspec-rails.
34
- # config.infer_base_class_for_anonymous_controllers = false
35
- end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: masticate
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.1
4
+ version: 0.6.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-10-03 00:00:00.000000000 Z
12
+ date: 2012-11-14 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rake
@@ -93,6 +93,7 @@ files:
93
93
  - bin/masticate
94
94
  - lib/masticate.rb
95
95
  - lib/masticate/base.rb
96
+ - lib/masticate/command.rb
96
97
  - lib/masticate/concat.rb
97
98
  - lib/masticate/cook.rb
98
99
  - lib/masticate/csvify.rb
@@ -172,7 +173,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
172
173
  version: '0'
173
174
  segments:
174
175
  - 0
175
- hash: 89441162680926451
176
+ hash: 2907326929962451974
176
177
  required_rubygems_version: !ruby/object:Gem::Requirement
177
178
  none: false
178
179
  requirements:
@@ -181,7 +182,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
181
182
  version: '0'
182
183
  segments:
183
184
  - 0
184
- hash: 89441162680926451
185
+ hash: 2907326929962451974
185
186
  requirements: []
186
187
  rubyforge_project: masticate
187
188
  rubygems_version: 1.8.24