masticate 0.3.2 → 0.4.0
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/masticate/myoptparse.rb +9 -0
- data/lib/masticate/transform.rb +39 -0
- data/lib/masticate/version.rb +1 -1
- data/lib/masticate.rb +5 -0
- data/spec/data/downcase_results.csv +11 -0
- data/spec/lib/transform_spec.rb +25 -0
- metadata +42 -12
data/lib/masticate/myoptparse.rb
CHANGED
@@ -74,6 +74,10 @@ class Masticate::MyOptionParser
|
|
74
74
|
opts.on("--recipe FILENAME", String, "(*cook* only) Recipe file") do |f|
|
75
75
|
@options[:recipe] = f
|
76
76
|
end
|
77
|
+
|
78
|
+
opts.on("--rule {downcase,upcase}", String, "(*transform* only) Transformation rule") do |f|
|
79
|
+
@options[:rule] = f
|
80
|
+
end
|
77
81
|
end
|
78
82
|
end
|
79
83
|
|
@@ -87,6 +91,7 @@ class Masticate::MyOptionParser
|
|
87
91
|
def prepare(command, options)
|
88
92
|
klasses = {
|
89
93
|
'gsub' => Masticate::Gsubber,
|
94
|
+
'transform' => Masticate::Transform,
|
90
95
|
'datify' => Masticate::Datify,
|
91
96
|
'maxrows' => Masticate::MaxRows,
|
92
97
|
'relabel' => Masticate::Relabel,
|
@@ -156,6 +161,10 @@ EOT
|
|
156
161
|
results = Masticate.exclude(filename, options)
|
157
162
|
logmessage(command, options, results)
|
158
163
|
|
164
|
+
when 'transform'
|
165
|
+
results = Masticate.transform(filename, options)
|
166
|
+
logmessage(command, options, results)
|
167
|
+
|
159
168
|
else
|
160
169
|
raise "unknown command #{command}"
|
161
170
|
end
|
@@ -0,0 +1,39 @@
|
|
1
|
+
# apply transformation rules to a field
|
2
|
+
|
3
|
+
# Applies a case transformation to one named column of every data row.
#
# Options read by #configure:
#   :field - header name of the column to transform (required)
#   :rule  - 'downcase' or 'upcase' (required)
class Masticate::Transform < Masticate::Base
  # Transformation rules accepted in opts[:rule].
  RULES = %w[downcase upcase].freeze

  # Validates and stores the options for this run.
  #
  # Raises RuntimeError when :field or :rule is missing, or when :rule is
  # not one of RULES.
  def configure(opts)
    standard_options(opts)

    @field = opts[:field] or raise "missing field to transform"
    @rule = opts[:rule] or raise "missing transformation rule"
    unless RULES.include?(@rule)
      raise "invalid transformation rule <#{@rule}>: supported rules are downcase, upcase"
    end
  end

  # Remembers the header row and the position of the target column in it.
  #
  # Raises RuntimeError when the configured field is not among the headers.
  def set_headers(row)
    @headers = row
    @index = @headers.index(@field) or raise "Unable to find column '#{@field}' in headers"
  end

  # Entry point used by Masticate.transform; hands off to #execute, which is
  # not defined in this class (presumably inherited from Masticate::Base).
  def transform(opts)
    execute(opts)
  end

  # Handles one row: the first row seen is recorded as the header row, and
  # every later non-nil row has its target column rewritten in place.
  # Returns the row it was given (nil is passed through untouched).
  def crunch(row)
    if !@headers
      set_headers(row)
    elsif row
      value = row[@index]
      # A row may carry no value at this column (for example an empty field
      # surfacing as nil, or a row shorter than the header). Leave such a
      # cell alone instead of calling a String method on nil, and do not
      # assign so that a short row is not padded.
      row[@index] = apply_rule(value) unless value.nil?
    end

    row
  end

  private

  # Returns +value+ transformed according to the configured rule.
  def apply_rule(value)
    case @rule
    when 'downcase'
      value.downcase
    when 'upcase'
      value.upcase
    else
      # Unreachable after #configure's validation; kept as a safety net for
      # a rule that is added to RULES without an implementation here.
      raise "no code for rule #{@rule}"
    end
  end
end
|
data/lib/masticate/version.rb
CHANGED
data/lib/masticate.rb
CHANGED
@@ -15,6 +15,7 @@ require_relative "masticate/max_rows"
|
|
15
15
|
require_relative "masticate/concat"
|
16
16
|
require_relative "masticate/relabel"
|
17
17
|
require_relative "masticate/exclude"
|
18
|
+
require_relative "masticate/transform"
|
18
19
|
|
19
20
|
require_relative "masticate/cook"
|
20
21
|
|
@@ -62,4 +63,8 @@ module Masticate
|
|
62
63
|
# Builds a Cook for +filename+ and runs it with +opts+, returning whatever
# Cook#cook returns.
def self.cook(filename, opts)
  cooker = Cook.new(filename)
  cooker.cook(opts)
end
|
66
|
+
|
67
|
+
# Builds a Transform for +filename+ and runs it with +opts+ (expects :field
# and :rule), returning whatever Transform#transform returns.
def self.transform(filename, opts)
  transformer = Transform.new(filename)
  transformer.transform(opts)
end
|
65
70
|
end
|
@@ -0,0 +1,11 @@
|
|
1
|
+
LAST_NAME,FIRST_NAME,MIDDLE_INIT,DEPT #,Empl #,timestamp,Term Date,Status,R_NAME,SEX,BIRTHDATE
|
2
|
+
washington,GEORGE,D,824,9556,09/10/2005 4:23:16PM,07/01/2006,TM,Surgical House Staff,M,09/23/1975
|
3
|
+
jefferson,TOM,,621,8052,07/23/2001 7:23:11AM,01/28/2011,TM,Telemetry,F,12/24/1976
|
4
|
+
adams,JOHN,,655,8834,09/22/2003 01:23:45PM,,WA,6 East,F,08/07/1978
|
5
|
+
adams,JOHN QUINCY,A,209,8637,02/24/2003 02:34:00AM,12/02/2007,TM,Imaging Svcs - MRI,F,11/03/1966
|
6
|
+
hamilton,ANDREW,,278,10065,01/09/2007 02:34:00AM,11/16/2007,TM,Information Technology,M,09/16/1968
|
7
|
+
madison,JAMES,F,672,10720,01/05/2009 02:34:00AM,02/16/2010,TM,Rehab Svcs - Outpatients,F,04/15/1985
|
8
|
+
franklin,BENJAMIN,R,674,8340,05/01/2002 02:34:00AM,09/01/2003,TM,"Rehab Svcs, xyz",F,03/15/1973
|
9
|
+
lincoln,ABRAHAM,M,634,11340,05/02/2011 02:34:00AM,,PN,Sibley Ambulatory Surgery Ctr,F,07/11/1960
|
10
|
+
monroe,JAMES,L,614,10757,02/16/2009 02:34:00AM,,RF,Labor & Delivery,F,11/06/1983
|
11
|
+
revere,PAUL,B,424,8568,11/18/2002 02:34:00AM,06/27/2006,TM,Laundry & Linen,M,12/31/1976
|
@@ -0,0 +1,25 @@
|
|
1
|
+
# spec for field transformation
|
2
|
+
|
3
|
+
require "spec_helper"
|
4
|
+
|
5
|
+
# Specs for the case-transformation command (Masticate.transform).
#
# The two fixtures differ only in the case of the LAST_NAME column, so each
# one serves as the expected output of transforming the other.
describe "transform" do
  # Absolute-ish path to a fixture under spec/data.
  def data_file(name)
    File.dirname(__FILE__) + "/../data/" + name
  end

  # Runs Masticate.transform over the given fixture, applying +rule+ to the
  # LAST_NAME column, and returns the text written to the output file.
  # The temporary output file is always closed and removed afterwards.
  def transform_last_name(input, rule)
    tmp = Tempfile.new('transform')
    Masticate.transform(data_file(input), :output => tmp, :field => 'LAST_NAME', :rule => rule)
    File.read(tmp.path)
  ensure
    tmp.close! if tmp
  end

  it "should be able to downcase fields" do
    output = transform_last_name("datify_input.csv", 'downcase')
    correct_output = File.read(data_file("downcase_results.csv"))

    output.should == correct_output
  end

  it "should be able to upcase fields" do
    output = transform_last_name("downcase_results.csv", 'upcase')
    correct_output = File.read(data_file("datify_input.csv"))

    output.should == correct_output
  end
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: masticate
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.2
|
4
|
+
version: 0.4.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-
|
12
|
+
date: 2012-08-21 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rake
|
16
|
-
requirement:
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ~>
|
@@ -21,10 +21,15 @@ dependencies:
|
|
21
21
|
version: 0.9.2
|
22
22
|
type: :development
|
23
23
|
prerelease: false
|
24
|
-
version_requirements:
|
24
|
+
version_requirements: !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ~>
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: 0.9.2
|
25
30
|
- !ruby/object:Gem::Dependency
|
26
31
|
name: rspec
|
27
|
-
requirement:
|
32
|
+
requirement: !ruby/object:Gem::Requirement
|
28
33
|
none: false
|
29
34
|
requirements:
|
30
35
|
- - ~>
|
@@ -32,10 +37,15 @@ dependencies:
|
|
32
37
|
version: 2.9.0
|
33
38
|
type: :development
|
34
39
|
prerelease: false
|
35
|
-
version_requirements:
|
40
|
+
version_requirements: !ruby/object:Gem::Requirement
|
41
|
+
none: false
|
42
|
+
requirements:
|
43
|
+
- - ~>
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
version: 2.9.0
|
36
46
|
- !ruby/object:Gem::Dependency
|
37
47
|
name: guard-rspec
|
38
|
-
requirement:
|
48
|
+
requirement: !ruby/object:Gem::Requirement
|
39
49
|
none: false
|
40
50
|
requirements:
|
41
51
|
- - ~>
|
@@ -43,10 +53,15 @@ dependencies:
|
|
43
53
|
version: 0.7.0
|
44
54
|
type: :development
|
45
55
|
prerelease: false
|
46
|
-
version_requirements:
|
56
|
+
version_requirements: !ruby/object:Gem::Requirement
|
57
|
+
none: false
|
58
|
+
requirements:
|
59
|
+
- - ~>
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: 0.7.0
|
47
62
|
- !ruby/object:Gem::Dependency
|
48
63
|
name: ruby_gntp
|
49
|
-
requirement:
|
64
|
+
requirement: !ruby/object:Gem::Requirement
|
50
65
|
none: false
|
51
66
|
requirements:
|
52
67
|
- - ~>
|
@@ -54,7 +69,12 @@ dependencies:
|
|
54
69
|
version: 0.3.4
|
55
70
|
type: :development
|
56
71
|
prerelease: false
|
57
|
-
version_requirements:
|
72
|
+
version_requirements: !ruby/object:Gem::Requirement
|
73
|
+
none: false
|
74
|
+
requirements:
|
75
|
+
- - ~>
|
76
|
+
- !ruby/object:Gem::Version
|
77
|
+
version: 0.3.4
|
58
78
|
description: Data file crunching
|
59
79
|
email:
|
60
80
|
- jmay@pobox.com
|
@@ -85,6 +105,7 @@ files:
|
|
85
105
|
- lib/masticate/plucker.rb
|
86
106
|
- lib/masticate/relabel.rb
|
87
107
|
- lib/masticate/sniffer.rb
|
108
|
+
- lib/masticate/transform.rb
|
88
109
|
- lib/masticate/version.rb
|
89
110
|
- masticate.gemspec
|
90
111
|
- spec/data/badnums.csv
|
@@ -97,6 +118,7 @@ files:
|
|
97
118
|
- spec/data/cooking_result.csv
|
98
119
|
- spec/data/datify_input.csv
|
99
120
|
- spec/data/datify_result.csv
|
121
|
+
- spec/data/downcase_results.csv
|
100
122
|
- spec/data/events.csv
|
101
123
|
- spec/data/events_reduced.csv
|
102
124
|
- spec/data/exclude_input.csv
|
@@ -128,6 +150,7 @@ files:
|
|
128
150
|
- spec/lib/plucker_spec.rb
|
129
151
|
- spec/lib/relabel_spec.rb
|
130
152
|
- spec/lib/sniffer_spec.rb
|
153
|
+
- spec/lib/transform_spec.rb
|
131
154
|
- spec/spec_helper.rb
|
132
155
|
homepage: ''
|
133
156
|
licenses: []
|
@@ -141,15 +164,21 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
141
164
|
- - ! '>='
|
142
165
|
- !ruby/object:Gem::Version
|
143
166
|
version: '0'
|
167
|
+
segments:
|
168
|
+
- 0
|
169
|
+
hash: -519721259904741395
|
144
170
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
145
171
|
none: false
|
146
172
|
requirements:
|
147
173
|
- - ! '>='
|
148
174
|
- !ruby/object:Gem::Version
|
149
175
|
version: '0'
|
176
|
+
segments:
|
177
|
+
- 0
|
178
|
+
hash: -519721259904741395
|
150
179
|
requirements: []
|
151
180
|
rubyforge_project: masticate
|
152
|
-
rubygems_version: 1.8.
|
181
|
+
rubygems_version: 1.8.24
|
153
182
|
signing_key:
|
154
183
|
specification_version: 3
|
155
184
|
summary: Utility functions for parsing incoming text data files.
|
@@ -164,6 +193,7 @@ test_files:
|
|
164
193
|
- spec/data/cooking_result.csv
|
165
194
|
- spec/data/datify_input.csv
|
166
195
|
- spec/data/datify_result.csv
|
196
|
+
- spec/data/downcase_results.csv
|
167
197
|
- spec/data/events.csv
|
168
198
|
- spec/data/events_reduced.csv
|
169
199
|
- spec/data/exclude_input.csv
|
@@ -195,5 +225,5 @@ test_files:
|
|
195
225
|
- spec/lib/plucker_spec.rb
|
196
226
|
- spec/lib/relabel_spec.rb
|
197
227
|
- spec/lib/sniffer_spec.rb
|
228
|
+
- spec/lib/transform_spec.rb
|
198
229
|
- spec/spec_helper.rb
|
199
|
-
has_rdoc:
|