masticate 0.3.2 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/masticate/myoptparse.rb +9 -0
- data/lib/masticate/transform.rb +39 -0
- data/lib/masticate/version.rb +1 -1
- data/lib/masticate.rb +5 -0
- data/spec/data/downcase_results.csv +11 -0
- data/spec/lib/transform_spec.rb +25 -0
- metadata +42 -12
data/lib/masticate/myoptparse.rb
CHANGED
@@ -74,6 +74,10 @@ class Masticate::MyOptionParser
|
|
74
74
|
opts.on("--recipe FILENAME", String, "(*cook* only) Recipe file") do |f|
|
75
75
|
@options[:recipe] = f
|
76
76
|
end
|
77
|
+
|
78
|
+
opts.on("--rule {downcase,upcase}", String, "(*transform* only) Transformation rule") do |f|
|
79
|
+
@options[:rule] = f
|
80
|
+
end
|
77
81
|
end
|
78
82
|
end
|
79
83
|
|
@@ -87,6 +91,7 @@ class Masticate::MyOptionParser
|
|
87
91
|
def prepare(command, options)
|
88
92
|
klasses = {
|
89
93
|
'gsub' => Masticate::Gsubber,
|
94
|
+
'transform' => Masticate::Transform,
|
90
95
|
'datify' => Masticate::Datify,
|
91
96
|
'maxrows' => Masticate::MaxRows,
|
92
97
|
'relabel' => Masticate::Relabel,
|
@@ -156,6 +161,10 @@ EOT
|
|
156
161
|
results = Masticate.exclude(filename, options)
|
157
162
|
logmessage(command, options, results)
|
158
163
|
|
164
|
+
when 'transform'
|
165
|
+
results = Masticate.transform(filename, options)
|
166
|
+
logmessage(command, options, results)
|
167
|
+
|
159
168
|
else
|
160
169
|
raise "unknown command #{command}"
|
161
170
|
end
|
@@ -0,0 +1,39 @@
|
|
1
|
+
# apply transformation rules to a field
|
2
|
+
|
3
|
+
class Masticate::Transform < Masticate::Base
|
4
|
+
def configure(opts)
|
5
|
+
standard_options(opts)
|
6
|
+
|
7
|
+
@field = opts[:field] or raise "missing field to transform"
|
8
|
+
@rule = opts[:rule] or raise "missing transformation rule"
|
9
|
+
unless ['upcase', 'downcase'].include?(@rule)
|
10
|
+
raise "invalid transformation rule <#{@rule}>: supported rules are downcase, upcase"
|
11
|
+
end
|
12
|
+
end
|
13
|
+
|
14
|
+
def set_headers(row)
|
15
|
+
@headers = row
|
16
|
+
@index = @headers.index(@field) or raise "Unable to find column '#{@field}' in headers"
|
17
|
+
end
|
18
|
+
|
19
|
+
def transform(opts)
|
20
|
+
execute(opts)
|
21
|
+
end
|
22
|
+
|
23
|
+
def crunch(row)
|
24
|
+
if !@headers
|
25
|
+
set_headers(row)
|
26
|
+
elsif row
|
27
|
+
row[@index] = case @rule
|
28
|
+
when 'downcase'
|
29
|
+
row[@index].downcase
|
30
|
+
when 'upcase'
|
31
|
+
row[@index].upcase
|
32
|
+
else
|
33
|
+
raise "no code for rule #{@rule}"
|
34
|
+
end
|
35
|
+
end
|
36
|
+
|
37
|
+
row
|
38
|
+
end
|
39
|
+
end
|
data/lib/masticate/version.rb
CHANGED
data/lib/masticate.rb
CHANGED
@@ -15,6 +15,7 @@ require_relative "masticate/max_rows"
|
|
15
15
|
require_relative "masticate/concat"
|
16
16
|
require_relative "masticate/relabel"
|
17
17
|
require_relative "masticate/exclude"
|
18
|
+
require_relative "masticate/transform"
|
18
19
|
|
19
20
|
require_relative "masticate/cook"
|
20
21
|
|
@@ -62,4 +63,8 @@ module Masticate
|
|
62
63
|
def self.cook(filename, opts)
|
63
64
|
Cook.new(filename).cook(opts)
|
64
65
|
end
|
66
|
+
|
67
|
+
def self.transform(filename, opts)
|
68
|
+
Transform.new(filename).transform(opts)
|
69
|
+
end
|
65
70
|
end
|
@@ -0,0 +1,11 @@
|
|
1
|
+
LAST_NAME,FIRST_NAME,MIDDLE_INIT,DEPT #,Empl #,timestamp,Term Date,Status,R_NAME,SEX,BIRTHDATE
|
2
|
+
washington,GEORGE,D,824,9556,09/10/2005 4:23:16PM,07/01/2006,TM,Surgical House Staff,M,09/23/1975
|
3
|
+
jefferson,TOM,,621,8052,07/23/2001 7:23:11AM,01/28/2011,TM,Telemetry,F,12/24/1976
|
4
|
+
adams,JOHN,,655,8834,09/22/2003 01:23:45PM,,WA,6 East,F,08/07/1978
|
5
|
+
adams,JOHN QUINCY,A,209,8637,02/24/2003 02:34:00AM,12/02/2007,TM,Imaging Svcs - MRI,F,11/03/1966
|
6
|
+
hamilton,ANDREW,,278,10065,01/09/2007 02:34:00AM,11/16/2007,TM,Information Technology,M,09/16/1968
|
7
|
+
madison,JAMES,F,672,10720,01/05/2009 02:34:00AM,02/16/2010,TM,Rehab Svcs - Outpatients,F,04/15/1985
|
8
|
+
franklin,BENJAMIN,R,674,8340,05/01/2002 02:34:00AM,09/01/2003,TM,"Rehab Svcs, xyz",F,03/15/1973
|
9
|
+
lincoln,ABRAHAM,M,634,11340,05/02/2011 02:34:00AM,,PN,Sibley Ambulatory Surgery Ctr,F,07/11/1960
|
10
|
+
monroe,JAMES,L,614,10757,02/16/2009 02:34:00AM,,RF,Labor & Delivery,F,11/06/1983
|
11
|
+
revere,PAUL,B,424,8568,11/18/2002 02:34:00AM,06/27/2006,TM,Laundry & Linen,M,12/31/1976
|
@@ -0,0 +1,25 @@
|
|
1
|
+
# spec for field transformation
|
2
|
+
|
3
|
+
require "spec_helper"
|
4
|
+
|
5
|
+
describe "transform" do
|
6
|
+
it "should be able to downcase fields" do
|
7
|
+
filename = File.dirname(__FILE__) + "/../data/datify_input.csv"
|
8
|
+
tmp = Tempfile.new('transform')
|
9
|
+
results = Masticate.transform(filename, :output => tmp, :field => 'LAST_NAME', :rule => 'downcase')
|
10
|
+
output = File.read(tmp)
|
11
|
+
correct_output = File.read(File.dirname(__FILE__) + "/../data/downcase_results.csv")
|
12
|
+
|
13
|
+
output.should == correct_output
|
14
|
+
end
|
15
|
+
|
16
|
+
it "should be able to upcase fields" do
|
17
|
+
filename = File.dirname(__FILE__) + "/../data/downcase_results.csv"
|
18
|
+
tmp = Tempfile.new('transform')
|
19
|
+
results = Masticate.transform(filename, :output => tmp, :field => 'LAST_NAME', :rule => 'upcase')
|
20
|
+
output = File.read(tmp)
|
21
|
+
correct_output = File.read(File.dirname(__FILE__) + "/../data/datify_input.csv")
|
22
|
+
|
23
|
+
output.should == correct_output
|
24
|
+
end
|
25
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: masticate
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.2
|
4
|
+
version: 0.4.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-
|
12
|
+
date: 2012-08-21 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rake
|
16
|
-
requirement:
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ~>
|
@@ -21,10 +21,15 @@ dependencies:
|
|
21
21
|
version: 0.9.2
|
22
22
|
type: :development
|
23
23
|
prerelease: false
|
24
|
-
version_requirements:
|
24
|
+
version_requirements: !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ~>
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: 0.9.2
|
25
30
|
- !ruby/object:Gem::Dependency
|
26
31
|
name: rspec
|
27
|
-
requirement:
|
32
|
+
requirement: !ruby/object:Gem::Requirement
|
28
33
|
none: false
|
29
34
|
requirements:
|
30
35
|
- - ~>
|
@@ -32,10 +37,15 @@ dependencies:
|
|
32
37
|
version: 2.9.0
|
33
38
|
type: :development
|
34
39
|
prerelease: false
|
35
|
-
version_requirements:
|
40
|
+
version_requirements: !ruby/object:Gem::Requirement
|
41
|
+
none: false
|
42
|
+
requirements:
|
43
|
+
- - ~>
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
version: 2.9.0
|
36
46
|
- !ruby/object:Gem::Dependency
|
37
47
|
name: guard-rspec
|
38
|
-
requirement:
|
48
|
+
requirement: !ruby/object:Gem::Requirement
|
39
49
|
none: false
|
40
50
|
requirements:
|
41
51
|
- - ~>
|
@@ -43,10 +53,15 @@ dependencies:
|
|
43
53
|
version: 0.7.0
|
44
54
|
type: :development
|
45
55
|
prerelease: false
|
46
|
-
version_requirements:
|
56
|
+
version_requirements: !ruby/object:Gem::Requirement
|
57
|
+
none: false
|
58
|
+
requirements:
|
59
|
+
- - ~>
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: 0.7.0
|
47
62
|
- !ruby/object:Gem::Dependency
|
48
63
|
name: ruby_gntp
|
49
|
-
requirement:
|
64
|
+
requirement: !ruby/object:Gem::Requirement
|
50
65
|
none: false
|
51
66
|
requirements:
|
52
67
|
- - ~>
|
@@ -54,7 +69,12 @@ dependencies:
|
|
54
69
|
version: 0.3.4
|
55
70
|
type: :development
|
56
71
|
prerelease: false
|
57
|
-
version_requirements:
|
72
|
+
version_requirements: !ruby/object:Gem::Requirement
|
73
|
+
none: false
|
74
|
+
requirements:
|
75
|
+
- - ~>
|
76
|
+
- !ruby/object:Gem::Version
|
77
|
+
version: 0.3.4
|
58
78
|
description: Data file crunching
|
59
79
|
email:
|
60
80
|
- jmay@pobox.com
|
@@ -85,6 +105,7 @@ files:
|
|
85
105
|
- lib/masticate/plucker.rb
|
86
106
|
- lib/masticate/relabel.rb
|
87
107
|
- lib/masticate/sniffer.rb
|
108
|
+
- lib/masticate/transform.rb
|
88
109
|
- lib/masticate/version.rb
|
89
110
|
- masticate.gemspec
|
90
111
|
- spec/data/badnums.csv
|
@@ -97,6 +118,7 @@ files:
|
|
97
118
|
- spec/data/cooking_result.csv
|
98
119
|
- spec/data/datify_input.csv
|
99
120
|
- spec/data/datify_result.csv
|
121
|
+
- spec/data/downcase_results.csv
|
100
122
|
- spec/data/events.csv
|
101
123
|
- spec/data/events_reduced.csv
|
102
124
|
- spec/data/exclude_input.csv
|
@@ -128,6 +150,7 @@ files:
|
|
128
150
|
- spec/lib/plucker_spec.rb
|
129
151
|
- spec/lib/relabel_spec.rb
|
130
152
|
- spec/lib/sniffer_spec.rb
|
153
|
+
- spec/lib/transform_spec.rb
|
131
154
|
- spec/spec_helper.rb
|
132
155
|
homepage: ''
|
133
156
|
licenses: []
|
@@ -141,15 +164,21 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
141
164
|
- - ! '>='
|
142
165
|
- !ruby/object:Gem::Version
|
143
166
|
version: '0'
|
167
|
+
segments:
|
168
|
+
- 0
|
169
|
+
hash: -519721259904741395
|
144
170
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
145
171
|
none: false
|
146
172
|
requirements:
|
147
173
|
- - ! '>='
|
148
174
|
- !ruby/object:Gem::Version
|
149
175
|
version: '0'
|
176
|
+
segments:
|
177
|
+
- 0
|
178
|
+
hash: -519721259904741395
|
150
179
|
requirements: []
|
151
180
|
rubyforge_project: masticate
|
152
|
-
rubygems_version: 1.8.
|
181
|
+
rubygems_version: 1.8.24
|
153
182
|
signing_key:
|
154
183
|
specification_version: 3
|
155
184
|
summary: Utility functions for parsing incoming text data files.
|
@@ -164,6 +193,7 @@ test_files:
|
|
164
193
|
- spec/data/cooking_result.csv
|
165
194
|
- spec/data/datify_input.csv
|
166
195
|
- spec/data/datify_result.csv
|
196
|
+
- spec/data/downcase_results.csv
|
167
197
|
- spec/data/events.csv
|
168
198
|
- spec/data/events_reduced.csv
|
169
199
|
- spec/data/exclude_input.csv
|
@@ -195,5 +225,5 @@ test_files:
|
|
195
225
|
- spec/lib/plucker_spec.rb
|
196
226
|
- spec/lib/relabel_spec.rb
|
197
227
|
- spec/lib/sniffer_spec.rb
|
228
|
+
- spec/lib/transform_spec.rb
|
198
229
|
- spec/spec_helper.rb
|
199
|
-
has_rdoc:
|