masticate 0.3.2 → 0.4.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -74,6 +74,10 @@ class Masticate::MyOptionParser
74
74
  opts.on("--recipe FILENAME", String, "(*cook* only) Recipe file") do |f|
75
75
  @options[:recipe] = f
76
76
  end
77
+
78
+ opts.on("--rule {downcase,upcase}", String, "(*transform* only) Transformation rule") do |f|
79
+ @options[:rule] = f
80
+ end
77
81
  end
78
82
  end
79
83
 
@@ -87,6 +91,7 @@ class Masticate::MyOptionParser
87
91
  def prepare(command, options)
88
92
  klasses = {
89
93
  'gsub' => Masticate::Gsubber,
94
+ 'transform' => Masticate::Transform,
90
95
  'datify' => Masticate::Datify,
91
96
  'maxrows' => Masticate::MaxRows,
92
97
  'relabel' => Masticate::Relabel,
@@ -156,6 +161,10 @@ EOT
156
161
  results = Masticate.exclude(filename, options)
157
162
  logmessage(command, options, results)
158
163
 
164
+ when 'transform'
165
+ results = Masticate.transform(filename, options)
166
+ logmessage(command, options, results)
167
+
159
168
  else
160
169
  raise "unknown command #{command}"
161
170
  end
@@ -0,0 +1,39 @@
1
# Applies a case-changing rule (downcase or upcase) to one named column.
#
# The first row handed to #crunch is treated as the header row and is used to
# locate the target column; every later non-nil row has that column rewritten
# in place.
class Masticate::Transform < Masticate::Base
  # Rule names accepted through opts[:rule].
  SUPPORTED_RULES = %w[upcase downcase].freeze

  # Reads and validates the options for this command.
  #
  # opts - Hash of options. :field names the column to transform and :rule
  #        must be 'downcase' or 'upcase'. The hash is also handed to
  #        standard_options (not defined in this file).
  #
  # Raises RuntimeError when :field or :rule is missing, or when :rule is not
  # one of the supported rules.
  def configure(opts)
    standard_options(opts)

    @field = opts[:field] || raise("missing field to transform")
    @rule = opts[:rule] || raise("missing transformation rule")
    return if SUPPORTED_RULES.include?(@rule)

    raise "invalid transformation rule <#{@rule}>: supported rules are downcase, upcase"
  end

  # Remembers the header row and the position of the target column in it.
  #
  # row - Array of header names.
  #
  # Returns the column index.
  # Raises RuntimeError when the configured field is not among the headers.
  def set_headers(row)
    @headers = row
    @index = @headers.index(@field) || raise("Unable to find column '#{@field}' in headers")
  end

  # Entry point used by Masticate.transform; hands the options to execute
  # (not defined in this file).
  def transform(opts)
    execute(opts)
  end

  # Processes one row: the first row seen becomes the headers, later rows get
  # the target column transformed in place.
  #
  # row - Array of cell values, or nil (nil rows are passed through untouched).
  #
  # Returns the (possibly modified) row.
  def crunch(row)
    if !@headers
      set_headers(row)
    elsif row
      row[@index] = apply_rule(row[@index])
    end

    row
  end

  private

  # Applies the configured rule to a single cell value.
  #
  # A nil cell is returned unchanged rather than raising NoMethodError; Ruby's
  # CSV parser yields nil for empty unquoted fields (presumably what the
  # reader in the base class produces -- verify against Masticate::Base).
  def apply_rule(value)
    case @rule
    when 'downcase'
      value && value.downcase
    when 'upcase'
      value && value.upcase
    else
      raise "no code for rule #{@rule}"
    end
  end
end
@@ -1,3 +1,3 @@
1
1
  module Masticate
2
- VERSION = "0.3.2"
2
+ VERSION = "0.4.0"
3
3
  end
data/lib/masticate.rb CHANGED
@@ -15,6 +15,7 @@ require_relative "masticate/max_rows"
15
15
  require_relative "masticate/concat"
16
16
  require_relative "masticate/relabel"
17
17
  require_relative "masticate/exclude"
18
+ require_relative "masticate/transform"
18
19
 
19
20
  require_relative "masticate/cook"
20
21
 
@@ -62,4 +63,8 @@ module Masticate
62
63
  def self.cook(filename, opts)
63
64
  Cook.new(filename).cook(opts)
64
65
  end
66
+
67
# Builds a Transform for +filename+ and runs it with +opts+, returning
# whatever Transform#transform returns.
def self.transform(filename, opts)
  transformer = Transform.new(filename)
  transformer.transform(opts)
end
65
70
  end
@@ -0,0 +1,11 @@
1
+ LAST_NAME,FIRST_NAME,MIDDLE_INIT,DEPT #,Empl #,timestamp,Term Date,Status,R_NAME,SEX,BIRTHDATE
2
+ washington,GEORGE,D,824,9556,09/10/2005 4:23:16PM,07/01/2006,TM,Surgical House Staff,M,09/23/1975
3
+ jefferson,TOM,,621,8052,07/23/2001 7:23:11AM,01/28/2011,TM,Telemetry,F,12/24/1976
4
+ adams,JOHN,,655,8834,09/22/2003 01:23:45PM,,WA,6 East,F,08/07/1978
5
+ adams,JOHN QUINCY,A,209,8637,02/24/2003 02:34:00AM,12/02/2007,TM,Imaging Svcs - MRI,F,11/03/1966
6
+ hamilton,ANDREW,,278,10065,01/09/2007 02:34:00AM,11/16/2007,TM,Information Technology,M,09/16/1968
7
+ madison,JAMES,F,672,10720,01/05/2009 02:34:00AM,02/16/2010,TM,Rehab Svcs - Outpatients,F,04/15/1985
8
+ franklin,BENJAMIN,R,674,8340,05/01/2002 02:34:00AM,09/01/2003,TM,"Rehab Svcs, xyz",F,03/15/1973
9
+ lincoln,ABRAHAM,M,634,11340,05/02/2011 02:34:00AM,,PN,Sibley Ambulatory Surgery Ctr,F,07/11/1960
10
+ monroe,JAMES,L,614,10757,02/16/2009 02:34:00AM,,RF,Labor & Delivery,F,11/06/1983
11
+ revere,PAUL,B,424,8568,11/18/2002 02:34:00AM,06/27/2006,TM,Laundry & Linen,M,12/31/1976
@@ -0,0 +1,25 @@
1
+ # spec for field transformation
2
+
3
+ require "spec_helper"
4
+
5
describe "transform" do
  # Downcasing LAST_NAME in the datify fixture must reproduce the
  # downcase_results fixture exactly.
  it "should be able to downcase fields" do
    here = File.dirname(__FILE__)
    sink = Tempfile.new('transform')

    Masticate.transform(here + "/../data/datify_input.csv", :output => sink, :field => 'LAST_NAME', :rule => 'downcase')

    expected = File.read(here + "/../data/downcase_results.csv")
    File.read(sink).should == expected
  end

  # The reverse direction: upcasing LAST_NAME in the downcase fixture must
  # give back the original datify fixture.
  it "should be able to upcase fields" do
    here = File.dirname(__FILE__)
    sink = Tempfile.new('transform')

    Masticate.transform(here + "/../data/downcase_results.csv", :output => sink, :field => 'LAST_NAME', :rule => 'upcase')

    expected = File.read(here + "/../data/datify_input.csv")
    File.read(sink).should == expected
  end
end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: masticate
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.2
4
+ version: 0.4.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-05-30 00:00:00.000000000 Z
12
+ date: 2012-08-21 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rake
16
- requirement: &2157784500 !ruby/object:Gem::Requirement
16
+ requirement: !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ~>
@@ -21,10 +21,15 @@ dependencies:
21
21
  version: 0.9.2
22
22
  type: :development
23
23
  prerelease: false
24
- version_requirements: *2157784500
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ~>
28
+ - !ruby/object:Gem::Version
29
+ version: 0.9.2
25
30
  - !ruby/object:Gem::Dependency
26
31
  name: rspec
27
- requirement: &2157784000 !ruby/object:Gem::Requirement
32
+ requirement: !ruby/object:Gem::Requirement
28
33
  none: false
29
34
  requirements:
30
35
  - - ~>
@@ -32,10 +37,15 @@ dependencies:
32
37
  version: 2.9.0
33
38
  type: :development
34
39
  prerelease: false
35
- version_requirements: *2157784000
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ~>
44
+ - !ruby/object:Gem::Version
45
+ version: 2.9.0
36
46
  - !ruby/object:Gem::Dependency
37
47
  name: guard-rspec
38
- requirement: &2157783540 !ruby/object:Gem::Requirement
48
+ requirement: !ruby/object:Gem::Requirement
39
49
  none: false
40
50
  requirements:
41
51
  - - ~>
@@ -43,10 +53,15 @@ dependencies:
43
53
  version: 0.7.0
44
54
  type: :development
45
55
  prerelease: false
46
- version_requirements: *2157783540
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ~>
60
+ - !ruby/object:Gem::Version
61
+ version: 0.7.0
47
62
  - !ruby/object:Gem::Dependency
48
63
  name: ruby_gntp
49
- requirement: &2157783080 !ruby/object:Gem::Requirement
64
+ requirement: !ruby/object:Gem::Requirement
50
65
  none: false
51
66
  requirements:
52
67
  - - ~>
@@ -54,7 +69,12 @@ dependencies:
54
69
  version: 0.3.4
55
70
  type: :development
56
71
  prerelease: false
57
- version_requirements: *2157783080
72
+ version_requirements: !ruby/object:Gem::Requirement
73
+ none: false
74
+ requirements:
75
+ - - ~>
76
+ - !ruby/object:Gem::Version
77
+ version: 0.3.4
58
78
  description: Data file crunching
59
79
  email:
60
80
  - jmay@pobox.com
@@ -85,6 +105,7 @@ files:
85
105
  - lib/masticate/plucker.rb
86
106
  - lib/masticate/relabel.rb
87
107
  - lib/masticate/sniffer.rb
108
+ - lib/masticate/transform.rb
88
109
  - lib/masticate/version.rb
89
110
  - masticate.gemspec
90
111
  - spec/data/badnums.csv
@@ -97,6 +118,7 @@ files:
97
118
  - spec/data/cooking_result.csv
98
119
  - spec/data/datify_input.csv
99
120
  - spec/data/datify_result.csv
121
+ - spec/data/downcase_results.csv
100
122
  - spec/data/events.csv
101
123
  - spec/data/events_reduced.csv
102
124
  - spec/data/exclude_input.csv
@@ -128,6 +150,7 @@ files:
128
150
  - spec/lib/plucker_spec.rb
129
151
  - spec/lib/relabel_spec.rb
130
152
  - spec/lib/sniffer_spec.rb
153
+ - spec/lib/transform_spec.rb
131
154
  - spec/spec_helper.rb
132
155
  homepage: ''
133
156
  licenses: []
@@ -141,15 +164,21 @@ required_ruby_version: !ruby/object:Gem::Requirement
141
164
  - - ! '>='
142
165
  - !ruby/object:Gem::Version
143
166
  version: '0'
167
+ segments:
168
+ - 0
169
+ hash: -519721259904741395
144
170
  required_rubygems_version: !ruby/object:Gem::Requirement
145
171
  none: false
146
172
  requirements:
147
173
  - - ! '>='
148
174
  - !ruby/object:Gem::Version
149
175
  version: '0'
176
+ segments:
177
+ - 0
178
+ hash: -519721259904741395
150
179
  requirements: []
151
180
  rubyforge_project: masticate
152
- rubygems_version: 1.8.10
181
+ rubygems_version: 1.8.24
153
182
  signing_key:
154
183
  specification_version: 3
155
184
  summary: Utility functions for parsing incoming text data files.
@@ -164,6 +193,7 @@ test_files:
164
193
  - spec/data/cooking_result.csv
165
194
  - spec/data/datify_input.csv
166
195
  - spec/data/datify_result.csv
196
+ - spec/data/downcase_results.csv
167
197
  - spec/data/events.csv
168
198
  - spec/data/events_reduced.csv
169
199
  - spec/data/exclude_input.csv
@@ -195,5 +225,5 @@ test_files:
195
225
  - spec/lib/plucker_spec.rb
196
226
  - spec/lib/relabel_spec.rb
197
227
  - spec/lib/sniffer_spec.rb
228
+ - spec/lib/transform_spec.rb
198
229
  - spec/spec_helper.rb
199
- has_rdoc: