masticate 0.3.2 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -74,6 +74,10 @@ class Masticate::MyOptionParser
74
74
  opts.on("--recipe FILENAME", String, "(*cook* only) Recipe file") do |f|
75
75
  @options[:recipe] = f
76
76
  end
77
+
78
+ opts.on("--rule {downcase,upcase}", String, "(*transform* only) Transformation rule") do |f|
79
+ @options[:rule] = f
80
+ end
77
81
  end
78
82
  end
79
83
 
@@ -87,6 +91,7 @@ class Masticate::MyOptionParser
87
91
  def prepare(command, options)
88
92
  klasses = {
89
93
  'gsub' => Masticate::Gsubber,
94
+ 'transform' => Masticate::Transform,
90
95
  'datify' => Masticate::Datify,
91
96
  'maxrows' => Masticate::MaxRows,
92
97
  'relabel' => Masticate::Relabel,
@@ -156,6 +161,10 @@ EOT
156
161
  results = Masticate.exclude(filename, options)
157
162
  logmessage(command, options, results)
158
163
 
164
+ when 'transform'
165
+ results = Masticate.transform(filename, options)
166
+ logmessage(command, options, results)
167
+
159
168
  else
160
169
  raise "unknown command #{command}"
161
170
  end
@@ -0,0 +1,39 @@
1
+ # Transform: rewrites one named column (opts[:field]) of each row with the case rule in opts[:rule] ('downcase' or 'upcase'). The first row given to #crunch is stored as the header row and used to find the column index; later rows get that cell replaced. Raises if the field or rule is missing, the rule is unsupported, or the column is absent from the headers. NOTE(review): row[@index] is assumed to be a String; a nil cell would raise NoMethodError on downcase/upcase - confirm how Masticate::Base parses empty fields.
2
+
3
+ class Masticate::Transform < Masticate::Base
4
+ def configure(opts)
5
+ standard_options(opts)
6
+
7
+ @field = opts[:field] or raise "missing field to transform"
8
+ @rule = opts[:rule] or raise "missing transformation rule"
9
+ unless ['upcase', 'downcase'].include?(@rule)
10
+ raise "invalid transformation rule <#{@rule}>: supported rules are downcase, upcase"
11
+ end
12
+ end
13
+
14
+ def set_headers(row)
15
+ @headers = row
16
+ @index = @headers.index(@field) or raise "Unable to find column '#{@field}' in headers"
17
+ end
18
+
19
+ def transform(opts)
20
+ execute(opts)
21
+ end
22
+
23
+ def crunch(row)
24
+ if !@headers
25
+ set_headers(row)
26
+ elsif row
27
+ row[@index] = case @rule
28
+ when 'downcase'
29
+ row[@index].downcase
30
+ when 'upcase'
31
+ row[@index].upcase
32
+ else
33
+ raise "no code for rule #{@rule}"
34
+ end
35
+ end
36
+
37
+ row
38
+ end
39
+ end
@@ -1,3 +1,3 @@
1
1
  module Masticate
2
- VERSION = "0.3.2"
2
+ VERSION = "0.4.0"
3
3
  end
data/lib/masticate.rb CHANGED
@@ -15,6 +15,7 @@ require_relative "masticate/max_rows"
15
15
  require_relative "masticate/concat"
16
16
  require_relative "masticate/relabel"
17
17
  require_relative "masticate/exclude"
18
+ require_relative "masticate/transform"
18
19
 
19
20
  require_relative "masticate/cook"
20
21
 
@@ -62,4 +63,8 @@ module Masticate
62
63
  def self.cook(filename, opts)
63
64
  Cook.new(filename).cook(opts)
64
65
  end
66
+ # Module-level entry point for the transform command: builds a Transform for filename and returns the result of its #transform(opts).
67
+ def self.transform(filename, opts)
68
+ Transform.new(filename).transform(opts)
69
+ end
65
70
  end
@@ -0,0 +1,11 @@
1
+ LAST_NAME,FIRST_NAME,MIDDLE_INIT,DEPT #,Empl #,timestamp,Term Date,Status,R_NAME,SEX,BIRTHDATE
2
+ washington,GEORGE,D,824,9556,09/10/2005 4:23:16PM,07/01/2006,TM,Surgical House Staff,M,09/23/1975
3
+ jefferson,TOM,,621,8052,07/23/2001 7:23:11AM,01/28/2011,TM,Telemetry,F,12/24/1976
4
+ adams,JOHN,,655,8834,09/22/2003 01:23:45PM,,WA,6 East,F,08/07/1978
5
+ adams,JOHN QUINCY,A,209,8637,02/24/2003 02:34:00AM,12/02/2007,TM,Imaging Svcs - MRI,F,11/03/1966
6
+ hamilton,ANDREW,,278,10065,01/09/2007 02:34:00AM,11/16/2007,TM,Information Technology,M,09/16/1968
7
+ madison,JAMES,F,672,10720,01/05/2009 02:34:00AM,02/16/2010,TM,Rehab Svcs - Outpatients,F,04/15/1985
8
+ franklin,BENJAMIN,R,674,8340,05/01/2002 02:34:00AM,09/01/2003,TM,"Rehab Svcs, xyz",F,03/15/1973
9
+ lincoln,ABRAHAM,M,634,11340,05/02/2011 02:34:00AM,,PN,Sibley Ambulatory Surgery Ctr,F,07/11/1960
10
+ monroe,JAMES,L,614,10757,02/16/2009 02:34:00AM,,RF,Labor & Delivery,F,11/06/1983
11
+ revere,PAUL,B,424,8568,11/18/2002 02:34:00AM,06/27/2006,TM,Laundry & Linen,M,12/31/1976
@@ -0,0 +1,25 @@
1
+ # Spec for Masticate.transform as a round trip on the LAST_NAME column: downcasing datify_input.csv must produce downcase_results.csv, and upcasing downcase_results.csv must reproduce datify_input.csv. Output is written to a Tempfile and compared with the expected fixture as whole-file text.
2
+
3
+ require "spec_helper"
4
+
5
+ describe "transform" do
6
+ it "should be able to downcase fields" do
7
+ filename = File.dirname(__FILE__) + "/../data/datify_input.csv"
8
+ tmp = Tempfile.new('transform')
9
+ results = Masticate.transform(filename, :output => tmp, :field => 'LAST_NAME', :rule => 'downcase')
10
+ output = File.read(tmp)
11
+ correct_output = File.read(File.dirname(__FILE__) + "/../data/downcase_results.csv")
12
+
13
+ output.should == correct_output
14
+ end
15
+
16
+ it "should be able to upcase fields" do
17
+ filename = File.dirname(__FILE__) + "/../data/downcase_results.csv"
18
+ tmp = Tempfile.new('transform')
19
+ results = Masticate.transform(filename, :output => tmp, :field => 'LAST_NAME', :rule => 'upcase')
20
+ output = File.read(tmp)
21
+ correct_output = File.read(File.dirname(__FILE__) + "/../data/datify_input.csv")
22
+
23
+ output.should == correct_output
24
+ end
25
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: masticate
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.2
4
+ version: 0.4.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-05-30 00:00:00.000000000 Z
12
+ date: 2012-08-21 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rake
16
- requirement: &2157784500 !ruby/object:Gem::Requirement
16
+ requirement: !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ~>
@@ -21,10 +21,15 @@ dependencies:
21
21
  version: 0.9.2
22
22
  type: :development
23
23
  prerelease: false
24
- version_requirements: *2157784500
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ~>
28
+ - !ruby/object:Gem::Version
29
+ version: 0.9.2
25
30
  - !ruby/object:Gem::Dependency
26
31
  name: rspec
27
- requirement: &2157784000 !ruby/object:Gem::Requirement
32
+ requirement: !ruby/object:Gem::Requirement
28
33
  none: false
29
34
  requirements:
30
35
  - - ~>
@@ -32,10 +37,15 @@ dependencies:
32
37
  version: 2.9.0
33
38
  type: :development
34
39
  prerelease: false
35
- version_requirements: *2157784000
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ~>
44
+ - !ruby/object:Gem::Version
45
+ version: 2.9.0
36
46
  - !ruby/object:Gem::Dependency
37
47
  name: guard-rspec
38
- requirement: &2157783540 !ruby/object:Gem::Requirement
48
+ requirement: !ruby/object:Gem::Requirement
39
49
  none: false
40
50
  requirements:
41
51
  - - ~>
@@ -43,10 +53,15 @@ dependencies:
43
53
  version: 0.7.0
44
54
  type: :development
45
55
  prerelease: false
46
- version_requirements: *2157783540
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ~>
60
+ - !ruby/object:Gem::Version
61
+ version: 0.7.0
47
62
  - !ruby/object:Gem::Dependency
48
63
  name: ruby_gntp
49
- requirement: &2157783080 !ruby/object:Gem::Requirement
64
+ requirement: !ruby/object:Gem::Requirement
50
65
  none: false
51
66
  requirements:
52
67
  - - ~>
@@ -54,7 +69,12 @@ dependencies:
54
69
  version: 0.3.4
55
70
  type: :development
56
71
  prerelease: false
57
- version_requirements: *2157783080
72
+ version_requirements: !ruby/object:Gem::Requirement
73
+ none: false
74
+ requirements:
75
+ - - ~>
76
+ - !ruby/object:Gem::Version
77
+ version: 0.3.4
58
78
  description: Data file crunching
59
79
  email:
60
80
  - jmay@pobox.com
@@ -85,6 +105,7 @@ files:
85
105
  - lib/masticate/plucker.rb
86
106
  - lib/masticate/relabel.rb
87
107
  - lib/masticate/sniffer.rb
108
+ - lib/masticate/transform.rb
88
109
  - lib/masticate/version.rb
89
110
  - masticate.gemspec
90
111
  - spec/data/badnums.csv
@@ -97,6 +118,7 @@ files:
97
118
  - spec/data/cooking_result.csv
98
119
  - spec/data/datify_input.csv
99
120
  - spec/data/datify_result.csv
121
+ - spec/data/downcase_results.csv
100
122
  - spec/data/events.csv
101
123
  - spec/data/events_reduced.csv
102
124
  - spec/data/exclude_input.csv
@@ -128,6 +150,7 @@ files:
128
150
  - spec/lib/plucker_spec.rb
129
151
  - spec/lib/relabel_spec.rb
130
152
  - spec/lib/sniffer_spec.rb
153
+ - spec/lib/transform_spec.rb
131
154
  - spec/spec_helper.rb
132
155
  homepage: ''
133
156
  licenses: []
@@ -141,15 +164,21 @@ required_ruby_version: !ruby/object:Gem::Requirement
141
164
  - - ! '>='
142
165
  - !ruby/object:Gem::Version
143
166
  version: '0'
167
+ segments:
168
+ - 0
169
+ hash: -519721259904741395
144
170
  required_rubygems_version: !ruby/object:Gem::Requirement
145
171
  none: false
146
172
  requirements:
147
173
  - - ! '>='
148
174
  - !ruby/object:Gem::Version
149
175
  version: '0'
176
+ segments:
177
+ - 0
178
+ hash: -519721259904741395
150
179
  requirements: []
151
180
  rubyforge_project: masticate
152
- rubygems_version: 1.8.10
181
+ rubygems_version: 1.8.24
153
182
  signing_key:
154
183
  specification_version: 3
155
184
  summary: Utility functions for parsing incoming text data files.
@@ -164,6 +193,7 @@ test_files:
164
193
  - spec/data/cooking_result.csv
165
194
  - spec/data/datify_input.csv
166
195
  - spec/data/datify_result.csv
196
+ - spec/data/downcase_results.csv
167
197
  - spec/data/events.csv
168
198
  - spec/data/events_reduced.csv
169
199
  - spec/data/exclude_input.csv
@@ -195,5 +225,5 @@ test_files:
195
225
  - spec/lib/plucker_spec.rb
196
226
  - spec/lib/relabel_spec.rb
197
227
  - spec/lib/sniffer_spec.rb
228
+ - spec/lib/transform_spec.rb
198
229
  - spec/spec_helper.rb
199
- has_rdoc: