data_cleansing 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: c1895359140ba6c85db9f47c33a3ddb7b8558d6d
4
+ data.tar.gz: d2f610c71b6c413114d1b30f41aab6f8d9fdd403
5
+ SHA512:
6
+ metadata.gz: 0503371aefa9478b62345cdbce5fcc88d344c2b080471e38e5c1d2bf0bdb0f7cd5f00b61e182d100b477994c567e5211f662a828ec781ef6955a24a19686aaa1
7
+ data.tar.gz: 1e48577434bafba21a884156ade715dbdc56d7dcabcb9ecdccbed057c22f2bcccd162e8f5c38b3822dbb03bde927b55a3967f940a550386bb9c8670b53aa9693
data/Gemfile ADDED
@@ -0,0 +1,19 @@
1
+ source 'https://rubygems.org'
2
+
3
+ group :test do
4
+ gem "shoulda"
5
+
6
+ gem "activerecord"
7
+ gem 'sqlite3', :platform => :ruby
8
+
9
+ platforms :jruby do
10
+ gem 'jdbc-sqlite3'
11
+ gem 'activerecord-jdbcsqlite3-adapter'
12
+ end
13
+
14
+ gem "mongoid"
15
+ end
16
+
17
+ group :develop do
18
+ gem 'awesome_print'
19
+ end
data/Gemfile.lock ADDED
@@ -0,0 +1,57 @@
1
+ GEM
2
+ remote: https://rubygems.org/
3
+ specs:
4
+ activemodel (4.0.0)
5
+ activesupport (= 4.0.0)
6
+ builder (~> 3.1.0)
7
+ activerecord (4.0.0)
8
+ activemodel (= 4.0.0)
9
+ activerecord-deprecated_finders (~> 1.0.2)
10
+ activesupport (= 4.0.0)
11
+ arel (~> 4.0.0)
12
+ activerecord-deprecated_finders (1.0.3)
13
+ activesupport (4.0.0)
14
+ i18n (~> 0.6, >= 0.6.4)
15
+ minitest (~> 4.2)
16
+ multi_json (~> 1.3)
17
+ thread_safe (~> 0.1)
18
+ tzinfo (~> 0.3.37)
19
+ arel (4.0.0)
20
+ atomic (1.1.10)
21
+ awesome_print (1.1.0)
22
+ bson (1.9.1)
23
+ builder (3.1.4)
24
+ durran-validatable (2.0.1)
25
+ i18n (0.6.4)
26
+ leshill-will_paginate (2.3.11)
27
+ minitest (4.7.5)
28
+ mongo (1.9.1)
29
+ bson (~> 1.9.1)
30
+ mongoid (1.0.6)
31
+ activesupport (>= 2.2.2)
32
+ durran-validatable (>= 2.0.1)
33
+ leshill-will_paginate (>= 2.3.11)
34
+ mongo (>= 0.18.2)
35
+ multi_json (1.7.7)
36
+ shoulda (3.5.0)
37
+ shoulda-context (~> 1.0, >= 1.0.1)
38
+ shoulda-matchers (>= 1.4.1, < 3.0)
39
+ shoulda-context (1.1.4)
40
+ shoulda-matchers (2.2.0)
41
+ activesupport (>= 3.0.0)
42
+ sqlite3 (1.3.7)
43
+ thread_safe (0.1.0)
44
+ atomic
45
+ tzinfo (0.3.37)
46
+
47
+ PLATFORMS
48
+ ruby
49
+
50
+ DEPENDENCIES
51
+ activerecord
52
+ activerecord-jdbcsqlite3-adapter
53
+ awesome_print
54
+ jdbc-sqlite3
55
+ mongoid
56
+ shoulda
57
+ sqlite3
data/LICENSE.txt ADDED
@@ -0,0 +1,201 @@
1
+ Apache License
2
+ Version 2.0, January 2004
3
+ http://www.apache.org/licenses/
4
+
5
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6
+
7
+ 1. Definitions.
8
+
9
+ "License" shall mean the terms and conditions for use, reproduction,
10
+ and distribution as defined by Sections 1 through 9 of this document.
11
+
12
+ "Licensor" shall mean the copyright owner or entity authorized by
13
+ the copyright owner that is granting the License.
14
+
15
+ "Legal Entity" shall mean the union of the acting entity and all
16
+ other entities that control, are controlled by, or are under common
17
+ control with that entity. For the purposes of this definition,
18
+ "control" means (i) the power, direct or indirect, to cause the
19
+ direction or management of such entity, whether by contract or
20
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
21
+ outstanding shares, or (iii) beneficial ownership of such entity.
22
+
23
+ "You" (or "Your") shall mean an individual or Legal Entity
24
+ exercising permissions granted by this License.
25
+
26
+ "Source" form shall mean the preferred form for making modifications,
27
+ including but not limited to software source code, documentation
28
+ source, and configuration files.
29
+
30
+ "Object" form shall mean any form resulting from mechanical
31
+ transformation or translation of a Source form, including but
32
+ not limited to compiled object code, generated documentation,
33
+ and conversions to other media types.
34
+
35
+ "Work" shall mean the work of authorship, whether in Source or
36
+ Object form, made available under the License, as indicated by a
37
+ copyright notice that is included in or attached to the work
38
+ (an example is provided in the Appendix below).
39
+
40
+ "Derivative Works" shall mean any work, whether in Source or Object
41
+ form, that is based on (or derived from) the Work and for which the
42
+ editorial revisions, annotations, elaborations, or other modifications
43
+ represent, as a whole, an original work of authorship. For the purposes
44
+ of this License, Derivative Works shall not include works that remain
45
+ separable from, or merely link (or bind by name) to the interfaces of,
46
+ the Work and Derivative Works thereof.
47
+
48
+ "Contribution" shall mean any work of authorship, including
49
+ the original version of the Work and any modifications or additions
50
+ to that Work or Derivative Works thereof, that is intentionally
51
+ submitted to Licensor for inclusion in the Work by the copyright owner
52
+ or by an individual or Legal Entity authorized to submit on behalf of
53
+ the copyright owner. For the purposes of this definition, "submitted"
54
+ means any form of electronic, verbal, or written communication sent
55
+ to the Licensor or its representatives, including but not limited to
56
+ communication on electronic mailing lists, source code control systems,
57
+ and issue tracking systems that are managed by, or on behalf of, the
58
+ Licensor for the purpose of discussing and improving the Work, but
59
+ excluding communication that is conspicuously marked or otherwise
60
+ designated in writing by the copyright owner as "Not a Contribution."
61
+
62
+ "Contributor" shall mean Licensor and any individual or Legal Entity
63
+ on behalf of whom a Contribution has been received by Licensor and
64
+ subsequently incorporated within the Work.
65
+
66
+ 2. Grant of Copyright License. Subject to the terms and conditions of
67
+ this License, each Contributor hereby grants to You a perpetual,
68
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69
+ copyright license to reproduce, prepare Derivative Works of,
70
+ publicly display, publicly perform, sublicense, and distribute the
71
+ Work and such Derivative Works in Source or Object form.
72
+
73
+ 3. Grant of Patent License. Subject to the terms and conditions of
74
+ this License, each Contributor hereby grants to You a perpetual,
75
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76
+ (except as stated in this section) patent license to make, have made,
77
+ use, offer to sell, sell, import, and otherwise transfer the Work,
78
+ where such license applies only to those patent claims licensable
79
+ by such Contributor that are necessarily infringed by their
80
+ Contribution(s) alone or by combination of their Contribution(s)
81
+ with the Work to which such Contribution(s) was submitted. If You
82
+ institute patent litigation against any entity (including a
83
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
84
+ or a Contribution incorporated within the Work constitutes direct
85
+ or contributory patent infringement, then any patent licenses
86
+ granted to You under this License for that Work shall terminate
87
+ as of the date such litigation is filed.
88
+
89
+ 4. Redistribution. You may reproduce and distribute copies of the
90
+ Work or Derivative Works thereof in any medium, with or without
91
+ modifications, and in Source or Object form, provided that You
92
+ meet the following conditions:
93
+
94
+ (a) You must give any other recipients of the Work or
95
+ Derivative Works a copy of this License; and
96
+
97
+ (b) You must cause any modified files to carry prominent notices
98
+ stating that You changed the files; and
99
+
100
+ (c) You must retain, in the Source form of any Derivative Works
101
+ that You distribute, all copyright, patent, trademark, and
102
+ attribution notices from the Source form of the Work,
103
+ excluding those notices that do not pertain to any part of
104
+ the Derivative Works; and
105
+
106
+ (d) If the Work includes a "NOTICE" text file as part of its
107
+ distribution, then any Derivative Works that You distribute must
108
+ include a readable copy of the attribution notices contained
109
+ within such NOTICE file, excluding those notices that do not
110
+ pertain to any part of the Derivative Works, in at least one
111
+ of the following places: within a NOTICE text file distributed
112
+ as part of the Derivative Works; within the Source form or
113
+ documentation, if provided along with the Derivative Works; or,
114
+ within a display generated by the Derivative Works, if and
115
+ wherever such third-party notices normally appear. The contents
116
+ of the NOTICE file are for informational purposes only and
117
+ do not modify the License. You may add Your own attribution
118
+ notices within Derivative Works that You distribute, alongside
119
+ or as an addendum to the NOTICE text from the Work, provided
120
+ that such additional attribution notices cannot be construed
121
+ as modifying the License.
122
+
123
+ You may add Your own copyright statement to Your modifications and
124
+ may provide additional or different license terms and conditions
125
+ for use, reproduction, or distribution of Your modifications, or
126
+ for any such Derivative Works as a whole, provided Your use,
127
+ reproduction, and distribution of the Work otherwise complies with
128
+ the conditions stated in this License.
129
+
130
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
131
+ any Contribution intentionally submitted for inclusion in the Work
132
+ by You to the Licensor shall be under the terms and conditions of
133
+ this License, without any additional terms or conditions.
134
+ Notwithstanding the above, nothing herein shall supersede or modify
135
+ the terms of any separate license agreement you may have executed
136
+ with Licensor regarding such Contributions.
137
+
138
+ 6. Trademarks. This License does not grant permission to use the trade
139
+ names, trademarks, service marks, or product names of the Licensor,
140
+ except as required for reasonable and customary use in describing the
141
+ origin of the Work and reproducing the content of the NOTICE file.
142
+
143
+ 7. Disclaimer of Warranty. Unless required by applicable law or
144
+ agreed to in writing, Licensor provides the Work (and each
145
+ Contributor provides its Contributions) on an "AS IS" BASIS,
146
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147
+ implied, including, without limitation, any warranties or conditions
148
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149
+ PARTICULAR PURPOSE. You are solely responsible for determining the
150
+ appropriateness of using or redistributing the Work and assume any
151
+ risks associated with Your exercise of permissions under this License.
152
+
153
+ 8. Limitation of Liability. In no event and under no legal theory,
154
+ whether in tort (including negligence), contract, or otherwise,
155
+ unless required by applicable law (such as deliberate and grossly
156
+ negligent acts) or agreed to in writing, shall any Contributor be
157
+ liable to You for damages, including any direct, indirect, special,
158
+ incidental, or consequential damages of any character arising as a
159
+ result of this License or out of the use or inability to use the
160
+ Work (including but not limited to damages for loss of goodwill,
161
+ work stoppage, computer failure or malfunction, or any and all
162
+ other commercial damages or losses), even if such Contributor
163
+ has been advised of the possibility of such damages.
164
+
165
+ 9. Accepting Warranty or Additional Liability. While redistributing
166
+ the Work or Derivative Works thereof, You may choose to offer,
167
+ and charge a fee for, acceptance of support, warranty, indemnity,
168
+ or other liability obligations and/or rights consistent with this
169
+ License. However, in accepting such obligations, You may act only
170
+ on Your own behalf and on Your sole responsibility, not on behalf
171
+ of any other Contributor, and only if You agree to indemnify,
172
+ defend, and hold each Contributor harmless for any liability
173
+ incurred by, or claims asserted against, such Contributor by reason
174
+ of your accepting any such warranty or additional liability.
175
+
176
+ END OF TERMS AND CONDITIONS
177
+
178
+ APPENDIX: How to apply the Apache License to your work.
179
+
180
+ To apply the Apache License to your work, attach the following
181
+ boilerplate notice, with the fields enclosed by brackets "[]"
182
+ replaced with your own identifying information. (Don't include
183
+ the brackets!) The text should be enclosed in the appropriate
184
+ comment syntax for the file format. We also recommend that a
185
+ file or class name and description of purpose be included on the
186
+ same "printed page" as the copyright notice for easier
187
+ identification within third-party archives.
188
+
189
+ Copyright 2012 Clarity Services, Inc.
190
+
191
+ Licensed under the Apache License, Version 2.0 (the "License");
192
+ you may not use this file except in compliance with the License.
193
+ You may obtain a copy of the License at
194
+
195
+ http://www.apache.org/licenses/LICENSE-2.0
196
+
197
+ Unless required by applicable law or agreed to in writing, software
198
+ distributed under the License is distributed on an "AS IS" BASIS,
199
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200
+ See the License for the specific language governing permissions and
201
+ limitations under the License.
data/README.md ADDED
@@ -0,0 +1,150 @@
1
+ data_cleansing
2
+ ==============
3
+
4
+ Data Cleansing solution for Ruby with additional support for Rails and Mongoid
5
+
6
+ * http://github.com/ClarityServices/data_cleansing
7
+
8
+ ## Introduction
9
+
10
+ It is important to keep internal data free of unwanted escape characters, leading
11
+ or trailing blanks and even newlines.
12
+ Similarly it would be useful to be able to attach a cleansing solution to a field
13
+ in a model and have the data cleansed transparently when required.
14
+
15
+ DataCleansing is a framework that allows any data cleansing to be applied to
16
+ specific attributes or fields. At this time it does not supply the cleaning
17
+ solutions themselves since they are usually straightforward, or so complex
18
+ that they don't tend to be too useful to others. However, over time built-in
19
+ cleansing solutions may be added. Feel free to submit any suggestions via a ticket
20
+ or pull request.
21
+
22
+ ## Features
23
+
24
+ * Supports global cleansing definitions that can be associated with any Ruby,
25
+ Rails, Mongoid, or other model.
26
+ * Supports custom cleansing definitions for a single attribute
27
+
28
+ ## Examples
29
+
30
+ ### Ruby Example
31
+
32
+ ```ruby
33
+ class User
34
+ include DataCleansing::Cleanse
35
+
36
+ attr_accessor :first_name, :last_name, :address1, :address2
37
+
38
+ # Use a global cleaner
39
+ cleanse :first_name, :last_name, :cleaner => :strip
40
+
41
+ # Define a once off cleaner
42
+ cleanse :address1, :address2, :cleaner => Proc.new {|string| string.strip}
43
+ end
44
+
45
+ # Define a global cleanser
46
+ DataCleansing.register_cleaner(:strip) {|string, params, object| string.strip}
47
+
48
+ u = User.new
49
+ u.first_name = ' joe '
50
+ u.last_name = "\n black\n"
51
+ u.address1 = "2632 Brown St \n"
52
+ puts "Before data cleansing #{u.inspect}"
53
+ u.cleanse_attributes!
54
+ puts "After data cleansing #{u.inspect}"
55
+ ```
56
+
57
+ ### Rails Example
58
+
59
+ To cleanse the attributes of an ActiveRecord model, include DataCleansing::Cleanse,
60
+ declare the cleaners, and call cleanse_attributes! from a before_validation callback.
61
+
62
+ ```ruby
63
+ # 'users' table has the following columns :first_name, :last_name, :address1, :address2
64
+ class User < ActiveRecord::Base
65
+ include DataCleansing::Cleanse
66
+
67
+ # Use a global cleaner
68
+ cleanse :first_name, :last_name, :cleaner => :strip
69
+
70
+ # Define a once off cleaner
71
+ cleanse :address1, :address2, :cleaner => Proc.new {|string| string.strip}
72
+
73
+ # Automatically cleanse data before validation
74
+ before_validation :cleanse_attributes!
75
+ end
76
+
77
+ # Define a global cleanser
78
+ DataCleansing.register_cleaner(:strip) {|string, params, object| string.strip}
79
+
80
+ # Create a User instance
81
+ u = User.new(:first_name => ' joe ', :last_name => "\n black\n", :address1 => "2632 Brown St \n")
82
+ puts "Before data cleansing #{u.attributes.inspect}"
83
+ u.valid?
84
+ puts "After data cleansing #{u.attributes.inspect}"
85
+ u.save!
86
+ ```
87
+
88
+ ## Installation
89
+
90
+ ### Add to an existing Rails project
91
+
92
+ Add the following line to Gemfile
93
+
94
+ ```ruby
95
+ gem 'data_cleansing'
96
+ ```
97
+
98
+ Install the Gem with bundler
99
+
100
+ bundle install
101
+
102
+ ## Architecture
103
+
104
+ DataCleansing has been designed to support externalized data cleansing routines.
105
+ In this way the data cleansing routine itself can be loaded from a datastore and
106
+ applied dynamically at runtime.
107
+ Although not supported out of the box, this design allows for example for the
108
+ data cleansing routines to be stored in something like [ZooKeeper](http://zookeeper.apache.org/).
109
+ Then any changes to the data cleansing routines can be pushed out immediately to
110
+ every server that needs it.
111
+
112
+ DataCleansing is designed to support any Ruby model. In this way it can be used
113
+ in just about any ORM or DOM. For example, it currently easily supports both
114
+ Rails and Mongoid models. Some extensions have been added to support these frameworks.
115
+
116
+ For example, in Rails it obtains the raw data value before Rails has converted it,
117
+ which is useful for cleansing integer or float fields as raw strings before Rails
118
+ tries to convert it to an integer or float.
119
+
120
+ Meta
121
+ ----
122
+
123
+ * Code: `git clone git://github.com/ClarityServices/data_cleansing.git`
124
+ * Home: <https://github.com/ClarityServices/data_cleansing>
125
+ * Issues: <http://github.com/ClarityServices/data_cleansing/issues>
126
+ * Gems: <http://rubygems.org/gems/data_cleansing>
127
+
128
+ This project uses [Semantic Versioning](http://semver.org/).
129
+
130
+ Authors
131
+ -------
132
+
133
+ Reid Morrison :: reidmo@gmail.com :: @reidmorrison
134
+
135
+ License
136
+ -------
137
+
138
+ Copyright 2013 Clarity Services, Inc.
139
+
140
+ Licensed under the Apache License, Version 2.0 (the "License");
141
+ you may not use this file except in compliance with the License.
142
+ You may obtain a copy of the License at
143
+
144
+ http://www.apache.org/licenses/LICENSE-2.0
145
+
146
+ Unless required by applicable law or agreed to in writing, software
147
+ distributed under the License is distributed on an "AS IS" BASIS,
148
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
149
+ See the License for the specific language governing permissions and
150
+ limitations under the License.
data/Rakefile ADDED
@@ -0,0 +1,39 @@
1
+ lib = File.expand_path('../lib/', __FILE__)
2
+ $:.unshift lib unless $:.include?(lib)
3
+
4
+ require 'rubygems'
5
+ require 'rubygems/package'
6
+ require 'rake/clean'
7
+ require 'rake/testtask'
8
+ require 'date'
9
+ require 'data_cleansing/version'
10
+
11
# Builds the data_cleansing gem from the files in the current directory.
# The version comes from DataCleansing::VERSION and the build date is today.
desc "Build gem"
task :gem do
  gemspec = Gem::Specification.new do |s|
    s.name        = 'data_cleansing'
    s.version     = DataCleansing::VERSION
    s.platform    = Gem::Platform::RUBY
    s.authors     = ['Reid Morrison']
    s.email       = ['reidmo@gmail.com']
    s.homepage    = 'https://github.com/ClarityServices/data_cleansing'
    s.date        = Date.today.to_s
    s.summary     = "Data Cleansing framework for Ruby, and Ruby on Rails"
    s.description = "Data Cleansing framework for Ruby with additional support for Rails and Mongoid"
    # FileList yields paths such as "./nbproject/project.xml", so the exclusion
    # patterns are matched against the "./"-prefixed form. The dots are escaped
    # so only real ".gem" / ".log" extensions are excluded.
    s.files       = FileList["./**/*"].
      exclude(/\.gem$/, /\.log$/, %r{^\./nbproject(/|$)}).
      map { |f| f.sub(/^\.\//, '') }
    s.license     = "Apache License V2.0"
    s.add_dependency 'thread_safe'
  end
  Gem::Package.build gemspec
end
30
+
31
# Runs every test/*_test.rb file. Defined once at load time rather than on
# each invocation of the :test task.
Rake::TestTask.new(:functional) do |t|
  t.test_files = FileList['test/*_test.rb']
  t.verbose    = true
end

desc "Run Test Suite"
task :test => :functional
@@ -0,0 +1,91 @@
1
module DataCleansing
  # Mix-in to add cleaner
  #
  # Including this module gives a class the `cleanse` declaration and gives its
  # instances `cleanse_attributes!`, which applies every declared cleaner.
  module Cleanse
    # One cleansing rule: the cleaner (a Proc, or the name of a globally
    # registered cleaner), the attribute names it applies to, and the extra
    # params that are handed to the cleaner as its second argument.
    CleanerStruct = Struct.new(:cleaner, :attributes, :params)

    module ClassMethods
      # Define how to cleanse one or more attributes
      #
      # Parameters
      #   args
      #     One or more attribute names, optionally followed by a Hash of params
      #
      #   :cleaner [Symbol|Proc]
      #     Mandatory. Name of a globally registered cleaner, or a Proc
      #
      #   Any other params are stored and passed to the cleaner on every call
      #
      # Raises ArgumentError when the :cleaner param is missing
      def cleanse(*args)
        # Only a plain Hash in last position is treated as the params
        params  = args.last.instance_of?(Hash) ? args.pop.dup : {}
        cleaner = params.delete(:cleaner)
        raise(ArgumentError, "Mandatory :cleaner parameter is missing: #{params.inspect}") unless cleaner
        (@cleaners ||= ThreadSafe::Array.new) << CleanerStruct.new(cleaner, args, params)
      end

      # Returns the rules declared on this class via #cleanse,
      # or nil when #cleanse has never been called
      def cleaners
        @cleaners
      end
    end

    module InstanceMethods
      # Cleanse the attributes using specified cleaners
      #
      # Each non-nil attribute value is passed to its cleaner together with the
      # params of the rule, and the attribute is re-assigned only when the
      # cleaner returned a different value.
      #
      # Raises RuntimeError when a rule names a cleaner that is not registered
      def cleanse_attributes!
        # A class that includes the mix-in without declaring any cleaner has
        # no rules at all
        (self.class.cleaners || []).each do |cleaner_struct|
          params  = cleaner_struct.params
          cleaner = cleaner_struct.cleaner
          attrs   = cleaner_struct.attributes

          # Special case to include :all fields
          # Only works with ActiveRecord based models, not supported with regular Ruby models
          if attrs.include?(:all) && defined?(ActiveRecord) && respond_to?(:attributes)
            attrs = attributes.keys.collect { |i| i.to_sym }

            if params.key?(:except)
              # The params hash is shared by every instance of the class, so
              # :except is removed from a copy. Removing it from the original
              # would make the exclusion apply to the first call only.
              params = params.dup
              attrs -= Array(params.delete(:except))
            end

            # Remove serialized_attributes if any, from the :all condition
            if self.class.respond_to?(:serialized_attributes)
              serialized_attrs = self.class.serialized_attributes.keys
              attrs -= serialized_attrs.collect { |i| i.to_sym } if serialized_attrs
            end

            # Replace any encrypted attributes with their non-encrypted versions if any
            if defined?(SymmetricEncryption) && self.class.respond_to?(:encrypted_attributes)
              self.class.encrypted_attributes.each_pair do |clear, encrypted|
                if attrs.include?(encrypted.to_sym)
                  attrs.delete(encrypted.to_sym)
                  attrs << clear.to_sym
                end
              end
            end
          end

          attrs.each do |attr|
            # Under ActiveModel for Rails and Mongoid need to retrieve raw value
            # before data type conversion
            value = if respond_to?(:read_attribute_before_type_cast)
              read_attribute_before_type_cast(attr.to_s)
            else
              send(attr.to_sym)
            end

            # No need to clean if attribute is nil
            next if value.nil?

            new_value = if cleaner.is_a?(Proc)
              cleaner.call(value, params)
            elsif c = DataCleansing.cleaner(cleaner)
              c.call(value, params)
            else
              raise "No cleaner defined for #{cleaner.to_sym}"
            end

            # Update value if it has changed
            send("#{attr.to_sym}=".to_sym, new_value) if new_value != value
          end
        end
      end
    end

    # Adds the class level DSL and the instance methods to the including class
    def self.included(base)
      base.class_eval do
        extend(DataCleansing::Cleanse::ClassMethods)
        include(DataCleansing::Cleanse::InstanceMethods)
      end
    end
  end

end
@@ -0,0 +1,23 @@
1
module DataCleansing

  # Global Data Cleansers, keyed by cleaner name (Symbol)
  @@global_cleaners = ThreadSafe::Hash.new

  # Register a new cleaner
  # Replaces any existing cleaner with the same name
  #
  # Parameters
  #   cleaner
  #     Name of the cleaner, converted to a Symbol with #to_sym
  #   block
  #     The code to run when the cleaner is applied
  #
  # Raises ArgumentError when no block is supplied, since nothing would be
  # registered and the failure would only surface later as a missing cleaner
  #
  # TODO Expose class methods as cleaners
  def self.register_cleaner(cleaner, &block)
    raise ArgumentError, "Must supply a block when registering cleaner: #{cleaner.inspect}" unless block
    @@global_cleaners[cleaner.to_sym] = block
  end

  # Returns the cleaner matching the supplied cleaner name,
  # or nil when no cleaner has been registered under that name
  def self.cleaner(cleaner_name)
    @@global_cleaners[cleaner_name.to_sym]
  end
end
@@ -0,0 +1,3 @@
1
# Namespace of the data_cleansing gem
module DataCleansing
  # Version of the gem, read by the Rakefile when building the gem
  VERSION = '0.2.0'
end
@@ -0,0 +1,17 @@
1
+ require 'thread_safe'
2
+ require 'data_cleansing/version'
3
+ require 'data_cleansing/data_cleansing'
4
+
5
+ module DataCleansing
6
+ autoload :Cleanse, 'data_cleansing/cleanse'
7
+ end
8
+
9
+ # Rails Extensions
10
+ #if defined?(Rails)
11
+ # require 'data_cleansing/railtie'
12
+ #end
13
+
14
+ # Mongoid Extensions
15
+ #if defined?(Mongoid)
16
+ # require 'data_cleansing/extensions/mongoid/fields'
17
+ #end
@@ -0,0 +1,3 @@
1
+ file.reference.data_cleansing-examples=/Users/rmorrison/Sandbox/data_cleansing/examples
2
+ file.reference.data_cleansing-lib=/Users/rmorrison/Sandbox/data_cleansing/lib
3
+ file.reference.data_cleansing-test=/Users/rmorrison/Sandbox/data_cleansing/test
File without changes
@@ -0,0 +1,9 @@
1
+ examples.dir=${file.reference.data_cleansing-examples}
2
+ file.reference.data_cleansing-examples=examples
3
+ file.reference.data_cleansing-lib=lib
4
+ file.reference.data_cleansing-test=test
5
+ main.file=
6
+ platform.active=Ruby_1
7
+ source.encoding=UTF-8
8
+ src.dir=${file.reference.data_cleansing-lib}
9
+ test.src.dir=${file.reference.data_cleansing-test}
@@ -0,0 +1,16 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project xmlns="http://www.netbeans.org/ns/project/1">
3
+ <type>org.netbeans.modules.ruby.rubyproject</type>
4
+ <configuration>
5
+ <data xmlns="http://www.netbeans.org/ns/ruby-project/1">
6
+ <name>data_cleansing</name>
7
+ <source-roots>
8
+ <root id="src.dir"/>
9
+ <root id="examples.dir"/>
10
+ </source-roots>
11
+ <test-roots>
12
+ <root id="test.src.dir"/>
13
+ </test-roots>
14
+ </data>
15
+ </configuration>
16
+ </project>
@@ -0,0 +1,133 @@
1
+ # Allow examples to be run in-place without requiring a gem install
2
+ $LOAD_PATH.unshift File.dirname(__FILE__) + '/../lib'
3
+
4
+ require 'rubygems'
5
+ require 'test/unit'
6
+ require 'shoulda'
7
+ # Load ActiveRecord before loading data_cleansing so that the AR extensions
8
+ # are loaded
9
+ require 'active_record'
10
+ require 'data_cleansing'
11
+
12
+ ActiveRecord::Base.logger = Logger.new($stderr)
13
+ ActiveRecord::Base.configurations = {
14
+ 'test' => {
15
+ 'adapter' => 'sqlite3',
16
+ 'database' => 'test/test_db.sqlite3',
17
+ 'pool' => 5,
18
+ 'timeout' => 5000
19
+ }
20
+ }
21
+ ActiveRecord::Base.establish_connection('test')
22
+
23
+ ActiveRecord::Schema.define :version => 0 do
24
+ create_table :users, :force => true do |t|
25
+ t.string :first_name
26
+ t.string :last_name
27
+ t.string :address1
28
+ t.string :address2
29
+ t.integer :zip_code
30
+ end
31
+ end
32
+
33
# Define a global cleaner
# String#strip is used rather than String#strip!: the bang variant returns nil
# when there is nothing to strip, which would set already-clean values to nil
DataCleansing.register_cleaner(:strip) { |string| string.strip }

# Removes all non-digit characters, except '.' then truncates
# the result to an integer
# Returns nil if no digits are present in the string
# Values that are not Strings are returned unchanged
DataCleansing.register_cleaner(:digits_to_integer) do |integer|
  if integer.kind_of?(String)
    # Remove Non-Digit Chars, except for '.'
    digits = integer.gsub(/[^0-9\.]/, '')
    # A string holding only '.' characters has no digits either
    digits =~ /[0-9]/ ? digits.to_i : nil
  else
    integer
  end
end
48
+
49
+
50
# Model that cleanses selected columns of the 'users' table before validation
class User < ActiveRecord::Base
  include DataCleansing::Cleanse

  # Use a global cleaner
  cleanse :first_name, :last_name, :cleaner => :strip

  # Define a once off cleaner
  # String#strip is used because String#strip! returns nil for a value that is
  # already clean, which would be interpolated as an empty string
  cleanse :address1, :address2, :cleaner => Proc.new { |string| "<< #{string.strip} >>" }

  # Custom Zip Code cleaner
  cleanse :zip_code, :cleaner => :digits_to_integer

  # Automatically cleanse data before validation
  before_validation :cleanse_attributes!
end
65
+
66
# Second model on the 'users' table, used to exercise the :all cleaner
class User2 < ActiveRecord::Base
  # Use the same table as User above
  self.table_name = 'users'

  include DataCleansing::Cleanse

  # Test :all cleaner. Only works with ActiveRecord Models
  cleanse(:all, cleaner: :strip)

  # Automatically cleanse data before validation
  before_validation(:cleanse_attributes!)
end
77
+
78
# Exercises DataCleansing::Cleanse against the ActiveRecord models User and User2
class ActiveRecordTest < Test::Unit::TestCase
  context("ActiveRecord Models") do

    should('have globally registered cleaner') do
      assert(DataCleansing.cleaner(:strip))
    end

    context("with user") do
      setup do
        # Raw, dirty values as they could arrive from a form
        dirty = {
          :first_name => ' joe ',
          :last_name  => "\n black\n",
          :address1   => "2632 Brown St \n",
          :zip_code   => "\n\tblah 12345badtext\n"
        }
        @user = User.new(dirty)
      end

      should('cleanse_attributes! using global cleaner') do
        # valid? triggers the before_validation callback
        assert_equal(true, @user.valid?)
        assert_equal('joe', @user.first_name)
        assert_equal('black', @user.last_name)
      end

      should('cleanse_attributes! using attribute specific custom cleaner') do
        assert_equal(true, @user.valid?)
        assert_equal('<< 2632 Brown St >>', @user.address1)
      end

      should('cleanse_attributes! using global cleaner using rails extensions') do
        @user.cleanse_attributes!
        assert_equal(12345, @user.zip_code)
      end
    end

    context("with user2") do
      setup do
        dirty = {
          :first_name => ' joe ',
          :last_name  => "\n black\n",
          :address1   => "2632 Brown St \n",
          :zip_code   => "\n\t12345\n"
        }
        @user = User2.new(dirty)
      end

      should('cleanse_attributes! clean all attributes') do
        assert_equal(true, @user.valid?)
        # The declared cleaners are used as the failure message
        assert_equal('joe', @user.first_name, User2.cleaners)
        assert_equal('black', @user.last_name)
        assert_equal('2632 Brown St', @user.address1)
        assert_equal(12345, @user.zip_code)
      end

    end

  end
end
data/test/ruby_test.rb ADDED
@@ -0,0 +1,58 @@
1
+ # Allow examples to be run in-place without requiring a gem install
2
+ $LOAD_PATH.unshift File.dirname(__FILE__) + '/../lib'
3
+
4
+ require 'rubygems'
5
+ require 'test/unit'
6
+ require 'shoulda'
7
+ require 'data_cleansing'
8
+
9
# Define a global cleanser
# String#strip is used rather than String#strip!: the bang variant returns nil
# when there is nothing to strip, which would set already-clean values to nil
DataCleansing.register_cleaner(:strip) { |string| string.strip }
11
+
12
# Plain Ruby model (no ORM) used to exercise DataCleansing::Cleanse
class RubyUser
  include DataCleansing::Cleanse

  attr_accessor :first_name, :last_name, :address1, :address2

  # Use a global cleaner
  cleanse :first_name, :last_name, :cleaner => :strip

  # Define a once off cleaner
  # String#strip is used because String#strip! returns nil for a value that is
  # already clean, which would be interpolated as an empty string
  cleanse :address1, :address2, :cleaner => Proc.new { |string| "<< #{string.strip} >>" }
end
23
+
24
# Exercises DataCleansing::Cleanse against the plain Ruby model RubyUser
class RubyTest < Test::Unit::TestCase
  context "Ruby Models" do

    should 'have globally registered cleaner' do
      assert DataCleansing.cleaner(:strip)
    end

    context "with ruby user" do
      setup do
        @user = RubyUser.new
        @user.first_name = '    joe   '
        @user.last_name = "\n  black\n"
        @user.address1 = "2632 Brown St   \n"
      end

      should 'cleanse_attributes! using global cleaner' do
        @user.cleanse_attributes!
        assert_equal 'joe', @user.first_name
        assert_equal 'black', @user.last_name
      end

      should 'cleanse_attributes! using attribute specific custom cleaner' do
        @user.cleanse_attributes!
        assert_equal '<< 2632 Brown St >>', @user.address1
      end

      should 'cleanse_attributes! not cleanse nil attributes' do
        @user.first_name = nil
        @user.cleanse_attributes!
        # assert_nil is the dedicated assertion for nil
        assert_nil @user.first_name
      end
    end

  end
end
Binary file
metadata ADDED
@@ -0,0 +1,75 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: data_cleansing
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.2.0
5
+ platform: ruby
6
+ authors:
7
+ - Reid Morrison
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2013-07-11 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: thread_safe
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - '>='
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - '>='
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ description: Data Cleansing framework for Ruby with additional support for Rails and
28
+ Mongoid
29
+ email:
30
+ - reidmo@gmail.com
31
+ executables: []
32
+ extensions: []
33
+ extra_rdoc_files: []
34
+ files:
35
+ - Gemfile
36
+ - Gemfile.lock
37
+ - LICENSE.txt
38
+ - README.md
39
+ - Rakefile
40
+ - lib/data_cleansing.rb
41
+ - lib/data_cleansing/cleanse.rb
42
+ - lib/data_cleansing/data_cleansing.rb
43
+ - lib/data_cleansing/version.rb
44
+ - nbproject/private/private.properties
45
+ - nbproject/private/rake-d.txt
46
+ - nbproject/project.properties
47
+ - nbproject/project.xml
48
+ - test/active_record_test.rb
49
+ - test/ruby_test.rb
50
+ - test/test_db.sqlite3
51
+ homepage: https://github.com/ClarityServices/data_cleansing
52
+ licenses:
53
+ - Apache License V2.0
54
+ metadata: {}
55
+ post_install_message:
56
+ rdoc_options: []
57
+ require_paths:
58
+ - lib
59
+ required_ruby_version: !ruby/object:Gem::Requirement
60
+ requirements:
61
+ - - '>='
62
+ - !ruby/object:Gem::Version
63
+ version: '0'
64
+ required_rubygems_version: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - '>='
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ requirements: []
70
+ rubyforge_project:
71
+ rubygems_version: 2.0.3
72
+ signing_key:
73
+ specification_version: 4
74
+ summary: Data Cleansing framework for Ruby, and Ruby on Rails
75
+ test_files: []