data_cleansing 0.4.0 → 0.5.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +78 -2
- data/lib/data_cleansing/cleanse.rb +11 -8
- data/lib/data_cleansing/data_cleansing.rb +0 -11
- data/lib/data_cleansing/railtie.rb +18 -2
- data/lib/data_cleansing/version.rb +1 -1
- data/test/active_record_test.rb +3 -3
- data/test/test_db.sqlite3 +0 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 720a83a0b486ac0d4ee6de7c7716de4d47ae6331
|
4
|
+
data.tar.gz: 5f2f86c19d4cfb896f124802bef947e6ebbaf1a7
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 006f1106f3f67dad19c979d0384cb85f0c2bdbe8d3150d6c6ff13629b0c24fa1aaad70b0c9e66718d615dc63d5ca4202bf2125f8fd91a0190f4a1601a3a83a3e
|
7
|
+
data.tar.gz: 83a1ec0a5da0c2ec7d0a3475db95e5360613c53d36a4837b33385e1a8309e0cda894ede12d9dd8861c6d6db1ad80f683cdadff34191b99e87a299de897d6074b
|
data/README.md
CHANGED
@@ -23,7 +23,27 @@ or pull request.
|
|
23
23
|
|
24
24
|
* Supports global cleansing definitions that can be associated with any Ruby,
|
25
25
|
Rails, Mongoid, or other model.
|
26
|
-
* Supports custom cleansing definitions
|
26
|
+
* Supports custom cleansing definitions that can be defined in-line using a block.
|
27
|
+
* A cleansing block can access the other attributes in the model in determining
|
28
|
+
how to cleanse the current attribute
|
29
|
+
* In a cleansing block other attributes can also be modified if necessary
|
30
|
+
* Cleansers are executed in the order they are defined. As a result multiple
|
31
|
+
cleansers can be run against the same field and the order is preserved
|
32
|
+
* Multiple cleansers can be specified for a list of attributes at the same time
|
33
|
+
* Inheritance is supported. The cleansers for parent classes are run before
|
34
|
+
the child's cleansers
|
35
|
+
* Cleansers can be called outside of a model instance for cases where fields
|
36
|
+
need to be cleansed before the model is created, or before it needs to be found
|
37
|
+
* Logging of data cleansing with the before and after values for troubleshooting.
|
38
|
+
Depending on the log level all modified fields are logged, or just the ones
|
39
|
+
completely wiped out to nil
|
40
|
+
|
41
|
+
## ActiveRecord (ActiveModel) Features
|
42
|
+
|
43
|
+
* Passes the value of the attribute before the Rails type cast so that the
|
44
|
+
original text can be cleansed before passing back to rails for type conversion.
|
45
|
+
This is important for numeric and date fields where spaces and control characters
|
46
|
+
can have undesired effects
|
27
47
|
|
28
48
|
## Examples
|
29
49
|
|
@@ -134,6 +154,62 @@ puts "After data cleansing #{u.inspect}"
|
|
134
154
|
# After data cleansing #<User:0x007fdd5a83a8f8 @first_name="joe", @last_name="black", @address1="2632 Brown St", @title="MR.", @gender="Male">
|
135
155
|
```
|
136
156
|
|
157
|
+
## Rails configuration
|
158
|
+
|
159
|
+
When DataCleansing is used in a Rails environment it can be configured using the
|
160
|
+
regular Rails configuration mechanisms. For example:
|
161
|
+
|
162
|
+
```ruby
|
163
|
+
module MyApplication
|
164
|
+
class Application < Rails::Application
|
165
|
+
|
166
|
+
# Data Cleansing Configuration
|
167
|
+
|
168
|
+
# Attributes whose values are to be masked out during logging
|
169
|
+
config.data_cleansing.register_masked_attributes :bank_account_number, :social_security_number
|
170
|
+
|
171
|
+
# Optionally override the default log level
|
172
|
+
# Set to :trace or :debug to log all fields modified
|
173
|
+
# Set to :info to log only those fields which were nilled out
|
174
|
+
# Set to :warn or higher to disable logging of cleansing actions
|
175
|
+
config.data_cleansing.logger.level = :info
|
176
|
+
|
177
|
+
# Register any global cleaners
|
178
|
+
config.data_cleansing.register_cleaner(:strip) {|string| string.strip!}
|
179
|
+
|
180
|
+
end
|
181
|
+
end
|
182
|
+
```
|
183
|
+
|
184
|
+
## Logging
|
185
|
+
|
186
|
+
DataCleansing uses SemanticLogger for logging due to its excellent integration
|
187
|
+
with Rails and its ability to log data in its raw form to Mongo and to files.
|
188
|
+
|
189
|
+
If running a Rails application it is recommended to install the gem
|
190
|
+
rails_semantic_logger which replaces the default Rails logger. It is however
|
191
|
+
possible to configure the semantic_logger gem to use the existing Rails logger
|
192
|
+
in a Rails initializer as follows:
|
193
|
+
|
194
|
+
```ruby
|
195
|
+
SemanticLogger.default_level = Rails.logger.level
|
196
|
+
SemanticLogger.add_appender(Rails.logger)
|
197
|
+
```
|
198
|
+
|
199
|
+
By changing the log level for DataCleansing the type of output for data
|
200
|
+
cleansing can be controlled:
|
201
|
+
|
202
|
+
* :trace or :debug to log all fields modified
|
203
|
+
* :info to log only those fields which were nilled out
|
204
|
+
* :warn or higher to disable logging of cleansing actions
|
205
|
+
|
206
|
+
To change the log level, either use the Rails configuration approach, or set it
|
207
|
+
directly:
|
208
|
+
|
209
|
+
```ruby
|
210
|
+
DataCleansing.logger.level = :info
|
211
|
+
```
|
212
|
+
|
137
213
|
## Notes
|
138
214
|
|
139
215
|
Cleaners are called in the order in which they are defined, so subsequent cleaners
|
@@ -178,7 +254,7 @@ DataCleansing requires the following dependencies
|
|
178
254
|
|
179
255
|
* Ruby V1.8.7, V1.9.3 or V2 and greater
|
180
256
|
* Rails V2 or greater for Rails integration ( Only if Rails is being used )
|
181
|
-
* Mongoid V2 or greater for
|
257
|
+
* Mongoid V2 or greater for Mongoid integration ( Only if Mongoid is being used )
|
182
258
|
|
183
259
|
## Meta
|
184
260
|
|
@@ -114,11 +114,12 @@ module DataCleansing
|
|
114
114
|
def data_cleansing_execute_cleaners(cleaners)
|
115
115
|
return false if cleaners.nil?
|
116
116
|
|
117
|
-
# Capture all changes to attributes if the log level is
|
118
|
-
changes = {} if DataCleansing.logger.
|
117
|
+
# Capture all changes to attributes if the log level is :info or greater
|
118
|
+
changes = {} if DataCleansing.logger.info?
|
119
|
+
# Capture all modified fields if log_level is :debug or :trace
|
120
|
+
verbose = DataCleansing.logger.debug?
|
119
121
|
|
120
|
-
|
121
|
-
DataCleansing.logger.send("benchmark_#{DataCleansing.cleansing_log_level}","Cleansed Attributes", :payload => changes) do
|
122
|
+
DataCleansing.logger.benchmark_info("cleanse_attributes!", :payload => changes) do
|
122
123
|
cleaners.each do |cleaner_struct|
|
123
124
|
params = cleaner_struct.params
|
124
125
|
attrs = cleaner_struct.attributes
|
@@ -177,10 +178,12 @@ module DataCleansing
|
|
177
178
|
if previous = changes[attr.to_sym]
|
178
179
|
previous[:after] = new_value
|
179
180
|
else
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
181
|
+
if new_value.nil? || verbose
|
182
|
+
changes[attr.to_sym] = {
|
183
|
+
:before => masked ? :masked : value,
|
184
|
+
:after => new_value
|
185
|
+
}
|
186
|
+
end
|
184
187
|
end
|
185
188
|
end
|
186
189
|
end
|
@@ -4,7 +4,6 @@ module DataCleansing
|
|
4
4
|
# Global Data Cleansers
|
5
5
|
@@global_cleaners = ThreadSafe::Hash.new
|
6
6
|
@@masked_attributes = ThreadSafe::Array.new
|
7
|
-
@@cleansing_log_level = :info
|
8
7
|
|
9
8
|
# Register a new cleaner
|
10
9
|
# Replaces any existing cleaner with the same name
|
@@ -28,14 +27,4 @@ module DataCleansing
|
|
28
27
|
@@masked_attributes.freeze
|
29
28
|
end
|
30
29
|
|
31
|
-
# Set the log_level at which to log cleansing activities at
|
32
|
-
def self.cleansing_log_level
|
33
|
-
@@cleansing_log_level
|
34
|
-
end
|
35
|
-
|
36
|
-
# Set the log_level at which to log cleansing activities at
|
37
|
-
def self.cleansing_log_level=(log_level)
|
38
|
-
@@cleansing_log_level = log_level
|
39
|
-
end
|
40
|
-
|
41
30
|
end
|
@@ -6,9 +6,25 @@ module RubySkynet #:nodoc:
|
|
6
6
|
# @example Set up configuration in the Rails app.
|
7
7
|
# module MyApplication
|
8
8
|
# class Application < Rails::Application
|
9
|
-
#
|
10
|
-
#
|
9
|
+
#
|
10
|
+
# # Data Cleansing Configuration
|
11
|
+
#
|
12
|
+
# # By default logging of data cleansing actions is enabled
|
13
|
+
# # Set to false to disable
|
14
|
+
# config.data_cleansing.logging_enabled = true
|
15
|
+
#
|
16
|
+
# # Attributes whose values are to be masked out during logging
|
17
|
+
# config.data_cleansing.register_masked_attributes :bank_account_number, :social_security_number
|
18
|
+
#
|
19
|
+
# # Optionally override the default log level
|
20
|
+
# # Set to :trace or :debug to log all fields modified
|
21
|
+
# # Set to :info to log only those fields which were nilled out
|
22
|
+
# # Set to :warn or higher to disable logging of cleansing actions
|
23
|
+
# config.data_cleansing.logger.level = :info
|
24
|
+
#
|
25
|
+
# # Register any global cleaners
|
11
26
|
# config.data_cleansing.register_cleaner(:strip) {|string| string.strip!}
|
27
|
+
#
|
12
28
|
# end
|
13
29
|
# end
|
14
30
|
config.data_cleansing = ::DataCleansing
|
data/test/active_record_test.rb
CHANGED
@@ -39,9 +39,9 @@ end
|
|
39
39
|
# Define a global cleaner
|
40
40
|
DataCleansing.register_cleaner(:strip) {|string, params, object| string.strip!}
|
41
41
|
|
42
|
-
# Log data cleansing result
|
43
|
-
# Set to
|
44
|
-
DataCleansing.
|
42
|
+
# Log data cleansing result
|
43
|
+
# Set to :warn or higher to disable
|
44
|
+
DataCleansing.logger.level = :debug
|
45
45
|
|
46
46
|
# Set the Global list of fields to be masked
|
47
47
|
DataCleansing.register_masked_attributes :ssn, :bank_account_number
|
data/test/test_db.sqlite3
CHANGED
Binary file
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: data_cleansing
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.5.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Reid Morrison
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2013-07-
|
11
|
+
date: 2013-07-26 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: thread_safe
|