pii-detector 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +4 -0
- data/.rubocop.yml +2 -0
- data/Gemfile +20 -0
- data/Gemfile.lock +75 -0
- data/README.md +1 -0
- data/Rakefile +11 -0
- data/bin/console +8 -0
- data/dev.yml +21 -0
- data/lib/config/pii_fields +25 -0
- data/lib/hash.rb +9 -0
- data/lib/pii_detector.rb +36 -0
- data/lib/pii_detector/comparator.rb +28 -0
- data/lib/pii_detector/configuration.rb +29 -0
- data/lib/pii_detector/root.rb +8 -0
- data/lib/pii_detector/version.rb +5 -0
- data/lib/string.rb +11 -0
- data/pii_detector.gemspec +16 -0
- data/shitip.yml +13 -0
- data/test/comparator_test.rb +20 -0
- data/test/configuration_test.rb +38 -0
- data/test/hash_test.rb +38 -0
- data/test/pii_detector_test.rb +18 -0
- data/test/string_test.rb +10 -0
- data/test/test_helper.rb +5 -0
- metadata +67 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 4be466fefd667de37780c94c4af71fad5a310c89
|
4
|
+
data.tar.gz: 369b6ee78bd3a55a793a785e15ccaaf0310448c3
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 3f55d0e0e71cee1d119cfd7ea72535245dd55ca2f4eb08832bd925b3075b356deb677295d709278093a91f1c3b3c314ecf6f91d18ef87e7053f02958823af538
|
7
|
+
data.tar.gz: af610219ad5697750fcd875aa1f1b9370320bb2bec440327780f373b4292594b62272383027abdfa1ed8b363e57472dbdc3e6d02fabb4f5048a6bea1fd638c3b
|
data/.gitignore
ADDED
data/.rubocop.yml
ADDED
data/Gemfile
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
source 'https://rubygems.org'

gem 'json'
gem 'levenshtein-ffi'

# rake is used by both groups. Declaring it once for both avoids Bundler's
# "Your Gemfile lists the gem rake more than once" warning.
group :deployment, :development do
  gem 'rake'
end

group :deployment do
  gem 'package_cloud'
end

group :development do
  gem 'minitest'
  gem 'pry'
  gem 'rubocop'
end

gemspec
|
data/Gemfile.lock
ADDED
@@ -0,0 +1,75 @@
|
|
1
|
+
PATH
|
2
|
+
remote: .
|
3
|
+
specs:
|
4
|
+
pii-detector (1.0.0)
|
5
|
+
|
6
|
+
GEM
|
7
|
+
remote: https://rubygems.org/
|
8
|
+
specs:
|
9
|
+
ast (2.4.0)
|
10
|
+
coderay (1.1.2)
|
11
|
+
domain_name (0.5.20170404)
|
12
|
+
unf (>= 0.0.5, < 1.0.0)
|
13
|
+
ffi (1.9.18)
|
14
|
+
highline (1.6.20)
|
15
|
+
http-cookie (1.0.3)
|
16
|
+
domain_name (~> 0.5)
|
17
|
+
json (2.1.0)
|
18
|
+
json_pure (1.8.1)
|
19
|
+
levenshtein-ffi (1.1.0)
|
20
|
+
ffi (~> 1.9)
|
21
|
+
method_source (0.9.0)
|
22
|
+
mime-types (3.1)
|
23
|
+
mime-types-data (~> 3.2015)
|
24
|
+
mime-types-data (3.2016.0521)
|
25
|
+
minitest (5.11.3)
|
26
|
+
netrc (0.11.0)
|
27
|
+
package_cloud (0.3.03)
|
28
|
+
highline (= 1.6.20)
|
29
|
+
json_pure (= 1.8.1)
|
30
|
+
rainbow (= 2.2.2)
|
31
|
+
rest-client (~> 2.0)
|
32
|
+
thor (~> 0.18)
|
33
|
+
parallel (1.12.1)
|
34
|
+
parser (2.4.0.2)
|
35
|
+
ast (~> 2.3)
|
36
|
+
powerpack (0.1.1)
|
37
|
+
pry (0.11.3)
|
38
|
+
coderay (~> 1.1.0)
|
39
|
+
method_source (~> 0.9.0)
|
40
|
+
rainbow (2.2.2)
|
41
|
+
rake
|
42
|
+
rake (12.3.0)
|
43
|
+
rest-client (2.0.2)
|
44
|
+
http-cookie (>= 1.0.2, < 2.0)
|
45
|
+
mime-types (>= 1.16, < 4.0)
|
46
|
+
netrc (~> 0.8)
|
47
|
+
rubocop (0.52.1)
|
48
|
+
parallel (~> 1.10)
|
49
|
+
parser (>= 2.4.0.2, < 3.0)
|
50
|
+
powerpack (~> 0.1)
|
51
|
+
rainbow (>= 2.2.2, < 4.0)
|
52
|
+
ruby-progressbar (~> 1.7)
|
53
|
+
unicode-display_width (~> 1.0, >= 1.0.1)
|
54
|
+
ruby-progressbar (1.9.0)
|
55
|
+
thor (0.20.0)
|
56
|
+
unf (0.1.4)
|
57
|
+
unf_ext
|
58
|
+
unf_ext (0.0.7.4)
|
59
|
+
unicode-display_width (1.3.0)
|
60
|
+
|
61
|
+
PLATFORMS
|
62
|
+
ruby
|
63
|
+
|
64
|
+
DEPENDENCIES
|
65
|
+
json
|
66
|
+
levenshtein-ffi
|
67
|
+
minitest
|
68
|
+
package_cloud
|
69
|
+
pii-detector!
|
70
|
+
pry
|
71
|
+
rake
|
72
|
+
rubocop
|
73
|
+
|
74
|
+
BUNDLED WITH
|
75
|
+
1.16.1
|
data/README.md
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
# PII Detector
|
data/Rakefile
ADDED
data/bin/console
ADDED
data/dev.yml
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
name: pii-detector
|
2
|
+
|
3
|
+
up:
|
4
|
+
- ruby: 2.5.0
|
5
|
+
- bundler
|
6
|
+
|
7
|
+
commands:
|
8
|
+
test:
|
9
|
+
syntax:
|
10
|
+
argument: file
|
11
|
+
optional: args...
|
12
|
+
run: |
|
13
|
+
if [[ $# -eq 0 ]]; then
|
14
|
+
rake test
|
15
|
+
else
|
16
|
+
bundle exec ruby -Itest "$@"
|
17
|
+
fi
|
18
|
+
console:
|
19
|
+
run: bundle exec ruby bin/console
|
20
|
+
check:
|
21
|
+
run: cd lib/ && bundle exec rubocop
|
data/lib/hash.rb
ADDED
data/lib/pii_detector.rb
ADDED
@@ -0,0 +1,36 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'hash'
|
4
|
+
|
5
|
+
require 'pii_detector/root'
|
6
|
+
require 'pii_detector/version'
|
7
|
+
require 'pii_detector/comparator'
|
8
|
+
require 'pii_detector/configuration'
|
9
|
+
|
10
|
+
# Entry point for detecting personally identifiable information (PII) by field
# name. The module extends itself, so every method below is called directly on
# it, e.g. `PIIDetector.pii_field?('email')`.
module PIIDetector
  extend self

  # Lists the keys of +value+ that look like PII field names.
  #
  # Candidate keys come from +Hash#unique_keys+ (provided by the `hash` file
  # required at the top of this file); each candidate is checked with
  # {#pii_field?}.
  #
  # @param value [Hash]
  # @return [Array] matching keys, without duplicates
  def retrieve_pii_keys_from_hash(value)
    value.unique_keys.select { |field| pii_field?(field) }.uniq
  end

  # Tells whether +value+ is similar to any configured PII field name.
  #
  # @param value [String]
  # @return [Boolean]
  def pii_field?(value)
    config.pii_fields.any? { |pii| Comparator.similar?(pii, value) }
  end

  # Builds the module-wide configuration on first use and, when a block is
  # given, yields it so the caller can adjust settings.
  #
  # @yieldparam config [PIIDetector::Configuration]
  # @return [PIIDetector::Configuration]
  def configure
    @config ||= Configuration.new
    yield(@config) if block_given?
    @config
  end

  # Returns the module-wide configuration, creating it if needed.
  #
  # @return [PIIDetector::Configuration]
  def config
    @config || configure
  end
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'levenshtein-ffi'
|
4
|
+
require 'string'
|
5
|
+
|
6
|
+
module PIIDetector
  # Decides whether a field name is close enough to a known PII field name.
  class Comparator
    class << self
      # Reports whether +check+ refers to the PII field +desired+.
      #
      # Both names are stripped and passed through +String#underscore+ before
      # comparing. A candidate of one character or fewer never matches.
      # Otherwise the candidate matches when it
      #
      # * equals the PII name                      (first_name),
      # * starts with the PII name followed by `_` (first_name_of_customer),
      # * ends with the PII name preceded by `_`   (customer_first_name),
      # * contains the PII name wrapped in `_`     (user_first_name_field), or
      # * is longer than the configured edit distance and its Levenshtein
      #   distance to the PII name is below that limit (first_nam).
      #
      # The PII name is compared as plain text, never as a regular expression,
      # so characters such as `.` or `?` in a configured name carry no special
      # meaning. Names that only contain a truncated PII name after an
      # underscore (e.g. customer_first_namespace) are not matches.
      #
      # @param desired [String] known PII field name
      # @param check [String] field name being tested
      # @return [Boolean]
      def similar?(desired, check)
        pii_name = desired.strip.underscore
        candidate = check.strip.underscore

        return false if candidate.length <= 1

        return true if candidate == pii_name ||
                       candidate.start_with?("#{pii_name}_") ||
                       candidate.end_with?("_#{pii_name}") ||
                       candidate.include?("_#{pii_name}_")

        within_edit_distance?(pii_name, candidate)
      end

      private

      # Fuzzy fallback, only evaluated when no exact/affix rule matched so the
      # distance is not computed needlessly.
      #
      # @param pii_name [String] normalized PII field name
      # @param candidate [String] normalized field name being tested
      # @return [Boolean]
      def within_edit_distance?(pii_name, candidate)
        limit = PIIDetector.config.edit_distance
        # Candidates no longer than the limit are skipped: almost any short
        # string would otherwise be "close" to something.
        return false unless limit < candidate.length

        Levenshtein.distance(pii_name, candidate) < limit
      end
    end
  end
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# Set#| and Array#to_set are used below; requiring 'set' is a no-op when it is
# already loaded.
require 'set'

module PIIDetector
  # Settings used when comparing field names against known PII field names.
  class Configuration
    # @return [Integer] edit-distance limit used for fuzzy matching (default 2)
    attr_accessor :edit_distance

    # @return [Set] default PII field names plus any custom ones
    attr_reader :pii_fields

    def initialize
      @edit_distance = 2
      # Copy the defaults so in-place changes to #pii_fields (e.g. `<<`) cannot
      # alter the memoized default set.
      @pii_fields = pii_fields_from_config.dup
    end

    # Replaces the field list with the defaults plus +value+. The defaults are
    # always kept; fields from an earlier assignment are kept only if they are
    # part of +value+.
    #
    # @param value [Array] or [Set]
    def pii_fields=(value)
      # Set#| returns a new set; Set#merge would modify the defaults in place.
      @pii_fields = pii_fields_from_config | value
    end

    private

    # Default PII field names, read once per instance from
    # lib/config/pii_fields under PIIDetector.root_path: one name per line,
    # empty lines ignored.
    #
    # @return [Set]
    def pii_fields_from_config
      @default_pii_fields ||=
        File.read(File.join(PIIDetector.root_path, 'lib/config', 'pii_fields'))
            .split("\n")
            .reject(&:empty?)
            .to_set
    end
  end
end
|
data/lib/string.rb
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require File.expand_path('../lib/pii_detector/version', __FILE__)
|
4
|
+
|
5
|
+
# Gem specification for pii-detector.
Gem::Specification.new do |spec|
  spec.name = 'pii-detector'
  spec.summary = 'Ruby gem to detect personally identifiable information.'
  spec.version = PIIDetector::VERSION
  spec.authors = ['']
  spec.email = ''
  spec.homepage = 'https://github.com/Shopify/pii-detector'
  spec.license = 'Shopify'

  # One path per line of `git ls-files` output.
  spec.files = `git ls-files`.split("\n")
  spec.require_paths = ['lib']

  # lib/pii_detector/comparator.rb requires 'levenshtein-ffi' at load time, so
  # it must be a runtime dependency; listing it only in the Gemfile leaves the
  # installed gem unable to load.
  spec.add_runtime_dependency 'levenshtein-ffi', '~> 1.1'
end
|
data/shitip.yml
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'test_helper'
|
4
|
+
|
5
|
+
# Exercises PIIDetector::Comparator.similar? against the PII name 'first_name'.
class ComparatorTest < Minitest::Test
  PII_NAME = 'first_name'

  # Exact name, a one-character typo, and the name as prefix, suffix or
  # middle segment of a longer field name all count as similar.
  def test_similar
    assert similar_to_pii_name?('first_name')
    assert similar_to_pii_name?('first_nam')
    assert similar_to_pii_name?('customer_first_name')
    assert similar_to_pii_name?('first_name_of_customer')
    assert similar_to_pii_name?('user_first_name_field')
  end

  # Unrelated names and fragments that are too far from the PII name do not.
  def test_not_similar
    refute similar_to_pii_name?('event')
    refute similar_to_pii_name?('first')
    refute similar_to_pii_name?('first_na')
    refute similar_to_pii_name?('na')
  end

  private

  # @param candidate [String] field name compared with PII_NAME
  def similar_to_pii_name?(candidate)
    PIIDetector::Comparator.similar?(PII_NAME, candidate)
  end
end
|
@@ -0,0 +1,38 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'test_helper'
|
4
|
+
|
5
|
+
# Covers reading and overriding settings through PIIDetector.configure and
# PIIDetector.config.
class ConfigurationTest < Minitest::Test
  # PIIDetector keeps a single module-wide configuration. Drop it after every
  # test so changes made here (edit distance, extra PII fields) cannot affect
  # tests that run later; the next access builds a fresh configuration.
  def teardown
    PIIDetector.instance_variable_set(:@config, nil)
  end

  # Settings can be changed both inside a configure block and directly on the
  # configuration object.
  def test_configuration_default_values
    new_pii_fields = Set['unknown_field1']

    PIIDetector.configure do |config|
      config.edit_distance = 3
      config.pii_fields += new_pii_fields
    end

    assert_equal 3, PIIDetector.config.edit_distance
    assert new_pii_fields.subset?(PIIDetector.config.pii_fields)

    new_pii_fields.merge(['unknown_field2'])
    PIIDetector.config.edit_distance = 2
    PIIDetector.config.pii_fields += new_pii_fields

    assert_equal 2, PIIDetector.config.edit_distance
    assert new_pii_fields.subset?(PIIDetector.config.pii_fields)
  end

  # Assigning a setting that does not exist is an error.
  def test_configuration_set_not_exists_attribute
    assert_raises NoMethodError do
      PIIDetector.configure do |config|
        config.unknown_attribute = 'test'
      end
    end
  end

  # Reading a setting that does not exist is an error.
  def test_configuration_get_not_exists_attribute
    assert_raises NoMethodError do
      PIIDetector.config.unknown_attribute
    end
  end
end
|
data/test/hash_test.rb
ADDED
@@ -0,0 +1,38 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'test_helper'
|
4
|
+
|
5
|
+
# Covers Hash#unique_keys, which these tests expect to return key names as
# strings.
class HashTest < Minitest::Test
  # Keys of nested hashes are included.
  def test_unique_keys_from_multilevel_hash
    nested = { a: { b: { c: 'c' } } }

    assert_equal(%w[a b c], nested.unique_keys)
  end

  # Keys of hashes stored inside an array value are included.
  def test_unique_keys_from_array_of_hashes
    with_array = { a: [{ b: 'b' }, { c: 'c' }] }

    assert_equal(%w[a b c], with_array.unique_keys)
  end

  # A JSON-encoded string value is treated as a plain string: the keys inside
  # it are not extracted.
  def test_unique_keys_from_json_string_value
    with_json_string = { a: { b: 'b' }.to_json }

    assert_equal(%w[a], with_json_string.unique_keys)
  end
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'test_helper'
|
4
|
+
|
5
|
+
# Covers the module-level PIIDetector API.
class PIIDetectorTest < Minitest::Test
  # Only keys that look like PII field names are returned.
  def test_retrieve_pii_keys_from_hash
    payload = { first_name: 'Dave', event: 'checkout' }
    detected = PIIDetector.retrieve_pii_keys_from_hash(payload)

    assert_equal(['first_name'], detected)
  end

  # A known PII name is recognised; an unrelated name is not.
  def test_pii_field
    assert PIIDetector.pii_field?('first_name')
    refute PIIDetector.pii_field?('event')
  end
end
|
data/test/string_test.rb
ADDED
data/test/test_helper.rb
ADDED
metadata
ADDED
@@ -0,0 +1,67 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: pii-detector
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 1.0.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- ''
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2018-02-01 00:00:00.000000000 Z
|
12
|
+
dependencies: []
|
13
|
+
description:
|
14
|
+
email: ''
|
15
|
+
executables: []
|
16
|
+
extensions: []
|
17
|
+
extra_rdoc_files: []
|
18
|
+
files:
|
19
|
+
- ".gitignore"
|
20
|
+
- ".rubocop.yml"
|
21
|
+
- Gemfile
|
22
|
+
- Gemfile.lock
|
23
|
+
- README.md
|
24
|
+
- Rakefile
|
25
|
+
- bin/console
|
26
|
+
- dev.yml
|
27
|
+
- lib/config/pii_fields
|
28
|
+
- lib/hash.rb
|
29
|
+
- lib/pii_detector.rb
|
30
|
+
- lib/pii_detector/comparator.rb
|
31
|
+
- lib/pii_detector/configuration.rb
|
32
|
+
- lib/pii_detector/root.rb
|
33
|
+
- lib/pii_detector/version.rb
|
34
|
+
- lib/string.rb
|
35
|
+
- pii_detector.gemspec
|
36
|
+
- shitip.yml
|
37
|
+
- test/comparator_test.rb
|
38
|
+
- test/configuration_test.rb
|
39
|
+
- test/hash_test.rb
|
40
|
+
- test/pii_detector_test.rb
|
41
|
+
- test/string_test.rb
|
42
|
+
- test/test_helper.rb
|
43
|
+
homepage: https://github.com/Shopify/pii-detector
|
44
|
+
licenses:
|
45
|
+
- Shopify
|
46
|
+
metadata: {}
|
47
|
+
post_install_message:
|
48
|
+
rdoc_options: []
|
49
|
+
require_paths:
|
50
|
+
- lib
|
51
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
52
|
+
requirements:
|
53
|
+
- - ">="
|
54
|
+
- !ruby/object:Gem::Version
|
55
|
+
version: '0'
|
56
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
57
|
+
requirements:
|
58
|
+
- - ">="
|
59
|
+
- !ruby/object:Gem::Version
|
60
|
+
version: '0'
|
61
|
+
requirements: []
|
62
|
+
rubyforge_project:
|
63
|
+
rubygems_version: 2.6.14
|
64
|
+
signing_key:
|
65
|
+
specification_version: 4
|
66
|
+
summary: Ruby gem to detect personally identifiable information.
|
67
|
+
test_files: []
|