pii-detector 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +4 -0
- data/.rubocop.yml +2 -0
- data/Gemfile +20 -0
- data/Gemfile.lock +75 -0
- data/README.md +1 -0
- data/Rakefile +11 -0
- data/bin/console +8 -0
- data/dev.yml +21 -0
- data/lib/config/pii_fields +25 -0
- data/lib/hash.rb +9 -0
- data/lib/pii_detector.rb +36 -0
- data/lib/pii_detector/comparator.rb +28 -0
- data/lib/pii_detector/configuration.rb +29 -0
- data/lib/pii_detector/root.rb +8 -0
- data/lib/pii_detector/version.rb +5 -0
- data/lib/string.rb +11 -0
- data/pii_detector.gemspec +16 -0
- data/shitip.yml +13 -0
- data/test/comparator_test.rb +20 -0
- data/test/configuration_test.rb +38 -0
- data/test/hash_test.rb +38 -0
- data/test/pii_detector_test.rb +18 -0
- data/test/string_test.rb +10 -0
- data/test/test_helper.rb +5 -0
- metadata +67 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 4be466fefd667de37780c94c4af71fad5a310c89
|
4
|
+
data.tar.gz: 369b6ee78bd3a55a793a785e15ccaaf0310448c3
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 3f55d0e0e71cee1d119cfd7ea72535245dd55ca2f4eb08832bd925b3075b356deb677295d709278093a91f1c3b3c314ecf6f91d18ef87e7053f02958823af538
|
7
|
+
data.tar.gz: af610219ad5697750fcd875aa1f1b9370320bb2bec440327780f373b4292594b62272383027abdfa1ed8b363e57472dbdc3e6d02fabb4f5048a6bea1fd638c3b
|
data/.gitignore
ADDED
data/.rubocop.yml
ADDED
data/Gemfile
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
source 'https://rubygems.org'

# Libraries listed alongside the gemspec for local development.
gem 'json'
gem 'levenshtein-ffi'

# rake is needed by both groups. It is declared once for both of them because
# Bundler warns when the same gem appears more than once in a Gemfile.
group :deployment, :development do
  gem 'rake'
end

# Tooling used only when publishing the package.
group :deployment do
  gem 'package_cloud'
end

# Tooling used only while working on the gem.
group :development do
  gem 'minitest'
  gem 'pry'
  gem 'rubocop'
end

gemspec
|
data/Gemfile.lock
ADDED
@@ -0,0 +1,75 @@
|
|
1
|
+
PATH
|
2
|
+
remote: .
|
3
|
+
specs:
|
4
|
+
pii-detector (1.0.0)
|
5
|
+
|
6
|
+
GEM
|
7
|
+
remote: https://rubygems.org/
|
8
|
+
specs:
|
9
|
+
ast (2.4.0)
|
10
|
+
coderay (1.1.2)
|
11
|
+
domain_name (0.5.20170404)
|
12
|
+
unf (>= 0.0.5, < 1.0.0)
|
13
|
+
ffi (1.9.18)
|
14
|
+
highline (1.6.20)
|
15
|
+
http-cookie (1.0.3)
|
16
|
+
domain_name (~> 0.5)
|
17
|
+
json (2.1.0)
|
18
|
+
json_pure (1.8.1)
|
19
|
+
levenshtein-ffi (1.1.0)
|
20
|
+
ffi (~> 1.9)
|
21
|
+
method_source (0.9.0)
|
22
|
+
mime-types (3.1)
|
23
|
+
mime-types-data (~> 3.2015)
|
24
|
+
mime-types-data (3.2016.0521)
|
25
|
+
minitest (5.11.3)
|
26
|
+
netrc (0.11.0)
|
27
|
+
package_cloud (0.3.03)
|
28
|
+
highline (= 1.6.20)
|
29
|
+
json_pure (= 1.8.1)
|
30
|
+
rainbow (= 2.2.2)
|
31
|
+
rest-client (~> 2.0)
|
32
|
+
thor (~> 0.18)
|
33
|
+
parallel (1.12.1)
|
34
|
+
parser (2.4.0.2)
|
35
|
+
ast (~> 2.3)
|
36
|
+
powerpack (0.1.1)
|
37
|
+
pry (0.11.3)
|
38
|
+
coderay (~> 1.1.0)
|
39
|
+
method_source (~> 0.9.0)
|
40
|
+
rainbow (2.2.2)
|
41
|
+
rake
|
42
|
+
rake (12.3.0)
|
43
|
+
rest-client (2.0.2)
|
44
|
+
http-cookie (>= 1.0.2, < 2.0)
|
45
|
+
mime-types (>= 1.16, < 4.0)
|
46
|
+
netrc (~> 0.8)
|
47
|
+
rubocop (0.52.1)
|
48
|
+
parallel (~> 1.10)
|
49
|
+
parser (>= 2.4.0.2, < 3.0)
|
50
|
+
powerpack (~> 0.1)
|
51
|
+
rainbow (>= 2.2.2, < 4.0)
|
52
|
+
ruby-progressbar (~> 1.7)
|
53
|
+
unicode-display_width (~> 1.0, >= 1.0.1)
|
54
|
+
ruby-progressbar (1.9.0)
|
55
|
+
thor (0.20.0)
|
56
|
+
unf (0.1.4)
|
57
|
+
unf_ext
|
58
|
+
unf_ext (0.0.7.4)
|
59
|
+
unicode-display_width (1.3.0)
|
60
|
+
|
61
|
+
PLATFORMS
|
62
|
+
ruby
|
63
|
+
|
64
|
+
DEPENDENCIES
|
65
|
+
json
|
66
|
+
levenshtein-ffi
|
67
|
+
minitest
|
68
|
+
package_cloud
|
69
|
+
pii-detector!
|
70
|
+
pry
|
71
|
+
rake
|
72
|
+
rubocop
|
73
|
+
|
74
|
+
BUNDLED WITH
|
75
|
+
1.16.1
|
data/README.md
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
# PII Detector
|
data/Rakefile
ADDED
data/bin/console
ADDED
data/dev.yml
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
name: pii-detector
|
2
|
+
|
3
|
+
up:
|
4
|
+
- ruby: 2.5.0
|
5
|
+
- bundler
|
6
|
+
|
7
|
+
commands:
|
8
|
+
test:
|
9
|
+
syntax:
|
10
|
+
argument: file
|
11
|
+
optional: args...
|
12
|
+
run: |
|
13
|
+
if [[ $# -eq 0 ]]; then
|
14
|
+
rake test
|
15
|
+
else
|
16
|
+
bundle exec ruby -Itest "$@"
|
17
|
+
fi
|
18
|
+
console:
|
19
|
+
run: bundle exec ruby bin/console
|
20
|
+
check:
|
21
|
+
run: cd lib/ && bundle exec rubocop
|
data/lib/hash.rb
ADDED
data/lib/pii_detector.rb
ADDED
@@ -0,0 +1,36 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'hash'
|
4
|
+
|
5
|
+
require 'pii_detector/root'
|
6
|
+
require 'pii_detector/version'
|
7
|
+
require 'pii_detector/comparator'
|
8
|
+
require 'pii_detector/configuration'
|
9
|
+
|
10
|
+
# Public entry point of the gem: reports which key names look like personally
# identifiable information (PII) and exposes the shared configuration.
module PIIDetector
  extend self

  # Collects the keys of +value+ whose names look like PII fields.
  #
  # @param value [Hash] must respond to +unique_keys+ (presumably added by
  #   this gem's Hash extension, loaded through `require 'hash'`)
  # @return [Array] the keys for which {#pii_field?} is truthy, duplicates removed
  def retrieve_pii_keys_from_hash(value)
    value.unique_keys.select { |field| pii_field?(field) }.uniq
  end

  # Tells whether a single field name resembles any configured PII field.
  #
  # @param value [String] field name to check
  # @return [Boolean] truthy when Comparator.similar? accepts at least one
  #   configured PII field for +value+
  def pii_field?(value)
    config.pii_fields.any? { |pii| Comparator.similar?(pii, value) }
  end

  # Lazily builds the shared configuration and optionally yields it so the
  # caller can adjust it in a block.
  #
  # @yieldparam config [PIIDetector::Configuration] the shared instance
  # @return [PIIDetector::Configuration]
  def configure
    @config ||= Configuration.new
    yield(@config) if block_given?
    @config
  end

  # Returns the shared configuration, creating it on first use.
  #
  # @return [PIIDetector::Configuration]
  def config
    @config || configure
  end
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'levenshtein-ffi'
|
4
|
+
require 'string'
|
5
|
+
|
6
|
+
module PIIDetector
  # Decides whether a field name is close enough to a known PII field name.
  class Comparator
    class << self
      # Compares the field name +check+ with the known PII name +desired+.
      #
      # Both names are first normalised with +strip+ and +underscore+
      # (String#underscore is presumably provided by this gem's String
      # extension, loaded through `require 'string'`). +check+ is then
      # accepted when it:
      #
      # * equals +desired+,
      # * starts with "<desired>_",
      # * contains "_<desired>" (see the review note in the body),
      # * contains "_<desired>_", or
      # * is within the configured Levenshtein edit distance of +desired+.
      #
      # @param desired [String] known PII field name
      # @param check [String] field name under inspection
      # @return [Boolean] strict true/false; earlier versions could return a
      #   MatchData, which has the same truthiness
      def similar?(desired, check)
        wanted = desired.strip.underscore
        candidate = check.strip.underscore

        # A blank pattern would turn the substring checks below into
        # match-everything expressions (e.g. /_?/), flagging every key.
        # Candidates of one character or less are too short to judge.
        return false if wanted.empty? || candidate.length <= 1
        return true if candidate == wanted

        # Configured names are matched literally: escaping keeps regex
        # metacharacters in a name from raising or matching unrelated keys.
        escaped = Regexp.escape(wanted)

        # NOTE(review): the trailing `?` in the second pattern makes the last
        # character of the name optional and the pattern is unanchored, so it
        # also covers everything the third pattern matches. It is kept as it
        # was because tightening it would reduce what gets flagged -- confirm
        # the intended rule before changing it.
        return true if candidate.match?(/^#{escaped}_/) ||
                       candidate.match?(/_#{escaped}?/) ||
                       candidate.match?(/[a-z\d]*_#{escaped}_[a-z\d]*/)

        # The edit distance is only computed when nothing above matched.
        limit = PIIDetector.config.edit_distance
        limit < candidate.length && Levenshtein.distance(wanted, candidate) < limit
      end
    end
  end
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'set' # Array#to_set and Set are used below

module PIIDetector
  # Settings used by the detector: the edit-distance threshold and the set of
  # known PII field names.
  class Configuration
    # Threshold compared against the Levenshtein distance (2 by default).
    attr_accessor :edit_distance
    # @return [Set] default PII field names plus any assigned through #pii_fields=
    attr_reader :pii_fields

    def initialize
      @edit_distance = 2
      # Hand out a copy so that mutating the returned set (e.g. with <<)
      # cannot alter the defaults.
      @pii_fields = pii_fields_from_config.dup
    end

    # Replaces the PII fields with the defaults plus +value+.
    #
    # Set#| builds a new set. The previous Set#merge call mutated the
    # memoised defaults, so every assignment permanently grew them.
    #
    # @param value [Array] or [Set]
    def pii_fields=(value)
      @pii_fields = pii_fields_from_config | value
    end

    private

    # Reads the default field names, one per line, from lib/config/pii_fields
    # under PIIDetector.root_path. Lines are stripped so blank, whitespace-only
    # and CRLF-terminated lines do not become field names. The result is
    # memoised and frozen.
    #
    # @return [Set]
    def pii_fields_from_config
      @default_pii_fields ||=
        File.read(File.join(PIIDetector.root_path, 'lib/config', 'pii_fields'))
            .each_line
            .map(&:strip)
            .reject(&:empty?)
            .to_set
            .freeze
    end
  end
end
|
data/lib/string.rb
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require File.expand_path('../lib/pii_detector/version', __FILE__)
|
4
|
+
|
5
|
+
# Package definition for the pii-detector gem.
Gem::Specification.new do |spec|
  spec.name = 'pii-detector'
  spec.summary = 'Ruby gem to detect personally identifiable information.'
  spec.version = PIIDetector::VERSION
  spec.authors = ['']
  spec.email = ''
  spec.homepage = 'https://github.com/Shopify/pii-detector'
  spec.license = 'Shopify'

  # NUL-delimited output keeps git from quoting unusual file names and avoids
  # depending on the global record separator ($/).
  spec.files = `git ls-files -z`.split("\x0")
  spec.require_paths = ['lib']

  # lib/pii_detector/comparator.rb requires 'levenshtein-ffi' at load time, so
  # it must be installed together with the gem and not only via the Gemfile.
  # NOTE(review): the constraint follows the 1.1.0 locked in Gemfile.lock --
  # confirm the supported range.
  spec.add_runtime_dependency 'levenshtein-ffi', '~> 1.1'
end
|
data/shitip.yml
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'test_helper'
|
4
|
+
|
5
|
+
# Checks which field names PIIDetector::Comparator treats as similar to
# 'first_name'.
class ComparatorTest < Minitest::Test
  # Exact match, near miss, and the name used as suffix, prefix and infix.
  def test_similar
    accepted = %w[
      first_name
      first_nam
      customer_first_name
      first_name_of_customer
      user_first_name_field
    ]

    accepted.each do |candidate|
      assert PIIDetector::Comparator.similar?('first_name', candidate)
    end
  end

  # Unrelated or overly truncated names must be rejected.
  def test_not_similar
    rejected = %w[event first first_na na]

    rejected.each do |candidate|
      refute PIIDetector::Comparator.similar?('first_name', candidate)
    end
  end
end
|
@@ -0,0 +1,38 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'test_helper'
|
4
|
+
|
5
|
+
# Exercises PIIDetector.configure / PIIDetector.config and the attributes of
# the configuration object.
class ConfigurationTest < Minitest::Test
  # Settings can be changed both inside a configure block and directly on
  # PIIDetector.config.
  def test_configuration_default_values
    extra_fields = Set['unknown_field1']

    PIIDetector.configure do |settings|
      settings.edit_distance = 3
      settings.pii_fields += extra_fields
    end

    assert_equal 3, PIIDetector.config.edit_distance
    assert extra_fields.subset?(PIIDetector.config.pii_fields)

    extra_fields << 'unknown_field2'
    shared = PIIDetector.config
    shared.edit_distance = 2
    shared.pii_fields += extra_fields

    assert_equal 2, PIIDetector.config.edit_distance
    assert extra_fields.subset?(PIIDetector.config.pii_fields)
  end

  # Writing an attribute the configuration does not define raises.
  def test_configuration_set_not_exists_attribute
    assert_raises NoMethodError do
      PIIDetector.configure { |settings| settings.unknown_attribute = 'test' }
    end
  end

  # Reading an attribute the configuration does not define raises.
  def test_configuration_get_not_exists_attribute
    assert_raises(NoMethodError) { PIIDetector.config.unknown_attribute }
  end
end
|
data/test/hash_test.rb
ADDED
@@ -0,0 +1,38 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'test_helper'
|
4
|
+
|
5
|
+
# Covers Hash#unique_keys for nested hashes, arrays of hashes and JSON strings.
class HashTest < Minitest::Test
  # Keys of nested hashes are collected at every level.
  def test_unique_keys_from_multilevel_hash
    nested = { a: { b: { c: 'c' } } }

    assert_equal %w[a b c], nested.unique_keys
  end

  # Keys of hashes stored inside an array value are collected too.
  def test_unique_keys_from_array_of_hashes
    with_array = { a: [{ b: 'b' }, { c: 'c' }] }

    assert_equal %w[a b c], with_array.unique_keys
  end

  # A JSON-encoded string value is not parsed; only the outer key is reported.
  def test_unique_keys_from_json_string_value
    with_json = { a: { b: 'b' }.to_json }

    assert_equal %w[a], with_json.unique_keys
  end
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'test_helper'
|
4
|
+
|
5
|
+
# End-to-end checks of the PIIDetector module API.
class PIIDetectorTest < Minitest::Test
  # Only the key that names a PII field is reported.
  def test_retrieve_pii_keys_from_hash
    payload = { first_name: 'Dave', event: 'checkout' }
    detected = PIIDetector.retrieve_pii_keys_from_hash(payload)

    assert_equal(['first_name'], detected)
  end

  # A known PII name is accepted and an unrelated name is rejected.
  def test_pii_field
    assert PIIDetector.pii_field?('first_name')
    refute PIIDetector.pii_field?('event')
  end
end
|
data/test/string_test.rb
ADDED
data/test/test_helper.rb
ADDED
metadata
ADDED
@@ -0,0 +1,67 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: pii-detector
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 1.0.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- ''
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2018-02-01 00:00:00.000000000 Z
|
12
|
+
dependencies: []
|
13
|
+
description:
|
14
|
+
email: ''
|
15
|
+
executables: []
|
16
|
+
extensions: []
|
17
|
+
extra_rdoc_files: []
|
18
|
+
files:
|
19
|
+
- ".gitignore"
|
20
|
+
- ".rubocop.yml"
|
21
|
+
- Gemfile
|
22
|
+
- Gemfile.lock
|
23
|
+
- README.md
|
24
|
+
- Rakefile
|
25
|
+
- bin/console
|
26
|
+
- dev.yml
|
27
|
+
- lib/config/pii_fields
|
28
|
+
- lib/hash.rb
|
29
|
+
- lib/pii_detector.rb
|
30
|
+
- lib/pii_detector/comparator.rb
|
31
|
+
- lib/pii_detector/configuration.rb
|
32
|
+
- lib/pii_detector/root.rb
|
33
|
+
- lib/pii_detector/version.rb
|
34
|
+
- lib/string.rb
|
35
|
+
- pii_detector.gemspec
|
36
|
+
- shitip.yml
|
37
|
+
- test/comparator_test.rb
|
38
|
+
- test/configuration_test.rb
|
39
|
+
- test/hash_test.rb
|
40
|
+
- test/pii_detector_test.rb
|
41
|
+
- test/string_test.rb
|
42
|
+
- test/test_helper.rb
|
43
|
+
homepage: https://github.com/Shopify/pii-detector
|
44
|
+
licenses:
|
45
|
+
- Shopify
|
46
|
+
metadata: {}
|
47
|
+
post_install_message:
|
48
|
+
rdoc_options: []
|
49
|
+
require_paths:
|
50
|
+
- lib
|
51
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
52
|
+
requirements:
|
53
|
+
- - ">="
|
54
|
+
- !ruby/object:Gem::Version
|
55
|
+
version: '0'
|
56
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
57
|
+
requirements:
|
58
|
+
- - ">="
|
59
|
+
- !ruby/object:Gem::Version
|
60
|
+
version: '0'
|
61
|
+
requirements: []
|
62
|
+
rubyforge_project:
|
63
|
+
rubygems_version: 2.6.14
|
64
|
+
signing_key:
|
65
|
+
specification_version: 4
|
66
|
+
summary: Ruby gem to detect personally identifiable information.
|
67
|
+
test_files: []
|