pii-detector 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 4be466fefd667de37780c94c4af71fad5a310c89
4
+ data.tar.gz: 369b6ee78bd3a55a793a785e15ccaaf0310448c3
5
+ SHA512:
6
+ metadata.gz: 3f55d0e0e71cee1d119cfd7ea72535245dd55ca2f4eb08832bd925b3075b356deb677295d709278093a91f1c3b3c314ecf6f91d18ef87e7053f02958823af538
7
+ data.tar.gz: af610219ad5697750fcd875aa1f1b9370320bb2bec440327780f373b4292594b62272383027abdfa1ed8b363e57472dbdc3e6d02fabb4f5048a6bea1fd638c3b
@@ -0,0 +1,4 @@
1
+ .dev
2
+ .bundle
3
+ .rubocop-*
4
+ .DS_Store
@@ -0,0 +1,2 @@
1
+ inherit_from:
2
+ - https://shopify.github.io/ruby-style-guide/rubocop.yml
data/Gemfile ADDED
@@ -0,0 +1,20 @@
1
# frozen_string_literal: true

source 'https://rubygems.org'

# Libraries the gem itself loads at runtime.
gem 'json'
gem 'levenshtein-ffi'

# rake is used both when building/publishing the gem and when running the
# tests. It is declared once for both groups because listing the same gem in
# two separate group blocks makes Bundler report a duplicate entry.
group :deployment, :development do
  gem 'rake'
end

# Only needed to publish the built gem.
group :deployment do
  gem 'package_cloud'
end

# Local development and test tooling.
group :development do
  gem 'pry'
  gem 'minitest'
  gem 'rubocop'
end

gemspec
@@ -0,0 +1,75 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ pii-detector (1.0.0)
5
+
6
+ GEM
7
+ remote: https://rubygems.org/
8
+ specs:
9
+ ast (2.4.0)
10
+ coderay (1.1.2)
11
+ domain_name (0.5.20170404)
12
+ unf (>= 0.0.5, < 1.0.0)
13
+ ffi (1.9.18)
14
+ highline (1.6.20)
15
+ http-cookie (1.0.3)
16
+ domain_name (~> 0.5)
17
+ json (2.1.0)
18
+ json_pure (1.8.1)
19
+ levenshtein-ffi (1.1.0)
20
+ ffi (~> 1.9)
21
+ method_source (0.9.0)
22
+ mime-types (3.1)
23
+ mime-types-data (~> 3.2015)
24
+ mime-types-data (3.2016.0521)
25
+ minitest (5.11.3)
26
+ netrc (0.11.0)
27
+ package_cloud (0.3.03)
28
+ highline (= 1.6.20)
29
+ json_pure (= 1.8.1)
30
+ rainbow (= 2.2.2)
31
+ rest-client (~> 2.0)
32
+ thor (~> 0.18)
33
+ parallel (1.12.1)
34
+ parser (2.4.0.2)
35
+ ast (~> 2.3)
36
+ powerpack (0.1.1)
37
+ pry (0.11.3)
38
+ coderay (~> 1.1.0)
39
+ method_source (~> 0.9.0)
40
+ rainbow (2.2.2)
41
+ rake
42
+ rake (12.3.0)
43
+ rest-client (2.0.2)
44
+ http-cookie (>= 1.0.2, < 2.0)
45
+ mime-types (>= 1.16, < 4.0)
46
+ netrc (~> 0.8)
47
+ rubocop (0.52.1)
48
+ parallel (~> 1.10)
49
+ parser (>= 2.4.0.2, < 3.0)
50
+ powerpack (~> 0.1)
51
+ rainbow (>= 2.2.2, < 4.0)
52
+ ruby-progressbar (~> 1.7)
53
+ unicode-display_width (~> 1.0, >= 1.0.1)
54
+ ruby-progressbar (1.9.0)
55
+ thor (0.20.0)
56
+ unf (0.1.4)
57
+ unf_ext
58
+ unf_ext (0.0.7.4)
59
+ unicode-display_width (1.3.0)
60
+
61
+ PLATFORMS
62
+ ruby
63
+
64
+ DEPENDENCIES
65
+ json
66
+ levenshtein-ffi
67
+ minitest
68
+ package_cloud
69
+ pii-detector!
70
+ pry
71
+ rake
72
+ rubocop
73
+
74
+ BUNDLED WITH
75
+ 1.16.1
@@ -0,0 +1 @@
1
+ # PII Detector
@@ -0,0 +1,11 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rake/testtask'
4
+ require 'bundler/gem_tasks'
5
+
6
# Defines the `test` task: puts test/ on the load path and runs every
# *_test.rb file found below it.
Rake::TestTask.new(:test) do |test_task|
  test_task.libs << 'test'
  test_task.test_files = FileList['test/**/*_test.rb']
end

# A bare `rake` runs the test suite.
task default: :test
@@ -0,0 +1,8 @@
1
+ # frozen_string_literal: true
2
+
3
# Put the gem's own lib/ directory on the load path so `require 'pii_detector'`
# resolves to this checkout even when the script is started without Bundler.
# (The path was previously computed but never used.)
lib = File.expand_path('../../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)

require 'pii_detector'
require 'pry'

# Open an interactive session with PIIDetector already loaded.
binding.pry
data/dev.yml ADDED
@@ -0,0 +1,21 @@
1
+ name: pii-detector
2
+
3
+ up:
4
+ - ruby: 2.5.0
5
+ - bundler
6
+
7
+ commands:
8
+ test:
9
+ syntax:
10
+ argument: file
11
+ optional: args...
12
+ run: |
13
+ if [[ $# -eq 0 ]]; then
14
+ rake test
15
+ else
16
+ bundle exec ruby -Itest "$@"
17
+ fi
18
+ console:
19
+ run: bundle exec ruby bin/console
20
+ check:
21
+ run: cd lib/ && bundle exec rubocop
@@ -0,0 +1,25 @@
1
+ phone
2
+ email
3
+ full_name
4
+ first_name
5
+ last_name
6
+
7
+ device_name
8
+ user_agent
9
+
10
+ zip
11
+ address
12
+ location
13
+ latitude
14
+ longitude
15
+
16
+ ip
17
+ gps
18
+
19
+ sex
20
+ gender
21
+
22
+ token
23
+ password
24
+
25
+ religion
@@ -0,0 +1,9 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'json'
4
+
5
class Hash
  # Collects the distinct key names used anywhere in this hash, including
  # hashes nested inside other hashes or inside arrays.
  #
  # The hash is serialized to JSON and parsed back before it is walked, so
  # keys are returned as strings and nested values are seen in their
  # serialized form. Walking the parsed structure (instead of pattern-matching
  # the JSON text) means string values can never be mistaken for keys, and
  # keys containing quotes or backslashes are reported intact.
  #
  # @return [Array<String>] non-empty key names, in order of first appearance
  def unique_keys
    names = []

    collect = lambda do |node|
      case node
      when Hash
        node.each do |key, value|
          # Empty key names carry no information and were never reported.
          names << key unless key.empty?
          collect.call(value)
        end
      when Array
        node.each { |element| collect.call(element) }
      end
    end

    # Anything #to_json could generate should be accepted back, so the
    # parser's own nesting limit is switched off.
    collect.call(JSON.parse(to_json, max_nesting: false))
    names.uniq
  end
end
@@ -0,0 +1,36 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'hash'
4
+
5
+ require 'pii_detector/root'
6
+ require 'pii_detector/version'
7
+ require 'pii_detector/comparator'
8
+ require 'pii_detector/configuration'
9
+
10
module PIIDetector
  extend self

  # Lists the keys of a hash (at any nesting level) that look like PII fields.
  #
  # @param value [Hash]
  # @return [Array]
  def retrieve_pii_keys_from_hash(value)
    value.unique_keys.select { |field| pii_field?(field) }.uniq
  end

  # Tells whether a field name resembles any of the configured PII fields.
  #
  # @param value [String]
  # @return [Boolean]
  def pii_field?(value)
    config.pii_fields.any? do |known_field|
      Comparator.similar?(known_field, value)
    end
  end

  # Creates the configuration on first use and, when a block is given,
  # yields it so the caller can adjust settings.
  #
  # @return [PIIDetector::Configuration]
  def configure
    @config ||= Configuration.new
    @config.tap { |settings| yield(settings) if block_given? }
  end

  # The current configuration, built with defaults if none exists yet.
  #
  # @return [PIIDetector::Configuration]
  def config
    return @config if @config

    configure
  end
end
@@ -0,0 +1,28 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'levenshtein-ffi'
4
+ require 'string'
5
+
6
module PIIDetector
  # Decides whether a field name looks like a known PII field name.
  class Comparator
    class << self
      # Compares two field names after stripping surrounding whitespace and
      # converting both to snake_case. The names are considered similar when
      # the checked name:
      #
      # * equals the PII name,
      # * starts with the PII name followed by "_" (first_name_of_customer),
      # * contains the PII name preceded by "_" (customer_first_name,
      #   user_first_name_field), or
      # * is longer than the configured edit distance and its Levenshtein
      #   distance to the PII name is below that setting (first_nam).
      #
      # Checked names of one character or less never match.
      #
      # The "_" + name rule used to be a regex ending in "?", which made the
      # last character of the PII name optional: "sex" matched
      # "user_settings" and "ip" matched "ship_items". Plain substring checks
      # remove that false positive and also stop configured names from being
      # interpreted as regex syntax.
      #
      # @param desired [String] known PII field name
      # @param check [String] field name under test
      # @return [Boolean]
      def similar?(desired, check)
        wanted = desired.strip.underscore
        candidate = check.strip.underscore

        return false if candidate.length <= 1
        return true if candidate == wanted
        return true if candidate.start_with?("#{wanted}_")
        # Covers both the trailing form (x_name) and the embedded form
        # (x_name_y).
        return true if candidate.include?("_#{wanted}")

        within_edit_distance?(wanted, candidate)
      end

      private

      # Typo tolerance: true when the candidate is longer than the configured
      # edit distance and is fewer edits than that away from the PII name.
      # Computed last because it is the most expensive check.
      def within_edit_distance?(wanted, candidate)
        limit = PIIDetector.config.edit_distance
        limit < candidate.length && Levenshtein.distance(wanted, candidate) < limit
      end
    end
  end
end
@@ -0,0 +1,29 @@
1
+ # frozen_string_literal: true
2
+
3
# Set and Array#to_set are used below.
require 'set'

module PIIDetector
  # Settings that control PII detection.
  class Configuration
    # Value the comparator uses as its Levenshtein threshold; starts at 2.
    attr_accessor :edit_distance
    # @return [Set] field names currently treated as PII
    attr_reader :pii_fields

    def initialize
      @edit_distance = 2
      # Work on a copy so changes made through the reader never leak into the
      # memoized defaults.
      @pii_fields = pii_fields_from_config.dup
    end

    # Replaces the PII field list with the bundled defaults plus +value+.
    #
    # A new set is built on every assignment. Previously the memoized default
    # set was merged into in place, so entries from earlier assignments could
    # never be dropped again.
    #
    # @param value [Array] or [Set]
    def pii_fields=(value)
      @pii_fields = pii_fields_from_config | value
    end

    private

    # Loads the bundled default field names, one per line, from
    # lib/config/pii_fields under PIIDetector.root_path. Surrounding
    # whitespace (including "\r" from CRLF files) is removed and blank lines
    # are skipped. The result is read once and kept frozen.
    #
    # @return [Set]
    def pii_fields_from_config
      @default_pii_fields ||=
        File.readlines(File.join(PIIDetector.root_path, 'lib/config', 'pii_fields'))
            .map(&:strip)
            .reject(&:empty?)
            .to_set
            .freeze
    end
  end
end
@@ -0,0 +1,8 @@
1
+ # frozen_string_literal: true
2
+
3
module PIIDetector
  # Absolute path of the directory three levels above this source file.
  #
  # @return [String]
  def root_path
    this_file = File.expand_path(__FILE__)
    File.dirname(File.dirname(File.dirname(this_file)))
  end
end
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
module PIIDetector
  # Version string of the gem; the gemspec reads it from here.
  VERSION = '1.0.0'
end
@@ -0,0 +1,11 @@
1
+ # frozen_string_literal: true
2
+
3
class String
  # Returns a snake_case copy of the string: "::" becomes "/", an underscore
  # is inserted at lower-to-upper case boundaries and before the last capital
  # of an acronym that is followed by a lowercase letter, dashes become
  # underscores, and everything is lowercased.
  #
  # @return [String]
  def underscore
    path_like = gsub('::', '/')
    acronyms_split = path_like.gsub(/([A-Z]+)([A-Z][a-z])/, '\1_\2')
    words_split = acronyms_split.gsub(/([a-z\d])([A-Z])/, '\1_\2')
    words_split.tr('-', '_').downcase
  end
end
@@ -0,0 +1,16 @@
1
+ # frozen_string_literal: true
2
+
3
+ require File.expand_path('../lib/pii_detector/version', __FILE__)
4
+
5
Gem::Specification.new do |spec|
  spec.name = 'pii-detector'
  spec.summary = 'Ruby gem to detect personally identifiable information.'
  spec.version = PIIDetector::VERSION
  spec.authors = ['']
  spec.email = ''
  spec.homepage = 'https://github.com/Shopify/pii-detector'
  spec.license = 'Shopify'

  # Ship every file tracked by git. The list is split on a literal newline
  # rather than the `$/` global, which other code can reassign.
  spec.files = `git ls-files`.split("\n")
  spec.require_paths = ['lib']

  # lib/pii_detector/comparator.rb requires levenshtein-ffi at load time, so
  # it has to be installed together with the gem; declaring it only in the
  # Gemfile leaves the published gem without it.
  spec.add_runtime_dependency 'levenshtein-ffi', '~> 1.1'
end
@@ -0,0 +1,13 @@
1
+ deploy:
2
+ override:
3
+ - bundle exec rake build
4
+ - bundle exec package_cloud push shopify/gems pkg/*.gem
5
+
6
+ dependencies:
7
+ override:
8
+ - bundle install
9
+ bundler:
10
+ without:
11
+ - development
12
+ - test
13
+ - debug
@@ -0,0 +1,20 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'test_helper'
4
+
5
# Checks which field names the comparator treats as similar to 'first_name'.
class ComparatorTest < Minitest::Test
  PII_FIELD = 'first_name'

  # Exact match, one-character typo, and the name used as suffix, prefix or
  # middle segment.
  def test_similar
    matching = %w[
      first_name
      first_nam
      customer_first_name
      first_name_of_customer
      user_first_name_field
    ]

    matching.each do |candidate|
      assert PIIDetector::Comparator.similar?(PII_FIELD, candidate)
    end
  end

  # Unrelated names and fragments that are too far from the PII name.
  def test_not_similar
    %w[event first first_na na].each do |candidate|
      refute PIIDetector::Comparator.similar?(PII_FIELD, candidate)
    end
  end
end
@@ -0,0 +1,38 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'test_helper'
4
+
5
# Exercises PIIDetector.configure / PIIDetector.config.
class ConfigurationTest < Minitest::Test
  # Settings changed inside a configure block and settings changed directly
  # on PIIDetector.config are both visible afterwards.
  def test_configuration_default_values
    extra_fields = Set['unknown_field1']

    PIIDetector.configure do |settings|
      settings.edit_distance = 3
      settings.pii_fields += extra_fields
    end

    assert_equal 3, PIIDetector.config.edit_distance
    assert extra_fields.subset?(PIIDetector.config.pii_fields)

    extra_fields.merge(['unknown_field2'])
    current = PIIDetector.config
    current.edit_distance = 2
    current.pii_fields += extra_fields

    assert_equal 2, PIIDetector.config.edit_distance
    assert extra_fields.subset?(PIIDetector.config.pii_fields)
  end

  # Assigning an attribute the configuration does not define raises.
  def test_configuration_set_not_exists_attribute
    assert_raises(NoMethodError) do
      PIIDetector.configure { |settings| settings.unknown_attribute = 'test' }
    end
  end

  # Reading an attribute the configuration does not define raises.
  def test_configuration_get_not_exists_attribute
    assert_raises(NoMethodError) { PIIDetector.config.unknown_attribute }
  end
end
@@ -0,0 +1,38 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'test_helper'
4
+
5
# Covers Hash#unique_keys for nested hashes, arrays of hashes and string
# values that themselves contain JSON.
class HashTest < Minitest::Test
  def test_unique_keys_from_multilevel_hash
    nested = { a: { b: { c: 'c' } } }

    assert_equal %w[a b c], nested.unique_keys
  end

  def test_unique_keys_from_array_of_hashes
    with_array = { a: [{ b: 'b' }, { c: 'c' }] }

    assert_equal %w[a b c], with_array.unique_keys
  end

  # Keys inside a JSON-encoded string value are not reported.
  def test_unique_keys_from_json_string_value
    serialized = { b: 'b' }.to_json

    assert_equal ['a'], { a: serialized }.unique_keys
  end
end
@@ -0,0 +1,18 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'test_helper'
4
+
5
# Covers the top-level PIIDetector helpers.
class PIIDetectorTest < Minitest::Test
  # Only the PII-looking key of the hash is returned.
  def test_retrieve_pii_keys_from_hash
    payload = { first_name: 'Dave', event: 'checkout' }
    detected = PIIDetector.retrieve_pii_keys_from_hash(payload)

    assert_equal(['first_name'], detected)
  end

  def test_pii_field
    assert PIIDetector.pii_field?('first_name')
    refute PIIDetector.pii_field?('event')
  end
end
@@ -0,0 +1,10 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'test_helper'
4
+
5
# Covers String#underscore.
class StringTest < Minitest::Test
  # CamelCase and Capitalized_Snake input both end up as snake_case.
  def test_underscore
    %w[FirstName First_Name].each do |raw|
      assert_equal 'first_name', raw.underscore
    end
  end
end
@@ -0,0 +1,5 @@
1
# frozen_string_literal: true

require 'pii_detector'
require 'minitest/autorun'

# Build the default configuration once before any test runs (the original
# note says this is done to avoid a warning).
PIIDetector.configure
metadata ADDED
@@ -0,0 +1,67 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: pii-detector
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0
5
+ platform: ruby
6
+ authors:
7
+ - ''
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2018-02-01 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description:
14
+ email: ''
15
+ executables: []
16
+ extensions: []
17
+ extra_rdoc_files: []
18
+ files:
19
+ - ".gitignore"
20
+ - ".rubocop.yml"
21
+ - Gemfile
22
+ - Gemfile.lock
23
+ - README.md
24
+ - Rakefile
25
+ - bin/console
26
+ - dev.yml
27
+ - lib/config/pii_fields
28
+ - lib/hash.rb
29
+ - lib/pii_detector.rb
30
+ - lib/pii_detector/comparator.rb
31
+ - lib/pii_detector/configuration.rb
32
+ - lib/pii_detector/root.rb
33
+ - lib/pii_detector/version.rb
34
+ - lib/string.rb
35
+ - pii_detector.gemspec
36
+ - shitip.yml
37
+ - test/comparator_test.rb
38
+ - test/configuration_test.rb
39
+ - test/hash_test.rb
40
+ - test/pii_detector_test.rb
41
+ - test/string_test.rb
42
+ - test/test_helper.rb
43
+ homepage: https://github.com/Shopify/pii-detector
44
+ licenses:
45
+ - Shopify
46
+ metadata: {}
47
+ post_install_message:
48
+ rdoc_options: []
49
+ require_paths:
50
+ - lib
51
+ required_ruby_version: !ruby/object:Gem::Requirement
52
+ requirements:
53
+ - - ">="
54
+ - !ruby/object:Gem::Version
55
+ version: '0'
56
+ required_rubygems_version: !ruby/object:Gem::Requirement
57
+ requirements:
58
+ - - ">="
59
+ - !ruby/object:Gem::Version
60
+ version: '0'
61
+ requirements: []
62
+ rubyforge_project:
63
+ rubygems_version: 2.6.14
64
+ signing_key:
65
+ specification_version: 4
66
+ summary: Ruby gem to detect personally identifiable information.
67
+ test_files: []