pii-detector 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 4be466fefd667de37780c94c4af71fad5a310c89
4
+ data.tar.gz: 369b6ee78bd3a55a793a785e15ccaaf0310448c3
5
+ SHA512:
6
+ metadata.gz: 3f55d0e0e71cee1d119cfd7ea72535245dd55ca2f4eb08832bd925b3075b356deb677295d709278093a91f1c3b3c314ecf6f91d18ef87e7053f02958823af538
7
+ data.tar.gz: af610219ad5697750fcd875aa1f1b9370320bb2bec440327780f373b4292594b62272383027abdfa1ed8b363e57472dbdc3e6d02fabb4f5048a6bea1fd638c3b
@@ -0,0 +1,4 @@
1
+ .dev
2
+ .bundle
3
+ .rubocop-*
4
+ .DS_Store
@@ -0,0 +1,2 @@
1
+ inherit_from:
2
+ - https://shopify.github.io/ruby-style-guide/rubocop.yml
data/Gemfile ADDED
@@ -0,0 +1,20 @@
1
+ # frozen_string_literal: true
2
+
3
source 'https://rubygems.org'

# Libraries the gem itself loads at runtime.
gem 'json'
gem 'levenshtein-ffi'

# rake is needed both to build/publish the gem (deployment) and to run the
# test suite (development). Declaring it once for both groups avoids Bundler's
# "Your Gemfile lists the gem rake more than once" warning; a gem that belongs
# to several groups is still installed as long as one of them is not excluded.
gem 'rake', groups: %i[deployment development]

# Tooling used only when publishing the packaged gem.
group :deployment do
  gem 'package_cloud'
end

# Tooling used only while working on the gem locally.
group :development do
  gem 'minitest'
  gem 'pry'
  gem 'rubocop'
end

# Pull in whatever the gemspec declares as well.
gemspec
@@ -0,0 +1,75 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ pii-detector (1.0.0)
5
+
6
+ GEM
7
+ remote: https://rubygems.org/
8
+ specs:
9
+ ast (2.4.0)
10
+ coderay (1.1.2)
11
+ domain_name (0.5.20170404)
12
+ unf (>= 0.0.5, < 1.0.0)
13
+ ffi (1.9.18)
14
+ highline (1.6.20)
15
+ http-cookie (1.0.3)
16
+ domain_name (~> 0.5)
17
+ json (2.1.0)
18
+ json_pure (1.8.1)
19
+ levenshtein-ffi (1.1.0)
20
+ ffi (~> 1.9)
21
+ method_source (0.9.0)
22
+ mime-types (3.1)
23
+ mime-types-data (~> 3.2015)
24
+ mime-types-data (3.2016.0521)
25
+ minitest (5.11.3)
26
+ netrc (0.11.0)
27
+ package_cloud (0.3.03)
28
+ highline (= 1.6.20)
29
+ json_pure (= 1.8.1)
30
+ rainbow (= 2.2.2)
31
+ rest-client (~> 2.0)
32
+ thor (~> 0.18)
33
+ parallel (1.12.1)
34
+ parser (2.4.0.2)
35
+ ast (~> 2.3)
36
+ powerpack (0.1.1)
37
+ pry (0.11.3)
38
+ coderay (~> 1.1.0)
39
+ method_source (~> 0.9.0)
40
+ rainbow (2.2.2)
41
+ rake
42
+ rake (12.3.0)
43
+ rest-client (2.0.2)
44
+ http-cookie (>= 1.0.2, < 2.0)
45
+ mime-types (>= 1.16, < 4.0)
46
+ netrc (~> 0.8)
47
+ rubocop (0.52.1)
48
+ parallel (~> 1.10)
49
+ parser (>= 2.4.0.2, < 3.0)
50
+ powerpack (~> 0.1)
51
+ rainbow (>= 2.2.2, < 4.0)
52
+ ruby-progressbar (~> 1.7)
53
+ unicode-display_width (~> 1.0, >= 1.0.1)
54
+ ruby-progressbar (1.9.0)
55
+ thor (0.20.0)
56
+ unf (0.1.4)
57
+ unf_ext
58
+ unf_ext (0.0.7.4)
59
+ unicode-display_width (1.3.0)
60
+
61
+ PLATFORMS
62
+ ruby
63
+
64
+ DEPENDENCIES
65
+ json
66
+ levenshtein-ffi
67
+ minitest
68
+ package_cloud
69
+ pii-detector!
70
+ pry
71
+ rake
72
+ rubocop
73
+
74
+ BUNDLED WITH
75
+ 1.16.1
@@ -0,0 +1 @@
1
+ # PII Detector
@@ -0,0 +1,11 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rake/testtask'
4
+ require 'bundler/gem_tasks'
5
+
6
+ task default: :test
7
+
8
+ Rake::TestTask.new do |t|
9
+ t.libs << 'test'
10
+ t.test_files = FileList['test/**/*_test.rb']
11
+ end
@@ -0,0 +1,8 @@
1
+ # frozen_string_literal: true
2
+
3
# Make the gem's own lib/ directory loadable so the console also starts when
# it is run without Bundler having prepared the load path. Previously the
# path was computed but never used.
lib = File.expand_path('../../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)

require 'pii_detector'
require 'pry'

# Open an interactive session with PIIDetector already loaded.
binding.pry
data/dev.yml ADDED
@@ -0,0 +1,21 @@
1
+ name: pii-detector
2
+
3
+ up:
4
+ - ruby: 2.5.0
5
+ - bundler
6
+
7
+ commands:
8
+ test:
9
+ syntax:
10
+ argument: file
11
+ optional: args...
12
+ run: |
13
+ if [[ $# -eq 0 ]]; then
14
+ rake test
15
+ else
16
+ bundle exec ruby -Itest "$@"
17
+ fi
18
+ console:
19
+ run: bundle exec ruby bin/console
20
+ check:
21
+ run: cd lib/ && bundle exec rubocop
@@ -0,0 +1,25 @@
1
+ phone
2
+ email
3
+ full_name
4
+ first_name
5
+ last_name
6
+
7
+ device_name
8
+ user_agent
9
+
10
+ zip
11
+ address
12
+ location
13
+ latitude
14
+ longitude
15
+
16
+ ip
17
+ gps
18
+
19
+ sex
20
+ gender
21
+
22
+ token
23
+ password
24
+
25
+ religion
@@ -0,0 +1,9 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'json'
4
+
5
class Hash
  # Lists every distinct key that appears in the JSON representation of this
  # hash, including keys of hashes nested directly or inside arrays.
  #
  # The hash is serialised with +to_json+ and parsed back, then the resulting
  # structure is walked. Walking the parsed structure (rather than scanning
  # the JSON text with a regular expression) means that:
  # * string values are never mistaken for keys (e.g. the array ["x", ":"]
  #   used to yield a bogus key ","), and
  # * keys containing quotes or backslashes are no longer dropped.
  #
  # A JSON document stored as a string value stays an opaque string, so the
  # keys inside it are not reported.
  #
  # @return [Array<String>] keys as strings, in depth-first order of first
  #   appearance, without duplicates
  def unique_keys
    collect = lambda do |node, found|
      case node
      when Hash
        node.each do |key, value|
          found << key
          collect.call(value, found)
        end
      when Array
        node.each { |item| collect.call(item, found) }
      end
      found
    end

    collect.call(JSON.parse(to_json), []).uniq
  end
end
@@ -0,0 +1,36 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'hash'
4
+
5
+ require 'pii_detector/root'
6
+ require 'pii_detector/version'
7
+ require 'pii_detector/comparator'
8
+ require 'pii_detector/configuration'
9
+
10
# Entry point of the gem: finds hash keys that look like personally
# identifiable information and holds the shared configuration.
module PIIDetector
  # Makes every method below callable directly on the module
  # (e.g. PIIDetector.pii_field?).
  extend self

  # Returns the keys of +value+ (at any nesting level) that look like PII.
  #
  # @param value [Hash]
  # @return [Array<String>] matching keys, in the order unique_keys yields them
  def retrieve_pii_keys_from_hash(value)
    # unique_keys already returns distinct keys, so filtering is all that is
    # needed here.
    value.unique_keys.select { |field| pii_field?(field) }
  end

  # Tells whether a single field name resembles any configured PII field.
  #
  # @param value [String]
  # @return [Boolean]
  def pii_field?(value)
    config.pii_fields.any? { |pii| Comparator.similar?(pii, value) }
  end

  # Creates the shared configuration on first use and, when a block is given,
  # yields it so the caller can adjust it.
  #
  # @yieldparam config [PIIDetector::Configuration]
  # @return [PIIDetector::Configuration]
  def configure
    @config ||= Configuration.new
    yield(@config) if block_given?
    @config
  end

  # Returns the shared configuration, building it with defaults if needed.
  #
  # @return [PIIDetector::Configuration]
  def config
    @config || configure
  end
end
@@ -0,0 +1,28 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'levenshtein-ffi'
4
+ require 'string'
5
+
6
module PIIDetector
  # Decides whether a field name is close enough to a known PII field name.
  class Comparator
    class << self
      # Compares two field names after stripping surrounding whitespace and
      # converting both to snake_case.
      #
      # +check+ is considered similar to +desired+ when one of these holds:
      # * both names are equal;
      # * +desired+ appears in +check+ as a whole underscore-delimited part
      #   (at the start, at the end or in the middle), e.g. "user_ip" or
      #   "ip_address" for "ip";
      # * the Levenshtein distance between the names is strictly below the
      #   configured edit distance, and +check+ is longer than that distance.
      #
      # Names of one character or less never match.
      #
      # Differences from the previous implementation: the "ends with" rule
      # used an unanchored pattern whose last character was optional, so "ip"
      # matched "user_id"; the configured name is now escaped before being
      # used in a pattern; and the result is always a boolean.
      #
      # @param desired [String] known PII field name
      # @param check [String] field name to test
      # @return [Boolean]
      def similar?(desired, check)
        target = desired.strip.underscore
        candidate = check.strip.underscore

        return false if candidate.length <= 1
        return true if candidate == target
        return true if segment_pattern(target).match?(candidate)

        # The distance is only computed when the cheaper checks did not match.
        threshold = PIIDetector.config.edit_distance
        threshold < candidate.length && Levenshtein.distance(target, candidate) < threshold
      end

      private

      # Builds a pattern matching +target+ as a complete underscore-delimited
      # part of a name.
      def segment_pattern(target)
        /(?:\A|_)#{Regexp.escape(target)}(?:_|\z)/
      end
    end
  end
end
@@ -0,0 +1,29 @@
1
+ # frozen_string_literal: true
2
+
3
require 'set'

module PIIDetector
  # Settings used by the detector: the edit-distance threshold and the set of
  # field names treated as PII.
  class Configuration
    # Threshold the comparator uses for its edit-distance check (2 by default).
    attr_accessor :edit_distance
    # @return [Set<String>] field names currently treated as PII
    attr_reader :pii_fields

    def initialize
      @edit_distance = 2
      # Hand out a copy so callers mutating the returned set cannot alter the
      # cached defaults.
      @pii_fields = pii_fields_from_config.dup
    end

    # Replaces the PII field names with the built-in defaults plus +value+.
    #
    # A new set is built on every assignment. The previous implementation
    # merged into the cached default set itself, so fields from earlier
    # assignments could never be removed again.
    #
    # @param value [Array, Set] additional field names
    def pii_fields=(value)
      @pii_fields = pii_fields_from_config | value
    end

    private

    # Reads the default field names from lib/config/pii_fields under the gem
    # root, one name per line. Surrounding whitespace (including a trailing
    # carriage return) is removed and blank lines are skipped. The result is
    # cached per instance.
    #
    # @return [Set<String>]
    def pii_fields_from_config
      @default_pii_fields ||=
        File.readlines(File.join(PIIDetector.root_path, 'lib/config', 'pii_fields'))
            .map(&:strip)
            .reject(&:empty?)
            .to_set
    end
  end
end
@@ -0,0 +1,8 @@
1
+ # frozen_string_literal: true
2
+
3
module PIIDetector
  # Absolute path of the directory two levels above the directory containing
  # this file.
  #
  # @return [String]
  def root_path
    File.expand_path('../..', __dir__)
  end
end
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
+ module PIIDetector
4
+ VERSION = '1.0.0'
5
+ end
@@ -0,0 +1,11 @@
1
+ # frozen_string_literal: true
2
+
3
class String
  # Returns a lower-case, underscore-separated copy of the string:
  # "::" becomes "/", an underscore is inserted at camel-case word
  # boundaries, and dashes are turned into underscores.
  #
  # @return [String]
  def underscore
    converted = gsub(/::/, '/')
    # Boundary between a run of capitals and a capitalised word.
    converted = converted.gsub(/([A-Z]+)([A-Z][a-z])/, '\1_\2')
    # Boundary between a lower-case letter or digit and a capital.
    converted = converted.gsub(/([a-z\d])([A-Z])/, '\1_\2')
    converted.tr('-', '_').downcase
  end
end
@@ -0,0 +1,16 @@
1
+ # frozen_string_literal: true
2
+
3
+ require File.expand_path('../lib/pii_detector/version', __FILE__)
4
+
5
Gem::Specification.new do |spec|
  spec.name          = 'pii-detector'
  spec.summary       = 'Ruby gem to detect personally identifiable information.'
  spec.version       = PIIDetector::VERSION
  # NOTE(review): authors and email are blank; fill them in before publishing.
  spec.authors       = ['']
  spec.email         = ''
  spec.homepage      = 'https://github.com/Shopify/pii-detector'
  spec.license       = 'Shopify'

  # Package every file tracked by git, one path per output line.
  spec.files         = `git ls-files`.split("\n")
  spec.require_paths = ['lib']

  # The comparator requires levenshtein-ffi when the library is loaded, so it
  # must be installed together with the gem rather than only via the Gemfile.
  spec.add_runtime_dependency 'levenshtein-ffi', '~> 1.1'
end
@@ -0,0 +1,13 @@
1
+ deploy:
2
+ override:
3
+ - bundle exec rake build
4
+ - bundle exec package_cloud push shopify/gems pkg/*.gem
5
+
6
+ dependencies:
7
+ override:
8
+ - bundle install
9
+ bundler:
10
+ without:
11
+ - development
12
+ - test
13
+ - debug
@@ -0,0 +1,20 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'test_helper'
4
+
5
# Exercises PIIDetector::Comparator.similar? against the name "first_name".
class ComparatorTest < Minitest::Test
  # Exact match, a one-character typo, and the name embedded as a suffix,
  # prefix or middle part must all be accepted.
  def test_similar
    accepted = %w[
      first_name
      first_nam
      customer_first_name
      first_name_of_customer
      user_first_name_field
    ]

    accepted.each do |candidate|
      assert PIIDetector::Comparator.similar?('first_name', candidate)
    end
  end

  # Unrelated names and names that are too far from "first_name" must be
  # rejected.
  def test_not_similar
    rejected = %w[event first first_na na]

    rejected.each do |candidate|
      refute PIIDetector::Comparator.similar?('first_name', candidate)
    end
  end
end
@@ -0,0 +1,38 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'test_helper'
4
+
5
# Covers reading and changing the shared PIIDetector configuration.
class ConfigurationTest < Minitest::Test
  # The configuration is shared by the whole process. Drop it after each test
  # so the edit distance and extra PII fields set here cannot leak into other
  # test cases; it is rebuilt with defaults on next access.
  def teardown
    PIIDetector.instance_variable_set(:@config, nil)
  end

  # Values can be changed both inside a configure block and directly through
  # PIIDetector.config.
  def test_configuration_default_values
    new_pii_fields = Set['unknown_field1']

    PIIDetector.configure do |config|
      config.edit_distance = 3
      config.pii_fields += new_pii_fields
    end

    assert_equal 3, PIIDetector.config.edit_distance
    assert new_pii_fields.subset?(PIIDetector.config.pii_fields)

    new_pii_fields.merge(['unknown_field2'])
    PIIDetector.config.edit_distance = 2
    PIIDetector.config.pii_fields += new_pii_fields

    assert_equal 2, PIIDetector.config.edit_distance
    assert new_pii_fields.subset?(PIIDetector.config.pii_fields)
  end

  # Assigning a setting that does not exist must fail loudly.
  def test_configuration_set_not_exists_attribute
    assert_raises NoMethodError do
      PIIDetector.configure do |config|
        config.unknown_attribute = 'test'
      end
    end
  end

  # Reading a setting that does not exist must fail loudly.
  def test_configuration_get_not_exists_attribute
    assert_raises NoMethodError do
      PIIDetector.config.unknown_attribute
    end
  end
end
@@ -0,0 +1,38 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'test_helper'
4
+
5
# Covers Hash#unique_keys for nested hashes, arrays and JSON string values.
class HashTest < Minitest::Test
  # Keys of hashes nested several levels deep are all reported.
  def test_unique_keys_from_multilevel_hash
    nested = { a: { b: { c: 'c' } } }

    assert_equal %w[a b c], nested.unique_keys
  end

  # Keys of hashes stored inside an array are reported too.
  def test_unique_keys_from_array_of_hashes
    with_array = { a: [{ b: 'b' }, { c: 'c' }] }

    assert_equal %w[a b c], with_array.unique_keys
  end

  # A value that is itself a JSON-encoded string is not searched for keys.
  def test_unique_keys_from_json_string_value
    encoded_child = { b: 'b' }.to_json
    with_json_string = { a: encoded_child }

    assert_equal %w[a], with_json_string.unique_keys
  end
end
@@ -0,0 +1,18 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'test_helper'
4
+
5
# Covers the public entry points of the PIIDetector module.
class PIIDetectorTest < Minitest::Test
  # Only the key that resembles a PII field is returned.
  def test_retrieve_pii_keys_from_hash
    payload = { first_name: 'Dave', event: 'checkout' }
    detected = PIIDetector.retrieve_pii_keys_from_hash(payload)

    assert_equal(['first_name'], detected)
  end

  # A known PII name is recognised and an unrelated name is not.
  def test_pii_field
    assert PIIDetector.pii_field?('first_name')
    refute PIIDetector.pii_field?('event')
  end
end
@@ -0,0 +1,10 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'test_helper'
4
+
5
# Covers String#underscore.
class StringTest < Minitest::Test
  # Camel-case and capitalised snake-case inputs both become "first_name".
  def test_underscore
    %w[FirstName First_Name].each do |input|
      assert_equal 'first_name', input.underscore
    end
  end
end
@@ -0,0 +1,5 @@
1
+ require 'pii_detector'
2
+ require 'minitest/autorun'
3
+
4
+ # Configure PIIDetector up front to avoid a warning
5
+ PIIDetector.configure
metadata ADDED
@@ -0,0 +1,67 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: pii-detector
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0
5
+ platform: ruby
6
+ authors:
7
+ - ''
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2018-02-01 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description:
14
+ email: ''
15
+ executables: []
16
+ extensions: []
17
+ extra_rdoc_files: []
18
+ files:
19
+ - ".gitignore"
20
+ - ".rubocop.yml"
21
+ - Gemfile
22
+ - Gemfile.lock
23
+ - README.md
24
+ - Rakefile
25
+ - bin/console
26
+ - dev.yml
27
+ - lib/config/pii_fields
28
+ - lib/hash.rb
29
+ - lib/pii_detector.rb
30
+ - lib/pii_detector/comparator.rb
31
+ - lib/pii_detector/configuration.rb
32
+ - lib/pii_detector/root.rb
33
+ - lib/pii_detector/version.rb
34
+ - lib/string.rb
35
+ - pii_detector.gemspec
36
+ - shitip.yml
37
+ - test/comparator_test.rb
38
+ - test/configuration_test.rb
39
+ - test/hash_test.rb
40
+ - test/pii_detector_test.rb
41
+ - test/string_test.rb
42
+ - test/test_helper.rb
43
+ homepage: https://github.com/Shopify/pii-detector
44
+ licenses:
45
+ - Shopify
46
+ metadata: {}
47
+ post_install_message:
48
+ rdoc_options: []
49
+ require_paths:
50
+ - lib
51
+ required_ruby_version: !ruby/object:Gem::Requirement
52
+ requirements:
53
+ - - ">="
54
+ - !ruby/object:Gem::Version
55
+ version: '0'
56
+ required_rubygems_version: !ruby/object:Gem::Requirement
57
+ requirements:
58
+ - - ">="
59
+ - !ruby/object:Gem::Version
60
+ version: '0'
61
+ requirements: []
62
+ rubyforge_project:
63
+ rubygems_version: 2.6.14
64
+ signing_key:
65
+ specification_version: 4
66
+ summary: Ruby gem to detect personally identifiable information.
67
+ test_files: []