stockade 0.1.0 → 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 14344fa7658d3173f6f14e2300725ce2def9ff69
4
- data.tar.gz: 2e5142b58d7a48dd811ad9bd2e5cfb228fea590d
3
+ metadata.gz: 84a7317f812734960f8ed6f56c0194d783aad1c7
4
+ data.tar.gz: 6d361b573054fb0b0b2c5d19a02cebe7cb705173
5
5
  SHA512:
6
- metadata.gz: 8aa73b99f960c8fb42f2fba50a5406a0521ed5d78b77d9f02959a8aefcbdb0839e94d656cc6da98bd610b5fdd38812bc148555761db3f07735bc3a58917f8aa0
7
- data.tar.gz: 0b622524d25184ae5313147ad83aa63d6e9e164b5b2b2f5c37f6131ccbc9a0d669cd473c877ced48afe5283d354de3f99220f22e38a19a3f2cbbb59636f988d5
6
+ metadata.gz: 431ecb92de2cd3e67596af6347d23c09e86bc4efbd8436617bdd599d25140fa67872385b848bd989635b24f8477da44d880047c2ac112fa8614f9b291b844727
7
+ data.tar.gz: 0b9040960eb148c06f4a664d6f074c075200afd36f10046c8c50eaa914d5550ebb1c91ea8ca5f9d53d0add1f0ddfb464c7b0636beb5ec6e361eaeefce129f417
data/Gemfile CHANGED
@@ -1,4 +1,5 @@
1
- source 'https://rubygems.org'
1
+ # frozen_string_literal: true
2
2
 
3
+ source 'https://rubygems.org'
3
4
 
4
5
  gemspec
data/Gemfile.lock CHANGED
@@ -1,28 +1,33 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- stockade (0.1.0)
5
- bloomfilter-rb
6
- memoist
4
+ stockade (0.1.1)
5
+ memoist (~> 0.1)
6
+ rambling-trie
7
7
 
8
8
  GEM
9
9
  remote: https://rubygems.org/
10
10
  specs:
11
- bloomfilter-rb (2.1.1)
12
- redis
11
+ ast (2.4.0)
13
12
  byebug (10.0.2)
14
13
  coderay (1.1.2)
15
14
  diff-lcs (1.3)
15
+ jaro_winkler (1.5.1)
16
16
  memoist (0.16.0)
17
17
  method_source (0.9.0)
18
+ parallel (1.12.1)
19
+ parser (2.5.1.2)
20
+ ast (~> 2.4.0)
21
+ powerpack (0.1.2)
18
22
  pry (0.11.3)
19
23
  coderay (~> 1.1.0)
20
24
  method_source (~> 0.9.0)
21
25
  pry-byebug (3.6.0)
22
26
  byebug (~> 10.0)
23
27
  pry (~> 0.10)
28
+ rainbow (3.0.0)
24
29
  rake (10.5.0)
25
- redis (4.0.1)
30
+ rambling-trie (2.0.0)
26
31
  rspec (3.7.0)
27
32
  rspec-core (~> 3.7.0)
28
33
  rspec-expectations (~> 3.7.0)
@@ -36,6 +41,16 @@ GEM
36
41
  diff-lcs (>= 1.2.0, < 2.0)
37
42
  rspec-support (~> 3.7.0)
38
43
  rspec-support (3.7.1)
44
+ rubocop (0.58.2)
45
+ jaro_winkler (~> 1.5.1)
46
+ parallel (~> 1.10)
47
+ parser (>= 2.5, != 2.5.1.1)
48
+ powerpack (~> 0.1)
49
+ rainbow (>= 2.2.2, < 4.0)
50
+ ruby-progressbar (~> 1.7)
51
+ unicode-display_width (~> 1.0, >= 1.0.1)
52
+ ruby-progressbar (1.9.0)
53
+ unicode-display_width (1.4.0)
39
54
 
40
55
  PLATFORMS
41
56
  ruby
@@ -45,6 +60,7 @@ DEPENDENCIES
45
60
  pry-byebug
46
61
  rake (~> 10.0)
47
62
  rspec (~> 3.0)
63
+ rubocop
48
64
  stockade!
49
65
 
50
66
  BUNDLED WITH
data/README.md ADDED
@@ -0,0 +1,37 @@
1
+ # PII Lexer
2
+
3
+ _This is proof-of-concept level software._
4
+
5
+ Stockade is a lexer for Personally Identifiable Information (PII). It scans
6
+ unstructured text (from files, logs, databases, web etc.) and tokenizes
7
+ recognized pieces of PII. This information can be used to raise errors,
8
+ discard, or mask data.
9
+
10
+ ## Installation
11
+
12
+ ```
13
+ gem install stockade
14
+ ```
15
+
16
+ ## Usage
17
+
18
+ ```ruby
19
+ require 'stockade'
20
+ #=> true
21
+ Stockade.mask('Mr. John Smith email is jsmith@example.com')
22
+ #=> "Mr. **** ***** email is ******************"
23
+ Stockade.mask('and his phone is 555-123-4567.')
24
+ #=> "*** his phone is ************."
25
+ ```
26
+ Yes, 'and' is masked as PII because it also matches a known last name.
27
+
28
+ ## Implementation
29
+
30
+ It uses
31
+ [StringScanner](https://ruby-doc.org/stdlib-2.5.1/libdoc/strscan/rdoc/StringScanner.html)
32
+ and a manually curated list of regular expressions to match strings that _look_
33
+ like PII. This works for things like emails, phone numbers, dates, national
34
+ ids, credit card numbers and IP addresses. But it does not work for names.
35
+ Names are verified against the list of known first and last names that are
36
+ stored as a trie.
37
+
data/Rakefile CHANGED
@@ -1,6 +1,8 @@
1
- require "bundler/gem_tasks"
2
- require "rspec/core/rake_task"
1
+ # frozen_string_literal: true
2
+
3
+ require 'bundler/gem_tasks'
4
+ require 'rspec/core/rake_task'
3
5
 
4
6
  RSpec::Core::RakeTask.new(:spec)
5
7
 
6
- task :default => :spec
8
+ task default: :spec
data/bin/load CHANGED
@@ -1,38 +1,12 @@
1
1
  #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
2
3
 
3
- # Make bloomfilter and dump it for future use
4
+ # Convert plain text dictionaries to trie
4
5
  #
5
6
  require 'bundler/setup'
7
+ require 'rambling-trie'
6
8
 
7
- require 'csv'
8
- require 'bloomfilter-rb'
9
-
10
- def load(type)
11
- bf = BloomFilter::Native.new(
12
- :size => 10_000_000,
13
- :hashes => 2,
14
- :seed => 1,
15
- :bucket => 3,
16
- :raise => false
17
- )
18
-
19
- Dir.glob("data/#{type}/*.csv").each do |file|
20
- CSV.foreach(file) do |line|
21
- name = line.first
22
- next if name == 'name'
23
-
24
- name.strip!
25
- name.downcase!
26
- bf.insert(name) unless bf.include?(name)
27
- end
28
- end
29
-
30
- dump = Marshal.dump(bf)
31
-
32
- File.write("data/#{type}.dump", dump)
33
-
34
- df = Marshal.load(File.read("data/#{type}.dump"))
9
+ %w[lastnames firstnames words].each do |type|
10
+ trie = Rambling::Trie.create("data/#{type}.txt")
11
+ Rambling::Trie.dump(trie, "data/#{type}.dump")
35
12
  end
36
-
37
- load('surnames')
38
- load('firstnames')
data/data/firstnames.dump CHANGED
Binary file