confidential_info_redactor_lite 1.0.0 → 1.0.1

Sign up to get free protection for your applications and access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 9cc0f357a427f4cc05da05abd1d0c89544f18e34
4
- data.tar.gz: f76b54f78599ac06388f649a95a48f3bafe9e248
3
+ metadata.gz: 22ce6189b4e4889ade8350442036d46a8e3c4be9
4
+ data.tar.gz: d636d8803c0f10de0afbd35c8a560a9cb81cc454
5
5
  SHA512:
6
- metadata.gz: e79cdf4659e79523dccba90d68b64e6b963511eded9621c7db79f15864292f1508b2034354511199d5e140da71e3421ea3d1bbc5e617d67a0873aa4bb3ae6504
7
- data.tar.gz: e2f843ce61d278521a4ebd75e3d71979947f9cd53d0fe4e825ca49d305fb86d53088d96c6bd43238e594ae77369ce4143fe61a2b19f5d704817d47d529c29f74
6
+ metadata.gz: 851e356e7cd32ac7c43d1070532ee596e70df2bd19433e794297ce7f94f4ee32895791b2719cbff3063b437c778c24784215ce6b7ef7577a93b9b47f56035019
7
+ data.tar.gz: 4500d3135dd6bd2fcdf9eece3eba5b89035fb644879c268515469865b843c9ba0756be2e797a5070387b70657ed414c173b22456543ccea0c02125c4e53db501
data/README.md CHANGED
@@ -39,40 +39,40 @@ gem 'confidential_info_redactor_lite'
39
39
  ```ruby
40
40
  text = 'Coca-Cola announced a merger with Pepsi that will happen on December 15th, 2020 for $200,000,000,000. Please contact John Smith at j.smith@example.com or visit http://www.super-fake-merger.com.'
41
41
  corpus = ['array', 'of', 'common', 'english', 'words']
42
- tokens = ConfidentialInfoRedactorLite::Extractor.new(text: text, corpus: corpus).extract
42
+ tokens = ConfidentialInfoRedactorLite::Extractor.new(corpus: corpus).extract(text)
43
43
  # => ["Coca-Cola", "Pepsi", "John Smith"]
44
44
 
45
45
  en_dow = %w(monday tuesday wednesday thursday friday saturday sunday)
46
46
  en_dow_abbr = %w(mon tu tue tues wed th thu thur thurs fri sat sun)
47
47
  en_months = %w(january february march april may june july august september october november december)
48
48
  en_month_abbr = %w(jan feb mar apr jun jul aug sep sept oct nov dec)
49
- ConfidentialInfoRedactorLite::Redactor.new(text: text, tokens: tokens, dow: en_dow, dow_abbr: en_dow_abbr, months: en_months, months_abbr: en_month_abbr).redact
49
+ ConfidentialInfoRedactorLite::Redactor.new(tokens: tokens, dow: en_dow, dow_abbr: en_dow_abbr, months: en_months, months_abbr: en_month_abbr).redact(text)
50
50
  # => '<redacted> announced a merger with <redacted> that will happen on <redacted date> for <redacted number>. Please contact <redacted> at <redacted> or visit <redacted>.'
51
51
 
52
52
  # You can also just use a specific redactor
53
- ConfidentialInfoRedactorLite::Redactor.new(text: text, dow: en_dow, dow_abbr: en_dow_abbr, months: en_months, months_abbr: en_month_abbr).dates
53
+ ConfidentialInfoRedactorLite::Redactor.new(dow: en_dow, dow_abbr: en_dow_abbr, months: en_months, months_abbr: en_month_abbr).dates(text)
54
54
  # => 'Coca-Cola announced a merger with Pepsi that will happen on <redacted date> for $200,000,000,000. Please contact John Smith at j.smith@example.com or visit http://www.super-fake-merger.com.'
55
55
 
56
- ConfidentialInfoRedactorLite::Redactor.new(text: text, dow: en_dow, dow_abbr: en_dow_abbr, months: en_months, months_abbr: en_month_abbr).numbers
56
+ ConfidentialInfoRedactorLite::Redactor.new(dow: en_dow, dow_abbr: en_dow_abbr, months: en_months, months_abbr: en_month_abbr).numbers(text)
57
57
  # => 'Coca-Cola announced a merger with Pepsi that will happen on December <redacted number>, <redacted number> for <redacted number>. Please contact John Smith at j.smith@example.com or visit http://www.super-fake-merger.com.'
58
58
 
59
- ConfidentialInfoRedactorLite::Redactor.new(text: text, dow: en_dow, dow_abbr: en_dow_abbr, months: en_months, months_abbr: en_month_abbr).emails
59
+ ConfidentialInfoRedactorLite::Redactor.new(dow: en_dow, dow_abbr: en_dow_abbr, months: en_months, months_abbr: en_month_abbr).emails(text)
60
60
  # => 'Coca-Cola announced a merger with Pepsi that will happen on December 15th, 2020 for $200,000,000,000. Please contact John Smith at <redacted> or visit http://www.super-fake-merger.com.'
61
61
 
62
- ConfidentialInfoRedactorLite::Redactor.new(text: text, dow: en_dow, dow_abbr: en_dow_abbr, months: en_months, months_abbr: en_month_abbr).hyperlinks
62
+ ConfidentialInfoRedactorLite::Redactor.new(dow: en_dow, dow_abbr: en_dow_abbr, months: en_months, months_abbr: en_month_abbr).hyperlinks(text)
63
63
  # => 'Coca-Cola announced a merger with Pepsi that will happen on December 15th, 2020 for $200,000,000,000. Please contact John Smith at j.smith@example.com or visit <redacted>.'
64
64
 
65
- ConfidentialInfoRedactorLite::Redactor.new(text: text, tokens: tokens, dow: en_dow, dow_abbr: en_dow_abbr, months: en_months, months_abbr: en_month_abbr).proper_nouns
65
+ ConfidentialInfoRedactorLite::Redactor.new(tokens: tokens, dow: en_dow, dow_abbr: en_dow_abbr, months: en_months, months_abbr: en_month_abbr).proper_nouns(text)
66
66
  # => '<redacted> announced a merger with <redacted> that will happen on December 15th, 2020 for $200,000,000,000. Please contact <redacted> at j.smith@example.com or visit http://www.super-fake-merger.com.'
67
67
 
68
68
  # It is possible to 'turn off' any of the specific redactors
69
- ConfidentialInfoRedactorLite::Redactor.new(text: text, tokens: tokens, ignore_numbers: true, dow: en_dow, dow_abbr: en_dow_abbr, months: en_months, months_abbr: en_month_abbr).redact
69
+ ConfidentialInfoRedactorLite::Redactor.new(tokens: tokens, ignore_numbers: true, dow: en_dow, dow_abbr: en_dow_abbr, months: en_months, months_abbr: en_month_abbr).redact(text)
70
70
  # => '<redacted> announced a merger with <redacted> that will happen on <redacted date> for $200,000,000,000. Please contact <redacted> at <redacted> or visit <redacted>.'
71
71
 
72
72
  # It is also possible to change the redaction text
73
73
  text = 'Coca-Cola announced a merger with Pepsi that will happen on December 15th, 2020 for $200,000,000,000. Please contact John Smith at j.smith@example.com or visit http://www.super-fake-merger.com.'
74
74
  tokens = ['Coca-Cola', 'Pepsi', 'John Smith']
75
- ConfidentialInfoRedactorLite::Redactor.new(text: text, tokens: tokens, number_text: '**redacted number**', date_text: '^^redacted date^^', token_text: '*****', dow: en_dow, dow_abbr: en_dow_abbr, months: en_months, months_abbr: en_month_abbr).redact
75
+ ConfidentialInfoRedactorLite::Redactor.new(tokens: tokens, number_text: '**redacted number**', date_text: '^^redacted date^^', token_text: '*****', dow: en_dow, dow_abbr: en_dow_abbr, months: en_months, months_abbr: en_month_abbr).redact(text)
76
76
  # => '***** announced a merger with ***** that will happen on ^^redacted date^^ for **redacted number**. Please contact ***** at ***** or visit *****.'
77
77
  ```
78
78
 
@@ -5,7 +5,7 @@ module ConfidentialInfoRedactorLite
5
5
  # This class redacts various tokens from a text
6
6
  class Redactor
7
7
  # Rubular: http://rubular.com/r/OI2wQZ0KSl
8
- NUMBER_REGEX = /(?<=\A|\A\()[^(]?\d+((,|\.)*\d)*(\D?\s|\s|\.?\s|\.$)|(?<=\s|\s\()[^(]?\d+((,|\.)*\d)*(?=(\D?\s|\s|\.?\s|\.$))|(?<=\s)\d+(nd|th|st)|(?<=\s)\d+\/\d+\"*(?=\s)|(?<=\()\S{1}\d+(?=\))|(?<=\s{1})\S{1}\d+\z|^\d+$/
8
+ NUMBER_REGEX = /(?<=\A|\A\()[^(]?\d+((,|\.)*\d)*(\D?\s|\s|\.?\s|\.$)|(?<=\s|\s\(|\s'|\s‘)[^('‘]?\d+((,|\.)*\d)*(?=(\D?\s|\s|\.?\s|\.$))|(?<=\s)\d+(nd|th|st)|(?<=\s)\d+\/\d+\"*(?=\s)|(?<=\()\S{1}\d+(?=\))|(?<=\s{1})\S{1}\d+\z|^\d+$/
9
9
  # Rubular: http://rubular.com/r/mxcj2G0Jfa
10
10
  EMAIL_REGEX = /(?<=\A|\s|\()[\w+\-.]+@[a-z\d\-]+(\.[a-z]+)*\.[a-z]+(?=\z|\s|\.|\))/i
11
11
 
@@ -117,7 +117,13 @@ module ConfidentialInfoRedactorLite
117
117
  original_sentence_array = txt.split(' ')
118
118
  redacted_sentence_array = redacted_text.split(' ')
119
119
  diff = original_sentence_array - redacted_sentence_array
120
- final_number_tokens = diff.map { |token| token[-1].eql?('.') ? token[0...-1] : token }.map { |token| token[-1].eql?(')') ? token[0...-1] : token }.map { |token| token[0].eql?('(') ? token[1..token.length] : token }
120
+ final_number_tokens = diff.map { |token| token[-1].eql?('.') ? token[0...-1] : token }
121
+ .map { |token| token[-1].eql?(')') ? token[0...-1] : token }
122
+ .map { |token| token[-1].eql?("'") ? token[0...-1] : token }
123
+ .map { |token| token[-1].eql?('’') ? token[0...-1] : token }
124
+ .map { |token| token[0].eql?('(') ? token[1..token.length] : token }
125
+ .map { |token| token[0].eql?("'") ? token[1..token.length] : token }
126
+ .map { |token| token[0].eql?("‘") ? token[1..token.length] : token }
121
127
  end
122
128
  [redacted_text.gsub(/(?<=[^\>]|\A)#{Regexp.escape(number_text)}/, "<span class='confidentialNumber'>#{number_text}</span>"), final_number_tokens]
123
129
  end
@@ -1,3 +1,3 @@
1
1
  module ConfidentialInfoRedactorLite
2
- VERSION = "1.0.0"
2
+ VERSION = "1.0.1"
3
3
  end