confidential_info_redactor_lite 1.0.0 → 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 9cc0f357a427f4cc05da05abd1d0c89544f18e34
4
- data.tar.gz: f76b54f78599ac06388f649a95a48f3bafe9e248
3
+ metadata.gz: 22ce6189b4e4889ade8350442036d46a8e3c4be9
4
+ data.tar.gz: d636d8803c0f10de0afbd35c8a560a9cb81cc454
5
5
  SHA512:
6
- metadata.gz: e79cdf4659e79523dccba90d68b64e6b963511eded9621c7db79f15864292f1508b2034354511199d5e140da71e3421ea3d1bbc5e617d67a0873aa4bb3ae6504
7
- data.tar.gz: e2f843ce61d278521a4ebd75e3d71979947f9cd53d0fe4e825ca49d305fb86d53088d96c6bd43238e594ae77369ce4143fe61a2b19f5d704817d47d529c29f74
6
+ metadata.gz: 851e356e7cd32ac7c43d1070532ee596e70df2bd19433e794297ce7f94f4ee32895791b2719cbff3063b437c778c24784215ce6b7ef7577a93b9b47f56035019
7
+ data.tar.gz: 4500d3135dd6bd2fcdf9eece3eba5b89035fb644879c268515469865b843c9ba0756be2e797a5070387b70657ed414c173b22456543ccea0c02125c4e53db501
data/README.md CHANGED
@@ -39,40 +39,40 @@ gem 'confidential_info_redactor_lite'
39
39
  ```ruby
40
40
  text = 'Coca-Cola announced a merger with Pepsi that will happen on December 15th, 2020 for $200,000,000,000. Please contact John Smith at j.smith@example.com or visit http://www.super-fake-merger.com.'
41
41
  corpus = ['array', 'of', 'common', 'english', 'words']
42
- tokens = ConfidentialInfoRedactorLite::Extractor.new(text: text, corpus: corpus).extract
42
+ tokens = ConfidentialInfoRedactorLite::Extractor.new(corpus: corpus).extract(text)
43
43
  # => ["Coca-Cola", "Pepsi", "John Smith"]
44
44
 
45
45
  en_dow = %w(monday tuesday wednesday thursday friday saturday sunday)
46
46
  en_dow_abbr = %w(mon tu tue tues wed th thu thur thurs fri sat sun)
47
47
  en_months = %w(january february march april may june july august september october november december)
48
48
  en_month_abbr = %w(jan feb mar apr jun jul aug sep sept oct nov dec)
49
- ConfidentialInfoRedactorLite::Redactor.new(text: text, tokens: tokens, dow: en_dow, dow_abbr: en_dow_abbr, months: en_months, months_abbr: en_month_abbr).redact
49
+ ConfidentialInfoRedactorLite::Redactor.new(tokens: tokens, dow: en_dow, dow_abbr: en_dow_abbr, months: en_months, months_abbr: en_month_abbr).redact(text)
50
50
  # => '<redacted> announced a merger with <redacted> that will happen on <redacted date> for <redacted number>. Please contact <redacted> at <redacted> or visit <redacted>.'
51
51
 
52
52
  # You can also just use a specific redactor
53
- ConfidentialInfoRedactorLite::Redactor.new(text: text, dow: en_dow, dow_abbr: en_dow_abbr, months: en_months, months_abbr: en_month_abbr).dates
53
+ ConfidentialInfoRedactorLite::Redactor.new(dow: en_dow, dow_abbr: en_dow_abbr, months: en_months, months_abbr: en_month_abbr).dates(text)
54
54
  # => 'Coca-Cola announced a merger with Pepsi that will happen on <redacted date> for $200,000,000,000. Please contact John Smith at j.smith@example.com or visit http://www.super-fake-merger.com.'
55
55
 
56
- ConfidentialInfoRedactorLite::Redactor.new(text: text, dow: en_dow, dow_abbr: en_dow_abbr, months: en_months, months_abbr: en_month_abbr).numbers
56
+ ConfidentialInfoRedactorLite::Redactor.new(dow: en_dow, dow_abbr: en_dow_abbr, months: en_months, months_abbr: en_month_abbr).numbers(text)
57
57
  # => 'Coca-Cola announced a merger with Pepsi that will happen on December <redacted number>, <redacted number> for <redacted number>. Please contact John Smith at j.smith@example.com or visit http://www.super-fake-merger.com.'
58
58
 
59
- ConfidentialInfoRedactorLite::Redactor.new(text: text, dow: en_dow, dow_abbr: en_dow_abbr, months: en_months, months_abbr: en_month_abbr).emails
59
+ ConfidentialInfoRedactorLite::Redactor.new(dow: en_dow, dow_abbr: en_dow_abbr, months: en_months, months_abbr: en_month_abbr).emails(text)
60
60
  # => 'Coca-Cola announced a merger with Pepsi that will happen on December 15th, 2020 for $200,000,000,000. Please contact John Smith at <redacted> or visit http://www.super-fake-merger.com.'
61
61
 
62
- ConfidentialInfoRedactorLite::Redactor.new(text: text, dow: en_dow, dow_abbr: en_dow_abbr, months: en_months, months_abbr: en_month_abbr).hyperlinks
62
+ ConfidentialInfoRedactorLite::Redactor.new(dow: en_dow, dow_abbr: en_dow_abbr, months: en_months, months_abbr: en_month_abbr).hyperlinks(text)
63
63
  # => 'Coca-Cola announced a merger with Pepsi that will happen on December 15th, 2020 for $200,000,000,000. Please contact John Smith at j.smith@example.com or visit <redacted>.'
64
64
 
65
- ConfidentialInfoRedactorLite::Redactor.new(text: text, tokens: tokens, dow: en_dow, dow_abbr: en_dow_abbr, months: en_months, months_abbr: en_month_abbr).proper_nouns
65
+ ConfidentialInfoRedactorLite::Redactor.new(tokens: tokens, dow: en_dow, dow_abbr: en_dow_abbr, months: en_months, months_abbr: en_month_abbr).proper_nouns(text)
66
66
  # => '<redacted> announced a merger with <redacted> that will happen on December 15th, 2020 for $200,000,000,000. Please contact <redacted> at j.smith@example.com or visit http://www.super-fake-merger.com.'
67
67
 
68
68
  # It is possible to 'turn off' any of the specific redactors
69
- ConfidentialInfoRedactorLite::Redactor.new(text: text, tokens: tokens, ignore_numbers: true, dow: en_dow, dow_abbr: en_dow_abbr, months: en_months, months_abbr: en_month_abbr).redact
69
+ ConfidentialInfoRedactorLite::Redactor.new(tokens: tokens, ignore_numbers: true, dow: en_dow, dow_abbr: en_dow_abbr, months: en_months, months_abbr: en_month_abbr).redact(text)
70
70
  # => '<redacted> announced a merger with <redacted> that will happen on <redacted date> for $200,000,000,000. Please contact <redacted> at <redacted> or visit <redacted>.'
71
71
 
72
72
  # It is also possible to change the redaction text
73
73
  text = 'Coca-Cola announced a merger with Pepsi that will happen on December 15th, 2020 for $200,000,000,000. Please contact John Smith at j.smith@example.com or visit http://www.super-fake-merger.com.'
74
74
  tokens = ['Coca-Cola', 'Pepsi', 'John Smith']
75
- ConfidentialInfoRedactorLite::Redactor.new(text: text, tokens: tokens, number_text: '**redacted number**', date_text: '^^redacted date^^', token_text: '*****', dow: en_dow, dow_abbr: en_dow_abbr, months: en_months, months_abbr: en_month_abbr).redact
75
+ ConfidentialInfoRedactorLite::Redactor.new(tokens: tokens, number_text: '**redacted number**', date_text: '^^redacted date^^', token_text: '*****', dow: en_dow, dow_abbr: en_dow_abbr, months: en_months, months_abbr: en_month_abbr).redact(text)
76
76
  # => '***** announced a merger with ***** that will happen on ^^redacted date^^ for **redacted number**. Please contact ***** at ***** or visit *****.'
77
77
  ```
78
78
 
@@ -5,7 +5,7 @@ module ConfidentialInfoRedactorLite
5
5
  # This class redacts various tokens from a text
6
6
  class Redactor
7
7
  # Rubular: http://rubular.com/r/OI2wQZ0KSl
8
- NUMBER_REGEX = /(?<=\A|\A\()[^(]?\d+((,|\.)*\d)*(\D?\s|\s|\.?\s|\.$)|(?<=\s|\s\()[^(]?\d+((,|\.)*\d)*(?=(\D?\s|\s|\.?\s|\.$))|(?<=\s)\d+(nd|th|st)|(?<=\s)\d+\/\d+\"*(?=\s)|(?<=\()\S{1}\d+(?=\))|(?<=\s{1})\S{1}\d+\z|^\d+$/
8
+ NUMBER_REGEX = /(?<=\A|\A\()[^(]?\d+((,|\.)*\d)*(\D?\s|\s|\.?\s|\.$)|(?<=\s|\s\(|\s'|\s‘)[^('‘]?\d+((,|\.)*\d)*(?=(\D?\s|\s|\.?\s|\.$))|(?<=\s)\d+(nd|th|st)|(?<=\s)\d+\/\d+\"*(?=\s)|(?<=\()\S{1}\d+(?=\))|(?<=\s{1})\S{1}\d+\z|^\d+$/
9
9
  # Rubular: http://rubular.com/r/mxcj2G0Jfa
10
10
  EMAIL_REGEX = /(?<=\A|\s|\()[\w+\-.]+@[a-z\d\-]+(\.[a-z]+)*\.[a-z]+(?=\z|\s|\.|\))/i
11
11
 
@@ -117,7 +117,13 @@ module ConfidentialInfoRedactorLite
117
117
  original_sentence_array = txt.split(' ')
118
118
  redacted_sentence_array = redacted_text.split(' ')
119
119
  diff = original_sentence_array - redacted_sentence_array
120
- final_number_tokens = diff.map { |token| token[-1].eql?('.') ? token[0...-1] : token }.map { |token| token[-1].eql?(')') ? token[0...-1] : token }.map { |token| token[0].eql?('(') ? token[1..token.length] : token }
120
+ final_number_tokens = diff.map { |token| token[-1].eql?('.') ? token[0...-1] : token }
121
+ .map { |token| token[-1].eql?(')') ? token[0...-1] : token }
122
+ .map { |token| token[-1].eql?("'") ? token[0...-1] : token }
123
+ .map { |token| token[-1].eql?('’') ? token[0...-1] : token }
124
+ .map { |token| token[0].eql?('(') ? token[1..token.length] : token }
125
+ .map { |token| token[0].eql?("'") ? token[1..token.length] : token }
126
+ .map { |token| token[0].eql?("‘") ? token[1..token.length] : token }
121
127
  end
122
128
  [redacted_text.gsub(/(?<=[^\>]|\A)#{Regexp.escape(number_text)}/, "<span class='confidentialNumber'>#{number_text}</span>"), final_number_tokens]
123
129
  end
@@ -1,3 +1,3 @@
1
1
  module ConfidentialInfoRedactorLite
2
- VERSION = "1.0.0"
2
+ VERSION = "1.0.1"
3
3
  end