redactor 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 7930ab02836e02e6b1a54666b637cef619becde9
4
+ data.tar.gz: 5005ab98df807eed1cbecb5618b8db60be254629
5
+ SHA512:
6
+ metadata.gz: 1ea215bfc5261dbbb927fe8d72dd0e6f206205ae4ac2e1b502034d14ae9799a045ce2a07c9fe1965e0b5380edcb414ccec58fb077d5bed9ff92e616a2fb6825e
7
+ data.tar.gz: 80ea8bbce965ea23c314b6c8e004f98617355dff8af5be3225f544b65d319354b775c7e3310bb9c2650597182d5a06c6d55d9174451df06e4efd5493cfee9ae2
data/.gitignore ADDED
@@ -0,0 +1 @@
1
+ *.gem
data/Gemfile ADDED
@@ -0,0 +1,3 @@
1
+ source 'https://rubygems.org'
2
+
3
+ gemspec
data/Gemfile.lock ADDED
@@ -0,0 +1,31 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ redactor (0.1.0)
5
+
6
+ GEM
7
+ remote: https://rubygems.org/
8
+ specs:
9
+ diff-lcs (1.2.5)
10
+ rake (10.4.2)
11
+ rspec (3.3.0)
12
+ rspec-core (~> 3.3.0)
13
+ rspec-expectations (~> 3.3.0)
14
+ rspec-mocks (~> 3.3.0)
15
+ rspec-core (3.3.2)
16
+ rspec-support (~> 3.3.0)
17
+ rspec-expectations (3.3.1)
18
+ diff-lcs (>= 1.2.0, < 2.0)
19
+ rspec-support (~> 3.3.0)
20
+ rspec-mocks (3.3.2)
21
+ diff-lcs (>= 1.2.0, < 2.0)
22
+ rspec-support (~> 3.3.0)
23
+ rspec-support (3.3.0)
24
+
25
+ PLATFORMS
26
+ ruby
27
+
28
+ DEPENDENCIES
29
+ rake (~> 10.4.2)
30
+ redactor!
31
+ rspec (~> 3.3.0)
data/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2015 Tim Petricola
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,52 @@
1
+ # redactor
2
+
3
+ Redact parts of text defined by custom rules (e.g. emails, phone numbers).
4
+
5
+ ## Usage
6
+
7
+ ```rb
8
+ require 'redactor'
9
+
10
+ input = 'To ride a kayak, contact me: tim.petricola@gmail.com or 1 234 567 8901.'
11
+
12
+ # these rules are only good enough for an example
13
+ Redactor.define do
14
+ # US phone
15
+ rule :phone, /(\+?1[ \.-]?)?\(?\d{3}\)?[ \.-]?\d{3}[ \.-]?\d{4}/
16
+
17
+ # email
18
+ rule :email, /[\w\.]+(@|at)\w+(\.|dot)\w{1,3}/i
19
+
20
+ # palindrome
21
+ rule :palindrome do |input|
22
+ words = input.split(/\W+/)
23
+ palindromes = words.select { |w| w.length > 2 && w == w.reverse }
24
+ palindromes.map do |w|
25
+ start = input.index(w)
26
+ finish = start + w.length
27
+ [start, finish]
28
+ end
29
+ end
30
+ end
31
+
32
+ Redactor.format(input)
33
+ # => "To ride a [REDACTED], contact me: [REDACTED] or [REDACTED]."
34
+
35
+ Redactor.format(input) do |extract|
36
+ "[#{extract.reason.upcase}]"
37
+ end
38
+ # => "To ride a [PALINDROME], contact me: [EMAIL] or [PHONE]."
39
+
40
+ Redactor.extract(input)
41
+ # => [#<Redactor::Extract:0x007fc05c6b25a0
42
+ # @finish=70,
43
+ # @rule=#<Redactor::Rule:0x007fc05f203970 @block=nil, @reason=:phone, @regex=/(\+?1[ \.-]?)?\(?\d{3}\)?[ \.-]?\d{3}[ \.-]?\d{4}/>,
44
+ # @start=56,
45
+ # @value="1 234 567 8901">,
46
+ # #<Redactor::Extract:0x007fc05c6b2230 @finish=52, @rule=#<Redactor::Rule:0x007fc05f203948 @block=nil, @reason=:email, @regex=/[\w\.]+(@|at)\w+(\.|dot)\w{1,3}/i>, @start=29, @value="tim.petricola@gmail.com">,
47
+ # #<Redactor::Extract:0x007fc05c6b19c0
48
+ # @finish=15,
49
+ # @rule=#<Redactor::Rule:0x007fc05f2038d0 @block=#<Proc:0x007fc05f2038f8@/Users/Tim/Projects/redact/test.rb:13>, @reason=:palindrome, @regex=nil>,
50
+ # @start=10,
51
+ # @value="kayak">]
52
+ ```
data/Rakefile ADDED
@@ -0,0 +1,5 @@
1
+ require 'rspec/core/rake_task'
2
+
3
+ RSpec::Core::RakeTask.new(:spec)
4
+
5
+ task default: :spec
@@ -0,0 +1,5 @@
1
module Redactor
  class << self
    # Forgets every registered rule by assigning a brand-new empty list
    # to the +rules+ accessor (the previous array object is left untouched).
    def clear
      self.rules = []
    end
  end
end
@@ -0,0 +1,12 @@
1
module Redactor
  # Evaluation context for rule-definition blocks (see Redactor.define).
  class DSL
    class << self
      # Evaluates +block+ against a fresh DSL instance, so bare `rule ...`
      # calls inside the block resolve to DSL#rule.
      def run(block)
        new.instance_eval(&block)
      end
    end

    # Builds a Rule from +reason+ plus either +regex+ or a block and hands
    # it to Redactor.register_rule.
    def rule(reason, regex = nil, &block)
      Redactor.register_rule(Rule.new(reason, regex, &block))
    end
  end
end
@@ -0,0 +1,22 @@
1
module Redactor
  # One matched span of an input string, together with the rule that found it.
  class Extract
    attr_reader :rule, :value, :start, :finish

    # rule::   object that produced the match; must respond to #reason
    # value::  the matched text
    # start::  offset where the match begins
    # finish:: offset where the match ends
    def initialize(rule:, value:, start:, finish:)
      @rule = rule
      @value = value
      @start = start
      @finish = finish
    end

    # Shortcut for the reason of the rule behind this extract.
    def reason
      rule.reason
    end

    # Returns true when +extract+ overlaps this extract.
    #
    # Bounds are compared inclusively, so two spans that merely touch
    # (one's finish equals the other's start) count as colliding.
    # Comparing both ends of both spans also detects an +extract+ that
    # completely encloses this one, which an endpoint-inside-range check
    # cannot see.
    def collides?(extract)
      extract.start <= finish && start <= extract.finish
    end
  end
end
@@ -0,0 +1,49 @@
1
module Redactor
  # A named detection rule backed by exactly one of:
  # * a regex, whose every match becomes an Extract, or
  # * a block, which receives the input and returns [start, finish] pairs.
  class Rule
    attr_reader :reason, :regex, :block

    # reason:: label attached to every extract produced by this rule
    # regex::  optional pattern to scan the input with
    # block::  optional callable returning position pairs for an input
    #
    # Raises ArgumentError when both or neither of +regex+ and a block
    # are supplied.
    def initialize(reason, regex = nil, &block)
      if regex && block
        raise ArgumentError, 'cannot have a regex and a block'
      elsif !regex && !block
        raise ArgumentError, 'must have a regex or a block'
      end

      @reason = reason
      @regex = regex
      @block = block
    end

    # Returns an array of Extract objects found in +input+.
    def extract(input)
      regex ? extract_regex(input) : extract_block(input)
    end

    private

    # One Extract per regex match. The scan enumerator is used (instead of
    # the strings scan would return) so that Regexp.last_match exposes the
    # MatchData, and with it the offsets, of each match.
    def extract_regex(input)
      input.enum_for(:scan, regex).map do
        match = Regexp.last_match
        Extract.new(
          rule: self,
          value: match[0],
          start: match.begin(0),
          finish: match.end(0)
        )
      end
    end

    # One Extract per [start, finish] pair returned by the block; the value
    # is the slice input[start...finish].
    def extract_block(input)
      position_pairs(block.call(input)).map do |start, finish|
        Extract.new(
          rule: self,
          value: input[start...finish],
          start: start,
          finish: finish
        )
      end
    end

    # Normalizes the block's return value to a list of pairs.
    # nil becomes an empty list. A lone [start, finish] pair is itself an
    # Array, so Array() cannot wrap it; it is detected and wrapped here so
    # it is not iterated as two separate integers.
    def position_pairs(result)
      pairs = Array(result)
      lone_pair = pairs.size == 2 && pairs.all? { |item| item.is_a?(Integer) }
      lone_pair ? [pairs] : pairs
    end
  end
end
@@ -0,0 +1,3 @@
1
module Redactor
  # Gem version string; frozen so it cannot be mutated in place.
  VERSION = '0.1.0'.freeze
end
data/lib/redactor.rb ADDED
@@ -0,0 +1,45 @@
1
+ require 'redactor/clear'
2
+ require 'redactor/dsl'
3
+ require 'redactor/extract'
4
+ require 'redactor/rule'
5
+
6
module Redactor
  # Replacement used by Redactor.format when the caller gives no block.
  DEFAULT_REPLACEMENT = '[REDACTED]'.freeze

  class << self
    # rules::               registered rules, in registration order
    # default_replacement:: string substituted for each extract by default
    attr_accessor :rules
    attr_accessor :default_replacement
  end

  self.rules = []
  self.default_replacement = DEFAULT_REPLACEMENT

  # Registers rules through the DSL, e.g.
  #   Redactor.define { rule :foo, /foo/ }
  def self.define(&block)
    DSL.run(block)
  end

  # Appends +rule+ to the registry. Order matters: in .extract, matches of
  # earlier rules take precedence over colliding matches of later rules.
  def self.register_rule(rule)
    rules.push(rule)
  end

  # Returns a list of Extract objects (including position and value)
  # matching the registered rules, grouped in rule registration order.
  # A match is dropped when it collides with a match kept from an earlier
  # rule.
  def self.extract(from)
    rules.reduce([]) do |extracts, rule|
      new_extracts = rule.extract(from).reject do |candidate|
        extracts.any? { |kept| kept.collides?(candidate) }
      end
      extracts.concat(new_extracts)
    end
  end

  # Returns a copy of +text+ in which every extract is replaced by
  # default_replacement, or by the result of the block when one is given
  # (the block receives the Extract object). +text+ itself is not modified.
  def self.format(text, &block)
    # Resolve replacements in extraction order, so a block observes the
    # extracts in the same order .extract returns them.
    replacements = extract(text).map do |extract|
      [extract, block ? block.call(extract) : default_replacement]
    end

    # Offsets refer to the original text. Substituting from the rightmost
    # extract to the leftmost keeps the offsets of the remaining extracts
    # valid even when a replacement has a different length. dup (unlike
    # clone) yields an unfrozen copy, so frozen input can be formatted.
    replacements
      .sort_by { |extract, _sub| -extract.start }
      .each_with_object(text.dup) do |(extract, sub), redacted_text|
        redacted_text[extract.start...extract.finish] = sub
      end
  end
end
data/redactor.gemspec ADDED
@@ -0,0 +1,17 @@
1
+ require File.expand_path '../lib/redactor/version', __FILE__
2
+
3
# Packaging metadata for the redactor gem.
Gem::Specification.new do |spec|
  # identity
  spec.name     = 'redactor'
  spec.version  = Redactor::VERSION
  spec.summary  = 'Redact parts of text defined by custom rules (e.g. emails, phone numbers)'
  spec.homepage = 'https://github.com/TimPetricola/redactor'
  spec.license  = 'MIT'
  spec.authors  = ['Tim Petricola']
  spec.email    = ['tim.petricola@gmail.com']

  # contents: every file tracked by git; tracked files under test/, spec/
  # or features/ are additionally listed as test files
  spec.files         = `git ls-files`.split("\n")
  spec.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  spec.require_paths = ['lib']

  # development-only tooling
  spec.add_development_dependency('rspec', '~> 3.3.0')
  spec.add_development_dependency('rake', '~> 10.4.2')
end
@@ -0,0 +1,11 @@
1
+ require 'spec_helper'
2
+
3
# Covers Redactor.clear: the rule registry must be empty afterwards.
describe 'clear' do
  after do
    Redactor.clear
  end

  it 'unload all rules' do
    # start from a known-empty registry, then register exactly one rule
    Redactor.clear
    Redactor.register_rule(double)

    expect do
      Redactor.clear
    end.to change { Redactor.rules.size }.from(1).to(0)
  end
end
@@ -0,0 +1,42 @@
1
+ require 'spec_helper'
2
+
3
# Covers Redactor::DSL: each `rule` call inside a definition block must
# build a Redactor::Rule and register it with Redactor.
describe Redactor::DSL do
  describe 'new rule' do
    context 'with regex' do
      it 'is registered' do
        definitions = proc do
          rule :foo, /foo/
        end

        rule = double('Redactor::Rule')
        expect(Redactor::Rule).to(
          receive(:new).with(:foo, /foo/).and_return(rule)
        )
        expect(Redactor).to receive(:register_rule).with(rule)

        Redactor::DSL.run(definitions)
      end
    end

    context 'with block' do
      it 'is registered' do
        # A real Proc is required here: with a nil block the identity
        # check below would pass vacuously (nil is nil) and the example
        # would not exercise block forwarding at all.
        handler = proc { |input| input }

        definitions = proc do
          rule(:foo, &handler)
        end

        rule = double('Redactor::Rule')
        expect(Redactor::Rule).to(
          receive(:new).with(:foo, nil) do |*_args, &block|
            # the block given to `rule` must reach Rule.new unchanged
            expect(block).to be(handler)
            rule
          end
        )
        expect(Redactor).to receive(:register_rule).with(rule)

        Redactor::DSL.run(definitions)
      end
    end
  end
end
@@ -0,0 +1,29 @@
1
+ require 'spec_helper'
2
+
3
# Covers Redactor::Extract#collides? against a reference extract
# spanning 10..20.
describe Redactor::Extract do
  # Builds an extract covering the given positions; rule and value are
  # irrelevant for collision checks.
  def extract_at(start, finish)
    Redactor::Extract.new(
      rule: double,
      value: '',
      start: start,
      finish: finish
    )
  end

  describe '#collides?' do
    let(:extract) { extract_at(10, 20) }

    context 'no collision' do
      it 'is false' do
        other = extract_at(0, 9)
        expect(extract.collides?(other)).to be false
      end
    end

    context 'lower collision' do
      it 'is true' do
        # the other extract ends exactly where the reference starts
        other = extract_at(0, 10)
        expect(extract.collides?(other)).to be true
      end
    end

    context 'upper collision' do
      it 'is true' do
        # the other extract starts exactly where the reference ends
        other = extract_at(20, 25)
        expect(extract.collides?(other)).to be true
      end
    end
  end
end
@@ -0,0 +1,55 @@
1
+ require 'spec_helper'
2
+
3
# Covers the module-level API: Redactor.extract and Redactor.format.
describe Redactor do
  # Registration order is foobar first, barbaz second; it decides which
  # rule wins when matches collide.
  let(:foobar_rule) { Redactor::Rule.new(:foobar, /foobar/) }
  let(:barbaz_rule) { Redactor::Rule.new(:barbaz, /barbaz/) }

  before do
    Redactor.register_rule(foobar_rule)
    Redactor.register_rule(barbaz_rule)
  end

  after do
    Redactor.clear
  end

  describe '.extract' do
    it 'return extracts' do
      results = Redactor.extract('hello barbaz foobar')
      first_extract = results.first
      last_extract = results.last

      expect(first_extract.rule).to be foobar_rule
      expect(first_extract.value).to eq 'foobar'
      expect(last_extract.rule).to be barbaz_rule
      expect(last_extract.value).to eq 'barbaz'
    end

    context 'colliding rules' do
      it "only keep first defined rule's match" do
        results = Redactor.extract('hello foobarbaz')

        expect(results.size).to eq 1
        expect(results.first.rule).to be foobar_rule
      end
    end
  end

  describe '.format' do
    it 'replaces redacted parts' do
      redacted = Redactor.format('hello barbaz foobar')
      expect(redacted).to eq 'hello [REDACTED] [REDACTED]'
    end

    it 'accepts a block for redacted strings' do
      redacted = Redactor.format('hello barbaz foobar') do |extract|
        "[#{extract.reason.upcase}]"
      end

      expect(redacted).to eq 'hello [BARBAZ] [FOOBAR]'
    end

    it 'does not mutate original string' do
      text = 'hello barbaz foobar'
      Redactor.format(text)
      expect(text).to eq 'hello barbaz foobar'
    end
  end
end
@@ -0,0 +1,89 @@
1
+ require 'spec_helper'
2
+
3
# Covers Redactor::Rule: construction constraints and #extract for both
# regex-backed and block-backed rules.
describe Redactor::Rule do
  it 'can be built with a regex' do
    built = Redactor::Rule.new(:foo, /foo/)

    expect(built.reason).to eq :foo
    expect(built.regex).to eq(/foo/)
  end

  it 'can be built with a block' do
    built = Redactor::Rule.new(:foo) { |input| input }

    expect(built.reason).to eq :foo
    expect(built.block).not_to be_nil
  end

  it 'needs a regex or a block' do
    expect { Redactor::Rule.new(:foo) }.to raise_error(ArgumentError)
  end

  it 'can not have a regex and a block' do
    build_with_both = -> { Redactor::Rule.new(:foo, /foo/) { |input| input } }

    expect { build_with_both.call }.to raise_error(ArgumentError)
  end

  describe '#extract' do
    context 'with regex' do
      let(:rule) { Redactor::Rule.new(:foo_bar, /foo ?bar/) }
      let(:text) do
        'Lorem foo bar ipsum dolor sit amet, foobar adipiscing elit.'
      end
      let(:results) { rule.extract(text) }

      it 'returns values and positions of first match' do
        first_match = results.first

        expect(first_match.rule).to be rule
        expect(first_match.value).to eq 'foo bar'
        expect(first_match.start).to eq 6
        expect(first_match.finish).to eq 13
      end

      it 'returns values and positions of others matches' do
        second_match = results[1]

        expect(second_match.rule).to be rule
        expect(second_match.value).to eq 'foobar'
        expect(second_match.start).to eq 36
        expect(second_match.finish).to eq 42
      end
    end

    context 'with block' do
      let(:text) do
        'I want a kayak, a poney or a racecar.'
      end

      # The block reports every word longer than two characters that reads
      # the same backwards, located via its first occurrence in the input.
      let(:rule) do
        Redactor::Rule.new(:palindrome) do |input|
          words = input.split(/\W+/)
          palindromes = words.select { |w| w.length > 2 && w == w.reverse }
          palindromes.map do |word|
            first_index = input.index(word)
            [first_index, first_index + word.length]
          end
        end
      end

      let(:results) { rule.extract(text) }

      it 'returns values and positions of first match' do
        first_match = results.first

        expect(first_match.rule).to be rule
        expect(first_match.value).to eq 'kayak'
        expect(first_match.start).to eq 9
        expect(first_match.finish).to eq 14
      end

      it 'returns values and positions of others matches' do
        second_match = results[1]

        expect(second_match.rule).to be rule
        expect(second_match.value).to eq 'racecar'
        expect(second_match.start).to eq 29
        expect(second_match.finish).to eq 36
      end
    end
  end
end
@@ -0,0 +1,42 @@
1
+ require 'spec_helper'
2
+
3
# Passes when the whole subject string is extracted by Redactor under the
# given reason (a partial match of the string does not count).
RSpec::Matchers.define :be_redacted do |expected_reason|
  match do |candidate|
    Redactor.extract(candidate).any? do |found|
      found.value == candidate && found.reason == expected_reason
    end
  end
end

# Exercises the example phone and email rules against sample strings.
describe 'Redactor rules' do
  before do
    Redactor.define do
      # US phone
      rule :phone, /(\+?1[ \.-]?)?\(?\d{3}\)?[ \.-]?\d{3}[ \.-]?\d{4}/

      # email
      rule :email, /[\w\.]+ ?(@|at) ?\w+\ ?(\.|dot) ?\w{1,3}/i
    end
  end

  after do
    Redactor.clear
  end

  # sample strings grouped by the reason they must be redacted for
  samples = {
    phone: [
      '1 234 567 8901',
      '12345678901',
      '1-234-567-8901',
      '1.234.567.8901',
      '+1-234-567-8901',
      '1-(234) 5678901'
    ],
    email: [
      'foo@bar.baz',
      'foo.bar@baz.qux',
      'FOO@bar.baz',
      'foo @ bar . baz',
      'foo at bar dot baz',
      'foo AT bar DOT baz'
    ]
  }

  samples.each do |reason, values|
    values.each do |value|
      describe(value) do
        it { should be_redacted(reason) }
      end
    end
  end
end
@@ -0,0 +1 @@
1
+ require 'redactor'
metadata ADDED
@@ -0,0 +1,99 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: redactor
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Tim Petricola
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2015-10-28 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: rspec
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: 3.3.0
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: 3.3.0
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: 10.4.2
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: 10.4.2
41
+ description:
42
+ email:
43
+ - tim.petricola@gmail.com
44
+ executables: []
45
+ extensions: []
46
+ extra_rdoc_files: []
47
+ files:
48
+ - ".gitignore"
49
+ - Gemfile
50
+ - Gemfile.lock
51
+ - LICENSE
52
+ - README.md
53
+ - Rakefile
54
+ - lib/redactor.rb
55
+ - lib/redactor/clear.rb
56
+ - lib/redactor/dsl.rb
57
+ - lib/redactor/extract.rb
58
+ - lib/redactor/rule.rb
59
+ - lib/redactor/version.rb
60
+ - redactor.gemspec
61
+ - spec/lib/clear_spec.rb
62
+ - spec/lib/dsl_spec.rb
63
+ - spec/lib/extract_spec.rb
64
+ - spec/lib/redact_spec.rb
65
+ - spec/lib/rule_spec.rb
66
+ - spec/lib/rules_spec.rb
67
+ - spec/spec_helper.rb
68
+ homepage: https://github.com/TimPetricola/redactor
69
+ licenses:
70
+ - MIT
71
+ metadata: {}
72
+ post_install_message:
73
+ rdoc_options: []
74
+ require_paths:
75
+ - lib
76
+ required_ruby_version: !ruby/object:Gem::Requirement
77
+ requirements:
78
+ - - ">="
79
+ - !ruby/object:Gem::Version
80
+ version: '0'
81
+ required_rubygems_version: !ruby/object:Gem::Requirement
82
+ requirements:
83
+ - - ">="
84
+ - !ruby/object:Gem::Version
85
+ version: '0'
86
+ requirements: []
87
+ rubyforge_project:
88
+ rubygems_version: 2.2.0
89
+ signing_key:
90
+ specification_version: 4
91
+ summary: Redact parts of text defined by custom rules (e.g. emails, phone numbers)
92
+ test_files:
93
+ - spec/lib/clear_spec.rb
94
+ - spec/lib/dsl_spec.rb
95
+ - spec/lib/extract_spec.rb
96
+ - spec/lib/redact_spec.rb
97
+ - spec/lib/rule_spec.rb
98
+ - spec/lib/rules_spec.rb
99
+ - spec/spec_helper.rb