anon 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/bin/anon +5 -0
- data/lib/anon/base.rb +70 -0
- data/lib/anon/cli.rb +57 -0
- data/lib/anon/csv/columns.rb +33 -0
- data/lib/anon/csv.rb +63 -0
- data/lib/anon/text.rb +44 -0
- data/lib/anon.rb +3 -0
- data/spec/intergration/cli_spec.rb +76 -0
- data/spec/spec_helper.rb +11 -0
- data/spec/unit/cli_spec.rb +46 -0
- data/spec/unit/columns_spec.rb +93 -0
- data/spec/unit/csv_spec.rb +76 -0
- data/spec/unit/text_spec.rb +24 -0
- metadata +142 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: b58dd8e96a5488b0ae05d43be7b9a9cc2162db34
|
4
|
+
data.tar.gz: e8ae73279bd357ab92e6aa258426d63206db3734
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 162ebe61b83c353976ff81da3d2882d32cbf9b0b464a3b6b8a67e3ad9bdce20abdaa7cadbdcd91ce0b3ac7838f1f72ef98dfe21b38ed6b104c7605e105f0c046
|
7
|
+
data.tar.gz: ab5a70456e30880db8e2a238454ddc003813c339218ccbd152ee3e7d5dac656cfd5a215948dab083d02bbad08b9552695641afb9f162b1402ea5245a784438c3
|
data/bin/anon
ADDED
data/lib/anon/base.rb
ADDED
@@ -0,0 +1,70 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
require 'time_difference'
|
4
|
+
|
5
|
+
module Anon
  # Anonymiser base class.
  #
  # Holds what the concrete anonymisers share: the table that maps each
  # personal e-mail to its replacement, and line-count progress reporting
  # on $stderr. Subclasses provide the instance method `anonymise!`.
  class Base
    # Performs anonymisation: builds an instance from +args+ and runs it.
    #
    # @param args [Array] forwarded unchanged to the initializer
    # @return the value of the instance-level `anonymise!`
    def self.anonymise!(*args)
      new(*args).anonymise!
    end

    protected

    # Helper method that replaces a personal e-mail with an anonymous one.
    #
    # The same personal e-mail will be replaced with the same anonymous
    # e-mail; replacements are numbered in order of first appearance
    # (anon1@anon.com, anon2@anon.com, ...).
    #
    # @param personal_email [Object] the value to replace (used as hash key)
    # @return [String] the anonymous e-mail assigned to that value
    def anonymous_email(personal_email)
      # The count is read before the new key is stored, so numbering starts at 1.
      anonymised_emails[personal_email] ||=
        "anon#{anonymised_emails.count + 1}@anon.com"
    end

    # Initializes progress tracking and prints the first progress line.
    def start_progress
      @progress = 0
      @started = Time.now
      update_progress
    end

    # Adds 1 to the progress count.
    def increment_progress
      @progress += 1
      update_progress
    end

    # Ends progress tracking and writes the summary to $stderr.
    #
    # Works even when no e-mail was replaced during the run; the replacement
    # table is then simply reported as empty.
    def complete_progress
      stopped = Time.now
      duration = TimeDifference.between(@started, stopped).in_seconds
      # Avoid dividing by zero when the run took no measurable time.
      average = duration == 0 ? @progress : (@progress.to_f / duration.to_f).round

      $stderr.puts "Read #{@progress} lines in #{duration} seconds (#{average} lines/s)"
      $stderr.puts "#{anonymised_emails.count} unique e-mails replaced"
    end

    private

    # Lazily created table of personal e-mail => anonymous e-mail.
    def anonymised_emails
      @anonymised_emails ||= {}
    end

    # Prints a progress line every 100 processed lines (including line 0).
    def update_progress
      output_progress if @progress % 100 == 0
    end

    # Rewrites the current terminal line on $stderr with the running count.
    def output_progress
      $stderr.print "Working... #{@progress}\r"
      $stderr.flush
    end
  end
end
|
data/lib/anon/cli.rb
ADDED
@@ -0,0 +1,57 @@
|
|
1
|
+
require 'thor'
|
2
|
+
|
3
|
+
module Anon
  # Command Line Interface for Anon.
  #
  # Exposes two commands, `csv` and `text`. Both read from --infile (or
  # STDIN) and write to --outfile (or STDOUT).
  class CLI < Thor
    desc 'csv [OPTIONS]', 'Anonymise a csv file'

    option :infile,
           aliases: [:i],
           desc: 'input filename to read from, reads from STDIN if omitted'

    option :outfile,
           aliases: [:o],
           desc: 'output filename write to, writes to STDOUT if omitted'

    option :columns,
           aliases: [:c],
           desc: "columns to anonymise, by index or name\n" \
                 'e.g. 0,1,5 or email-address,other_email, guesses based on header if omitted'

    option :header,
           type: :boolean,
           default: true,
           desc: 'if the csv file to be processed has a header row'

    # Anonymises the selected columns of a CSV stream.
    def csv
      require 'anon/csv' # loaded lazily so only the chosen command's code is required
      with_streams do |source, sink|
        Anon::CSV.anonymise!(source, sink, options[:columns], options[:header])
      end
    end

    desc 'text [OPTIONS]', 'Anonymise a text file'

    option :infile,
           aliases: [:i],
           desc: 'input filename to read from, reads from STDIN if omitted'

    option :outfile,
           aliases: [:o],
           desc: 'output filename write to, writes to STDOUT if omitted'

    # Anonymises every e-mail address found in a text stream.
    def text
      require 'anon/text' # loaded lazily so only the chosen command's code is required
      with_streams do |source, sink|
        Anon::Text.anonymise!(source, sink)
      end
    end

    private

    # @return [IO] the file named by --infile, or $stdin when it is not given
    def input
      options[:infile] ? File.open(options[:infile]) : $stdin
    end

    # @return [IO] the file named by --outfile (truncated), or $stdout
    def output
      options[:outfile] ? File.open(options[:outfile], 'w') : $stdout
    end

    # Yields the input and output streams, then closes any file this command
    # opened itself. $stdin and $stdout are never closed.
    def with_streams
      source = input
      sink = output
      yield source, sink
    ensure
      [source, sink].each do |stream|
        stream.close if stream.is_a?(File) && !stream.closed?
      end
    end
  end
end
|
@@ -0,0 +1,33 @@
|
|
1
|
+
require 'anon/base'
|
2
|
+
|
3
|
+
module Anon
|
4
|
+
class CSV < Base
|
5
|
+
# Works out which CSV columns should be anonymised.
#
# Columns come from, in order of preference: an explicit list of integer
# indexes, an explicit list of header names, or a guess based on which
# headers look like e-mail columns.
class Columns
  # @param columns [String, nil] comma-separated indexes or names, e.g.
  #   "0,1,5" or "email-address,other_email"; nil to guess from headers
  # @param headers [Array<String, nil>, nil] header row of the CSV; only
  #   consulted when +columns+ is nil
  def initialize(columns, headers)
    @columns, @headers = columns, headers
  end

  # @return [Array<Integer>, Array<String>] indexes or names to anonymise
  # @raise [ArgumentError] when no columns were given and there is no
  #   header row to guess from
  def to_anonymise
    @_to_anonymise ||= indexes || columns || best_guess
  end

  private

  attr_reader :headers

  # The requested columns as integers, or nil when none were requested or
  # at least one of them is not a valid integer (i.e. names were given).
  def indexes
    columns.map { |c| Integer(c) } if columns
  rescue ArgumentError
    nil
  end

  # The requested columns split on commas, or nil when none were requested.
  def columns
    @columns.split(',') if @columns
  end

  # Headers that look like they hold e-mail addresses ("email", "e-mail",
  # "e_mail", ...). Blank header cells are skipped.
  def best_guess
    # Fail loudly instead of silently leaving personal data in place.
    unless headers.respond_to?(:select)
      fail ArgumentError,
           'cannot guess the columns to anonymise without a header row, please specify columns'
    end

    headers.select { |h| h.to_s =~ /e.*mail/i }
  end
end
|
32
|
+
end
|
33
|
+
end
|
data/lib/anon/csv.rb
ADDED
@@ -0,0 +1,63 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
require 'anon/base'
|
4
|
+
require 'anon/csv/columns'
|
5
|
+
require 'csv'
|
6
|
+
|
7
|
+
module Anon
  # Swaps the contents of a chosen set of CSV columns for anonymous e-mails.
  class CSV < Base
    # @param input [IO] stream the CSV is read from
    # @param output [IO] stream the anonymised CSV is written to
    # @param columns_to_anonymise [String, nil] comma-separated indexes or
    #   header names; handed to Columns
    # @param has_header [Boolean] whether the first row is a header row
    def initialize(input, output, columns_to_anonymise, has_header = true)
      @input, @output = input, output
      @columns_to_anonymise = columns_to_anonymise
      @has_header = has_header
    end

    # Anonymises all content of the columns set in the initializer,
    # reporting progress as rows are processed.
    def anonymise!
      start_progress
      map_lines do |row|
        row.tap do
          anonymise(row)
          increment_progress
        end
      end
      complete_progress
    end

    private

    attr_reader :has_header

    # Replaces, in place, every targeted cell of +line+.
    def anonymise(line)
      columns.to_anonymise.each { |key| line[key] = anonymous_email(line[key]) }
    end

    # CSV reader wrapped around the raw input stream.
    def input
      @_input ||= ::CSV.new(@input, headers: has_header)
    end

    # CSV writer wrapped around the raw output stream; it is first built
    # after a row has been read, so the headers come from the parsed input.
    def output
      @_output ||= ::CSV.new(@output, write_headers: has_header, headers: headers)
    end

    # Column selector, built lazily from the requested columns and headers.
    def columns
      @_columns ||= Columns.new(@columns_to_anonymise, input.headers)
    end

    # Reads each row from the incoming CSV, hands it to the block and
    # writes the block's return value to the outgoing CSV.
    def map_lines
      loop do
        row = input.gets
        break unless row
        output.puts yield(row)
      end
    end

    # Headers as reported by the CSV reader.
    def headers
      input.headers
    end
  end
end
|
data/lib/anon/text.rb
ADDED
@@ -0,0 +1,44 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
require 'anon/base'
|
4
|
+
|
5
|
+
module Anon
  # Anonymises any detected e-mail address in a text stream.
  class Text < Base
    # From the email regex research: http://fightingforalostcause.net/misc/2006/compare-email-regex.php
    # Authors: James Watts and Francisco Jose Martin Moreno
    #
    # The leading character class lists the punctuation allowed in the
    # dot-separated parts of the local name, including a literal "$" and "^".
    # They are escaped so they are neither interpolated nor read as anchors.
    EMAIL_REGEX = /([\w\!\#\$\%\&\'\*\+\-\/\=\?\^\`{\|\}\~]+\.)*[\w\+-]+@((((([a-z0-9]{1}[a-z0-9\-]{0,62}[a-z0-9]{1})|[a-z])\.)+[a-z]{2,6})|(\d{1,3}\.){3}\d{1,3}(\:\d{1,5})?)/i # rubocop:disable Metrics/LineLength

    # Despite the parameter names, both arguments are stream objects:
    # +incoming_filename+ must respond to `gets` and +outgoing_filename+
    # to `puts`.
    #
    # @param incoming_filename [#gets] stream the text is read from
    # @param outgoing_filename [#puts] stream the anonymised text is written to
    def initialize(incoming_filename, outgoing_filename)
      @input = incoming_filename
      @output = outgoing_filename
    end

    # Anonymises any e-mail addresses found in the text, line by line,
    # reporting progress as lines are processed.
    def anonymise!
      start_progress
      map_lines do |line|
        line = anonymise_line(line)
        increment_progress
        line
      end
      complete_progress
    end

    private

    attr_reader :input, :output

    # Reads each line from the incoming stream, processes it using the block
    # and writes the return value of the block to the outgoing stream.
    def map_lines
      while (inline = input.gets)
        output.puts yield(inline)
      end
    end

    # Returns a copy of +line+ with every e-mail address replaced.
    def anonymise_line(line)
      line.gsub(EMAIL_REGEX) { |email| anonymous_email(email) }
    end
  end
end
|
data/lib/anon.rb
ADDED
@@ -0,0 +1,76 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
require 'fileutils'
|
3
|
+
|
4
|
+
# End-to-end checks that run the bin/anon executable against the fixtures.
describe 'The comand line interface' do
  # What every run over the fixture with headers is expected to print.
  let(:expected_output) do
    <<'CSV'
pgid_sku,contract start date,e---mail,post_code,product_category,transmission_type
F^u4bgebdfds, 15/07/1756,anon1@anon.com, XY1 TH7, jokes > joke books, oily
FNY7dujh, 15/11/1856,anon2@anon.com, CZ1 NJ7, books > dusty, automatic
CSV
  end

  describe 'text' do
    context 'reading from a file' do
      subject(:output) { `bin/anon text -i spec/fixture/csv_with_headers.csv` }

      it { is_expected.to eq expected_output }
    end

    context 'reading from stdin' do
      subject(:output) { `cat spec/fixture/csv_with_headers.csv | bin/anon text` }

      it { is_expected.to eq expected_output }
    end

    context 'writing to a file' do
      # Remove any stale result, run the command, then read what it wrote.
      subject(:output) do
        FileUtils.rm_rf('spec/fixture/test.out')
        `cat spec/fixture/csv_with_headers.csv | bin/anon text -o spec/fixture/test.out`
        File.read('spec/fixture/test.out')
      end

      it { is_expected.to eq expected_output }
    end
  end

  describe 'csv' do
    context 'reading from a file' do
      subject(:output) { `bin/anon csv -i spec/fixture/csv_with_headers.csv -c 2` }

      it { is_expected.to eq expected_output }
    end

    context 'reading from stdin' do
      subject(:output) { `cat spec/fixture/csv_with_headers.csv | bin/anon csv -c 2` }

      it { is_expected.to eq expected_output }
    end

    context 'writing to a file' do
      # Remove any stale result, run the command, then read what it wrote.
      subject(:output) do
        FileUtils.rm_rf('spec/fixture/test.out')
        `cat spec/fixture/csv_with_headers.csv | bin/anon csv -c 2 -o spec/fixture/test.out`
        File.read('spec/fixture/test.out')
      end

      it { is_expected.to eq expected_output }
    end

    context 'a headerless file' do
      # Same rows as above, minus the header line.
      let(:expected_output) do
        <<'CSV'
F^u4bgebdfds, 15/07/1756,anon1@anon.com, XY1 TH7, jokes > joke books, oily
FNY7dujh, 15/11/1856,anon2@anon.com, CZ1 NJ7, books > dusty, automatic
CSV
      end

      subject(:output) { `bin/anon csv -c 2 -i spec/fixture/csv_without_headers.csv --no-header` }

      it { is_expected.to eq expected_output }
    end

    context 'named headers' do
      subject(:output) { `bin/anon csv -i spec/fixture/csv_with_headers.csv -c e---mail` }

      it { is_expected.to eq expected_output }
    end

    context 'automatic header detection' do
      subject(:output) { `bin/anon csv -i spec/fixture/csv_with_headers.csv` }

      it { is_expected.to eq expected_output }
    end
  end
end
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,46 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
require 'anon/cli'
|
4
|
+
require 'anon/csv'
|
5
|
+
require 'anon/text'
|
6
|
+
|
7
|
+
# Unit checks that the CLI commands hand the right streams and options to
# the anonymiser classes; the anonymisers themselves are mocked out.
describe Anon::CLI do
  describe '#text' do
    context 'without options' do
      it 'calls the text anonomiser with standard in and out' do
        expect(Anon::Text).to receive(:anonymise!).with($stdin, $stdout)
        subject.text
      end
    end

    context 'specifying an input file' do
      it 'calls the text anonomiser with the file' do
        expect(Anon::Text).to receive(:anonymise!) do |input, _output|
          expect(input).to be_a File
          expect(input.path).to eq 'spec/fixture/test.in'
        end
        subject.options = { infile: 'spec/fixture/test.in' }
        subject.text
      end
    end

    context 'specifying an output file' do
      it 'calls the text anonomiser with the file' do
        # Checks the second argument, i.e. the stream opened for --outfile.
        expect(Anon::Text).to receive(:anonymise!) do |_input, output|
          expect(output).to be_a File
          expect(output.path).to eq 'spec/fixture/test.out'
        end
        subject.options = { outfile: 'spec/fixture/test.out' }
        subject.text
      end
    end
  end

  describe '#csv' do
    it 'calls the csv anonomiser with the correct columns' do
      # options is replaced wholesale, so the :header default is absent (nil).
      expect(Anon::CSV).to receive(:anonymise!).with($stdin, $stdout, '1,4,7', nil)
      subject.options = { columns: '1,4,7' }
      subject.csv
    end
  end
end
|
@@ -0,0 +1,93 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
require 'anon/csv/columns'
|
3
|
+
|
4
|
+
# Covers the three ways Columns can decide what to anonymise:
# explicit indexes, explicit names, and guessing from the headers.
describe Anon::CSV::Columns do
  describe '#to_anonymise' do
    context 'index columns' do
      subject(:selection) { described_class.new('1,2,3', double) }

      it 'converts the indexes to Integers' do
        expect(selection.to_anonymise).to eq [1, 2, 3]
      end
    end

    context 'named columns' do
      subject(:selection) { described_class.new('email_address,name,foo', double) }

      it 'returns the named columns' do
        expect(selection.to_anonymise).to eq ['email_address', 'name', 'foo']
      end
    end

    context 'automatic column detection' do
      # Headers that must be recognised as e-mail columns.
      let(:email_headers) do
        [
          'email', 'zemailaddress', 'contact email address', 'emailaddress',
          'e-mail', 'personal email', 'email address', 'ct_email_addr',
          'e_mail_address', 'student_e_mail', 'e-mail address',
          'parent email address', 'customer email address', 'email_address',
          'contact address.email', 'user email string', 'email client',
          'client email', 'ot ship-to email address', 'customer email',
          'clnp email address1', 'default_email', 'email_addr',
          'sender_email', 'webemail'
        ]
      end

      # Headers that must be left alone.
      let(:non_email_headers) do
        [
          'pgid_sku', 'contract start date', 'post_code', 'product_category',
          'transmission_type', 'policy_start_date', 'category-genericname',
          'order id', 'unique_model', 'start', 'image', 'orderdate', 'model',
          'homebound date', 'pid-cid', 'ct_name_last', 'booking ref.', 'a5',
          'id', 'bazaarvoice id', 'questionnaire_name', 'deeplink_id',
          'hotel_name', 'series_identifier', 'registration', 'zmailname'
        ]
      end

      let(:headers) { email_headers + non_email_headers }

      # Shuffled so the result cannot depend on header order.
      subject(:selection) { described_class.new(nil, headers.shuffle) }

      it 'returns the email related headers' do
        expect(selection.to_anonymise.sort).to eq email_headers.sort
      end
    end
  end
end
|
@@ -0,0 +1,76 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
require 'anon/csv'
|
3
|
+
|
4
|
+
# Runs the CSV anonymiser over in-memory streams and inspects the exact
# lines it writes.
describe Anon::CSV do
  describe '.anonymise!' do
    let(:output_stream) { StringIO.new }
    let(:columns) { '0' }

    # Everything written to the output stream, one element per line.
    let(:written_lines) { output_stream.string.lines.to_a }

    before do
      described_class.anonymise!(input_stream, output_stream, columns, headers)
    end

    context 'with headers' do
      let(:headers) { true }

      let(:input_stream) do
        StringIO.new [
          'email,foo,bar',
          'foo@bar.com,34545,bannas',
          'foopface@fooofoo.co.uk,124353,apples'
        ].join("\n")
      end

      it 'anonymises the correct column' do
        expect(written_lines).to eq [
          "email,foo,bar\n",
          "anon1@anon.com,34545,bannas\n",
          "anon2@anon.com,124353,apples\n"
        ]
      end

      context 'with named columns' do
        let(:columns) { 'email,bar' }

        it 'anonymises the correct columns' do
          expect(written_lines).to eq [
            "email,foo,bar\n",
            "anon1@anon.com,34545,anon2@anon.com\n",
            "anon3@anon.com,124353,anon4@anon.com\n"
          ]
        end
      end

      context 'without named columns' do
        let(:columns) { nil }

        it 'guesses the columns to anonymise' do
          expect(written_lines).to eq [
            "email,foo,bar\n",
            "anon1@anon.com,34545,bannas\n",
            "anon2@anon.com,124353,apples\n"
          ]
        end
      end
    end

    context 'without headers' do
      let(:headers) { false }

      let(:input_stream) do
        StringIO.new [
          'looloo@example.com,2456,satsuma',
          'foo@bar.com,34545,bannas',
          'foopface@fooofoo.co.uk,124353,apples'
        ].join("\n")
      end

      it 'anonymises the correct column' do
        expect(written_lines).to eq [
          "anon1@anon.com,2456,satsuma\n",
          "anon2@anon.com,34545,bannas\n",
          "anon3@anon.com,124353,apples\n"
        ]
      end
    end
  end
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
require 'anon/text'
|
3
|
+
|
4
|
+
# Runs the text anonymiser over an in-memory line containing two addresses.
describe Anon::Text do
  subject(:anonymiser) { described_class.new(input_stream, output_stream) }

  let(:output_stream) { StringIO.new }

  describe '#anonymise!' do
    let(:input_stream) do
      StringIO.new ' someone@foo.com then some other interesting text another@icecream.museum'
    end

    # What ended up in the output stream after the run.
    let(:result) do
      anonymiser.anonymise!
      output_stream.string
    end

    it 'anonymises any email addresss' do
      expect(result).to_not include 'someone@foo.com'
      expect(result).to_not include 'another@icecream.museum'
    end

    it 'leaves the other text alone' do
      expect(result).to eq " anon1@anon.com then some other interesting text anon2@anon.com\n"
    end
  end
end
|
metadata
ADDED
@@ -0,0 +1,142 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: anon
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Reevoo Engineering
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2014-02-03 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: time_difference
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: thor
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: rspec
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '3'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '3'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: simplecov
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ">="
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: reevoocop
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - ">="
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '0'
|
76
|
+
type: :development
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - ">="
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '0'
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: rake
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - ">="
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '0'
|
90
|
+
type: :development
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - ">="
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '0'
|
97
|
+
description: Replaces personal data with fake data
|
98
|
+
email: developers@reevoo.com
|
99
|
+
executables:
|
100
|
+
- anon
|
101
|
+
extensions: []
|
102
|
+
extra_rdoc_files: []
|
103
|
+
files:
|
104
|
+
- bin/anon
|
105
|
+
- lib/anon.rb
|
106
|
+
- lib/anon/base.rb
|
107
|
+
- lib/anon/cli.rb
|
108
|
+
- lib/anon/csv.rb
|
109
|
+
- lib/anon/csv/columns.rb
|
110
|
+
- lib/anon/text.rb
|
111
|
+
- spec/intergration/cli_spec.rb
|
112
|
+
- spec/spec_helper.rb
|
113
|
+
- spec/unit/cli_spec.rb
|
114
|
+
- spec/unit/columns_spec.rb
|
115
|
+
- spec/unit/csv_spec.rb
|
116
|
+
- spec/unit/text_spec.rb
|
117
|
+
homepage: https://github.com/reevoo/anon
|
118
|
+
licenses:
|
119
|
+
- MIT
|
120
|
+
metadata: {}
|
121
|
+
post_install_message:
|
122
|
+
rdoc_options: []
|
123
|
+
require_paths:
|
124
|
+
- lib
|
125
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
126
|
+
requirements:
|
127
|
+
- - ">="
|
128
|
+
- !ruby/object:Gem::Version
|
129
|
+
version: '0'
|
130
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
131
|
+
requirements:
|
132
|
+
- - ">="
|
133
|
+
- !ruby/object:Gem::Version
|
134
|
+
version: '0'
|
135
|
+
requirements: []
|
136
|
+
rubyforge_project:
|
137
|
+
rubygems_version: 2.2.2
|
138
|
+
signing_key:
|
139
|
+
specification_version: 4
|
140
|
+
summary: Replaces personal data with fake data
|
141
|
+
test_files: []
|
142
|
+
has_rdoc:
|