anon 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/bin/anon +5 -0
- data/lib/anon/base.rb +70 -0
- data/lib/anon/cli.rb +57 -0
- data/lib/anon/csv/columns.rb +33 -0
- data/lib/anon/csv.rb +63 -0
- data/lib/anon/text.rb +44 -0
- data/lib/anon.rb +3 -0
- data/spec/intergration/cli_spec.rb +76 -0
- data/spec/spec_helper.rb +11 -0
- data/spec/unit/cli_spec.rb +46 -0
- data/spec/unit/columns_spec.rb +93 -0
- data/spec/unit/csv_spec.rb +76 -0
- data/spec/unit/text_spec.rb +24 -0
- metadata +142 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: b58dd8e96a5488b0ae05d43be7b9a9cc2162db34
|
4
|
+
data.tar.gz: e8ae73279bd357ab92e6aa258426d63206db3734
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 162ebe61b83c353976ff81da3d2882d32cbf9b0b464a3b6b8a67e3ad9bdce20abdaa7cadbdcd91ce0b3ac7838f1f72ef98dfe21b38ed6b104c7605e105f0c046
|
7
|
+
data.tar.gz: ab5a70456e30880db8e2a238454ddc003813c339218ccbd152ee3e7d5dac656cfd5a215948dab083d02bbad08b9552695641afb9f162b1402ea5245a784438c3
|
data/bin/anon
ADDED
data/lib/anon/base.rb
ADDED
@@ -0,0 +1,70 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
require 'time_difference'
|
4
|
+
|
5
|
+
module Anon
|
6
|
+
# Anonymiser base class.
#
# Holds the behaviour shared by the concrete anonymisers: a stable mapping
# from personal e-mail addresses to anonymous ones, and progress reporting
# written to $stderr. The class-level entry point expects the subclass to
# provide an instance-level `anonymise!`.
class Base
  # Builds an anonymiser and runs it.
  #
  # @param args arguments passed unchanged to the initializer
  # @return whatever the instance-level `anonymise!` returns
  def self.anonymise!(*args)
    new(*args).anonymise!
  end

  protected

  # Helper method that replaces a personal e-mail with an anonymous one.
  #
  # The same personal e-mail will be replaced with the same anonymous
  # e-mail; new addresses are numbered in the order they are first seen
  # (anon1@anon.com, anon2@anon.com, ...).
  #
  # @param personal_email the value to replace (used as the lookup key)
  # @return [String] the anonymous replacement
  def anonymous_email(personal_email)
    unless anonymised_emails.key?(personal_email)
      next_count = anonymised_emails.count + 1
      anonymised_emails[personal_email] = "anon#{next_count}@anon.com"
    end

    anonymised_emails[personal_email]
  end

  # Initializes progress tracking and prints the first progress line.
  def start_progress
    @progress = 0
    @started = Time.now
    update_progress
  end

  # Adds 1 to the progress count.
  def increment_progress
    @progress += 1
    update_progress
  end

  # Ends progress tracking and writes a summary to $stderr.
  #
  # Safe to call when no e-mail was ever replaced: the replacement count is
  # then reported as 0 instead of failing on an uninitialised mapping.
  def complete_progress
    stopped = Time.now
    duration = TimeDifference.between(@started, stopped).in_seconds
    # A zero duration would make the rate a division by zero; report the
    # line count itself in that case.
    average = duration == 0 ? @progress : (@progress.to_f / duration.to_f).round

    $stderr.puts "Read #{@progress} lines in #{duration} seconds (#{average} lines/s)"
    $stderr.puts "#{anonymised_emails.count} unique e-mails replaced"
  end

  private

  # Lazily created mapping of personal e-mail => anonymous e-mail.
  def anonymised_emails
    @anonymised_emails ||= {}
  end

  # Prints a progress line on every 100th processed line (including 0).
  def update_progress
    output_progress if @progress % 100 == 0
  end

  # Writes the current count to $stderr; the trailing carriage return lets
  # the next progress line overwrite this one on a terminal.
  def output_progress
    $stderr.print "Working... #{@progress}\r"
    $stderr.flush
  end
end
|
70
|
+
end
|
data/lib/anon/cli.rb
ADDED
@@ -0,0 +1,57 @@
|
|
1
|
+
require 'thor'
|
2
|
+
|
3
|
+
module Anon
|
4
|
+
# Command Line Interface for Anon.
#
# Each command resolves its input and output streams from the
# --infile / --outfile options (falling back to $stdin / $stdout) and hands
# them to the matching anonymiser. Files opened here are closed again once
# the anonymiser has finished, even if it raises.
class CLI < Thor

  desc 'csv [OPTIONS]', 'Anonymise a csv file'

  option :infile,
         aliases: [:i],
         desc: 'input filename to read from, reads from STDIN if ommited'

  option :outfile,
         aliases: [:o],
         desc: 'output filename write to, writes to STDOUT if ommited'

  option :columns,
         aliases: [:c],
         desc: 'columns to anonymise, by index or name
e.g. 0,1,5 or email-address,other_email, guesses based on header if ommited'

  option :header,
         type: :boolean,
         default: true,
         desc: 'if the csv file to be processed has a header row'

  # Anonymises the selected columns of a CSV stream.
  def csv
    require 'anon/csv' # loaded on demand, only when this command runs
    with_streams do |source, sink|
      Anon::CSV.anonymise!(source, sink, options[:columns], options[:header])
    end
  end

  desc 'text [OPTIONS]', 'Anonymise a text file'

  option :infile,
         aliases: [:i],
         desc: 'input filename to read from, reads from STDIN if ommited'

  option :outfile,
         aliases: [:o],
         desc: 'output filename write to, writes to STDOUT if ommited'

  # Anonymises every e-mail address found in a text stream.
  def text
    require 'anon/text' # loaded on demand, only when this command runs
    with_streams do |source, sink|
      Anon::Text.anonymise!(source, sink)
    end
  end

  private

  # Yields the input and output streams and returns the block's value.
  # Only streams opened from a filename option are closed afterwards;
  # $stdin and $stdout are left untouched.
  def with_streams
    source = input
    sink = output
    yield source, sink
  ensure
    source.close if options[:infile] && source && !source.closed?
    sink.close if options[:outfile] && sink && !sink.closed?
  end

  # The file named by --infile opened for reading, or $stdin.
  def input
    options[:infile] ? File.open(options[:infile]) : $stdin
  end

  # The file named by --outfile opened for writing, or $stdout.
  def output
    options[:outfile] ? File.open(options[:outfile], 'w') : $stdout
  end
end
|
57
|
+
end
|
@@ -0,0 +1,33 @@
|
|
1
|
+
require 'anon/base'
|
2
|
+
|
3
|
+
module Anon
|
4
|
+
class CSV < Base
|
5
|
+
# Works out which CSV columns should be anonymised.
#
# Columns may be given explicitly as a comma separated string of indexes
# ("0,1,5") or of header names ("email,other_email"). When none are given
# they are guessed from the header row.
class Columns
  # @param columns [String, nil] comma separated column indexes or names
  # @param headers [Array, nil] header row of the CSV, if there is one
  def initialize(columns, headers)
    @columns = columns
    @headers = headers
  end

  # The columns to anonymise, computed once and then cached.
  #
  # @return [Array<Integer>] when every given column is an integer
  # @return [Array<String>] the given names, or the guessed header names
  # @raise [ArgumentError] when no columns were given and there is no
  #   header row to guess them from
  def to_anonymise
    @_to_anonymise ||= indexes || columns || best_guess
  end

  private

  attr_reader :headers

  # The given columns as integers, or nil when none were given or any of
  # them is not an integer. Parsed as base 10 so that a leading zero
  # ("08", "010") is not interpreted as an octal literal.
  def indexes
    columns.map { |c| Integer(c, 10) } if columns
  rescue ArgumentError
    nil
  end

  # The given columns split on commas, or nil when none were given.
  def columns
    @columns.split(',') if @columns
  end

  # Headers that look like they hold e-mail addresses. Blank (nil) header
  # cells are skipped rather than crashing the guess.
  def best_guess
    unless headers.respond_to?(:select)
      # Without explicit columns or a header row nothing would be
      # anonymised, so fail loudly instead of passing data through.
      raise ArgumentError, 'no columns given and no header row to guess them from'
    end

    headers.select { |h| h.to_s =~ /e.*mail/i }
  end
end
|
32
|
+
end
|
33
|
+
end
|
data/lib/anon/csv.rb
ADDED
@@ -0,0 +1,63 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
require 'anon/base'
|
4
|
+
require 'anon/csv/columns'
|
5
|
+
require 'csv'
|
6
|
+
|
7
|
+
module Anon
|
8
|
+
# Anonymises a CSV stream: the chosen columns of every row are overwritten
# with anonymous e-mails and the rows are written to the output stream.
class CSV < Base
  # @param input IO-like object the CSV is read from
  # @param output IO-like object the anonymised CSV is written to
  # @param columns_to_anonymise comma separated indexes or names, or nil
  # @param has_header whether the first row is a header row
  def initialize(input, output, columns_to_anonymise, has_header = true)
    @input, @output = input, output
    @columns_to_anonymise = columns_to_anonymise
    @has_header = has_header
  end

  # Rewrites every row of the input with the selected columns anonymised,
  # reporting progress along the way.
  def anonymise!
    start_progress
    map_lines do |row|
      anonymise(row)
      increment_progress
      row
    end
    complete_progress
  end

  private

  attr_reader :has_header

  # Overwrites, in place, each selected field of the row.
  def anonymise(row)
    columns.to_anonymise.each { |key| row[key] = anonymous_email(row[key]) }
  end

  # Memoised CSV reader wrapped around the raw input stream.
  def input
    @_input ||= ::CSV.new(@input, headers: has_header)
  end

  # Memoised CSV writer wrapped around the raw output stream.
  # NOTE: first built inside map_lines, i.e. after a row has been read;
  # keep that ordering, since the writer is configured from the reader's
  # headers.
  def output
    @_output ||= ::CSV.new(@output, write_headers: has_header, headers: headers)
  end

  # Memoised column selection, built from the reader's headers on first use.
  def columns
    @_columns ||= Columns.new(@columns_to_anonymise, headers)
  end

  # Feeds each row of the input to the block and writes the block's
  # return value to the output.
  def map_lines
    row = input.gets
    while row
      output.puts yield(row)
      row = input.gets
    end
  end

  # Headers as reported by the CSV reader.
  def headers
    input.headers
  end
end
|
63
|
+
end
|
data/lib/anon/text.rb
ADDED
@@ -0,0 +1,44 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
require 'anon/base'
|
4
|
+
|
5
|
+
module Anon
|
6
|
+
# Anonymises any detected e-mail address in a text stream.
class Text < Base
  # From the email regex research: http://fightingforalostcause.net/misc/2006/compare-email-regex.php
  # Authors: James Watts and Francisco Jose Martin Moreno
  #
  # The first character class lists the characters allowed in the dotted
  # prefix of the local part. `\$` and `\^` are literal characters there:
  # inside a character class they must not be written as the anchors
  # `\z` / `\A`, which would drop `$` and `^` from the set and leave part
  # of an address such as "a$b.c@x.com" in the output.
  EMAIL_REGEX = /([\w\!\#\$\%\&\'\*\+\-\/\=\?\^\`{\|\}\~]+\.)*[\w\+-]+@((((([a-z0-9]{1}[a-z0-9\-]{0,62}[a-z0-9]{1})|[a-z])\.)+[a-z]{2,6})|(\d{1,3}\.){3}\d{1,3}(\:\d{1,5})?)/i # rubocop:disable Metrics/LineLength

  # Despite the parameter names, both arguments are used as streams: the
  # first must respond to `gets`, the second to `puts`.
  #
  # @param incoming_filename stream the text is read from
  # @param outgoing_filename stream the anonymised text is written to
  def initialize(incoming_filename, outgoing_filename)
    @input = incoming_filename
    @output = outgoing_filename
  end

  # Anonymises any e-mail addresses found in the text, line by line,
  # reporting progress along the way.
  def anonymise!
    start_progress
    map_lines do |line|
      line = anonymise_line(line)
      increment_progress
      line
    end
    complete_progress
  end

  private

  attr_reader :input, :output

  # Reads each line from the input, processes it using the block
  # and writes the return value of the block to the output.
  def map_lines
    while (inline = input.gets)
      output.puts yield(inline)
    end
  end

  # Returns a copy of the line with every matched address replaced by its
  # anonymous counterpart.
  def anonymise_line(line)
    line.gsub(EMAIL_REGEX) { |email| anonymous_email(email) }
  end
end
|
44
|
+
end
|
data/lib/anon.rb
ADDED
@@ -0,0 +1,76 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
require 'fileutils'
|
3
|
+
|
4
|
+
describe 'The comand line interface' do
|
5
|
+
let(:expected_output) do
|
6
|
+
'pgid_sku,contract start date,e---mail,post_code,product_category,transmission_type
|
7
|
+
F^u4bgebdfds, 15/07/1756,anon1@anon.com, XY1 TH7, jokes > joke books, oily
|
8
|
+
FNY7dujh, 15/11/1856,anon2@anon.com, CZ1 NJ7, books > dusty, automatic
|
9
|
+
'
|
10
|
+
end
|
11
|
+
|
12
|
+
describe 'text' do
|
13
|
+
|
14
|
+
describe 'reading from a file' do
|
15
|
+
let(:output) { `bin/anon text -i spec/fixture/csv_with_headers.csv` }
|
16
|
+
specify { expect(output).to eq expected_output }
|
17
|
+
end
|
18
|
+
|
19
|
+
describe 'reading from stdin' do
|
20
|
+
let(:output) { `cat spec/fixture/csv_with_headers.csv | bin/anon text` }
|
21
|
+
specify { expect(output).to eq expected_output }
|
22
|
+
end
|
23
|
+
|
24
|
+
describe 'writing to a file' do
|
25
|
+
let(:output) do
|
26
|
+
FileUtils.rm_rf('spec/fixture/test.out')
|
27
|
+
`cat spec/fixture/csv_with_headers.csv | bin/anon text -o spec/fixture/test.out`
|
28
|
+
File.read('spec/fixture/test.out')
|
29
|
+
end
|
30
|
+
specify { expect(output).to eq expected_output }
|
31
|
+
end
|
32
|
+
|
33
|
+
end
|
34
|
+
|
35
|
+
describe 'csv' do
|
36
|
+
describe 'reading from a file' do
|
37
|
+
let(:output) { `bin/anon csv -i spec/fixture/csv_with_headers.csv -c 2` }
|
38
|
+
specify { expect(output).to eq expected_output }
|
39
|
+
end
|
40
|
+
|
41
|
+
describe 'reading from stdin' do
|
42
|
+
let(:output) { `cat spec/fixture/csv_with_headers.csv | bin/anon csv -c 2` }
|
43
|
+
specify { expect(output).to eq expected_output }
|
44
|
+
end
|
45
|
+
|
46
|
+
describe 'writing to a file' do
|
47
|
+
let(:output) do
|
48
|
+
FileUtils.rm_rf('spec/fixture/test.out')
|
49
|
+
`cat spec/fixture/csv_with_headers.csv | bin/anon csv -c 2 -o spec/fixture/test.out`
|
50
|
+
File.read('spec/fixture/test.out')
|
51
|
+
end
|
52
|
+
specify { expect(output).to eq expected_output }
|
53
|
+
end
|
54
|
+
|
55
|
+
describe 'a headerless file' do
|
56
|
+
let(:expected_output) do
|
57
|
+
'F^u4bgebdfds, 15/07/1756,anon1@anon.com, XY1 TH7, jokes > joke books, oily
|
58
|
+
FNY7dujh, 15/11/1856,anon2@anon.com, CZ1 NJ7, books > dusty, automatic
|
59
|
+
'
|
60
|
+
end
|
61
|
+
|
62
|
+
let(:output) { `bin/anon csv -c 2 -i spec/fixture/csv_without_headers.csv --no-header` }
|
63
|
+
specify { expect(output).to eq expected_output }
|
64
|
+
end
|
65
|
+
|
66
|
+
describe 'named headers' do
|
67
|
+
let(:output) { `bin/anon csv -i spec/fixture/csv_with_headers.csv -c e---mail` }
|
68
|
+
specify { expect(output).to eq expected_output }
|
69
|
+
end
|
70
|
+
|
71
|
+
describe 'automatic header detection' do
|
72
|
+
let(:output) { `bin/anon csv -i spec/fixture/csv_with_headers.csv` }
|
73
|
+
specify { expect(output).to eq expected_output }
|
74
|
+
end
|
75
|
+
end
|
76
|
+
end
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,46 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
require 'anon/cli'
|
4
|
+
require 'anon/csv'
|
5
|
+
require 'anon/text'
|
6
|
+
|
7
|
+
# Unit spec for the CLI: checks that each command hands the right streams
# and options to the matching anonymiser (the anonymisers are mocked).
describe Anon::CLI do
  describe '#text' do
    context 'without options' do
      it 'calls the text anonomiser with standard in and out' do
        expect(Anon::Text).to receive(:anonymise!).with($stdin, $stdout)
        subject.text
      end
    end

    context 'specifying an input file' do
      it 'calls the text anonomiser with the file' do
        expect(Anon::Text).to receive(:anonymise!) do |input, _output|
          expect(input).to be_a File
          expect(input.path).to eq 'spec/fixture/test.in'
        end
        subject.options = { infile: 'spec/fixture/test.in' }
        subject.text
      end
    end

    context 'specifying an output file' do
      # The CLI opens the output file for writing, which creates it;
      # remove it again so the fixture directory stays clean.
      after do
        File.delete('spec/fixture/test.out') if File.exist?('spec/fixture/test.out')
      end

      it 'calls the text anonomiser with the file' do
        # Assert on the second argument: this example covers the output
        # stream, not the input stream.
        expect(Anon::Text).to receive(:anonymise!) do |_input, output|
          expect(output).to be_a File
          expect(output.path).to eq 'spec/fixture/test.out'
        end
        subject.options = { outfile: 'spec/fixture/test.out' }
        subject.text
      end
    end
  end

  describe '#csv' do
    it 'calls the csv anonomiser with the correct columns' do
      # options are assigned directly here, so the :header entry is absent
      # and nil is what gets passed on.
      expect(Anon::CSV).to receive(:anonymise!).with($stdin, $stdout, '1,4,7', nil)
      subject.options = { columns: '1,4,7' }
      subject.csv
    end
  end
end
|
@@ -0,0 +1,93 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
require 'anon/csv/columns'
|
3
|
+
|
4
|
+
describe Anon::CSV::Columns do
|
5
|
+
describe '#to_anonymise' do
|
6
|
+
context 'index columns' do
|
7
|
+
subject { described_class.new('1,2,3', double) }
|
8
|
+
|
9
|
+
it 'converts the indexes to Integers' do
|
10
|
+
expect(subject.to_anonymise).to eq [1, 2, 3]
|
11
|
+
end
|
12
|
+
end
|
13
|
+
|
14
|
+
context 'named columns' do
|
15
|
+
subject { described_class.new('email_address,name,foo', double) }
|
16
|
+
|
17
|
+
it 'returns the named columns' do
|
18
|
+
expect(subject.to_anonymise).to eq %w(email_address name foo)
|
19
|
+
end
|
20
|
+
end
|
21
|
+
|
22
|
+
context 'automatic column detection' do
|
23
|
+
let(:email_headers) do
|
24
|
+
[
|
25
|
+
'email',
|
26
|
+
'zemailaddress',
|
27
|
+
'contact email address',
|
28
|
+
'emailaddress',
|
29
|
+
'e-mail',
|
30
|
+
'personal email',
|
31
|
+
'email address',
|
32
|
+
'ct_email_addr',
|
33
|
+
'e_mail_address',
|
34
|
+
'student_e_mail',
|
35
|
+
'e-mail address',
|
36
|
+
'parent email address',
|
37
|
+
'customer email address',
|
38
|
+
'email_address',
|
39
|
+
'contact address.email',
|
40
|
+
'user email string',
|
41
|
+
'email client',
|
42
|
+
'client email',
|
43
|
+
'ot ship-to email address',
|
44
|
+
'customer email',
|
45
|
+
'clnp email address1',
|
46
|
+
'default_email',
|
47
|
+
'email_addr',
|
48
|
+
'sender_email',
|
49
|
+
'webemail',
|
50
|
+
]
|
51
|
+
end
|
52
|
+
|
53
|
+
let(:non_email_headers) do
|
54
|
+
[
|
55
|
+
'pgid_sku',
|
56
|
+
'contract start date',
|
57
|
+
'post_code',
|
58
|
+
'product_category',
|
59
|
+
'transmission_type',
|
60
|
+
'policy_start_date',
|
61
|
+
'category-genericname',
|
62
|
+
'order id',
|
63
|
+
'unique_model',
|
64
|
+
'start',
|
65
|
+
'image',
|
66
|
+
'orderdate',
|
67
|
+
'model',
|
68
|
+
'homebound date',
|
69
|
+
'pid-cid',
|
70
|
+
'ct_name_last',
|
71
|
+
'booking ref.',
|
72
|
+
'a5',
|
73
|
+
'id',
|
74
|
+
'bazaarvoice id',
|
75
|
+
'questionnaire_name',
|
76
|
+
'deeplink_id',
|
77
|
+
'hotel_name',
|
78
|
+
'series_identifier',
|
79
|
+
'registration',
|
80
|
+
'zmailname',
|
81
|
+
]
|
82
|
+
end
|
83
|
+
|
84
|
+
let(:headers) { email_headers + non_email_headers }
|
85
|
+
|
86
|
+
subject { described_class.new(nil, headers.shuffle) }
|
87
|
+
|
88
|
+
it 'returns the email related headers' do
|
89
|
+
expect(subject.to_anonymise.sort).to eq email_headers.sort
|
90
|
+
end
|
91
|
+
end
|
92
|
+
end
|
93
|
+
end
|
@@ -0,0 +1,76 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
require 'anon/csv'
|
3
|
+
|
4
|
+
describe Anon::CSV do
|
5
|
+
|
6
|
+
describe '.anonymise!' do
|
7
|
+
|
8
|
+
before do
|
9
|
+
described_class.anonymise!(input_stream, output_stream, columns, headers)
|
10
|
+
end
|
11
|
+
|
12
|
+
let(:output_stream) { StringIO.new }
|
13
|
+
let(:columns) { '0' }
|
14
|
+
|
15
|
+
context 'with headers' do
|
16
|
+
let(:headers) { true }
|
17
|
+
|
18
|
+
let(:input_stream) do
|
19
|
+
StringIO.new "email,foo,bar
|
20
|
+
foo@bar.com,34545,bannas
|
21
|
+
foopface@fooofoo.co.uk,124353,apples"
|
22
|
+
end
|
23
|
+
|
24
|
+
it 'anonymises the correct column' do
|
25
|
+
output_stream.rewind
|
26
|
+
expect(output_stream.gets).to eq "email,foo,bar\n"
|
27
|
+
expect(output_stream.gets).to eq "anon1@anon.com,34545,bannas\n"
|
28
|
+
expect(output_stream.gets).to eq "anon2@anon.com,124353,apples\n"
|
29
|
+
expect(output_stream.gets).to eq nil
|
30
|
+
end
|
31
|
+
|
32
|
+
context 'with named columns' do
|
33
|
+
let(:columns) { 'email,bar' }
|
34
|
+
|
35
|
+
it 'anonymises the correct columns' do
|
36
|
+
output_stream.rewind
|
37
|
+
expect(output_stream.gets).to eq "email,foo,bar\n"
|
38
|
+
expect(output_stream.gets).to eq "anon1@anon.com,34545,anon2@anon.com\n"
|
39
|
+
expect(output_stream.gets).to eq "anon3@anon.com,124353,anon4@anon.com\n"
|
40
|
+
expect(output_stream.gets).to eq nil
|
41
|
+
end
|
42
|
+
end
|
43
|
+
|
44
|
+
context 'without named columns' do
|
45
|
+
let(:columns) { nil }
|
46
|
+
|
47
|
+
it 'guesses the columns to anonymise' do
|
48
|
+
output_stream.rewind
|
49
|
+
expect(output_stream.gets).to eq "email,foo,bar\n"
|
50
|
+
expect(output_stream.gets).to eq "anon1@anon.com,34545,bannas\n"
|
51
|
+
expect(output_stream.gets).to eq "anon2@anon.com,124353,apples\n"
|
52
|
+
expect(output_stream.gets).to eq nil
|
53
|
+
end
|
54
|
+
end
|
55
|
+
|
56
|
+
end
|
57
|
+
|
58
|
+
context 'without headers' do
|
59
|
+
let(:headers) { false }
|
60
|
+
|
61
|
+
let(:input_stream) do
|
62
|
+
StringIO.new "looloo@example.com,2456,satsuma
|
63
|
+
foo@bar.com,34545,bannas
|
64
|
+
foopface@fooofoo.co.uk,124353,apples"
|
65
|
+
end
|
66
|
+
|
67
|
+
it 'anonymises the correct column' do
|
68
|
+
output_stream.rewind
|
69
|
+
expect(output_stream.gets).to eq "anon1@anon.com,2456,satsuma\n"
|
70
|
+
expect(output_stream.gets).to eq "anon2@anon.com,34545,bannas\n"
|
71
|
+
expect(output_stream.gets).to eq "anon3@anon.com,124353,apples\n"
|
72
|
+
expect(output_stream.gets).to eq nil
|
73
|
+
end
|
74
|
+
end
|
75
|
+
end
|
76
|
+
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
require 'anon/text'
|
3
|
+
|
4
|
+
describe Anon::Text do
|
5
|
+
let(:output_stream) { StringIO.new }
|
6
|
+
subject { described_class.new(input_stream, output_stream) }
|
7
|
+
|
8
|
+
describe '#anonymise!' do
|
9
|
+
let(:input_stream) do
|
10
|
+
StringIO.new ' someone@foo.com then some other interesting text another@icecream.museum'
|
11
|
+
end
|
12
|
+
|
13
|
+
it 'anonymises any email addresss' do
|
14
|
+
subject.anonymise!
|
15
|
+
expect(output_stream.string).to_not include 'someone@foo.com'
|
16
|
+
expect(output_stream.string).to_not include 'another@icecream.museum'
|
17
|
+
end
|
18
|
+
|
19
|
+
it 'leaves the other text alone' do
|
20
|
+
subject.anonymise!
|
21
|
+
expect(output_stream.string).to eq " anon1@anon.com then some other interesting text anon2@anon.com\n"
|
22
|
+
end
|
23
|
+
end
|
24
|
+
end
|
metadata
ADDED
@@ -0,0 +1,142 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: anon
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Reevoo Engineering
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2014-02-03 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: time_difference
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: thor
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: rspec
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '3'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '3'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: simplecov
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ">="
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: reevoocop
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - ">="
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '0'
|
76
|
+
type: :development
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - ">="
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '0'
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: rake
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - ">="
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '0'
|
90
|
+
type: :development
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - ">="
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '0'
|
97
|
+
description: Replaces personal data with fake data
|
98
|
+
email: developers@reevoo.com
|
99
|
+
executables:
|
100
|
+
- anon
|
101
|
+
extensions: []
|
102
|
+
extra_rdoc_files: []
|
103
|
+
files:
|
104
|
+
- bin/anon
|
105
|
+
- lib/anon.rb
|
106
|
+
- lib/anon/base.rb
|
107
|
+
- lib/anon/cli.rb
|
108
|
+
- lib/anon/csv.rb
|
109
|
+
- lib/anon/csv/columns.rb
|
110
|
+
- lib/anon/text.rb
|
111
|
+
- spec/intergration/cli_spec.rb
|
112
|
+
- spec/spec_helper.rb
|
113
|
+
- spec/unit/cli_spec.rb
|
114
|
+
- spec/unit/columns_spec.rb
|
115
|
+
- spec/unit/csv_spec.rb
|
116
|
+
- spec/unit/text_spec.rb
|
117
|
+
homepage: https://github.com/reevoo/anon
|
118
|
+
licenses:
|
119
|
+
- MIT
|
120
|
+
metadata: {}
|
121
|
+
post_install_message:
|
122
|
+
rdoc_options: []
|
123
|
+
require_paths:
|
124
|
+
- lib
|
125
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
126
|
+
requirements:
|
127
|
+
- - ">="
|
128
|
+
- !ruby/object:Gem::Version
|
129
|
+
version: '0'
|
130
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
131
|
+
requirements:
|
132
|
+
- - ">="
|
133
|
+
- !ruby/object:Gem::Version
|
134
|
+
version: '0'
|
135
|
+
requirements: []
|
136
|
+
rubyforge_project:
|
137
|
+
rubygems_version: 2.2.2
|
138
|
+
signing_key:
|
139
|
+
specification_version: 4
|
140
|
+
summary: Replaces personal data with fake data
|
141
|
+
test_files: []
|
142
|
+
has_rdoc:
|