anon 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/bin/anon +5 -0
- data/lib/anon/base.rb +70 -0
- data/lib/anon/cli.rb +57 -0
- data/lib/anon/csv/columns.rb +33 -0
- data/lib/anon/csv.rb +63 -0
- data/lib/anon/text.rb +44 -0
- data/lib/anon.rb +3 -0
- data/spec/intergration/cli_spec.rb +76 -0
- data/spec/spec_helper.rb +11 -0
- data/spec/unit/cli_spec.rb +46 -0
- data/spec/unit/columns_spec.rb +93 -0
- data/spec/unit/csv_spec.rb +76 -0
- data/spec/unit/text_spec.rb +24 -0
- metadata +142 -0
    
        checksums.yaml
    ADDED
    
    | @@ -0,0 +1,7 @@ | |
| 1 | 
            +
            ---
         | 
| 2 | 
            +
            SHA1:
         | 
| 3 | 
            +
              metadata.gz: b58dd8e96a5488b0ae05d43be7b9a9cc2162db34
         | 
| 4 | 
            +
              data.tar.gz: e8ae73279bd357ab92e6aa258426d63206db3734
         | 
| 5 | 
            +
            SHA512:
         | 
| 6 | 
            +
              metadata.gz: 162ebe61b83c353976ff81da3d2882d32cbf9b0b464a3b6b8a67e3ad9bdce20abdaa7cadbdcd91ce0b3ac7838f1f72ef98dfe21b38ed6b104c7605e105f0c046
         | 
| 7 | 
            +
              data.tar.gz: ab5a70456e30880db8e2a238454ddc003813c339218ccbd152ee3e7d5dac656cfd5a215948dab083d02bbad08b9552695641afb9f162b1402ea5245a784438c3
         | 
    
        data/bin/anon
    ADDED
    
    
    
        data/lib/anon/base.rb
    ADDED
    
    | @@ -0,0 +1,70 @@ | |
| 1 | 
            +
            # encoding: utf-8
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            require 'time_difference'
         | 
| 4 | 
            +
             | 
| 5 | 
            +
            module Anon
         | 
| 6 | 
            +
              # Anonymiser base class
         | 
| 7 | 
            +
              class Base
                # Builds an anonymiser from +args+ and immediately runs it.
                #
                # The instance-level +anonymise!+ is not defined here; the class
                # this is called on must provide it.
                def self.anonymise!(*args)
                  new(*args).anonymise!
                end

                protected

                # Helper method that replaces a personal e-mail
                # with an anonymous one.
                #
                # The same personal e-mail will be replaced
                # with the same anonymous e-mail. Replacements are numbered
                # in the order new addresses are first seen (anon1, anon2, ...).
                def anonymous_email(personal_email)
                  unless anonymised_emails.key? personal_email
                    next_count = anonymised_emails.count + 1
                    anonymised_emails[personal_email] = "anon#{next_count}@anon.com"
                  end

                  anonymised_emails[personal_email]
                end

                # Initializes progress tracking.
                def start_progress
                  @progress = 0
                  @started = Time.now
                  update_progress
                end

                # Adds 1 to the progress count.
                def increment_progress
                  @progress += 1
                  update_progress
                end

                # End progress tracking and output the results to $stderr.
                def complete_progress
                  stopped = Time.now
                  duration = TimeDifference.between(@started, stopped).in_seconds
                  # Avoid dividing by zero when the run finished "instantly".
                  average = if duration == 0
                              @progress
                            else
                              (@progress.to_f / duration.to_f).round
                            end

                  $stderr.puts "Read #{@progress} lines in #{duration} seconds (#{average} lines/s)"
                  # Goes through the accessor so that input containing no
                  # e-mails reports 0 instead of calling #count on nil.
                  $stderr.puts "#{anonymised_emails.count} unique e-mails replaced"
                end

                private

                # Lazily created map of personal e-mail => anonymous e-mail.
                def anonymised_emails
                  @anonymised_emails ||= {}
                end

                # Prints the running count every 100 processed lines.
                def update_progress
                  output_progress if @progress % 100 == 0
                end

                # Writes the current count to $stderr, ending with a carriage
                # return instead of a newline.
                def output_progress
                  $stderr.print "Working... #{@progress}\r"
                  $stderr.flush
                end
              end
         | 
| 70 | 
            +
            end
         | 
    
        data/lib/anon/cli.rb
    ADDED
    
    | @@ -0,0 +1,57 @@ | |
| 1 | 
            +
            require 'thor'
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            module Anon
         | 
| 4 | 
            +
              # Command Line Interface for Anon
         | 
| 5 | 
            +
              class CLI < Thor
                desc 'csv [OPTIONS]', 'Anonymise a csv file'

                option :infile,
                       aliases: [:i],
                       desc: 'input filename to read from, reads from STDIN if ommited'

                option :outfile,
                       aliases: [:o],
                       desc: 'output filename write to, writes to STDOUT if ommited'

                option :columns,
                       aliases: [:c],
                       desc: "columns to anonymise, by index or name\n" \
                             'e.g. 0,1,5 or email-address,other_email, guesses based on header if ommited'

                option :header,
                       type: :boolean,
                       default: true,
                       desc: 'if the csv file to be processed has a header row'

                # Anonymises the selected columns of a CSV stream.
                def csv
                  require 'anon/csv' # loaded lazily, only when the command runs
                  with_streams do |source, sink|
                    Anon::CSV.anonymise!(source, sink, options[:columns], options[:header])
                  end
                end

                desc 'text [OPTIONS]', 'Anonymise a text file'

                option :infile,
                       aliases: [:i],
                       desc: 'input filename to read from, reads from STDIN if ommited'

                option :outfile,
                       aliases: [:o],
                       desc: 'output filename write to, writes to STDOUT if ommited'

                # Anonymises every e-mail address found in a text stream.
                def text
                  require 'anon/text' # loaded lazily, only when the command runs
                  with_streams do |source, sink|
                    Anon::Text.anonymise!(source, sink)
                  end
                end

                private

                # The stream to read from: the --infile file, or $stdin.
                def input
                  options[:infile] ? File.open(options[:infile]) : $stdin
                end

                # The stream to write to: the --outfile file, or $stdout.
                def output
                  options[:outfile] ? File.open(options[:outfile], 'w') : $stdout
                end

                # Yields the input and output streams and closes any file this
                # command opened afterwards, even when the block raises.
                # The process-wide $stdin/$stdout are never closed.
                def with_streams
                  source = input
                  sink = output
                  yield source, sink
                ensure
                  [source, sink].each do |io|
                    next if io.nil? || io.equal?($stdin) || io.equal?($stdout)
                    io.close unless io.closed?
                  end
                end
              end
         | 
| 57 | 
            +
            end
         | 
| @@ -0,0 +1,33 @@ | |
| 1 | 
            +
            require 'anon/base'
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            module Anon
         | 
| 4 | 
            +
              class CSV < Base
         | 
| 5 | 
            +
                # Works out which columns of a CSV should be anonymised.
                #
                # The selection is, in order of preference:
                # 1. the given columns as Integer indexes, when all of them are numeric,
                # 2. the given columns as names,
                # 3. every header that looks like an e-mail column.
                class Columns
                  # columns - comma separated String of indexes or names, or nil
                  #           to guess from the headers.
                  # headers - the header row; only consulted when guessing.
                  def initialize(columns, headers)
                    @columns, @headers = columns, headers
                  end

                  # Returns an Array of Integer indexes or of column names.
                  #
                  # Raises ArgumentError when no columns were given and there is
                  # no header row to guess from.
                  def to_anonymise
                    @_to_anonymise ||= indexes || columns || best_guess
                  end

                  private

                  attr_reader :headers

                  # The requested columns as Integers, or nil when any of them
                  # is not a plain decimal number (they are then treated as names).
                  def indexes
                    # Base 10 is forced so "010" means column 10 (not octal 8)
                    # and "08" is accepted rather than rejected as bad octal.
                    columns.map { |c| Integer(c, 10) } if columns
                  rescue ArgumentError
                    nil
                  end

                  # The requested columns split on commas, or nil if none were given.
                  def columns
                    @columns.split(',') if @columns
                  end

                  # Headers matching /e.*mail/i, e.g. "email", "E-mail address".
                  def best_guess
                    unless headers.respond_to?(:select)
                      # Fail loudly: guessing nothing would silently leave the
                      # data un-anonymised.
                      raise ArgumentError,
                            'no columns given and no header row to guess them from'
                    end

                    # to_s guards against blank (nil) header cells.
                    headers.select { |h| h.to_s =~ /e.*mail/i }
                  end
                end
         | 
| 32 | 
            +
              end
         | 
| 33 | 
            +
            end
         | 
    
        data/lib/anon/csv.rb
    ADDED
    
    | @@ -0,0 +1,63 @@ | |
| 1 | 
            +
            # encoding: utf-8
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            require 'anon/base'
         | 
| 4 | 
            +
            require 'anon/csv/columns'
         | 
| 5 | 
            +
            require 'csv'
         | 
| 6 | 
            +
             | 
| 7 | 
            +
            module Anon
         | 
| 8 | 
            +
              # Replaces the contents of a set of columns with anonymous e-mails.
         | 
| 9 | 
            +
              class CSV < Base
                # input                - IO to read the CSV from
                # output               - IO to write the anonymised CSV to
                # columns_to_anonymise - comma separated indexes or names, or nil
                # has_header           - whether the first row is a header row
                def initialize(input, output, columns_to_anonymise, has_header = true)
                  @input, @output = input, output
                  @columns_to_anonymise = columns_to_anonymise
                  @has_header = has_header
                end

                # Rewrites every row, replacing the content of the selected
                # columns, and reports progress along the way.
                def anonymise!
                  start_progress
                  map_lines do |row|
                    anonymise(row)
                    increment_progress
                    row
                  end
                  complete_progress
                end

                private

                attr_reader :has_header

                # Replaces, in place, each selected cell of +line+.
                def anonymise(line)
                  columns.to_anonymise.each { |key| line[key] = anonymous_email(line[key]) }
                end

                # CSV reader wrapped around the raw input stream.
                def input
                  @_input ||= ::CSV.new(@input, headers: has_header)
                end

                # Headers as reported by the reader.
                def headers
                  input.headers
                end

                # CSV writer wrapped around the raw output stream.
                def output
                  @_output ||= ::CSV.new(@output, write_headers: has_header, headers: headers)
                end

                # Column selection, built on first use.
                def columns
                  @_columns ||= Columns.new(@columns_to_anonymise, headers)
                end

                # Feeds each incoming row to the block and writes whatever the
                # block returns to the outgoing CSV.
                def map_lines
                  row = input.gets
                  while row
                    output.puts yield(row)
                    row = input.gets
                  end
                end
              end
         | 
| 63 | 
            +
            end
         | 
    
        data/lib/anon/text.rb
    ADDED
    
    | @@ -0,0 +1,44 @@ | |
| 1 | 
            +
            # encoding: utf-8
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            require 'anon/base'
         | 
| 4 | 
            +
             | 
| 5 | 
            +
            module Anon
         | 
| 6 | 
            +
              # Anonymises any detected e-mail address in a text file.
         | 
| 7 | 
            +
              class Text < Base
                # From the email regex research: http://fightingforalostcause.net/misc/2006/compare-email-regex.php
                # Authors: James Watts and Francisco Jose Martin Moreno
                #
                # The first character class lists the punctuation accepted in the
                # dot-separated leading parts of the local part; `\$` and `\^` are
                # the literal dollar and caret characters.
                EMAIL_REGEX = /([\w\!\#\$\%\&\'\*\+\-\/\=\?\^\`{\|\}\~]+\.)*[\w\+-]+@((((([a-z0-9]{1}[a-z0-9\-]{0,62}[a-z0-9]{1})|[a-z])\.)+[a-z]{2,6})|(\d{1,3}\.){3}\d{1,3}(\:\d{1,5})?)/i # rubocop:disable Metrics/LineLength

                # incoming_filename - IO-like object to read lines from (#gets)
                # outgoing_filename - IO-like object to write lines to (#puts)
                def initialize(incoming_filename, outgoing_filename)
                  @input = incoming_filename
                  @output = outgoing_filename
                end

                # Anonymises any e-mail addresses found in the text
                def anonymise!
                  start_progress
                  map_lines do |line|
                    line = anonymise_line(line)
                    increment_progress
                    line
                  end
                  complete_progress
                end

                private

                attr_reader :input, :output

                # Reads each line from the incoming file, processes it using the block
                # and saves the return value of the block to the outgoing file.
                def map_lines
                  while (inline = input.gets)
                    output.puts yield(inline)
                  end
                end

                # Returns a copy of +line+ with every matched address replaced.
                def anonymise_line(line)
                  line.gsub(EMAIL_REGEX) { |email| anonymous_email(email) }
                end
              end
         | 
| 44 | 
            +
            end
         | 
    
        data/lib/anon.rb
    ADDED
    
    
| @@ -0,0 +1,76 @@ | |
| 1 | 
            +
            require 'spec_helper'
         | 
| 2 | 
            +
            require 'fileutils'
         | 
| 3 | 
            +
             | 
| 4 | 
            +
            describe 'The comand line interface' do
              # Anonymised version of spec/fixture/csv_with_headers.csv.
              let(:expected_output) do
                [
                  'pgid_sku,contract start date,e---mail,post_code,product_category,transmission_type',
                  'F^u4bgebdfds, 15/07/1756,anon1@anon.com, XY1 TH7, jokes > joke books, oily',
                  'FNY7dujh, 15/11/1856,anon2@anon.com, CZ1 NJ7, books > dusty, automatic',
                  ''
                ].join("\n")
              end

              # Removes any previous result, runs +command+ (which is expected to
              # write spec/fixture/test.out) and returns what it wrote.
              def run_and_read(command)
                FileUtils.rm_rf('spec/fixture/test.out')
                `#{command}`
                File.read('spec/fixture/test.out')
              end

              describe 'text' do
                describe 'reading from a file' do
                  let(:output) { `bin/anon text -i spec/fixture/csv_with_headers.csv` }

                  specify { expect(output).to eq expected_output }
                end

                describe 'reading from stdin' do
                  let(:output) { `cat spec/fixture/csv_with_headers.csv | bin/anon text` }

                  specify { expect(output).to eq expected_output }
                end

                describe 'writing to a file' do
                  let(:output) do
                    run_and_read('cat spec/fixture/csv_with_headers.csv | bin/anon text -o spec/fixture/test.out')
                  end

                  specify { expect(output).to eq expected_output }
                end
              end

              describe 'csv' do
                describe 'reading from a file' do
                  let(:output) { `bin/anon csv -i spec/fixture/csv_with_headers.csv -c 2` }

                  specify { expect(output).to eq expected_output }
                end

                describe 'reading from stdin' do
                  let(:output) { `cat spec/fixture/csv_with_headers.csv | bin/anon csv -c 2` }

                  specify { expect(output).to eq expected_output }
                end

                describe 'writing to a file' do
                  let(:output) do
                    run_and_read('cat spec/fixture/csv_with_headers.csv | bin/anon csv -c 2 -o spec/fixture/test.out')
                  end

                  specify { expect(output).to eq expected_output }
                end

                describe 'a headerless file' do
                  # Same rows as above, without the header line.
                  let(:expected_output) do
                    [
                      'F^u4bgebdfds, 15/07/1756,anon1@anon.com, XY1 TH7, jokes > joke books, oily',
                      'FNY7dujh, 15/11/1856,anon2@anon.com, CZ1 NJ7, books > dusty, automatic',
                      ''
                    ].join("\n")
                  end

                  let(:output) { `bin/anon csv -c 2 -i spec/fixture/csv_without_headers.csv --no-header` }

                  specify { expect(output).to eq expected_output }
                end

                describe 'named headers' do
                  let(:output) { `bin/anon csv -i spec/fixture/csv_with_headers.csv -c e---mail` }

                  specify { expect(output).to eq expected_output }
                end

                describe 'automatic header detection' do
                  let(:output) { `bin/anon csv -i spec/fixture/csv_with_headers.csv` }

                  specify { expect(output).to eq expected_output }
                end
              end
            end
         | 
    
        data/spec/spec_helper.rb
    ADDED
    
    
| @@ -0,0 +1,46 @@ | |
| 1 | 
            +
            require 'spec_helper'
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            require 'anon/cli'
         | 
| 4 | 
            +
            require 'anon/csv'
         | 
| 5 | 
            +
            require 'anon/text'
         | 
| 6 | 
            +
             | 
| 7 | 
            +
            describe Anon::CLI do
              describe '#text' do
                context 'without options' do
                  it 'calls the text anonomiser with standard in and out' do
                    expect(Anon::Text).to receive(:anonymise!).with($stdin, $stdout)
                    subject.text
                  end
                end

                context 'specifying an input file' do
                  it 'calls the text anonomiser with the file' do
                    expect(Anon::Text).to receive(:anonymise!) do |input, _output|
                      expect(input).to be_a File
                      expect(input.path).to eq 'spec/fixture/test.in'
                    end
                    subject.options = { infile: 'spec/fixture/test.in' }
                    subject.text
                  end
                end

                context 'specifying an output file' do
                  let(:outfile) { 'spec/fixture/test.out' }

                  # The CLI opens the output file for writing, so remove
                  # whatever this example created.
                  after { File.delete(outfile) if File.exist?(outfile) }

                  it 'calls the text anonomiser with the file' do
                    # Checks the second (output) argument; the input stays $stdin.
                    expect(Anon::Text).to receive(:anonymise!) do |_input, output|
                      expect(output).to be_a File
                      expect(output.path).to eq outfile
                    end
                    subject.options = { outfile: outfile }
                    subject.text
                  end
                end
              end

              describe '#csv' do
                it 'calls the csv anonomiser with the correct columns' do
                  # options[:header] is nil here because the options hash is
                  # assigned directly, with only :columns set.
                  expect(Anon::CSV).to receive(:anonymise!).with($stdin, $stdout, '1,4,7', nil)
                  subject.options = { columns: '1,4,7' }
                  subject.csv
                end
              end
            end
         | 
| @@ -0,0 +1,93 @@ | |
| 1 | 
            +
            require 'spec_helper'
         | 
| 2 | 
            +
            require 'anon/csv/columns'
         | 
| 3 | 
            +
             | 
| 4 | 
            +
            describe Anon::CSV::Columns do
         | 
| 5 | 
            +
              describe '#to_anonymise' do
         | 
| 6 | 
            +
                context 'index columns' do
         | 
| 7 | 
            +
                  subject { described_class.new('1,2,3', double) }
         | 
| 8 | 
            +
             | 
| 9 | 
            +
                  it 'converts the indexes to Integers' do
         | 
| 10 | 
            +
                    expect(subject.to_anonymise).to eq [1, 2, 3]
         | 
| 11 | 
            +
                  end
         | 
| 12 | 
            +
                end
         | 
| 13 | 
            +
             | 
| 14 | 
            +
                context 'named columns' do
         | 
| 15 | 
            +
                  subject { described_class.new('email_address,name,foo', double) }
         | 
| 16 | 
            +
             | 
| 17 | 
            +
                  it 'returns the named columns' do
         | 
| 18 | 
            +
                    expect(subject.to_anonymise).to eq %w(email_address name foo)
         | 
| 19 | 
            +
                  end
         | 
| 20 | 
            +
                end
         | 
| 21 | 
            +
             | 
| 22 | 
            +
                context 'automatic column detection' do
         | 
| 23 | 
            +
                  let(:email_headers) do
         | 
| 24 | 
            +
                    [
         | 
| 25 | 
            +
                      'email',
         | 
| 26 | 
            +
                      'zemailaddress',
         | 
| 27 | 
            +
                      'contact email address',
         | 
| 28 | 
            +
                      'emailaddress',
         | 
| 29 | 
            +
                      'e-mail',
         | 
| 30 | 
            +
                      'personal email',
         | 
| 31 | 
            +
                      'email address',
         | 
| 32 | 
            +
                      'ct_email_addr',
         | 
| 33 | 
            +
                      'e_mail_address',
         | 
| 34 | 
            +
                      'student_e_mail',
         | 
| 35 | 
            +
                      'e-mail address',
         | 
| 36 | 
            +
                      'parent email address',
         | 
| 37 | 
            +
                      'customer email address',
         | 
| 38 | 
            +
                      'email_address',
         | 
| 39 | 
            +
                      'contact address.email',
         | 
| 40 | 
            +
                      'user email string',
         | 
| 41 | 
            +
                      'email client',
         | 
| 42 | 
            +
                      'client email',
         | 
| 43 | 
            +
                      'ot ship-to email address',
         | 
| 44 | 
            +
                      'customer email',
         | 
| 45 | 
            +
                      'clnp email address1',
         | 
| 46 | 
            +
                      'default_email',
         | 
| 47 | 
            +
                      'email_addr',
         | 
| 48 | 
            +
                      'sender_email',
         | 
| 49 | 
            +
                      'webemail',
         | 
| 50 | 
            +
                    ]
         | 
| 51 | 
            +
                  end
         | 
| 52 | 
            +
             | 
| 53 | 
            +
                  let(:non_email_headers) do
         | 
| 54 | 
            +
                    [
         | 
| 55 | 
            +
                      'pgid_sku',
         | 
| 56 | 
            +
                      'contract start date',
         | 
| 57 | 
            +
                      'post_code',
         | 
| 58 | 
            +
                      'product_category',
         | 
| 59 | 
            +
                      'transmission_type',
         | 
| 60 | 
            +
                      'policy_start_date',
         | 
| 61 | 
            +
                      'category-genericname',
         | 
| 62 | 
            +
                      'order id',
         | 
| 63 | 
            +
                      'unique_model',
         | 
| 64 | 
            +
                      'start',
         | 
| 65 | 
            +
                      'image',
         | 
| 66 | 
            +
                      'orderdate',
         | 
| 67 | 
            +
                      'model',
         | 
| 68 | 
            +
                      'homebound date',
         | 
| 69 | 
            +
                      'pid-cid',
         | 
| 70 | 
            +
                      'ct_name_last',
         | 
| 71 | 
            +
                      'booking ref.',
         | 
| 72 | 
            +
                      'a5',
         | 
| 73 | 
            +
                      'id',
         | 
| 74 | 
            +
                      'bazaarvoice id',
         | 
| 75 | 
            +
                      'questionnaire_name',
         | 
| 76 | 
            +
                      'deeplink_id',
         | 
| 77 | 
            +
                      'hotel_name',
         | 
| 78 | 
            +
                      'series_identifier',
         | 
| 79 | 
            +
                      'registration',
         | 
| 80 | 
            +
                      'zmailname',
         | 
| 81 | 
            +
                    ]
         | 
| 82 | 
            +
                  end
         | 
| 83 | 
            +
             | 
| 84 | 
            +
                  let(:headers) { email_headers + non_email_headers }
         | 
| 85 | 
            +
             | 
| 86 | 
            +
                  subject { described_class.new(nil, headers.shuffle) }
         | 
| 87 | 
            +
             | 
| 88 | 
            +
                  it 'returns the email related headers' do
         | 
| 89 | 
            +
                    expect(subject.to_anonymise.sort).to eq email_headers.sort
         | 
| 90 | 
            +
                  end
         | 
| 91 | 
            +
                end
         | 
| 92 | 
            +
              end
         | 
| 93 | 
            +
            end
         | 
| @@ -0,0 +1,76 @@ | |
| 1 | 
            +
            require 'spec_helper'
         | 
| 2 | 
            +
            require 'anon/csv'
         | 
| 3 | 
            +
             | 
| 4 | 
            +
            describe Anon::CSV do
         | 
| 5 | 
            +
             | 
| 6 | 
            +
              describe '.anonymise!' do
         | 
| 7 | 
            +
             | 
| 8 | 
            +
                before do
         | 
| 9 | 
            +
                  described_class.anonymise!(input_stream, output_stream, columns, headers)
         | 
| 10 | 
            +
                end
         | 
| 11 | 
            +
             | 
| 12 | 
            +
                let(:output_stream) { StringIO.new }
         | 
| 13 | 
            +
                let(:columns) { '0' }
         | 
| 14 | 
            +
             | 
| 15 | 
            +
                context 'with headers' do
         | 
| 16 | 
            +
                  let(:headers) { true }
         | 
| 17 | 
            +
             | 
| 18 | 
            +
                  let(:input_stream) do
         | 
| 19 | 
            +
                    StringIO.new "email,foo,bar
         | 
| 20 | 
            +
                    foo@bar.com,34545,bannas
         | 
| 21 | 
            +
                    foopface@fooofoo.co.uk,124353,apples"
         | 
| 22 | 
            +
                  end
         | 
| 23 | 
            +
             | 
| 24 | 
            +
                  it 'anonymises the correct column' do
         | 
| 25 | 
            +
                    output_stream.rewind
         | 
| 26 | 
            +
                    expect(output_stream.gets).to eq "email,foo,bar\n"
         | 
| 27 | 
            +
                    expect(output_stream.gets).to eq "anon1@anon.com,34545,bannas\n"
         | 
| 28 | 
            +
                    expect(output_stream.gets).to eq "anon2@anon.com,124353,apples\n"
         | 
| 29 | 
            +
                    expect(output_stream.gets).to eq nil
         | 
| 30 | 
            +
                  end
         | 
| 31 | 
            +
             | 
| 32 | 
            +
                  context 'with named columns' do
         | 
| 33 | 
            +
                    let(:columns) { 'email,bar' }
         | 
| 34 | 
            +
             | 
| 35 | 
            +
                    it 'anonymises the correct columns' do
         | 
| 36 | 
            +
                      output_stream.rewind
         | 
| 37 | 
            +
                      expect(output_stream.gets).to eq "email,foo,bar\n"
         | 
| 38 | 
            +
                      expect(output_stream.gets).to eq "anon1@anon.com,34545,anon2@anon.com\n"
         | 
| 39 | 
            +
                      expect(output_stream.gets).to eq "anon3@anon.com,124353,anon4@anon.com\n"
         | 
| 40 | 
            +
                      expect(output_stream.gets).to eq nil
         | 
| 41 | 
            +
                    end
         | 
| 42 | 
            +
                  end
         | 
| 43 | 
            +
             | 
| 44 | 
            +
                  context 'without named columns' do
         | 
| 45 | 
            +
                    let(:columns) { nil }
         | 
| 46 | 
            +
             | 
| 47 | 
            +
                    it 'guesses the columns to anonymise' do
         | 
| 48 | 
            +
                      output_stream.rewind
         | 
| 49 | 
            +
                      expect(output_stream.gets).to eq "email,foo,bar\n"
         | 
| 50 | 
            +
                      expect(output_stream.gets).to eq "anon1@anon.com,34545,bannas\n"
         | 
| 51 | 
            +
                      expect(output_stream.gets).to eq "anon2@anon.com,124353,apples\n"
         | 
| 52 | 
            +
                      expect(output_stream.gets).to eq nil
         | 
| 53 | 
            +
                    end
         | 
| 54 | 
            +
                  end
         | 
| 55 | 
            +
             | 
| 56 | 
            +
                end
         | 
| 57 | 
            +
             | 
| 58 | 
            +
                context 'without headers' do
         | 
| 59 | 
            +
                  let(:headers) { false }
         | 
| 60 | 
            +
             | 
| 61 | 
            +
                  let(:input_stream) do
         | 
| 62 | 
            +
                    StringIO.new "looloo@example.com,2456,satsuma
         | 
| 63 | 
            +
                    foo@bar.com,34545,bannas
         | 
| 64 | 
            +
                    foopface@fooofoo.co.uk,124353,apples"
         | 
| 65 | 
            +
                  end
         | 
| 66 | 
            +
             | 
| 67 | 
            +
                  it 'anonymises the correct column' do
         | 
| 68 | 
            +
                    output_stream.rewind
         | 
| 69 | 
            +
                    expect(output_stream.gets).to eq "anon1@anon.com,2456,satsuma\n"
         | 
| 70 | 
            +
                    expect(output_stream.gets).to eq "anon2@anon.com,34545,bannas\n"
         | 
| 71 | 
            +
                    expect(output_stream.gets).to eq "anon3@anon.com,124353,apples\n"
         | 
| 72 | 
            +
                    expect(output_stream.gets).to eq nil
         | 
| 73 | 
            +
                  end
         | 
| 74 | 
            +
                end
         | 
| 75 | 
            +
              end
         | 
| 76 | 
            +
            end
         | 
| @@ -0,0 +1,24 @@ | |
| 1 | 
            +
            require 'spec_helper'
         | 
| 2 | 
            +
            require 'anon/text'
         | 
| 3 | 
            +
             | 
| 4 | 
            +
            describe Anon::Text do
         | 
| 5 | 
            +
              let(:output_stream) { StringIO.new }
         | 
| 6 | 
            +
              subject { described_class.new(input_stream, output_stream) }
         | 
| 7 | 
            +
             | 
| 8 | 
            +
              describe '#anonymise!' do
         | 
| 9 | 
            +
                let(:input_stream) do
         | 
| 10 | 
            +
                  StringIO.new ' someone@foo.com then some other interesting text another@icecream.museum'
         | 
| 11 | 
            +
                end
         | 
| 12 | 
            +
             | 
| 13 | 
            +
                it 'anonymises any email addresss' do
         | 
| 14 | 
            +
                  subject.anonymise!
         | 
| 15 | 
            +
                  expect(output_stream.string).to_not include 'someone@foo.com'
         | 
| 16 | 
            +
                  expect(output_stream.string).to_not include 'another@icecream.museum'
         | 
| 17 | 
            +
                end
         | 
| 18 | 
            +
             | 
| 19 | 
            +
                it 'leaves the other text alone' do
         | 
| 20 | 
            +
                  subject.anonymise!
         | 
| 21 | 
            +
                  expect(output_stream.string).to eq " anon1@anon.com then some other interesting text anon2@anon.com\n"
         | 
| 22 | 
            +
                end
         | 
| 23 | 
            +
              end
         | 
| 24 | 
            +
            end
         | 
    
        metadata
    ADDED
    
    | @@ -0,0 +1,142 @@ | |
| 1 | 
            +
            --- !ruby/object:Gem::Specification
         | 
| 2 | 
            +
            name: anon
         | 
| 3 | 
            +
            version: !ruby/object:Gem::Version
         | 
| 4 | 
            +
              version: 0.0.1
         | 
| 5 | 
            +
            platform: ruby
         | 
| 6 | 
            +
            authors:
         | 
| 7 | 
            +
            - Reevoo Engineering
         | 
| 8 | 
            +
            autorequire: 
         | 
| 9 | 
            +
            bindir: bin
         | 
| 10 | 
            +
            cert_chain: []
         | 
| 11 | 
            +
            date: 2014-02-03 00:00:00.000000000 Z
         | 
| 12 | 
            +
            dependencies:
         | 
| 13 | 
            +
            - !ruby/object:Gem::Dependency
         | 
| 14 | 
            +
              name: time_difference
         | 
| 15 | 
            +
              requirement: !ruby/object:Gem::Requirement
         | 
| 16 | 
            +
                requirements:
         | 
| 17 | 
            +
                - - ">="
         | 
| 18 | 
            +
                  - !ruby/object:Gem::Version
         | 
| 19 | 
            +
                    version: '0'
         | 
| 20 | 
            +
              type: :runtime
         | 
| 21 | 
            +
              prerelease: false
         | 
| 22 | 
            +
              version_requirements: !ruby/object:Gem::Requirement
         | 
| 23 | 
            +
                requirements:
         | 
| 24 | 
            +
                - - ">="
         | 
| 25 | 
            +
                  - !ruby/object:Gem::Version
         | 
| 26 | 
            +
                    version: '0'
         | 
| 27 | 
            +
            - !ruby/object:Gem::Dependency
         | 
| 28 | 
            +
              name: thor
         | 
| 29 | 
            +
              requirement: !ruby/object:Gem::Requirement
         | 
| 30 | 
            +
                requirements:
         | 
| 31 | 
            +
                - - ">="
         | 
| 32 | 
            +
                  - !ruby/object:Gem::Version
         | 
| 33 | 
            +
                    version: '0'
         | 
| 34 | 
            +
              type: :runtime
         | 
| 35 | 
            +
              prerelease: false
         | 
| 36 | 
            +
              version_requirements: !ruby/object:Gem::Requirement
         | 
| 37 | 
            +
                requirements:
         | 
| 38 | 
            +
                - - ">="
         | 
| 39 | 
            +
                  - !ruby/object:Gem::Version
         | 
| 40 | 
            +
                    version: '0'
         | 
| 41 | 
            +
            - !ruby/object:Gem::Dependency
         | 
| 42 | 
            +
              name: rspec
         | 
| 43 | 
            +
              requirement: !ruby/object:Gem::Requirement
         | 
| 44 | 
            +
                requirements:
         | 
| 45 | 
            +
                - - "~>"
         | 
| 46 | 
            +
                  - !ruby/object:Gem::Version
         | 
| 47 | 
            +
                    version: '3'
         | 
| 48 | 
            +
              type: :development
         | 
| 49 | 
            +
              prerelease: false
         | 
| 50 | 
            +
              version_requirements: !ruby/object:Gem::Requirement
         | 
| 51 | 
            +
                requirements:
         | 
| 52 | 
            +
                - - "~>"
         | 
| 53 | 
            +
                  - !ruby/object:Gem::Version
         | 
| 54 | 
            +
                    version: '3'
         | 
| 55 | 
            +
            - !ruby/object:Gem::Dependency
         | 
| 56 | 
            +
              name: simplecov
         | 
| 57 | 
            +
              requirement: !ruby/object:Gem::Requirement
         | 
| 58 | 
            +
                requirements:
         | 
| 59 | 
            +
                - - ">="
         | 
| 60 | 
            +
                  - !ruby/object:Gem::Version
         | 
| 61 | 
            +
                    version: '0'
         | 
| 62 | 
            +
              type: :development
         | 
| 63 | 
            +
              prerelease: false
         | 
| 64 | 
            +
              version_requirements: !ruby/object:Gem::Requirement
         | 
| 65 | 
            +
                requirements:
         | 
| 66 | 
            +
                - - ">="
         | 
| 67 | 
            +
                  - !ruby/object:Gem::Version
         | 
| 68 | 
            +
                    version: '0'
         | 
| 69 | 
            +
            - !ruby/object:Gem::Dependency
         | 
| 70 | 
            +
              name: reevoocop
         | 
| 71 | 
            +
              requirement: !ruby/object:Gem::Requirement
         | 
| 72 | 
            +
                requirements:
         | 
| 73 | 
            +
                - - ">="
         | 
| 74 | 
            +
                  - !ruby/object:Gem::Version
         | 
| 75 | 
            +
                    version: '0'
         | 
| 76 | 
            +
              type: :development
         | 
| 77 | 
            +
              prerelease: false
         | 
| 78 | 
            +
              version_requirements: !ruby/object:Gem::Requirement
         | 
| 79 | 
            +
                requirements:
         | 
| 80 | 
            +
                - - ">="
         | 
| 81 | 
            +
                  - !ruby/object:Gem::Version
         | 
| 82 | 
            +
                    version: '0'
         | 
| 83 | 
            +
            - !ruby/object:Gem::Dependency
         | 
| 84 | 
            +
              name: rake
         | 
| 85 | 
            +
              requirement: !ruby/object:Gem::Requirement
         | 
| 86 | 
            +
                requirements:
         | 
| 87 | 
            +
                - - ">="
         | 
| 88 | 
            +
                  - !ruby/object:Gem::Version
         | 
| 89 | 
            +
                    version: '0'
         | 
| 90 | 
            +
              type: :development
         | 
| 91 | 
            +
              prerelease: false
         | 
| 92 | 
            +
              version_requirements: !ruby/object:Gem::Requirement
         | 
| 93 | 
            +
                requirements:
         | 
| 94 | 
            +
                - - ">="
         | 
| 95 | 
            +
                  - !ruby/object:Gem::Version
         | 
| 96 | 
            +
                    version: '0'
         | 
| 97 | 
            +
            description: Replaces personal data with fake data
         | 
| 98 | 
            +
            email: developers@reevoo.com
         | 
| 99 | 
            +
            executables:
         | 
| 100 | 
            +
            - anon
         | 
| 101 | 
            +
            extensions: []
         | 
| 102 | 
            +
            extra_rdoc_files: []
         | 
| 103 | 
            +
            files:
         | 
| 104 | 
            +
            - bin/anon
         | 
| 105 | 
            +
            - lib/anon.rb
         | 
| 106 | 
            +
            - lib/anon/base.rb
         | 
| 107 | 
            +
            - lib/anon/cli.rb
         | 
| 108 | 
            +
            - lib/anon/csv.rb
         | 
| 109 | 
            +
            - lib/anon/csv/columns.rb
         | 
| 110 | 
            +
            - lib/anon/text.rb
         | 
| 111 | 
            +
            - spec/intergration/cli_spec.rb
         | 
| 112 | 
            +
            - spec/spec_helper.rb
         | 
| 113 | 
            +
            - spec/unit/cli_spec.rb
         | 
| 114 | 
            +
            - spec/unit/columns_spec.rb
         | 
| 115 | 
            +
            - spec/unit/csv_spec.rb
         | 
| 116 | 
            +
            - spec/unit/text_spec.rb
         | 
| 117 | 
            +
            homepage: https://github.com/reevoo/anon
         | 
| 118 | 
            +
            licenses:
         | 
| 119 | 
            +
            - MIT
         | 
| 120 | 
            +
            metadata: {}
         | 
| 121 | 
            +
            post_install_message: 
         | 
| 122 | 
            +
            rdoc_options: []
         | 
| 123 | 
            +
            require_paths:
         | 
| 124 | 
            +
            - lib
         | 
| 125 | 
            +
            required_ruby_version: !ruby/object:Gem::Requirement
         | 
| 126 | 
            +
              requirements:
         | 
| 127 | 
            +
              - - ">="
         | 
| 128 | 
            +
                - !ruby/object:Gem::Version
         | 
| 129 | 
            +
                  version: '0'
         | 
| 130 | 
            +
            required_rubygems_version: !ruby/object:Gem::Requirement
         | 
| 131 | 
            +
              requirements:
         | 
| 132 | 
            +
              - - ">="
         | 
| 133 | 
            +
                - !ruby/object:Gem::Version
         | 
| 134 | 
            +
                  version: '0'
         | 
| 135 | 
            +
            requirements: []
         | 
| 136 | 
            +
            rubyforge_project: 
         | 
| 137 | 
            +
            rubygems_version: 2.2.2
         | 
| 138 | 
            +
            signing_key: 
         | 
| 139 | 
            +
            specification_version: 4
         | 
| 140 | 
            +
            summary: Replaces personal data with fake data
         | 
| 141 | 
            +
            test_files: []
         | 
| 142 | 
            +
            has_rdoc: 
         |