anonymize 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,17 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
@@ -0,0 +1,8 @@
1
+ v0.0.1 (2013-06-13)
2
+ -------------
3
+
4
+ * First release
5
+ * Simple `Anonymize.define` DSL
6
+ * `Anonymize::GoogleNews.related_news` anonymizer
7
+ * Support for retrying (useful if random data needs to be unique)
8
+ * Optional visual progressbar
data/Gemfile ADDED
@@ -0,0 +1,11 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Gems for examples
4
+ gem 'mysql2'
5
+ gem 'net-ssh-gateway'
6
+ gem 'google-search'
7
+ gem 'ruby-progressbar'
8
+ gem 'faker'
9
+
10
+ # Specify your gem's dependencies in anonymize.gemspec
11
+ gemspec
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2013 Andrew Fecheyr
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,29 @@
1
+ # Anonymize
2
+
3
+ Ruby gem to anonymize database data.
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ gem 'anonymize'
10
+
11
+ And then execute:
12
+
13
+ $ bundle
14
+
15
+ Or install it yourself as:
16
+
17
+ $ gem install anonymize
18
+
19
+ ## Usage
20
+
21
+ See [the included example](https://github.com/andruby/anonymize/blob/master/examples/ssh_tunnel.rb) for usage information.
22
+
23
+ ## Contributing
24
+
25
+ 1. Fork it
26
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
27
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
28
+ 4. Push to the branch (`git push origin my-new-feature`)
29
+ 5. Create a new Pull Request
@@ -0,0 +1 @@
1
+ require "bundler/gem_tasks"
@@ -0,0 +1,23 @@
1
# coding: utf-8
# Gem specification for anonymize.
# The local lib directory is put on the load path first so that
# Anonymize::VERSION can be required straight from the working tree.
lib_dir = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)
require 'anonymize/version'

Gem::Specification.new do |gem|
  # Identity
  gem.name        = 'anonymize'
  gem.version     = Anonymize::VERSION
  gem.license     = 'MIT'
  gem.homepage    = 'https://github.com/andruby/anonymize'
  gem.authors     = ['Andrew Fecheyr']
  gem.email       = ['andrew@bedesign.be']
  gem.summary     = 'Anonymize database data'
  gem.description = 'Anonymize database data on the fly'

  # Package contents: every file tracked by git; executables and test files
  # are picked out of that list by path prefix.
  gem.files         = `git ls-files`.split($/)
  gem.executables   = gem.files.grep(%r{^bin/}) { |path| File.basename(path) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ['lib']

  # Development-only dependencies
  gem.add_development_dependency 'bundler', '~> 1.3'
  gem.add_development_dependency 'rake'
end
@@ -0,0 +1,35 @@
1
+ require_relative '../lib/anonymize.rb'
2
+
3
+ require 'mysql2'
4
+ require 'net/ssh/gateway'
5
+ require 'faker'
6
+
7
# Set up an ssh gateway
gateway = Net::SSH::Gateway.new('ssh_host', 'ssh_username')
connection = nil

begin
  # Forward local port 3307 to port 3306 as seen from the ssh host
  port = gateway.open('127.0.0.1', 3306, 3307)

  # Connect through the local ssh gateway to the real server
  connection = Mysql2::Client.new(
    host: "127.0.0.1",
    database: 'db_name',
    username: 'db_user',
    password: 'db_password',
    port: port
  )

  Anonymize.define(connection, verbose: false, pretend: true, progress: true) do
    table :company do
      # If the Proc returns false or nil, the value is not updated.
      # The nil check keeps NULL summaries from raising on `length`.
      column(:summary) { |text| Anonymize::GoogleNews.related_news(text) if text && text.length > 3 }
      column(:name) { Faker::Company.name }
    end

    # Using up to 5 retries to prevent uniqueness errors
    table :users, retries: 5 do
      # Use a Proc with 2 arguments to access the full row for simple conditionals
      column(:name) { |name, row| Faker::Name.name unless row["super_user"] == 1 }
      column(:email) { Faker::Internet.email }
    end
  end
ensure
  # Always release the database connection and the tunnel, even when the
  # anonymization run (or the connection attempt itself) raises.
  connection.close if connection
  gateway.shutdown!
end
@@ -0,0 +1,27 @@
1
+ require "anonymize/version"
2
+
3
# Entry point and DSL for describing which table columns to anonymize.
#
# Typical use:
#
#   Anonymize.define(connection, pretend: true) do
#     table :users, retries: 5 do
#       column(:email) { Faker::Internet.email }
#     end
#   end
#
# The DSL state (definition, connection, options, current table) is stored
# on the module itself, so only one definition is held at a time.
module Anonymize
  autoload :GoogleNews, 'anonymize/google_news'
  autoload :SQL, 'anonymize/sql'

  class << self
    attr_accessor :definition, :connection, :options

    # Evaluates the DSL block and then runs the resulting definition through
    # Anonymize::SQL.run!.
    #
    # connection - database connection handed on to Anonymize::SQL
    # options    - Hash of run options handed on to Anonymize::SQL
    # block      - DSL block containing `table` declarations
    #
    # Returns whatever Anonymize::SQL.run! returns.
    def define(connection, options = {}, &block)
      self.options = options
      self.connection = connection
      self.definition = {}
      @table_name = nil
      class_eval(&block)
      Anonymize::SQL.run!(connection, options, definition)
    end

    # Declares a table and evaluates the block to collect its columns.
    #
    # table_name - key under which the table is stored in the definition
    # options    - per-table options Hash (stored under :options)
    # block      - DSL block containing `column` declarations
    def table(table_name, options = {}, &block)
      self.definition ||= {}
      @table_name = table_name
      definition[table_name] = { columns: {}, options: options }
      class_eval(&block)
    ensure
      # Leave the table context so that a later stray `column` call cannot
      # silently attach itself to this table.
      @table_name = nil
    end

    # Registers the replacement block for a column of the current table.
    #
    # column_name - key under which the block is stored
    # block       - block producing the replacement value
    #
    # Raises ArgumentError when called outside of a `table` block.
    def column(column_name, &block)
      unless @table_name
        raise ArgumentError, "column(#{column_name.inspect}) must be called inside a table block"
      end
      definition[@table_name][:columns][column_name] = block
    end
  end
end
@@ -0,0 +1,29 @@
1
+ require 'google-search'
2
+
3
# Produces replacement text made of news snippets that are related to the
# original text, using Google::Search::News from the google-search gem.
module Anonymize::GoogleNews
  class << self
    # Builds ASCII text of up to +char_count+ characters from news results
    # found by searching for the longest words of +text+, two words per query.
    #
    # text       - String to take the search keywords from (HTML tags ignored)
    # char_count - wanted length of the result; defaults to the length of text
    #
    # Returns a String of at most char_count characters. It is shorter (and
    # may be empty) when every keyword has been used before enough text was
    # collected.
    def related_news(text, char_count = text.length)
      words = longest_words(text)
      new_text = ""
      # Also stop when the keywords run out; otherwise the loop would keep
      # issuing blank queries forever whenever the searches fall short.
      until new_text.length >= char_count || words.empty?
        # pop(2) yields [second longest, longest]; reverse keeps longest first
        query = words.pop(2).reverse.join(' ')
        Google::Search::News.new(:query => query).each do |result|
          new_text << utf8_to_ascii(strip_html(result.content))
          break if new_text.length >= char_count
        end
      end
      new_text[0..(char_count - 1)]
    end

    # Returns the unique words of +string+ (HTML tags removed), ordered from
    # shortest to longest, so that `pop` hands out the longest ones first.
    def longest_words(text)
      strip_html(text).scan(/\w+/).uniq.sort_by(&:length)
    end

    # Transcodes +string+ from UTF-8 to ASCII, dropping every character that
    # is invalid or has no ASCII representation.
    def utf8_to_ascii(string)
      string.encode('ascii', 'utf-8', :undef => :replace, :invalid => :replace, :replace => '')
    end

    # Removes everything that looks like an HTML tag (<...>) from +string+.
    def strip_html(string)
      string.gsub(/<[^>]+>/, '')
    end
  end
end
@@ -0,0 +1,60 @@
1
+ autoload :ProgressBar, "ruby-progressbar"
2
+
3
# Applies an anonymization definition to a database.
#
# The connection must respond to `query` and `escape`. The definition is a
# Hash of the form
#   { table => { columns: { column => block }, options: { retries: n } } }
#
# Options read by this class:
#   :progress - show a progress bar per table
#   :verbose  - print every UPDATE statement
#   :pretend  - build (and optionally print) statements without running them
class Anonymize::SQL
  def initialize(connection, options, definition)
    @connection = connection
    @options = options
    @definition = definition
  end

  # Convenience wrapper: builds an instance from the arguments and runs it.
  def self.run!(*args)
    new(*args).run!
  end

  # Processes every table of the definition, announcing each one on stdout.
  def run!
    total_count = @definition.count
    @definition.each_with_index do |(table, data), index|
      puts "(#{index + 1}/#{total_count}) Anonymizing columns #{data[:columns].keys.inspect} from table '#{table}'"
      process_table(table, data)
    end
  end

  # Selects all rows of +table+ and anonymizes them one by one.
  #
  # table - table name, interpolated into the SQL as is
  # data  - Hash with :columns (column => block) and :options (:retries)
  def process_table(table, data)
    columns = data[:columns]
    retries = data[:options][:retries] || 0
    rows = @connection.query("SELECT * FROM #{table}")
    pbar = ProgressBar.create(:format => '%a %B %c of %C', :total => rows.count) if @options[:progress]
    rows.each do |row|
      anonymize_row(table, row, columns, retries)
      pbar.increment if pbar
    end
  end

  # Computes the new value for one column of one row.
  #
  # The block is called according to its arity: with the original value (1),
  # with the original value and the full row (2), or without arguments.
  # A false/nil result means "leave the column untouched".
  def replacement(column, row, proc)
    original = row[column.to_s]
    case proc.arity
    when 1 then proc.call(original)
    when 2 then proc.call(original, row)
    else proc.call
    end
  end

  # Writes +tuples+ (column => new value) to the row with the given id.
  #
  # Nothing is sent when tuples is empty. On failure the same statement is
  # attempted again up to +retries+ more times (nil counts as 0) before the
  # error is re-raised.
  def update_row(id, table, tuples, retries)
    return if tuples.empty?

    attempts_left = retries || 0
    # to_s: replacement blocks may return non-String values (e.g. numbers)
    update_part = tuples.map { |column, value| "#{column} = \"#{@connection.escape(value.to_s)}\"" }
    update_sql = "UPDATE #{table} SET #{update_part.join(', ')} WHERE id = #{id}"
    begin
      puts update_sql if @options[:verbose]
      @connection.query(update_sql) unless @options[:pretend]
    rescue StandardError
      raise if attempts_left <= 0
      attempts_left -= 1
      retry
    end
  end

  private

  # Anonymizes a single row. When the update fails, the replacement values
  # are generated again before the next attempt, so that freshly drawn random
  # data gets a chance to satisfy e.g. a uniqueness constraint. Errors raised
  # by the replacement blocks themselves are not retried.
  def anonymize_row(table, row, columns, retries)
    attempts_left = retries
    tuples = build_tuples(columns, row)
    begin
      # 0: retrying is handled here, with regenerated values
      update_row(row["id"], table, tuples, 0)
    rescue StandardError
      raise if attempts_left <= 0
      attempts_left -= 1
      tuples = build_tuples(columns, row)
      retry
    end
  end

  # Returns a Hash of column => replacement for every column whose block
  # produced a truthy value.
  def build_tuples(columns, row)
    columns.each_with_object({}) do |(column, proc), tuples|
      value = replacement(column, row, proc)
      tuples[column] = value if value
    end
  end
end
@@ -0,0 +1,3 @@
1
# Namespace of the anonymize gem.
module Anonymize
  # Release number of the gem; anonymize.gemspec reads it as Anonymize::VERSION.
  VERSION = "0.0.1"
end
metadata ADDED
@@ -0,0 +1,96 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: anonymize
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Andrew Fecheyr
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2013-06-12 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: bundler
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ~>
20
+ - !ruby/object:Gem::Version
21
+ version: '1.3'
22
+ type: :development
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ~>
28
+ - !ruby/object:Gem::Version
29
+ version: '1.3'
30
+ - !ruby/object:Gem::Dependency
31
+ name: rake
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ! '>='
36
+ - !ruby/object:Gem::Version
37
+ version: '0'
38
+ type: :development
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ! '>='
44
+ - !ruby/object:Gem::Version
45
+ version: '0'
46
+ description: Anonymize database data on the fly
47
+ email:
48
+ - andrew@bedesign.be
49
+ executables: []
50
+ extensions: []
51
+ extra_rdoc_files: []
52
+ files:
53
+ - .gitignore
54
+ - CHANGELOG.md
55
+ - Gemfile
56
+ - LICENSE.txt
57
+ - README.md
58
+ - Rakefile
59
+ - anonymize.gemspec
60
+ - examples/ssh_tunnel.rb
61
+ - lib/anonymize.rb
62
+ - lib/anonymize/google_news.rb
63
+ - lib/anonymize/sql.rb
64
+ - lib/anonymize/version.rb
65
+ homepage: https://github.com/andruby/anonymize
66
+ licenses:
67
+ - MIT
68
+ post_install_message:
69
+ rdoc_options: []
70
+ require_paths:
71
+ - lib
72
+ required_ruby_version: !ruby/object:Gem::Requirement
73
+ none: false
74
+ requirements:
75
+ - - ! '>='
76
+ - !ruby/object:Gem::Version
77
+ version: '0'
78
+ segments:
79
+ - 0
80
+ hash: 1489173178318267050
81
+ required_rubygems_version: !ruby/object:Gem::Requirement
82
+ none: false
83
+ requirements:
84
+ - - ! '>='
85
+ - !ruby/object:Gem::Version
86
+ version: '0'
87
+ segments:
88
+ - 0
89
+ hash: 1489173178318267050
90
+ requirements: []
91
+ rubyforge_project:
92
+ rubygems_version: 1.8.23
93
+ signing_key:
94
+ specification_version: 3
95
+ summary: Anonymize database data
96
+ test_files: []