brainmap-anonymizer 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/Manifest +13 -0
- data/README.rdoc +0 -0
- data/Rakefile +12 -0
- data/anonymizer.gemspec +33 -0
- data/bin/anonymize_data.rb +51 -0
- data/lib/anonymizer.rb +118 -0
- metadata +66 -0
data/Manifest
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
anonymizer.gemspec
|
2
|
+
bin/anonymize_data.rb
|
3
|
+
lib/anonymizer.rb
|
4
|
+
Manifest
|
5
|
+
Rakefile
|
6
|
+
README.rdoc
|
7
|
+
test/out_2/0/ddd.txt
|
8
|
+
test/out_2/1.txt
|
9
|
+
test/out_2/2/bbb.txt
|
10
|
+
test/out_2/3/ccc.txt
|
11
|
+
test/out_2/4/eee.txt
|
12
|
+
test/out_2/5/aaa.txt
|
13
|
+
test/out_2/keymap.csv
|
data/README.rdoc
ADDED
File without changes
|
data/Rakefile
ADDED
@@ -0,0 +1,12 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'rake'
|
3
|
+
require 'echoe'
|
4
|
+
|
5
|
+
# Declares the anonymizer gem (name, version and descriptive metadata) through Echoe.
Echoe.new('anonymizer', '0.1.0') do |p|
  p.description = "Anonymizes a collection of files and/or directories for interrater reliability analyses and the like."
  p.url = "http://github.com/kjkosmatka/anonymizer"
  p.author = "Kristopher J. Kosmatka"
  p.email = "kk4@medicine.wisc.edu"
  # NOTE(review): presumably keeps the generated test/out* fixtures out of the package -- confirm against Echoe docs.
  p.ignore_pattern = ["test/out*"]
  p.development_dependencies = []
end
|
data/anonymizer.gemspec
ADDED
@@ -0,0 +1,33 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
|
3
|
+
# Gem specification for anonymizer 0.1.0.
#
# Setters that are not part of every RubyGems release are guarded with
# respond_to?, so the specification only assigns what the running RubyGems
# actually provides.
Gem::Specification.new do |s|
  s.name = %q{anonymizer}
  s.version = "0.1.0"

  s.required_rubygems_version = Gem::Requirement.new(">= 1.2") if s.respond_to? :required_rubygems_version=
  s.authors = ["Kristopher J. Kosmatka"]
  s.date = %q{2009-08-13}
  s.default_executable = %q{anonymize_data.rb} if s.respond_to? :default_executable=
  s.description = %q{Anonymizes a collection of files and/or directories for interrater reliability analyses and the like.}
  s.email = %q{kk4@medicine.wisc.edu}
  s.executables = ["anonymize_data.rb"]
  s.extra_rdoc_files = ["bin/anonymize_data.rb", "lib/anonymizer.rb", "README.rdoc"]
  s.files = ["anonymizer.gemspec", "bin/anonymize_data.rb", "lib/anonymizer.rb", "Manifest", "Rakefile", "README.rdoc"]
  s.has_rdoc = true if s.respond_to? :has_rdoc=
  s.homepage = %q{http://github.com/kjkosmatka/anonymizer}
  s.rdoc_options = ["--line-numbers", "--inline-source", "--title", "Anonymizer", "--main", "README.rdoc"]
  s.require_paths = ["lib"]
  s.rubyforge_project = %q{anonymizer} if s.respond_to? :rubyforge_project=
  s.rubygems_version = %q{1.3.1}
  s.summary = %q{Anonymizes a collection of files and/or directories for interrater reliability analyses and the like.}

  # The gem declares no dependencies, so the only thing left to record here is
  # the specification format version.
  s.specification_version = 2 if s.respond_to? :specification_version
end
|
@@ -0,0 +1,51 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
$:.unshift File.join(File.dirname(__FILE__),'..','lib')
|
3
|
+
|
4
|
+
require 'optparse'
|
5
|
+
require 'anonymizer'
|
6
|
+
|
7
|
+
# Command-line settings collected by the option parser below.
#   :collection            -- paths gathered from -c and -g (both may be given, results accumulate)
#   :destination           -- directory given with -d
#   :options_for_record_to -- passed straight through to Anonymizer#record_to
options = {
  :collection => [],
  :options_for_record_to => {}
}

OptionParser.new do |opts|
  opts.banner = "Usage: anonymize_data.rb [options]"

  opts.on("-d", "--destination <directory>", "Destination directory in which anonymized data should be saved") do |d|
    options[:destination] = d
  end
  opts.on("-c", '--collection "<list>"', "A collection of files and/or directories to anonymize, separate with spaces. Quotes are necessary.") do |c|
    options[:collection] += c.split
  end
  opts.on("-g", '--glob "<pattern>"', "Use a shell globbing pattern to select your collection. Quotes are necessary.") do |g|
    options[:collection] += Dir.glob(g)
  end
  opts.on("--include-csv", "Write a csv formatted keymap file in addition to the yaml file.") do |_i|
    options[:options_for_record_to][:include_csv] = true
  end
  opts.on("-k", "--keymap-name <basename>", "Use an alternative file basename (i.e. without extension) for the keymap file.") do |k|
    options[:options_for_record_to][:keymap_name] = k
  end
end.parse!

# Usage errors: report on stderr and exit with status 1 instead of dumping a stack trace.
abort("No items specified to anonymize. Check your glob pattern or explicitly list your items using -c.") if options[:collection].empty?
abort("No destination directory specified. Use -d <directory>.") if options[:destination].nil?

puts "Anonymizing:"
options[:collection].each do |item|
  puts "\t#{item}"
end
puts "To:"
puts "\t#{options[:destination]}"

begin
  a = Anonymizer.new(options[:collection])
  a.record_to(options[:destination], options[:options_for_record_to])
rescue StandardError => e
  # Surface the underlying reason (e.g. a missing source or destination) rather than hiding it.
  $stderr.puts "There was a problem with anonymization: #{e.message}"
  exit 1
end

puts "+++ Successfully Anonymized. +++\n"
exit 0
|
data/lib/anonymizer.rb
ADDED
@@ -0,0 +1,118 @@
|
|
1
|
+
require 'fileutils'
|
2
|
+
require 'yaml'
|
3
|
+
|
4
|
+
# Assigns each item of a collection of files and/or directories a random,
# zero-padded numeric name, copies the items to a destination directory under
# those names, and records the original-to-anonymized mapping in a keymap file.
class Anonymizer

  # an array of strings, the paths to the items to be anonymized
  attr_accessor :filepaths
  # a string, the directory to which all of the anonymizations and the key should be copied
  # (set by the most recent call to record_to)
  attr_accessor :destination_dir
  # a hash, the keys are the original filepaths, the values are the corresponding anonymized names
  attr_accessor :anonymizations


  # Creates a new instance with an array of filepaths (strings).
  # A new set of anonymizations will be created automatically at initialization. If you want to reshuffle the
  # anonymizations you can call anonymize at any time afterward.
  def initialize(filepaths)
    @filepaths = filepaths
    @anonymizations = {}
    anonymize
  end


  # Reshuffles the anonymized names.
  #
  # Every filepath receives a distinct index in random order, zero-padded to
  # the number of decimal digits in the collection size. The mapping is rebuilt
  # from scratch, so entries for paths no longer in filepaths do not linger.
  # An empty collection yields an empty mapping.
  #
  # Returns the new anonymizations hash.
  def anonymize
    ndigits = @filepaths.length.to_s.length
    @anonymizations = {}
    @filepaths.shuffle.each_with_index do |path, idx|
      @anonymizations[path] = "%0#{ndigits}d" % idx
    end
    @anonymizations
  end


  # Copies the collection of files/directories to the destination directory using the current anonymized names. Records
  # the anonymizations in a keymap.yml file.
  #
  # Raises IOError if the destination is not an existing directory, or if any
  # original item cannot be found.
  #
  # === Options
  #   :include_csv => true -- writes the csv format keymap in addition to the yaml
  #
  #   :keymap_name => "basename" -- uses an alternate file basename for the keymap files (the default is "keymap")
  #
  def record_to(destination_dir, options = {})
    # File.directory? is false for anything that does not exist, so no separate existence check is needed.
    raise(IOError, "Destination directory not found.") unless File.directory?(destination_dir)
    @destination_dir = destination_dir
    @anonymizations.each do |orig, anon|
      copy_directory_or_file(orig, File.join(destination_dir, anon))
    end
    keymap_filename = options[:keymap_name] || "keymap"
    write_yml(destination_dir, keymap_filename + '.yml')
    write_csv(destination_dir, keymap_filename + '.csv') if options[:include_csv]
  end


  private


  # recursively copies directories, simply copies files preserving file extension.
  # Raises IOError when the original does not exist.
  def copy_directory_or_file(orig, dest)
    raise(IOError, "Original file or directory not found: #{orig}") unless File.exist?(orig)
    if File.directory?(orig)  # recursively copy directories
      FileUtils.cp_r(orig, dest)
    else                      # simple copy of files, but preserve file extensions
      FileUtils.cp(orig, dest + File.extname(orig))
    end
  end


  # creates yaml text based on current anonymization
  def to_yaml
    @anonymizations.to_yaml
  end


  # creates csv text based on current anonymization, one "original, anonymized" line per item
  def to_csv
    @anonymizations.map { |orig, anon| "#{orig}, #{anon}\n" }.join
  end


  # writes the current anonymization yaml to a file
  def write_yml(destination_dir, keymap_filename)
    File.open(File.join(destination_dir, keymap_filename), 'w') do |out|
      YAML.dump(@anonymizations, out)
    end
  end


  # writes the current anonymization csv to a file
  def write_csv(destination_dir, keymap_filename)
    File.open(File.join(destination_dir, keymap_filename), 'w') do |out|
      out.write(to_csv)
    end
  end

end
|
102
|
+
|
103
|
+
|
104
|
+
|
105
|
+
# Augments the ruby array class with a couple convenient random picking methods
class Array

  # Picks one value from the array at random, array remains unchanged.
  # Returns nil when the array is empty.
  def pick
    # rand(0) yields a Float rather than an index, so handle the empty case explicitly.
    return nil if empty?
    at(rand(length))
  end

  # Removes and returns one element of the array at random, the original array is changed by this method.
  # Returns nil (and removes nothing) when the array is empty.
  def pick!
    return nil if empty?
    delete_at(rand(length))
  end

end
|
metadata
ADDED
@@ -0,0 +1,66 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: brainmap-anonymizer
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Kristopher J. Kosmatka
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
|
12
|
+
date: 2009-08-13 00:00:00 -07:00
|
13
|
+
default_executable: anonymize_data.rb
|
14
|
+
dependencies: []
|
15
|
+
|
16
|
+
description: Anonymizes a collection of files and/or directories for interrater reliability analyses and the like.
|
17
|
+
email: kk4@medicine.wisc.edu
|
18
|
+
executables:
|
19
|
+
- anonymize_data.rb
|
20
|
+
extensions: []
|
21
|
+
|
22
|
+
extra_rdoc_files:
|
23
|
+
- bin/anonymize_data.rb
|
24
|
+
- lib/anonymizer.rb
|
25
|
+
- README.rdoc
|
26
|
+
files:
|
27
|
+
- anonymizer.gemspec
|
28
|
+
- bin/anonymize_data.rb
|
29
|
+
- lib/anonymizer.rb
|
30
|
+
- Manifest
|
31
|
+
- Rakefile
|
32
|
+
- README.rdoc
|
33
|
+
has_rdoc: true
|
34
|
+
homepage: http://github.com/kjkosmatka/anonymizer
|
35
|
+
licenses:
|
36
|
+
post_install_message:
|
37
|
+
rdoc_options:
|
38
|
+
- --line-numbers
|
39
|
+
- --inline-source
|
40
|
+
- --title
|
41
|
+
- Anonymizer
|
42
|
+
- --main
|
43
|
+
- README.rdoc
|
44
|
+
require_paths:
|
45
|
+
- lib
|
46
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
47
|
+
requirements:
|
48
|
+
- - ">="
|
49
|
+
- !ruby/object:Gem::Version
|
50
|
+
version: "0"
|
51
|
+
version:
|
52
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
53
|
+
requirements:
|
54
|
+
- - ">="
|
55
|
+
- !ruby/object:Gem::Version
|
56
|
+
version: "1.2"
|
57
|
+
version:
|
58
|
+
requirements: []
|
59
|
+
|
60
|
+
rubyforge_project: anonymizer
|
61
|
+
rubygems_version: 1.3.5
|
62
|
+
signing_key:
|
63
|
+
specification_version: 2
|
64
|
+
summary: Anonymizes a collection of files and/or directories for interrater reliability analyses and the like.
|
65
|
+
test_files: []
|
66
|
+
|