brainmap-anonymizer 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Manifest +13 -0
- data/README.rdoc +0 -0
- data/Rakefile +12 -0
- data/anonymizer.gemspec +33 -0
- data/bin/anonymize_data.rb +51 -0
- data/lib/anonymizer.rb +118 -0
- metadata +66 -0
data/Manifest
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
anonymizer.gemspec
|
2
|
+
bin/anonymize_data.rb
|
3
|
+
lib/anonymizer.rb
|
4
|
+
Manifest
|
5
|
+
Rakefile
|
6
|
+
README.rdoc
|
7
|
+
test/out_2/0/ddd.txt
|
8
|
+
test/out_2/1.txt
|
9
|
+
test/out_2/2/bbb.txt
|
10
|
+
test/out_2/3/ccc.txt
|
11
|
+
test/out_2/4/eee.txt
|
12
|
+
test/out_2/5/aaa.txt
|
13
|
+
test/out_2/keymap.csv
|
data/README.rdoc
ADDED
File without changes
|
data/Rakefile
ADDED
@@ -0,0 +1,12 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'rake'
|
3
|
+
require 'echoe'
|
4
|
+
|
5
|
+
# Packaging configuration for the anonymizer gem (name 'anonymizer', version '0.1.0'),
# declared through the Echoe DSL required above.
Echoe.new('anonymizer', '0.1.0') do |p|
  # Description published with the gem; spelling of "reliability" corrected.
  p.description              = "Anonymizes a collection of files and/or directories for interrater reliability analyses and the like."
  p.url                      = "http://github.com/kjkosmatka/anonymizer"
  p.author                   = "Kristopher J. Kosmatka"
  p.email                    = "kk4@medicine.wisc.edu"
  # Ignore pattern for generated test output directories (test/out*).
  p.ignore_pattern           = ["test/out*"]
  # No development dependencies are declared.
  p.development_dependencies = []
end
|
data/anonymizer.gemspec
ADDED
@@ -0,0 +1,33 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
|
3
|
+
# Gem specification for anonymizer 0.1.0.
#
# Attributes that later RubyGems releases deprecated or removed (default_executable,
# has_rdoc, rubyforge_project) are only assigned when the running RubyGems still
# provides the setter, so the file loads on both old and current RubyGems.
Gem::Specification.new do |s|
  s.name = %q{anonymizer}
  s.version = "0.1.0"

  s.required_rubygems_version = Gem::Requirement.new(">= 1.2") if s.respond_to? :required_rubygems_version=
  s.authors = ["Kristopher J. Kosmatka"]
  s.date = %q{2009-08-13}
  s.description = %q{Anonymizes a collection of files and/or directories for interrater reliability analyses and the like.}
  s.email = %q{kk4@medicine.wisc.edu}
  s.executables = ["anonymize_data.rb"]
  s.extra_rdoc_files = ["bin/anonymize_data.rb", "lib/anonymizer.rb", "README.rdoc"]
  s.files = ["anonymizer.gemspec", "bin/anonymize_data.rb", "lib/anonymizer.rb", "Manifest", "Rakefile", "README.rdoc"]
  s.homepage = %q{http://github.com/kjkosmatka/anonymizer}
  s.rdoc_options = ["--line-numbers", "--inline-source", "--title", "Anonymizer", "--main", "README.rdoc"]
  s.require_paths = ["lib"]
  s.rubygems_version = %q{1.3.1}
  s.summary = %q{Anonymizes a collection of files and/or directories for interrater reliability analyses and the like.}

  # Legacy attributes: assign each one only if this RubyGems still has the setter.
  [
    ['default_executable=', %q{anonymize_data.rb}],
    ['has_rdoc=',           true],
    ['rubyforge_project=',  %q{anonymizer}]
  ].each do |setter, value|
    s.send(setter, value) if s.respond_to?(setter)
  end

  # The original compared against Gem::RubyGemsVersion inside empty branches; that
  # comparison had no effect and is dropped, leaving only the version assignment.
  s.specification_version = 2 if s.respond_to? :specification_version=
end
|
@@ -0,0 +1,51 @@
|
|
1
|
+
#!/usr/bin/env ruby
# Command-line front end for Anonymizer: collects the items to anonymize from
# -c / -g, copies them to the -d destination under anonymized names and writes
# the keymap file(s). Exits 0 on success and 1 on any usage or runtime error.
$:.unshift File.join(File.dirname(__FILE__),'..','lib')

require 'optparse'
require 'anonymizer'

options = {
  :collection            => [],
  :options_for_record_to => {}
}

parser = OptionParser.new do |opts|
  opts.banner = "Usage: anonymize_data.rb [options]"

  opts.on("-d", "--destination <directory>", "Destination directory in which anonymized data should be saved") do |d|
    options[:destination] = d
  end
  opts.on("-c", '--collection "<list>"', "A collection of files and/or directories to anonymize, separate with spaces. Quotes are necessary.") do |c|
    options[:collection] += c.split
  end
  opts.on("-g", '--glob "<pattern>"', "Use a shell globbing pattern to select your collection. Quotes are necessary.") do |g|
    options[:collection] += Dir.glob(g)
  end
  opts.on("--include-csv","Write a csv formatted keymap file in addition to the yaml file.") do
    options[:options_for_record_to][:include_csv] = true
  end
  opts.on("-k","--keymap-name <basename>","Use an alternative file basename (i.e. without extension) for the keymap file.") do |k|
    options[:options_for_record_to][:keymap_name] = k
  end
end

# Report malformed command lines as a usage message instead of a backtrace.
begin
  parser.parse!
rescue OptionParser::ParseError => e
  abort "#{e.message}\n\n#{parser}"
end

# Both checks exit with status 1 (via abort), matching the exit status of the
# original uncaught exception but without the backtrace.
if options[:collection].empty?
  abort "No items specified to anonymize. Check your glob pattern or explicitly list your items using -c."
end
if options[:destination].nil?
  abort "No destination directory specified. Use -d <directory>.\n\n#{parser}"
end

puts "Anonymizing:"
options[:collection].each { |item| puts "\t#{item}" }
puts "To:"
puts "\t#{options[:destination]}"

begin
  a = Anonymizer.new(options[:collection])
  a.record_to(options[:destination], options[:options_for_record_to])
rescue => e
  # Surface the actual cause; the original swallowed it behind a generic message.
  warn "There was a problem with anonymization: #{e.message} (#{e.class})"
  exit 1
end

puts "+++ Successfully Anonymized. +++\n"
exit 0
|
data/lib/anonymizer.rb
ADDED
@@ -0,0 +1,118 @@
|
|
1
|
+
require 'fileutils'
|
2
|
+
require 'yaml'
|
3
|
+
|
4
|
+
# Assigns random zero-padded numeric names to a collection of files and/or
# directories, copies them into a destination directory under those names and
# records the original-path => anonymized-name mapping in keymap files.
class Anonymizer

  # an array of strings, the paths to the items to be anonymized
  attr_accessor :filepaths
  # a string, the directory to which the anonymizations and the key were most recently copied
  # (nil until record_to has completed its checks)
  attr_accessor :destination_dir
  # a hash, the keys are the original filepaths, the values are the corresponding anonymized names
  attr_accessor :anonymizations


  # Creates a new instance with an array of filepaths (strings).
  # A new set of anonymizations is created automatically at initialization. If you want to reshuffle the
  # anonymizations you can call anonymize at any time afterward.
  def initialize(filepaths)
    @filepaths = filepaths
    @anonymizations = {}
    anonymize
  end


  # Reshuffles the anonymized names.
  #
  # The mapping is rebuilt from scratch from the current filepaths, so entries for paths that are no
  # longer in the collection do not linger. Duplicate paths are collapsed so the names stay contiguous.
  # Names are zero-padded to the number of decimal digits in the item count; an empty collection yields
  # an empty mapping instead of raising. Returns the new mapping.
  def anonymize
    items = @filepaths.uniq
    # Digit count taken from the integer's string form: equal to floor(log10(n) + 1) for n >= 1,
    # and well-defined (1) for n == 0 where log10 would produce -Infinity.
    width = items.length.to_s.length
    mapping = {}
    items.shuffle.each_with_index do |path, idx|
      mapping[path] = "%0#{width}d" % idx
    end
    @anonymizations = mapping
  end


  # Copies the collection of files/directories to the destination directory using the current anonymized
  # names. Records the anonymizations in a keymap.yml file.
  #
  # Raises IOError if the destination is not an existing directory or if any original item is missing;
  # both checks run before anything is copied, so a failure leaves the destination untouched.
  #
  # === Options
  # :include_csv => true -- writes the csv format keymap in addition to the yaml
  #
  # :keymap_name => "basename" -- uses an alternate file basename for the keymap files (the default is "keymap")
  #
  def record_to(destination_dir, options = {})
    unless destination_dir && File.directory?(destination_dir)
      raise(IOError, "Destination directory not found.")
    end
    missing = @anonymizations.keys.find { |orig| !File.exist?(orig) }
    raise(IOError, "Original file or directory not found: #{missing}") if missing

    @destination_dir = destination_dir
    @anonymizations.each do |orig, anon|
      copy_directory_or_file(orig, File.join(destination_dir, anon))
    end
    keymap_filename = options[:keymap_name] || "keymap"
    write_yml(destination_dir, keymap_filename + '.yml')
    write_csv(destination_dir, keymap_filename + '.csv') if options[:include_csv]
  end


  private


  # Recursively copies directories; copies plain files to dest plus the original file extension.
  # Raises IOError when orig does not exist.
  def copy_directory_or_file(orig, dest)
    # File.exist? replaces File.exists?, which no longer exists in Ruby 3.2 and later.
    raise(IOError, "Original file or directory not found: #{orig}") unless File.exist?(orig)
    if File.directory?(orig)
      FileUtils.cp_r(orig, dest)
    else
      FileUtils.cp(orig, dest + File.extname(orig))
    end
  end


  # creates yaml text based on current anonymization
  def to_yaml
    @anonymizations.to_yaml
  end


  # Creates csv text based on current anonymization, one "original, anonymized" line per item.
  def to_csv
    @anonymizations.map { |orig, anon| "#{csv_field(orig)}, #{csv_field(anon)}\n" }.join
  end


  # Returns value as a CSV field: unchanged unless it contains a comma, double quote or line break,
  # in which case it is wrapped in double quotes with embedded quotes doubled.
  def csv_field(value)
    text = value.to_s
    return text unless text =~ /[",\r\n]/
    '"' + text.gsub('"', '""') + '"'
  end


  # writes the current anonymization yaml to a file
  def write_yml(destination_dir, keymap_filename)
    File.open(File.join(destination_dir, keymap_filename), 'w') do |out|
      YAML.dump(@anonymizations, out)
    end
  end


  # writes the current anonymization csv to a file
  def write_csv(destination_dir, keymap_filename)
    File.open(File.join(destination_dir, keymap_filename), 'w') do |out|
      out.write(to_csv)
    end
  end

end
|
102
|
+
|
103
|
+
|
104
|
+
|
105
|
+
# Adds two random-selection helpers to Ruby's built-in Array class.
class Array

  # Returns one element chosen at random; the receiver is left untouched.
  def pick
    self[rand(size)]
  end

  # Removes one element chosen at random from the receiver and returns it.
  def pick!
    position = rand(size)
    delete_at(position)
  end

end
|
metadata
ADDED
@@ -0,0 +1,66 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: brainmap-anonymizer
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Kristopher J. Kosmatka
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
|
12
|
+
date: 2009-08-13 00:00:00 -07:00
|
13
|
+
default_executable: anonymize_data.rb
|
14
|
+
dependencies: []
|
15
|
+
|
16
|
+
description: Anonymizes a collection of files and/or directories for interrater reliability analyses and the like.
|
17
|
+
email: kk4@medicine.wisc.edu
|
18
|
+
executables:
|
19
|
+
- anonymize_data.rb
|
20
|
+
extensions: []
|
21
|
+
|
22
|
+
extra_rdoc_files:
|
23
|
+
- bin/anonymize_data.rb
|
24
|
+
- lib/anonymizer.rb
|
25
|
+
- README.rdoc
|
26
|
+
files:
|
27
|
+
- anonymizer.gemspec
|
28
|
+
- bin/anonymize_data.rb
|
29
|
+
- lib/anonymizer.rb
|
30
|
+
- Manifest
|
31
|
+
- Rakefile
|
32
|
+
- README.rdoc
|
33
|
+
has_rdoc: true
|
34
|
+
homepage: http://github.com/kjkosmatka/anonymizer
|
35
|
+
licenses:
|
36
|
+
post_install_message:
|
37
|
+
rdoc_options:
|
38
|
+
- --line-numbers
|
39
|
+
- --inline-source
|
40
|
+
- --title
|
41
|
+
- Anonymizer
|
42
|
+
- --main
|
43
|
+
- README.rdoc
|
44
|
+
require_paths:
|
45
|
+
- lib
|
46
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
47
|
+
requirements:
|
48
|
+
- - ">="
|
49
|
+
- !ruby/object:Gem::Version
|
50
|
+
version: "0"
|
51
|
+
version:
|
52
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
53
|
+
requirements:
|
54
|
+
- - ">="
|
55
|
+
- !ruby/object:Gem::Version
|
56
|
+
version: "1.2"
|
57
|
+
version:
|
58
|
+
requirements: []
|
59
|
+
|
60
|
+
rubyforge_project: anonymizer
|
61
|
+
rubygems_version: 1.3.5
|
62
|
+
signing_key:
|
63
|
+
specification_version: 2
|
64
|
+
summary: Anonymizes a collection of files and/or directories for interrater reliability analyses and the like.
|
65
|
+
test_files: []
|
66
|
+
|