brainmap-anonymizer 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,13 @@
1
+ anonymizer.gemspec
2
+ bin/anonymize_data.rb
3
+ lib/anonymizer.rb
4
+ Manifest
5
+ Rakefile
6
+ README.rdoc
7
+ test/out_2/0/ddd.txt
8
+ test/out_2/1.txt
9
+ test/out_2/2/bbb.txt
10
+ test/out_2/3/ccc.txt
11
+ test/out_2/4/eee.txt
12
+ test/out_2/5/aaa.txt
13
+ test/out_2/keymap.csv
File without changes
@@ -0,0 +1,12 @@
1
+ require 'rubygems'
2
+ require 'rake'
3
+ require 'echoe'
4
+
5
# Packaging configuration for the gem, declared through Echoe.
# The block sets the metadata Echoe is given for the 'anonymizer' gem, version 0.1.0.
# NOTE(review): presumably Echoe uses these values to generate the gemspec and
# Manifest that ship alongside this Rakefile -- confirm against the Echoe docs.
Echoe.new('anonymizer', '0.1.0') do |p|
  p.description              = "Anonymizes a collection of files and/or directories for interrater reliability analyses and the like."
  p.url                      = "http://github.com/kjkosmatka/anonymizer"
  p.author                   = "Kristopher J. Kosmatka"
  p.email                    = "kk4@medicine.wisc.edu"
  # Paths matching this pattern (the sample anonymization output) are ignored by Echoe.
  p.ignore_pattern           = ["test/out*"]
  # No development-time dependencies are declared.
  p.development_dependencies = []
end
@@ -0,0 +1,33 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
# Gem specification for the anonymizer gem (version 0.1.0).
#
# Writers that newer RubyGems releases have deprecated or dropped are only called
# when the running RubyGems still responds to them, so the file loads on old and
# new RubyGems alike. The former Gem::RubyGemsVersion comparison had two empty
# branches and did nothing except reference a constant that may no longer exist,
# so it has been removed.
Gem::Specification.new do |s|
  s.name = %q{anonymizer}
  s.version = "0.1.0"

  s.required_rubygems_version = Gem::Requirement.new(">= 1.2") if s.respond_to? :required_rubygems_version=
  s.authors = ["Kristopher J. Kosmatka"]
  s.date = %q{2009-08-13}
  s.description = %q{Anonymizes a collection of files and/or directories for interrater reliability analyses and the like.}
  s.email = %q{kk4@medicine.wisc.edu}
  s.executables = ["anonymize_data.rb"]
  s.extra_rdoc_files = ["bin/anonymize_data.rb", "lib/anonymizer.rb", "README.rdoc"]
  s.files = ["anonymizer.gemspec", "bin/anonymize_data.rb", "lib/anonymizer.rb", "Manifest", "Rakefile", "README.rdoc"]
  s.homepage = %q{http://github.com/kjkosmatka/anonymizer}
  s.rdoc_options = ["--line-numbers", "--inline-source", "--title", "Anonymizer", "--main", "README.rdoc"]
  s.require_paths = ["lib"]
  s.rubygems_version = %q{1.3.1}
  s.summary = %q{Anonymizes a collection of files and/or directories for interrater reliability analyses and the like.}

  # Legacy attributes: set them only where the writer is still available.
  s.default_executable = %q{anonymize_data.rb} if s.respond_to? :default_executable=
  s.has_rdoc = true if s.respond_to? :has_rdoc=
  s.rubyforge_project = %q{anonymizer} if s.respond_to? :rubyforge_project=

  s.specification_version = 2 if s.respond_to? :specification_version=
end
@@ -0,0 +1,51 @@
1
#!/usr/bin/env ruby
# Command-line front end for Anonymizer.
#
# Collects the items to anonymize from -c (explicit list) and/or -g (glob pattern),
# then copies them into the -d destination directory under anonymized names and
# writes the keymap file(s) there.
#
# Exit status: 0 on success, 1 on a usage error or when anonymization fails.
$:.unshift File.join(File.dirname(__FILE__), '..', 'lib')

require 'optparse'
require 'anonymizer'

options = {
  :collection            => [],
  :options_for_record_to => {}
}

parser = OptionParser.new do |opts|
  opts.banner = "Usage: anonymize_data.rb [options]"

  opts.on("-d", "--destination <directory>", "Destination directory in which anonymized data should be saved") do |d|
    options[:destination] = d
  end
  opts.on("-c", '--collection "<list>"', "A collection of files and/or directories to anonymize, separate with spaces. Quotes are necessary.") do |c|
    options[:collection] += c.split
  end
  opts.on("-g", '--glob "<pattern>"', "Use a shell globbing pattern to select your collection. Quotes are necessary.") do |g|
    options[:collection] += Dir.glob(g)
  end
  opts.on("--include-csv", "Write a csv formatted keymap file in addition to the yaml file.") do
    options[:options_for_record_to][:include_csv] = true
  end
  opts.on("-k", "--keymap-name <basename>", "Use an alternative file basename (i.e. without extension) for the keymap file.") do |k|
    options[:options_for_record_to][:keymap_name] = k
  end
end

# Report malformed options as a usage message rather than a stack trace.
begin
  parser.parse!
rescue OptionParser::ParseError => e
  abort "#{e.message}\n\n#{parser}"
end

# Both checks exit with status 1 (Kernel#abort) and print the reason to stderr.
if options[:collection].empty?
  abort "No items specified to anonymize. Check your glob pattern or explicitly list your items using -c."
end
if options[:destination].nil?
  abort "No destination directory specified. Use -d <directory>.\n\n#{parser}"
end

puts "Anonymizing:"
options[:collection].each { |item| puts "\t#{item}" }
puts "To:"
puts "\t#{options[:destination]}"

begin
  anonymizer = Anonymizer.new(options[:collection])
  anonymizer.record_to(options[:destination], options[:options_for_record_to])
rescue StandardError => e
  # Surface the actual cause instead of hiding it behind a generic message.
  warn "There was a problem with anonymization: #{e.message}"
  exit 1
end

puts "+++ Successfully Anonymized. +++\n"
exit 0
@@ -0,0 +1,118 @@
1
+ require 'fileutils'
2
+ require 'yaml'
3
+
4
# Copies a collection of files and/or directories into a destination directory
# under randomly assigned, zero-padded numeric names, and records the mapping
# from original path to anonymized name in keymap files.
class Anonymizer

  # an array of strings, the paths to the items to be anonymized
  attr_accessor :filepaths
  # a string, the directory most recently passed to record_to (nil before the first call)
  attr_accessor :destination_dir
  # a hash, the keys are the original filepaths, the values are the corresponding anonymized names
  attr_accessor :anonymizations


  # Creates a new instance with an array of filepaths (strings).
  # A new set of anonymizations is created automatically at initialization. If you want to
  # reshuffle the anonymizations you can call anonymize at any time afterward.
  def initialize(filepaths)
    @filepaths = filepaths
    @anonymizations = {}
    anonymize
  end


  # Reshuffles the anonymized names.
  #
  # Every distinct path in filepaths receives a unique index (0, 1, 2, ...) in random
  # order, zero-padded to the number of digits in the item count. The mapping is rebuilt
  # from scratch, so entries for paths no longer in filepaths do not linger. An empty
  # collection yields an empty mapping.
  #
  # Returns the new anonymizations hash.
  def anonymize
    paths = @filepaths.uniq
    # Pad to the digit count of the number of items (e.g. 11 items => "00".."10").
    width = paths.length.to_s.length
    @anonymizations = {}
    paths.shuffle.each_with_index do |path, idx|
      @anonymizations[path] = "%0#{width}d" % idx
    end
    @anonymizations
  end


  # Copies the collection of files/directories to the destination directory using the current
  # anonymized names. Records the anonymizations in a keymap.yml file.
  #
  # === Options
  # :include_csv => true -- writes the csv format keymap in addition to the yaml
  #
  # :keymap_name => "basename" -- uses an alternate file basename for the keymap files (the default is "keymap")
  #
  # Raises IOError when the destination is not an existing directory or when any original
  # item is missing. All originals are checked before anything is copied.
  def record_to(destination_dir, options = {})
    unless destination_dir && File.directory?(destination_dir)
      raise IOError, "Destination directory not found."
    end
    options ||= {}
    @destination_dir = destination_dir

    # Validate every source up front so a missing item cannot leave a half-copied destination.
    missing = @anonymizations.keys.find { |path| !File.exist?(path) }
    raise IOError, "Original file or directory not found: #{missing}" if missing

    @anonymizations.each do |orig, anon|
      copy_directory_or_file(orig, File.join(destination_dir, anon))
    end

    keymap_filename = options[:keymap_name] || "keymap"
    write_yml(destination_dir, keymap_filename + '.yml')
    write_csv(destination_dir, keymap_filename + '.csv') if options[:include_csv]
  end


  private


  # Recursively copies directories; copies plain files to dest plus the original file extension.
  def copy_directory_or_file(orig, dest)
    raise IOError, "Original file or directory not found: #{orig}" unless File.exist?(orig)
    if File.directory?(orig)
      FileUtils.cp_r(orig, dest)
    else
      FileUtils.cp(orig, dest + File.extname(orig))
    end
  end


  # creates yaml text based on current anonymization
  def to_yaml
    @anonymizations.to_yaml
  end


  # Creates csv text based on current anonymization: one "original, anonymized" line per item.
  # Fields containing a comma, a double quote or a line break are quoted so the row stays parseable.
  def to_csv
    @anonymizations.map { |orig, anon| "#{csv_field(orig)}, #{csv_field(anon)}\n" }.join
  end


  # Returns value as a csv field, wrapped in double quotes (inner quotes doubled) only when needed.
  def csv_field(value)
    text = value.to_s
    return text unless text =~ /[",\r\n]/
    '"' + text.gsub('"', '""') + '"'
  end


  # writes the current anonymization yaml to a file
  def write_yml(destination_dir, keymap_filename)
    File.open(File.join(destination_dir, keymap_filename), 'w') do |out|
      YAML.dump(@anonymizations, out)
    end
  end


  # writes the current anonymization csv to a file
  def write_csv(destination_dir, keymap_filename)
    File.open(File.join(destination_dir, keymap_filename), 'w') do |out|
      out.write(to_csv)
    end
  end

end
102
+
103
+
104
+
105
# Augments the ruby array class with a couple convenient random picking methods
class Array

  # Picks one value from the array at random, array remains unchanged.
  # Returns nil when the array is empty.
  def pick
    sample
  end

  # Removes and returns one element of the array at random, the original array is changed by this method.
  # Returns nil (and changes nothing) when the array is empty.
  def pick!
    # rand(0) would return a Float rather than an index, so handle the empty case explicitly.
    return nil if empty?
    delete_at(rand(length))
  end

end
metadata ADDED
@@ -0,0 +1,66 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: brainmap-anonymizer
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Kristopher J. Kosmatka
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2009-08-13 00:00:00 -07:00
13
+ default_executable: anonymize_data.rb
14
+ dependencies: []
15
+
16
+ description: Anonymizes a collection of files and/or directories for interrater reliability analyses and the like.
17
+ email: kk4@medicine.wisc.edu
18
+ executables:
19
+ - anonymize_data.rb
20
+ extensions: []
21
+
22
+ extra_rdoc_files:
23
+ - bin/anonymize_data.rb
24
+ - lib/anonymizer.rb
25
+ - README.rdoc
26
+ files:
27
+ - anonymizer.gemspec
28
+ - bin/anonymize_data.rb
29
+ - lib/anonymizer.rb
30
+ - Manifest
31
+ - Rakefile
32
+ - README.rdoc
33
+ has_rdoc: true
34
+ homepage: http://github.com/kjkosmatka/anonymizer
35
+ licenses:
36
+ post_install_message:
37
+ rdoc_options:
38
+ - --line-numbers
39
+ - --inline-source
40
+ - --title
41
+ - Anonymizer
42
+ - --main
43
+ - README.rdoc
44
+ require_paths:
45
+ - lib
46
+ required_ruby_version: !ruby/object:Gem::Requirement
47
+ requirements:
48
+ - - ">="
49
+ - !ruby/object:Gem::Version
50
+ version: "0"
51
+ version:
52
+ required_rubygems_version: !ruby/object:Gem::Requirement
53
+ requirements:
54
+ - - ">="
55
+ - !ruby/object:Gem::Version
56
+ version: "1.2"
57
+ version:
58
+ requirements: []
59
+
60
+ rubyforge_project: anonymizer
61
+ rubygems_version: 1.3.5
62
+ signing_key:
63
+ specification_version: 2
64
+ summary: Anonymizes a collection of files and/or directories for interrater reliability analyses and the like.
65
+ test_files: []
66
+