tabtools 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/tabtools.rb +1 -1
- data/lib/tabtools/utilities/anonymizer.rb +70 -70
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ed62a42e91a2a3f2ca4ae053d440346657239b05
|
4
|
+
data.tar.gz: 949d953c1593fb3c3017eec678747b3208122726
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: df5816cad2199da41bc5c72439dd25af9b9cc87dc54d3f058c61fc9c06c42819dbe7bf0e9f656a72975c49354c44e1b958962153beedc2b7ec4adc174a64c948
|
7
|
+
data.tar.gz: b0ef4eca6156dbe575a8f1ff3f366e438414b237e8eeccd2f85693e0f5a8387ecd9064e61646bf2b0b7da6460ea5953db79a7c70a5eaab1fce07b33786e34364
|
data/lib/tabtools.rb
CHANGED
@@ -1,86 +1,86 @@
|
|
1
1
|
require 'csv'
|
2
2
|
|
3
|
-
|
3
|
+
module TabTools
|
4
4
|
|
5
|
-
|
6
|
-
attr_accessor :input, :output, :field
|
5
|
+
class Anonymizer
|
7
6
|
|
8
|
-
|
9
|
-
|
7
|
+
attr_reader :anonWords, :low, :high
|
8
|
+
attr_accessor :input, :output, :field
|
10
9
|
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
10
|
+
@minWords = 2
|
11
|
+
@maxWords = 4
|
12
|
+
|
13
|
+
@@anonymousText = <<ANONTEXT
|
14
|
+
Lorem ipsum dolor sit amet consectetur adipiscing elit Mauris urna erat volutpat a ullamcorper sit amet
|
15
|
+
convallis quis dui Ut sem erat euismod in iaculis id tincidunt aliquam magna Vivamus lobortis eu erat in feugiat
|
16
|
+
Vestibulum fermentum dictum fermentum Class aptent taciti sociosqu ad litora torquent per conubia nostra
|
17
|
+
per inceptos himenaeos Ut quis rutrum mi Mauris sit amet cursus nisi Suspendisse accumsan leo ac lobortis egestas
|
18
|
+
sapien eros mattis lorem eu posuere mi justo in nunc Sed felis ligula egestas vel nulla sed egestas lobortis justo
|
19
|
+
Donec quis interdum velit Proin felis arcu vestibulum non augue nec ultricies pulvinar velit Nunc ac libero velit
|
20
|
+
Aenean bibendum varius nisi ut molestie Nam tempus nec lacus eget malesuada Donec et accumsan dolor et ullamcorper ante
|
21
|
+
Etiam quis consectetur leo Suspendisse mattis sagittis imperdiet Phasellus facilisis eget justo ac mollis
|
22
|
+
Phasellus eleifend tortor est sit amet venenatis enim fringilla id Vestibulum rutrum tempus tortor ac volutpat
|
23
|
+
Nullam non imperdiet lorem Cras tincidunt porttitor condimentum Pellentesque tincidunt magna id aliquet interdum augue
|
24
|
+
nisl convallis enim volutpat mollis urna metus et lacus Donec tortor sapien ultrices sit amet blandit quis gravida ut
|
25
|
+
massa Mauris mauris lacus pretium sit amet risus quis blandit viverra lorem Nunc fringilla elit a elementum gravida
|
26
|
+
Duis vel sodales elit eget tempor leo Quisque in semper nisl Fusce eget risus sollicitudin molestie diam et tincidunt
|
27
|
+
sem Curabitur accumsan pretium interdum Suspendisse semper enim ac augue hendrerit vel sollicitudin diam mollis
|
28
|
+
Curabitur lacinia ante nec eros maximus a vehicula velit aliquet In ullamcorper nec ante eu dictum Proin mi enim
|
29
|
+
malesuada ut ornare sit amet maximus commodo ipsum Nullam venenatis purus non sollicitudin sollicitudin augue
|
30
|
+
augue euismod lorem ut hendrerit velit neque sed nibh Class aptent taciti sociosqu ad litora torquent per conubia
|
31
|
+
nostra per inceptos himenaeos Proin ac elementum nisl
|
30
32
|
ANONTEXT
|
31
33
|
|
32
|
-
|
34
|
+
@codedWords = {}
|
33
35
|
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
36
|
+
def initialize
|
37
|
+
@anonWords = @@anonymousText.gsub(/\n/,' ').split
|
38
|
+
@coded = {}
|
39
|
+
@low = 0
|
40
|
+
@limit = @anonWords.length-1
|
41
|
+
@rand = Random.new
|
42
|
+
@minWords = 2
|
43
|
+
@maxWords = 4
|
44
|
+
end
|
43
45
|
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
row[field] = anonWords(row[field]) unless row[field].nil?
|
62
|
-
puts "pst: #{row.inspect}"
|
46
|
+
def anonymize(input, fields, output)
|
47
|
+
puts "Anonymizing %-25s to %-25s - %s " % [input, output, fields.inspect]
|
48
|
+
raise 'The input file must be specified' if input.nil?
|
49
|
+
raise 'The input file must exist' unless File.exist?(input)
|
50
|
+
raise 'The field(s) to anonymize must be provided as a String or an array of Strings' unless fields.instance_of?(String) || fields.instance_of?(Array)
|
51
|
+
@input = input
|
52
|
+
@output = output
|
53
|
+
@anonVals = {}
|
54
|
+
csvFields = CSV.open(@input, &:readline)
|
55
|
+
output = CSV.open(@output,'w')
|
56
|
+
output << csvFields
|
57
|
+
fields = [fields] if fields.is_a? String
|
58
|
+
CSV.foreach('input.csv', :headers => true) do |row|
|
59
|
+
fields.each do |field|
|
60
|
+
row[field] = anonWords(row[field]) unless row[field].nil?
|
61
|
+
end
|
62
|
+
output << row
|
63
63
|
end
|
64
|
-
output << row
|
65
64
|
end
|
66
|
-
end
|
67
65
|
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
66
|
+
def anonWords value
|
67
|
+
return @anonVals[value] unless @anonVals[value].nil?
|
68
|
+
range = rand(@minWords..@maxWords)
|
69
|
+
limit = @limit - range
|
70
|
+
first = rand(range..limit)
|
71
|
+
last = first + range - 1
|
72
|
+
words = @anonWords[first..last].join(' ')
|
73
|
+
@anonVals[value] = words
|
74
|
+
end
|
77
75
|
|
78
|
-
|
79
|
-
|
80
|
-
|
76
|
+
def codeValue value
|
77
|
+
if @codedWords[value].nil?
|
78
|
+
@codedWords[value] = anonWords
|
79
|
+
end
|
80
|
+
@codedWords[value]
|
81
81
|
end
|
82
|
-
@codedWords[value]
|
83
|
-
end
|
84
82
|
|
85
83
|
|
86
|
-
end # class Anonymizer
|
84
|
+
end # class Anonymizer
|
85
|
+
|
86
|
+
end # module utility
|