fingerprint 1.2.4 → 1.3.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +14 -3
- data/bin/fingerprint +56 -24
- data/lib/fingerprint.rb +5 -5
- data/lib/fingerprint/checker.rb +72 -52
- data/lib/fingerprint/record.rb +262 -0
- data/lib/fingerprint/scanner.rb +176 -42
- data/lib/fingerprint/version.rb +2 -2
- metadata +6 -5
data/README.md
CHANGED
@@ -10,11 +10,22 @@ disk. It also provides a programmatic interface for this procedure.
|
|
10
10
|
|
11
11
|
Because Fingerprint produces output to `IO` like structures, it is easy to transmit
|
12
12
|
this data across a network, or store it for later use. As an example, it could be
|
13
|
-
used to check the integrity of a backup.
|
13
|
+
used to check the integrity of a remote backup.
|
14
14
|
|
15
|
-
For examples please see the main [project page][1].
|
15
|
+
For examples and documentation please see the main [project page][1].
|
16
16
|
|
17
|
-
[1]: http://www.oriontransfer.co.nz/
|
17
|
+
[1]: http://www.oriontransfer.co.nz/gems/fingerprint
|
18
|
+
|
19
|
+
Todo
|
20
|
+
----
|
21
|
+
|
22
|
+
* Command line option to show files that have been created (e.g. don't exist in master fingerprint).
|
23
|
+
* Command line option to show files that have changed but have the same modified time (hardware corruption).
|
24
|
+
* Command line option to check fingerprint files based on checksums, e.g. duplicate files, unique files, over a set of directories.
|
25
|
+
* Command line tool for extracting duplicate and unique files over a set of directories?
|
26
|
+
* Supporting tools for signing fingerprints easily.
|
27
|
+
* Support indexing specific files as well as whole directories (maybe?).
|
28
|
+
* Support general filenames for `--archive`, e.g. along with `-n`, maybe support a file called `index.fingerprint` by default: improved visibility for end user.
|
18
29
|
|
19
30
|
License
|
20
31
|
-------
|
data/bin/fingerprint
CHANGED
@@ -27,32 +27,34 @@ require 'fingerprint'
|
|
27
27
|
OPTIONS = {
|
28
28
|
:root => "./",
|
29
29
|
:mode => :scan,
|
30
|
-
:output =>
|
30
|
+
:output => $stdout,
|
31
31
|
:verbose => false,
|
32
32
|
:force => false,
|
33
|
-
:name => ".
|
33
|
+
:name => "._index.fingerprint",
|
34
|
+
:extended => false,
|
35
|
+
:checksums => Fingerprint::DEFAULT_CHECKSUMS,
|
34
36
|
}
|
35
37
|
|
36
38
|
ARGV.options do |o|
|
37
39
|
script_name = File.basename($0)
|
38
40
|
|
39
41
|
o.banner = "Usage: #{script_name} [options] [path]"
|
40
|
-
o.define_head "This script is used to
|
42
|
+
o.define_head "This script is used to create and compare file system fingerprints."
|
41
43
|
|
42
44
|
o.separator ""
|
43
45
|
o.separator "Directory analysis and verification:"
|
44
46
|
|
45
|
-
o.on("--analyze [path]", String, "Generage a fingerprint of the given path and save it for later verification.") do |path|
|
47
|
+
o.on("-a", "--analyze [path]", String, "Generage a fingerprint of the given path and save it for later verification.") do |path|
|
46
48
|
OPTIONS[:mode] = :analyze
|
47
|
-
OPTIONS[:root] = path
|
49
|
+
OPTIONS[:root] = path if path
|
48
50
|
end
|
49
51
|
|
50
|
-
o.on("--verify [path]", String, "Verify a given path based on a previously saved fingerprint.") do |path|
|
52
|
+
o.on("-v", "--verify [path]", String, "Verify a given path based on a previously saved fingerprint.") do |path|
|
51
53
|
OPTIONS[:mode] = :verify
|
52
|
-
OPTIONS[:root] = path
|
54
|
+
OPTIONS[:root] = path if path
|
53
55
|
end
|
54
56
|
|
55
|
-
o.on("-n name", String, "Specify the name of the fingerprint file
|
57
|
+
o.on("-n name", String, "Specify the name of the fingerprint file.", "Default: #{OPTIONS[:name]}") do |name|
|
56
58
|
OPTIONS[:name] = name
|
57
59
|
end
|
58
60
|
|
@@ -74,18 +76,35 @@ ARGV.options do |o|
|
|
74
76
|
o.on("--verbose", "Verbose output, include additional details in the file transcript.") do
|
75
77
|
OPTIONS[:verbose] = true
|
76
78
|
end
|
79
|
+
|
80
|
+
o.on("--progress", "Print percentage progress to standard error.") do
|
81
|
+
OPTIONS[:progress] = true
|
82
|
+
end
|
77
83
|
|
84
|
+
o.separator ""
|
85
|
+
|
86
|
+
o.on("-x", "Include additional extended information about files and directories.") do
|
87
|
+
OPTIONS[:extended] = true
|
88
|
+
end
|
89
|
+
|
90
|
+
o.on("-s [checksum1,checksum2]", "Provide a list of the checksum algorithms to use.", "Available: #{Fingerprint::CHECKSUMS.keys.join(', ')}; Default: #{OPTIONS[:checksums].join(', ')}") do |checksums|
|
91
|
+
OPTIONS[:checksums] = checksums.split(/[\s,]+/)
|
92
|
+
end
|
93
|
+
|
78
94
|
o.separator ""
|
79
95
|
o.separator "Help and Copyright information:"
|
80
96
|
|
81
|
-
o.on_tail("--copy", "Display copyright information")
|
82
|
-
$stderr.puts "#{script_name} v#{Fingerprint::VERSION::STRING}. Copyright (c) 2011 Samuel Williams.
|
97
|
+
o.on_tail("--copy", "Display copyright and warranty information") do
|
98
|
+
$stderr.puts "#{script_name} v#{Fingerprint::VERSION::STRING}. Copyright (c) 2011 Samuel Williams."
|
99
|
+
$stderr.puts "This software is released under the MIT license and comes with ABSOLUTELY NO WARRANTY."
|
83
100
|
$stderr.puts "See http://www.oriontransfer.co.nz/ for more information."
|
84
|
-
|
85
101
|
exit
|
86
|
-
|
102
|
+
end
|
87
103
|
|
88
|
-
o.on_tail("-h", "--help", "Show this help message.")
|
104
|
+
o.on_tail("-h", "--help", "Show this help message.") do
|
105
|
+
$stderr.puts o
|
106
|
+
exit
|
107
|
+
end
|
89
108
|
end.parse!
|
90
109
|
|
91
110
|
unless File.directory? OPTIONS[:root]
|
@@ -93,6 +112,10 @@ unless File.directory? OPTIONS[:root]
|
|
93
112
|
exit(255)
|
94
113
|
end
|
95
114
|
|
115
|
+
if OPTIONS[:checksums].size == 0
|
116
|
+
OPTIONS[:checksums] = ['MD5', 'SHA2.256']
|
117
|
+
end
|
118
|
+
|
96
119
|
case (OPTIONS[:mode])
|
97
120
|
when :analyze
|
98
121
|
output_file = Pathname.new(OPTIONS[:root]) + OPTIONS[:name]
|
@@ -102,9 +125,8 @@ case (OPTIONS[:mode])
|
|
102
125
|
exit(2)
|
103
126
|
end
|
104
127
|
|
105
|
-
options =
|
128
|
+
options = OPTIONS.dup
|
106
129
|
options[:excludes] = [OPTIONS[:name]]
|
107
|
-
options[:verbose] = true if OPTIONS[:verbose]
|
108
130
|
|
109
131
|
File.open(output_file, "w") do |io|
|
110
132
|
options[:output] = io
|
@@ -121,13 +143,23 @@ case (OPTIONS[:mode])
|
|
121
143
|
exit(3)
|
122
144
|
end
|
123
145
|
|
124
|
-
|
125
|
-
|
146
|
+
options = OPTIONS.dup
|
147
|
+
|
148
|
+
master = Fingerprint::Recordset.new
|
126
149
|
|
127
150
|
File.open(input_file, "r") do |io|
|
128
|
-
|
151
|
+
master.parse(io)
|
129
152
|
end
|
130
153
|
|
154
|
+
if master.configuration
|
155
|
+
options.merge!(master.configuration.options)
|
156
|
+
end
|
157
|
+
|
158
|
+
scanner = Fingerprint::Scanner.new([OPTIONS[:root]], options)
|
159
|
+
copy = Fingerprint::SparseRecordset.new(scanner)
|
160
|
+
|
161
|
+
error_count += Fingerprint::Checker::verify(master, copy, options)
|
162
|
+
|
131
163
|
if error_count == 0
|
132
164
|
$stderr.puts "Data verified, 0 errors found."
|
133
165
|
exit(0)
|
@@ -148,13 +180,13 @@ case (OPTIONS[:mode])
|
|
148
180
|
true
|
149
181
|
end
|
150
182
|
end
|
151
|
-
|
152
|
-
options =
|
153
|
-
|
154
|
-
options[:output] = OPTIONS[:output] || $stdout
|
155
|
-
|
183
|
+
|
184
|
+
options = OPTIONS.dup
|
185
|
+
|
156
186
|
Fingerprint::Scanner.scan_paths(roots, options)
|
157
187
|
when :check
|
158
|
-
|
188
|
+
options = OPTIONS.dup
|
189
|
+
|
190
|
+
error_count = Fingerprint::Checker.check_files(ARGV[0], ARGV[1], options)
|
159
191
|
exit(error_count > 0 ? 1 : 0)
|
160
192
|
end
|
data/lib/fingerprint.rb
CHANGED
@@ -28,13 +28,13 @@ module Fingerprint
|
|
28
28
|
master = Scanner.new([master_path])
|
29
29
|
copy = Scanner.new([copy_path])
|
30
30
|
|
31
|
-
|
32
|
-
copy
|
31
|
+
master_recordset = Recordset.new
|
32
|
+
copy_recordset = SparseRecordset.new(copy)
|
33
33
|
|
34
|
-
master.
|
35
|
-
|
34
|
+
master.scan(master_recordset)
|
35
|
+
|
36
|
+
checker = Checker.new(master_recordset, copy_recordset)
|
36
37
|
|
37
|
-
checker = Checker.new(master.output, copy.output)
|
38
38
|
checker.check(&block)
|
39
39
|
|
40
40
|
return checker
|
data/lib/fingerprint/checker.rb
CHANGED
@@ -18,7 +18,7 @@
|
|
18
18
|
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
19
19
|
# THE SOFTWARE.
|
20
20
|
|
21
|
-
require '
|
21
|
+
require 'fingerprint/record'
|
22
22
|
|
23
23
|
module Fingerprint
|
24
24
|
# Given two fingerprints (master and copy) ensures that the copy has at least everything contained
|
@@ -35,81 +35,101 @@ module Fingerprint
|
|
35
35
|
@master = master
|
36
36
|
@copy = copy
|
37
37
|
|
38
|
-
@mismatches = []
|
39
|
-
|
40
38
|
@options = options
|
41
|
-
|
42
|
-
@failures = []
|
43
39
|
end
|
44
40
|
|
41
|
+
attr :master
|
42
|
+
attr :copy
|
43
|
+
|
45
44
|
# Run the checking process.
|
46
45
|
def check (options = {}, &block)
|
47
|
-
@files = Set.new
|
48
|
-
@file_paths = {}
|
49
|
-
@file_hashes = {}
|
50
|
-
|
51
|
-
# Parse original fingerprint
|
52
|
-
@copy.each_line do |line|
|
53
|
-
# Skip comments
|
54
|
-
next if line.match(/^\s+#/)
|
55
|
-
|
56
|
-
if line.chomp.match(/^([a-fA-F0-9]{32}): (.*)$/)
|
57
|
-
@files.add([$1, $2])
|
58
|
-
|
59
|
-
@file_paths[$2] = $1
|
60
|
-
@file_hashes[$1] ||= Set.new
|
61
|
-
@file_hashes[$1].add($2)
|
62
|
-
end
|
63
|
-
end
|
64
|
-
|
65
46
|
# For every file in the src, we check that it exists
|
66
47
|
# in the destination:
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
48
|
+
|
49
|
+
total_count = @master.records.count
|
50
|
+
processed_size = 0
|
51
|
+
total_size = @master.records.inject(0) { |count, record| count + (record['file.size'] || 0).to_i }
|
52
|
+
|
53
|
+
@master.records.each_with_index do |record, processed_count|
|
54
|
+
next if record.mode != :file
|
55
|
+
|
56
|
+
result, message = @copy.compare(record)
|
57
|
+
if result != :valid
|
58
|
+
yield record, result, message
|
59
|
+
elsif @options[:extended]
|
60
|
+
# Extended check compares other attributes such as user, group, file modes.
|
61
|
+
changes = record.diff(copy.paths[record.path])
|
62
|
+
|
63
|
+
if changes.size > 0
|
64
|
+
yield record, :attribute_changed, "Attribute(s) #{changes.join(', ')} changed"
|
75
65
|
end
|
76
66
|
end
|
67
|
+
|
68
|
+
if @options[:progress]
|
69
|
+
$stderr.puts "# Progress: File #{processed_count} / #{total_count} = #{sprintf('%0.2f%', processed_count.to_f / total_count.to_f * 100.0)}; Byte #{processed_size} / #{total_size} = #{sprintf('%0.2f%', processed_size.to_f / total_size.to_f * 100.0)}"
|
70
|
+
|
71
|
+
processed_size += (record['file.size'] || 0).to_i
|
72
|
+
end
|
77
73
|
end
|
78
74
|
end
|
79
75
|
|
80
76
|
# A list of files which either did not exist in the copy, or had the wrong checksum.
|
81
77
|
attr :failures
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
78
|
+
|
79
|
+
def self.check_files(master, copy, options = {}, &block)
|
80
|
+
# New API that takes two Recordsets...
|
81
|
+
|
82
|
+
File.open(master) do |master_file|
|
83
|
+
File.open(copy) do |copy_file|
|
84
|
+
master_recordset = Recordset.new
|
85
|
+
master_recordset.parse(master_file)
|
86
|
+
|
87
|
+
copy_recordset = Recordset.new
|
88
|
+
copy_recordset.parse(copy_file)
|
89
|
+
|
90
|
+
verify(master_recordset, copy_recordset, options, &block)
|
91
|
+
end
|
92
|
+
end
|
93
|
+
end
|
91
94
|
|
92
95
|
# Helper function to check two fingerprint files.
|
93
|
-
def self.
|
96
|
+
def self.verify(master, copy, options = {}, &block)
|
94
97
|
error_count = 0
|
95
|
-
|
96
|
-
master = File.open(master) unless master.respond_to? :read
|
97
|
-
copy = File.open(copy) unless copy.respond_to? :read
|
98
|
-
|
99
|
-
checker = Checker.new(master, copy)
|
100
98
|
|
101
|
-
|
99
|
+
errors = options.delete(:recordset) || Recordset.new
|
100
|
+
if options[:output]
|
101
|
+
errors = RecordsetPrinter.new(errors, options[:output])
|
102
|
+
end
|
103
|
+
|
104
|
+
checker = Checker.new(master, copy, options)
|
105
|
+
|
106
|
+
checker.check do |record, result, message|
|
102
107
|
error_count += 1
|
108
|
+
copy = checker.copy.paths[record.path]
|
109
|
+
|
110
|
+
metadata = {
|
111
|
+
'error.code' => result,
|
112
|
+
'error.message' => message
|
113
|
+
}
|
103
114
|
|
104
|
-
if
|
105
|
-
|
106
|
-
|
107
|
-
|
115
|
+
if copy
|
116
|
+
changes = record.diff(copy)
|
117
|
+
|
118
|
+
changes.each do |name|
|
119
|
+
metadata["changes.#{name}.old"] = record[name]
|
120
|
+
metadata["changes.#{name}.new"] = copy[name]
|
121
|
+
end
|
122
|
+
|
123
|
+
errors << Record.new(:warning, record.path, metadata)
|
108
124
|
else
|
109
|
-
|
125
|
+
errors << Record.new(:warning, record.path, metadata)
|
110
126
|
end
|
111
127
|
end
|
112
128
|
|
129
|
+
errors << Record.new(:summary, nil, {
|
130
|
+
'error.count' => error_count
|
131
|
+
})
|
132
|
+
|
113
133
|
return error_count
|
114
134
|
end
|
115
135
|
|
@@ -0,0 +1,262 @@
|
|
1
|
+
# Copyright (c) 2011 Samuel G. D. Williams. <http://www.oriontransfer.co.nz>
|
2
|
+
#
|
3
|
+
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
4
|
+
# of this software and associated documentation files (the "Software"), to deal
|
5
|
+
# in the Software without restriction, including without limitation the rights
|
6
|
+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
7
|
+
# copies of the Software, and to permit persons to whom the Software is
|
8
|
+
# furnished to do so, subject to the following conditions:
|
9
|
+
#
|
10
|
+
# The above copyright notice and this permission notice shall be included in
|
11
|
+
# all copies or substantial portions of the Software.
|
12
|
+
#
|
13
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
14
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
15
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
16
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
17
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
18
|
+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
19
|
+
# THE SOFTWARE.
|
20
|
+
|
21
|
+
require 'set'
|
22
|
+
|
23
|
+
module Fingerprint
|
24
|
+
|
25
|
+
MODES = {
|
26
|
+
:configuration => 'C',
|
27
|
+
:file => 'F',
|
28
|
+
:directory => 'D',
|
29
|
+
:summary => 'S',
|
30
|
+
:warning => 'W',
|
31
|
+
:excluded => '#',
|
32
|
+
}
|
33
|
+
|
34
|
+
class Record
|
35
|
+
def initialize(mode, path, metadata)
|
36
|
+
@mode = mode
|
37
|
+
@path = path
|
38
|
+
|
39
|
+
@metadata = metadata
|
40
|
+
@keys = metadata.keys.grep(/^key\./)
|
41
|
+
end
|
42
|
+
|
43
|
+
attr :mode
|
44
|
+
attr :path
|
45
|
+
attr :metadata
|
46
|
+
attr :keys
|
47
|
+
|
48
|
+
def [](key)
|
49
|
+
@metadata[key]
|
50
|
+
end
|
51
|
+
|
52
|
+
def diff(other)
|
53
|
+
changes = []
|
54
|
+
|
55
|
+
all_keys = Set.new
|
56
|
+
all_keys += @metadata.keys + other.metadata.keys
|
57
|
+
# all_keys -= @keys + other.keys
|
58
|
+
|
59
|
+
all_keys.each do |key|
|
60
|
+
changes << key if @metadata[key].to_s != other.metadata[key].to_s
|
61
|
+
end
|
62
|
+
|
63
|
+
return changes
|
64
|
+
end
|
65
|
+
|
66
|
+
def options
|
67
|
+
options = {}
|
68
|
+
|
69
|
+
options[:extended] = true if @metadata['options.extended'] == 'true'
|
70
|
+
options[:checksum] = @metadata['options.checksums'].split(/[\s,]+/) if @metadata['options.checksums']
|
71
|
+
|
72
|
+
return options
|
73
|
+
end
|
74
|
+
|
75
|
+
def write(output)
|
76
|
+
output.puts "#{MODES[@mode]} #{@path}"
|
77
|
+
|
78
|
+
return if @mode == :excluded
|
79
|
+
|
80
|
+
@metadata.keys.sort.each do |key|
|
81
|
+
output.puts "\t#{key} #{@metadata[key]}"
|
82
|
+
end
|
83
|
+
end
|
84
|
+
end
|
85
|
+
|
86
|
+
class Recordset
|
87
|
+
def initialize
|
88
|
+
@records = []
|
89
|
+
@paths = {}
|
90
|
+
@keys = {}
|
91
|
+
|
92
|
+
@configuration = nil
|
93
|
+
|
94
|
+
@callback = nil
|
95
|
+
end
|
96
|
+
|
97
|
+
attr :records
|
98
|
+
attr :paths
|
99
|
+
attr :keys
|
100
|
+
|
101
|
+
attr :configuration
|
102
|
+
|
103
|
+
def <<(record)
|
104
|
+
@records << record
|
105
|
+
if record.mode == :configuration
|
106
|
+
# What should we do if we get multiple configurations?
|
107
|
+
@configuration = record
|
108
|
+
else
|
109
|
+
@paths[record.path] = record
|
110
|
+
record.keys.each do |key, value|
|
111
|
+
@keys[key] = value
|
112
|
+
end
|
113
|
+
end
|
114
|
+
end
|
115
|
+
|
116
|
+
def lookup(path)
|
117
|
+
return @paths[path]
|
118
|
+
end
|
119
|
+
|
120
|
+
def find(record)
|
121
|
+
result = lookup(record.path)
|
122
|
+
|
123
|
+
return result if result
|
124
|
+
|
125
|
+
@record.keys.each do |key, value|
|
126
|
+
result = @keys[key][value]
|
127
|
+
|
128
|
+
return result if result
|
129
|
+
end
|
130
|
+
|
131
|
+
return nil
|
132
|
+
end
|
133
|
+
|
134
|
+
def compare(other)
|
135
|
+
main = lookup(other.path)
|
136
|
+
|
137
|
+
# Did we find a corresponding other at the same path?
|
138
|
+
if main
|
139
|
+
# Keep track of how many keys were checked..
|
140
|
+
checked = 0
|
141
|
+
|
142
|
+
# Are all the keys of the other record equivalent to the main record?
|
143
|
+
other.keys.each do |key|
|
144
|
+
if main[key]
|
145
|
+
checked += 1
|
146
|
+
|
147
|
+
# Is the key the same?
|
148
|
+
if main[key] != other[key]
|
149
|
+
return :keys_different, "Key #{key.gsub(/^key\./, '')} does not match"
|
150
|
+
end
|
151
|
+
end
|
152
|
+
end
|
153
|
+
|
154
|
+
# Are the records the same size? We put this check second because we do this as a last resort to
|
155
|
+
# ensure that the file hasn't been deliberately tampered with.
|
156
|
+
if main.metadata['size'] and other.metadata['size'] and main.metadata['size'] != other.metadata['size']
|
157
|
+
return :size_different, "File size differs"
|
158
|
+
end
|
159
|
+
|
160
|
+
if checked == 0
|
161
|
+
return :no_keys, "No valid keys to check"
|
162
|
+
else
|
163
|
+
# At least one key could be validated.
|
164
|
+
return :valid, "Valid"
|
165
|
+
end
|
166
|
+
else
|
167
|
+
return :not_found, "File not found"
|
168
|
+
end
|
169
|
+
end
|
170
|
+
|
171
|
+
def self.parse(input)
|
172
|
+
mode = nil
|
173
|
+
path = nil
|
174
|
+
metadata = nil
|
175
|
+
|
176
|
+
markers = {}
|
177
|
+
MODES.each do |key, value|
|
178
|
+
markers[value] = key
|
179
|
+
end
|
180
|
+
|
181
|
+
# Parse original fingerprint
|
182
|
+
input.each_line do |line|
|
183
|
+
# Skip comments and blank lines
|
184
|
+
next if line.match(/^\s*#/) || line.match(/^\s*$/)
|
185
|
+
|
186
|
+
if line.match(/^([A-Z])\s+(.*)$/)
|
187
|
+
if path
|
188
|
+
yield mode, path, metadata
|
189
|
+
end
|
190
|
+
|
191
|
+
mode = markers[$1] || :unknown
|
192
|
+
|
193
|
+
path = $2
|
194
|
+
metadata = {}
|
195
|
+
elsif line.match(/^\s+([a-zA-Z\.0-9]+)\s+(.*)$/)
|
196
|
+
metadata[$1] = $2
|
197
|
+
else
|
198
|
+
$stderr.puts "Unhandled line: #{line}"
|
199
|
+
end
|
200
|
+
end
|
201
|
+
|
202
|
+
if path
|
203
|
+
yield mode, path, metadata
|
204
|
+
end
|
205
|
+
end
|
206
|
+
|
207
|
+
def parse(input)
|
208
|
+
self.class.parse(input) do |mode, path, metadata|
|
209
|
+
self << Record.new(mode, path, metadata)
|
210
|
+
end
|
211
|
+
end
|
212
|
+
|
213
|
+
def write(output)
|
214
|
+
@records.each do |record|
|
215
|
+
record.write(output)
|
216
|
+
end
|
217
|
+
end
|
218
|
+
end
|
219
|
+
|
220
|
+
# This record set dynamically computes data from the disk as required.
|
221
|
+
class SparseRecordset < Recordset
|
222
|
+
def initialize(scanner)
|
223
|
+
super()
|
224
|
+
|
225
|
+
@scanner = scanner
|
226
|
+
end
|
227
|
+
|
228
|
+
def lookup(path)
|
229
|
+
if @paths.key?(path)
|
230
|
+
return @paths[path]
|
231
|
+
else
|
232
|
+
@paths[path] = @scanner.scan_path(path)
|
233
|
+
end
|
234
|
+
end
|
235
|
+
end
|
236
|
+
|
237
|
+
class RecordsetWrapper
|
238
|
+
def initialize(recordset)
|
239
|
+
@recordset = recordset
|
240
|
+
end
|
241
|
+
|
242
|
+
def method_missing(name, *args, &block)
|
243
|
+
@recordset.send(name, *args, &block)
|
244
|
+
end
|
245
|
+
|
246
|
+
def respond_to?(name)
|
247
|
+
@recordset.respond_to?(name)
|
248
|
+
end
|
249
|
+
end
|
250
|
+
|
251
|
+
class RecordsetPrinter < RecordsetWrapper
|
252
|
+
def initialize(recordset, output)
|
253
|
+
super(recordset)
|
254
|
+
@output = output
|
255
|
+
end
|
256
|
+
|
257
|
+
def <<(record)
|
258
|
+
record.write(@output)
|
259
|
+
@recordset << record
|
260
|
+
end
|
261
|
+
end
|
262
|
+
end
|
data/lib/fingerprint/scanner.rb
CHANGED
@@ -20,10 +20,20 @@
|
|
20
20
|
|
21
21
|
require 'stringio'
|
22
22
|
require 'find'
|
23
|
-
require '
|
23
|
+
require 'etc'
|
24
|
+
require 'digest/sha2'
|
24
25
|
|
25
26
|
module Fingerprint
|
26
27
|
|
28
|
+
CHECKSUMS = {
|
29
|
+
'MD5' => lambda { Digest::MD5.new },
|
30
|
+
'SHA1' => lambda { Digest::SHA1.new },
|
31
|
+
'SHA2.256' => lambda { Digest::SHA2.new(256) },
|
32
|
+
'SHA2.512' => lambda { Digest::SHA2.new(512) },
|
33
|
+
}
|
34
|
+
|
35
|
+
DEFAULT_CHECKSUMS = ['MD5', 'SHA2.256']
|
36
|
+
|
27
37
|
# The scanner class can scan a set of directories and produce an index.
|
28
38
|
class Scanner
|
29
39
|
# Initialize the scanner to scan a given set of directories in order.
|
@@ -33,45 +43,100 @@ module Fingerprint
|
|
33
43
|
@roots = roots
|
34
44
|
|
35
45
|
@excludes = options[:excludes] || []
|
36
|
-
@output = options[:output] || StringIO.new
|
37
|
-
|
38
46
|
@options = options
|
47
|
+
|
48
|
+
@digests = {}
|
49
|
+
|
50
|
+
unless @options[:checksums] and @options[:checksums].size > 0
|
51
|
+
@options[:checksums] = DEFAULT_CHECKSUMS
|
52
|
+
end
|
53
|
+
|
54
|
+
@options[:checksums].each do |name|
|
55
|
+
@digests[name] = CHECKSUMS[name].call
|
56
|
+
end
|
57
|
+
|
58
|
+
@callback = nil
|
39
59
|
end
|
40
60
|
|
41
|
-
attr :
|
61
|
+
attr :recordset
|
62
|
+
attr :digests
|
42
63
|
|
43
64
|
protected
|
44
|
-
|
65
|
+
|
45
66
|
# Adds a header for a given path which is mainly version information.
|
46
|
-
def
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
@output.puts ""
|
54
|
-
@output.puts((" " * 32) + " #{path}")
|
67
|
+
def header_for(root)
|
68
|
+
Record.new(:configuration, File.expand_path(root), {
|
69
|
+
'options.extended' => @options[:extended] == true,
|
70
|
+
'options.checksums' => @options[:checksums].join(', '),
|
71
|
+
'summary.time.start' => Time.now,
|
72
|
+
'fingerprint.version' => Fingerprint::VERSION::STRING
|
73
|
+
})
|
55
74
|
end
|
56
|
-
|
57
|
-
# Output a file and associated metadata.
|
58
|
-
def output_file(path)
|
59
|
-
d = Digest::MD5.new
|
60
75
|
|
61
|
-
|
62
|
-
|
63
|
-
|
76
|
+
# This code won't handle multiple threads..
|
77
|
+
def digests_for(path)
|
78
|
+
@digests.each do |key, digest|
|
79
|
+
digest.reset
|
80
|
+
end
|
81
|
+
|
82
|
+
File.open(path, "rb") do |file|
|
83
|
+
buf = ""
|
84
|
+
while file.read(1024 * 1024 * 10, buf)
|
85
|
+
@digests.each do |key, digest|
|
86
|
+
digest << buf
|
87
|
+
end
|
64
88
|
end
|
65
89
|
end
|
66
90
|
|
67
|
-
|
91
|
+
metadata = {}
|
92
|
+
|
93
|
+
@digests.each do |key, digest|
|
94
|
+
metadata["key." + key] = digest.hexdigest
|
95
|
+
end
|
96
|
+
|
97
|
+
return metadata
|
68
98
|
end
|
69
99
|
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
100
|
+
def metadata_for(type, path)
|
101
|
+
stat = File.stat(path)
|
102
|
+
metadata = {}
|
103
|
+
|
104
|
+
if type == :file
|
105
|
+
metadata['file.size'] = stat.size
|
106
|
+
digests = digests_for(path)
|
74
107
|
end
|
108
|
+
|
109
|
+
# Extended information
|
110
|
+
if @options[:extended]
|
111
|
+
metadata['posix.time.modified'] = File.mtime(path)
|
112
|
+
|
113
|
+
metadata['posix.mode'] = stat.mode.to_s(8)
|
114
|
+
|
115
|
+
metadata['posix.permissions.user.id'] = stat.uid
|
116
|
+
metadata['posix.permissions.user.name'] = Etc.getpwuid(stat.uid).name
|
117
|
+
metadata['posix.permissions.group.id'] = stat.gid
|
118
|
+
metadata['posix.permissions.group.name'] = Etc.getgrgid(stat.gid).name
|
119
|
+
end
|
120
|
+
|
121
|
+
return metadata
|
122
|
+
end
|
123
|
+
|
124
|
+
# Output a directory header.
|
125
|
+
def directory_record_for(path)
|
126
|
+
Record.new(:directory, path, metadata_for(:directory, path))
|
127
|
+
end
|
128
|
+
|
129
|
+
# Output a file and associated metadata.
|
130
|
+
def file_record_for(path)
|
131
|
+
metadata = metadata_for(:file, path)
|
132
|
+
metadata.merge!(digests_for(path))
|
133
|
+
|
134
|
+
Record.new(:file, path, metadata)
|
135
|
+
end
|
136
|
+
|
137
|
+
# Add information about excluded paths.
|
138
|
+
def excluded_record_for(path)
|
139
|
+
Record.new(:excluded, path)
|
75
140
|
end
|
76
141
|
|
77
142
|
public
|
@@ -87,50 +152,119 @@ module Fingerprint
|
|
87
152
|
return false
|
88
153
|
end
|
89
154
|
|
155
|
+
def valid_file?(path)
|
156
|
+
!(excluded?(path) || File.symlink?(path) || !File.file?(path) || !File.readable?(path))
|
157
|
+
end
|
158
|
+
|
159
|
+
def scan_path(path)
|
160
|
+
@roots.each do |root|
|
161
|
+
Dir.chdir(root) do
|
162
|
+
if valid_file?(path)
|
163
|
+
return file_record_for(path)
|
164
|
+
end
|
165
|
+
end
|
166
|
+
end
|
167
|
+
|
168
|
+
return nil
|
169
|
+
end
|
170
|
+
|
90
171
|
# Run the scanning process.
|
91
|
-
def scan
|
172
|
+
def scan(recordset)
|
92
173
|
excluded_count = 0
|
93
|
-
|
174
|
+
processed_count = 0
|
175
|
+
processed_size = 0
|
94
176
|
directory_count = 0
|
177
|
+
|
178
|
+
total_count = 0
|
179
|
+
total_size = 0
|
180
|
+
|
181
|
+
# Estimate the number of files and amount of data to process..
|
182
|
+
if @options[:progress]
|
183
|
+
@roots.each do |root|
|
184
|
+
Dir.chdir(root) do
|
185
|
+
Find.find("./") do |path|
|
186
|
+
if File.directory?(path)
|
187
|
+
if excluded?(path)
|
188
|
+
Find.prune # Ignore this directory
|
189
|
+
end
|
190
|
+
else
|
191
|
+
# Skip anything that isn't a valid file (e.g. pipes, sockets, symlinks).
|
192
|
+
if valid_file?(path)
|
193
|
+
total_count += 1
|
194
|
+
total_size += File.size(path)
|
195
|
+
end
|
196
|
+
end
|
197
|
+
end
|
198
|
+
end
|
199
|
+
end
|
200
|
+
end
|
95
201
|
|
96
202
|
@roots.each do |root|
|
97
203
|
Dir.chdir(root) do
|
98
|
-
|
204
|
+
recordset << header_for(root)
|
205
|
+
|
99
206
|
Find.find("./") do |path|
|
100
207
|
if File.directory?(path)
|
101
208
|
if excluded?(path)
|
102
209
|
excluded_count += 1
|
103
|
-
|
210
|
+
|
211
|
+
if @options[:verbose]
|
212
|
+
recordset << excluded_record_for(path)
|
213
|
+
end
|
214
|
+
|
104
215
|
Find.prune # Ignore this directory
|
105
216
|
else
|
106
217
|
directory_count += 1
|
107
|
-
|
218
|
+
|
219
|
+
recordset << directory_record_for(path)
|
108
220
|
end
|
109
221
|
else
|
110
222
|
# Skip anything that isn't a valid file (e.g. pipes, sockets, symlinks).
|
111
|
-
if
|
112
|
-
|
113
|
-
|
223
|
+
if valid_file?(path)
|
224
|
+
processed_count += 1
|
225
|
+
processed_size += File.size(path)
|
226
|
+
|
227
|
+
recordset << file_record_for(path)
|
114
228
|
else
|
115
|
-
|
116
|
-
|
229
|
+
excluded_count += 1
|
230
|
+
|
231
|
+
if @options[:verbose]
|
232
|
+
recordset << excluded_record_for(path)
|
233
|
+
end
|
117
234
|
end
|
118
235
|
end
|
236
|
+
|
237
|
+
# Print out a progress summary if requested
|
238
|
+
if @options[:progress]
|
239
|
+
$stderr.puts "# Progress: File #{processed_count} / #{total_count} = #{sprintf('%0.2f%', processed_count.to_f / total_count.to_f * 100.0)}; Byte #{processed_size} / #{total_size} = #{sprintf('%0.2f%', processed_size.to_f / total_size.to_f * 100.0)}"
|
240
|
+
end
|
119
241
|
end
|
120
242
|
end
|
121
243
|
end
|
122
|
-
|
244
|
+
|
123
245
|
# Output summary
|
124
|
-
|
246
|
+
recordset << Record.new(:summary, nil, {
|
247
|
+
'summary.directories' => directory_count,
|
248
|
+
'summary.files' => processed_count,
|
249
|
+
'summary.size' => processed_size,
|
250
|
+
'summary.excluded' => excluded_count,
|
251
|
+
'summary.time.end' => Time.now
|
252
|
+
})
|
253
|
+
|
254
|
+
return recordset
|
125
255
|
end
|
126
256
|
|
127
257
|
# A helper function to scan a set of directories.
|
128
258
|
def self.scan_paths(paths, options = {})
|
259
|
+
if options[:output]
|
260
|
+
options[:recordset] = RecordsetPrinter.new(Recordset.new, options[:output])
|
261
|
+
end
|
262
|
+
|
129
263
|
scanner = Scanner.new(paths, options)
|
130
|
-
|
131
|
-
scanner.scan
|
132
|
-
|
133
|
-
return
|
264
|
+
|
265
|
+
scanner.scan(options[:recordset])
|
266
|
+
|
267
|
+
return options[:recordset]
|
134
268
|
end
|
135
269
|
end
|
136
270
|
end
|
data/lib/fingerprint/version.rb
CHANGED
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fingerprint
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 27
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 1
|
8
|
-
-
|
9
|
-
-
|
10
|
-
version: 1.
|
8
|
+
- 3
|
9
|
+
- 0
|
10
|
+
version: 1.3.0
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Samuel Williams
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2011-08-
|
18
|
+
date: 2011-08-18 00:00:00 Z
|
19
19
|
dependencies: []
|
20
20
|
|
21
21
|
description:
|
@@ -29,6 +29,7 @@ extra_rdoc_files: []
|
|
29
29
|
files:
|
30
30
|
- bin/fingerprint
|
31
31
|
- lib/fingerprint/checker.rb
|
32
|
+
- lib/fingerprint/record.rb
|
32
33
|
- lib/fingerprint/scanner.rb
|
33
34
|
- lib/fingerprint/version.rb
|
34
35
|
- lib/fingerprint.rb
|