fingerprint 1.2.4 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +14 -3
- data/bin/fingerprint +56 -24
- data/lib/fingerprint.rb +5 -5
- data/lib/fingerprint/checker.rb +72 -52
- data/lib/fingerprint/record.rb +262 -0
- data/lib/fingerprint/scanner.rb +176 -42
- data/lib/fingerprint/version.rb +2 -2
- metadata +6 -5
data/README.md
CHANGED
@@ -10,11 +10,22 @@ disk. It also provides a programmatic interface for this procedure.
|
|
10
10
|
|
11
11
|
Because Fingerprint produces output to `IO` like structures, it is easy to transmit
|
12
12
|
this data across a network, or store it for later use. As an example, it could be
|
13
|
-
used to check the integrity of a backup.
|
13
|
+
used to check the integrity of a remote backup.
|
14
14
|
|
15
|
-
For examples please see the main [project page][1].
|
15
|
+
For examples and documentation please see the main [project page][1].
|
16
16
|
|
17
|
-
[1]: http://www.oriontransfer.co.nz/
|
17
|
+
[1]: http://www.oriontransfer.co.nz/gems/fingerprint
|
18
|
+
|
19
|
+
Todo
|
20
|
+
----
|
21
|
+
|
22
|
+
* Command line option to show files that have been created (i.e. files that don't exist in the master fingerprint).
|
23
|
+
* Command line option to show files that have changed but have the same modified time (hardware corruption).
|
24
|
+
* Command line option to check fingerprint files based on checksums, e.g. duplicate files, unique files, over a set of directories.
|
25
|
+
* Command line tool for extracting duplicate and unique files over a set of directories?
|
26
|
+
* Supporting tools for signing fingerprints easily.
|
27
|
+
* Support indexing specific files as well as whole directories (maybe?).
|
28
|
+
* Support general filenames for `--archive`, e.g. along with `-n`, maybe support a file called `index.fingerprint` by default: improved visibility for end user.
|
18
29
|
|
19
30
|
License
|
20
31
|
-------
|
data/bin/fingerprint
CHANGED
@@ -27,32 +27,34 @@ require 'fingerprint'
|
|
27
27
|
OPTIONS = {
|
28
28
|
:root => "./",
|
29
29
|
:mode => :scan,
|
30
|
-
:output =>
|
30
|
+
:output => $stdout,
|
31
31
|
:verbose => false,
|
32
32
|
:force => false,
|
33
|
-
:name => ".
|
33
|
+
:name => "._index.fingerprint",
|
34
|
+
:extended => false,
|
35
|
+
:checksums => Fingerprint::DEFAULT_CHECKSUMS,
|
34
36
|
}
|
35
37
|
|
36
38
|
ARGV.options do |o|
|
37
39
|
script_name = File.basename($0)
|
38
40
|
|
39
41
|
o.banner = "Usage: #{script_name} [options] [path]"
|
40
|
-
o.define_head "This script is used to
|
42
|
+
o.define_head "This script is used to create and compare file system fingerprints."
|
41
43
|
|
42
44
|
o.separator ""
|
43
45
|
o.separator "Directory analysis and verification:"
|
44
46
|
|
45
|
-
o.on("--analyze [path]", String, "Generage a fingerprint of the given path and save it for later verification.") do |path|
|
47
|
+
o.on("-a", "--analyze [path]", String, "Generage a fingerprint of the given path and save it for later verification.") do |path|
|
46
48
|
OPTIONS[:mode] = :analyze
|
47
|
-
OPTIONS[:root] = path
|
49
|
+
OPTIONS[:root] = path if path
|
48
50
|
end
|
49
51
|
|
50
|
-
o.on("--verify [path]", String, "Verify a given path based on a previously saved fingerprint.") do |path|
|
52
|
+
o.on("-v", "--verify [path]", String, "Verify a given path based on a previously saved fingerprint.") do |path|
|
51
53
|
OPTIONS[:mode] = :verify
|
52
|
-
OPTIONS[:root] = path
|
54
|
+
OPTIONS[:root] = path if path
|
53
55
|
end
|
54
56
|
|
55
|
-
o.on("-n name", String, "Specify the name of the fingerprint file
|
57
|
+
o.on("-n name", String, "Specify the name of the fingerprint file.", "Default: #{OPTIONS[:name]}") do |name|
|
56
58
|
OPTIONS[:name] = name
|
57
59
|
end
|
58
60
|
|
@@ -74,18 +76,35 @@ ARGV.options do |o|
|
|
74
76
|
o.on("--verbose", "Verbose output, include additional details in the file transcript.") do
|
75
77
|
OPTIONS[:verbose] = true
|
76
78
|
end
|
79
|
+
|
80
|
+
o.on("--progress", "Print percentage progress to standard error.") do
|
81
|
+
OPTIONS[:progress] = true
|
82
|
+
end
|
77
83
|
|
84
|
+
o.separator ""
|
85
|
+
|
86
|
+
o.on("-x", "Include additional extended information about files and directories.") do
|
87
|
+
OPTIONS[:extended] = true
|
88
|
+
end
|
89
|
+
|
90
|
+
o.on("-s [checksum1,checksum2]", "Provide a list of the checksum algorithms to use.", "Available: #{Fingerprint::CHECKSUMS.keys.join(', ')}; Default: #{OPTIONS[:checksums].join(', ')}") do |checksums|
|
91
|
+
OPTIONS[:checksums] = checksums.split(/[\s,]+/)
|
92
|
+
end
|
93
|
+
|
78
94
|
o.separator ""
|
79
95
|
o.separator "Help and Copyright information:"
|
80
96
|
|
81
|
-
o.on_tail("--copy", "Display copyright information")
|
82
|
-
$stderr.puts "#{script_name} v#{Fingerprint::VERSION::STRING}. Copyright (c) 2011 Samuel Williams.
|
97
|
+
o.on_tail("--copy", "Display copyright and warranty information") do
|
98
|
+
$stderr.puts "#{script_name} v#{Fingerprint::VERSION::STRING}. Copyright (c) 2011 Samuel Williams."
|
99
|
+
$stderr.puts "This software is released under the MIT license and comes with ABSOLUTELY NO WARRANTY."
|
83
100
|
$stderr.puts "See http://www.oriontransfer.co.nz/ for more information."
|
84
|
-
|
85
101
|
exit
|
86
|
-
|
102
|
+
end
|
87
103
|
|
88
|
-
o.on_tail("-h", "--help", "Show this help message.")
|
104
|
+
o.on_tail("-h", "--help", "Show this help message.") do
|
105
|
+
$stderr.puts o
|
106
|
+
exit
|
107
|
+
end
|
89
108
|
end.parse!
|
90
109
|
|
91
110
|
unless File.directory? OPTIONS[:root]
|
@@ -93,6 +112,10 @@ unless File.directory? OPTIONS[:root]
|
|
93
112
|
exit(255)
|
94
113
|
end
|
95
114
|
|
115
|
+
if OPTIONS[:checksums].size == 0
|
116
|
+
OPTIONS[:checksums] = ['MD5', 'SHA2.256']
|
117
|
+
end
|
118
|
+
|
96
119
|
case (OPTIONS[:mode])
|
97
120
|
when :analyze
|
98
121
|
output_file = Pathname.new(OPTIONS[:root]) + OPTIONS[:name]
|
@@ -102,9 +125,8 @@ case (OPTIONS[:mode])
|
|
102
125
|
exit(2)
|
103
126
|
end
|
104
127
|
|
105
|
-
options =
|
128
|
+
options = OPTIONS.dup
|
106
129
|
options[:excludes] = [OPTIONS[:name]]
|
107
|
-
options[:verbose] = true if OPTIONS[:verbose]
|
108
130
|
|
109
131
|
File.open(output_file, "w") do |io|
|
110
132
|
options[:output] = io
|
@@ -121,13 +143,23 @@ case (OPTIONS[:mode])
|
|
121
143
|
exit(3)
|
122
144
|
end
|
123
145
|
|
124
|
-
|
125
|
-
|
146
|
+
options = OPTIONS.dup
|
147
|
+
|
148
|
+
master = Fingerprint::Recordset.new
|
126
149
|
|
127
150
|
File.open(input_file, "r") do |io|
|
128
|
-
|
151
|
+
master.parse(io)
|
129
152
|
end
|
130
153
|
|
154
|
+
if master.configuration
|
155
|
+
options.merge!(master.configuration.options)
|
156
|
+
end
|
157
|
+
|
158
|
+
scanner = Fingerprint::Scanner.new([OPTIONS[:root]], options)
|
159
|
+
copy = Fingerprint::SparseRecordset.new(scanner)
|
160
|
+
|
161
|
+
error_count += Fingerprint::Checker::verify(master, copy, options)
|
162
|
+
|
131
163
|
if error_count == 0
|
132
164
|
$stderr.puts "Data verified, 0 errors found."
|
133
165
|
exit(0)
|
@@ -148,13 +180,13 @@ case (OPTIONS[:mode])
|
|
148
180
|
true
|
149
181
|
end
|
150
182
|
end
|
151
|
-
|
152
|
-
options =
|
153
|
-
|
154
|
-
options[:output] = OPTIONS[:output] || $stdout
|
155
|
-
|
183
|
+
|
184
|
+
options = OPTIONS.dup
|
185
|
+
|
156
186
|
Fingerprint::Scanner.scan_paths(roots, options)
|
157
187
|
when :check
|
158
|
-
|
188
|
+
options = OPTIONS.dup
|
189
|
+
|
190
|
+
error_count = Fingerprint::Checker.check_files(ARGV[0], ARGV[1], options)
|
159
191
|
exit(error_count > 0 ? 1 : 0)
|
160
192
|
end
|
data/lib/fingerprint.rb
CHANGED
@@ -28,13 +28,13 @@ module Fingerprint
|
|
28
28
|
master = Scanner.new([master_path])
|
29
29
|
copy = Scanner.new([copy_path])
|
30
30
|
|
31
|
-
|
32
|
-
copy
|
31
|
+
master_recordset = Recordset.new
|
32
|
+
copy_recordset = SparseRecordset.new(copy)
|
33
33
|
|
34
|
-
master.
|
35
|
-
|
34
|
+
master.scan(master_recordset)
|
35
|
+
|
36
|
+
checker = Checker.new(master_recordset, copy_recordset)
|
36
37
|
|
37
|
-
checker = Checker.new(master.output, copy.output)
|
38
38
|
checker.check(&block)
|
39
39
|
|
40
40
|
return checker
|
data/lib/fingerprint/checker.rb
CHANGED
@@ -18,7 +18,7 @@
|
|
18
18
|
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
19
19
|
# THE SOFTWARE.
|
20
20
|
|
21
|
-
require '
|
21
|
+
require 'fingerprint/record'
|
22
22
|
|
23
23
|
module Fingerprint
|
24
24
|
# Given two fingerprints (master and copy) ensures that the copy has at least everything contained
|
@@ -35,81 +35,101 @@ module Fingerprint
|
|
35
35
|
@master = master
|
36
36
|
@copy = copy
|
37
37
|
|
38
|
-
@mismatches = []
|
39
|
-
|
40
38
|
@options = options
|
41
|
-
|
42
|
-
@failures = []
|
43
39
|
end
|
44
40
|
|
41
|
+
attr :master
|
42
|
+
attr :copy
|
43
|
+
|
45
44
|
# Run the checking process.
|
46
45
|
def check (options = {}, &block)
|
47
|
-
@files = Set.new
|
48
|
-
@file_paths = {}
|
49
|
-
@file_hashes = {}
|
50
|
-
|
51
|
-
# Parse original fingerprint
|
52
|
-
@copy.each_line do |line|
|
53
|
-
# Skip comments
|
54
|
-
next if line.match(/^\s+#/)
|
55
|
-
|
56
|
-
if line.chomp.match(/^([a-fA-F0-9]{32}): (.*)$/)
|
57
|
-
@files.add([$1, $2])
|
58
|
-
|
59
|
-
@file_paths[$2] = $1
|
60
|
-
@file_hashes[$1] ||= Set.new
|
61
|
-
@file_hashes[$1].add($2)
|
62
|
-
end
|
63
|
-
end
|
64
|
-
|
65
46
|
# For every file in the src, we check that it exists
|
66
47
|
# in the destination:
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
48
|
+
|
49
|
+
total_count = @master.records.count
|
50
|
+
processed_size = 0
|
51
|
+
total_size = @master.records.inject(0) { |count, record| count + (record['file.size'] || 0).to_i }
|
52
|
+
|
53
|
+
@master.records.each_with_index do |record, processed_count|
|
54
|
+
next if record.mode != :file
|
55
|
+
|
56
|
+
result, message = @copy.compare(record)
|
57
|
+
if result != :valid
|
58
|
+
yield record, result, message
|
59
|
+
elsif @options[:extended]
|
60
|
+
# Extended check compares other attributes such as user, group, file modes.
|
61
|
+
changes = record.diff(copy.paths[record.path])
|
62
|
+
|
63
|
+
if changes.size > 0
|
64
|
+
yield record, :attribute_changed, "Attribute(s) #{changes.join(', ')} changed"
|
75
65
|
end
|
76
66
|
end
|
67
|
+
|
68
|
+
if @options[:progress]
|
69
|
+
$stderr.puts "# Progress: File #{processed_count} / #{total_count} = #{sprintf('%0.2f%', processed_count.to_f / total_count.to_f * 100.0)}; Byte #{processed_size} / #{total_size} = #{sprintf('%0.2f%', processed_size.to_f / total_size.to_f * 100.0)}"
|
70
|
+
|
71
|
+
processed_size += (record['file.size'] || 0).to_i
|
72
|
+
end
|
77
73
|
end
|
78
74
|
end
|
79
75
|
|
80
76
|
# A list of files which either did not exist in the copy, or had the wrong checksum.
|
81
77
|
attr :failures
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
78
|
+
|
79
|
+
def self.check_files(master, copy, options = {}, &block)
|
80
|
+
# New API that takes two Recordsets...
|
81
|
+
|
82
|
+
File.open(master) do |master_file|
|
83
|
+
File.open(copy) do |copy_file|
|
84
|
+
master_recordset = Recordset.new
|
85
|
+
master_recordset.parse(master_file)
|
86
|
+
|
87
|
+
copy_recordset = Recordset.new
|
88
|
+
copy_recordset.parse(copy_file)
|
89
|
+
|
90
|
+
verify(master_recordset, copy_recordset, options, &block)
|
91
|
+
end
|
92
|
+
end
|
93
|
+
end
|
91
94
|
|
92
95
|
# Helper function to check two fingerprint files.
|
93
|
-
def self.
|
96
|
+
def self.verify(master, copy, options = {}, &block)
|
94
97
|
error_count = 0
|
95
|
-
|
96
|
-
master = File.open(master) unless master.respond_to? :read
|
97
|
-
copy = File.open(copy) unless copy.respond_to? :read
|
98
|
-
|
99
|
-
checker = Checker.new(master, copy)
|
100
98
|
|
101
|
-
|
99
|
+
errors = options.delete(:recordset) || Recordset.new
|
100
|
+
if options[:output]
|
101
|
+
errors = RecordsetPrinter.new(errors, options[:output])
|
102
|
+
end
|
103
|
+
|
104
|
+
checker = Checker.new(master, copy, options)
|
105
|
+
|
106
|
+
checker.check do |record, result, message|
|
102
107
|
error_count += 1
|
108
|
+
copy = checker.copy.paths[record.path]
|
109
|
+
|
110
|
+
metadata = {
|
111
|
+
'error.code' => result,
|
112
|
+
'error.message' => message
|
113
|
+
}
|
103
114
|
|
104
|
-
if
|
105
|
-
|
106
|
-
|
107
|
-
|
115
|
+
if copy
|
116
|
+
changes = record.diff(copy)
|
117
|
+
|
118
|
+
changes.each do |name|
|
119
|
+
metadata["changes.#{name}.old"] = record[name]
|
120
|
+
metadata["changes.#{name}.new"] = copy[name]
|
121
|
+
end
|
122
|
+
|
123
|
+
errors << Record.new(:warning, record.path, metadata)
|
108
124
|
else
|
109
|
-
|
125
|
+
errors << Record.new(:warning, record.path, metadata)
|
110
126
|
end
|
111
127
|
end
|
112
128
|
|
129
|
+
errors << Record.new(:summary, nil, {
|
130
|
+
'error.count' => error_count
|
131
|
+
})
|
132
|
+
|
113
133
|
return error_count
|
114
134
|
end
|
115
135
|
|
@@ -0,0 +1,262 @@
|
|
1
|
+
# Copyright (c) 2011 Samuel G. D. Williams. <http://www.oriontransfer.co.nz>
|
2
|
+
#
|
3
|
+
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
4
|
+
# of this software and associated documentation files (the "Software"), to deal
|
5
|
+
# in the Software without restriction, including without limitation the rights
|
6
|
+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
7
|
+
# copies of the Software, and to permit persons to whom the Software is
|
8
|
+
# furnished to do so, subject to the following conditions:
|
9
|
+
#
|
10
|
+
# The above copyright notice and this permission notice shall be included in
|
11
|
+
# all copies or substantial portions of the Software.
|
12
|
+
#
|
13
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
14
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
15
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
16
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
17
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
18
|
+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
19
|
+
# THE SOFTWARE.
|
20
|
+
|
21
|
+
require 'set'
|
22
|
+
|
23
|
+
module Fingerprint
|
24
|
+
|
25
|
+
MODES = {
|
26
|
+
:configuration => 'C',
|
27
|
+
:file => 'F',
|
28
|
+
:directory => 'D',
|
29
|
+
:summary => 'S',
|
30
|
+
:warning => 'W',
|
31
|
+
:excluded => '#',
|
32
|
+
}
|
33
|
+
|
34
|
+
class Record
|
35
|
+
def initialize(mode, path, metadata)
|
36
|
+
@mode = mode
|
37
|
+
@path = path
|
38
|
+
|
39
|
+
@metadata = metadata
|
40
|
+
@keys = metadata.keys.grep(/^key\./)
|
41
|
+
end
|
42
|
+
|
43
|
+
attr :mode
|
44
|
+
attr :path
|
45
|
+
attr :metadata
|
46
|
+
attr :keys
|
47
|
+
|
48
|
+
def [](key)
|
49
|
+
@metadata[key]
|
50
|
+
end
|
51
|
+
|
52
|
+
def diff(other)
|
53
|
+
changes = []
|
54
|
+
|
55
|
+
all_keys = Set.new
|
56
|
+
all_keys += @metadata.keys + other.metadata.keys
|
57
|
+
# all_keys -= @keys + other.keys
|
58
|
+
|
59
|
+
all_keys.each do |key|
|
60
|
+
changes << key if @metadata[key].to_s != other.metadata[key].to_s
|
61
|
+
end
|
62
|
+
|
63
|
+
return changes
|
64
|
+
end
|
65
|
+
|
66
|
+
def options
|
67
|
+
options = {}
|
68
|
+
|
69
|
+
options[:extended] = true if @metadata['options.extended'] == 'true'
|
70
|
+
options[:checksum] = @metadata['options.checksums'].split(/[\s,]+/) if @metadata['options.checksums']
|
71
|
+
|
72
|
+
return options
|
73
|
+
end
|
74
|
+
|
75
|
+
def write(output)
|
76
|
+
output.puts "#{MODES[@mode]} #{@path}"
|
77
|
+
|
78
|
+
return if @mode == :excluded
|
79
|
+
|
80
|
+
@metadata.keys.sort.each do |key|
|
81
|
+
output.puts "\t#{key} #{@metadata[key]}"
|
82
|
+
end
|
83
|
+
end
|
84
|
+
end
|
85
|
+
|
86
|
+
class Recordset
|
87
|
+
def initialize
|
88
|
+
@records = []
|
89
|
+
@paths = {}
|
90
|
+
@keys = {}
|
91
|
+
|
92
|
+
@configuration = nil
|
93
|
+
|
94
|
+
@callback = nil
|
95
|
+
end
|
96
|
+
|
97
|
+
attr :records
|
98
|
+
attr :paths
|
99
|
+
attr :keys
|
100
|
+
|
101
|
+
attr :configuration
|
102
|
+
|
103
|
+
def <<(record)
|
104
|
+
@records << record
|
105
|
+
if record.mode == :configuration
|
106
|
+
# What should we do if we get multiple configurations?
|
107
|
+
@configuration = record
|
108
|
+
else
|
109
|
+
@paths[record.path] = record
|
110
|
+
record.keys.each do |key, value|
|
111
|
+
@keys[key] = value
|
112
|
+
end
|
113
|
+
end
|
114
|
+
end
|
115
|
+
|
116
|
+
def lookup(path)
|
117
|
+
return @paths[path]
|
118
|
+
end
|
119
|
+
|
120
|
+
def find(record)
|
121
|
+
result = lookup(record.path)
|
122
|
+
|
123
|
+
return result if result
|
124
|
+
|
125
|
+
@record.keys.each do |key, value|
|
126
|
+
result = @keys[key][value]
|
127
|
+
|
128
|
+
return result if result
|
129
|
+
end
|
130
|
+
|
131
|
+
return nil
|
132
|
+
end
|
133
|
+
|
134
|
+
def compare(other)
|
135
|
+
main = lookup(other.path)
|
136
|
+
|
137
|
+
# Did we find a corresponding other at the same path?
|
138
|
+
if main
|
139
|
+
# Keep track of how many keys were checked..
|
140
|
+
checked = 0
|
141
|
+
|
142
|
+
# Are all the keys of the other record equivalent to the main record?
|
143
|
+
other.keys.each do |key|
|
144
|
+
if main[key]
|
145
|
+
checked += 1
|
146
|
+
|
147
|
+
# Is the key the same?
|
148
|
+
if main[key] != other[key]
|
149
|
+
return :keys_different, "Key #{key.gsub(/^key\./, '')} does not match"
|
150
|
+
end
|
151
|
+
end
|
152
|
+
end
|
153
|
+
|
154
|
+
# Are the records the same size? We put this check second because we do this as a last resort to
|
155
|
+
# ensure that the file hasn't been deliberately tampered with.
|
156
|
+
if main.metadata['size'] and other.metadata['size'] and main.metadata['size'] != other.metadata['size']
|
157
|
+
return :size_different, "File size differs"
|
158
|
+
end
|
159
|
+
|
160
|
+
if checked == 0
|
161
|
+
return :no_keys, "No valid keys to check"
|
162
|
+
else
|
163
|
+
# At least one key could be validated.
|
164
|
+
return :valid, "Valid"
|
165
|
+
end
|
166
|
+
else
|
167
|
+
return :not_found, "File not found"
|
168
|
+
end
|
169
|
+
end
|
170
|
+
|
171
|
+
def self.parse(input)
|
172
|
+
mode = nil
|
173
|
+
path = nil
|
174
|
+
metadata = nil
|
175
|
+
|
176
|
+
markers = {}
|
177
|
+
MODES.each do |key, value|
|
178
|
+
markers[value] = key
|
179
|
+
end
|
180
|
+
|
181
|
+
# Parse original fingerprint
|
182
|
+
input.each_line do |line|
|
183
|
+
# Skip comments and blank lines
|
184
|
+
next if line.match(/^\s*#/) || line.match(/^\s*$/)
|
185
|
+
|
186
|
+
if line.match(/^([A-Z])\s+(.*)$/)
|
187
|
+
if path
|
188
|
+
yield mode, path, metadata
|
189
|
+
end
|
190
|
+
|
191
|
+
mode = markers[$1] || :unknown
|
192
|
+
|
193
|
+
path = $2
|
194
|
+
metadata = {}
|
195
|
+
elsif line.match(/^\s+([a-zA-Z\.0-9]+)\s+(.*)$/)
|
196
|
+
metadata[$1] = $2
|
197
|
+
else
|
198
|
+
$stderr.puts "Unhandled line: #{line}"
|
199
|
+
end
|
200
|
+
end
|
201
|
+
|
202
|
+
if path
|
203
|
+
yield mode, path, metadata
|
204
|
+
end
|
205
|
+
end
|
206
|
+
|
207
|
+
def parse(input)
|
208
|
+
self.class.parse(input) do |mode, path, metadata|
|
209
|
+
self << Record.new(mode, path, metadata)
|
210
|
+
end
|
211
|
+
end
|
212
|
+
|
213
|
+
def write(output)
|
214
|
+
@records.each do |record|
|
215
|
+
record.write(output)
|
216
|
+
end
|
217
|
+
end
|
218
|
+
end
|
219
|
+
|
220
|
+
# This record set dynamically computes data from the disk as required.
|
221
|
+
class SparseRecordset < Recordset
|
222
|
+
def initialize(scanner)
|
223
|
+
super()
|
224
|
+
|
225
|
+
@scanner = scanner
|
226
|
+
end
|
227
|
+
|
228
|
+
def lookup(path)
|
229
|
+
if @paths.key?(path)
|
230
|
+
return @paths[path]
|
231
|
+
else
|
232
|
+
@paths[path] = @scanner.scan_path(path)
|
233
|
+
end
|
234
|
+
end
|
235
|
+
end
|
236
|
+
|
237
|
+
class RecordsetWrapper
|
238
|
+
def initialize(recordset)
|
239
|
+
@recordset = recordset
|
240
|
+
end
|
241
|
+
|
242
|
+
def method_missing(name, *args, &block)
|
243
|
+
@recordset.send(name, *args, &block)
|
244
|
+
end
|
245
|
+
|
246
|
+
def respond_to?(name)
|
247
|
+
@recordset.respond_to?(name)
|
248
|
+
end
|
249
|
+
end
|
250
|
+
|
251
|
+
class RecordsetPrinter < RecordsetWrapper
|
252
|
+
def initialize(recordset, output)
|
253
|
+
super(recordset)
|
254
|
+
@output = output
|
255
|
+
end
|
256
|
+
|
257
|
+
def <<(record)
|
258
|
+
record.write(@output)
|
259
|
+
@recordset << record
|
260
|
+
end
|
261
|
+
end
|
262
|
+
end
|
data/lib/fingerprint/scanner.rb
CHANGED
@@ -20,10 +20,20 @@
|
|
20
20
|
|
21
21
|
require 'stringio'
|
22
22
|
require 'find'
|
23
|
-
require '
|
23
|
+
require 'etc'
|
24
|
+
require 'digest/sha2'
|
24
25
|
|
25
26
|
module Fingerprint
|
26
27
|
|
28
|
+
CHECKSUMS = {
|
29
|
+
'MD5' => lambda { Digest::MD5.new },
|
30
|
+
'SHA1' => lambda { Digest::SHA1.new },
|
31
|
+
'SHA2.256' => lambda { Digest::SHA2.new(256) },
|
32
|
+
'SHA2.512' => lambda { Digest::SHA2.new(512) },
|
33
|
+
}
|
34
|
+
|
35
|
+
DEFAULT_CHECKSUMS = ['MD5', 'SHA2.256']
|
36
|
+
|
27
37
|
# The scanner class can scan a set of directories and produce an index.
|
28
38
|
class Scanner
|
29
39
|
# Initialize the scanner to scan a given set of directories in order.
|
@@ -33,45 +43,100 @@ module Fingerprint
|
|
33
43
|
@roots = roots
|
34
44
|
|
35
45
|
@excludes = options[:excludes] || []
|
36
|
-
@output = options[:output] || StringIO.new
|
37
|
-
|
38
46
|
@options = options
|
47
|
+
|
48
|
+
@digests = {}
|
49
|
+
|
50
|
+
unless @options[:checksums] and @options[:checksums].size > 0
|
51
|
+
@options[:checksums] = DEFAULT_CHECKSUMS
|
52
|
+
end
|
53
|
+
|
54
|
+
@options[:checksums].each do |name|
|
55
|
+
@digests[name] = CHECKSUMS[name].call
|
56
|
+
end
|
57
|
+
|
58
|
+
@callback = nil
|
39
59
|
end
|
40
60
|
|
41
|
-
attr :
|
61
|
+
attr :recordset
|
62
|
+
attr :digests
|
42
63
|
|
43
64
|
protected
|
44
|
-
|
65
|
+
|
45
66
|
# Adds a header for a given path which is mainly version information.
|
46
|
-
def
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
@output.puts ""
|
54
|
-
@output.puts((" " * 32) + " #{path}")
|
67
|
+
def header_for(root)
|
68
|
+
Record.new(:configuration, File.expand_path(root), {
|
69
|
+
'options.extended' => @options[:extended] == true,
|
70
|
+
'options.checksums' => @options[:checksums].join(', '),
|
71
|
+
'summary.time.start' => Time.now,
|
72
|
+
'fingerprint.version' => Fingerprint::VERSION::STRING
|
73
|
+
})
|
55
74
|
end
|
56
|
-
|
57
|
-
# Output a file and associated metadata.
|
58
|
-
def output_file(path)
|
59
|
-
d = Digest::MD5.new
|
60
75
|
|
61
|
-
|
62
|
-
|
63
|
-
|
76
|
+
# This code won't handle multiple threads..
|
77
|
+
def digests_for(path)
|
78
|
+
@digests.each do |key, digest|
|
79
|
+
digest.reset
|
80
|
+
end
|
81
|
+
|
82
|
+
File.open(path, "rb") do |file|
|
83
|
+
buf = ""
|
84
|
+
while file.read(1024 * 1024 * 10, buf)
|
85
|
+
@digests.each do |key, digest|
|
86
|
+
digest << buf
|
87
|
+
end
|
64
88
|
end
|
65
89
|
end
|
66
90
|
|
67
|
-
|
91
|
+
metadata = {}
|
92
|
+
|
93
|
+
@digests.each do |key, digest|
|
94
|
+
metadata["key." + key] = digest.hexdigest
|
95
|
+
end
|
96
|
+
|
97
|
+
return metadata
|
68
98
|
end
|
69
99
|
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
100
|
+
def metadata_for(type, path)
|
101
|
+
stat = File.stat(path)
|
102
|
+
metadata = {}
|
103
|
+
|
104
|
+
if type == :file
|
105
|
+
metadata['file.size'] = stat.size
|
106
|
+
digests = digests_for(path)
|
74
107
|
end
|
108
|
+
|
109
|
+
# Extended information
|
110
|
+
if @options[:extended]
|
111
|
+
metadata['posix.time.modified'] = File.mtime(path)
|
112
|
+
|
113
|
+
metadata['posix.mode'] = stat.mode.to_s(8)
|
114
|
+
|
115
|
+
metadata['posix.permissions.user.id'] = stat.uid
|
116
|
+
metadata['posix.permissions.user.name'] = Etc.getpwuid(stat.uid).name
|
117
|
+
metadata['posix.permissions.group.id'] = stat.gid
|
118
|
+
metadata['posix.permissions.group.name'] = Etc.getgrgid(stat.gid).name
|
119
|
+
end
|
120
|
+
|
121
|
+
return metadata
|
122
|
+
end
|
123
|
+
|
124
|
+
# Output a directory header.
|
125
|
+
def directory_record_for(path)
|
126
|
+
Record.new(:directory, path, metadata_for(:directory, path))
|
127
|
+
end
|
128
|
+
|
129
|
+
# Output a file and associated metadata.
|
130
|
+
def file_record_for(path)
|
131
|
+
metadata = metadata_for(:file, path)
|
132
|
+
metadata.merge!(digests_for(path))
|
133
|
+
|
134
|
+
Record.new(:file, path, metadata)
|
135
|
+
end
|
136
|
+
|
137
|
+
# Add information about excluded paths.
|
138
|
+
def excluded_record_for(path)
|
139
|
+
Record.new(:excluded, path)
|
75
140
|
end
|
76
141
|
|
77
142
|
public
|
@@ -87,50 +152,119 @@ module Fingerprint
|
|
87
152
|
return false
|
88
153
|
end
|
89
154
|
|
155
|
+
def valid_file?(path)
|
156
|
+
!(excluded?(path) || File.symlink?(path) || !File.file?(path) || !File.readable?(path))
|
157
|
+
end
|
158
|
+
|
159
|
+
def scan_path(path)
|
160
|
+
@roots.each do |root|
|
161
|
+
Dir.chdir(root) do
|
162
|
+
if valid_file?(path)
|
163
|
+
return file_record_for(path)
|
164
|
+
end
|
165
|
+
end
|
166
|
+
end
|
167
|
+
|
168
|
+
return nil
|
169
|
+
end
|
170
|
+
|
90
171
|
# Run the scanning process.
|
91
|
-
def scan
|
172
|
+
def scan(recordset)
|
92
173
|
excluded_count = 0
|
93
|
-
|
174
|
+
processed_count = 0
|
175
|
+
processed_size = 0
|
94
176
|
directory_count = 0
|
177
|
+
|
178
|
+
total_count = 0
|
179
|
+
total_size = 0
|
180
|
+
|
181
|
+
# Estimate the number of files and amount of data to process..
|
182
|
+
if @options[:progress]
|
183
|
+
@roots.each do |root|
|
184
|
+
Dir.chdir(root) do
|
185
|
+
Find.find("./") do |path|
|
186
|
+
if File.directory?(path)
|
187
|
+
if excluded?(path)
|
188
|
+
Find.prune # Ignore this directory
|
189
|
+
end
|
190
|
+
else
|
191
|
+
# Skip anything that isn't a valid file (e.g. pipes, sockets, symlinks).
|
192
|
+
if valid_file?(path)
|
193
|
+
total_count += 1
|
194
|
+
total_size += File.size(path)
|
195
|
+
end
|
196
|
+
end
|
197
|
+
end
|
198
|
+
end
|
199
|
+
end
|
200
|
+
end
|
95
201
|
|
96
202
|
@roots.each do |root|
|
97
203
|
Dir.chdir(root) do
|
98
|
-
|
204
|
+
recordset << header_for(root)
|
205
|
+
|
99
206
|
Find.find("./") do |path|
|
100
207
|
if File.directory?(path)
|
101
208
|
if excluded?(path)
|
102
209
|
excluded_count += 1
|
103
|
-
|
210
|
+
|
211
|
+
if @options[:verbose]
|
212
|
+
recordset << excluded_record_for(path)
|
213
|
+
end
|
214
|
+
|
104
215
|
Find.prune # Ignore this directory
|
105
216
|
else
|
106
217
|
directory_count += 1
|
107
|
-
|
218
|
+
|
219
|
+
recordset << directory_record_for(path)
|
108
220
|
end
|
109
221
|
else
|
110
222
|
# Skip anything that isn't a valid file (e.g. pipes, sockets, symlinks).
|
111
|
-
if
|
112
|
-
|
113
|
-
|
223
|
+
if valid_file?(path)
|
224
|
+
processed_count += 1
|
225
|
+
processed_size += File.size(path)
|
226
|
+
|
227
|
+
recordset << file_record_for(path)
|
114
228
|
else
|
115
|
-
|
116
|
-
|
229
|
+
excluded_count += 1
|
230
|
+
|
231
|
+
if @options[:verbose]
|
232
|
+
recordset << excluded_record_for(path)
|
233
|
+
end
|
117
234
|
end
|
118
235
|
end
|
236
|
+
|
237
|
+
# Print out a progress summary if requested
|
238
|
+
if @options[:progress]
|
239
|
+
$stderr.puts "# Progress: File #{processed_count} / #{total_count} = #{sprintf('%0.2f%', processed_count.to_f / total_count.to_f * 100.0)}; Byte #{processed_size} / #{total_size} = #{sprintf('%0.2f%', processed_size.to_f / total_size.to_f * 100.0)}"
|
240
|
+
end
|
119
241
|
end
|
120
242
|
end
|
121
243
|
end
|
122
|
-
|
244
|
+
|
123
245
|
# Output summary
|
124
|
-
|
246
|
+
recordset << Record.new(:summary, nil, {
|
247
|
+
'summary.directories' => directory_count,
|
248
|
+
'summary.files' => processed_count,
|
249
|
+
'summary.size' => processed_size,
|
250
|
+
'summary.excluded' => excluded_count,
|
251
|
+
'summary.time.end' => Time.now
|
252
|
+
})
|
253
|
+
|
254
|
+
return recordset
|
125
255
|
end
|
126
256
|
|
127
257
|
# A helper function to scan a set of directories.
|
128
258
|
def self.scan_paths(paths, options = {})
|
259
|
+
if options[:output]
|
260
|
+
options[:recordset] = RecordsetPrinter.new(Recordset.new, options[:output])
|
261
|
+
end
|
262
|
+
|
129
263
|
scanner = Scanner.new(paths, options)
|
130
|
-
|
131
|
-
scanner.scan
|
132
|
-
|
133
|
-
return
|
264
|
+
|
265
|
+
scanner.scan(options[:recordset])
|
266
|
+
|
267
|
+
return options[:recordset]
|
134
268
|
end
|
135
269
|
end
|
136
270
|
end
|
data/lib/fingerprint/version.rb
CHANGED
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fingerprint
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 27
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 1
|
8
|
-
-
|
9
|
-
-
|
10
|
-
version: 1.
|
8
|
+
- 3
|
9
|
+
- 0
|
10
|
+
version: 1.3.0
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Samuel Williams
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2011-08-
|
18
|
+
date: 2011-08-18 00:00:00 Z
|
19
19
|
dependencies: []
|
20
20
|
|
21
21
|
description:
|
@@ -29,6 +29,7 @@ extra_rdoc_files: []
|
|
29
29
|
files:
|
30
30
|
- bin/fingerprint
|
31
31
|
- lib/fingerprint/checker.rb
|
32
|
+
- lib/fingerprint/record.rb
|
32
33
|
- lib/fingerprint/scanner.rb
|
33
34
|
- lib/fingerprint/version.rb
|
34
35
|
- lib/fingerprint.rb
|