fingerprint 1.2.4 → 1.3.0

Sign up to get free protection for your applications and to get access to all the features.
data/README.md CHANGED
@@ -10,11 +10,22 @@ disk. It also provides a programmatic interface for this procedure.
10
10
 
11
11
  Because Fingerprint produces output to `IO` like structures, it is easy to transmit
12
12
  this data across a network, or store it for later use. As an example, it could be
13
- used to check the integrity of a backup.
13
+ used to check the integrity of a remote backup.
14
14
 
15
- For examples please see the main [project page][1].
15
+ For examples and documentation please see the main [project page][1].
16
16
 
17
- [1]: http://www.oriontransfer.co.nz/projects/admin-toolbox/fingerprint
17
+ [1]: http://www.oriontransfer.co.nz/gems/fingerprint
18
+
19
+ Todo
20
+ ----
21
+
22
+ * Command line option to show files that have been created (i.e. files that don't exist in the master fingerprint).
23
+ * Command line option to show files that have changed but have the same modified time (hardware corruption).
24
+ * Command line option to check fingerprint files based on checksums, e.g. duplicate files, unique files, over a set of directories.
25
+ * Command line tool for extracting duplicate and unique files over a set of directories?
26
+ * Supporting tools for signing fingerprints easily.
27
+ * Support indexing specific files as well as whole directories (maybe?).
28
+ * Support general filenames for `--archive`, e.g. along with `-n`, maybe support a file called `index.fingerprint` by default: improved visibility for end user.
18
29
 
19
30
  License
20
31
  -------
data/bin/fingerprint CHANGED
@@ -27,32 +27,34 @@ require 'fingerprint'
27
27
  OPTIONS = {
28
28
  :root => "./",
29
29
  :mode => :scan,
30
- :output => nil,
30
+ :output => $stdout,
31
31
  :verbose => false,
32
32
  :force => false,
33
- :name => "._fingerprint.txt"
33
+ :name => "._index.fingerprint",
34
+ :extended => false,
35
+ :checksums => Fingerprint::DEFAULT_CHECKSUMS,
34
36
  }
35
37
 
36
38
  ARGV.options do |o|
37
39
  script_name = File.basename($0)
38
40
 
39
41
  o.banner = "Usage: #{script_name} [options] [path]"
40
- o.define_head "This script is used to calculate and compare a basic file system fingerprint."
42
+ o.define_head "This script is used to create and compare file system fingerprints."
41
43
 
42
44
  o.separator ""
43
45
  o.separator "Directory analysis and verification:"
44
46
 
45
- o.on("--analyze [path]", String, "Generage a fingerprint of the given path and save it for later verification.") do |path|
47
+ o.on("-a", "--analyze [path]", String, "Generage a fingerprint of the given path and save it for later verification.") do |path|
46
48
  OPTIONS[:mode] = :analyze
47
- OPTIONS[:root] = path
49
+ OPTIONS[:root] = path if path
48
50
  end
49
51
 
50
- o.on("--verify [path]", String, "Verify a given path based on a previously saved fingerprint.") do |path|
52
+ o.on("-v", "--verify [path]", String, "Verify a given path based on a previously saved fingerprint.") do |path|
51
53
  OPTIONS[:mode] = :verify
52
- OPTIONS[:root] = path
54
+ OPTIONS[:root] = path if path
53
55
  end
54
56
 
55
- o.on("-n name", String, "Specify the name of the fingerprint file (default #{OPTIONS[:name]}).") do |name|
57
+ o.on("-n name", String, "Specify the name of the fingerprint file.", "Default: #{OPTIONS[:name]}") do |name|
56
58
  OPTIONS[:name] = name
57
59
  end
58
60
 
@@ -74,18 +76,35 @@ ARGV.options do |o|
74
76
  o.on("--verbose", "Verbose output, include additional details in the file transcript.") do
75
77
  OPTIONS[:verbose] = true
76
78
  end
79
+
80
+ o.on("--progress", "Print percentage progress to standard error.") do
81
+ OPTIONS[:progress] = true
82
+ end
77
83
 
84
+ o.separator ""
85
+
86
+ o.on("-x", "Include additional extended information about files and directories.") do
87
+ OPTIONS[:extended] = true
88
+ end
89
+
90
+ o.on("-s [checksum1,checksum2]", "Provide a list of the checksum algorithms to use.", "Available: #{Fingerprint::CHECKSUMS.keys.join(', ')}; Default: #{OPTIONS[:checksums].join(', ')}") do |checksums|
91
+ OPTIONS[:checksums] = checksums.split(/[\s,]+/)
92
+ end
93
+
78
94
  o.separator ""
79
95
  o.separator "Help and Copyright information:"
80
96
 
81
- o.on_tail("--copy", "Display copyright information") {
82
- $stderr.puts "#{script_name} v#{Fingerprint::VERSION::STRING}. Copyright (c) 2011 Samuel Williams. Released under the MIT license."
97
+ o.on_tail("--copy", "Display copyright and warranty information") do
98
+ $stderr.puts "#{script_name} v#{Fingerprint::VERSION::STRING}. Copyright (c) 2011 Samuel Williams."
99
+ $stderr.puts "This software is released under the MIT license and comes with ABSOLUTELY NO WARRANTY."
83
100
  $stderr.puts "See http://www.oriontransfer.co.nz/ for more information."
84
-
85
101
  exit
86
- }
102
+ end
87
103
 
88
- o.on_tail("-h", "--help", "Show this help message.") { $stderr.puts o; exit }
104
+ o.on_tail("-h", "--help", "Show this help message.") do
105
+ $stderr.puts o
106
+ exit
107
+ end
89
108
  end.parse!
90
109
 
91
110
  unless File.directory? OPTIONS[:root]
@@ -93,6 +112,10 @@ unless File.directory? OPTIONS[:root]
93
112
  exit(255)
94
113
  end
95
114
 
115
+ if OPTIONS[:checksums].size == 0
116
+ OPTIONS[:checksums] = ['MD5', 'SHA2.256']
117
+ end
118
+
96
119
  case (OPTIONS[:mode])
97
120
  when :analyze
98
121
  output_file = Pathname.new(OPTIONS[:root]) + OPTIONS[:name]
@@ -102,9 +125,8 @@ case (OPTIONS[:mode])
102
125
  exit(2)
103
126
  end
104
127
 
105
- options = {}
128
+ options = OPTIONS.dup
106
129
  options[:excludes] = [OPTIONS[:name]]
107
- options[:verbose] = true if OPTIONS[:verbose]
108
130
 
109
131
  File.open(output_file, "w") do |io|
110
132
  options[:output] = io
@@ -121,13 +143,23 @@ case (OPTIONS[:mode])
121
143
  exit(3)
122
144
  end
123
145
 
124
- scanner = Fingerprint::Scanner.scan_paths([OPTIONS[:root]])
125
- scanner.output.seek(0)
146
+ options = OPTIONS.dup
147
+
148
+ master = Fingerprint::Recordset.new
126
149
 
127
150
  File.open(input_file, "r") do |io|
128
- error_count += Fingerprint::Checker.check_files(io, scanner.output)
151
+ master.parse(io)
129
152
  end
130
153
 
154
+ if master.configuration
155
+ options.merge!(master.configuration.options)
156
+ end
157
+
158
+ scanner = Fingerprint::Scanner.new([OPTIONS[:root]], options)
159
+ copy = Fingerprint::SparseRecordset.new(scanner)
160
+
161
+ error_count += Fingerprint::Checker::verify(master, copy, options)
162
+
131
163
  if error_count == 0
132
164
  $stderr.puts "Data verified, 0 errors found."
133
165
  exit(0)
@@ -148,13 +180,13 @@ case (OPTIONS[:mode])
148
180
  true
149
181
  end
150
182
  end
151
-
152
- options = {}
153
- options[:verbose] = true if OPTIONS[:verbose]
154
- options[:output] = OPTIONS[:output] || $stdout
155
-
183
+
184
+ options = OPTIONS.dup
185
+
156
186
  Fingerprint::Scanner.scan_paths(roots, options)
157
187
  when :check
158
- error_count = Fingerprint::Checker.check_files(ARGV[0], ARGV[1])
188
+ options = OPTIONS.dup
189
+
190
+ error_count = Fingerprint::Checker.check_files(ARGV[0], ARGV[1], options)
159
191
  exit(error_count > 0 ? 1 : 0)
160
192
  end
data/lib/fingerprint.rb CHANGED
@@ -28,13 +28,13 @@ module Fingerprint
28
28
  master = Scanner.new([master_path])
29
29
  copy = Scanner.new([copy_path])
30
30
 
31
- master.scan
32
- copy.scan
31
+ master_recordset = Recordset.new
32
+ copy_recordset = SparseRecordset.new(copy)
33
33
 
34
- master.output.seek(0)
35
- copy.output.seek(0)
34
+ master.scan(master_recordset)
35
+
36
+ checker = Checker.new(master_recordset, copy_recordset)
36
37
 
37
- checker = Checker.new(master.output, copy.output)
38
38
  checker.check(&block)
39
39
 
40
40
  return checker
@@ -18,7 +18,7 @@
18
18
  # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19
19
  # THE SOFTWARE.
20
20
 
21
- require 'set'
21
+ require 'fingerprint/record'
22
22
 
23
23
  module Fingerprint
24
24
  # Given two fingerprints (master and copy) ensures that the copy has at least everything contained
@@ -35,81 +35,101 @@ module Fingerprint
35
35
  @master = master
36
36
  @copy = copy
37
37
 
38
- @mismatches = []
39
-
40
38
  @options = options
41
-
42
- @failures = []
43
39
  end
44
40
 
41
+ attr :master
42
+ attr :copy
43
+
45
44
  # Run the checking process.
46
45
  def check (options = {}, &block)
47
- @files = Set.new
48
- @file_paths = {}
49
- @file_hashes = {}
50
-
51
- # Parse original fingerprint
52
- @copy.each_line do |line|
53
- # Skip comments
54
- next if line.match(/^\s+#/)
55
-
56
- if line.chomp.match(/^([a-fA-F0-9]{32}): (.*)$/)
57
- @files.add([$1, $2])
58
-
59
- @file_paths[$2] = $1
60
- @file_hashes[$1] ||= Set.new
61
- @file_hashes[$1].add($2)
62
- end
63
- end
64
-
65
46
  # For every file in the src, we check that it exists
66
47
  # in the destination:
67
- @master.each_line do |line|
68
- # Skip comments
69
- next if line.match(/^\s+#/)
70
-
71
- if line.chomp.match(/^([a-fA-F0-9]{32}): (.*)$/)
72
- unless @files.include?([$1, $2])
73
- yield($1, $2) if block_given?
74
- @failures << [$1, $2]
48
+
49
+ total_count = @master.records.count
50
+ processed_size = 0
51
+ total_size = @master.records.inject(0) { |count, record| count + (record['file.size'] || 0).to_i }
52
+
53
+ @master.records.each_with_index do |record, processed_count|
54
+ next if record.mode != :file
55
+
56
+ result, message = @copy.compare(record)
57
+ if result != :valid
58
+ yield record, result, message
59
+ elsif @options[:extended]
60
+ # Extended check compares other attributes such as user, group, file modes.
61
+ changes = record.diff(copy.paths[record.path])
62
+
63
+ if changes.size > 0
64
+ yield record, :attribute_changed, "Attribute(s) #{changes.join(', ')} changed"
75
65
  end
76
66
  end
67
+
68
+ if @options[:progress]
69
+ $stderr.puts "# Progress: File #{processed_count} / #{total_count} = #{sprintf('%0.2f%', processed_count.to_f / total_count.to_f * 100.0)}; Byte #{processed_size} / #{total_size} = #{sprintf('%0.2f%', processed_size.to_f / total_size.to_f * 100.0)}"
70
+
71
+ processed_size += (record['file.size'] || 0).to_i
72
+ end
77
73
  end
78
74
  end
79
75
 
80
76
  # A list of files which either did not exist in the copy, or had the wrong checksum.
81
77
  attr :failures
82
-
83
- # An array of all files in the copy
84
- attr :files
85
-
86
- # A hash of all files in copy +path => file hash+
87
- attr :file_paths
88
-
89
- # A hash of all files in copy +file hash => [file1, file2, ...]+
90
- attr :file_hashes
78
+
79
+ def self.check_files(master, copy, options = {}, &block)
80
+ # New API that takes two Recordsets...
81
+
82
+ File.open(master) do |master_file|
83
+ File.open(copy) do |copy_file|
84
+ master_recordset = Recordset.new
85
+ master_recordset.parse(master_file)
86
+
87
+ copy_recordset = Recordset.new
88
+ copy_recordset.parse(copy_file)
89
+
90
+ verify(master_recordset, copy_recordset, options, &block)
91
+ end
92
+ end
93
+ end
91
94
 
92
95
  # Helper function to check two fingerprint files.
93
- def self.check_files(master, copy, &block)
96
+ def self.verify(master, copy, options = {}, &block)
94
97
  error_count = 0
95
-
96
- master = File.open(master) unless master.respond_to? :read
97
- copy = File.open(copy) unless copy.respond_to? :read
98
-
99
- checker = Checker.new(master, copy)
100
98
 
101
- checker.check do |hash, path|
99
+ errors = options.delete(:recordset) || Recordset.new
100
+ if options[:output]
101
+ errors = RecordsetPrinter.new(errors, options[:output])
102
+ end
103
+
104
+ checker = Checker.new(master, copy, options)
105
+
106
+ checker.check do |record, result, message|
102
107
  error_count += 1
108
+ copy = checker.copy.paths[record.path]
109
+
110
+ metadata = {
111
+ 'error.code' => result,
112
+ 'error.message' => message
113
+ }
103
114
 
104
- if !checker.file_paths[path]
105
- $stderr.puts "File #{path.dump} is missing!"
106
- elsif checker.file_paths[path] != hash
107
- $stderr.puts "File #{path.dump} is different!"
115
+ if copy
116
+ changes = record.diff(copy)
117
+
118
+ changes.each do |name|
119
+ metadata["changes.#{name}.old"] = record[name]
120
+ metadata["changes.#{name}.new"] = copy[name]
121
+ end
122
+
123
+ errors << Record.new(:warning, record.path, metadata)
108
124
  else
109
- $stderr.puts "Unknown error for path #{path.dump}"
125
+ errors << Record.new(:warning, record.path, metadata)
110
126
  end
111
127
  end
112
128
 
129
+ errors << Record.new(:summary, nil, {
130
+ 'error.count' => error_count
131
+ })
132
+
113
133
  return error_count
114
134
  end
115
135
 
@@ -0,0 +1,262 @@
1
+ # Copyright (c) 2011 Samuel G. D. Williams. <http://www.oriontransfer.co.nz>
2
+ #
3
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
4
+ # of this software and associated documentation files (the "Software"), to deal
5
+ # in the Software without restriction, including without limitation the rights
6
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7
+ # copies of the Software, and to permit persons to whom the Software is
8
+ # furnished to do so, subject to the following conditions:
9
+ #
10
+ # The above copyright notice and this permission notice shall be included in
11
+ # all copies or substantial portions of the Software.
12
+ #
13
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19
+ # THE SOFTWARE.
20
+
21
+ require 'set'
22
+
23
+ module Fingerprint
24
+
25
+ MODES = {
26
+ :configuration => 'C',
27
+ :file => 'F',
28
+ :directory => 'D',
29
+ :summary => 'S',
30
+ :warning => 'W',
31
+ :excluded => '#',
32
+ }
33
+
34
+ class Record
35
+ def initialize(mode, path, metadata)
36
+ @mode = mode
37
+ @path = path
38
+
39
+ @metadata = metadata
40
+ @keys = metadata.keys.grep(/^key\./)
41
+ end
42
+
43
+ attr :mode
44
+ attr :path
45
+ attr :metadata
46
+ attr :keys
47
+
48
+ def [](key)
49
+ @metadata[key]
50
+ end
51
+
52
+ def diff(other)
53
+ changes = []
54
+
55
+ all_keys = Set.new
56
+ all_keys += @metadata.keys + other.metadata.keys
57
+ # all_keys -= @keys + other.keys
58
+
59
+ all_keys.each do |key|
60
+ changes << key if @metadata[key].to_s != other.metadata[key].to_s
61
+ end
62
+
63
+ return changes
64
+ end
65
+
66
+ def options
67
+ options = {}
68
+
69
+ options[:extended] = true if @metadata['options.extended'] == 'true'
70
+ options[:checksum] = @metadata['options.checksums'].split(/[\s,]+/) if @metadata['options.checksums']
71
+
72
+ return options
73
+ end
74
+
75
+ def write(output)
76
+ output.puts "#{MODES[@mode]} #{@path}"
77
+
78
+ return if @mode == :excluded
79
+
80
+ @metadata.keys.sort.each do |key|
81
+ output.puts "\t#{key} #{@metadata[key]}"
82
+ end
83
+ end
84
+ end
85
+
86
+ class Recordset
87
+ def initialize
88
+ @records = []
89
+ @paths = {}
90
+ @keys = {}
91
+
92
+ @configuration = nil
93
+
94
+ @callback = nil
95
+ end
96
+
97
+ attr :records
98
+ attr :paths
99
+ attr :keys
100
+
101
+ attr :configuration
102
+
103
+ def <<(record)
104
+ @records << record
105
+ if record.mode == :configuration
106
+ # What should we do if we get multiple configurations?
107
+ @configuration = record
108
+ else
109
+ @paths[record.path] = record
110
+ record.keys.each do |key, value|
111
+ @keys[key] = value
112
+ end
113
+ end
114
+ end
115
+
116
+ def lookup(path)
117
+ return @paths[path]
118
+ end
119
+
120
+ def find(record)
121
+ result = lookup(record.path)
122
+
123
+ return result if result
124
+
125
+ @record.keys.each do |key, value|
126
+ result = @keys[key][value]
127
+
128
+ return result if result
129
+ end
130
+
131
+ return nil
132
+ end
133
+
134
+ def compare(other)
135
+ main = lookup(other.path)
136
+
137
+ # Did we find a corresponding other at the same path?
138
+ if main
139
+ # Keep track of how many keys were checked..
140
+ checked = 0
141
+
142
+ # Are all the keys of the other record equivalent to the main record?
143
+ other.keys.each do |key|
144
+ if main[key]
145
+ checked += 1
146
+
147
+ # Is the key the same?
148
+ if main[key] != other[key]
149
+ return :keys_different, "Key #{key.gsub(/^key\./, '')} does not match"
150
+ end
151
+ end
152
+ end
153
+
154
+ # Are the records the same size? We put this check second because we do this as a last resort to
155
+ # ensure that the file hasn't been deliberately tampered with.
156
+ if main.metadata['size'] and other.metadata['size'] and main.metadata['size'] != other.metadata['size']
157
+ return :size_different, "File size differs"
158
+ end
159
+
160
+ if checked == 0
161
+ return :no_keys, "No valid keys to check"
162
+ else
163
+ # At least one key could be validated.
164
+ return :valid, "Valid"
165
+ end
166
+ else
167
+ return :not_found, "File not found"
168
+ end
169
+ end
170
+
171
+ def self.parse(input)
172
+ mode = nil
173
+ path = nil
174
+ metadata = nil
175
+
176
+ markers = {}
177
+ MODES.each do |key, value|
178
+ markers[value] = key
179
+ end
180
+
181
+ # Parse original fingerprint
182
+ input.each_line do |line|
183
+ # Skip comments and blank lines
184
+ next if line.match(/^\s*#/) || line.match(/^\s*$/)
185
+
186
+ if line.match(/^([A-Z])\s+(.*)$/)
187
+ if path
188
+ yield mode, path, metadata
189
+ end
190
+
191
+ mode = markers[$1] || :unknown
192
+
193
+ path = $2
194
+ metadata = {}
195
+ elsif line.match(/^\s+([a-zA-Z\.0-9]+)\s+(.*)$/)
196
+ metadata[$1] = $2
197
+ else
198
+ $stderr.puts "Unhandled line: #{line}"
199
+ end
200
+ end
201
+
202
+ if path
203
+ yield mode, path, metadata
204
+ end
205
+ end
206
+
207
+ def parse(input)
208
+ self.class.parse(input) do |mode, path, metadata|
209
+ self << Record.new(mode, path, metadata)
210
+ end
211
+ end
212
+
213
+ def write(output)
214
+ @records.each do |record|
215
+ record.write(output)
216
+ end
217
+ end
218
+ end
219
+
220
+ # This record set dynamically computes data from the disk as required.
221
+ class SparseRecordset < Recordset
222
+ def initialize(scanner)
223
+ super()
224
+
225
+ @scanner = scanner
226
+ end
227
+
228
+ def lookup(path)
229
+ if @paths.key?(path)
230
+ return @paths[path]
231
+ else
232
+ @paths[path] = @scanner.scan_path(path)
233
+ end
234
+ end
235
+ end
236
+
237
+ class RecordsetWrapper
238
+ def initialize(recordset)
239
+ @recordset = recordset
240
+ end
241
+
242
+ def method_missing(name, *args, &block)
243
+ @recordset.send(name, *args, &block)
244
+ end
245
+
246
+ def respond_to?(name)
247
+ @recordset.respond_to?(name)
248
+ end
249
+ end
250
+
251
+ class RecordsetPrinter < RecordsetWrapper
252
+ def initialize(recordset, output)
253
+ super(recordset)
254
+ @output = output
255
+ end
256
+
257
+ def <<(record)
258
+ record.write(@output)
259
+ @recordset << record
260
+ end
261
+ end
262
+ end
@@ -20,10 +20,20 @@
20
20
 
21
21
  require 'stringio'
22
22
  require 'find'
23
- require 'digest'
23
+ require 'etc'
24
+ require 'digest/sha2'
24
25
 
25
26
  module Fingerprint
26
27
 
28
+ CHECKSUMS = {
29
+ 'MD5' => lambda { Digest::MD5.new },
30
+ 'SHA1' => lambda { Digest::SHA1.new },
31
+ 'SHA2.256' => lambda { Digest::SHA2.new(256) },
32
+ 'SHA2.512' => lambda { Digest::SHA2.new(512) },
33
+ }
34
+
35
+ DEFAULT_CHECKSUMS = ['MD5', 'SHA2.256']
36
+
27
37
  # The scanner class can scan a set of directories and produce an index.
28
38
  class Scanner
29
39
  # Initialize the scanner to scan a given set of directories in order.
@@ -33,45 +43,100 @@ module Fingerprint
33
43
  @roots = roots
34
44
 
35
45
  @excludes = options[:excludes] || []
36
- @output = options[:output] || StringIO.new
37
-
38
46
  @options = options
47
+
48
+ @digests = {}
49
+
50
+ unless @options[:checksums] and @options[:checksums].size > 0
51
+ @options[:checksums] = DEFAULT_CHECKSUMS
52
+ end
53
+
54
+ @options[:checksums].each do |name|
55
+ @digests[name] = CHECKSUMS[name].call
56
+ end
57
+
58
+ @callback = nil
39
59
  end
40
60
 
41
- attr :output
61
+ attr :recordset
62
+ attr :digests
42
63
 
43
64
  protected
44
-
65
+
45
66
  # Adds a header for a given path which is mainly version information.
46
- def output_header(root)
47
- @output.puts "\# Checksum generated by Fingerprint (#{Fingerprint::VERSION::STRING}) at #{Time.now.to_s}"
48
- @output.puts "\# Root: #{root}"
49
- end
50
-
51
- # Output a directory header.
52
- def output_dir(path)
53
- @output.puts ""
54
- @output.puts((" " * 32) + " #{path}")
67
+ def header_for(root)
68
+ Record.new(:configuration, File.expand_path(root), {
69
+ 'options.extended' => @options[:extended] == true,
70
+ 'options.checksums' => @options[:checksums].join(', '),
71
+ 'summary.time.start' => Time.now,
72
+ 'fingerprint.version' => Fingerprint::VERSION::STRING
73
+ })
55
74
  end
56
-
57
- # Output a file and associated metadata.
58
- def output_file(path)
59
- d = Digest::MD5.new
60
75
 
61
- File.open(path) do |f|
62
- while buf = f.read(1024*1024*10)
63
- d << buf
76
+ # This code won't handle multiple threads..
77
+ def digests_for(path)
78
+ @digests.each do |key, digest|
79
+ digest.reset
80
+ end
81
+
82
+ File.open(path, "rb") do |file|
83
+ buf = ""
84
+ while file.read(1024 * 1024 * 10, buf)
85
+ @digests.each do |key, digest|
86
+ digest << buf
87
+ end
64
88
  end
65
89
  end
66
90
 
67
- @output.puts "#{d.hexdigest}: #{path}"
91
+ metadata = {}
92
+
93
+ @digests.each do |key, digest|
94
+ metadata["key." + key] = digest.hexdigest
95
+ end
96
+
97
+ return metadata
68
98
  end
69
99
 
70
- # Add information about excluded paths.
71
- def output_excluded(path)
72
- if @options[:verbose]
73
- @output.puts '#'.ljust(32) + ": #{path}"
100
+ def metadata_for(type, path)
101
+ stat = File.stat(path)
102
+ metadata = {}
103
+
104
+ if type == :file
105
+ metadata['file.size'] = stat.size
106
+ digests = digests_for(path)
74
107
  end
108
+
109
+ # Extended information
110
+ if @options[:extended]
111
+ metadata['posix.time.modified'] = File.mtime(path)
112
+
113
+ metadata['posix.mode'] = stat.mode.to_s(8)
114
+
115
+ metadata['posix.permissions.user.id'] = stat.uid
116
+ metadata['posix.permissions.user.name'] = Etc.getpwuid(stat.uid).name
117
+ metadata['posix.permissions.group.id'] = stat.gid
118
+ metadata['posix.permissions.group.name'] = Etc.getgrgid(stat.gid).name
119
+ end
120
+
121
+ return metadata
122
+ end
123
+
124
+ # Output a directory header.
125
+ def directory_record_for(path)
126
+ Record.new(:directory, path, metadata_for(:directory, path))
127
+ end
128
+
129
+ # Output a file and associated metadata.
130
+ def file_record_for(path)
131
+ metadata = metadata_for(:file, path)
132
+ metadata.merge!(digests_for(path))
133
+
134
+ Record.new(:file, path, metadata)
135
+ end
136
+
137
+ # Add information about excluded paths.
138
+ def excluded_record_for(path)
139
+ Record.new(:excluded, path)
75
140
  end
76
141
 
77
142
  public
@@ -87,50 +152,119 @@ module Fingerprint
87
152
  return false
88
153
  end
89
154
 
155
+ def valid_file?(path)
156
+ !(excluded?(path) || File.symlink?(path) || !File.file?(path) || !File.readable?(path))
157
+ end
158
+
159
+ def scan_path(path)
160
+ @roots.each do |root|
161
+ Dir.chdir(root) do
162
+ if valid_file?(path)
163
+ return file_record_for(path)
164
+ end
165
+ end
166
+ end
167
+
168
+ return nil
169
+ end
170
+
90
171
  # Run the scanning process.
91
- def scan
172
+ def scan(recordset)
92
173
  excluded_count = 0
93
- checksummed_count = 0
174
+ processed_count = 0
175
+ processed_size = 0
94
176
  directory_count = 0
177
+
178
+ total_count = 0
179
+ total_size = 0
180
+
181
+ # Estimate the number of files and amount of data to process..
182
+ if @options[:progress]
183
+ @roots.each do |root|
184
+ Dir.chdir(root) do
185
+ Find.find("./") do |path|
186
+ if File.directory?(path)
187
+ if excluded?(path)
188
+ Find.prune # Ignore this directory
189
+ end
190
+ else
191
+ # Skip anything that isn't a valid file (e.g. pipes, sockets, symlinks).
192
+ if valid_file?(path)
193
+ total_count += 1
194
+ total_size += File.size(path)
195
+ end
196
+ end
197
+ end
198
+ end
199
+ end
200
+ end
95
201
 
96
202
  @roots.each do |root|
97
203
  Dir.chdir(root) do
98
- output_header(root)
204
+ recordset << header_for(root)
205
+
99
206
  Find.find("./") do |path|
100
207
  if File.directory?(path)
101
208
  if excluded?(path)
102
209
  excluded_count += 1
103
- output_excluded(path)
210
+
211
+ if @options[:verbose]
212
+ recordset << excluded_record_for(path)
213
+ end
214
+
104
215
  Find.prune # Ignore this directory
105
216
  else
106
217
  directory_count += 1
107
- output_dir(path)
218
+
219
+ recordset << directory_record_for(path)
108
220
  end
109
221
  else
110
222
  # Skip anything that isn't a valid file (e.g. pipes, sockets, symlinks).
111
- if excluded?(path) || File.symlink?(path) || !File.file?(path) || !File.readable?(path)
112
- excluded_count += 1
113
- output_excluded(path)
223
+ if valid_file?(path)
224
+ processed_count += 1
225
+ processed_size += File.size(path)
226
+
227
+ recordset << file_record_for(path)
114
228
  else
115
- checksummed_count += 1
116
- output_file(path)
229
+ excluded_count += 1
230
+
231
+ if @options[:verbose]
232
+ recordset << excluded_record_for(path)
233
+ end
117
234
  end
118
235
  end
236
+
237
+ # Print out a progress summary if requested
238
+ if @options[:progress]
239
+ $stderr.puts "# Progress: File #{processed_count} / #{total_count} = #{sprintf('%0.2f%', processed_count.to_f / total_count.to_f * 100.0)}; Byte #{processed_size} / #{total_size} = #{sprintf('%0.2f%', processed_size.to_f / total_size.to_f * 100.0)}"
240
+ end
119
241
  end
120
242
  end
121
243
  end
122
-
244
+
123
245
  # Output summary
124
- @output.puts "\# Directories: #{directory_count} Files: #{checksummed_count} Excluded: #{excluded_count}"
246
+ recordset << Record.new(:summary, nil, {
247
+ 'summary.directories' => directory_count,
248
+ 'summary.files' => processed_count,
249
+ 'summary.size' => processed_size,
250
+ 'summary.excluded' => excluded_count,
251
+ 'summary.time.end' => Time.now
252
+ })
253
+
254
+ return recordset
125
255
  end
126
256
 
127
257
  # A helper function to scan a set of directories.
128
258
  def self.scan_paths(paths, options = {})
259
+ if options[:output]
260
+ options[:recordset] = RecordsetPrinter.new(Recordset.new, options[:output])
261
+ end
262
+
129
263
  scanner = Scanner.new(paths, options)
130
-
131
- scanner.scan
132
-
133
- return scanner
264
+
265
+ scanner.scan(options[:recordset])
266
+
267
+ return options[:recordset]
134
268
  end
135
269
  end
136
270
  end
@@ -22,8 +22,8 @@
22
22
  module Fingerprint
23
23
  module VERSION
24
24
  MAJOR = 1
25
- MINOR = 2
26
- TINY = 4
25
+ MINOR = 3
26
+ TINY = 0
27
27
 
28
28
  STRING = [MAJOR, MINOR, TINY].join('.')
29
29
  end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fingerprint
3
3
  version: !ruby/object:Gem::Version
4
- hash: 23
4
+ hash: 27
5
5
  prerelease:
6
6
  segments:
7
7
  - 1
8
- - 2
9
- - 4
10
- version: 1.2.4
8
+ - 3
9
+ - 0
10
+ version: 1.3.0
11
11
  platform: ruby
12
12
  authors:
13
13
  - Samuel Williams
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2011-08-09 00:00:00 Z
18
+ date: 2011-08-18 00:00:00 Z
19
19
  dependencies: []
20
20
 
21
21
  description:
@@ -29,6 +29,7 @@ extra_rdoc_files: []
29
29
  files:
30
30
  - bin/fingerprint
31
31
  - lib/fingerprint/checker.rb
32
+ - lib/fingerprint/record.rb
32
33
  - lib/fingerprint/scanner.rb
33
34
  - lib/fingerprint/version.rb
34
35
  - lib/fingerprint.rb