fingerprint 1.2.4 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.md CHANGED
@@ -10,11 +10,22 @@ disk. It also provides a programmatic interface for this procedure.
10
10
 
11
11
  Because Fingerprint produces output to `IO` like structures, it is easy to transmit
12
12
  this data across a network, or store it for later use. As an example, it could be
13
- used to check the integrity of a backup.
13
+ used to check the integrity of a remote backup.
14
14
 
15
- For examples please see the main [project page][1].
15
+ For examples and documentation please see the main [project page][1].
16
16
 
17
- [1]: http://www.oriontransfer.co.nz/projects/admin-toolbox/fingerprint
17
+ [1]: http://www.oriontransfer.co.nz/gems/fingerprint
18
+
19
+ Todo
20
+ ----
21
+
22
+ * Command line option to show files that have been created (i.e. that don't exist in the master fingerprint).
23
+ * Command line option to show files that have changed but have the same modified time (hardware corruption).
24
+ * Command line option to check fingerprint files based on checksums, e.g. duplicate files, unique files, over a set of directories.
25
+ * Command line tool for extracting duplicate and unique files over a set of directories?
26
+ * Supporting tools for signing fingerprints easily.
27
+ * Support indexing specific files as well as whole directories (maybe?).
28
+ * Support general filenames for `--archive`, e.g. along with `-n`, maybe support a file called `index.fingerprint` by default: improved visibility for end user.
18
29
 
19
30
  License
20
31
  -------
data/bin/fingerprint CHANGED
@@ -27,32 +27,34 @@ require 'fingerprint'
27
27
  OPTIONS = {
28
28
  :root => "./",
29
29
  :mode => :scan,
30
- :output => nil,
30
+ :output => $stdout,
31
31
  :verbose => false,
32
32
  :force => false,
33
- :name => "._fingerprint.txt"
33
+ :name => "._index.fingerprint",
34
+ :extended => false,
35
+ :checksums => Fingerprint::DEFAULT_CHECKSUMS,
34
36
  }
35
37
 
36
38
  ARGV.options do |o|
37
39
  script_name = File.basename($0)
38
40
 
39
41
  o.banner = "Usage: #{script_name} [options] [path]"
40
- o.define_head "This script is used to calculate and compare a basic file system fingerprint."
42
+ o.define_head "This script is used to create and compare file system fingerprints."
41
43
 
42
44
  o.separator ""
43
45
  o.separator "Directory analysis and verification:"
44
46
 
45
- o.on("--analyze [path]", String, "Generage a fingerprint of the given path and save it for later verification.") do |path|
47
+ o.on("-a", "--analyze [path]", String, "Generage a fingerprint of the given path and save it for later verification.") do |path|
46
48
  OPTIONS[:mode] = :analyze
47
- OPTIONS[:root] = path
49
+ OPTIONS[:root] = path if path
48
50
  end
49
51
 
50
- o.on("--verify [path]", String, "Verify a given path based on a previously saved fingerprint.") do |path|
52
+ o.on("-v", "--verify [path]", String, "Verify a given path based on a previously saved fingerprint.") do |path|
51
53
  OPTIONS[:mode] = :verify
52
- OPTIONS[:root] = path
54
+ OPTIONS[:root] = path if path
53
55
  end
54
56
 
55
- o.on("-n name", String, "Specify the name of the fingerprint file (default #{OPTIONS[:name]}).") do |name|
57
+ o.on("-n name", String, "Specify the name of the fingerprint file.", "Default: #{OPTIONS[:name]}") do |name|
56
58
  OPTIONS[:name] = name
57
59
  end
58
60
 
@@ -74,18 +76,35 @@ ARGV.options do |o|
74
76
  o.on("--verbose", "Verbose output, include additional details in the file transcript.") do
75
77
  OPTIONS[:verbose] = true
76
78
  end
79
+
80
+ o.on("--progress", "Print percentage progress to standard error.") do
81
+ OPTIONS[:progress] = true
82
+ end
77
83
 
84
+ o.separator ""
85
+
86
+ o.on("-x", "Include additional extended information about files and directories.") do
87
+ OPTIONS[:extended] = true
88
+ end
89
+
90
+ o.on("-s [checksum1,checksum2]", "Provide a list of the checksum algorithms to use.", "Available: #{Fingerprint::CHECKSUMS.keys.join(', ')}; Default: #{OPTIONS[:checksums].join(', ')}") do |checksums|
91
+ OPTIONS[:checksums] = checksums.split(/[\s,]+/)
92
+ end
93
+
78
94
  o.separator ""
79
95
  o.separator "Help and Copyright information:"
80
96
 
81
- o.on_tail("--copy", "Display copyright information") {
82
- $stderr.puts "#{script_name} v#{Fingerprint::VERSION::STRING}. Copyright (c) 2011 Samuel Williams. Released under the MIT license."
97
+ o.on_tail("--copy", "Display copyright and warranty information") do
98
+ $stderr.puts "#{script_name} v#{Fingerprint::VERSION::STRING}. Copyright (c) 2011 Samuel Williams."
99
+ $stderr.puts "This software is released under the MIT license and comes with ABSOLUTELY NO WARRANTY."
83
100
  $stderr.puts "See http://www.oriontransfer.co.nz/ for more information."
84
-
85
101
  exit
86
- }
102
+ end
87
103
 
88
- o.on_tail("-h", "--help", "Show this help message.") { $stderr.puts o; exit }
104
+ o.on_tail("-h", "--help", "Show this help message.") do
105
+ $stderr.puts o
106
+ exit
107
+ end
89
108
  end.parse!
90
109
 
91
110
  unless File.directory? OPTIONS[:root]
@@ -93,6 +112,10 @@ unless File.directory? OPTIONS[:root]
93
112
  exit(255)
94
113
  end
95
114
 
115
+ if OPTIONS[:checksums].size == 0
116
+ OPTIONS[:checksums] = ['MD5', 'SHA2.256']
117
+ end
118
+
96
119
  case (OPTIONS[:mode])
97
120
  when :analyze
98
121
  output_file = Pathname.new(OPTIONS[:root]) + OPTIONS[:name]
@@ -102,9 +125,8 @@ case (OPTIONS[:mode])
102
125
  exit(2)
103
126
  end
104
127
 
105
- options = {}
128
+ options = OPTIONS.dup
106
129
  options[:excludes] = [OPTIONS[:name]]
107
- options[:verbose] = true if OPTIONS[:verbose]
108
130
 
109
131
  File.open(output_file, "w") do |io|
110
132
  options[:output] = io
@@ -121,13 +143,23 @@ case (OPTIONS[:mode])
121
143
  exit(3)
122
144
  end
123
145
 
124
- scanner = Fingerprint::Scanner.scan_paths([OPTIONS[:root]])
125
- scanner.output.seek(0)
146
+ options = OPTIONS.dup
147
+
148
+ master = Fingerprint::Recordset.new
126
149
 
127
150
  File.open(input_file, "r") do |io|
128
- error_count += Fingerprint::Checker.check_files(io, scanner.output)
151
+ master.parse(io)
129
152
  end
130
153
 
154
+ if master.configuration
155
+ options.merge!(master.configuration.options)
156
+ end
157
+
158
+ scanner = Fingerprint::Scanner.new([OPTIONS[:root]], options)
159
+ copy = Fingerprint::SparseRecordset.new(scanner)
160
+
161
+ error_count += Fingerprint::Checker::verify(master, copy, options)
162
+
131
163
  if error_count == 0
132
164
  $stderr.puts "Data verified, 0 errors found."
133
165
  exit(0)
@@ -148,13 +180,13 @@ case (OPTIONS[:mode])
148
180
  true
149
181
  end
150
182
  end
151
-
152
- options = {}
153
- options[:verbose] = true if OPTIONS[:verbose]
154
- options[:output] = OPTIONS[:output] || $stdout
155
-
183
+
184
+ options = OPTIONS.dup
185
+
156
186
  Fingerprint::Scanner.scan_paths(roots, options)
157
187
  when :check
158
- error_count = Fingerprint::Checker.check_files(ARGV[0], ARGV[1])
188
+ options = OPTIONS.dup
189
+
190
+ error_count = Fingerprint::Checker.check_files(ARGV[0], ARGV[1], options)
159
191
  exit(error_count > 0 ? 1 : 0)
160
192
  end
data/lib/fingerprint.rb CHANGED
@@ -28,13 +28,13 @@ module Fingerprint
28
28
  master = Scanner.new([master_path])
29
29
  copy = Scanner.new([copy_path])
30
30
 
31
- master.scan
32
- copy.scan
31
+ master_recordset = Recordset.new
32
+ copy_recordset = SparseRecordset.new(copy)
33
33
 
34
- master.output.seek(0)
35
- copy.output.seek(0)
34
+ master.scan(master_recordset)
35
+
36
+ checker = Checker.new(master_recordset, copy_recordset)
36
37
 
37
- checker = Checker.new(master.output, copy.output)
38
38
  checker.check(&block)
39
39
 
40
40
  return checker
@@ -18,7 +18,7 @@
18
18
  # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19
19
  # THE SOFTWARE.
20
20
 
21
- require 'set'
21
+ require 'fingerprint/record'
22
22
 
23
23
  module Fingerprint
24
24
  # Given two fingerprints (master and copy) ensures that the copy has at least everything contained
@@ -35,81 +35,101 @@ module Fingerprint
35
35
  @master = master
36
36
  @copy = copy
37
37
 
38
- @mismatches = []
39
-
40
38
  @options = options
41
-
42
- @failures = []
43
39
  end
44
40
 
41
+ attr :master
42
+ attr :copy
43
+
45
44
  # Run the checking process.
46
45
  def check (options = {}, &block)
47
- @files = Set.new
48
- @file_paths = {}
49
- @file_hashes = {}
50
-
51
- # Parse original fingerprint
52
- @copy.each_line do |line|
53
- # Skip comments
54
- next if line.match(/^\s+#/)
55
-
56
- if line.chomp.match(/^([a-fA-F0-9]{32}): (.*)$/)
57
- @files.add([$1, $2])
58
-
59
- @file_paths[$2] = $1
60
- @file_hashes[$1] ||= Set.new
61
- @file_hashes[$1].add($2)
62
- end
63
- end
64
-
65
46
  # For every file in the src, we check that it exists
66
47
  # in the destination:
67
- @master.each_line do |line|
68
- # Skip comments
69
- next if line.match(/^\s+#/)
70
-
71
- if line.chomp.match(/^([a-fA-F0-9]{32}): (.*)$/)
72
- unless @files.include?([$1, $2])
73
- yield($1, $2) if block_given?
74
- @failures << [$1, $2]
48
+
49
+ total_count = @master.records.count
50
+ processed_size = 0
51
+ total_size = @master.records.inject(0) { |count, record| count + (record['file.size'] || 0).to_i }
52
+
53
+ @master.records.each_with_index do |record, processed_count|
54
+ next if record.mode != :file
55
+
56
+ result, message = @copy.compare(record)
57
+ if result != :valid
58
+ yield record, result, message
59
+ elsif @options[:extended]
60
+ # Extended check compares other attributes such as user, group, file modes.
61
+ changes = record.diff(copy.paths[record.path])
62
+
63
+ if changes.size > 0
64
+ yield record, :attribute_changed, "Attribute(s) #{changes.join(', ')} changed"
75
65
  end
76
66
  end
67
+
68
+ if @options[:progress]
69
+ $stderr.puts "# Progress: File #{processed_count} / #{total_count} = #{sprintf('%0.2f%', processed_count.to_f / total_count.to_f * 100.0)}; Byte #{processed_size} / #{total_size} = #{sprintf('%0.2f%', processed_size.to_f / total_size.to_f * 100.0)}"
70
+
71
+ processed_size += (record['file.size'] || 0).to_i
72
+ end
77
73
  end
78
74
  end
79
75
 
80
76
  # A list of files which either did not exist in the copy, or had the wrong checksum.
81
77
  attr :failures
82
-
83
- # An array of all files in the copy
84
- attr :files
85
-
86
- # A hash of all files in copy +path => file hash+
87
- attr :file_paths
88
-
89
- # A hash of all files in copy +file hash => [file1, file2, ...]+
90
- attr :file_hashes
78
+
79
+ def self.check_files(master, copy, options = {}, &block)
80
+ # New API that takes two Recordsets...
81
+
82
+ File.open(master) do |master_file|
83
+ File.open(copy) do |copy_file|
84
+ master_recordset = Recordset.new
85
+ master_recordset.parse(master_file)
86
+
87
+ copy_recordset = Recordset.new
88
+ copy_recordset.parse(copy_file)
89
+
90
+ verify(master_recordset, copy_recordset, options, &block)
91
+ end
92
+ end
93
+ end
91
94
 
92
95
  # Helper function to check two fingerprint files.
93
- def self.check_files(master, copy, &block)
96
+ def self.verify(master, copy, options = {}, &block)
94
97
  error_count = 0
95
-
96
- master = File.open(master) unless master.respond_to? :read
97
- copy = File.open(copy) unless copy.respond_to? :read
98
-
99
- checker = Checker.new(master, copy)
100
98
 
101
- checker.check do |hash, path|
99
+ errors = options.delete(:recordset) || Recordset.new
100
+ if options[:output]
101
+ errors = RecordsetPrinter.new(errors, options[:output])
102
+ end
103
+
104
+ checker = Checker.new(master, copy, options)
105
+
106
+ checker.check do |record, result, message|
102
107
  error_count += 1
108
+ copy = checker.copy.paths[record.path]
109
+
110
+ metadata = {
111
+ 'error.code' => result,
112
+ 'error.message' => message
113
+ }
103
114
 
104
- if !checker.file_paths[path]
105
- $stderr.puts "File #{path.dump} is missing!"
106
- elsif checker.file_paths[path] != hash
107
- $stderr.puts "File #{path.dump} is different!"
115
+ if copy
116
+ changes = record.diff(copy)
117
+
118
+ changes.each do |name|
119
+ metadata["changes.#{name}.old"] = record[name]
120
+ metadata["changes.#{name}.new"] = copy[name]
121
+ end
122
+
123
+ errors << Record.new(:warning, record.path, metadata)
108
124
  else
109
- $stderr.puts "Unknown error for path #{path.dump}"
125
+ errors << Record.new(:warning, record.path, metadata)
110
126
  end
111
127
  end
112
128
 
129
+ errors << Record.new(:summary, nil, {
130
+ 'error.count' => error_count
131
+ })
132
+
113
133
  return error_count
114
134
  end
115
135
 
@@ -0,0 +1,262 @@
1
+ # Copyright (c) 2011 Samuel G. D. Williams. <http://www.oriontransfer.co.nz>
2
+ #
3
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
4
+ # of this software and associated documentation files (the "Software"), to deal
5
+ # in the Software without restriction, including without limitation the rights
6
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7
+ # copies of the Software, and to permit persons to whom the Software is
8
+ # furnished to do so, subject to the following conditions:
9
+ #
10
+ # The above copyright notice and this permission notice shall be included in
11
+ # all copies or substantial portions of the Software.
12
+ #
13
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19
+ # THE SOFTWARE.
20
+
21
+ require 'set'
22
+
23
+ module Fingerprint
24
+
25
+ MODES = {
26
+ :configuration => 'C',
27
+ :file => 'F',
28
+ :directory => 'D',
29
+ :summary => 'S',
30
+ :warning => 'W',
31
+ :excluded => '#',
32
+ }
33
+
34
+ class Record
35
+ def initialize(mode, path, metadata)
36
+ @mode = mode
37
+ @path = path
38
+
39
+ @metadata = metadata
40
+ @keys = metadata.keys.grep(/^key\./)
41
+ end
42
+
43
+ attr :mode
44
+ attr :path
45
+ attr :metadata
46
+ attr :keys
47
+
48
+ def [](key)
49
+ @metadata[key]
50
+ end
51
+
52
+ def diff(other)
53
+ changes = []
54
+
55
+ all_keys = Set.new
56
+ all_keys += @metadata.keys + other.metadata.keys
57
+ # all_keys -= @keys + other.keys
58
+
59
+ all_keys.each do |key|
60
+ changes << key if @metadata[key].to_s != other.metadata[key].to_s
61
+ end
62
+
63
+ return changes
64
+ end
65
+
66
+ def options
67
+ options = {}
68
+
69
+ options[:extended] = true if @metadata['options.extended'] == 'true'
70
+ options[:checksum] = @metadata['options.checksums'].split(/[\s,]+/) if @metadata['options.checksums']
71
+
72
+ return options
73
+ end
74
+
75
+ def write(output)
76
+ output.puts "#{MODES[@mode]} #{@path}"
77
+
78
+ return if @mode == :excluded
79
+
80
+ @metadata.keys.sort.each do |key|
81
+ output.puts "\t#{key} #{@metadata[key]}"
82
+ end
83
+ end
84
+ end
85
+
86
+ class Recordset
87
+ def initialize
88
+ @records = []
89
+ @paths = {}
90
+ @keys = {}
91
+
92
+ @configuration = nil
93
+
94
+ @callback = nil
95
+ end
96
+
97
+ attr :records
98
+ attr :paths
99
+ attr :keys
100
+
101
+ attr :configuration
102
+
103
+ def <<(record)
104
+ @records << record
105
+ if record.mode == :configuration
106
+ # What should we do if we get multiple configurations?
107
+ @configuration = record
108
+ else
109
+ @paths[record.path] = record
110
+ record.keys.each do |key, value|
111
+ @keys[key] = value
112
+ end
113
+ end
114
+ end
115
+
116
+ def lookup(path)
117
+ return @paths[path]
118
+ end
119
+
120
+ def find(record)
121
+ result = lookup(record.path)
122
+
123
+ return result if result
124
+
125
+ @record.keys.each do |key, value|
126
+ result = @keys[key][value]
127
+
128
+ return result if result
129
+ end
130
+
131
+ return nil
132
+ end
133
+
134
+ def compare(other)
135
+ main = lookup(other.path)
136
+
137
+ # Did we find a corresponding other at the same path?
138
+ if main
139
+ # Keep track of how many keys were checked..
140
+ checked = 0
141
+
142
+ # Are all the keys of the other record equivalent to the main record?
143
+ other.keys.each do |key|
144
+ if main[key]
145
+ checked += 1
146
+
147
+ # Is the key the same?
148
+ if main[key] != other[key]
149
+ return :keys_different, "Key #{key.gsub(/^key\./, '')} does not match"
150
+ end
151
+ end
152
+ end
153
+
154
+ # Are the records the same size? We put this check second because we do this as a last resort to
155
+ # ensure that the file hasn't been deliberately tampered with.
156
+ if main.metadata['size'] and other.metadata['size'] and main.metadata['size'] != other.metadata['size']
157
+ return :size_different, "File size differs"
158
+ end
159
+
160
+ if checked == 0
161
+ return :no_keys, "No valid keys to check"
162
+ else
163
+ # At least one key could be validated.
164
+ return :valid, "Valid"
165
+ end
166
+ else
167
+ return :not_found, "File not found"
168
+ end
169
+ end
170
+
171
+ def self.parse(input)
172
+ mode = nil
173
+ path = nil
174
+ metadata = nil
175
+
176
+ markers = {}
177
+ MODES.each do |key, value|
178
+ markers[value] = key
179
+ end
180
+
181
+ # Parse original fingerprint
182
+ input.each_line do |line|
183
+ # Skip comments and blank lines
184
+ next if line.match(/^\s*#/) || line.match(/^\s*$/)
185
+
186
+ if line.match(/^([A-Z])\s+(.*)$/)
187
+ if path
188
+ yield mode, path, metadata
189
+ end
190
+
191
+ mode = markers[$1] || :unknown
192
+
193
+ path = $2
194
+ metadata = {}
195
+ elsif line.match(/^\s+([a-zA-Z\.0-9]+)\s+(.*)$/)
196
+ metadata[$1] = $2
197
+ else
198
+ $stderr.puts "Unhandled line: #{line}"
199
+ end
200
+ end
201
+
202
+ if path
203
+ yield mode, path, metadata
204
+ end
205
+ end
206
+
207
+ def parse(input)
208
+ self.class.parse(input) do |mode, path, metadata|
209
+ self << Record.new(mode, path, metadata)
210
+ end
211
+ end
212
+
213
+ def write(output)
214
+ @records.each do |record|
215
+ record.write(output)
216
+ end
217
+ end
218
+ end
219
+
220
+ # This record set dynamically computes data from the disk as required.
221
+ class SparseRecordset < Recordset
222
+ def initialize(scanner)
223
+ super()
224
+
225
+ @scanner = scanner
226
+ end
227
+
228
+ def lookup(path)
229
+ if @paths.key?(path)
230
+ return @paths[path]
231
+ else
232
+ @paths[path] = @scanner.scan_path(path)
233
+ end
234
+ end
235
+ end
236
+
237
+ class RecordsetWrapper
238
+ def initialize(recordset)
239
+ @recordset = recordset
240
+ end
241
+
242
+ def method_missing(name, *args, &block)
243
+ @recordset.send(name, *args, &block)
244
+ end
245
+
246
+ def respond_to?(name)
247
+ @recordset.respond_to?(name)
248
+ end
249
+ end
250
+
251
+ class RecordsetPrinter < RecordsetWrapper
252
+ def initialize(recordset, output)
253
+ super(recordset)
254
+ @output = output
255
+ end
256
+
257
+ def <<(record)
258
+ record.write(@output)
259
+ @recordset << record
260
+ end
261
+ end
262
+ end
@@ -20,10 +20,20 @@
20
20
 
21
21
  require 'stringio'
22
22
  require 'find'
23
- require 'digest'
23
+ require 'etc'
24
+ require 'digest/sha2'
24
25
 
25
26
  module Fingerprint
26
27
 
28
+ CHECKSUMS = {
29
+ 'MD5' => lambda { Digest::MD5.new },
30
+ 'SHA1' => lambda { Digest::SHA1.new },
31
+ 'SHA2.256' => lambda { Digest::SHA2.new(256) },
32
+ 'SHA2.512' => lambda { Digest::SHA2.new(512) },
33
+ }
34
+
35
+ DEFAULT_CHECKSUMS = ['MD5', 'SHA2.256']
36
+
27
37
  # The scanner class can scan a set of directories and produce an index.
28
38
  class Scanner
29
39
  # Initialize the scanner to scan a given set of directories in order.
@@ -33,45 +43,100 @@ module Fingerprint
33
43
  @roots = roots
34
44
 
35
45
  @excludes = options[:excludes] || []
36
- @output = options[:output] || StringIO.new
37
-
38
46
  @options = options
47
+
48
+ @digests = {}
49
+
50
+ unless @options[:checksums] and @options[:checksums].size > 0
51
+ @options[:checksums] = DEFAULT_CHECKSUMS
52
+ end
53
+
54
+ @options[:checksums].each do |name|
55
+ @digests[name] = CHECKSUMS[name].call
56
+ end
57
+
58
+ @callback = nil
39
59
  end
40
60
 
41
- attr :output
61
+ attr :recordset
62
+ attr :digests
42
63
 
43
64
  protected
44
-
65
+
45
66
  # Adds a header for a given path which is mainly version information.
46
- def output_header(root)
47
- @output.puts "\# Checksum generated by Fingerprint (#{Fingerprint::VERSION::STRING}) at #{Time.now.to_s}"
48
- @output.puts "\# Root: #{root}"
49
- end
50
-
51
- # Output a directory header.
52
- def output_dir(path)
53
- @output.puts ""
54
- @output.puts((" " * 32) + " #{path}")
67
+ def header_for(root)
68
+ Record.new(:configuration, File.expand_path(root), {
69
+ 'options.extended' => @options[:extended] == true,
70
+ 'options.checksums' => @options[:checksums].join(', '),
71
+ 'summary.time.start' => Time.now,
72
+ 'fingerprint.version' => Fingerprint::VERSION::STRING
73
+ })
55
74
  end
56
-
57
- # Output a file and associated metadata.
58
- def output_file(path)
59
- d = Digest::MD5.new
60
75
 
61
- File.open(path) do |f|
62
- while buf = f.read(1024*1024*10)
63
- d << buf
76
+ # This code won't handle multiple threads..
77
+ def digests_for(path)
78
+ @digests.each do |key, digest|
79
+ digest.reset
80
+ end
81
+
82
+ File.open(path, "rb") do |file|
83
+ buf = ""
84
+ while file.read(1024 * 1024 * 10, buf)
85
+ @digests.each do |key, digest|
86
+ digest << buf
87
+ end
64
88
  end
65
89
  end
66
90
 
67
- @output.puts "#{d.hexdigest}: #{path}"
91
+ metadata = {}
92
+
93
+ @digests.each do |key, digest|
94
+ metadata["key." + key] = digest.hexdigest
95
+ end
96
+
97
+ return metadata
68
98
  end
69
99
 
70
- # Add information about excluded paths.
71
- def output_excluded(path)
72
- if @options[:verbose]
73
- @output.puts '#'.ljust(32) + ": #{path}"
100
+ def metadata_for(type, path)
101
+ stat = File.stat(path)
102
+ metadata = {}
103
+
104
+ if type == :file
105
+ metadata['file.size'] = stat.size
106
+ digests = digests_for(path)
74
107
  end
108
+
109
+ # Extended information
110
+ if @options[:extended]
111
+ metadata['posix.time.modified'] = File.mtime(path)
112
+
113
+ metadata['posix.mode'] = stat.mode.to_s(8)
114
+
115
+ metadata['posix.permissions.user.id'] = stat.uid
116
+ metadata['posix.permissions.user.name'] = Etc.getpwuid(stat.uid).name
117
+ metadata['posix.permissions.group.id'] = stat.gid
118
+ metadata['posix.permissions.group.name'] = Etc.getgrgid(stat.gid).name
119
+ end
120
+
121
+ return metadata
122
+ end
123
+
124
+ # Output a directory header.
125
+ def directory_record_for(path)
126
+ Record.new(:directory, path, metadata_for(:directory, path))
127
+ end
128
+
129
+ # Output a file and associated metadata.
130
+ def file_record_for(path)
131
+ metadata = metadata_for(:file, path)
132
+ metadata.merge!(digests_for(path))
133
+
134
+ Record.new(:file, path, metadata)
135
+ end
136
+
137
+ # Add information about excluded paths.
138
+ def excluded_record_for(path)
139
+ Record.new(:excluded, path)
75
140
  end
76
141
 
77
142
  public
@@ -87,50 +152,119 @@ module Fingerprint
87
152
  return false
88
153
  end
89
154
 
155
+ def valid_file?(path)
156
+ !(excluded?(path) || File.symlink?(path) || !File.file?(path) || !File.readable?(path))
157
+ end
158
+
159
+ def scan_path(path)
160
+ @roots.each do |root|
161
+ Dir.chdir(root) do
162
+ if valid_file?(path)
163
+ return file_record_for(path)
164
+ end
165
+ end
166
+ end
167
+
168
+ return nil
169
+ end
170
+
90
171
  # Run the scanning process.
91
- def scan
172
+ def scan(recordset)
92
173
  excluded_count = 0
93
- checksummed_count = 0
174
+ processed_count = 0
175
+ processed_size = 0
94
176
  directory_count = 0
177
+
178
+ total_count = 0
179
+ total_size = 0
180
+
181
+ # Estimate the number of files and amount of data to process..
182
+ if @options[:progress]
183
+ @roots.each do |root|
184
+ Dir.chdir(root) do
185
+ Find.find("./") do |path|
186
+ if File.directory?(path)
187
+ if excluded?(path)
188
+ Find.prune # Ignore this directory
189
+ end
190
+ else
191
+ # Skip anything that isn't a valid file (e.g. pipes, sockets, symlinks).
192
+ if valid_file?(path)
193
+ total_count += 1
194
+ total_size += File.size(path)
195
+ end
196
+ end
197
+ end
198
+ end
199
+ end
200
+ end
95
201
 
96
202
  @roots.each do |root|
97
203
  Dir.chdir(root) do
98
- output_header(root)
204
+ recordset << header_for(root)
205
+
99
206
  Find.find("./") do |path|
100
207
  if File.directory?(path)
101
208
  if excluded?(path)
102
209
  excluded_count += 1
103
- output_excluded(path)
210
+
211
+ if @options[:verbose]
212
+ recordset << excluded_record_for(path)
213
+ end
214
+
104
215
  Find.prune # Ignore this directory
105
216
  else
106
217
  directory_count += 1
107
- output_dir(path)
218
+
219
+ recordset << directory_record_for(path)
108
220
  end
109
221
  else
110
222
  # Skip anything that isn't a valid file (e.g. pipes, sockets, symlinks).
111
- if excluded?(path) || File.symlink?(path) || !File.file?(path) || !File.readable?(path)
112
- excluded_count += 1
113
- output_excluded(path)
223
+ if valid_file?(path)
224
+ processed_count += 1
225
+ processed_size += File.size(path)
226
+
227
+ recordset << file_record_for(path)
114
228
  else
115
- checksummed_count += 1
116
- output_file(path)
229
+ excluded_count += 1
230
+
231
+ if @options[:verbose]
232
+ recordset << excluded_record_for(path)
233
+ end
117
234
  end
118
235
  end
236
+
237
+ # Print out a progress summary if requested
238
+ if @options[:progress]
239
+ $stderr.puts "# Progress: File #{processed_count} / #{total_count} = #{sprintf('%0.2f%', processed_count.to_f / total_count.to_f * 100.0)}; Byte #{processed_size} / #{total_size} = #{sprintf('%0.2f%', processed_size.to_f / total_size.to_f * 100.0)}"
240
+ end
119
241
  end
120
242
  end
121
243
  end
122
-
244
+
123
245
  # Output summary
124
- @output.puts "\# Directories: #{directory_count} Files: #{checksummed_count} Excluded: #{excluded_count}"
246
+ recordset << Record.new(:summary, nil, {
247
+ 'summary.directories' => directory_count,
248
+ 'summary.files' => processed_count,
249
+ 'summary.size' => processed_size,
250
+ 'summary.excluded' => excluded_count,
251
+ 'summary.time.end' => Time.now
252
+ })
253
+
254
+ return recordset
125
255
  end
126
256
 
127
257
  # A helper function to scan a set of directories.
128
258
  def self.scan_paths(paths, options = {})
259
+ if options[:output]
260
+ options[:recordset] = RecordsetPrinter.new(Recordset.new, options[:output])
261
+ end
262
+
129
263
  scanner = Scanner.new(paths, options)
130
-
131
- scanner.scan
132
-
133
- return scanner
264
+
265
+ scanner.scan(options[:recordset])
266
+
267
+ return options[:recordset]
134
268
  end
135
269
  end
136
270
  end
@@ -22,8 +22,8 @@
22
22
  module Fingerprint
23
23
  module VERSION
24
24
  MAJOR = 1
25
- MINOR = 2
26
- TINY = 4
25
+ MINOR = 3
26
+ TINY = 0
27
27
 
28
28
  STRING = [MAJOR, MINOR, TINY].join('.')
29
29
  end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fingerprint
3
3
  version: !ruby/object:Gem::Version
4
- hash: 23
4
+ hash: 27
5
5
  prerelease:
6
6
  segments:
7
7
  - 1
8
- - 2
9
- - 4
10
- version: 1.2.4
8
+ - 3
9
+ - 0
10
+ version: 1.3.0
11
11
  platform: ruby
12
12
  authors:
13
13
  - Samuel Williams
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2011-08-09 00:00:00 Z
18
+ date: 2011-08-18 00:00:00 Z
19
19
  dependencies: []
20
20
 
21
21
  description:
@@ -29,6 +29,7 @@ extra_rdoc_files: []
29
29
  files:
30
30
  - bin/fingerprint
31
31
  - lib/fingerprint/checker.rb
32
+ - lib/fingerprint/record.rb
32
33
  - lib/fingerprint/scanner.rb
33
34
  - lib/fingerprint/version.rb
34
35
  - lib/fingerprint.rb