fingerprint 1.3.1 → 3.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,90 @@
1
+ # Copyright, 2016, by Samuel G. D. Williams. <http://www.codeotaku.com>
2
+ #
3
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
4
+ # of this software and associated documentation files (the "Software"), to deal
5
+ # in the Software without restriction, including without limitation the rights
6
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7
+ # copies of the Software, and to permit persons to whom the Software is
8
+ # furnished to do so, subject to the following conditions:
9
+ #
10
+ # The above copyright notice and this permission notice shall be included in
11
+ # all copies or substantial portions of the Software.
12
+ #
13
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19
+ # THE SOFTWARE.
20
+
21
+ # This file defines the top-level command line interface. It parses the global
22
+ # options (root, output, help, version) and dispatches to the nested commands:
23
+ # scan, analyze, verify, compare and duplicates (analyze is the default).
24
+
25
+ require 'samovar'
26
+
27
+ require_relative 'scanner'
28
+
29
+ require_relative 'command/scan'
30
+ require_relative 'command/analyze'
31
+ require_relative 'command/verify'
32
+ require_relative 'command/compare'
33
+ require_relative 'command/duplicates'
34
+
35
module Fingerprint
  module Command
    # Convenience entry point: forwards all arguments to {Top.call}.
    def self.call(*args)
      Top.call(*args)
    end

    # The top-level command. It owns the global options and dispatches to one
    # of the nested commands ('analyze' when none is named).
    class Top < Samovar::Command
      self.description = "A file checksum analysis and verification tool."

      options do
        option '--root <path>', "Work in the given root directory."

        option '-o/--output <path>', "Output the transcript to a specific file rather than stdout."

        option '-h/--help', "Print out help information."
        option '-v/--version', "Print out the application version."
      end

      # Runs the block inside the directory given by --root, or in the current
      # working directory when --root was not specified.
      def chdir(&block)
        if (root = @options[:root])
          Dir.chdir(root, &block)
        else
          yield
        end
      end

      # The IO that nested commands write their transcript to: the file named
      # by --output (opened for writing), or $stdout.
      #
      # The handle is memoized so that repeated calls share a single open file
      # instead of re-opening (and truncating) it every time. A dedicated
      # instance variable name is used so it cannot collide with any state the
      # Samovar base class may keep.
      def output
        @transcript_output ||=
          if (path = @options[:output])
            File.open(path, "w")
          else
            $stdout
          end
      end

      nested :command, {
        'scan' => Scan,
        'analyze' => Analyze,
        'verify' => Verify,
        'compare' => Compare,
        'duplicates' => Duplicates
      }, default: 'analyze'

      # Prints the version, prints the usage, or runs the selected nested
      # command inside the configured root directory.
      def call
        if @options[:version]
          puts "fingerprint v#{VERSION}"
        elsif @options[:help]
          self.print_usage
        else
          chdir do
            @command.call
          end
        end
      ensure
        # Runs even when a nested command exits via `abort`, so the transcript
        # file is always flushed and released.
        close_transcript_output
      end

      private

      # Closes the transcript file if this command opened one. $stdout is
      # never closed.
      def close_transcript_output
        transcript = @transcript_output

        return if transcript.nil? || transcript.equal?($stdout)

        transcript.close unless transcript.closed?
      end
    end
  end
end
@@ -0,0 +1,74 @@
1
+ # Copyright, 2016, by Samuel G. D. Williams. <http://www.codeotaku.com>
2
+ #
3
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
4
+ # of this software and associated documentation files (the "Software"), to deal
5
+ # in the Software without restriction, including without limitation the rights
6
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7
+ # copies of the Software, and to permit persons to whom the Software is
8
+ # furnished to do so, subject to the following conditions:
9
+ #
10
+ # The above copyright notice and this permission notice shall be included in
11
+ # all copies or substantial portions of the Software.
12
+ #
13
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19
+ # THE SOFTWARE.
20
+
21
+ # This command scans the given paths and writes the resulting fingerprint to
22
+ # the named index file. It refuses to overwrite an existing index unless
23
+ # --force is given, and removes the index again if the scan does not finish.
24
+
25
+ require 'samovar'
26
+ require 'fileutils'
27
+
28
+ require_relative '../checksums'
29
+ require_relative '../scanner'
30
+ require_relative '../record'
31
+
32
module Fingerprint
  module Command
    # Scans the given paths and writes the resulting fingerprint to the index
    # file named by --name.
    class Analyze < Samovar::Command
      self.description = "Generates a fingerprint for the specified paths and saves it."

      options do
        option "-n/--name <name>", "The fingerprint file name.", default: INDEX_FINGERPRINT

        option "-f/--force", "Force all operations to complete despite warnings."
        option "-x/--extended", "Include extended information about files and directories."
        option "-s/--checksums <SHA2.256>", "Specify what checksum algorithms to use: #{CHECKSUMS.keys.join(', ')}.", default: DEFAULT_CHECKSUMS, type: Checksums

        option "--progress", "Print structured progress to standard error."
        option "--verbose", "Verbose fingerprint output, e.g. excluded paths."
      end

      many :paths, "Paths relative to the root to use for verification, or pwd if not specified.", default: ["./"]

      # Writes the fingerprint of @paths to the index file.
      #
      # Aborts if the index file already exists and --force was not given.
      # If the scan does not complete, the partially written index is removed
      # and the original error propagates.
      def call
        output_file = @options[:name]

        if File.exist?(output_file) && !@options[:force]
          abort "Output file #{output_file} already exists. Aborting."
        end

        options = @options.dup
        # The index file itself is passed to the scanner as an excluded path.
        options[:excludes] = [File.expand_path(options[:name], Dir.pwd)]

        finished = false
        begin
          File.open(output_file, "w") do |io|
            options[:output] = io

            Scanner.scan_paths(@paths, **options)
          end
          finished = true
        ensure
          # rm_f rather than rm: if File.open itself failed there is no file
          # to remove, and raising here would hide the original error.
          FileUtils.rm_f(output_file) unless finished
        end
      end
    end
  end
end
@@ -0,0 +1,55 @@
1
+ # Copyright, 2016, by Samuel G. D. Williams. <http://www.codeotaku.com>
2
+ #
3
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
4
+ # of this software and associated documentation files (the "Software"), to deal
5
+ # in the Software without restriction, including without limitation the rights
6
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7
+ # copies of the Software, and to permit persons to whom the Software is
8
+ # furnished to do so, subject to the following conditions:
9
+ #
10
+ # The above copyright notice and this permission notice shall be included in
11
+ # all copies or substantial portions of the Software.
12
+ #
13
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19
+ # THE SOFTWARE.
20
+
21
+ # This command compares two fingerprint files, a master and a copy, and
22
+ # reports the differences. With --fail-on-errors it exits with a non-zero
23
+ # status when any errors are found.
24
+
25
+ require 'samovar'
26
+
27
module Fingerprint
  module Command
    # Checks a copy fingerprint against a master fingerprint.
    class Compare < Samovar::Command
      self.description = "Compare two fingerprints and report additions, removals and changes."

      options do
        option "-x/--extended", "Include extended information about files and directories."
        option "-a/--additions", "Report files that have been added to the copy."
        option "--fail-on-errors", "Exit with non-zero status if errors are encountered."

        option "--progress", "Print structured progress to standard error."
      end

      one :master, "The fingerprint which represents the original data."
      one :copy, "The fingerprint which represents a copy of the data."

      # Hands both fingerprints to Checker.check_files, writing to the parent
      # command's output. When --fail-on-errors is set and the reported error
      # count is not zero, the process is aborted with a summary message.
      def call
        checker_options = @options.dup.tap do |settings|
          settings[:output] = @parent.output
        end

        error_count = Checker.check_files(@master, @copy, **checker_options)

        return unless @options[:fail_on_errors]

        abort "Data inconsistent, #{error_count} error(s) found!" unless error_count == 0
      end
    end
  end
end
@@ -0,0 +1,98 @@
1
+ # Copyright, 2016, by Samuel G. D. Williams. <http://www.codeotaku.com>
2
+ #
3
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
4
+ # of this software and associated documentation files (the "Software"), to deal
5
+ # in the Software without restriction, including without limitation the rights
6
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7
+ # copies of the Software, and to permit persons to whom the Software is
8
+ # furnished to do so, subject to the following conditions:
9
+ #
10
+ # The above copyright notice and this permission notice shall be included in
11
+ # all copies or substantial portions of the Software.
12
+ #
13
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19
+ # THE SOFTWARE.
20
+
21
+ # This command looks up every record of the copy fingerprints in the master
22
+ # fingerprint and prints those which are also present in the master (or those
23
+ # which are not, with --inverse). With no copies, the master is checked against itself.
24
+
25
+ require 'samovar'
26
+
27
module Fingerprint
  module Command
    # Reports records of one or more copy fingerprints that also occur in a
    # master fingerprint (or, with --inverse, those that do not).
    class Duplicates < Samovar::Command
      self.description = "Efficiently find duplicates in a given fingerprint."

      options do
        option "-i/--inverse", "Invert the output, i.e. show files which are not duplicates."
        option "-x/--extended", "Include extended information about files and directories."

        option "--verbose", "Verbose output, e.g. what is happening."
      end

      one :master, "The source fingerprint which represents the primary file list."
      many :copies, "Zero or more fingerprints which might contain duplicates.", default: []

      # The record set the printer is constructed with; assigned by #call.
      attr :duplicates_recordset

      # Parses the master fingerprint, then walks every record of every copy
      # fingerprint and pushes it to the results printer when it is (or, with
      # --inverse, is not) found in the master.
      #
      # When no copies are given, the master is compared against itself and a
      # record only counts as a duplicate if the matching master record has a
      # different path.
      def call
        @options[:output] = @parent.output

        @duplicates_recordset = RecordSet.new
        results = RecordSetPrinter.new(duplicates_recordset, @options[:output])

        master_file_path = @master
        File.open(master_file_path) do |master_file|
          master_recordset = RecordSet.new
          master_recordset.parse(master_file)

          copy_file_paths = @copies

          # No copies: look for duplicates within the master itself.
          ignore_similar = copy_file_paths.empty?
          copy_file_paths = [master_file_path] if ignore_similar

          copy_file_paths.each do |copy_file_path|
            File.open(copy_file_path) do |copy_file|
              copy_recordset = RecordSet.new
              copy_recordset.parse(copy_file)

              copy_recordset.records.each do |record|
                record.metadata['fingerprint'] = copy_file_path

                if @options[:verbose]
                  $stderr.puts "Checking #{record.inspect}"
                end

                main_record = find_original(master_recordset, record, ignore_similar)

                if main_record
                  record.metadata['original.path'] = main_record.path
                  record.metadata['original.fingerprint'] = master_file_path
                  results << record if !@options[:inverse]
                else
                  results << record if @options[:inverse]
                end
              end
            end
          end
        end
      end

      private

      # Looks the record up in the master record set. When the master is being
      # scanned against itself, a match with the same path is the record
      # itself rather than a duplicate, so nil is returned for it.
      def find_original(master_recordset, record, ignore_similar)
        main_record = master_recordset.find_by_key(record)

        return nil if ignore_similar && main_record && (main_record.path == record.path)

        main_record
      end
    end
  end
end
@@ -0,0 +1,61 @@
1
+ # Copyright, 2016, by Samuel G. D. Williams. <http://www.codeotaku.com>
2
+ #
3
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
4
+ # of this software and associated documentation files (the "Software"), to deal
5
+ # in the Software without restriction, including without limitation the rights
6
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7
+ # copies of the Software, and to permit persons to whom the Software is
8
+ # furnished to do so, subject to the following conditions:
9
+ #
10
+ # The above copyright notice and this permission notice shall be included in
11
+ # all copies or substantial portions of the Software.
12
+ #
13
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19
+ # THE SOFTWARE.
20
+
21
+ # This command scans the given paths (or the current directory when none are
22
+ # given) and prints the resulting fingerprint to the output selected by the
23
+ # top-level command.
24
+
25
+ require 'samovar'
26
+
27
+ require_relative '../checksums'
28
+ require_relative '../scanner'
29
+ require_relative '../record'
30
+
31
module Fingerprint
  module Command
    # Scans paths and writes the fingerprint to the parent command's output.
    class Scan < Samovar::Command
      self.description = "Generate a fingerprint from the given paths."

      options do
        option "-p/--path <path>", "Analyze the given path relative to root.", default: "./"

        option "-x/--extended", "Include extended information about files and directories."
        option "-s/--checksums <SHA2.256>", "Specify what checksum algorithms to use: #{CHECKSUMS.keys.join(', ')}.", default: DEFAULT_CHECKSUMS, type: Checksums

        option "--progress", "Print structured progress to standard error."
        option "--verbose", "Verbose fingerprint output, e.g. excluded paths."
      end

      many :paths, "Paths to scan."

      # Runs Scanner.scan_paths over @paths, falling back to the current
      # working directory when no paths were supplied.
      def call
        @paths ||= [Dir.pwd]

        scan_options = @options.dup.tap do |settings|
          # Records go to whatever the top-level command selected as output
          # (a file or $stdout); recordset is explicitly passed as nil.
          settings[:output] = @parent.output
          settings[:recordset] = nil
        end

        Scanner.scan_paths(@paths, **scan_options)
      end
    end
  end
end
@@ -0,0 +1,86 @@
1
+ # Copyright, 2016, by Samuel G. D. Williams. <http://www.codeotaku.com>
2
+ #
3
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
4
+ # of this software and associated documentation files (the "Software"), to deal
5
+ # in the Software without restriction, including without limitation the rights
6
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7
+ # copies of the Software, and to permit persons to whom the Software is
8
+ # furnished to do so, subject to the following conditions:
9
+ #
10
+ # The above copyright notice and this permission notice shall be included in
11
+ # all copies or substantial portions of the Software.
12
+ #
13
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19
+ # THE SOFTWARE.
20
+
21
+ # This command loads an existing fingerprint index and checks it against the
22
+ # files currently on disk. With --fail-on-errors it exits with a non-zero
23
+ # status when any errors are found.
24
+
25
+ require 'samovar'
26
+
27
+ require_relative '../checker'
28
+ require_relative '../record'
29
+
30
module Fingerprint
  module Command
    # Verifies a stored fingerprint index against a fresh scan of the paths.
    class Verify < Samovar::Command
      self.description = "Check an existing fingerprint against the filesystem."

      options do
        option "-n/--name <name>", "The fingerprint file name.", default: INDEX_FINGERPRINT

        option "-f/--force", "Force all operations to complete despite warnings."
        option "-x/--extended", "Include extended information about files and directories."

        option "-s/--checksums <SHA2.256>", "Specify what checksum algorithms to use (#{Fingerprint::CHECKSUMS.keys.join(', ')}).", default: Fingerprint::DEFAULT_CHECKSUMS

        option "--progress", "Print structured progress to standard error."
        option "--verbose", "Verbose fingerprint output, e.g. excluded paths."

        option "--fail-on-errors", "Exit with non-zero status if errors are encountered."
      end

      many :paths, "Paths relative to the root to use for verification, or ./ if not specified.", default: ["./"]

      # The value returned by Checker.verify; assigned by #call.
      attr :error_count

      # Parses the index named by --name, merges any options recorded in its
      # configuration over the command line options, and verifies the index
      # against a scanner-backed sparse record set.
      #
      # Aborts when the index is missing, or when --fail-on-errors is set and
      # the error count is not zero.
      def call
        index_path = @options[:name]

        abort "Can't find index #{index_path}. Aborting." unless File.exist?(index_path)

        verify_options = @options.dup
        verify_options[:output] = @parent.output

        master = RecordSet.new
        File.open(index_path, "r") { |io| master.parse(io) }

        # Options stored in the index take precedence over the command line.
        verify_options.merge!(master.configuration.options) if master.configuration

        # The copy is a sparse record set backed by the scanner, so additions
        # cannot be checked here.
        copy = SparseRecordSet.new(Scanner.new(@paths, **verify_options))

        @error_count = Checker.verify(master, copy, **verify_options)

        return unless @options[:fail_on_errors]

        abort "Data inconsistent, #{error_count} error(s) found!" unless error_count == 0
      end
    end
  end
end