fingerprint 1.3.1 → 3.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,90 @@
1
+ # Copyright, 2016, by Samuel G. D. Williams. <http://www.codeotaku.com>
2
+ #
3
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
4
+ # of this software and associated documentation files (the "Software"), to deal
5
+ # in the Software without restriction, including without limitation the rights
6
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7
+ # copies of the Software, and to permit persons to whom the Software is
8
+ # furnished to do so, subject to the following conditions:
9
+ #
10
+ # The above copyright notice and this permission notice shall be included in
11
+ # all copies or substantial portions of the Software.
12
+ #
13
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19
+ # THE SOFTWARE.
20
+
21
+ # This file defines the top-level `fingerprint` command. It handles the
22
+ # global options (root, output, help, version) and dispatches to the nested
23
+ # scan, analyze, verify, compare and duplicates commands.
24
+
25
+ require 'samovar'
26
+
27
+ require_relative 'scanner'
28
+
29
+ require_relative 'command/scan'
30
+ require_relative 'command/analyze'
31
+ require_relative 'command/verify'
32
+ require_relative 'command/compare'
33
+ require_relative 'command/duplicates'
34
+
35
module Fingerprint
  module Command
    # Command line entry point: forwards all arguments to the top-level command.
    def self.call(*args)
      Top.call(*args)
    end

    # The top-level command. It owns the global options and dispatches to one
    # of the nested sub-commands ('analyze' when none is named).
    class Top < Samovar::Command
      self.description = "A file checksum analysis and verification tool."

      options do
        option '--root <path>', "Work in the given root directory."

        option '-o/--output <path>', "Output the transcript to a specific file rather than stdout."

        option '-h/--help', "Print out help information."
        option '-v/--version', "Print out the application version."
      end

      # Runs the given block inside the directory named by --root, or in the
      # current working directory when no root was given.
      def chdir(&block)
        if (root = @options[:root])
          Dir.chdir(root, &block)
        else
          yield
        end
      end

      # Returns the IO the transcript should be written to: the file named by
      # --output (opened for writing), or $stdout when no path was given.
      #
      # The file is opened once and reused on subsequent calls, so repeated
      # calls neither truncate the transcript again nor leak extra handles.
      # The path is resolved against whatever directory is current at the
      # time of the first call.
      def output
        return $stdout unless (path = @options[:output])

        @transcript ||= File.open(path, "w")
      end

      nested :command, {
        'scan' => Scan,
        'analyze' => Analyze,
        'verify' => Verify,
        'compare' => Compare,
        'duplicates' => Duplicates
      }, default: 'analyze'

      # Prints the version or usage when requested; otherwise runs the
      # selected sub-command inside the root directory.
      def call
        if @options[:version]
          puts "fingerprint v#{VERSION}"
        elsif @options[:help]
          self.print_usage
        else
          begin
            chdir do
              @command.call
            end
          ensure
            # Runs even when the sub-command raises or calls abort, so the
            # transcript file is always flushed and closed.
            close_transcript
          end
        end
      end

      private

      # Closes the transcript file if this command opened one. $stdout is
      # never stored in @transcript, so it is never closed here.
      def close_transcript
        return unless @transcript

        @transcript.close unless @transcript.closed?
        @transcript = nil
      end
    end
  end
end
@@ -0,0 +1,74 @@
1
+ # Copyright, 2016, by Samuel G. D. Williams. <http://www.codeotaku.com>
2
+ #
3
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
4
+ # of this software and associated documentation files (the "Software"), to deal
5
+ # in the Software without restriction, including without limitation the rights
6
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7
+ # copies of the Software, and to permit persons to whom the Software is
8
+ # furnished to do so, subject to the following conditions:
9
+ #
10
+ # The above copyright notice and this permission notice shall be included in
11
+ # all copies or substantial portions of the Software.
12
+ #
13
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19
+ # THE SOFTWARE.
20
+
21
+ # This file defines the `analyze` command, which generates a fingerprint
22
+ # for the given paths and saves it to a named index file, removing the
23
+ # partially written file again if the scan does not finish.
24
+
25
+ require 'samovar'
26
+ require 'fileutils'
27
+
28
+ require_relative '../checksums'
29
+ require_relative '../scanner'
30
+ require_relative '../record'
31
+
32
module Fingerprint
  module Command
    # Sub-command that scans the given paths and writes the resulting
    # fingerprint to the file named by --name.
    class Analyze < Samovar::Command
      self.description = "Generates a fingerprint for the specified paths and saves it."

      options do
        option "-n/--name <name>", "The fingerprint file name.", default: INDEX_FINGERPRINT

        option "-f/--force", "Force all operations to complete despite warnings."
        option "-x/--extended", "Include extended information about files and directories."
        option "-s/--checksums <SHA2.256>", "Specify what checksum algorithms to use: #{CHECKSUMS.keys.join(', ')}.", default: DEFAULT_CHECKSUMS, type: Checksums

        option "--progress", "Print structured progress to standard error."
        option "--verbose", "Verbose fingerprint output, e.g. excluded paths."
      end

      many :paths, "Paths relative to the root to use for verification, or pwd if not specified.", default: ["./"]

      # Writes a fingerprint of @paths to the index file.
      #
      # Aborts if the index file already exists and --force was not given.
      # If the scan does not complete (an exception or an abort), the index
      # file is removed so that no partial index is left behind.
      def call
        output_file = @options[:name]

        if File.exist?(output_file) && !@options[:force]
          abort "Output file #{output_file} already exists. Aborting."
        end

        scan_options = @options.dup
        # Hand the scanner the absolute path of the index file as an exclusion
        # (presumably so the index being written is not itself fingerprinted).
        scan_options[:excludes] = [File.expand_path(scan_options[:name], Dir.pwd)]

        completed = false
        begin
          File.open(output_file, "w") do |io|
            scan_options[:output] = io

            Scanner.scan_paths(@paths, **scan_options)
          end
          completed = true
        ensure
          # rm_f rather than rm: if File.open itself failed there is no file
          # to delete, and rm would raise Errno::ENOENT from this ensure
          # clause, hiding the exception that actually caused the failure.
          FileUtils.rm_f(output_file) unless completed
        end
      end
    end
  end
end
@@ -0,0 +1,55 @@
1
+ # Copyright, 2016, by Samuel G. D. Williams. <http://www.codeotaku.com>
2
+ #
3
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
4
+ # of this software and associated documentation files (the "Software"), to deal
5
+ # in the Software without restriction, including without limitation the rights
6
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7
+ # copies of the Software, and to permit persons to whom the Software is
8
+ # furnished to do so, subject to the following conditions:
9
+ #
10
+ # The above copyright notice and this permission notice shall be included in
11
+ # all copies or substantial portions of the Software.
12
+ #
13
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19
+ # THE SOFTWARE.
20
+
21
+ # This file defines the `compare` command, which checks a copy fingerprint
22
+ # against a master fingerprint, reports the differences, and can exit with
23
+ # a failure status when errors are found.
24
+
25
+ require 'samovar'
26
+
27
module Fingerprint
  module Command
    # Sub-command that compares a master fingerprint with a copy fingerprint.
    #
    # NOTE(review): this file only requires 'samovar'; Checker is expected to
    # have been loaded elsewhere before #call runs - confirm the load order.
    class Compare < Samovar::Command
      self.description = "Compare two fingerprints and report additions, removals and changes."

      options do
        option "-x/--extended", "Include extended information about files and directories."
        option "-a/--additions", "Report files that have been added to the copy."
        option "--fail-on-errors", "Exit with non-zero status if errors are encountered."

        option "--progress", "Print structured progress to standard error."
      end

      one :master, "The fingerprint which represents the original data."
      one :copy, "The fingerprint which represents a copy of the data."

      # Checks @copy against @master, writing the report to the parent
      # command's output. With --fail-on-errors, aborts when the checker
      # returns a non-zero error count.
      def call
        check_options = @options.dup
        check_options[:output] = @parent.output

        error_count = Checker.check_files(@master, @copy, **check_options)

        if @options[:fail_on_errors] && error_count != 0
          abort "Data inconsistent, #{error_count} error(s) found!"
        end
      end
    end
  end
end
@@ -0,0 +1,98 @@
1
+ # Copyright, 2016, by Samuel G. D. Williams. <http://www.codeotaku.com>
2
+ #
3
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
4
+ # of this software and associated documentation files (the "Software"), to deal
5
+ # in the Software without restriction, including without limitation the rights
6
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7
+ # copies of the Software, and to permit persons to whom the Software is
8
+ # furnished to do so, subject to the following conditions:
9
+ #
10
+ # The above copyright notice and this permission notice shall be included in
11
+ # all copies or substantial portions of the Software.
12
+ #
13
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19
+ # THE SOFTWARE.
20
+
21
+ # This file defines the `duplicates` command, which reports the records in
22
+ # one or more fingerprints that also exist in a master fingerprint (or,
23
+ # with --inverse, the records that do not).
24
+
25
+ require 'samovar'
26
+
27
module Fingerprint
  module Command
    # Sub-command that looks for records of the copy fingerprints which are
    # also present in the master fingerprint.
    class Duplicates < Samovar::Command
      self.description = "Efficiently find duplicates in a given fingerprint."

      options do
        option "-i/--inverse", "Invert the output, i.e. show files which are not duplicates."
        option "-x/--extended", "Include extended information about files and directories."

        option "--verbose", "Verbose output, e.g. what is happening."
      end

      one :master, "The source fingerprint which represents the primarily file list."
      many :copies, "Zero or more fingerprints which might contain duplicates.", default: []

      # The record set handed to the printer in #call.
      attr :duplicates_recordset

      # Parses the master fingerprint, then every copy fingerprint, and sends
      # each copy record that has a match in the master to the printer (or,
      # with --inverse, each record that has no match).
      #
      # When no copies are given, the master is checked against itself and a
      # record that matches a master record with the same path is treated as
      # having no match.
      def call
        @options[:output] = @parent.output

        @duplicates_recordset = RecordSet.new
        printer = RecordSetPrinter.new(duplicates_recordset, @options[:output])

        master_path = @master
        File.open(master_path) do |master_io|
          master_records = RecordSet.new
          master_records.parse(master_io)

          candidate_paths = @copies

          # With no copies, compare the master index against itself.
          same_index = candidate_paths.size == 0
          candidate_paths = [master_path] if same_index

          candidate_paths.each do |candidate_path|
            File.open(candidate_path) do |candidate_io|
              candidate_records = RecordSet.new
              candidate_records.parse(candidate_io)

              candidate_records.records.each do |record|
                # Remember which fingerprint file this record came from.
                record.metadata['fingerprint'] = candidate_path

                $stderr.puts "Checking #{record.inspect}" if @options[:verbose]

                # Look the record up in the master.
                original = master_records.find_by_key(record)

                # Within a single index, a match with the same path is not
                # counted, so only duplicates inside that file are reported.
                original = nil if same_index && original && (original.path == record.path)

                if original
                  record.metadata['original.path'] = original.path
                  record.metadata['original.fingerprint'] = master_path
                  printer << record unless @options[:inverse]
                elsif @options[:inverse]
                  printer << record
                end
              end
            end
          end
        end
      end
    end
  end
end
@@ -0,0 +1,61 @@
1
+ # Copyright, 2016, by Samuel G. D. Williams. <http://www.codeotaku.com>
2
+ #
3
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
4
+ # of this software and associated documentation files (the "Software"), to deal
5
+ # in the Software without restriction, including without limitation the rights
6
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7
+ # copies of the Software, and to permit persons to whom the Software is
8
+ # furnished to do so, subject to the following conditions:
9
+ #
10
+ # The above copyright notice and this permission notice shall be included in
11
+ # all copies or substantial portions of the Software.
12
+ #
13
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19
+ # THE SOFTWARE.
20
+
21
+ # This file defines the `scan` command, which generates a fingerprint for
22
+ # the given paths and writes it to the transcript output of the parent
23
+ # command.
24
+
25
+ require 'samovar'
26
+
27
+ require_relative '../checksums'
28
+ require_relative '../scanner'
29
+ require_relative '../record'
30
+
31
module Fingerprint
  module Command
    # Sub-command that fingerprints the given paths and writes the result to
    # the parent command's output.
    class Scan < Samovar::Command
      self.description = "Generate a fingerprint from the given paths."

      options do
        # NOTE(review): --path is not read directly in #call; it is only
        # forwarded to the scanner as part of the options hash.
        option "-p/--path <path>", "Analyze the given path relative to root.", default: "./"

        option "-x/--extended", "Include extended information about files and directories."
        option "-s/--checksums <SHA2.256>", "Specify what checksum algorithms to use: #{CHECKSUMS.keys.join(', ')}.", default: DEFAULT_CHECKSUMS, type: Checksums

        option "--progress", "Print structured progress to standard error."
        option "--verbose", "Verbose fingerprint output, e.g. excluded paths."
      end

      many :paths, "Paths to scan."

      # Scans @paths, falling back to the current working directory when no
      # paths were supplied.
      def call
        # Cover both "no value" (nil) and "no arguments" (an empty list);
        # an empty array is truthy in Ruby, so checking for nil alone would
        # leave nothing to scan.
        @paths = [Dir.pwd] if @paths.nil? || @paths.empty?

        scan_options = @options.dup

        # Write to the parent command's output (a file when --output was
        # given, otherwise $stdout) and pass no record set.
        scan_options[:output] = @parent.output
        scan_options[:recordset] = nil

        Scanner.scan_paths(@paths, **scan_options)
      end
    end
  end
end
@@ -0,0 +1,86 @@
1
+ # Copyright, 2016, by Samuel G. D. Williams. <http://www.codeotaku.com>
2
+ #
3
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
4
+ # of this software and associated documentation files (the "Software"), to deal
5
+ # in the Software without restriction, including without limitation the rights
6
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7
+ # copies of the Software, and to permit persons to whom the Software is
8
+ # furnished to do so, subject to the following conditions:
9
+ #
10
+ # The above copyright notice and this permission notice shall be included in
11
+ # all copies or substantial portions of the Software.
12
+ #
13
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19
+ # THE SOFTWARE.
20
+
21
+ # This file defines the `verify` command, which checks an existing
22
+ # fingerprint index against the current contents of the filesystem and can
23
+ # exit with a failure status when inconsistencies are found.
24
+
25
+ require 'samovar'
26
+
27
+ require_relative '../checker'
28
+ require_relative '../record'
29
+
30
module Fingerprint
  module Command
    # Sub-command that verifies the filesystem against a saved fingerprint.
    #
    # NOTE(review): unlike the analyze and scan commands, --checksums here is
    # declared without `type: Checksums` - confirm whether that is intended.
    class Verify < Samovar::Command
      self.description = "Check an existing fingerprint against the filesystem."

      options do
        option "-n/--name <name>", "The fingerprint file name.", default: INDEX_FINGERPRINT

        option "-f/--force", "Force all operations to complete despite warnings."
        option "-x/--extended", "Include extended information about files and directories."

        option "-s/--checksums <SHA2.256>", "Specify what checksum algorithms to use (#{Fingerprint::CHECKSUMS.keys.join(', ')}).", default: Fingerprint::DEFAULT_CHECKSUMS

        option "--progress", "Print structured progress to standard error."
        option "--verbose", "Verbose fingerprint output, e.g. excluded paths."

        option "--fail-on-errors", "Exit with non-zero status if errors are encountered."
      end

      many :paths, "Paths relative to the root to use for verification, or ./ if not specified.", default: ["./"]

      # The value returned by the checker in the last #call.
      attr :error_count

      # Loads the index named by --name and verifies @paths against it.
      #
      # Aborts if the index file does not exist. With --fail-on-errors, also
      # aborts when the checker returns a non-zero error count.
      def call
        index_path = @options[:name]

        abort "Can't find index #{index_path}. Aborting." unless File.exist?(index_path)

        verify_options = @options.dup
        verify_options[:output] = @parent.output

        master = RecordSet.new
        File.open(index_path, "r") { |io| master.parse(io) }

        # Options recorded in the index are merged over the command line ones.
        verify_options.merge!(master.configuration.options) if master.configuration

        scanner = Scanner.new(@paths, **verify_options)

        # We use a sparse record set here, so we can't check for additions.
        copy = SparseRecordSet.new(scanner)

        @error_count = Checker.verify(master, copy, **verify_options)

        if @options[:fail_on_errors] && error_count != 0
          abort "Data inconsistent, #{error_count} error(s) found!"
        end
      end
    end
  end
end