finddups 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +11 -0
- data/.rspec +3 -0
- data/.travis.yml +6 -0
- data/Gemfile +7 -0
- data/Gemfile.lock +34 -0
- data/LICENSE.txt +21 -0
- data/README.md +45 -0
- data/Rakefile +6 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/exe/finddups +151 -0
- data/finddups.gemspec +25 -0
- data/lib/finddups.rb +5 -0
- data/lib/finddups/version.rb +3 -0
- metadata +60 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 1cb43cbb4ba1ea49a71b5d369cffbd0decf21f0c77764b2252d4211b7b1ccb65
|
4
|
+
data.tar.gz: cc04d2a23bcb19c8b8849e815e847df2472f681d46e6e7007d8cf8880acae1c6
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 1c46b2d368ba239f703bf20a4c5297cf03d9f9275ea5e924a87112a2d9d92d8488e4b1e37453aec7b81b4b294629d2fe39a0d6e76114981eefa8b1c23e4b61d7
|
7
|
+
data.tar.gz: ec31d5d9d66ac0081c8d0571a9a7783006df85f63f5a0711b1fbc10e3e2249fb2eb758804eabe6cc4db740c07c4517586686d1e9c1e5a140f9d766314ceb9dca
|
data/.gitignore
ADDED
data/.rspec
ADDED
data/.travis.yml
ADDED
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
@@ -0,0 +1,34 @@
|
|
1
|
+
PATH
|
2
|
+
remote: .
|
3
|
+
specs:
|
4
|
+
finddups (0.1.0)
|
5
|
+
|
6
|
+
GEM
|
7
|
+
remote: https://rubygems.org/
|
8
|
+
specs:
|
9
|
+
diff-lcs (1.3)
|
10
|
+
rake (12.3.3)
|
11
|
+
rspec (3.9.0)
|
12
|
+
rspec-core (~> 3.9.0)
|
13
|
+
rspec-expectations (~> 3.9.0)
|
14
|
+
rspec-mocks (~> 3.9.0)
|
15
|
+
rspec-core (3.9.2)
|
16
|
+
rspec-support (~> 3.9.3)
|
17
|
+
rspec-expectations (3.9.2)
|
18
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
19
|
+
rspec-support (~> 3.9.0)
|
20
|
+
rspec-mocks (3.9.1)
|
21
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
22
|
+
rspec-support (~> 3.9.0)
|
23
|
+
rspec-support (3.9.3)
|
24
|
+
|
25
|
+
PLATFORMS
|
26
|
+
ruby
|
27
|
+
|
28
|
+
DEPENDENCIES
|
29
|
+
finddups!
|
30
|
+
rake (~> 12.0)
|
31
|
+
rspec (~> 3.0)
|
32
|
+
|
33
|
+
BUNDLED WITH
|
34
|
+
2.1.4
|
data/LICENSE.txt
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) 2020 Alex Clink
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
13
|
+
all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,45 @@
|
|
1
|
+
# Finddups
|
2
|
+
|
3
|
+
Shows duplicate files within a list of directories and outputs them as JSON.
|
4
|
+
|
5
|
+
## Installation
|
6
|
+
|
7
|
+
$ gem install finddups
|
8
|
+
|
9
|
+
## Usage
|
10
|
+
|
11
|
+
```
|
12
|
+
finddups (version 0.1.0)
|
13
|
+
Usage: finddups [dirs] [options]
|
14
|
+
-i, --ignore path ignore paths
|
15
|
+
--atime (default) Use file access time to sort duplicates
|
16
|
+
--mtime Use file modification time to sort duplicates
|
17
|
+
--ctime Use file change time to sort duplicates
|
18
|
+
(the time at which directory information about the file was changed, not the file itself)
|
19
|
+
-t, --threads threads Number of threads to use (default 16)
|
20
|
+
-d, --depth depth Max depth to search
|
21
|
+
-h, --help Show this help
|
22
|
+
-v Show version
|
23
|
+
```
|
24
|
+
|
25
|
+
Example:
|
26
|
+
|
27
|
+
```
|
28
|
+
$ finddups ~/Documents/folder1 ~/Documents/folder2 -i node_modules -i vendor
|
29
|
+
[
|
30
|
+
[
|
31
|
+
"/Users/alex/Documents/folder1/file1",
|
32
|
+
"/Users/alex/Documents/folder1/file1 (2)",
|
33
|
+
"/Users/alex/Documents/folder2/file1"
|
34
|
+
]
|
35
|
+
]
|
36
|
+
```
|
37
|
+
|
38
|
+
|
39
|
+
#### Contributing
|
40
|
+
|
41
|
+
Bug reports and pull requests are welcome on GitHub at https://github.com/sleepinginsomniac/finddups.
|
42
|
+
|
43
|
+
#### License
|
44
|
+
|
45
|
+
The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
|
data/Rakefile
ADDED
data/bin/console
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require "bundler/setup"
|
4
|
+
require "finddups"
|
5
|
+
|
6
|
+
# You can add fixtures and/or initialization code here to make experimenting
|
7
|
+
# with your gem easier. You can also use a different console, if you like.
|
8
|
+
|
9
|
+
# (If you use this, don't forget to add pry to your Gemfile!)
|
10
|
+
# require "pry"
|
11
|
+
# Pry.start
|
12
|
+
|
13
|
+
require "irb"
|
14
|
+
IRB.start(__FILE__)
|
data/bin/setup
ADDED
data/exe/finddups
ADDED
@@ -0,0 +1,151 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'digest/sha1'
|
4
|
+
require 'json'
|
5
|
+
require 'fileutils'
|
6
|
+
require 'thread'
|
7
|
+
require 'optparse'
|
8
|
+
|
9
|
+
$LOAD_PATH.unshift(File.join(__dir__, '..', 'lib'))
|
10
|
+
|
11
|
+
require 'finddups'
|
12
|
+
|
13
|
+
# Runtime settings; the defaults below are overridden by command-line options.
@options = {
  sort: 'atime',          # File time attribute used to order each group of duplicates
  depth: Float::INFINITY, # Maximum directory depth to descend into
  ignore: [],             # Directory basenames that are not searched
  threads: 16,            # Number of worker threads
}

optparser = OptionParser.new do |opts|
  opts.banner = <<~BANNER
    finddups (version #{Finddups::VERSION})
    Usage: #{File.basename(__FILE__)} [dirs] [options]
  BANNER

  # May be given several times; every value is collected.
  opts.on("-i path", "--ignore path", "ignore paths") do |path|
    @options[:ignore] << path
  end

  opts.on("--atime", "(default) Use file access time to sort duplicates") do
    @options[:sort] = 'atime'
  end

  opts.on("--mtime", "Use file modification time to sort duplicates") do
    @options[:sort] = 'mtime'
  end

  desc = <<~DESC
    Use file change time to sort duplicates
    (the time at which directory information about the file was changed, not the file itself)
  DESC
  opts.on("--ctime", desc) do
    @options[:sort] = 'ctime'
  end

  # Integer coercion makes OptionParser reject non-numeric values instead of
  # String#to_i silently turning them into 0.
  opts.on("-t threads", "--threads threads", Integer, "Number of threads to use (default 16)") do |threads|
    @options[:threads] = threads
  end

  opts.on("-d depth", "--depth depth", Integer, "Max depth to search") do |depth|
    @options[:depth] = depth
  end

  opts.on("-h", "--help", "Show this help") do
    puts opts
    exit
  end

  opts.on("-v", "Show version") do
    puts "finddups (version #{Finddups::VERSION})"
    exit
  end
end

# Report unknown options and malformed values as a usage error rather than
# letting the exception escape with a backtrace.
begin
  optparser.parse!
rescue OptionParser::ParseError => e
  $stderr.puts e.message
  $stderr.puts optparser
  exit 1
end

# With fewer than one worker thread no queued job would ever be started.
if @options[:threads] < 1
  $stderr.puts "The number of threads must be at least 1"
  exit 1
end

# After parse! only the positional arguments remain in ARGV.
if ARGV.empty? || ARGV.any? { |entry| !File.directory?(entry) }
  $stderr.puts "Every argument must be a directory"
  exit 1
end

# ========
# = Prog =
# ========

search_dirs = ARGV

# @mutex guards writes to the table of digests; @queue holds pending
# directory-scan jobs (lambdas).
@mutex = Mutex.new
@queue = []
|
81
|
+
|
82
|
+
# Scans a single directory: hashes every file found directly inside it and
# queues each sub-directory as a job to be scanned later.
#
# Reads @options[:ignore] and @options[:depth], pushes lambdas onto @queue and
# records paths in @duplicates (SHA1 hex digest => array of paths) while
# holding @mutex.
#
# @param directory [String] directory to scan
# @param depth [Integer] number of levels `directory` lies below its search root
# @return [Hash{String => Array<String>}] the shared @duplicates table
def search(directory, depth = 0)
  # Skips
  return @duplicates if @options[:ignore].include?(File.basename(directory))

  begin
    entries = Dir.entries(directory)
  rescue SystemCallError => e
    # An unreadable or vanished directory must not abort the whole scan.
    $stderr.puts "#{directory}: #{e}"
    return @duplicates
  end

  entries.each do |entry|
    # Hidden entries are skipped; this also covers "." and "..".
    next if entry.start_with?('.')

    path = File.join(directory, entry)

    # Symlinks are never followed. File.directory? resolves links, so links
    # must be rejected first or a link cycle would be descended into forever.
    next if File.symlink?(path)

    if File.directory?(path)
      @queue.push -> { search(path, depth + 1) } if depth < @options[:depth]
      next
    end

    begin
      # Digest::SHA1.file reads the file in chunks instead of loading it whole.
      digest = Digest::SHA1.file(path).hexdigest
      @mutex.synchronize { (@duplicates[digest] ||= []) << path }
    rescue SystemCallError => e
      # Any Errno::* failure (permissions, file removed meanwhile, ...) is
      # reported and the file skipped.
      $stderr.puts "#{path}: #{e}"
    end
  end

  @duplicates
end
|
112
|
+
|
113
|
+
# Table filled in by #search: SHA1 hex digest => paths of files with that content.
@duplicates = {}

# Seed the work queue with one scan job per directory given on the command line.
search_dirs.each do |search_dir|
  @queue.push -> { search(search_dir) }
end

# A thread count below 1 would never take a job off the queue, so the loop
# below would never terminate.
batch_size = [@options[:threads], 1].max

# Run the queued jobs in batches of at most batch_size threads. Jobs may push
# further jobs (sub-directories), so keep going until the queue stays empty.
until @queue.empty?
  workers = @queue.shift(batch_size).map { |job| Thread.new { job.call } }
  workers.each(&:join)
end

# Trim non dups
@duplicates = @duplicates.values.reject { |files| files.length < 2 }

# Sort
# Each group is ordered ascending by the selected file time; an unrecognised
# sort setting falls back to ordering by path length.
sort_key =
  case @options[:sort]
  when 'atime' then ->(path) { File.atime(path) }
  when 'mtime' then ->(path) { File.mtime(path) }
  when 'ctime' then ->(path) { File.ctime(path) }
  else ->(path) { path.length }
  end

# Sort in place so the ordering is what actually gets printed.
@duplicates.each { |dups| dups.sort_by!(&sort_key) }

$stdout.puts JSON.pretty_generate(@duplicates)
|
data/finddups.gemspec
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
require_relative 'lib/finddups/version'
|
2
|
+
|
3
|
+
# Gem specification for finddups: identity, metadata and the list of packaged files.
Gem::Specification.new do |gem|
  repo_url = "https://github.com/SleepingInsomniac/finddups"

  gem.name                  = "finddups"
  gem.version               = Finddups::VERSION
  gem.authors               = ["Alex Clink"]
  gem.email                 = ["code@alexclink.com"]

  gem.summary               = "Shows duplicate files within a list of directories and outputs as JSON."
  gem.homepage              = repo_url
  gem.license               = "MIT"
  gem.required_ruby_version = Gem::Requirement.new(">= 2.3.0")

  gem.metadata["homepage_uri"]    = gem.homepage
  gem.metadata["source_code_uri"] = repo_url

  # Package every file tracked by git (listed relative to this gemspec's
  # directory), leaving out anything under test/, spec/ or features/.
  tracked = Dir.chdir(File.expand_path('..', __FILE__)) do
    `git ls-files -z`.split("\x0")
  end
  gem.files = tracked.reject { |path| path =~ %r{^(test|spec|features)/} }

  # Every file under exe/ is installed as an executable, by its basename.
  gem.bindir        = "exe"
  gem.executables   = gem.files.grep(%r{^exe/}).map { |path| File.basename(path) }
  gem.require_paths = ["lib"]
end
|
data/lib/finddups.rb
ADDED
metadata
ADDED
@@ -0,0 +1,60 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: finddups
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Alex Clink
|
8
|
+
autorequire:
|
9
|
+
bindir: exe
|
10
|
+
cert_chain: []
|
11
|
+
date: 2020-06-03 00:00:00.000000000 Z
|
12
|
+
dependencies: []
|
13
|
+
description:
|
14
|
+
email:
|
15
|
+
- code@alexclink.com
|
16
|
+
executables:
|
17
|
+
- finddups
|
18
|
+
extensions: []
|
19
|
+
extra_rdoc_files: []
|
20
|
+
files:
|
21
|
+
- ".gitignore"
|
22
|
+
- ".rspec"
|
23
|
+
- ".travis.yml"
|
24
|
+
- Gemfile
|
25
|
+
- Gemfile.lock
|
26
|
+
- LICENSE.txt
|
27
|
+
- README.md
|
28
|
+
- Rakefile
|
29
|
+
- bin/console
|
30
|
+
- bin/setup
|
31
|
+
- exe/finddups
|
32
|
+
- finddups.gemspec
|
33
|
+
- lib/finddups.rb
|
34
|
+
- lib/finddups/version.rb
|
35
|
+
homepage: https://github.com/SleepingInsomniac/finddups
|
36
|
+
licenses:
|
37
|
+
- MIT
|
38
|
+
metadata:
|
39
|
+
homepage_uri: https://github.com/SleepingInsomniac/finddups
|
40
|
+
source_code_uri: https://github.com/SleepingInsomniac/finddups
|
41
|
+
post_install_message:
|
42
|
+
rdoc_options: []
|
43
|
+
require_paths:
|
44
|
+
- lib
|
45
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
46
|
+
requirements:
|
47
|
+
- - ">="
|
48
|
+
- !ruby/object:Gem::Version
|
49
|
+
version: 2.3.0
|
50
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
55
|
+
requirements: []
|
56
|
+
rubygems_version: 3.1.3
|
57
|
+
signing_key:
|
58
|
+
specification_version: 4
|
59
|
+
summary: Shows duplicate files within a list of directories and outputs as JSON.
|
60
|
+
test_files: []
|