finddups 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 1cb43cbb4ba1ea49a71b5d369cffbd0decf21f0c77764b2252d4211b7b1ccb65
4
+ data.tar.gz: cc04d2a23bcb19c8b8849e815e847df2472f681d46e6e7007d8cf8880acae1c6
5
+ SHA512:
6
+ metadata.gz: 1c46b2d368ba239f703bf20a4c5297cf03d9f9275ea5e924a87112a2d9d92d8488e4b1e37453aec7b81b4b294629d2fe39a0d6e76114981eefa8b1c23e4b61d7
7
+ data.tar.gz: ec31d5d9d66ac0081c8d0571a9a7783006df85f63f5a0711b1fbc10e3e2249fb2eb758804eabe6cc4db740c07c4517586686d1e9c1e5a140f9d766314ceb9dca
@@ -0,0 +1,11 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /_yardoc/
4
+ /coverage/
5
+ /doc/
6
+ /pkg/
7
+ /spec/reports/
8
+ /tmp/
9
+
10
+ # rspec failure tracking
11
+ .rspec_status
data/.rspec ADDED
@@ -0,0 +1,3 @@
1
+ --format documentation
2
+ --color
3
+ --require spec_helper
@@ -0,0 +1,6 @@
1
+ ---
2
+ language: ruby
3
+ cache: bundler
4
+ rvm:
5
+ - 2.7.1
6
+ before_install: gem install bundler -v 2.1.4
data/Gemfile ADDED
@@ -0,0 +1,7 @@
1
+ source "https://rubygems.org"
2
+
3
+ # Specify your gem's dependencies in finddups.gemspec
4
+ gemspec
5
+
6
+ gem "rake", "~> 12.0"
7
+ gem "rspec", "~> 3.0"
@@ -0,0 +1,34 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ finddups (0.1.0)
5
+
6
+ GEM
7
+ remote: https://rubygems.org/
8
+ specs:
9
+ diff-lcs (1.3)
10
+ rake (12.3.3)
11
+ rspec (3.9.0)
12
+ rspec-core (~> 3.9.0)
13
+ rspec-expectations (~> 3.9.0)
14
+ rspec-mocks (~> 3.9.0)
15
+ rspec-core (3.9.2)
16
+ rspec-support (~> 3.9.3)
17
+ rspec-expectations (3.9.2)
18
+ diff-lcs (>= 1.2.0, < 2.0)
19
+ rspec-support (~> 3.9.0)
20
+ rspec-mocks (3.9.1)
21
+ diff-lcs (>= 1.2.0, < 2.0)
22
+ rspec-support (~> 3.9.0)
23
+ rspec-support (3.9.3)
24
+
25
+ PLATFORMS
26
+ ruby
27
+
28
+ DEPENDENCIES
29
+ finddups!
30
+ rake (~> 12.0)
31
+ rspec (~> 3.0)
32
+
33
+ BUNDLED WITH
34
+ 2.1.4
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2020 Alex Clink
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
@@ -0,0 +1,45 @@
1
+ # Finddups
2
+
3
+ Shows duplicate files within a list of directories and outputs as JSON.
4
+
5
+ ## Installation
6
+
7
+ $ gem install finddups
8
+
9
+ ## Usage
10
+
11
+ ```
12
+ finddups (version 0.1.0)
13
+ Usage: finddups [dirs] [options]
14
+ -i, --ignore path ignore paths
15
+ --atime (default) Use file access time to sort duplicates
16
+ --mtime Use file modification time to sort duplicates
17
+ --ctime Use file change time to sort duplicates
18
+ (the time at which directory information about the file was changed, not the file itself)
19
+ -t, --threads threads Number of threads to use (default 16)
20
+ -d, --depth depth Max depth to search
21
+ -h, --help Show this help
22
+ -v Show version
23
+ ```
24
+
25
+ Example:
26
+
27
+ ```
28
+ $ finddups ~/Documents/folder1 ~/Documents/folder2 -i node_modules -i vendor
29
+ [
30
+ [
31
+ "/Users/alex/Documents/folder1/file1",
32
+ "/Users/alex/Documents/folder1/file1 (2)",
33
+ "/Users/alex/Documents/folder2/file1"
34
+ ]
35
+ ]
36
+ ```
37
+
38
+
39
+ #### Contributing
40
+
41
+ Bug reports and pull requests are welcome on GitHub at https://github.com/sleepinginsomniac/finddups.
42
+
43
+ #### License
44
+
45
+ The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
@@ -0,0 +1,6 @@
1
+ require "bundler/gem_tasks"
2
+ require "rspec/core/rake_task"
3
+
4
# Register the `spec` rake task provided by RSpec.
RSpec::Core::RakeTask.new(:spec)

# A bare `rake` invocation runs the `spec` task.
task default: :spec
@@ -0,0 +1,14 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "bundler/setup"
4
+ require "finddups"
5
+
6
+ # You can add fixtures and/or initialization code here to make experimenting
7
+ # with your gem easier. You can also use a different console, if you like.
8
+
9
+ # (If you use this, don't forget to add pry to your Gemfile!)
10
+ # require "pry"
11
+ # Pry.start
12
+
13
+ require "irb"
14
+ IRB.start(__FILE__)
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+ IFS=$'\n\t'
4
+ set -vx
5
+
6
+ bundle install
7
+
8
+ # Do any other automated setup that you need to do here
@@ -0,0 +1,151 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'digest/sha1'
4
+ require 'json'
5
+ require 'fileutils'
6
+ require 'thread'
7
+ require 'optparse'
8
+
9
+ $LOAD_PATH.unshift(File.join(__dir__, '..', 'lib'))
10
+
11
+ require 'finddups'
12
+
13
# Runtime settings. The defaults below are overridden by command-line switches.
@options = {
  sort: 'atime',
  depth: Float::INFINITY,
  ignore: [],
  threads: 16,
}

optparser = OptionParser.new do |opts|
  opts.banner = <<~BANNER
    finddups (version #{Finddups::VERSION})
    Usage: #{File.basename(__FILE__)} [dirs] [options]
  BANNER

  # May be given several times; each value is compared against directory basenames.
  opts.on("-i path", "--ignore path", "ignore paths") do |path|
    @options[:ignore] << path
  end

  opts.on("--atime", "(default) Use file access time to sort duplicates") do
    @options[:sort] = 'atime'
  end

  opts.on("--mtime", "Use file modification time to sort duplicates") do
    @options[:sort] = 'mtime'
  end

  desc = <<~DESC
    Use file change time to sort duplicates
    (the time at which directory information about the file was changed, not the file itself)
  DESC
  opts.on("--ctime", desc) do
    @options[:sort] = 'ctime'
  end

  # Integer coercion makes OptionParser reject non-numeric values instead of
  # silently turning them into 0 the way String#to_i does.
  opts.on("-t threads", "--threads threads", Integer, "Number of threads to use (default 16)") do |threads|
    @options[:threads] = threads
  end

  opts.on("-d depth", "--depth depth", Integer, "Max depth to search") do |depth|
    @options[:depth] = depth
  end

  opts.on("-h", "--help", "Show this help") do
    puts opts
    exit
  end

  opts.on("-v", "Show version") do
    puts "finddups (version #{Finddups::VERSION})"
    exit
  end
end

# Report bad switches/arguments as a short message plus usage rather than a
# stack trace.
begin
  optparser.parse!
rescue OptionParser::ParseError => e
  $stderr.puts e.message
  $stderr.puts optparser
  exit 1
end

# With fewer than one worker nothing would ever be taken off the work queue.
if @options[:threads] < 1
  $stderr.puts "Number of threads must be at least 1"
  exit 1
end

# Whatever is left in ARGV after option parsing is the list of search roots.
if ARGV.empty? || ARGV.any? { |entry| !File.directory?(entry) }
  $stderr.puts "Every argument must be a directory"
  exit 1
end

# ========
# = Prog =
# ========

search_dirs = ARGV

# Shared state used by `search`: @mutex guards @duplicates, @queue holds
# pending directory jobs (lambdas).
@mutex = Mutex.new
@queue = []
81
+
82
# Scans a single directory level: records a SHA1 digest for every regular
# file and queues each subdirectory as a job for a later pass.
#
# Uses script-level state: @options (:ignore, :depth), @queue (array of
# pending lambdas), @mutex and @duplicates (digest => array of paths).
#
# @param directory [String] directory to scan
# @param depth [Integer] number of levels below the search root
# @return [Hash{String => Array<String>}] the shared @duplicates table
def search(directory, depth = 0)
  # Ignored directories are matched by basename only.
  return @duplicates if @options[:ignore].include?(File.basename(directory))

  Dir.entries(directory).each do |entry|
    # Skips hidden entries as well as "." and "..".
    next if entry.start_with?('.')

    path = File.join(directory, entry)

    begin
      # lstat does not follow symlinks, so a link to a directory is never
      # traversed (no cycles, no file reported twice under two paths).
      stat = File.lstat(path)

      if stat.directory?
        @queue.push(-> { search(path, depth + 1) }) if depth < @options[:depth]
      elsif stat.file?
        # Streams the file through the digest instead of loading it whole.
        digest = Digest::SHA1.file(path).hexdigest
        @mutex.synchronize do
          (@duplicates[digest] ||= []) << path
        end
      end
      # Symlinks and special files (FIFOs, sockets, devices) are skipped.
    rescue SystemCallError => e
      # Unreadable or vanished entries are reported and skipped so one bad
      # file does not abort the whole scan.
      $stderr.puts "#{path}: #{e}"
    end
  end

  @duplicates
rescue SystemCallError => e
  # The directory itself could not be listed.
  $stderr.puts "#{directory}: #{e}"
  @duplicates
end
112
+
113
# Digest => paths table filled in by the `search` workers.
@duplicates = {}

# Seed the work queue with one job per search root.
search_dirs.each do |search_dir|
  @queue.push(-> { search(search_dir) })
end

# Drain the queue in batches: each batch runs up to `batch_size` jobs in
# parallel, and jobs may enqueue further directories while they run.
# A non-positive thread count would take nothing off the queue and loop
# forever, so at least one job is always run per batch.
batch_size = [@options[:threads], 1].max
until @queue.empty?
  workers = @queue.shift(batch_size).map { |job| Thread.new { job.call } }
  workers.each(&:join)
end

# Trim non dups: keep only the groups that contain at least two paths.
@duplicates = @duplicates.values.reject { |files| files.length < 2 }

# Sort each group in place, oldest timestamp first, by the selected file time
# (falling back to path length for an unknown sort option).
sort_key =
  case @options[:sort]
  when 'atime' then ->(path) { File.atime(path) }
  when 'mtime' then ->(path) { File.mtime(path) }
  when 'ctime' then ->(path) { File.ctime(path) }
  else ->(path) { path.length }
  end

@duplicates.each { |dups| dups.sort_by!(&sort_key) }

$stdout.puts JSON.pretty_generate(@duplicates)
@@ -0,0 +1,25 @@
1
+ require_relative 'lib/finddups/version'
2
+
3
Gem::Specification.new do |spec|
  # Identity
  spec.name    = "finddups"
  spec.version = Finddups::VERSION
  spec.authors = ["Alex Clink"]
  spec.email   = ["code@alexclink.com"]

  # Description and requirements
  spec.summary               = %q{Shows duplicate files within a list of directories and outputs as JSON.}
  spec.homepage              = "https://github.com/SleepingInsomniac/finddups"
  spec.license               = "MIT"
  spec.required_ruby_version = Gem::Requirement.new(">= 2.3.0")

  spec.metadata["homepage_uri"]    = spec.homepage
  spec.metadata["source_code_uri"] = "https://github.com/SleepingInsomniac/finddups"

  # Package every file tracked by git (listed from the gemspec's own
  # directory), leaving out anything under test/, spec/ or features/.
  project_root = File.expand_path('..', __FILE__)
  excluded     = %r{^(test|spec|features)/}
  spec.files = Dir.chdir(project_root) do
    tracked = `git ls-files -z`.split("\x0")
    tracked.reject { |f| f.match(excluded) }
  end

  # Executables are the basenames of the packaged files under exe/.
  spec.bindir        = "exe"
  spec.executables   = spec.files.grep(%r{^exe/}).map { |f| File.basename(f) }
  spec.require_paths = ["lib"]
end
@@ -0,0 +1,5 @@
1
+ require "finddups/version"
2
+
3
# Top-level namespace of the finddups gem.
module Finddups
  # Error class defined in the gem's namespace; a StandardError subclass.
  Error = Class.new(StandardError)
end
@@ -0,0 +1,3 @@
1
module Finddups
  # Gem version string. Frozen so the shared constant cannot be mutated in
  # place by accident.
  VERSION = "0.1.0".freeze
end
metadata ADDED
@@ -0,0 +1,60 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: finddups
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Alex Clink
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2020-06-03 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description:
14
+ email:
15
+ - code@alexclink.com
16
+ executables:
17
+ - finddups
18
+ extensions: []
19
+ extra_rdoc_files: []
20
+ files:
21
+ - ".gitignore"
22
+ - ".rspec"
23
+ - ".travis.yml"
24
+ - Gemfile
25
+ - Gemfile.lock
26
+ - LICENSE.txt
27
+ - README.md
28
+ - Rakefile
29
+ - bin/console
30
+ - bin/setup
31
+ - exe/finddups
32
+ - finddups.gemspec
33
+ - lib/finddups.rb
34
+ - lib/finddups/version.rb
35
+ homepage: https://github.com/SleepingInsomniac/finddups
36
+ licenses:
37
+ - MIT
38
+ metadata:
39
+ homepage_uri: https://github.com/SleepingInsomniac/finddups
40
+ source_code_uri: https://github.com/SleepingInsomniac/finddups
41
+ post_install_message:
42
+ rdoc_options: []
43
+ require_paths:
44
+ - lib
45
+ required_ruby_version: !ruby/object:Gem::Requirement
46
+ requirements:
47
+ - - ">="
48
+ - !ruby/object:Gem::Version
49
+ version: 2.3.0
50
+ required_rubygems_version: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ requirements: []
56
+ rubygems_version: 3.1.3
57
+ signing_key:
58
+ specification_version: 4
59
+ summary: Shows duplicate files within a list of directories and outputs as JSON.
60
+ test_files: []