finddups 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 1cb43cbb4ba1ea49a71b5d369cffbd0decf21f0c77764b2252d4211b7b1ccb65
4
+ data.tar.gz: cc04d2a23bcb19c8b8849e815e847df2472f681d46e6e7007d8cf8880acae1c6
5
+ SHA512:
6
+ metadata.gz: 1c46b2d368ba239f703bf20a4c5297cf03d9f9275ea5e924a87112a2d9d92d8488e4b1e37453aec7b81b4b294629d2fe39a0d6e76114981eefa8b1c23e4b61d7
7
+ data.tar.gz: ec31d5d9d66ac0081c8d0571a9a7783006df85f63f5a0711b1fbc10e3e2249fb2eb758804eabe6cc4db740c07c4517586686d1e9c1e5a140f9d766314ceb9dca
@@ -0,0 +1,11 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /_yardoc/
4
+ /coverage/
5
+ /doc/
6
+ /pkg/
7
+ /spec/reports/
8
+ /tmp/
9
+
10
+ # rspec failure tracking
11
+ .rspec_status
data/.rspec ADDED
@@ -0,0 +1,3 @@
1
+ --format documentation
2
+ --color
3
+ --require spec_helper
@@ -0,0 +1,6 @@
1
+ ---
2
+ language: ruby
3
+ cache: bundler
4
+ rvm:
5
+ - 2.7.1
6
+ before_install: gem install bundler -v 2.1.4
data/Gemfile ADDED
@@ -0,0 +1,7 @@
1
+ source "https://rubygems.org"
2
+
3
+ # Specify your gem's dependencies in finddups.gemspec
4
+ gemspec
5
+
6
+ gem "rake", "~> 12.0"
7
+ gem "rspec", "~> 3.0"
@@ -0,0 +1,34 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ finddups (0.1.0)
5
+
6
+ GEM
7
+ remote: https://rubygems.org/
8
+ specs:
9
+ diff-lcs (1.3)
10
+ rake (12.3.3)
11
+ rspec (3.9.0)
12
+ rspec-core (~> 3.9.0)
13
+ rspec-expectations (~> 3.9.0)
14
+ rspec-mocks (~> 3.9.0)
15
+ rspec-core (3.9.2)
16
+ rspec-support (~> 3.9.3)
17
+ rspec-expectations (3.9.2)
18
+ diff-lcs (>= 1.2.0, < 2.0)
19
+ rspec-support (~> 3.9.0)
20
+ rspec-mocks (3.9.1)
21
+ diff-lcs (>= 1.2.0, < 2.0)
22
+ rspec-support (~> 3.9.0)
23
+ rspec-support (3.9.3)
24
+
25
+ PLATFORMS
26
+ ruby
27
+
28
+ DEPENDENCIES
29
+ finddups!
30
+ rake (~> 12.0)
31
+ rspec (~> 3.0)
32
+
33
+ BUNDLED WITH
34
+ 2.1.4
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2020 Alex Clink
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
@@ -0,0 +1,45 @@
1
+ # Finddups
2
+
3
+ Shows duplicate files within a list of directories and outputs as JSON.
4
+
5
+ ## Installation
6
+
7
+ $ gem install finddups
8
+
9
+ ## Usage
10
+
11
+ ```
12
+ finddups (version 0.1.0)
13
+ Usage: finddups [dirs] [options]
14
+ -i, --ignore path ignore paths
15
+ --atime (default) Use file access time to sort duplicates
16
+ --mtime Use file modification time to sort duplicates
17
+ --ctime Use file change time to sort duplicates
18
+ (the time at which directory information about the file was changed, not the file itself)
19
+ -t, --threads threads Number of threads to use (default 16)
20
+ -d, --depth depth Max depth to search
21
+ -h, --help Show this help
22
+ -v Show version
23
+ ```
24
+
25
+ Example:
26
+
27
+ ```
28
+ $ finddups ~/Documents/folder1 ~/Documents/folder2 -i node_modules -i vendor
29
+ [
30
+ [
31
+ "/Users/alex/Documents/folder1/file1",
32
+ "/Users/alex/Documents/folder1/file1 (2)",
33
+ "/Users/alex/Documents/folder2/file1"
34
+ ]
35
+ ]
36
+ ```
37
+
38
+
39
+ #### Contributing
40
+
41
+ Bug reports and pull requests are welcome on GitHub at https://github.com/sleepinginsomniac/finddups.
42
+
43
+ #### License
44
+
45
+ The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
@@ -0,0 +1,6 @@
1
+ require "bundler/gem_tasks"
2
+ require "rspec/core/rake_task"
3
+
4
+ RSpec::Core::RakeTask.new(:spec)
5
+
6
+ task :default => :spec
@@ -0,0 +1,14 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "bundler/setup"
4
+ require "finddups"
5
+
6
+ # You can add fixtures and/or initialization code here to make experimenting
7
+ # with your gem easier. You can also use a different console, if you like.
8
+
9
+ # (If you use this, don't forget to add pry to your Gemfile!)
10
+ # require "pry"
11
+ # Pry.start
12
+
13
+ require "irb"
14
+ IRB.start(__FILE__)
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+ IFS=$'\n\t'
4
+ set -vx
5
+
6
+ bundle install
7
+
8
+ # Do any other automated setup that you need to do here
@@ -0,0 +1,151 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'digest/sha1'
4
+ require 'json'
5
+ require 'fileutils'
6
+ require 'thread'
7
+ require 'optparse'
8
+
9
+ $LOAD_PATH.unshift(File.join(__dir__, '..', 'lib'))
10
+
11
+ require 'finddups'
12
+
13
# Runtime settings. Every value can be overridden from the command line.
@options = {
  sort: 'atime',          # File timestamp used to order the paths of a duplicate group
  depth: Float::INFINITY, # Deepest directory level that is still scanned
  ignore: [],             # Directory base names that are skipped
  threads: 16,            # Number of directory scans run per batch
}

optparser = OptionParser.new do |opts|
  opts.banner = <<~BANNER
    finddups (version #{Finddups::VERSION})
    Usage: #{File.basename(__FILE__)} [dirs] [options]
  BANNER

  # May be given several times; every value is collected.
  opts.on("-i path", "--ignore path", "ignore paths") do |path|
    @options[:ignore] << path
  end

  opts.on("--atime", "(default) Use file access time to sort duplicates") do
    @options[:sort] = 'atime'
  end

  opts.on("--mtime", "Use file modification time to sort duplicates") do
    @options[:sort] = 'mtime'
  end

  desc = <<~DESC
    Use file change time to sort duplicates
    (the time at which directory information about the file was changed, not the file itself)
  DESC
  opts.on("--ctime", desc) do
    @options[:sort] = 'ctime'
  end

  # Integer coercion makes OptionParser reject non-numeric values instead of
  # letting String#to_i silently turn them into 0.
  opts.on("-t threads", "--threads threads", Integer, "Number of threads to use (default 16)") do |threads|
    @options[:threads] = threads
  end

  opts.on("-d depth", "--depth depth", Integer, "Max depth to search") do |depth|
    @options[:depth] = depth
  end

  opts.on("-h", "--help", "Show this help") do
    puts opts
    exit
  end

  opts.on("-v", "Show version") do
    puts "finddups (version #{Finddups::VERSION})"
    exit
  end
end

# Report bad switches and bad values as a usage error rather than an
# unhandled exception.
begin
  optparser.parse!
rescue OptionParser::ParseError => e
  $stderr.puts e.message
  $stderr.puts optparser
  exit 1
end

# Directory scans are taken off the work queue in batches of this size; a
# batch size below one would never empty the queue.
if @options[:threads] < 1
  $stderr.puts "The number of threads must be at least 1"
  exit 1
end

# After parse! only the positional arguments are left in ARGV.
if ARGV.empty? || ARGV.any? { |entry| !File.directory?(entry) }
  $stderr.puts "Every argument must be a directory"
  exit 1
end

# ========
# = Prog =
# ========

search_dirs = ARGV

@mutex = Mutex.new # Serializes writes to the digest table made by search
@queue = []        # Pending directory scans, stored as lambdas
81
+
82
# Scans a single directory level and records the SHA1 digest of every
# regular file found in it.
#
# Subdirectories are not scanned recursively here; a lambda that scans each
# one is appended to @queue so the caller can run it later.
#
# Reads @options[:ignore] (directory base names to skip) and
# @options[:depth] (deepest level that may still queue subdirectories).
# Writes to @duplicates (digest => array of paths) under @mutex.
#
# Entries whose name starts with a dot are skipped, which also covers "."
# and "..". Symbolic links are never followed, so link cycles cannot occur
# and a file is not reported again under a linked path. Anything that is
# neither a directory nor a regular file (FIFO, socket, device) is skipped,
# because reading it could block.
#
# Entries or directories that cannot be read are reported on $stderr and
# skipped instead of aborting the whole scan.
#
# @param directory [String] path of the directory to scan
# @param depth [Integer] nesting level of +directory+ below its search root
# @return [Hash{String => Array<String>}] the shared @duplicates table
def search(directory, depth = 0)
  # Ignored directories are matched by base name only.
  return @duplicates if @options[:ignore].include?(File.basename(directory))

  Dir.entries(directory).each do |entry|
    next if entry.start_with?('.')

    path = File.join(directory, entry)

    begin
      # lstat describes the entry itself rather than a link target, so
      # symlinks to directories are recognised as symlinks.
      stat = File.lstat(path)
      next if stat.symlink?

      if stat.directory?
        @queue.push(-> { search(path, depth + 1) }) if depth < @options[:depth]
      elsif stat.file?
        # Digest::SHA1.file reads the file in chunks instead of loading it
        # into memory as a whole.
        digest = Digest::SHA1.file(path).hexdigest
        @mutex.synchronize { (@duplicates[digest] ||= []) << path }
      end
    rescue SystemCallError => e
      # e.g. permission denied, or the entry vanished while scanning.
      $stderr.puts "#{path}: #{e}"
    end
  end

  @duplicates
rescue SystemCallError => e
  # The directory itself could not be listed.
  $stderr.puts "#{directory}: #{e}"
  @duplicates
end
112
+
113
# Maps a content digest to every path found with that content; filled in by
# search.
@duplicates = {}

# Seed the work queue with the directories given on the command line.
search_dirs.each do |search_dir|
  @queue.push -> { search(search_dir) }
end

# Never use fewer than one worker per batch: with zero workers nothing would
# be taken off the queue and the loop below would never finish.
batch_size = [@options[:threads].to_i, 1].max

# Drain the queue in batches. Each job scans one directory and may enqueue
# more jobs for its subdirectories, so keep going until nothing is left.
until @queue.empty?
  workers = @queue.shift(batch_size).map { |job| Thread.new { job.call } }
  workers.each(&:join)
end

# Trim non dups: keep only contents that were found under two or more paths.
@duplicates = @duplicates.values.select { |files| files.length > 1 }

# Sort the paths of every group by the selected file timestamp, oldest
# first. The sorted arrays replace the unsorted ones so the order is
# reflected in the output.
@duplicates = @duplicates.map do |files|
  case @options[:sort]
  when 'atime', 'mtime', 'ctime'
    # File.atime / File.mtime / File.ctime, picked by name.
    files.sort_by { |file| File.public_send(@options[:sort], file) }
  else
    files.sort_by(&:length)
  end
end

$stdout.puts JSON.pretty_generate(@duplicates)
@@ -0,0 +1,25 @@
1
+ require_relative 'lib/finddups/version'
2
+
3
+ Gem::Specification.new do |spec| # Package definition for the finddups gem
4
+ spec.name = "finddups"
5
+ spec.version = Finddups::VERSION
6
+ spec.authors = ["Alex Clink"]
7
+ spec.email = ["code@alexclink.com"]
8
+
9
+ spec.summary = %q{Shows duplicate files within a list of directories and outputs as JSON.}
10
+ spec.homepage = "https://github.com/SleepingInsomniac/finddups"
11
+ spec.license = "MIT"
12
+ spec.required_ruby_version = Gem::Requirement.new(">= 2.3.0")
13
+
14
+ spec.metadata["homepage_uri"] = spec.homepage
15
+ spec.metadata["source_code_uri"] = "https://github.com/SleepingInsomniac/finddups"
16
+
17
+ # Specify which files should be added to the gem when it is released.
18
+ # `git ls-files -z` lists the files tracked by git, separated by NUL bytes; paths under test/, spec/ or features/ are left out.
19
+ spec.files = Dir.chdir(File.expand_path('..', __FILE__)) do # Run git from the directory that contains this gemspec
20
+ `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
21
+ end
22
+ spec.bindir = "exe"
23
+ spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) } # Base names of the packaged files under exe/
24
+ spec.require_paths = ["lib"]
25
+ end
@@ -0,0 +1,5 @@
1
+ require "finddups/version"
2
+
3
+ module Finddups # Top-level namespace of the finddups gem
4
+ class Error < StandardError; end # Gem-specific error class; nothing in the visible sources raises it
5
+ end
@@ -0,0 +1,3 @@
1
+ module Finddups
2
+ VERSION = "0.1.0" # Gem version; read by finddups.gemspec and by the executable's banner and -v output
3
+ end
metadata ADDED
@@ -0,0 +1,60 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: finddups
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Alex Clink
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2020-06-03 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description:
14
+ email:
15
+ - code@alexclink.com
16
+ executables:
17
+ - finddups
18
+ extensions: []
19
+ extra_rdoc_files: []
20
+ files:
21
+ - ".gitignore"
22
+ - ".rspec"
23
+ - ".travis.yml"
24
+ - Gemfile
25
+ - Gemfile.lock
26
+ - LICENSE.txt
27
+ - README.md
28
+ - Rakefile
29
+ - bin/console
30
+ - bin/setup
31
+ - exe/finddups
32
+ - finddups.gemspec
33
+ - lib/finddups.rb
34
+ - lib/finddups/version.rb
35
+ homepage: https://github.com/SleepingInsomniac/finddups
36
+ licenses:
37
+ - MIT
38
+ metadata:
39
+ homepage_uri: https://github.com/SleepingInsomniac/finddups
40
+ source_code_uri: https://github.com/SleepingInsomniac/finddups
41
+ post_install_message:
42
+ rdoc_options: []
43
+ require_paths:
44
+ - lib
45
+ required_ruby_version: !ruby/object:Gem::Requirement
46
+ requirements:
47
+ - - ">="
48
+ - !ruby/object:Gem::Version
49
+ version: 2.3.0
50
+ required_rubygems_version: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ requirements: []
56
+ rubygems_version: 3.1.3
57
+ signing_key:
58
+ specification_version: 4
59
+ summary: Shows duplicate files within a list of directories and outputs as JSON.
60
+ test_files: []