crf 0.0.7
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +72 -0
- data/.rubocop.yml +13 -0
- data/.travis.yml +35 -0
- data/Gemfile +11 -0
- data/LICENSE.md +21 -0
- data/README.md +62 -0
- data/Rakefile +1 -0
- data/bin/crf +35 -0
- data/crf.gemspec +28 -0
- data/lib/crf/configuration.rb +7 -0
- data/lib/crf/finder.rb +84 -0
- data/lib/crf/interactive_finder.rb +50 -0
- data/lib/crf/interactive_remover.rb +43 -0
- data/lib/crf/logger.rb +33 -0
- data/lib/crf/remover.rb +52 -0
- data/lib/crf/repetitions_list.rb +44 -0
- data/lib/crf/version.rb +6 -0
- data/lib/crf.rb +82 -0
- metadata +151 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: cedcfa93b22235198b5fe6cb7390bdee22f02109
|
4
|
+
data.tar.gz: fe5131cf2039948c9ffa7db13a7ced10d8f6f1a2
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 48487f353a617433b44ffc1514dc92fed3e365e4422a626e8ba28bbffb82b5859088649599e7fb142198d96174ecb7ffd95b41daf93766b8def8a36d2852ac11
|
7
|
+
data.tar.gz: f1a8b5badf11ede2776637900365fb49b132f34fcd65c02da7a2ed6b0772ed01d2484eca4f0a147d1951db4be8366b3b1644d7231545101054469e332f4a69d5
|
data/.gitignore
ADDED
@@ -0,0 +1,72 @@
|
|
1
|
+
|
2
|
+
# Created by https://www.gitignore.io/api/ruby,osx
|
3
|
+
|
4
|
+
*.log
|
5
|
+
.byebug_history
|
6
|
+
/spec/test_files
|
7
|
+
*.test
|
8
|
+
|
9
|
+
### Ruby ###
|
10
|
+
*.gem
|
11
|
+
*.rbc
|
12
|
+
/.config
|
13
|
+
/coverage/
|
14
|
+
/InstalledFiles
|
15
|
+
/pkg/
|
16
|
+
/spec/reports/
|
17
|
+
/spec/examples.txt
|
18
|
+
/test/tmp/
|
19
|
+
/test/version_tmp/
|
20
|
+
/tmp/
|
21
|
+
|
22
|
+
## Specific to RubyMotion:
|
23
|
+
.dat*
|
24
|
+
.repl_history
|
25
|
+
build/
|
26
|
+
|
27
|
+
## Documentation cache and generated files:
|
28
|
+
/.yardoc/
|
29
|
+
/_yardoc/
|
30
|
+
/doc/
|
31
|
+
/rdoc/
|
32
|
+
|
33
|
+
## Environment normalisation:
|
34
|
+
/.bundle/
|
35
|
+
/vendor/bundle
|
36
|
+
/lib/bundler/man/
|
37
|
+
|
38
|
+
# for a library or gem, you might want to ignore these files since the code is
|
39
|
+
# intended to run in multiple environments; otherwise, check them in:
|
40
|
+
Gemfile.lock
|
41
|
+
.ruby-version
|
42
|
+
.ruby-gemset
|
43
|
+
|
44
|
+
# unless supporting rvm < 1.11.0 or doing something fancy, ignore this:
|
45
|
+
.rvmrc
|
46
|
+
|
47
|
+
### OSX ###
|
48
|
+
.DS_Store
|
49
|
+
.AppleDouble
|
50
|
+
.LSOverride
|
51
|
+
|
52
|
+
# Icon must end with two \r
|
53
|
+
Icon
|
54
|
+
|
55
|
+
# Thumbnails
|
56
|
+
._*
|
57
|
+
|
58
|
+
# Files that might appear in the root of a volume
|
59
|
+
.DocumentRevisions-V100
|
60
|
+
.fseventsd
|
61
|
+
.Spotlight-V100
|
62
|
+
.TemporaryItems
|
63
|
+
.Trashes
|
64
|
+
.VolumeIcon.icns
|
65
|
+
|
66
|
+
# Directories potentially created on remote AFP share
|
67
|
+
.AppleDB
|
68
|
+
.AppleDesktop
|
69
|
+
Network Trash Folder
|
70
|
+
Temporary Items
|
71
|
+
.apdisk
|
72
|
+
|
data/.rubocop.yml
ADDED
data/.travis.yml
ADDED
@@ -0,0 +1,35 @@
|
|
1
|
+
language: ruby
|
2
|
+
rvm:
|
3
|
+
- 1.9.3
|
4
|
+
- 2.0.0
|
5
|
+
- 2.1
|
6
|
+
- 2.2
|
7
|
+
- 2.2.4
|
8
|
+
- 2.3.0
|
9
|
+
|
10
|
+
install:
|
11
|
+
- gem install bundler
|
12
|
+
- bundle install --retry=3
|
13
|
+
|
14
|
+
script:
|
15
|
+
- bundle exec rspec
|
16
|
+
- bundle exec rubocop -R --format simple
|
17
|
+
|
18
|
+
addons:
|
19
|
+
code_climate:
|
20
|
+
repo_token: f2595e3a0b6f5dcb7586e0f956747e711373819d96daaed13e4371ce089fc744
|
21
|
+
|
22
|
+
os:
|
23
|
+
- linux
|
24
|
+
- osx
|
25
|
+
|
26
|
+
matrix:
|
27
|
+
exclude:
|
28
|
+
- rvm: 1.9.3
|
29
|
+
os: osx
|
30
|
+
- rvm: 2.2
|
31
|
+
os: osx
|
32
|
+
- rvm: 2.2.4
|
33
|
+
os: osx
|
34
|
+
- rvm: 2.3.0
|
35
|
+
os: osx
|
data/Gemfile
ADDED
data/LICENSE.md
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) 2015 Alejandro Bezdjian, aka alebian
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
13
|
+
all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,62 @@
|
|
1
|
+
# CRF - Check Repeated Files
|
2
|
+
[![Gem Version](https://badge.fury.io/rb/crf.svg)](https://badge.fury.io/rb/crf)
|
3
|
+
[![Dependency Status](https://gemnasium.com/alebian/crf.svg)](https://gemnasium.com/alebian/crf)
|
4
|
+
[![Build Status](https://travis-ci.org/alebian/crf.svg)](https://travis-ci.org/alebian/crf)
|
5
|
+
[![Code Climate](https://codeclimate.com/github/alebian/crf/badges/gpa.svg)](https://codeclimate.com/github/alebian/crf)
|
6
|
+
[![Test Coverage](https://codeclimate.com/github/alebian/crf/badges/coverage.svg)](https://codeclimate.com/github/alebian/crf/coverage)
|
7
|
+
[![Inline docs](http://inch-ci.org/github/alebian/crf.svg)](http://inch-ci.org/github/alebian/crf)
|
8
|
+
|
9
|
+
This gem finds exact duplicate files inside a given directory and all of its subdirectories. The result of the execution is stored in a file called crf.log. The execution time depends on the number of files and on the size of each one, so be careful (or patient). There is an option to run an approximate version of the algorithm, which is faster but less accurate.
|
10
|
+
|
11
|
+
## Installation
|
12
|
+
|
13
|
+
Add this line to your application's Gemfile:
|
14
|
+
|
15
|
+
```ruby
|
16
|
+
gem 'crf'
|
17
|
+
```
|
18
|
+
|
19
|
+
And then execute:
|
20
|
+
|
21
|
+
$ bundle
|
22
|
+
|
23
|
+
Or install it yourself as:
|
24
|
+
|
25
|
+
$ gem install crf
|
26
|
+
|
27
|
+
## Usage
|
28
|
+
|
29
|
+
After installing the gem, you can use it in your command line:
|
30
|
+
|
31
|
+
```
|
32
|
+
crf PATH [-f] [-n] [-o]
|
33
|
+
```
|
34
|
+
Or you can use it in any ruby code you want:
|
35
|
+
|
36
|
+
```ruby
|
37
|
+
require 'crf'
|
38
|
+
|
39
|
+
path = './test'
|
40
|
+
options = { interactive: true, progress: true, fast: false }
|
41
|
+
|
42
|
+
crf_checker = Crf::Checker.new(path, options)
|
43
|
+
crf_checker.check_repeated_files
|
44
|
+
```
|
45
|
+
|
46
|
+
The -f, --fast option only checks whether the files have the same size (this is faster, but files of the same size are not necessarily duplicates).
|
47
|
+
|
48
|
+
The -n, --no-interactive option will keep the first file of each group of repetitions and remove the rest of the duplicates without asking.
|
49
|
+
|
50
|
+
The -o, --no-progress option will make CRF run without showing the progress bar.
|
51
|
+
|
52
|
+
The default version compares the size and SHA256 checksums of the files (which is more than enough in most cases). When using the crf command directly on the command line the interactive and progress bar options are enabled by default. But, when using the class directly on ruby code, these options are disabled by default.
|
53
|
+
|
54
|
+
## Contributing
|
55
|
+
|
56
|
+
1. Fork it
|
57
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
58
|
+
3. Commit your changes (`git commit -am 'Add some feature'`)
|
59
|
+
4. Run rubocop lint (`rubocop -R --format simple`)
|
60
|
+
5. Run rspec tests (`bundle exec rspec`)
|
61
|
+
6. Push your branch (`git push origin my-new-feature`)
|
62
|
+
7. Create a new Pull Request
|
data/Rakefile
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
require 'bundler/gem_tasks'
|
data/bin/crf
ADDED
@@ -0,0 +1,35 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'crf'
|
4
|
+
require 'optparse'
|
5
|
+
|
6
|
+
# Command line defaults: interactive removal and progress bar enabled, full comparison.
options = { interactive: true, progress: true, fast: false }

parser = OptionParser.new do |opts|
  opts.banner = 'Usage: crf PATH [-f] [-n] [-o]'

  opts.on('-f', '--fast', 'Only checks for files of the same size') do
    options[:fast] = true
  end

  opts.on('-n', '--no-interactive', 'Saves the first file and removes all the repetitions without asking') do
    options[:interactive] = false
  end

  opts.on('-o', '--no-progress', 'Hides the progress bar while executing') do
    options[:progress] = false
  end

  opts.on('-h', '--help', 'Displays help') do
    puts opts
    exit
  end
end

begin
  parser.parse!
rescue OptionParser::ParseError => e
  # An unknown or malformed flag is a usage error: show the message and the usage text
  # instead of letting the exception escape with a backtrace.
  abort "#{e.message}\n#{parser}"
end

# After parse! only the positional arguments are left in ARGV; the first one is the directory.
directory = ARGV[0]

# abort writes to STDERR and exits with a non-zero status, so callers can detect the failure.
abort 'No directory specified.' unless directory && File.directory?(directory)

Crf::Checker.new(directory, options).check_repeated_files
|
data/crf.gemspec
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
lib = File.expand_path('../lib', __FILE__)
|
3
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
+
require 'crf/version'
|
5
|
+
|
6
|
+
Gem::Specification.new do |spec|
  spec.name          = 'crf'
  spec.version       = Crf::VERSION
  spec.authors       = ['Alejandro Bezdjian']
  spec.email         = 'alebezdjian@gmail.com'
  # Time is always available, whereas the Date constant only exists after `require 'date'`,
  # which this file never does. RubyGems only keeps the year, month and day of the value.
  spec.date          = Time.now
  spec.summary       = 'Look for exact duplicated files.'
  spec.description   = 'Library that looks for exact duplicated files in a directory.'
  spec.platform      = Gem::Platform::RUBY
  # Every file tracked by git, except the test and spec directories.
  spec.files         = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec)/}) }
  spec.require_paths = ['lib']
  spec.homepage      = 'https://github.com/alebian/crf'
  spec.license       = 'MIT'
  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  # NOTE: spec.files already excludes test/ and spec/, so this list is always empty.
  spec.test_files    = spec.files.grep(%r{^(test|spec)/})

  spec.add_dependency 'colorize', '~> 0.7', '>= 0.7.5'
  spec.add_dependency 'ruby-progressbar', '~> 1.7', '>= 1.7.0'

  spec.add_development_dependency 'bundler', '>= 1.3.0', '< 2.0'
  spec.add_development_dependency 'byebug' if RUBY_VERSION >= '2.0.0'
  spec.add_development_dependency 'rubocop'
end
|
data/lib/crf/finder.rb
ADDED
@@ -0,0 +1,84 @@
|
|
1
|
+
require 'crf/repetitions_list'
|
2
|
+
require 'digest'
|
3
|
+
require 'ruby-progressbar'
|
4
|
+
|
5
|
+
module Crf
  #
  # This class finds the paths of all the repeated files inside the path passed as argument.
  # Files are grouped by size first (file_identifier) and, unless the fast mode is enabled,
  # the groups are then refined by SHA256 checksum (file_hash).
  #
  class Finder
    #
    # The original path provided, the list of files found inside it and the repetitions hash
    # are accessible from the outside.
    #
    attr_reader :path, :paths, :repetitions

    #
    # Creates the Finder object with a directory where it will look for duplicate files.
    # Path is the string representation of the path of the directory. When fast is true only
    # the file sizes are compared.
    #
    def initialize(path, fast = false)
      @path = path
      @fast = fast
    end

    #
    # Method that looks for the repeated files in the path specified when the object was created.
    # Returns a hash whose values are arrays with the paths of the files considered repeated.
    #
    def search_repeated_files
      @repetitions = first_run
      # The checksum pass is skipped when nothing shares a size or when fast mode was requested.
      return repetitions if repetitions.empty? || @fast
      @repetitions = second_run(repetitions)
    end

    private

    #
    # Gets all file paths in the given directory and subdirectories and stores them in @paths.
    #
    def all_files(path)
      @paths = []
      Dir["#{path.chomp('/')}/**/*"].each { |p| paths << p.freeze if file?(p) }
      paths
    end

    #
    # Checks that the path is a regular file and not a symlink. Directories, FIFOs, sockets
    # and device nodes are rejected: they cannot be duplicates and reading some of them
    # (for example a FIFO) could block while computing the checksum.
    #
    def file?(path)
      File.file?(path) && !File.symlink?(path)
    end

    #
    # This looks for the files with the same size only
    #
    def first_run
      repetitions_list = Crf::RepetitionsList.new
      all_files(path).each do |file_path|
        repetitions_list.add(file_identifier(file_path).freeze, file_path)
      end
      repetitions_list.repetitions
    end

    #
    # The identifier used in the first run: the size of the file in bytes, as a string.
    #
    def file_identifier(path)
      File.size(path).to_s
    end

    #
    # After finding files with the same size, perform a deeper analysis of those
    #
    def second_run(repetitions)
      repetitions_list = Crf::RepetitionsList.new
      repetitions.values.each do |repeated_array|
        repeated_array.each do |file_path|
          repetitions_list.add(file_hash(file_path).freeze, file_path)
        end
      end
      repetitions_list.repetitions
    end

    #
    # The identifier used in the second run: the SHA256 hex digest of the file contents.
    #
    def file_hash(path)
      Digest::SHA256.file(path).hexdigest
    end
  end
end
|
@@ -0,0 +1,50 @@
|
|
1
|
+
require 'crf/repetitions_list'
|
2
|
+
require 'ruby-progressbar'
|
3
|
+
|
4
|
+
module Crf
  #
  # This is a subclass of Crf::Finder that performs the same search while showing a
  # progress bar for each of the two runs.
  #
  class InteractiveFinder < Crf::Finder
    #
    # Method that looks for the repeated files in the path specified when the object was created
    # showing progress bars. Returns the repetitions hash.
    #
    def search_repeated_files
      all_paths = all_files(path)
      progressbar = ProgressBar.create(title: 'First run', total: all_paths.count,
                                       format: '%t: %c/%C %a |%B| %%%P')
      # The list that was just counted is reused, so the directory is scanned only once and
      # the progress bar total always matches the number of increments.
      rep_list = first_run(progressbar, all_paths)
      return @repetitions = rep_list.repetitions if rep_list.repetitions.empty? || @fast
      second_run(rep_list)
    end

    private

    #
    # This looks for the files with the same size only, incrementing the progress bar once
    # per file. file_paths defaults to a fresh scan of the directory when it is not given.
    # Returns the Crf::RepetitionsList built (not just its repetitions hash).
    #
    def first_run(progressbar, file_paths = all_files(path))
      repetitions_list = Crf::RepetitionsList.new
      file_paths.each do |file_path|
        repetitions_list.add(file_identifier(file_path), file_path)
        progressbar.increment
      end
      repetitions_list
    end

    #
    # Groups by checksum the files that share a size, incrementing a second progress bar
    # once per file. Stores the confirmed repetitions in @repetitions and returns them.
    #
    def second_run(repetitions_list)
      progressbar = ProgressBar.create(title: 'Second run', format: '%t: %c/%C %a |%B| %%%P',
                                       total: repetitions_list.total_repetitions)
      confirmed_repetitions_list = Crf::RepetitionsList.new
      repetitions_list.repetitions.values.each do |repeated_array|
        repeated_array.each do |file_path|
          confirmed_repetitions_list.add(file_hash(file_path), file_path)
          progressbar.increment
        end
      end
      @repetitions = confirmed_repetitions_list.repetitions
    end
  end
end
|
@@ -0,0 +1,43 @@
|
|
1
|
+
require 'colorize'
|
2
|
+
|
3
|
+
module Crf
  #
  # This is a subclass of Crf::Remover.
  # It asks the user whether each file should be removed.
  #
  class InteractiveRemover < Crf::Remover
    #
    # This method asks the user whether to delete each of the files contained in the values
    # of the repetitions hash. Returns the sum of the values returned by remove_file for the
    # files the user chose to delete.
    #
    def remove
      saved = 0
      repetitions.each_value do |paths|
        print_all_paths(paths)
        paths.each do |path|
          saved += remove_confirmation(path)
        end
      end
      saved
    end

    private

    #
    # Shows the group of repeated paths before asking about each one of them.
    #
    def print_all_paths(paths)
      STDOUT.puts 'Found this repetitions:'.green
      STDOUT.puts paths.to_s.green
    end

    #
    # Asks for confirmation and removes the file only when the answer is exactly 'y'.
    # Both the question and the answer are written to the log. Returns the result of
    # remove_file, or 0 when the file is kept.
    #
    def remove_confirmation(path)
      STDOUT.print "Do you want to delete the file #{path}? [y/n] ".yellow
      logger.write "Asking to remove #{path}"
      # STDIN.gets returns nil at end of input (closed or exhausted stdin); to_s turns that
      # into an empty answer, which keeps the file instead of raising NoMethodError.
      answer = STDIN.gets.to_s.chomp
      logger.write "User input: #{answer}"
      return 0 unless answer == 'y'
      STDOUT.puts "Removed #{path}".red
      remove_file(path)
    end
  end
end
|
data/lib/crf/logger.rb
ADDED
@@ -0,0 +1,33 @@
|
|
1
|
+
require 'logger'
|
2
|
+
|
3
|
+
module Crf
  #
  # This class is a wrapper of the Logger class, it handles the creation and sets the configuration
  #
  class Logger
    #
    # Creates the logger with the configurations in the path provided or in the current directory
    #
    def initialize(path = 'crf.log')
      # The second positional argument of ::Logger.new is shift_age (log rotation), not a
      # file open mode, so no flag is passed: the file is created when missing and never rotated.
      @logger = ::Logger.new(path)
      configurate_logger
    end

    #
    # Wrapper of the Logger info method
    #
    def write(msg)
      @logger.info msg
    end

    private

    #
    # Sets the date format, the program name and the layout of every log line.
    #
    def configurate_logger
      @logger.datetime_format = Crf::LOGGER_DATE_TIME_FORMAT
      @logger.progname = Crf::GEM_NAME
      @logger.formatter = proc do |_severity, datetime, progname, msg|
        # A custom formatter receives the raw time object, so the configured format has to
        # be applied here; datetime_format alone is only honoured by the default formatter.
        format = @logger.datetime_format
        timestamp = format ? datetime.strftime(format) : datetime.to_s
        "[#{timestamp}] #{progname}: #{msg}\n"
      end
    end
  end
end
|
data/lib/crf/remover.rb
ADDED
@@ -0,0 +1,52 @@
|
|
1
|
+
module Crf
  #
  # This class removes all the repetitions passed as an argument.
  # It keeps the first element of each group of repetitions and deletes the rest.
  #
  class Remover
    #
    # The repetitions hash and the logger object are accessible from the outside.
    #
    attr_reader :repetitions, :logger

    #
    # This object needs the repeated files obtained with Crf::Finder and the logger object.
    # The logger only needs to respond to write(message).
    #
    def initialize(repetitions, logger)
      @repetitions = repetitions
      @logger = logger
    end

    #
    # This method removes all the files contained on each value of the repetitions hash
    # except the first one. This is done without asking the user for confirmation so be careful.
    # Returns the total size in bytes of the files that were removed.
    #
    def remove
      saved = 0
      repetitions.each_value do |paths|
        # drop(1) skips the file that is kept without modifying the caller's array.
        paths.drop(1).each do |path|
          saved += remove_file(path)
        end
      end
      saved
    end

    private

    #
    # Deletes the file and logs the removal. Returns the size in bytes of the deleted file,
    # or 0 when the file could not be read or deleted (missing file, permissions, ...).
    #
    def remove_file(path)
      begin
        # The size has to be read before deleting; both calls can fail at the file system level.
        size = File.size(path)
        File.delete(path)
      rescue SystemCallError => e
        logger.write "Could not remove #{path}: #{e.message}"
        return 0
      end
      log_removal(path, size)
      size
    end

    #
    # Writes the removed path and its size to the log.
    #
    def log_removal(path, size)
      logger.write "Removed #{path}, size: #{size} bytes"
    end
  end
end
|
@@ -0,0 +1,44 @@
|
|
1
|
+
module Crf
  #
  # Data structure that separates the values seen under a single key (uniques) from the
  # values that share a key with at least one other value (repetitions).
  #
  class RepetitionsList
    #
    # Readers for the uniques hash (key => value), the repetitions hash (key => array of
    # values) and the number of values currently stored in the repetitions hash.
    #
    attr_reader :uniques, :repetitions, :total_repetitions

    #
    # Starts with both hashes empty and the counter at zero.
    #
    def initialize
      @total_repetitions = 0
      @repetitions = {}
      @uniques = {}
    end

    #
    # Registers value under key. The first value of a key is stored in uniques; as soon as
    # a second value arrives both are moved to repetitions, and any later value is appended
    # to that group.
    #
    def add(key, value)
      if repetitions.key?(key)
        append_repetition(key, value)
      elsif uniques.key?(key)
        repetition_found(key, value)
      else
        uniques[key] = value
      end
    end

    private

    #
    # Appends one more value to a group that already exists.
    #
    def append_repetition(key, value)
      repetitions[key].push(value)
      @total_repetitions += 1
      nil
    end

    #
    # Creates the group for key with the new value followed by the one previously stored
    # in uniques, which is removed from there.
    #
    def repetition_found(key, value)
      previous = uniques.delete(key)
      repetitions[key] = [value, previous]
      @total_repetitions += 2
      previous
    end
  end
end
|
data/lib/crf/version.rb
ADDED
data/lib/crf.rb
ADDED
@@ -0,0 +1,82 @@
|
|
1
|
+
require 'crf/finder'
|
2
|
+
require 'crf/interactive_finder'
|
3
|
+
require 'crf/remover'
|
4
|
+
require 'crf/interactive_remover'
|
5
|
+
require 'crf/logger'
|
6
|
+
require 'crf/configuration'
|
7
|
+
require 'colorize'
|
8
|
+
|
9
|
+
module Crf
  #
  # This class is the Crf starting point.
  #
  class Checker
    #
    # Options used for every key that the caller does not provide.
    #
    DEFAULT_OPTIONS = { interactive: false, progress: false, fast: false }.freeze

    #
    # The path where it will look for repetitions, the options provided, the repetitions found
    # and the logger object are accessible from the outside and used in the class.
    #
    attr_reader :path, :options, :repetitions, :logger

    #
    # Creates the object saving the directory's path and options provided. Any option that is
    # not given takes its value from DEFAULT_OPTIONS, so a partial hash is enough.
    # It also creates the logger object.
    #
    def initialize(path, options = DEFAULT_OPTIONS)
      @path = path
      @options = DEFAULT_OPTIONS.merge(options || {})
      @logger = Crf::Logger.new
    end

    #
    # Starting point of Crf. You should call this if you want to check if a directory has
    # duplicated files inside. The repetitions found are removed, asking for confirmation
    # first when the interactive option is enabled.
    #
    def check_repeated_files
      find_repetitions
      return no_repetitions_found if repetitions.empty?
      repetitions_found
    end

    private

    #
    # Runs the finder (with progress bars when the progress option is enabled) and stores
    # its result in @repetitions.
    #
    def find_repetitions
      logger.write "Looking for repetitions in #{path}"
      finder_class = options[:progress] ? Crf::InteractiveFinder : Crf::Finder
      @repetitions = finder_class.new(path, options[:fast]).search_repeated_files
    end

    #
    # Reports, in the log and on the screen, that nothing was found.
    #
    def no_repetitions_found
      logger.write 'No repetitions found'
      STDOUT.puts 'No repetitions found'.blue
    end

    #
    # Logs the repetitions, removes them and reports the space recovered.
    #
    def repetitions_found
      logger.write "Repetitions found: #{repetitions.values}"
      space_saved = remove_repetitions
      logger.write "Saved a total of #{space_saved} bytes"
      STDOUT.puts "You saved a total of #{number_to_human_size(space_saved)}".blue
    end

    #
    # Removes the repetitions with the remover that matches the interactive option and
    # returns the value returned by its remove method.
    #
    def remove_repetitions
      remover_class = options[:interactive] ? Crf::InteractiveRemover : Crf::Remover
      remover_class.new(repetitions, logger).remove
    end

    #
    # Formats a size in bytes using bytes, KB, MB or GB (powers of 1024), rounded to two
    # decimals.
    #
    def number_to_human_size(size)
      if size < 1024
        "#{size} bytes"
      elsif size < 1_048_576
        "#{(size.to_f / 1024).round(2)} KB"
      elsif size < 1_073_741_824
        "#{(size.to_f / 1_048_576).round(2)} MB"
      else
        "#{(size.to_f / 1_073_741_824).round(2)} GB"
      end
    end
  end
end
|
metadata
ADDED
@@ -0,0 +1,151 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: crf
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.7
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Alejandro Bezdjian
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2016-01-24 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: colorize
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0.7'
|
20
|
+
- - ">="
|
21
|
+
- !ruby/object:Gem::Version
|
22
|
+
version: 0.7.5
|
23
|
+
type: :runtime
|
24
|
+
prerelease: false
|
25
|
+
version_requirements: !ruby/object:Gem::Requirement
|
26
|
+
requirements:
|
27
|
+
- - "~>"
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: '0.7'
|
30
|
+
- - ">="
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: 0.7.5
|
33
|
+
- !ruby/object:Gem::Dependency
|
34
|
+
name: ruby-progressbar
|
35
|
+
requirement: !ruby/object:Gem::Requirement
|
36
|
+
requirements:
|
37
|
+
- - "~>"
|
38
|
+
- !ruby/object:Gem::Version
|
39
|
+
version: '1.7'
|
40
|
+
- - ">="
|
41
|
+
- !ruby/object:Gem::Version
|
42
|
+
version: 1.7.0
|
43
|
+
type: :runtime
|
44
|
+
prerelease: false
|
45
|
+
version_requirements: !ruby/object:Gem::Requirement
|
46
|
+
requirements:
|
47
|
+
- - "~>"
|
48
|
+
- !ruby/object:Gem::Version
|
49
|
+
version: '1.7'
|
50
|
+
- - ">="
|
51
|
+
- !ruby/object:Gem::Version
|
52
|
+
version: 1.7.0
|
53
|
+
- !ruby/object:Gem::Dependency
|
54
|
+
name: bundler
|
55
|
+
requirement: !ruby/object:Gem::Requirement
|
56
|
+
requirements:
|
57
|
+
- - ">="
|
58
|
+
- !ruby/object:Gem::Version
|
59
|
+
version: 1.3.0
|
60
|
+
- - "<"
|
61
|
+
- !ruby/object:Gem::Version
|
62
|
+
version: '2.0'
|
63
|
+
type: :development
|
64
|
+
prerelease: false
|
65
|
+
version_requirements: !ruby/object:Gem::Requirement
|
66
|
+
requirements:
|
67
|
+
- - ">="
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
version: 1.3.0
|
70
|
+
- - "<"
|
71
|
+
- !ruby/object:Gem::Version
|
72
|
+
version: '2.0'
|
73
|
+
- !ruby/object:Gem::Dependency
|
74
|
+
name: byebug
|
75
|
+
requirement: !ruby/object:Gem::Requirement
|
76
|
+
requirements:
|
77
|
+
- - ">="
|
78
|
+
- !ruby/object:Gem::Version
|
79
|
+
version: '0'
|
80
|
+
type: :development
|
81
|
+
prerelease: false
|
82
|
+
version_requirements: !ruby/object:Gem::Requirement
|
83
|
+
requirements:
|
84
|
+
- - ">="
|
85
|
+
- !ruby/object:Gem::Version
|
86
|
+
version: '0'
|
87
|
+
- !ruby/object:Gem::Dependency
|
88
|
+
name: rubocop
|
89
|
+
requirement: !ruby/object:Gem::Requirement
|
90
|
+
requirements:
|
91
|
+
- - ">="
|
92
|
+
- !ruby/object:Gem::Version
|
93
|
+
version: '0'
|
94
|
+
type: :development
|
95
|
+
prerelease: false
|
96
|
+
version_requirements: !ruby/object:Gem::Requirement
|
97
|
+
requirements:
|
98
|
+
- - ">="
|
99
|
+
- !ruby/object:Gem::Version
|
100
|
+
version: '0'
|
101
|
+
description: Library that looks for exact duplicated files in a directory.
|
102
|
+
email: alebezdjian@gmail.com
|
103
|
+
executables:
|
104
|
+
- crf
|
105
|
+
extensions: []
|
106
|
+
extra_rdoc_files: []
|
107
|
+
files:
|
108
|
+
- ".gitignore"
|
109
|
+
- ".rubocop.yml"
|
110
|
+
- ".travis.yml"
|
111
|
+
- Gemfile
|
112
|
+
- LICENSE.md
|
113
|
+
- README.md
|
114
|
+
- Rakefile
|
115
|
+
- bin/crf
|
116
|
+
- crf.gemspec
|
117
|
+
- lib/crf.rb
|
118
|
+
- lib/crf/configuration.rb
|
119
|
+
- lib/crf/finder.rb
|
120
|
+
- lib/crf/interactive_finder.rb
|
121
|
+
- lib/crf/interactive_remover.rb
|
122
|
+
- lib/crf/logger.rb
|
123
|
+
- lib/crf/remover.rb
|
124
|
+
- lib/crf/repetitions_list.rb
|
125
|
+
- lib/crf/version.rb
|
126
|
+
homepage: https://github.com/alebian/crf
|
127
|
+
licenses:
|
128
|
+
- MIT
|
129
|
+
metadata: {}
|
130
|
+
post_install_message:
|
131
|
+
rdoc_options: []
|
132
|
+
require_paths:
|
133
|
+
- lib
|
134
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
135
|
+
requirements:
|
136
|
+
- - ">="
|
137
|
+
- !ruby/object:Gem::Version
|
138
|
+
version: '0'
|
139
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
140
|
+
requirements:
|
141
|
+
- - ">="
|
142
|
+
- !ruby/object:Gem::Version
|
143
|
+
version: '0'
|
144
|
+
requirements: []
|
145
|
+
rubyforge_project:
|
146
|
+
rubygems_version: 2.5.1
|
147
|
+
signing_key:
|
148
|
+
specification_version: 4
|
149
|
+
summary: Look for exact duplicated files.
|
150
|
+
test_files: []
|
151
|
+
has_rdoc:
|