crf 0.0.7 → 0.0.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rspec +1 -0
- data/.rubocop.yml +1 -1
- data/.travis.yml +0 -1
- data/Gemfile +1 -1
- data/bin/crf +11 -2
- data/crf.gemspec +2 -2
- data/lib/crf.rb +19 -24
- data/lib/crf/configuration.rb +0 -3
- data/lib/crf/finder.rb +19 -27
- data/lib/crf/interactive_finder.rb +2 -13
- data/lib/crf/interactive_remover.rb +3 -10
- data/lib/crf/logger.rb +7 -11
- data/lib/crf/remover.rb +11 -17
- data/lib/crf/repetitions_list.rb +4 -11
- data/lib/crf/version.rb +1 -4
- metadata +20 -8
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 3b6b40091e9563035ada765e0622007dd18d27fc
|
4
|
+
data.tar.gz: 752bac46aef9db45980de0c18bbb65d8ac93cd16
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7111b4a41c5a117b54ea62f2e4a73444c680b26c9630c1b97425e09ff6974a6203150510a6abdc8def73a8fe90ca57520b0fe1e6dc62ea2abd5011895ac6bf9b
|
7
|
+
data.tar.gz: 1740bd54e2394b4e72bb04b25327a4a15264ca1eb4094e278e69d02883495b78f7cbb7ffd2bb44d9e7bf6bceead806c012896bd290b043655b977f95b4040674
|
data/.rspec
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
--color
|
data/.rubocop.yml
CHANGED
data/.travis.yml
CHANGED
data/Gemfile
CHANGED
data/bin/crf
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
|
3
3
|
require 'crf'
|
4
|
+
require 'crf/version'
|
4
5
|
require 'optparse'
|
5
6
|
|
6
7
|
options = { interactive: true, progress: true, fast: false }
|
@@ -20,6 +21,11 @@ parser = OptionParser.new do |opts|
|
|
20
21
|
options[:progress] = false
|
21
22
|
end
|
22
23
|
|
24
|
+
opts.on('-v', '--version', 'Displays version') do
|
25
|
+
puts "CRF Version: #{Crf::VERSION}"
|
26
|
+
exit
|
27
|
+
end
|
28
|
+
|
23
29
|
opts.on('-h', '--help', 'Displays help') do
|
24
30
|
puts opts
|
25
31
|
exit
|
@@ -28,8 +34,11 @@ end
|
|
28
34
|
|
29
35
|
parser.parse!
|
30
36
|
|
31
|
-
|
32
|
-
|
37
|
+
paths = []
|
38
|
+
ARGV.each { |arg| paths << arg if File.directory?(arg) }
|
39
|
+
|
40
|
+
unless paths.empty?
|
41
|
+
Crf::Checker.new(paths, options).check_repeated_files
|
33
42
|
else
|
34
43
|
STDOUT.puts 'No directory specified.'
|
35
44
|
end
|
data/crf.gemspec
CHANGED
@@ -23,6 +23,6 @@ Gem::Specification.new do |spec|
|
|
23
23
|
spec.add_dependency 'ruby-progressbar', '~> 1.7', '>= 1.7.0'
|
24
24
|
|
25
25
|
spec.add_development_dependency 'bundler', '>= 1.3.0', '< 2.0'
|
26
|
-
spec.add_development_dependency 'byebug' if RUBY_VERSION >= '2.0.0'
|
27
|
-
spec.add_development_dependency 'rubocop'
|
26
|
+
spec.add_development_dependency 'byebug', '~> 9.0', '>= 9.0.5' if RUBY_VERSION >= '2.0.0'
|
27
|
+
spec.add_development_dependency 'rubocop', '~> 0.38', '>= 0.37.2'
|
28
28
|
end
|
data/lib/crf.rb
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
require 'crf/version'
|
1
2
|
require 'crf/finder'
|
2
3
|
require 'crf/interactive_finder'
|
3
4
|
require 'crf/remover'
|
@@ -7,30 +8,22 @@ require 'crf/configuration'
|
|
7
8
|
require 'colorize'
|
8
9
|
|
9
10
|
module Crf
|
10
|
-
#
|
11
|
-
# This class is the Crf starting point.
|
12
|
-
#
|
13
11
|
class Checker
|
14
|
-
|
15
|
-
# The path where it will look for repetitions, the options provided, the repetitions found
|
16
|
-
# and the logger files are accesible from the outside and used in the class.
|
17
|
-
#
|
18
|
-
attr_reader :path, :options, :repetitions, :logger
|
12
|
+
attr_reader :paths, :options, :repetitions, :logger
|
19
13
|
|
20
14
|
#
|
21
|
-
# Creates the object saving the
|
15
|
+
# Creates the object saving the paths and options provided. Options are set to
|
22
16
|
# default if they are not given. It also creates the logger file.
|
23
17
|
#
|
24
|
-
|
25
|
-
|
18
|
+
# @param paths [Array] array of paths where the scan will start
|
19
|
+
# @param options [Hash] hash indicating the options of the scan
|
20
|
+
#
|
21
|
+
def initialize(paths, options = { interactive: false, progress: false, fast: false })
|
22
|
+
@paths = paths
|
26
23
|
@options = options
|
27
24
|
@logger = Crf::Logger.new
|
28
25
|
end
|
29
26
|
|
30
|
-
#
|
31
|
-
# Starting point of Crf. You should call this if you want to check if a directory has
|
32
|
-
# duplicated files inside.
|
33
|
-
#
|
34
27
|
def check_repeated_files
|
35
28
|
find_repetitions
|
36
29
|
return no_repetitions_found if repetitions.empty?
|
@@ -40,15 +33,18 @@ module Crf
|
|
40
33
|
private
|
41
34
|
|
42
35
|
def find_repetitions
|
43
|
-
logger.write "Looking for repetitions in #{
|
44
|
-
finder = if options[:progress]
|
45
|
-
Crf::InteractiveFinder.new(path, options[:fast])
|
46
|
-
else
|
47
|
-
Crf::Finder.new(path, options[:fast])
|
48
|
-
end
|
36
|
+
logger.write "Looking for repetitions in #{paths}"
|
49
37
|
@repetitions = finder.search_repeated_files
|
50
38
|
end
|
51
39
|
|
40
|
+
def finder
|
41
|
+
unless instance_variable_defined?(:@finder)
|
42
|
+
@finder = Crf::InteractiveFinder.new(paths, options[:fast]) if options[:progress]
|
43
|
+
@finder = Crf::Finder.new(paths, options[:fast]) unless options[:progress]
|
44
|
+
end
|
45
|
+
@finder
|
46
|
+
end
|
47
|
+
|
52
48
|
def no_repetitions_found
|
53
49
|
logger.write 'No repetitions found'
|
54
50
|
STDOUT.puts 'No repetitions found'.blue
|
@@ -62,9 +58,8 @@ module Crf
|
|
62
58
|
end
|
63
59
|
|
64
60
|
def remove_repetitions
|
65
|
-
|
66
|
-
|
67
|
-
remover.remove
|
61
|
+
return Crf::InteractiveRemover.new(repetitions, logger).remove if options[:interactive]
|
62
|
+
Crf::Remover.new(repetitions, logger).remove
|
68
63
|
end
|
69
64
|
|
70
65
|
def number_to_human_size(size)
|
data/lib/crf/configuration.rb
CHANGED
data/lib/crf/finder.rb
CHANGED
@@ -1,30 +1,24 @@
|
|
1
1
|
require 'crf/repetitions_list'
|
2
2
|
require 'digest'
|
3
3
|
require 'ruby-progressbar'
|
4
|
+
require 'byebug'
|
4
5
|
|
5
6
|
module Crf
|
6
|
-
#
|
7
|
-
# This class finds the paths of all the repeated files inside the path passed as argument.
|
8
|
-
# All files repeated have the same file_identifier and file_hash.
|
9
|
-
#
|
10
7
|
class Finder
|
11
|
-
|
12
|
-
# The original path provided and the list of files inside it are accessible from the outside.
|
13
|
-
#
|
14
|
-
attr_reader :path, :paths, :repetitions
|
8
|
+
attr_reader :paths, :repetitions, :files
|
15
9
|
|
16
|
-
|
10
|
+
##
|
17
11
|
# Creates the Finder object with a directory where it will look for duplicate files.
|
18
12
|
# Path is the string representation of the absolute path of the directory.
|
19
13
|
#
|
20
|
-
|
21
|
-
|
14
|
+
# @param paths [Array] paths of the folders where the scan will start.
|
15
|
+
# @param fast [Boolean] boolean indicating if this class will make a fast scan or not.
|
16
|
+
#
|
17
|
+
def initialize(paths, fast = false)
|
18
|
+
@paths = paths
|
22
19
|
@fast = fast
|
23
20
|
end
|
24
21
|
|
25
|
-
#
|
26
|
-
# Method that looks for the repeated files in the path specified when the object was created.
|
27
|
-
#
|
28
22
|
def search_repeated_files
|
29
23
|
@repetitions = first_run
|
30
24
|
return repetitions if repetitions.empty? || @fast
|
@@ -33,28 +27,26 @@ module Crf
|
|
33
27
|
|
34
28
|
private
|
35
29
|
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
30
|
+
def all_files
|
31
|
+
@files = []
|
32
|
+
paths.each do |path|
|
33
|
+
Dir["#{path.chomp('/')}/**/*"].each do |file_path|
|
34
|
+
@files << file_path.freeze if file?(file_path) && !@files.include?(file_path)
|
35
|
+
end
|
36
|
+
end
|
37
|
+
@files
|
43
38
|
end
|
44
39
|
|
45
|
-
#
|
46
|
-
# Checks if the file is not a symlink or a directory.
|
47
|
-
#
|
48
40
|
def file?(path)
|
49
41
|
!File.directory?(path) && !File.symlink?(path)
|
50
42
|
end
|
51
43
|
|
52
|
-
|
44
|
+
##
|
53
45
|
# This looks for the files with the same size only
|
54
46
|
#
|
55
47
|
def first_run
|
56
48
|
repetitions_list = Crf::RepetitionsList.new
|
57
|
-
all_files
|
49
|
+
all_files.each do |file_path|
|
58
50
|
repetitions_list.add(file_identifier(file_path).freeze, file_path)
|
59
51
|
end
|
60
52
|
repetitions_list.repetitions
|
@@ -64,7 +56,7 @@ module Crf
|
|
64
56
|
File.size(path).to_s
|
65
57
|
end
|
66
58
|
|
67
|
-
|
59
|
+
##
|
68
60
|
# After finding files with the same size, perform a deeper analysis of those
|
69
61
|
#
|
70
62
|
def second_run(repetitions)
|
@@ -2,17 +2,9 @@ require 'crf/repetitions_list'
|
|
2
2
|
require 'ruby-progressbar'
|
3
3
|
|
4
4
|
module Crf
|
5
|
-
#
|
6
|
-
# This class finds the paths of all the repeated files inside the path passed as argument.
|
7
|
-
# All files repeated have the same file_identifier and file_hash.
|
8
|
-
#
|
9
5
|
class InteractiveFinder < Crf::Finder
|
10
|
-
#
|
11
|
-
# Method that looks for the repeated files in the path specified when the object was created
|
12
|
-
# showing progress bars.
|
13
|
-
#
|
14
6
|
def search_repeated_files
|
15
|
-
all_paths = all_files
|
7
|
+
all_paths = all_files
|
16
8
|
progressbar = ProgressBar.create(title: 'First run', total: all_paths.count,
|
17
9
|
format: '%t: %c/%C %a |%B| %%%P')
|
18
10
|
rep_list = first_run(progressbar)
|
@@ -22,12 +14,9 @@ module Crf
|
|
22
14
|
|
23
15
|
private
|
24
16
|
|
25
|
-
#
|
26
|
-
# This looks for the files with the same size only
|
27
|
-
#
|
28
17
|
def first_run(progressbar)
|
29
18
|
repetitions_list = Crf::RepetitionsList.new
|
30
|
-
all_files
|
19
|
+
all_files.each do |file_path|
|
31
20
|
repetitions_list.add(file_identifier(file_path), file_path)
|
32
21
|
progressbar.increment
|
33
22
|
end
|
@@ -1,15 +1,8 @@
|
|
1
1
|
require 'colorize'
|
2
|
+
require 'pp'
|
2
3
|
|
3
4
|
module Crf
|
4
|
-
#
|
5
|
-
# This is a subclass of Crf::Remover.
|
6
|
-
# It asks the user if he/she wants to remove each file.
|
7
|
-
#
|
8
5
|
class InteractiveRemover < Crf::Remover
|
9
|
-
#
|
10
|
-
# This method asks the user if he/she wants to delete each of the files contained in the values
|
11
|
-
# of the repetitions hash.
|
12
|
-
#
|
13
6
|
def remove
|
14
7
|
saved = 0
|
15
8
|
repetitions.each_value do |paths|
|
@@ -25,13 +18,13 @@ module Crf
|
|
25
18
|
|
26
19
|
def print_all_paths(paths)
|
27
20
|
STDOUT.puts 'Found this repetitions:'.green
|
28
|
-
STDOUT.puts paths.
|
21
|
+
STDOUT.puts paths.pretty_inspect.green
|
29
22
|
end
|
30
23
|
|
31
24
|
def remove_confirmation(path)
|
32
25
|
STDOUT.print "Do you want to delete the file #{path}? [y/n] ".yellow
|
33
26
|
logger.write "Asking to remove #{path}"
|
34
|
-
answer =
|
27
|
+
answer = $stdin.gets.chomp
|
35
28
|
logger.write "User input: #{answer}"
|
36
29
|
if answer == 'y'
|
37
30
|
STDOUT.puts "Removed #{path}".red
|
data/lib/crf/logger.rb
CHANGED
@@ -1,23 +1,19 @@
|
|
1
1
|
require 'logger'
|
2
2
|
|
3
3
|
module Crf
|
4
|
-
#
|
5
|
-
# This class is a wrapper of the Logger class, it hanldes the creation and sets the configuration
|
6
|
-
#
|
7
4
|
class Logger
|
8
|
-
|
5
|
+
##
|
9
6
|
# Creates the logger with the configurations in the path provided or in the current directory
|
10
7
|
#
|
8
|
+
# @param path [String] path where the logger is or will be created.
|
9
|
+
#
|
11
10
|
def initialize(path = 'crf.log')
|
12
11
|
@logger = ::Logger.new(path, File::CREAT)
|
13
12
|
configurate_logger
|
14
13
|
end
|
15
14
|
|
16
|
-
|
17
|
-
|
18
|
-
#
|
19
|
-
def write(msg)
|
20
|
-
@logger.info msg
|
15
|
+
def write(message)
|
16
|
+
@logger.info message
|
21
17
|
end
|
22
18
|
|
23
19
|
private
|
@@ -25,8 +21,8 @@ module Crf
|
|
25
21
|
def configurate_logger
|
26
22
|
@logger.datetime_format = Crf::LOGGER_DATE_TIME_FORMAT
|
27
23
|
@logger.progname = Crf::GEM_NAME
|
28
|
-
@logger.formatter = proc do |_severity,
|
29
|
-
"[#{
|
24
|
+
@logger.formatter = proc do |_severity, date_time, program_name, message|
|
25
|
+
"[#{date_time}] #{program_name}: #{message}\n"
|
30
26
|
end
|
31
27
|
end
|
32
28
|
end
|
data/lib/crf/remover.rb
CHANGED
@@ -1,25 +1,21 @@
|
|
1
1
|
module Crf
|
2
|
-
#
|
3
|
-
# This class removes all the repetitions passed as an argument.
|
4
|
-
# It saves the first element of the repetitions and deletes the rest.
|
5
|
-
#
|
6
2
|
class Remover
|
7
|
-
#
|
8
|
-
# The repetitions hash and the logger file are accessible from the outside.
|
9
|
-
#
|
10
3
|
attr_reader :repetitions, :logger
|
11
4
|
|
12
|
-
|
5
|
+
##
|
13
6
|
# This object needs the repeated files obtained with Crf::Finder and the logger object.
|
14
7
|
#
|
8
|
+
# @param repetitions [Hash] repetitions found by one of the finder classes.
|
9
|
+
# @param logger [Crf::Logger] logger that this class will write to.
|
10
|
+
#
|
15
11
|
def initialize(repetitions, logger)
|
16
12
|
@repetitions = repetitions
|
17
13
|
@logger = logger
|
18
14
|
end
|
19
15
|
|
20
|
-
|
16
|
+
##
|
21
17
|
# This method removes all the files contained on each value of the repetitions hash
|
22
|
-
# except the first one. This is done without asking the user for confirmation
|
18
|
+
# except the first one. This is done without asking the user for confirmation.
|
23
19
|
#
|
24
20
|
def remove
|
25
21
|
saved = 0
|
@@ -36,13 +32,11 @@ module Crf
|
|
36
32
|
|
37
33
|
def remove_file(path)
|
38
34
|
size = File.size(path)
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
return 0
|
45
|
-
end
|
35
|
+
File.delete(path)
|
36
|
+
log_removal(path, size)
|
37
|
+
return size
|
38
|
+
rescue
|
39
|
+
return 0
|
46
40
|
end
|
47
41
|
|
48
42
|
def log_removal(path, size)
|
data/lib/crf/repetitions_list.rb
CHANGED
@@ -1,27 +1,20 @@
|
|
1
1
|
module Crf
|
2
|
-
#
|
3
|
-
# This is the data structure used to get the repeated files.
|
4
|
-
#
|
5
2
|
class RepetitionsList
|
6
|
-
#
|
7
|
-
# The uniques and repetitions hashes can be accessed from the outside, along with the
|
8
|
-
# total repetitions count.
|
9
|
-
#
|
10
3
|
attr_reader :uniques, :repetitions, :total_repetitions
|
11
4
|
|
12
|
-
#
|
13
|
-
# Creates the RepetitionsList object with everything it needs.
|
14
|
-
#
|
15
5
|
def initialize
|
16
6
|
@uniques = {}
|
17
7
|
@repetitions = {}
|
18
8
|
@total_repetitions = 0
|
19
9
|
end
|
20
10
|
|
21
|
-
|
11
|
+
##
|
22
12
|
# Adds an element to one of the two hashes. If the value is repeated, then it erases it from
|
23
13
|
# uniques and adds it in the repetitions hash along with the duplicate.
|
24
14
|
#
|
15
|
+
# @param key result of the function that identifies the file
|
16
|
+
# @param value [String] path of the file
|
17
|
+
#
|
25
18
|
def add(key, value)
|
26
19
|
if repetitions.key?(key)
|
27
20
|
repetitions[key] << value
|
data/lib/crf/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: crf
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.8
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Alejandro Bezdjian
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-
|
11
|
+
date: 2016-09-22 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: colorize
|
@@ -74,30 +74,42 @@ dependencies:
|
|
74
74
|
name: byebug
|
75
75
|
requirement: !ruby/object:Gem::Requirement
|
76
76
|
requirements:
|
77
|
+
- - "~>"
|
78
|
+
- !ruby/object:Gem::Version
|
79
|
+
version: '9.0'
|
77
80
|
- - ">="
|
78
81
|
- !ruby/object:Gem::Version
|
79
|
-
version:
|
82
|
+
version: 9.0.5
|
80
83
|
type: :development
|
81
84
|
prerelease: false
|
82
85
|
version_requirements: !ruby/object:Gem::Requirement
|
83
86
|
requirements:
|
87
|
+
- - "~>"
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '9.0'
|
84
90
|
- - ">="
|
85
91
|
- !ruby/object:Gem::Version
|
86
|
-
version:
|
92
|
+
version: 9.0.5
|
87
93
|
- !ruby/object:Gem::Dependency
|
88
94
|
name: rubocop
|
89
95
|
requirement: !ruby/object:Gem::Requirement
|
90
96
|
requirements:
|
97
|
+
- - "~>"
|
98
|
+
- !ruby/object:Gem::Version
|
99
|
+
version: '0.38'
|
91
100
|
- - ">="
|
92
101
|
- !ruby/object:Gem::Version
|
93
|
-
version:
|
102
|
+
version: 0.37.2
|
94
103
|
type: :development
|
95
104
|
prerelease: false
|
96
105
|
version_requirements: !ruby/object:Gem::Requirement
|
97
106
|
requirements:
|
107
|
+
- - "~>"
|
108
|
+
- !ruby/object:Gem::Version
|
109
|
+
version: '0.38'
|
98
110
|
- - ">="
|
99
111
|
- !ruby/object:Gem::Version
|
100
|
-
version:
|
112
|
+
version: 0.37.2
|
101
113
|
description: Library that looks for exact duplicated files in a directory.
|
102
114
|
email: alebezdjian@gmail.com
|
103
115
|
executables:
|
@@ -106,6 +118,7 @@ extensions: []
|
|
106
118
|
extra_rdoc_files: []
|
107
119
|
files:
|
108
120
|
- ".gitignore"
|
121
|
+
- ".rspec"
|
109
122
|
- ".rubocop.yml"
|
110
123
|
- ".travis.yml"
|
111
124
|
- Gemfile
|
@@ -143,9 +156,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
143
156
|
version: '0'
|
144
157
|
requirements: []
|
145
158
|
rubyforge_project:
|
146
|
-
rubygems_version: 2.5.1
|
159
|
+
rubygems_version: 2.4.5.1
|
147
160
|
signing_key:
|
148
161
|
specification_version: 4
|
149
162
|
summary: Look for exact duplicated files.
|
150
163
|
test_files: []
|
151
|
-
has_rdoc:
|