rivalry 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,17 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in rivalry.gemspec
4
+ gemspec
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2013 Anthony Cook
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,53 @@
1
+ # Rivalry
2
+
3
+ Rivalry is the (fast!) duplicate file finder written in Ruby.
4
+
5
+ It supports specifically targeting audio and image files and ignores SCM folders such as `.git` and `.hg` by default.
6
+
7
+ ## Installation
8
+
9
+ Install it with RubyGems:
10
+
11
+ $ gem install rivalry
12
+
13
+ If you wish to use Rivalry's functionality inside your app, you can instead add this line to your application's Gemfile:
14
+
15
+ gem 'rivalry'
16
+
17
+ And then execute:
18
+
19
+ $ bundle
20
+
21
+ ## Usage
22
+
23
+ On the command line, the most basic usage is:
24
+
25
+ ```bash
26
+ $ rivalry ~/directory/with/duplicates
27
+ ```
28
+
29
+ It will display the duplicate files on the screen, which can then be grepped or otherwise used to remove the unwanted duplicates.
30
+
31
+ Example output:
32
+
33
+ ```bash
34
+ $ rivalry ..
35
+ Scanning all files...
36
+ -- Total Size : 173 MB
37
+ -- Total Count : 95 files
38
+ -- Similar : 34 files with the same size
39
+
40
+ Determining duplicates...
41
+ -- Dupes Count : 2 files
42
+
43
+ /Users/acook/Dropbox/Projects/Negutyv Xeiro/Audio/Cytokine Storm/Negutyv Xeiro - Cytokine Storm [Mørch Mix].mp3
44
+ /Users/acook/Dropbox/Projects/Negutyv Xeiro/Audio/Cytokine Storm/alexcyto/Cytokine Storm m.mp3
45
+ ```
46
+
47
+ ## Contributing
48
+
49
+ 1. Fork it
50
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
51
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
52
+ 4. Push to the branch (`git push origin my-new-feature`)
53
+ 5. Create new Pull Request
@@ -0,0 +1 @@
1
+ require "bundler/gem_tasks"
@@ -0,0 +1,11 @@
1
#!/usr/bin/env ruby

# Load rivalry from the regular load path when possible; if that fails, add
# the lib directory that sits next to this script and try once more.
begin
  require 'rivalry'
rescue LoadError
  local_lib = File.expand_path('../lib', File.dirname(__FILE__))
  $LOAD_PATH << local_lib
  require 'rivalry'
end

# Hand the raw command-line arguments to the library entry point.
Rivalry.run(ARGV)
11
+
@@ -0,0 +1,27 @@
1
# pry is only an optional debugging aid. A `rescue` modifier would not help
# here: it traps StandardError only, and LoadError is a ScriptError, so the
# failure has to be rescued explicitly.
begin
  require 'pry'
rescue LoadError
  # pry is not installed; carry on without it.
end
2
+
3
+ require 'rivalry/version'
4
+ require 'rivalry/display'
5
+ require 'rivalry/base'
6
+ require 'rivalry/scanner'
7
+ require 'rivalry/file_path'
8
+ require 'rivalry/files_with_data'
9
+ require 'rivalry/find_files'
10
+ require 'rivalry/hash_files'
11
+
12
module Rivalry
  extend Display
  module_function

  # Command-line entry point.
  #
  # args - Array of command-line arguments; the first one is the path to
  #        scan. When it is missing, `usage` is called instead.
  #
  # Scans the path, emits a separator line through `out`, then prints each
  # file returned by the scanner on its own line. Returns the scan result.
  def run(args)
    target = args.first || usage
    duplicates = Rivalry::Scanner.new(target).scan

    out
    duplicates.each { |duplicate| puts duplicate.to_s }
  end
end
@@ -0,0 +1,5 @@
1
module Rivalry
  # Shared superclass whose only job is to mix the Display helpers into
  # every subclass instance.
  class Base
    include ::Rivalry::Display
  end
end
@@ -0,0 +1,86 @@
1
require 'io/console'

module Rivalry
  # Terminal output helpers.
  #
  # Including or extending Display mixes in either the real implementations
  # (Methods) or do-nothing stand-ins (NoopMethods). The choice is made from
  # the global $silent flag at the moment the mixin happens.
  module Display
    # Hook: a class or module did `include Display`.
    def self.included(object)
      object.send :include, implementation
    end

    # Hook: an object did `extend Display`.
    def self.extended(object)
      object.send :extend, implementation
    end

    # Selects the module to mix in according to the current value of $silent.
    def self.implementation
      $silent ? NoopMethods : Methods
    end
    private_class_method :implementation

    # The real output helpers.
    module Methods
      # Column count used when the terminal size cannot be determined.
      DEFAULT_WIDTH = 80

      # [bytes per unit, label] pairs ordered from largest to smallest.
      BYTE_UNITS = [[1073741824, 'GB'], [1048576, 'MB'], [1024, 'KB'], [1, 'B']]

      # Prints the usage line and terminates the process with status 1.
      def usage
        puts "usage: #{$0} path/to/search/for/duplicates"
        exit 1
      end

      # Prints a line of text. Unless $verbose is set, the text is cut to the
      # terminal width.
      def out(text = '')
        puts($verbose ? text : text[0, width])
      end

      # Reports progress on one file.
      #
      # type  - label such as 'FILE' or 'HASHING'
      # file  - the item being processed (interpolated into the message)
      # count - optional position of the item
      # total - optional number of items; only shown together with count
      #
      # With $verbose every report is printed on its own line; otherwise the
      # current terminal line is cleared and overwritten with the (truncated)
      # report.
      def progress(type, file, count = nil, total = nil)
        tally =
          if count && total then " (#{count}/#{total})"
          elsif count then " (#{count})"
          else ''
          end

        line = "-- #{type}#{tally}: #{file}"

        if $verbose
          puts line
        else
          clear_line
          print line[0, width]
        end
      end

      # Moves the cursor to the start of the line, then erases the line.
      def clear_line
        cursor_to_start = "\e[0G"
        erase_line = "\e[2K"
        print cursor_to_start, erase_line
      end

      # Returns the number of columns of the controlling terminal, or
      # DEFAULT_WIDTH when there is no terminal or its size is unavailable.
      #
      # io/console is used instead of a raw ioctl so that no platform-specific
      # request number is needed and a missing terminal does not raise.
      def width
        console = IO.console
        columns = console ? console.winsize[1] : 0
        columns > 0 ? columns : DEFAULT_WIDTH
      rescue StandardError, NotImplementedError
        DEFAULT_WIDTH
      end

      # Formats a byte count using the largest unit that fits, with integer
      # division (e.g. 1536 => "1 KB"). Counts below one byte fall back to
      # the smallest unit.
      def humanize(bytes)
        divisor, label = BYTE_UNITS.find { |size, _| bytes >= size } || BYTE_UNITS.last
        "#{bytes / divisor} #{label}"
      end
    end

    # Silent stand-ins: every method defined directly on Methods is aliased
    # to a no-op that returns nil.
    #
    # NOTE(review): this includes `usage`, `width` and `humanize`, so in
    # silent mode `usage` neither prints nor exits.
    module NoopMethods
      # Accepts any arguments so it can replace methods of every arity.
      def noop(*); end

      Methods.instance_methods(false).each do |name|
        alias_method name, :noop
      end
    end
  end
end
@@ -0,0 +1,6 @@
1
module Rivalry
  # Pathname with two additional method names:
  #   each    - same as each_child
  #   exists? - same as exist?
  class FilePath < Pathname
    alias each each_child
    alias exists? exist?
  end
end
@@ -0,0 +1,7 @@
1
module Rivalry
  # Hash whose missing keys are automatically initialised with (and stored
  # as) a fresh empty Array, so values can be appended without setup.
  class FilesWithData < Hash
    def initialize
      super() { |table, key| table[key] = [] }
    end
  end
end
@@ -0,0 +1,63 @@
1
module Rivalry
  # Walks a directory tree and groups the wanted files by their size.
  class FindFiles < Base
    attr_reader :pathname, :do_want, :do_not_want, :count, :size

    # pathname    - root of the walk; must respond to #find and yield entries
    #               that respond to directory?, exists? and size
    # do_want     - Hash of name => pattern matched against a file extension
    # do_not_want - Hash of name => pattern matched against a directory's
    #               basename
    def initialize(pathname, do_want = {}, do_not_want = {})
      @pathname, @do_want, @do_not_want = pathname, do_want, do_not_want
    end

    # Traverses the tree below pathname, reporting progress for every entry.
    #
    # Unwanted directories are pruned from the walk. Wanted files are added
    # to the result under their size and counted in #count and #size, both
    # of which are reset at the start of each call.
    #
    # Returns a FilesWithData mapping file size => list of files.
    def find
      self.size = 0
      self.count = 0

      by_size = FilesWithData.new

      pathname.find do |entry|
        if entry.directory?
          if do_not_want? entry
            progress 'SKIP DIR', entry
            Find.prune
          else
            progress 'DIRECTORY', entry
          end
          next
        end

        next unless valid? entry

        unless do_want? entry
          progress 'SKIP FILE', entry
          next
        end

        progress 'FILE', entry

        bytes = entry.size
        by_size[bytes] << entry

        self.size += bytes
        self.count += 1
      end

      by_size
    end

    protected

    attr_writer :count, :size

    # true when the extension of path matches any wanted pattern, else nil.
    def do_want?(path)
      suffix = File.extname path
      do_want.each_value.find { |pattern| suffix =~ pattern } && true
    end

    # true when the basename of path matches any unwanted pattern, else nil.
    def do_not_want?(path)
      leaf = File.basename path
      do_not_want.each_value.find { |pattern| leaf =~ pattern } && true
    end

    # A file is only considered when it exists.
    def valid?(file)
      file.exists?
    end
  end
end
@@ -0,0 +1,32 @@
1
module Rivalry
  # Groups a list of files by the SHA256 digest of their contents.
  class HashFiles < Base
    attr_reader :similar_files

    # similar_files - collection of files to digest; must respond to each
    #                 and length
    def initialize(similar_files)
      @similar_files = similar_files
    end

    # Digests every file in similar_files, reporting progress as it goes.
    #
    # Returns a FilesWithData mapping hex digest => list of files.
    #
    # NOTE(review): the method name overrides Object#hash, so instances of
    # this class should not be used as Hash keys.
    def hash
      out
      out "Determining duplicates..."

      by_digest = FilesWithData.new

      similar_files.each.with_index(1) do |file, position|
        progress 'HASHING', file, position, total_similar

        by_digest[Digest::SHA256.file(file).to_s] << file
      end

      by_digest
    end

    # Number of files that will be digested.
    def total_similar
      similar_files.length
    end
  end
end
@@ -0,0 +1,57 @@
1
+ require 'find'
2
+ require 'pathname'
3
+ require 'digest'
4
+
5
module Rivalry
  # Runs a complete duplicate search below one directory: collect wanted
  # files grouped by size, digest the files that share a size, and keep the
  # ones that also share a digest.
  class Scanner < Base
    attr_reader :pathname

    # pathname - path of the directory to scan; it is expanded to an
    #            absolute path and wrapped in a FilePath
    def initialize(pathname)
      @pathname = FilePath.new File.expand_path(pathname)
    end

    # Performs the scan and prints a summary of each phase.
    #
    # Returns a flat Array of the files whose content digest is shared with
    # at least one other file.
    def scan
      out "Scanning all files..."

      finder = FindFiles.new pathname, wants, ignores
      files_with_sizes = finder.find

      # Only files that share their size with another file can be duplicates.
      similar_files = dedup files_with_sizes

      clear_line
      out "-- Total Size : #{humanize finder.size}"
      out "-- Total Count : #{finder.count} files"
      out "-- Similar : #{similar_files.length} files with the same size"

      hasher = HashFiles.new similar_files
      files_with_hashes = hasher.hash

      duplicate_files = dedup files_with_hashes

      clear_line
      out "-- Dupes Count : #{duplicate_files.length} files"

      duplicate_files
    end

    # Flattens the groups that contain more than one file into one list.
    #
    # files_with_data - mapping of some key (size, digest) => list of files
    #
    # Returns an Array of files; groups with fewer than two entries are dropped.
    def dedup(files_with_data)
      groups = files_with_data.map { |_, file_list| file_list }
      groups.compact.select { |file_list| file_list.length > 1 }.flatten.compact
    end

    # Patterns handed to FindFiles as the "do not want" set.
    # Anchored with \A and \z so only an exact name matches.
    def ignores
      {
        dir: /\A\.\z/,
        scm: /\A\.(git|hg|svn|gitkeep)\z/
      }
    end

    # Patterns handed to FindFiles as the "do want" set.
    # Both patterns are anchored at both ends, so an extension such as
    # ".pngx" is not mistaken for an image.
    def wants
      {
        audio: /\A\.(mp3|ogg|flac|wav|aiff|mid)\z/i,
        images: /\A\.(png|jpg|gif|bmp|tga|jpeg|tif|tiff)\z/i
      }
    end
  end
end
@@ -0,0 +1,3 @@
1
module Rivalry
  # Version string of the gem; read by the gemspec.
  VERSION = '0.0.1'
end
@@ -0,0 +1,19 @@
1
+ # -*- encoding: utf-8 -*-
2
# Make lib/ loadable so the version constant can be read without installing
# the gem first.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'rivalry/version'

Gem::Specification.new do |gem|
  gem.name          = "rivalry"
  gem.version       = Rivalry::VERSION
  gem.authors       = ["Anthony Cook"]
  gem.email         = ["anthonymichaelcook@gmail.com"]
  gem.description   = %q{The (fast!) duplicate file finder for Ruby! Supports media file and ignoring SCM directories.}
  gem.summary       = %q{The (fast!) duplicate file finder for Ruby!}
  gem.homepage      = "https://github.com/acook/rivalry#readme"
  # Declared to match the MIT LICENSE file shipped with the gem.
  gem.license       = "MIT"

  # Package every file tracked by git; executables and test files are picked
  # out of that list by directory.
  gem.files         = `git ls-files`.split($/)
  gem.executables   = gem.files.grep(%r{^bin/}).map { |f| File.basename(f) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ["lib"]
end
metadata ADDED
@@ -0,0 +1,63 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: rivalry
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Anthony Cook
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2013-01-27 00:00:00.000000000 Z
13
+ dependencies: []
14
+ description: The (fast!) duplicate file finder for Ruby! Supports media file and ignoring
15
+ SCM directories.
16
+ email:
17
+ - anthonymichaelcook@gmail.com
18
+ executables:
19
+ - rivalry
20
+ extensions: []
21
+ extra_rdoc_files: []
22
+ files:
23
+ - .gitignore
24
+ - Gemfile
25
+ - LICENSE.txt
26
+ - README.markdown
27
+ - Rakefile
28
+ - bin/rivalry
29
+ - lib/rivalry.rb
30
+ - lib/rivalry/base.rb
31
+ - lib/rivalry/display.rb
32
+ - lib/rivalry/file_path.rb
33
+ - lib/rivalry/files_with_data.rb
34
+ - lib/rivalry/find_files.rb
35
+ - lib/rivalry/hash_files.rb
36
+ - lib/rivalry/scanner.rb
37
+ - lib/rivalry/version.rb
38
+ - rivalry.gemspec
39
+ homepage: https://github.com/acook/rivalry#readme
40
+ licenses: []
41
+ post_install_message:
42
+ rdoc_options: []
43
+ require_paths:
44
+ - lib
45
+ required_ruby_version: !ruby/object:Gem::Requirement
46
+ none: false
47
+ requirements:
48
+ - - ! '>='
49
+ - !ruby/object:Gem::Version
50
+ version: '0'
51
+ required_rubygems_version: !ruby/object:Gem::Requirement
52
+ none: false
53
+ requirements:
54
+ - - ! '>='
55
+ - !ruby/object:Gem::Version
56
+ version: '0'
57
+ requirements: []
58
+ rubyforge_project:
59
+ rubygems_version: 1.8.24
60
+ signing_key:
61
+ specification_version: 3
62
+ summary: The (fast!) duplicate file finder for Ruby!
63
+ test_files: []