rivalry 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,17 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in rivalry.gemspec
4
+ gemspec
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2013 Anthony Cook
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,53 @@
1
+ # Rivalry
2
+
3
+ Rivalry is the (fast!) duplicate file finder written in Ruby.
4
+
5
+ It supports specifically targeting audio and image files and ignores SCM folders such as `.git` and `.hg` by default.
6
+
7
+ ## Installation
8
+
9
+ Using RubyGems, just run:
10
+
11
+ $ gem install rivalry
12
+
13
+ If you wish to use Rivalry's functionality inside your app, you can instead add this line to your application's Gemfile:
14
+
15
+ gem 'rivalry'
16
+
17
+ And then execute:
18
+
19
+ $ bundle
20
+
21
+ ## Usage
22
+
23
+ On the command line, the most basic usage is:
24
+
25
+ ```bash
26
+ $ rivalry ~/directory/with/duplicates
27
+ ```
28
+
29
+ It will display the duplicate files on the screen, which can then be grepped or otherwise used to remove the unwanted duplicates.
30
+
31
+ Example output:
32
+
33
+ ```bash
34
+ $ rivalry ..
35
+ Scanning all files...
36
+ -- Total Size : 173 MB
37
+ -- Total Count : 95 files
38
+ -- Similar : 34 files with the same size
39
+
40
+ Determining duplicates...
41
+ -- Dupes Count : 2 files
42
+
43
+ /Users/acook/Dropbox/Projects/Negutyv Xeiro/Audio/Cytokine Storm/Negutyv Xeiro - Cytokine Storm [Mørch Mix].mp3
44
+ /Users/acook/Dropbox/Projects/Negutyv Xeiro/Audio/Cytokine Storm/alexcyto/Cytokine Storm m.mp3
45
+ ```
46
+
47
+ ## Contributing
48
+
49
+ 1. Fork it
50
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
51
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
52
+ 4. Push to the branch (`git push origin my-new-feature`)
53
+ 5. Create new Pull Request
@@ -0,0 +1 @@
1
+ require "bundler/gem_tasks"
@@ -0,0 +1,11 @@
1
#!/usr/bin/env ruby

# Command-line entry point: load the library, then hand the raw arguments
# to Rivalry.run.
begin
  require 'rivalry'
rescue LoadError
  # The library is not on the load path (e.g. running from a checkout):
  # add the sibling lib/ directory and try once more.
  lib_dir = File.expand_path('../lib', File.dirname(__FILE__))
  $LOAD_PATH.push lib_dir
  require 'rivalry'
end

Rivalry.run(ARGV)
11
+
@@ -0,0 +1,27 @@
1
# Pry is an optional debugging aid; the library must still load without it.
# A `rescue` modifier only handles StandardError, and LoadError is a
# ScriptError, so the failure has to be rescued by name in a full block.
begin
  require 'pry'
rescue LoadError
  # pry is not installed: carry on without it
end
2
+
3
+ require 'rivalry/version'
4
+ require 'rivalry/display'
5
+ require 'rivalry/base'
6
+ require 'rivalry/scanner'
7
+ require 'rivalry/file_path'
8
+ require 'rivalry/files_with_data'
9
+ require 'rivalry/find_files'
10
+ require 'rivalry/hash_files'
11
+
12
module Rivalry
  extend Display
  module_function

  # Scans the directory named by the first argument and prints every file the
  # scanner reports, one per line.
  #
  # args - list of command-line arguments; only the first entry is used. When
  #        it is missing, `usage` is called in its place.
  #
  # Returns the collection of files produced by the scanner.
  def run(args)
    target = args.first || usage
    duplicates = Rivalry::Scanner.new(target).scan

    out
    duplicates.each { |duplicate| puts duplicate.to_s }
  end
end
@@ -0,0 +1,5 @@
1
module Rivalry
  # Common superclass for the Rivalry worker classes; its only job is to mix
  # the Display helpers into every subclass.
  class Base
    include Rivalry::Display
  end
end
@@ -0,0 +1,86 @@
1
require 'io/console'

module Rivalry
  # Terminal output helpers.
  #
  # Including or extending Display mixes in Methods (real output) unless the
  # global $silent is truthy at that moment, in which case NoopMethods (the
  # same method names, all doing nothing) is mixed in instead.
  module Display
    # Hook run by `include Display`: adds the chosen helpers as instance methods.
    def self.included(object)
      object.send :include, implementation
    end

    # Hook run by `extend Display`: adds the chosen helpers to the object itself.
    def self.extended(object)
      object.send :extend, implementation
    end

    # Selects the helper module according to the $silent global.
    def self.implementation
      $silent ? NoopMethods : Methods
    end
    private_class_method :implementation

    # The helpers that actually write to the terminal.
    module Methods
      # Column count used when the terminal size cannot be determined.
      DEFAULT_WIDTH = 80

      # [divisor, label] pairs, largest unit first.
      BYTE_UNITS = [[1073741824, 'GB'], [1048576, 'MB'], [1024, 'KB'], [1, 'B']].freeze

      # Prints the command-line usage message and exits with status 1.
      def usage
        puts "usage: #{$0} path/to/search/for/duplicates"
        exit 1
      end

      # Prints one line of text. Unless $verbose is set, the line is cut to
      # the terminal width.
      def out(text = '')
        puts($verbose ? text : text[0..(width - 1)])
      end

      # Reports the entry currently being processed.
      #
      # type  - label for the kind of step (e.g. 'FILE')
      # file  - the entry being reported
      # count - optional running counter
      # total - optional total; only shown together with count
      #
      # In verbose mode every report gets its own line; otherwise the current
      # line is erased and overwritten with the (truncated) report.
      def progress(type, file, count = nil, total = nil)
        tally =
          if count && total
            " (#{count}/#{total})"
          elsif count
            " (#{count})"
          else
            ''
          end

        text = "-- #{type}#{tally}: #{file}"

        if $verbose
          puts text
        else
          clear_line
          print text[0..(width - 1)]
        end
      end

      # Moves the cursor to column 0 and erases the line (ANSI escapes).
      def clear_line
        start_of_line = "\e[0G"
        erase_line = "\e[2K"
        print start_of_line, erase_line
      end

      # Returns the terminal width in columns, or DEFAULT_WIDTH when there is
      # no usable terminal (output redirected, no controlling TTY, platform
      # without window-size support).
      def width
        console = IO.console
        columns = console ? console.winsize[1] : 0
        columns > 0 ? columns : DEFAULT_WIDTH
      rescue SystemCallError, NotImplementedError
        DEFAULT_WIDTH
      end

      # Formats a byte count using the largest unit that fits, truncating to a
      # whole number: humanize(1536) => "1 KB", humanize(500) => "500 B".
      def humanize(bytes)
        # Values below one byte (zero) match no unit, so fall back to plain bytes.
        divisor, label = BYTE_UNITS.find { |size, _| bytes >= size } || BYTE_UNITS.last
        "#{bytes / divisor} #{label}"
      end
    end

    # Silent stand-ins: every method defined in Methods is aliased to a no-op.
    module NoopMethods
      # Accepts and ignores any arguments so it can replace helpers of any arity.
      def noop(*); end

      Methods.instance_methods(false).each do |name|
        alias_method name, :noop
      end
    end
  end
end
@@ -0,0 +1,6 @@
1
# Loaded here so this file works no matter which Rivalry file is required first.
require 'pathname'

module Rivalry
  # A Pathname that also answers to `each` and `exists?`.
  class FilePath < Pathname
    # `each` iterates over the entries of the directory, like each_child.
    alias_method :each, :each_child
    # `exists?` is another spelling of Pathname#exist?.
    alias_method :exists?, :exist?
  end
end
+ end
@@ -0,0 +1,7 @@
1
module Rivalry
  # A Hash whose missing keys are filled in, on first access, with a fresh
  # empty Array that is stored under that key. Used to collect files under a
  # shared attribute.
  class FilesWithData < Hash
    def initialize
      super() do |store, missing_key|
        store[missing_key] = []
      end
    end
  end
end
@@ -0,0 +1,63 @@
1
# Find.prune is used below to skip unwanted directories.
require 'find'

module Rivalry
  # Walks a directory tree and groups the wanted files by their size.
  class FindFiles < Base
    # pathname    - root of the search; must respond to `find` and yield
    #               entries answering directory?, file? and size
    # do_want     - Hash of name => Regexp matched against a file's extension
    #               (including the dot). An empty Hash accepts every file.
    # do_not_want - Hash of name => Regexp matched against a directory's
    #               basename; matching directories are not descended into.
    def initialize(pathname, do_want = {}, do_not_want = {})
      @pathname = pathname
      @do_want = do_want
      @do_not_want = do_not_want
    end
    attr_reader :pathname, :do_want, :do_not_want, :count, :size

    # Traverses the tree, reporting progress for every entry visited.
    #
    # Returns a FilesWithData mapping each file size to the list of wanted
    # files having that size. Afterwards `count` holds the number of wanted
    # files and `size` the sum of their sizes.
    def find
      self.size = 0
      self.count = 0

      files_with_sizes = FilesWithData.new

      pathname.find do |file|
        if file.directory?
          visit_directory file
        elsif valid? file
          visit_file file, files_with_sizes
        end
      end

      files_with_sizes
    end

    protected

    attr_writer :count, :size

    # Reports a directory and prunes the traversal when it is unwanted.
    def visit_directory(directory)
      if do_not_want? directory
        progress 'SKIP DIR', directory
        Find.prune
      else
        progress 'DIRECTORY', directory
      end
    end

    # Records a wanted file under its size and updates the running totals;
    # unwanted files are only reported.
    def visit_file(file, files_with_sizes)
      unless do_want? file
        progress 'SKIP FILE', file
        return
      end

      progress 'FILE', file

      file_size = file.size
      files_with_sizes[file_size] << file

      self.size += file_size
      self.count += 1
    end

    # True when the file's extension matches any wanted pattern, or when no
    # patterns were given at all.
    def do_want?(path)
      return true if do_want.empty?

      extension = File.extname path
      do_want.any? { |_name, pattern| extension =~ pattern }
    end

    # True when the basename matches any unwanted pattern.
    def do_not_want?(path)
      basename = File.basename path
      do_not_want.any? { |_name, pattern| basename =~ pattern }
    end

    # Only regular files are considered; dangling symlinks, FIFOs, sockets and
    # device nodes are ignored.
    def valid?(file)
      file.file?
    end
  end
end
@@ -0,0 +1,32 @@
1
module Rivalry
  # Groups a list of files by the SHA-256 digest of their contents.
  class HashFiles < Base
    attr_reader :similar_files

    # similar_files - the files to digest; must respond to each and length.
    def initialize(similar_files)
      @similar_files = similar_files
    end

    # Digests every file, reporting progress as it goes.
    #
    # Returns a FilesWithData mapping each hex digest to the files having it.
    #
    # NOTE(review): this method replaces the inherited Object#hash for
    # instances of this class.
    def hash
      out
      out "Determining duplicates..."

      digests = FilesWithData.new

      similar_files.each_with_index do |file, index|
        progress 'HASHING', file, index + 1, total_similar

        checksum = Digest::SHA256.file(file).to_s
        digests[checksum] << file
      end

      digests
    end

    # Number of files that will be digested.
    def total_similar
      similar_files.length
    end
  end
end
@@ -0,0 +1,57 @@
1
+ require 'find'
2
+ require 'pathname'
3
+ require 'digest'
4
+
5
module Rivalry
  # Finds duplicate files below a directory: candidates are first grouped by
  # size, then files sharing a size are grouped by content digest.
  class Scanner < Base
    # Directory basenames that are never descended into.
    IGNORES = {
      dir: /\A\.\z/,
      scm: /\A\.(git|hg|svn|gitkeep)\z/
    }.freeze

    # File extensions (including the dot) that are scanned. Each pattern is
    # anchored at both ends so that e.g. ".pngx" is not mistaken for ".png".
    WANTS = {
      audio: /\A\.(mp3|ogg|flac|wav|aiff|mid)\z/i,
      images: /\A\.(png|jpg|gif|bmp|tga|jpeg|tif|tiff)\z/i
    }.freeze

    # pathname - directory to scan; expanded to an absolute path.
    def initialize(pathname)
      @pathname = FilePath.new File.expand_path(pathname)
    end
    attr_reader :pathname

    # Runs the full scan, printing a summary of each stage.
    #
    # Returns a flat list of every file whose content digest is shared with
    # at least one other scanned file.
    def scan
      out "Scanning all files..."

      finder = FindFiles.new pathname, wants, ignores
      files_with_sizes = finder.find

      # Only files that share their size with another file can be duplicates.
      similar_files = dedup files_with_sizes

      clear_line
      out "-- Total Size : #{humanize finder.size}"
      out "-- Total Count : #{finder.count} files"
      out "-- Similar : #{similar_files.length} files with the same size"

      hasher = HashFiles.new similar_files
      files_with_hashes = hasher.hash

      duplicate_files = dedup files_with_hashes

      clear_line
      out "-- Dupes Count : #{duplicate_files.length} files"

      duplicate_files
    end

    # Keeps only the groups holding more than one file and flattens them into
    # a single list.
    #
    # files_with_data - Hash of attribute => list of files.
    def dedup(files_with_data)
      files_with_data.values.select { |group| group && group.length > 1 }.flatten
    end

    # Patterns for directories to skip (see IGNORES).
    def ignores
      IGNORES
    end

    # Patterns for file extensions to include (see WANTS).
    def wants
      WANTS
    end
  end
end
@@ -0,0 +1,3 @@
1
module Rivalry
  # Version string of the rivalry gem.
  VERSION = '0.0.1'
end
@@ -0,0 +1,19 @@
1
# -*- encoding: utf-8 -*-
# Gem specification for rivalry. The version is read from the library itself,
# so lib/ is put on the load path first.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'rivalry/version'

Gem::Specification.new do |gem|
  gem.name          = "rivalry"
  gem.version       = Rivalry::VERSION
  gem.authors       = ["Anthony Cook"]
  gem.email         = ["anthonymichaelcook@gmail.com"]
  gem.description   = %q{The (fast!) duplicate file finder for Ruby! Supports media file and ignoring SCM directories.}
  gem.summary       = %q{The (fast!) duplicate file finder for Ruby!}
  gem.homepage      = "https://github.com/acook/rivalry#readme"
  # Matches the MIT License text shipped in LICENSE.txt.
  gem.license       = "MIT"

  # Package every file tracked by git; executables and test files are picked
  # out of that list by directory.
  gem.files         = `git ls-files`.split($/)
  gem.executables   = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ["lib"]
end
metadata ADDED
@@ -0,0 +1,63 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: rivalry
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Anthony Cook
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2013-01-27 00:00:00.000000000 Z
13
+ dependencies: []
14
+ description: The (fast!) duplicate file finder for Ruby! Supports media file and ignoring
15
+ SCM directories.
16
+ email:
17
+ - anthonymichaelcook@gmail.com
18
+ executables:
19
+ - rivalry
20
+ extensions: []
21
+ extra_rdoc_files: []
22
+ files:
23
+ - .gitignore
24
+ - Gemfile
25
+ - LICENSE.txt
26
+ - README.markdown
27
+ - Rakefile
28
+ - bin/rivalry
29
+ - lib/rivalry.rb
30
+ - lib/rivalry/base.rb
31
+ - lib/rivalry/display.rb
32
+ - lib/rivalry/file_path.rb
33
+ - lib/rivalry/files_with_data.rb
34
+ - lib/rivalry/find_files.rb
35
+ - lib/rivalry/hash_files.rb
36
+ - lib/rivalry/scanner.rb
37
+ - lib/rivalry/version.rb
38
+ - rivalry.gemspec
39
+ homepage: https://github.com/acook/rivalry#readme
40
+ licenses: []
41
+ post_install_message:
42
+ rdoc_options: []
43
+ require_paths:
44
+ - lib
45
+ required_ruby_version: !ruby/object:Gem::Requirement
46
+ none: false
47
+ requirements:
48
+ - - ! '>='
49
+ - !ruby/object:Gem::Version
50
+ version: '0'
51
+ required_rubygems_version: !ruby/object:Gem::Requirement
52
+ none: false
53
+ requirements:
54
+ - - ! '>='
55
+ - !ruby/object:Gem::Version
56
+ version: '0'
57
+ requirements: []
58
+ rubyforge_project:
59
+ rubygems_version: 1.8.24
60
+ signing_key:
61
+ specification_version: 3
62
+ summary: The (fast!) duplicate file finder for Ruby!
63
+ test_files: []