dfm 0.0.3 → 0.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -13
- data/README.md +43 -34
- data/bin/dfm +0 -0
- data/lib/dfm/version.rb +4 -1
- data/lib/dfm.rb +31 -5
- metadata +4 -4
checksums.yaml
CHANGED
@@ -1,15 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
|
5
|
-
data.tar.gz: !binary |-
|
6
|
-
MzBiY2RkYTcxNmI0ODlhMGE0ZTdhNmU3NjFhMTRkYTBhZGE4ODRiZg==
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 7fece188fe6d67b2870fc093036f2f155be9dd6d
|
4
|
+
data.tar.gz: db1ada3f9d2d0739ab122238ecba24f5d3c96075
|
7
5
|
SHA512:
|
8
|
-
metadata.gz:
|
9
|
-
|
10
|
-
ZmMwYWYzZDZiMTBjMWE0NjVmNmIwOTIyMzg0ODZkNmMyZjgwZTljOWZkZmU4
|
11
|
-
ZjYyODNlM2VkMDZkMjAwYWE4NTNkYjMxZjllODFjYThkYTY2NWE=
|
12
|
-
data.tar.gz: !binary |-
|
13
|
-
Yjc3OTc5MTlhYmVmOTM5OTgyY2MxZGQyYzg2OGZkZDcwMjE4YjM1ZWIzMDYy
|
14
|
-
NTljYjZlNmVmZDJlMjg3NDI1ODFjMmM1ZWJhYjEwM2RiNmE4MjMyZTJjZDYx
|
15
|
-
OGMxZWM1MDQ1OWQ2NjBlNzJhNmJlNWMxN2NkZmM1ZDc1MTU1OTE=
|
6
|
+
metadata.gz: 9d3cd8d0966d3c8ad45caf4dc7978b4aa52641ebc167ced0baf338a7efd80ec03af43c47416fe0dbe080f6ea3450e806ef9b5eefff1d2d6a6b41e123c7615c12
|
7
|
+
data.tar.gz: 222170ee269434ae6918b2ea6dfcf57ed27ec8711ff6522a19a95005b633021a72b384ccf4ab251dff9dc7b42c13a033527a9c4ebff2ef42f4ad963e866b4ce9
|
data/README.md
CHANGED
@@ -1,35 +1,44 @@
|
|
1
|
-
dfm
|
2
|
-
===
|
3
|
-
|
4
|
-
Duplicate File Manager
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
1
|
+
dfm
|
2
|
+
===
|
3
|
+
|
4
|
+
Duplicate File Manager
|
5
|
+
|
6
|
+
gem install dfm
|
7
|
+
|
8
|
+
The purpose of dfm is to locate duplicate files through a recursive search.
|
9
|
+
|
10
|
+
You can create an instance of the DFM object, optionally specifying the
|
9
11
|
directory path and the file extensions.
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
If you are using a ruby version before 2 then this would be:
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
12
|
+
|
13
|
+
dfm = DFM.new( path: './', filters: ["jpg","png"] )
|
14
|
+
|
15
|
+
If you are using a ruby version before 2 then this would be:
|
16
|
+
|
17
|
+
dfm = DFM.new( { :path => './', :filters => ["jpg","png"] } )
|
18
|
+
|
19
|
+
Or you can use the default behaviour which uses the current directory and searches
|
20
|
+
all files.
|
21
|
+
|
22
|
+
dfm = DFM.new
|
23
|
+
|
24
|
+
Then you may get a hash of the MD5 hexdigest matches (indicating files with
|
25
|
+
identical content) by
|
26
|
+
|
27
|
+
dfm.hex
|
28
|
+
|
29
|
+
And a hash of the duplicate file name matches by
|
30
|
+
|
31
|
+
dfm.name
|
32
|
+
|
33
|
+
Either of these can be called with false if you want only single instances of files.
|
34
|
+
|
35
|
+
dfm.hex( false )
|
36
|
+
dfm.name( false )
|
37
|
+
|
38
|
+
---
|
39
|
+
|
40
|
+
Also included is a command line version which outputs nicely formatted JSON in case
|
41
|
+
you would like to use it with anything else. Type `dfm -h` on the command line to get
|
42
|
+
a list of available options. Running `dfm` by itself will recursively search the current
|
43
|
+
folder for all duplicates by both file name and MD5 hexdigest indexes.
|
44
|
+
|
data/bin/dfm
CHANGED
File without changes
|
data/lib/dfm/version.rb
CHANGED
data/lib/dfm.rb
CHANGED
@@ -1,11 +1,18 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
1
|
+
# Title:: Duplicate File Manager (dfm)
|
2
|
+
# Author:: Daniel P. Clark (mailto:6ftdan@gmail.com)
|
3
|
+
# License:: MIT License
|
4
|
+
['digest','json','dfm/version'].each {|r|require r;}
|
5
|
+
# Class instance of DFM generates a list of files recursively
|
6
|
+
# and indexes them by both MD5 hexdigest and file name
|
7
|
+
# Optional parameters include file extension filters with
|
8
|
+
# { :filters => ["jpg", "png"] } and path such as { :path => "./" }
|
5
9
|
class DFM
|
10
|
+
# File extension filters can be set on DFM.new( { :filters => ["jpg","gif"] } )
|
11
|
+
# or you should assign it to the class instance variable filters
|
12
|
+
# before calling recurse( path )
|
6
13
|
attr_accessor :filters
|
7
14
|
|
8
|
-
def initialize( params = {} )
|
15
|
+
def initialize( params = {} ) #:nodoc:
|
9
16
|
@files_by_hexdigest = {}
|
10
17
|
@files_by_name = {}
|
11
18
|
@filters = Array( params.fetch( :filters, nil ) )
|
@@ -14,22 +21,34 @@ class DFM
|
|
14
21
|
recurse_path( @path )
|
15
22
|
end
|
16
23
|
|
24
|
+
# Prints out JSON list of single copy files by MD5 hexdigest
|
25
|
+
# index or if the parameter is set to "name" then the list is
|
26
|
+
# indexed by file name.
|
17
27
|
def print_singles( opt = "hex" )
|
18
28
|
print_match( { :type => opt, :duplicates => false } )
|
19
29
|
end
|
20
30
|
|
31
|
+
# Prints out JSON list of duplicate copy files by MD5 hexdigest
|
32
|
+
# index or if the parameter is set to "name" then the list is
|
33
|
+
# indexed by file name.
|
21
34
|
def print_duplicates( opt = "hex" )
|
22
35
|
print_match( { :type => opt } )
|
23
36
|
end
|
24
37
|
|
38
|
+
# Returns hash of duplicate files by MD5 hexdigest index. If
|
39
|
+
# the parameter is set to false then the hash returns non-duplicates.
|
25
40
|
def hex( duplicates = true )
|
26
41
|
select_duplicates( { :hash => @files_by_hexdigest, :duplicates => duplicates } )
|
27
42
|
end
|
28
43
|
|
44
|
+
# Returns hash of duplicate files by file name index. If the
|
45
|
+
# parameter is set to false then the hash returns non-duplicates.
|
29
46
|
def name( duplicates = true )
|
30
47
|
select_duplicates( { :hash => @files_by_name, :duplicates => duplicates } )
|
31
48
|
end
|
32
49
|
|
50
|
+
# Start a fresh recursive search with empty hash indexes.
|
51
|
+
# Accepts parameter for path. (See filters for setting file extensions.)
|
33
52
|
def recurse( path )
|
34
53
|
@files_by_hexdigest = {}
|
35
54
|
@files_by_name = {}
|
@@ -38,15 +57,19 @@ class DFM
|
|
38
57
|
|
39
58
|
private
|
40
59
|
|
60
|
+
# Private method which creates indexed hash(es) of files searched.
|
41
61
|
def insert_file( file )
|
42
62
|
# hex
|
43
63
|
file_io = File.open( file, "rb" ) { |io| io.read }
|
44
64
|
hex_file = @hashFunc.hexdigest( file_io )
|
45
65
|
@files_by_hexdigest[ hex_file ] = Array( @files_by_hexdigest[ hex_file ] ) << file
|
66
|
+
|
46
67
|
# name
|
47
68
|
@files_by_name[ File.basename( file ) ] = Array( @files_by_name[ File.basename( file ) ] ) << file
|
48
69
|
end
|
49
70
|
|
71
|
+
# Private method recursive search of parameter path.
|
72
|
+
# Filters by file type if defined.
|
50
73
|
def recurse_path( path = @path )
|
51
74
|
@filters.empty? ? ( filters = "" ) : ( filters = @filters.join( "," ).prepend( ".{" ).<<( "}" ) )
|
52
75
|
Dir.glob( path + '**' + File::SEPARATOR + '*' + filters ).each { |file|
|
@@ -56,16 +79,19 @@ class DFM
|
|
56
79
|
}
|
57
80
|
end
|
58
81
|
|
82
|
+
# Private method selects duplicates from hash, or if set will return hash of single instances.
|
59
83
|
def select_duplicates( opt = { :hash => @files_by_hexdigest, :duplicates => true } )
|
60
84
|
opt[ :hash ].select { |k,v| opt[ :duplicates ] ? ( v.length >= 2 ) : ( v.length == 1 ) }
|
61
85
|
end
|
62
86
|
|
87
|
+
# Private method calls either hex, or name, method to print to JSON via private method print_json.
|
63
88
|
def print_match( opt = { :type => "hex", :duplicates => true } )
|
64
89
|
if !!opt[ :type ][ "hex" ] or !!opt[ :type ][ "name" ]
|
65
90
|
print_json send( opt[ :type ], *Array( opt[ :duplicates ] ) )
|
66
91
|
end
|
67
92
|
end
|
68
93
|
|
94
|
+
# Private method prints formatted JSON to STDOUT
|
69
95
|
def print_json( hash )
|
70
96
|
puts JSON.pretty_generate( hash )
|
71
97
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: dfm
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.3
|
4
|
+
version: 0.0.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Daniel P. Clark / 6ftDan(TM)
|
@@ -32,17 +32,17 @@ require_paths:
|
|
32
32
|
- lib
|
33
33
|
required_ruby_version: !ruby/object:Gem::Requirement
|
34
34
|
requirements:
|
35
|
-
- -
|
35
|
+
- - ">="
|
36
36
|
- !ruby/object:Gem::Version
|
37
37
|
version: '0'
|
38
38
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
39
39
|
requirements:
|
40
|
-
- -
|
40
|
+
- - ">="
|
41
41
|
- !ruby/object:Gem::Version
|
42
42
|
version: '0'
|
43
43
|
requirements: []
|
44
44
|
rubyforge_project:
|
45
|
-
rubygems_version: 2.
|
45
|
+
rubygems_version: 2.3.0
|
46
46
|
signing_key:
|
47
47
|
specification_version: 4
|
48
48
|
summary: Duplicate File Manager.
|