itunes-rm-dups 0.1.1-universal-darwin-9
Sign up to get free protection for your applications and to get access to all the features.
- data/README +21 -0
- data/bin/itunes-rm-dups +4 -0
- data/lib/itunes_dup_handler.rb +299 -0
- metadata +54 -0
data/README
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
iTunes Duplicate Track Remover
|
2
|
+
|
3
|
+
After installation, run with this command:
|
4
|
+
|
5
|
+
itunes-rm-dups
|
6
|
+
|
7
|
+
and follow the prompts, which should be self-explanatory.
|
8
|
+
|
9
|
+
NOTE: This program is designed to detect and remove only exact duplicates.
|
10
|
+
Some apparent duplicates may be left intact in the iTunes library after the
|
11
|
+
program runs. These are not true duplicates because their MD5 hashes do not
|
12
|
+
match for one reason or another. It could be that one version of the track is
|
13
|
+
protected and the other is not; that one version is compressed more than the
|
14
|
+
other, or that one version uses different protection encryption, or that the
|
15
|
+
versions simply have a few bits set differently for whatever reason. Please
|
16
|
+
check the files manually to be sure.
|
17
|
+
|
18
|
+
|
19
|
+
Daniel Choi
|
20
|
+
Cambridge, MA
|
21
|
+
dhchoi@gmail.com
|
data/bin/itunes-rm-dups
ADDED
@@ -0,0 +1,299 @@
|
|
1
|
+
# To run, make sure this file is on the RUBYLIB, and type this on the command line:
|
2
|
+
#
|
3
|
+
# > ruby -r itunes_dup_handler -e 'run'
|
4
|
+
#
|
5
|
+
# And follow the prompts.
|
6
|
+
#
|
7
|
+
# NOTE: This program is designed to detect and remove only exact duplicates.
|
8
|
+
# Some apparent duplicates may be left intact in the iTunes library after the
|
9
|
+
# program runs. These are not true duplicates because their MD5 hashes do not
|
10
|
+
# match for one reason or another. It could be that one version of the track is
|
11
|
+
# protected and the other is not; that one version is compressed more than the
|
12
|
+
# other, or that one version uses different protection encryption, or that the
|
13
|
+
# versions simply have a few bits set differently for whatever reason. Please
|
14
|
+
# check the files manually to be sure.
|
15
|
+
|
16
|
+
require 'digest/md5'
|
17
|
+
require 'osx/cocoa'
|
18
|
+
require 'yaml'
|
19
|
+
require 'cgi'
|
20
|
+
include OSX
|
21
|
+
require 'fileutils'
|
22
|
+
include FileUtils
|
23
|
+
OSX.require_framework 'ScriptingBridge'
|
24
|
+
|
25
|
+
|
26
|
+
|
27
|
+
# Scripting Bridge application object for iTunes, looked up by its bundle identifier.
Itunes = SBApplication.applicationWithBundleIdentifier_("com.apple.iTunes")
|
28
|
+
# File paths that find_duplicates could not analyze; run reports them to the user.
SkippedFiles = []
|
29
|
+
|
30
|
+
# Wraps one iTunes track whose media file may be a duplicate of another
# track's file. It stores the track's iTunes database ID, the Unix path of the
# media file, and the MD5 digest of that file.
class PossibleDuplicateTrack
  attr_accessor :unix_path, :database_id, :md5

  # itunesTrack - iTunes track object; must respond to +databaseID+ and +location+.
  # md5         - MD5 hex digest of the track's file, or nil when none was computed.
  def initialize(itunesTrack, md5)
    @database_id = itunesTrack.databaseID.to_i
    @unix_path = unixify_path( itunesTrack.location.to_s )
    @md5 = md5
  end

  # Deletes the track with this database ID from the first playlist of the
  # first iTunes source. (An earlier version looked in playlists[1] instead.)
  #
  # Returns true when the track was found and deleted, false otherwise.
  def remove_from_itunes
    predicate = OSX::NSPredicate.predicateWithFormat("databaseID == #{@database_id}")
    library_track = Itunes.sources.first.playlists.first.tracks.filteredArrayUsingPredicate(predicate).first
    if library_track
      library_track.delete
      puts " Removed from iTunes."
      true
    else
      puts " ERROR trying to remove this track from iTunes. Please handle manually."
      # NOTE This is probably a podcast
      puts " Continuing..."
      sleep 2
      false
    end
  end

  # Deletes the media file from the file system.
  #
  # A file that is already gone or cannot be removed is reported instead of
  # raising, so one bad file does not abort a whole batch of deletions.
  #
  # Returns true when the file was deleted, false when deletion failed.
  def delete_file
    FileUtils.rm @unix_path
    puts " Deleted from file system."
    true
  rescue SystemCallError => e
    puts " ERROR deleting file from file system (#{e.message}). Please handle manually."
    false
  end

  # Removes the track from iTunes and, only if that succeeded, deletes its file.
  #
  # Returns true when both steps succeeded, false otherwise.
  def delete!
    puts "Deleting: #@unix_path"
    remove_from_itunes && delete_file
  end
end
|
69
|
+
|
70
|
+
# Computes the MD5 hex digest of the file at +file_path+, printing progress
# to standard output as it goes.
#
# The digest is computed in-process with Digest::MD5.file, which streams the
# file. No shell is involved, so file names containing quotes, backticks or
# dollar signs are handled literally.
#
# Returns the lowercase hex digest string, or nil when the file cannot be read.
def calculate_md5( file_path )
  print "calculating md5 for file: #{file_path}"
  print " --> "
  STDOUT.flush

  md5 = begin
    Digest::MD5.file( file_path ).hexdigest
  rescue SystemCallError, IOError
    nil # unreadable or missing file; callers treat nil as "skip this file"
  end
  print md5
  puts
  md5
end
|
82
|
+
|
83
|
+
# Converts a file URL string such as "file://localhost/Users/me/My%20Song.mp3"
# into a plain Unix path ("/Users/me/My Song.mp3").
#
# path - URL string; it is not modified (frozen strings are accepted).
#
# Both the "file://localhost/..." and the empty-host "file:///..." forms are
# recognised. Literal "+", "'", "(" and ")" characters are preserved, and every
# "$" in the result is prefixed with a backslash (kept for backward
# compatibility with existing callers).
#
# Returns a new, stripped String.
def unixify_path(path)
  escaped = path.to_s.dup
  # CGI.unescape turns a literal "+" into a space (and would touch the other
  # symbols' escapes), so percent-encode them first to survive the round trip.
  ['+', "'", '(', ')'].each do |symbol|
    escaped = escaped.gsub(symbol, CGI.escape(symbol))
  end
  escaped = escaped.gsub('$', '\$')
  # Drop the scheme and host: either "file://localhost" or a bare "file://"
  # that is directly followed by the path's leading slash.
  CGI.unescape( escaped.sub(/^file:\/\/(?:localhost|(?=\/))/, '') ).strip
end
alias :unixify :unixify_path
|
92
|
+
|
93
|
+
# Strips a trailing " <digit>" copy suffix that sits directly before a
# three-character file extension, e.g. "Song 1.mp3" becomes "Song.mp3".
#
# The extension dot is matched literally, so names that merely end in a digit
# followed by four arbitrary characters are left alone.
#
# Returns a new String; +path+ is not modified.
def normalize_path(path)
  path.sub(/\s\d(\.\w{3})$/, '\1')
end
|
96
|
+
|
97
|
+
# Groups iTunes tracks by the MD5 digest of their media files.
#
# tracks    - collection of iTunes track objects responding to +location+ and +name+.
# run_limit - optional maximum number of tracks to digest (nil means no limit).
# mb_limit  - optional size ceiling in whole megabytes; files larger than this
#             are skipped (nil means no limit).
#
# Tracks with no file location are removed from iTunes. Tracks whose file is
# missing, exceeds mb_limit, or yields no digest are recorded in SkippedFiles.
#
# Returns a hash keyed by MD5 digest whose values are arrays of two or more
# PossibleDuplicateTrack objects; digests seen only once are dropped.
def find_duplicates( tracks, run_limit = nil, mb_limit=nil )
  digested_count = 0
  md5_tracks = Hash.new {|hash, key| hash[key] = []} # will be keyed by MD5 hash
  tracks.each do |track|
    # Stop once exactly run_limit tracks have been digested.
    break if run_limit && digested_count >= run_limit

    track_file_location = unixify_path( track.location.to_s )
    if track_file_location.nil? || track_file_location.strip == ''
      puts ">>> No file location for #{track.name}. Removing this track from iTunes..."
      PossibleDuplicateTrack.new(track, nil).remove_from_itunes
      next
    end
    unless File.exist?(track_file_location)
      puts ">>> No file found for #{track.name} at #{track_file_location}"
      SkippedFiles << track_file_location
      next
    end
    mb = File.size(track_file_location) / (1024 * 1024) # whole megabytes, rounded down
    if mb_limit && mb > mb_limit
      puts "SKIPPING FILE DUE TO EXCESS SIZE (#{mb}MB): #{track.name} : #{track_file_location}"
      SkippedFiles << track_file_location
      puts "Please deal with any duplicates of this file manually"
      next
    end

    # We will use the MD5 digest to identify identical files.
    md5 = calculate_md5( track_file_location )
    if md5.nil? # missing file or bad file path
      SkippedFiles << track_file_location
      next
    end
    # This might be a duplicate file
    md5_tracks[md5] << PossibleDuplicateTrack.new( track, md5 )
    digested_count += 1
  end
  puts "=" * 40
  # puts "Total Tracks: #{tracks.size}" # This number is confusing
  puts "Total Unique Media Files: #{md5_tracks.keys.size}"
  # Keep only digests shared by at least two tracks.
  tracks_with_dups = md5_tracks.delete_if {|key, value| value.size < 2}
  puts "Tracks With Duplicates: #{tracks_with_dups.keys.size}"
  puts "=" * 40
  tracks_with_dups
end
|
145
|
+
|
146
|
+
# Takes an array of PossibleDuplicateTrack objects that share one MD5 digest
# and decides which is the original: the track whose file name (basename) is
# shortest. All the others are treated as duplicates.
#
# Ties between equally long names are broken by comparing the full paths, so
# the choice is deterministic regardless of input order.
#
# tracks - array of objects responding to +unix_path+; it is not modified.
#
# Returns a two-element array: the original track (nil when +tracks+ is
# empty) and the array of remaining duplicates.
def determine_duplicate( tracks )
  ordered = tracks.sort_by {|track| [File.basename( track.unix_path ).size, track.unix_path] } # shortest name first
  original = ordered.shift
  return original, ordered
end
|
155
|
+
|
156
|
+
# Interactive entry point. Prints a banner, asks for a maximum file size,
# digests the file tracks of the first iTunes source's first library playlist,
# lists the duplicates found and, after confirmation, deletes them.
#
# pattern   - optional pattern matched (with =~) against track names to
#             restrict which tracks are analyzed.
# run_limit - optional cap on the number of tracks to digest; passed through
#             to find_duplicates.
#
# Answers are read from standard input explicitly. If standard input is closed
# when the final confirmation is requested, nothing is deleted.
#
# Calls exit when the user aborts or when no duplicates are found.
def run(pattern=nil, run_limit=nil)
  title = "itunes duplicate track removal machine"
  width = 80
  puts "-" * width
  puts
  puts title.center(width)
  puts
  puts "by daniel choi".center(width)
  puts "betahouse".center(width)
  puts "cambridge, ma, usa".center(width)
  puts
  puts "contact: dhchoi@gmail.com".center(width)
  puts
  puts "-" * width

  puts
  sleep 1
  puts <<END
Running program. It may take several minutes to analyze all the tracks in your
iTunes library.
END
  sleep 2
  puts
  puts <<END
To avoid any potential hangs and crashes, don't use iTunes while the program is
running.
END
  sleep 2
  puts
  puts <<END
When this program is done analyzing your tracks, it will show you the duplicates
it has detected and ask for your confirmation before removing any files. So feel
free to go get some coffee and come back.
END
  puts
  sleep 1
  puts "To make the program run faster, we can skip iTunes files that are very large."
  puts "How many megabytes do you want to set as the maximum? "
  print "(Default: 100mb. Type 'none' for no limit): "
  response = $stdin.gets.to_s # nil at end of input becomes "", i.e. the default
  if response =~ /none/
    mb_limit = nil
  elsif response.to_i <= 0
    # Blank, non-numeric or non-positive input falls back to the default.
    mb_limit = 100
  else
    mb_limit = response.to_i
  end
  if mb_limit
    puts "Setting maximum file size to analyze to #{mb_limit}MB."
  else
    puts "Setting maximum file size to analyze to unlimited."
  end
  sleep 3
  source = Itunes.sources.first
  puts "Using iTunes source: #{source.name}"
  puts "Using iTunes playlist: #{source.playlists.first.name}"

  duplicate_tracks = []
  fileTracks = source.libraryPlaylists[0].fileTracks
  if pattern # filter tracks
    fileTracks = fileTracks.select {|track| track.name.to_s =~ pattern}
  end
  puts "Calculating MD5 digests for all tracks with files..."
  sleep 2
  puts
  tracks_with_dups = find_duplicates( fileTracks, run_limit, mb_limit )
  puts "Finding duplicates..."
  puts
  sleep 2

  # Now do something with the duplicate tracks
  puts "=" * 40
  SkippedFiles.compact!
  SkippedFiles.reject! {|x| x.strip == ''}
  unless SkippedFiles.empty?
    puts "#{SkippedFiles.length} files were skipped, either because they could not be found or because they were too large (over #{mb_limit}MB) to analyze quickly. Also, files may be skipped because their path contains non-standard characters. Please look for duplicates in these files manually."
    puts
    puts "Please handle any duplicates of these special files manually."
    puts
    puts "See these files? (y/n) (default: n) "
    if $stdin.gets.to_s.strip.downcase == "y"
      puts "Skipped Files:"
      SkippedFiles.each do |file|
        puts file
      end
      puts "=" * 40
      puts "Continue? (y/n)"
      if $stdin.gets.to_s =~ /^n/
        puts "Aborted."
        exit
      end
    end
  end

  puts "FOUND DUPLICATES:"
  sleep 1
  if tracks_with_dups.empty?
    puts "No true duplicates found."
    puts "Exiting."
    display_note
    exit
  end
  tracks_with_dups.each do |k,tracks|
    puts "#{k} :"
    original, dups = determine_duplicate( tracks )
    puts " #{original.database_id} : #{original.unix_path} <-- original"
    dups.each do |track|
      puts " #{track.database_id} : #{track.unix_path} <-- duplicate"
    end
    duplicate_tracks.concat( dups )
  end

  num_to_remove = duplicate_tracks.length
  print "Go ahead and delete the #{num_to_remove} files marked 'duplicate'? (y/n) (default: y) "

  answer = $stdin.gets
  # A closed input stream is not a confirmation: never delete without an answer.
  if answer.nil? || answer.strip.downcase == "n"
    puts "Canceled."
  else
    puts "OK, here goes..."
    # DELETE EM
    removed = 0
    duplicate_tracks.each do |track|
      begin
        track.delete!
      rescue SystemCallError => e
        # Keep going with the remaining duplicates if one file cannot be removed.
        puts " ERROR deleting #{track.unix_path}: #{e.message}"
      end
      # Count only files that are really gone from the file system.
      removed += 1 unless File.exist?(track.unix_path)
    end
    sleep 2
    puts "Done. #{removed} of #{num_to_remove} duplicates removed."
    display_note
  end
end
|
285
|
+
|
286
|
+
# Prints the closing notice explaining that only exact (MD5-identical)
# duplicates are detected, framed by one blank line before and after.
def display_note
  note_lines = [
    '# NOTE: This program is designed to detect and remove only exact duplicates.',
    '# Some apparent duplicates may be left intact in the iTunes library after the',
    '# program runs. These are not true duplicates because their MD5 hashes do not',
    '# match for one reason or another. It could be that one version of the track is',
    '# protected and the other is not; that one version is compressed more than the',
    '# other, or that one version uses different protection encryption, or that the',
    '# versions simply have a few bits set differently for whatever reason. Please',
    '# check the files manually to be sure.'
  ]
  puts
  note_lines.each {|line| puts line }
  puts
end
|
metadata
ADDED
@@ -0,0 +1,54 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: itunes-rm-dups
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.1
|
5
|
+
platform: universal-darwin-9
|
6
|
+
authors:
|
7
|
+
- Daniel Choi
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
|
12
|
+
date: 2008-05-21 00:00:00 -04:00
|
13
|
+
default_executable:
|
14
|
+
dependencies: []
|
15
|
+
|
16
|
+
description: This Ruby gem allows an OS X Leopard user to remove duplicate tracks from her iTunes library and file system from the command line. Duplicates are detected robustly, using MD5 digests of track files instead of iTunes metadata matching, which can produce false positives and false negatives. The program is a Ruby script that uses RubyCocoa and the OS X Cocoa Scripting Bridge. To run the program, type itunes-rm-dups on the command line and follow the prompts.
|
17
|
+
email: dhchoi@gmail.com
|
18
|
+
executables:
|
19
|
+
- itunes-rm-dups
|
20
|
+
extensions: []
|
21
|
+
|
22
|
+
extra_rdoc_files:
|
23
|
+
- README
|
24
|
+
files:
|
25
|
+
- lib/itunes_dup_handler.rb
|
26
|
+
- README
|
27
|
+
has_rdoc: true
|
28
|
+
homepage: http://cesareborgia.com/software/itunes-rm-dups/
|
29
|
+
post_install_message:
|
30
|
+
rdoc_options: []
|
31
|
+
|
32
|
+
require_paths:
|
33
|
+
- lib
|
34
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
35
|
+
requirements:
|
36
|
+
- - ">="
|
37
|
+
- !ruby/object:Gem::Version
|
38
|
+
version: "0"
|
39
|
+
version:
|
40
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
41
|
+
requirements:
|
42
|
+
- - ">="
|
43
|
+
- !ruby/object:Gem::Version
|
44
|
+
version: "0"
|
45
|
+
version:
|
46
|
+
requirements: []
|
47
|
+
|
48
|
+
rubyforge_project:
|
49
|
+
rubygems_version: 1.0.1
|
50
|
+
signing_key:
|
51
|
+
specification_version: 2
|
52
|
+
summary: Remove duplicate tracks from an iTunes library and file system from the command line.
|
53
|
+
test_files: []
|
54
|
+
|