itunes-rm-dups 0.1.1-universal-darwin-9

Sign up to get free protection for your applications and to get access to all the features.
data/README ADDED
@@ -0,0 +1,21 @@
1
+ iTunes Duplicate Track Remover
2
+
3
+ After installation, run with this command:
4
+
5
+ itunes-rm-dups
6
+
7
+ and follow the prompts, which should be self-explanatory.
8
+
9
+ NOTE: This program is designed to detect and remove only exact duplicates.
10
+ Some apparent duplicates may be left intact in the iTunes library after the
11
+ program runs. These are not true duplicates because their MD5 hashes do not
12
+ match for one reason or another. It could be that one version of the track is
13
+ protected and the other is not; that one version is compressed more than the
14
+ other, or that one version uses different protection encryption, or that the
15
+ versions simply have a few bits set differently for whatever reason. Please
16
+ check the files manually to be sure.
17
+
18
+
19
+ Daniel Choi
20
+ Cambridge, MA
21
+ dhchoi@gmail.com
@@ -0,0 +1,4 @@
1
+ #!/usr/bin/env ruby
2
+
3
# Load the duplicate-handling library, then call its top-level `run` method,
# which drives the interactive session (prompts, analysis, confirmation).
+ require 'itunes_dup_handler'
4
+ run
@@ -0,0 +1,299 @@
1
+ # To run, make sure this file is on the RUBYLIB, and type this on the command line:
2
+ #
3
+ # > ruby -r itunes_dup_handler -e 'run'
4
+ #
5
+ # And follow the prompts.
6
+ #
7
+ # NOTE: This program is designed to detect and remove only exact duplicates.
8
+ # Some apparent duplicates may be left intact in the iTunes library after the
9
+ # program runs. These are not true duplicates because their MD5 hashes do not
10
+ # match for one reason or another. It could be that one version of the track is
11
+ # protected and the other is not; that one version is compressed more than the
12
+ # other, or that one version uses different protection encryption, or that the
13
+ # versions simply have a few bits set differently for whatever reason. Please
14
+ # check the files manually to be sure.
15
+
16
+ require 'digest/md5'
17
+ require 'osx/cocoa'
18
+ require 'yaml'
19
+ require 'cgi'
20
+ include OSX
21
+ require 'fileutils'
22
+ include FileUtils
23
+ OSX.require_framework 'ScriptingBridge'
24
+
25
+
26
+
27
# Scripting Bridge application object for iTunes, looked up by bundle identifier.
+ Itunes = SBApplication.applicationWithBundleIdentifier_("com.apple.iTunes")
28
# Paths of track files that were skipped during analysis. find_duplicates
# appends to this list and run reports it to the user afterwards.
+ SkippedFiles = []
29
+
30
# One iTunes file track that may turn out to be a duplicate.
#
# The live iTunes track object is not retained; only plain values are kept:
#   database_id - Integer form of the track's databaseID, used to find the
#                 track again when it has to be removed from the library
#   unix_path   - filesystem path derived from the track's location
#   md5         - digest passed in by the caller (nil when none was computed)
class PossibleDuplicateTrack
  attr_accessor :unix_path, :database_id, :md5

  # itunesTrack must respond to databaseID and location.
  def initialize(itunesTrack, md5)
    @database_id = itunesTrack.databaseID.to_i
    @unix_path = unixify_path( itunesTrack.location.to_s )
    @md5 = md5
  end

  # Looks the track up by databaseID in the first playlist of the first iTunes
  # source and deletes it from the library.
  # Returns true when the track was found and deleted, false otherwise.
  def remove_from_itunes
    predicate = OSX::NSPredicate.predicateWithFormat("databaseID == #{@database_id}")
    playlist_tracks = Itunes.sources.first.playlists.first.tracks
    library_track = playlist_tracks.filteredArrayUsingPredicate(predicate).first
    unless library_track
      puts " ERROR trying to remove this track from iTunes. Please handle manually."
      # NOTE This is probably a podcast
      puts " Continuing..."
      sleep 2
      return false
    end
    library_track.delete
    puts " Removed from iTunes."
    true
  end

  # Deletes the track's file from disk.
  # Returns true when the file was deleted and false when it was already
  # missing or could not be deleted. A problem with one file is reported
  # instead of raised, so it cannot abort a whole batch of deletions.
  def delete_file
    unless File.exist?(@unix_path)
      puts " File not found on file system (nothing to delete)."
      return false
    end
    File.delete(@unix_path)
    puts " Deleted from file system."
    true
  rescue SystemCallError => e
    puts " ERROR deleting file: #{e.message}. Please handle manually."
    false
  end

  # Removes the track from iTunes and, only if that succeeded, deletes its
  # file. Returns true when both steps succeeded, false otherwise.
  def delete!
    puts "Deleting: #@unix_path"
    return false unless remove_from_itunes
    delete_file
  end
end
69
+
70
# Prints a progress line for file_path and returns the MD5 hex digest of the
# file's contents, or nil when the file cannot be read (missing, unreadable,
# a directory, ...).
#
# The digest is computed in-process with Digest::MD5.file instead of shelling
# out, so characters such as quotes, backticks or "$" in the path can neither
# break the command nor be interpreted by a shell.
def calculate_md5( file_path )
  print "calculating md5 for file: #{file_path}"
  print " --> "
  STDOUT.flush

  md5 = begin
    Digest::MD5.file( file_path ).hexdigest
  rescue SystemCallError, IOError
    nil # callers treat nil as "missing file or bad file path"
  end
  print md5
  puts
  md5
end
82
+
83
# Converts an iTunes track location (a percent-encoded "file://localhost/..."
# URL string) into a plain filesystem path.
#
# The argument is never modified, so frozen strings are accepted, and nil is
# treated as an empty string. Returns the decoded, stripped path.
def unixify_path(path)
  encoded = path.to_s.dup
  # CGI strips out literal +, etc. signs in the path, but we need to preserve
  # them, so they are percent-encoded first and restored by CGI.unescape.
  ['+', "'", '(', ')'].each do |symbol|
    encoded = encoded.gsub(symbol) { CGI.escape(symbol) }
  end
  # "$" stays backslash-escaped, exactly as before, so the result remains safe
  # for callers that embed it in a double-quoted shell command.
  encoded = encoded.gsub('$') { '\$' }
  CGI.unescape( encoded.sub(/\A(file:\/\/localhost)/, '') ).strip
end
alias :unixify :unixify_path
92
+
93
# Drops a trailing " <digit>" copy counter that sits directly before a
# three-character file extension, e.g. "Song 1.mp3" -> "Song.mp3".
# Paths without such a counter are returned unchanged.
def normalize_path(path)
  # The dot is escaped so that only a real extension separator matches.
  path.sub(/\s\d(\.\w{3})$/, '\1')
end
96
+
97
# Groups tracks by the MD5 digest of their files and returns only the groups
# that contain more than one track.
#
# tracks    - enumerable of iTunes tracks (each responds to name and location)
# run_limit - optional maximum number of tracks to hash; nil means no limit
# mb_limit  - optional maximum file size in megabytes; larger files are skipped
#
# Returns a Hash whose keys are MD5 digests and whose values are arrays of two
# or more PossibleDuplicateTrack objects.
#
# Side effects: prints progress, appends every skipped file path to
# SkippedFiles, and removes tracks that have no file location from iTunes.
def find_duplicates( tracks, run_limit = nil, mb_limit=nil )
  hashed_count = 0
  max_bytes = mb_limit && mb_limit * 1024 * 1024
  md5_tracks = Hash.new {|hash, key| hash[key] = []} # will be keyed by MD5 hash
  tracks.each do |track|
    # Stop once exactly run_limit tracks have been hashed.
    break if run_limit && hashed_count >= run_limit

    track_file_location = unixify_path( track.location.to_s )
    if track_file_location.nil? || track_file_location.strip == ''
      puts ">>> No file location for #{track.name}. Removing this track from iTunes..."
      PossibleDuplicateTrack.new(track, nil).remove_from_itunes
      next
    end
    unless File.exist?(track_file_location)
      puts ">>> No file found for #{track.name} at #{track_file_location}"
      SkippedFiles << track_file_location
      next
    end
    # Compare in bytes: truncating to whole megabytes first would let files
    # that exceed the limit by less than 1MB slip through.
    size_bytes = File.size(track_file_location)
    if max_bytes && size_bytes > max_bytes
      mb = size_bytes / (1024 * 1024)
      puts "SKIPPING FILE DUE TO EXCESS SIZE (#{mb}MB): #{track.name} : #{track_file_location}"
      SkippedFiles << track_file_location
      puts "Please deal with any duplicates of this file manually"
      next
    end

    # We will use the MD5 digest to identify identical files.
    md5 = calculate_md5( track_file_location )
    if md5.nil? # missing file or bad file path
      SkippedFiles << track_file_location
      next
    end
    # This might be a duplicate file
    md5_tracks[md5] << PossibleDuplicateTrack.new( track, md5 )
    hashed_count += 1
  end
  puts "=" * 40
  puts "Total Unique Media Files: #{md5_tracks.keys.size}"
  tracks_with_dups = md5_tracks.delete_if {|key, value| value.size < 2}
  puts "Tracks With Duplicates: #{tracks_with_dups.keys.size}"
  puts "=" * 40
  return tracks_with_dups
end
145
+
146
# Takes an array of PossibleDuplicateTracks that share the same MD5 digest and
# decides which one to keep: the track whose file name (basename) is shortest
# is treated as the original, all others as duplicates.
#
# Ties on basename length are broken by full path length and then by the path
# itself, so the choice does not depend on the order of the input.
#
# Returns [original, duplicates]; the input array is not modified. For an
# empty array the result is [nil, []].
def determine_duplicate( tracks )
  ordered = tracks.sort_by do |track|
    path = track.unix_path.to_s
    [File.basename( path ).size, path.size, path]
  end
  original = ordered.first
  duplicates = ordered[1..-1] || []
  return original, duplicates
end
155
+
156
# Reads one line of user input and returns it stripped, or nil at end of input.
# $stdin is read directly because Kernel#gets reads from files named in ARGV
# when the script is started with command-line arguments.
def read_user_response
  STDOUT.flush
  line = $stdin.gets
  line && line.strip
end

# Converts the answer to the size-limit prompt into megabytes: nil when the
# answer contains "none" (no limit), the entered number when it is positive,
# and the default of 100 for anything else (blank, zero, negative, no input).
def parse_mb_limit( response )
  answer = response.to_s
  return nil if answer =~ /none/
  megabytes = answer.to_i
  megabytes > 0 ? megabytes : 100
end

# Prints the title banner and the introductory notices, pausing between them.
def print_intro
  title = "itunes duplicate track removal machine"
  width = 80
  puts "-" * width
  puts
  puts title.center(width)
  puts
  puts "by daniel choi".center(width)
  puts "betahouse".center(width)
  puts "cambridge, ma, usa".center(width)
  puts
  puts "contact: dhchoi@gmail.com".center(width)
  puts
  puts "-" * width

  puts
  sleep 1
  puts "Running program. It may take several minutes to analyze all the tracks in your"
  puts "iTunes library."
  sleep 2
  puts
  puts "To avoid any potential hangs and crashes, don't use iTunes while the program is"
  puts "running."
  sleep 2
  puts
  puts "When this program is done analyzing your tracks, it will show you the duplicates"
  puts "it has detected and ask for your confirmation before removing any files. So feel"
  puts "free to go get some coffee and come back."
  puts
  sleep 1
end

# Tells the user how many files were skipped, optionally lists them, and exits
# the program if the user then chooses not to continue. Does nothing when no
# files were skipped.
def report_skipped_files( mb_limit )
  SkippedFiles.compact!
  SkippedFiles.reject! {|x| x.strip == ''}
  return if SkippedFiles.empty?

  # Only mention the size limit when one is actually in effect.
  reasons = "because they could not be found"
  if mb_limit
    reasons = "either #{reasons} or because they were too large (over #{mb_limit}MB) to analyze quickly"
  end
  puts "#{SkippedFiles.length} files were skipped, #{reasons}. Also, files may be skipped because their path contains non-standard characters. Please look for duplicates in these files manually."
  puts
  puts "Please handle any duplicates of these special files manually."
  puts
  puts "See these files? (y/n) (default: n) "
  return unless read_user_response.to_s.downcase == "y"

  puts "Skipped Files:"
  SkippedFiles.each {|file| puts file }
  puts "=" * 40
  puts "Continue? (y/n)"
  if read_user_response.to_s =~ /^n/
    puts "Aborted."
    exit
  end
end

# Interactive entry point: analyzes the iTunes library, lists exact duplicate
# files and, after confirmation, removes them from iTunes and from disk.
#
# pattern   - optional Regexp; only tracks whose name matches are analyzed
# run_limit - optional maximum number of tracks to hash (passed on to
#             find_duplicates)
#
# Calls exit when no duplicates are found or the user aborts.
def run(pattern=nil, run_limit=nil)
  print_intro

  puts "To make the program run faster, we can skip iTunes files that are very large."
  puts "How many megabytes do you want to set as the maximum? "
  print "(Default: 100mb. Type 'none' for no limit): "
  mb_limit = parse_mb_limit( read_user_response )
  if mb_limit
    puts "Setting maximum file size to analyze to #{mb_limit}MB."
  else
    puts "Setting maximum file size to analyze to unlimited."
  end
  sleep 3

  # Load the iTunes library
  source = Itunes.sources.first
  puts "Using iTunes source: #{source.name}"
  puts "Using iTunes playlist: #{source.playlists.first.name}"

  fileTracks = source.libraryPlaylists[0].fileTracks
  if pattern # filter tracks
    fileTracks = fileTracks.select {|track| track.name.to_s =~ pattern}
  end
  puts "Calculating MD5 digests for all tracks with files..."
  sleep 2
  puts
  tracks_with_dups = find_duplicates( fileTracks, run_limit, mb_limit )
  puts "Finding duplicates..."
  puts
  sleep 2

  puts "=" * 40
  report_skipped_files( mb_limit )

  puts "FOUND DUPLICATES:"
  sleep 1
  if tracks_with_dups.empty?
    puts "No true duplicates found."
    puts "Exiting."
    display_note
    exit
  end

  duplicate_tracks = []
  tracks_with_dups.each do |k, tracks|
    puts "#{k} :"
    original, dups = determine_duplicate( tracks )
    puts " #{original.database_id} : #{original.unix_path} <-- original"
    dups.each do |track|
      puts " #{track.database_id} : #{track.unix_path} <-- duplicate"
    end
    duplicate_tracks.concat( dups )
  end

  num_to_remove = duplicate_tracks.length
  print "Go ahead and delete the #{num_to_remove} files marked 'duplicate'? (y/n) (default: y) "
  answer = read_user_response
  # No input at all (end of input) must never trigger a destructive default.
  if answer.nil? || answer.downcase == "n"
    puts "Canceled."
    return
  end

  puts "OK, here goes..."
  removed = 0
  duplicate_tracks.each do |track|
    begin
      track.delete!
    rescue StandardError => e
      # One bad track must not abort the rest of the batch.
      puts " ERROR: #{e.message}. Please handle manually. Continuing..."
    end
    # Count by what is actually gone from disk rather than by what was planned.
    removed += 1 unless File.exist?( track.unix_path )
  end
  sleep 2
  puts "Done. #{removed} of #{num_to_remove} duplicates removed."
  display_note
end
285
+
286
# Prints the closing notice explaining that only byte-identical files count as
# duplicates, framed by one blank line above and one below.
def display_note
  note_lines = [
    "# NOTE: This program is designed to detect and remove only exact duplicates.",
    "# Some apparent duplicates may be left intact in the iTunes library after the",
    "# program runs. These are not true duplicates because their MD5 hashes do not",
    "# match for one reason or another. It could be that one version of the track is",
    "# protected and the other is not; that one version is compressed more than the",
    "# other, or that one version uses different protection encryption, or that the",
    "# versions simply have a few bits set differently for whatever reason. Please",
    "# check the files manually to be sure."
  ]
  puts
  puts note_lines
  puts
end
metadata ADDED
@@ -0,0 +1,54 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: itunes-rm-dups
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.1
5
+ platform: universal-darwin-9
6
+ authors:
7
+ - Daniel Choi
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2008-05-21 00:00:00 -04:00
13
+ default_executable:
14
+ dependencies: []
15
+
16
+ description: This Ruby gem allows an OS X Leopard user to remove duplicate tracks from her iTunes library and file system from the command line. Duplicates are detected robustly, using MD5 digests of track files instead of iTunes metadata matching, which can produce false positives and false negatives. The program is a Ruby script that uses RubyCocoa and the OS X Cocoa Scripting Bridge. To run the program, type itunes-rm-dups on the command line and follow the prompts.
17
+ email: dhchoi@gmail.com
18
+ executables:
19
+ - itunes-rm-dups
20
+ extensions: []
21
+
22
+ extra_rdoc_files:
23
+ - README
24
+ files:
25
+ - lib/itunes_dup_handler.rb
26
+ - README
27
+ has_rdoc: true
28
+ homepage: http://cesareborgia.com/software/itunes-rm-dups/
29
+ post_install_message:
30
+ rdoc_options: []
31
+
32
+ require_paths:
33
+ - lib
34
+ required_ruby_version: !ruby/object:Gem::Requirement
35
+ requirements:
36
+ - - ">="
37
+ - !ruby/object:Gem::Version
38
+ version: "0"
39
+ version:
40
+ required_rubygems_version: !ruby/object:Gem::Requirement
41
+ requirements:
42
+ - - ">="
43
+ - !ruby/object:Gem::Version
44
+ version: "0"
45
+ version:
46
+ requirements: []
47
+
48
+ rubyforge_project:
49
+ rubygems_version: 1.0.1
50
+ signing_key:
51
+ specification_version: 2
52
+ summary: Remove duplicate tracks from an iTunes library and file system from the command line.
53
+ test_files: []
54
+