grok-itunes 0.1.2

Sign up to get free protection for your applications and to get access to all the features.
File without changes
@@ -0,0 +1,580 @@
1
+ #!/usr/bin/env ruby -w
2
+
3
+ require 'ftools'
4
+ require 'time'
5
+ require 'rubygems'
6
+ require 'sqlite3'
7
+ require 'scrobbler'
8
+ # I don't want to deal with RMagick on Mac OS X
9
+ require 'treemap/node'
10
+ require 'treemap/html_output'
11
+ require 'treemap/squarified_layout'
12
+ require 'treemap/gradient_color'
13
+ require 'treemap/rectangle'
14
+
15
+ class GrokITunes
16
+ VERSION = '0.1.2'
17
+ BENCHMARK = false
18
+
19
+ attr_accessor :db, :file, :tracks, :playlists, :now
20
+
21
# Prepare an analyzer for the iTunes library export stored at +file+.
#
# Only in-memory state is set up here; the SQLite handle stays nil until
# init_db is called. @top is the row limit used by the "top N" reports.
#
# Raises FileNotFoundException or FileEmptyException (via valid_file?) when
# +file+ is missing or has no content.
def initialize(file)
  @file = file
  @db = nil
  @top = 10
  @tracks, @playlists = [], []
  @now = Time.now
  valid_file?
end
30
+
31
# Check that @file points at an existing, non-empty file.
#
# Returns true when the file is usable.
# Raises FileNotFoundException when the path does not exist and
# FileEmptyException when File.size? reports no content.
def valid_file?
  path = File.expand_path(@file)
  raise FileNotFoundException, "File not found: #{@file}" unless File.exist?(path)
  raise FileEmptyException, "File has no content: #{@file}" if File.size?(path).nil?
  true
end
41
+
42
# Open (or create) the SQLite database that sits next to the library file
# and rebuild its schema from scratch.
#
# The database path is the expanded @file path with ".db" appended. Any
# existing tracks / playlists / tracks_playlists tables are dropped first, so
# calling this again starts with empty tables. @now is refreshed as well.
def init_db
  puts "init_db: start #{Time.now}" if BENCHMARK

  @now = Time.now
  @db = SQLite3::Database.new(File.expand_path(@file) + ".db")
  @db.type_translation = true

  # Join table first, then the two tables it refers to.
  %w[tracks_playlists playlists tracks].each do |table|
    @db.execute("drop table if exists #{table}")
  end

  schema = <<-SQL
    create table tracks (
      track_id INTEGER PRIMARY KEY,
      name VARCHAR DEFAULT NULL,
      artist VARCHAR DEFAULT NULL,
      album_artist VARCHAR DEFAULT NULL,
      composer VARCHAR DEFAULT NULL,
      album VARCHAR DEFAULT NULL,
      grouping VARCHAR DEFAULT NULL,
      genre VARCHAR DEFAULT NULL,
      kind VARCHAR DEFAULT NULL,
      size INTEGER DEFAULT NULL,
      total_time INTEGER DEFAULT NULL,
      disc_number INTEGER DEFAULT NULL,
      disc_count INTEGER DEFAULT NULL,
      track_number INTEGER DEFAULT NULL,
      track_count INTEGER DEFAULT NULL,
      year INTEGER DEFAULT NULL,
      bpm INTEGER DEFAULT NULL,
      date_modified DATE DEFAULT NULL,
      date_added DATE DEFAULT NULL,
      bit_rate INTEGER DEFAULT NULL,
      sample_rate INTEGER DEFAULT NULL,
      volume_adjustment INTEGER DEFAULT NULL,
      part_of_gapless_album BOOLEAN DEFAULT NULL,
      equalizer VARCHAR DEFAULT NULL,
      comments VARCHAR DEFAULT NULL,
      play_count INTEGER DEFAULT NULL,
      play_date INTEGER DEFAULT NULL,
      play_date_utc DATE DEFAULT NULL,
      skip_count INTEGER DEFAULT NULL,
      skip_date DATE DEFAULT NULL,
      release_date DATE DEFAULT NULL,
      rating INTEGER DEFAULT NULL,
      album_rating INTEGER DEFAULT NULL,
      album_rating_computed BOOLEAN DEFAULT NULL,
      compilation BOOLEAN DEFAULT NULL,
      series VARCHAR DEFAULT NULL,
      season INTEGER DEFAULT NULL,
      episode VARCHAR DEFAULT NULL,
      episode_order INTEGER DEFAULT NULL,
      sort_album VARCHAR DEFAULT NULL,
      sort_album_artist VARCHAR DEFAULT NULL,
      sort_artist VARCHAR DEFAULT NULL,
      sort_composer VARCHAR DEFAULT NULL,
      sort_name VARCHAR DEFAULT NULL,
      sort_series VARCHAR DEFAULT NULL,
      artwork_count INTEGER DEFAULT NULL,
      persistent_id VARCHAR DEFAULT NULL,
      disabled BOOLEAN DEFAULT NULL,
      clean BOOLEAN DEFAULT NULL,
      explicit BOOLEAN DEFAULT NULL,
      track_type VARCHAR DEFAULT NULL,
      podcast BOOLEAN DEFAULT NULL,
      has_video BOOLEAN DEFAULT NULL,
      hd BOOLEAN DEFAULT NULL,
      video_width INTEGER DEFAULT NULL,
      video_height INTEGER DEFAULT NULL,
      movie BOOLEAN DEFAULT NULL,
      tv_show BOOLEAN DEFAULT NULL,
      protected BOOLEAN DEFAULT NULL,
      purchased BOOLEAN DEFAULT NULL,
      unplayed BOOLEAN DEFAULT NULL,
      file_type INTEGER DEFAULT NULL,
      file_creator INTEGER DEFAULT NULL,
      location VARCHAR DEFAULT NULL,
      file_folder_count INTEGER DEFAULT NULL,
      library_folder_count INTEGER DEFAULT NULL
    );

    create table playlists (
      playlist_id INTEGER PRIMARY KEY,
      master BOOLEAN DEFAULT NULL,
      name VARCHAR DEFAULT NULL,
      playlist_persistent_id VARCHAR DEFAULT NULL,
      distinguished_kind INTEGER DEFAULT NULL,
      folder BOOLEAN DEFAULT NULL,
      visible BOOLEAN DEFAULT NULL,
      audiobooks BOOLEAN DEFAULT NULL,
      movies BOOLEAN DEFAULT NULL,
      music BOOLEAN DEFAULT NULL,
      party_shuffle BOOLEAN DEFAULT NULL,
      podcasts BOOLEAN DEFAULT NULL,
      purchased_music BOOLEAN DEFAULT NULL,
      tv_shows BOOLEAN DEFAULT NULL,
      all_items BOOLEAN DEFAULT NULL,
      genius_track_id BOOLEAN DEFAULT NULL,
      smart_info BLOB DEFAULT NULL,
      smart_criteria BLOB DEFAULT NULL
    );

    create table tracks_playlists (
      track_id INTEGER,
      playlist_id INTEGER
    );
  SQL

  @db.execute_batch(schema)
  puts "init_db: stop #{Time.now}" if BENCHMARK
end
155
+
156
# Copy the parsed @tracks and @playlists into the database inside a single
# transaction, using one prepared statement per table.
#
# Values are passed positionally, so the field lists below must stay in the
# same order as the columns created by init_db. Fields listed as "text" are
# interpolated into a String before binding, which turns a missing value into
# '' rather than NULL; all other fields are bound as-is.
def populate_db
  puts "populate_db: start #{Time.now}" if BENCHMARK

  track_fields = [
    :track_id, :name, :artist, :album_artist, :composer, :album, :grouping,
    :genre, :kind, :size, :total_time, :disc_number, :disc_count,
    :track_number, :track_count, :year, :bpm, :date_modified, :date_added,
    :bit_rate, :sample_rate, :volume_adjustment, :part_of_gapless_album,
    :equalizer, :comments, :play_count, :play_date, :play_date_utc,
    :skip_count, :skip_date, :release_date, :rating, :album_rating,
    :album_rating_computed, :compilation, :series, :season, :episode,
    :episode_order, :sort_album, :sort_album_artist, :sort_artist,
    :sort_composer, :sort_name, :sort_series, :artwork_count, :persistent_id,
    :disabled, :clean, :explicit, :track_type, :podcast, :has_video, :hd,
    :video_width, :video_height, :movie, :tv_show, :protected, :purchased,
    :unplayed, :file_type, :file_creator, :location, :file_folder_count,
    :library_folder_count
  ]
  track_text_fields = [
    :name, :artist, :album_artist, :composer, :album, :grouping, :genre,
    :kind, :equalizer, :comments, :sort_album, :sort_album_artist,
    :sort_artist, :sort_composer, :sort_name, :sort_series, :persistent_id,
    :track_type
  ]

  playlist_fields = [
    :playlist_id, :master, :name, :playlist_persistent_id,
    :distinguished_kind, :folder, :visible, :audiobooks, :movies, :music,
    :party_shuffle, :podcasts, :purchased_music, :tv_shows, :all_items,
    :genius_track_id, :smart_info, :smart_criteria
  ]
  playlist_text_fields = [:name, :playlist_persistent_id]

  @db.transaction do |db|
    insert_track =
      "insert into tracks values ( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?,
      ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?,
      ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?,
      ?, ?, ?, ?, ?, ? )"
    db.prepare(insert_track) do |stmt|
      @tracks.each do |t|
        row = track_fields.map do |field|
          track_text_fields.include?(field) ? "#{t[field]}" : t[field]
        end
        stmt.execute(*row)
      end
    end

    insert_playlist =
      "insert into playlists values ( ?, ?, ?, ?, ?, ?, ?, ?, ?,
      ?, ?, ?, ?, ?, ?, ?, ?, ?)"
    db.prepare(insert_playlist) do |stmt|
      # Each @playlists entry is [attributes_hash, track_id_list].
      @playlists.each do |attrs, _ids|
        row = playlist_fields.map do |field|
          playlist_text_fields.include?(field) ? "#{attrs[field]}" : attrs[field]
        end
        stmt.execute(*row)
      end
    end

    db.prepare("insert into tracks_playlists values ( ?, ? )") do |stmt|
      @playlists.each do |attrs, ids|
        ids.each { |track_id| stmt.execute attrs[:playlist_id], track_id }
      end
    end
  end

  puts "populate_db: stop #{Time.now}" if BENCHMARK
end
212
+
213
+ # Based on seconds_fraction_to_time http://snippets.dzone.com/posts/show/5958
214
# Format a duration given in seconds as "DDD:HH:MM:SS".
#
# seconds - Integer or Float number of seconds. Fractions are truncated.
#
# Returns a String with zero-padded days (3 digits), hours, minutes and
# seconds (2 digits each).
#
# Fix: the fractional part used to be dropped only when seconds >= 60, so an
# input such as 45.5 was rendered as "000:00:00:45.5". The value is now
# truncated up front, which matters because total_time always passes a Float.
def human_clock_time(seconds)
  secs = seconds.to_i
  days = hours = mins = 0

  if secs >= 60
    mins, secs = secs.divmod(60)
    hours, mins = mins.divmod(60)
    days, hours = hours.divmod(24)
  end

  "%03d:%02d:%02d:%02d" % [days, hours, mins, secs]
end
232
+
233
# Convert a raw String taken from the library XML into the Ruby value that
# gets stored in the database.
#
# value - the text found between the XML tags (or "true"/"false").
# type  - the tag name; matched as a pattern against "date", "integer",
#         "string" and "boolean", in that order.
#
# Returns a String for dates (with the first "Z" removed, since sqlite3 does
# not understand the trailing ISO 8601 zone marker), an Integer, a String, or
# true/false.
# Raises NonBooleanValueException for a boolean that is neither "true" nor
# "false", and UnknownDataTypeException for any other type.
def fix_type(value, type)
  case type
  when /date/
    value.sub(/Z/, '').to_s
  when /integer/
    value.to_i
  when /string/
    value.to_s
  when /boolean/
    case value
    when "true"  then true
    when "false" then false
    else raise NonBooleanValueException, "Really now? True or False?: #{value}"
    end
  else
    raise UnknownDataTypeException, "I don't know how to treat type: #{type}"
  end
end
255
+
256
+ # Based on work by zenspider @ http://snippets.dzone.com/posts/show/3700
257
+
258
# Read the iTunes library XML line by line and fill @tracks and @playlists.
#
# The parser is purely line/indentation based: it relies on the exact tab
# depth of each line rather than on a real XML parser.
#
#   * Between "<key>Tracks</key>" and the closing one-tab "</dict>", every
#     three-tab key/value line is added to the current track; a two-tab
#     "</dict>" finishes that track and appends a copy to @tracks.
#   * Between "<key>Playlists</key>" and the closing one-tab "</array>",
#     three-tab key/value lines describe the playlist, five-tab "Track ID"
#     lines list its members, and a two-tab "</dict>" appends
#     [attributes, track_ids] to @playlists.
#
# Keys are lower-cased, joined with underscores and interned
# ("Track ID" becomes :track_id); values go through fix_type.
def parse_xml
  puts "parse_xml: start #{Time.now}" if BENCHMARK

  typed_entry = /^\t\t\t<key>(.*)<\/key><(date|integer|string)>(.*)<\/.*>$/
  flag_entry  = /^\t\t\t<key>(.*)<\/key><(true|false)\/>$/

  # Stores a key/value line into +record+; answers whether the line was one.
  store = lambda do |record, line|
    if (found = typed_entry.match(line))
      record[found[1].downcase.split.join("_").intern] = fix_type(found[3], found[2])
      true
    elsif (found = flag_entry.match(line))
      record[found[1].downcase.split.join("_").intern] = fix_type(found[2], "boolean")
      true
    else
      false
    end
  end

  in_tracks = in_playlists = false
  track = {}
  playlist = {}
  item_ids = []

  IO.foreach(File.expand_path(@file)) do |line|
    if line =~ /^\t<key>Tracks<\/key>$/
      in_tracks = true
    elsif in_tracks
      next if store.call(track, line)

      if line =~ /^\t\t<\/dict>$/
        @tracks.push(track.dup)
        track.clear
      elsif line =~ /^\t<\/dict>$/
        in_tracks = false
      end
    elsif line =~ /^\t<key>Playlists<\/key>$/
      in_playlists = true
    elsif in_playlists
      next if store.call(playlist, line)

      if line =~ /^\t\t\t\t\t<key>Track ID<\/key><integer>(.*)<\/integer>$/
        item_ids.push($1.to_i)
      elsif line =~ /^\t\t<\/dict>$/
        @playlists.push([playlist.dup, item_ids.dup])
        playlist.clear
        item_ids.clear
      elsif line =~ /^\t<\/array>$/
        in_playlists = false
      end
    end
  end

  puts "parse_xml: stop #{Time.now}" if BENCHMARK
end
327
+
328
# Number of distinct, non-blank artist names in the tracks table.
#
# Returns an Integer.
def count_artists
  rows = @db.execute(<<-SQL)
    select count(distinct artist) as count from tracks
    where artist not null and artist != ''
  SQL
  rows.first.first.to_i
end
332
+
333
# Number of distinct, non-blank album names in the tracks table.
#
# Returns an Integer.
def count_albums
  rows = @db.execute(<<-SQL)
    select count(distinct album) as count from tracks
    where album not null and album != ''
  SQL
  rows.first.first.to_i
end
337
+
338
# Total number of rows in the tracks table.
#
# Returns an Integer.
def count_tracks
  rows = @db.execute("select count(*) from tracks")
  rows.first.first.to_i
end
341
+
342
# Combined length of every track, formatted by human_clock_time.
#
# The summed total_time column is divided by 1000.00 before formatting, i.e.
# the stored values are treated as milliseconds.
#
# Returns a String.
def total_time
  rows = @db.execute(<<-SQL)
    select sum(total_time) from tracks
    where total_time not null
  SQL
  milliseconds = rows.first.first.to_i
  human_clock_time(milliseconds / 1000.00)
end
348
+
349
# The @top artists with the most tracks.
#
# Returns an Array of [count, artist] rows, most tracks first.
#
# Fix: populate_db stores a missing artist as '' rather than NULL, so the
# old "artist not null" filter let the blank artist show up (often at the
# top). Blank names are now excluded, matching count_artists. Ties are broken
# by artist name so the result is stable between runs.
def top_artists
  @db.execute(<<-SQL).first(@top)
    select count(artist) as count, artist from tracks
    where artist not null and artist != ''
    group by artist
    order by count desc, artist
  SQL
end
355
+
356
+
357
# The @top genres by number of tracks, ties ordered by genre name.
#
# Returns an Array of [count, genre] rows.
def top_genres
  query = <<-SQL
    select count(genre) as count, genre from tracks
    group by genre order by count desc, genre limit #{@top}
  SQL
  @db.execute(query)
end
361
+
362
# The @top tracks with the earliest play_date_utc value.
#
# Returns an Array of [datetime, name] rows, oldest first.
def oldest_tracks
  rows = @db.execute(<<-SQL)
    select datetime(play_date_utc), name from tracks
    where play_date_utc not null and play_date_utc != ''
    order by play_date_utc
  SQL
  rows.first(@top)
end
367
+
368
# The @top tracks ranked by rating multiplied by play count.
#
# Returns an Array of [score, name, artist, last_played] rows, best first.
def top_tracks_on_rating_times_playcount
  rows = @db.execute(<<-SQL)
    select rating*play_count as count, name, artist,
    datetime(play_date_utc) from tracks
    order by count desc
  SQL
  rows.first(@top)
end
373
+
374
+ # Based on work by zenspider @ http://snippets.dzone.com/posts/show/3700
375
# The @top tracks scored by rating * play_count * ln(days since last played),
# so highly rated, often played tracks that have not been heard for a while
# float to the top.
#
# Returns an Array of [score, name, artist, last_played] rows, best first.
#
# Fixes:
#   * A track played less than a day before @now produced Math.log(0), an
#     infinite or NaN score, and a FloatDomainError from to_i. The age is now
#     clamped to at least one day, which yields a score of 0.
#   * datetime(play_date_utc) has no zone suffix, so Time.parse read it as
#     local time; in zones behind UTC that could even give a negative age.
#     The value is now parsed explicitly as UTC.
#   * Removed the unused locals (+now+, +rating+).
def top_tracks_aging_well
  records = @db.execute(<<-SQL)
    select rating, play_count, name, artist,
    datetime(play_date_utc) from tracks
    where rating not null and play_count not null and name not null
    and artist not null and play_date_utc not null
    order by rating, play_count
  SQL

  scored = records.map do |rec|
    last_played = Time.parse("#{rec[4]} UTC")
    days_idle = ((@now - last_played) / 86400.0).to_i
    days_idle = 1 if days_idle < 1 # keeps Math.log finite and non-negative
    score = rec[0].to_i * rec[1].to_i * Math.log(days_idle)
    [score.to_i, rec[2], rec[3], rec[4]]
  end

  scored.sort.reverse.first(@top)
end
390
+
391
+ #
392
+ # I don't trust this function yet since it always passes and Scrobbler kicks this error msg
393
+ # /Library/Ruby/Gems/1.8/gems/scrobbler-0.2.0/lib/scrobbler/base.rb:21: warning: instance variable @similar not initialized
394
+ #
395
+ # Works from irb and is way cool!
396
+ #
397
# Look up artists similar to +artist+ through Scrobbler.
#
# artist - artist name handed to Scrobbler::Artist.new.
#
# Returns at most @top pairs of [match, name], both as Strings, in the order
# Scrobbler reports them.
def related_artists_to(artist)
  lookup = Scrobbler::Artist.new(artist)
  pairs = lookup.similar.map do |similar|
    ["#{similar.match.dup}", "#{similar.name.dup}"]
  end
  pairs.first(@top)
end
405
+
406
+ # Shame, scrobbler only does last.fm API Version 1.0
407
+ #def related_tracks_to(track,artist)
408
+ #end
409
+
410
# Break the library down by rating.
#
# Returns one row per distinct rating:
#   [rating, rated_count, track_count, library_size, share]
# where library_size is @tracks.size and share is track_count / library_size
# rounded to four decimals. A NULL rating is reported as 0.
#
# Fix: the block parameter used to be called +base+, shadowing (and on Ruby
# 1.8 overwriting) the result set being iterated, and the rating was written
# as "base[0].to_i ||= 0", a conditional assignment that can never fire
# because to_i always returns an Integer.
def rating_fanatic
  library_size = @tracks.size
  rows = @db.execute(<<-SQL)
    select rating, count(rating), count(name) from tracks
    group by rating
  SQL

  rows.map do |row|
    share = "%.4f" % (row[2].to_f / library_size.to_f)
    [row[0].to_i, row[1].to_i, row[2].to_i, library_size, share.to_f]
  end
end
420
+
421
# Tracks whose size column is at most +size+, smallest first.
#
# size - upper bound, compared against the tracks.size column.
#
# Returns an Array of [size, name, artist, location] rows.
#
# Fix: +size+ used to be interpolated straight into the SQL text; it is now
# passed as a bound parameter so a malformed value cannot alter the query.
def small_files(size)
  query = <<-SQL
    select size,name,artist,location from tracks
    where size <= ?
    order by size
  SQL
  @db.execute(query, [size])
end
426
+
427
# Find tracks whose name starts with a doubled track number ("01 01 ..."
# through "09 09 ...").
#
# iTunes has an evil bug that replicates leading numbers: a file named
# '01 01 01 somefile.mp3' becomes '01 01 01 01 somefile.mp3' when clicked,
# and again on every further click. Renaming it back to '01 somefile.mp3'
# stops the madness.
#
# Returns an Array of [artist, album, name, location] rows.
def find_buggy_itunes_files
  doubled_prefixes = (1..9).map do |n|
    "name like '%02d %02d %%'" % [n, n]
  end

  @db.execute(<<-SQL)
    select artist,album,name,location from tracks
    where #{doubled_prefixes.join("\n    or ")}
    order by artist, album, name
  SQL
end
444
+
445
# List the values of one tracks column that end in a space.
#
# thing - column name; it is interpolated into the SQL text as-is, so it
#         must be a trusted identifier.
#
# Returns an Array of single-column rows ordered by that column.
def find_trailing_spaces(thing)
  query = <<-SQL
    select #{thing} from tracks
    where #{thing} like '% '
    order by #{thing}
  SQL
  @db.execute(query)
end
450
+
451
# Non-podcast tracks whose lower-cased name ends with +suffix+
# (for example ".mp3").
#
# suffix - text the name must end with; LIKE wildcards inside it keep their
#          SQL meaning.
#
# Returns an Array of [name, location] rows ordered by name.
#
# Fix: +suffix+ used to be spliced into the SQL literal, so a suffix
# containing a single quote produced a broken (or altered) statement. The
# pattern is now passed as a bound parameter.
def bad_track_names(suffix)
  query = <<-SQL
    select name, location from tracks
    where lower(name) like ?
    and podcast is null
    order by name
  SQL
  @db.execute(query, ["%#{suffix}"])
end
457
+
458
# Tracks whose clean column holds the text 'true'.
#
# Returns an Array of [name, artist, album, clean] rows ordered by artist,
# album and name.
def clean_tracks
  query = <<-SQL
    select name, artist, album, clean from tracks
    where clean = 'true'
    order by artist, album, name
  SQL
  @db.execute(query)
end
463
+
464
# Tracks whose explicit column holds the text 'true'.
#
# Returns an Array of [name, artist, album, explicit] rows ordered by
# artist, album and name.
def explicit_tracks
  query = <<-SQL
    select name, artist, album, explicit from tracks
    where explicit = 'true'
    order by artist, album, name
  SQL
  @db.execute(query)
end
469
+
470
# Tracks that have no rating stored.
#
# Returns an Array of [album, artist, name] rows in that sort order.
def tracks_not_rated
  query = <<-SQL
    select album,artist,name from tracks
    where rating is null
    order by album,artist,name
  SQL
  @db.execute(query)
end
475
+
476
# Podcast episodes that have no genre (NULL or blank).
#
# Returns an Array of [name, artist] rows ordered by name.
#
# Fix: the condition was written "podcast = 'true' and genre is null or
# genre = ''". AND binds tighter than OR, so every track with a blank genre
# was returned whether or not it was a podcast. The genre test is now
# parenthesised.
def podcast_without_genre
  query = <<-SQL
    select name,artist from tracks
    where podcast = 'true' and (genre is null or genre = '')
    order by name
  SQL
  @db.execute(query)
end
481
+
482
# Tracks whose name, artist or album contains something that looks like a
# web address ("http:" or "www.").
#
# Returns an Array of [artist, album, name] rows in that sort order.
def spam_tunes
  url_markers = ["%http:%", "%www.%"]
  conditions = %w[name artist album].map do |column|
    url_markers.map { |marker| "lower(#{column}) like '#{marker}'" }
  end.flatten

  @db.execute(<<-SQL)
    select artist,album,name from tracks
    where #{conditions.join("\n    or ")}
    order by artist,album,name
  SQL
end
492
+
493
# Tracks that have neither a play count nor a skip count.
#
# QUESTION? How can you rate something if you never played it?
#
# Returns an Array of [artist, album, name] rows in that sort order.
def never_played
  query = <<-SQL
    select artist,album,name from tracks
    where play_count is null and skip_count is null
    order by artist,album,name
  SQL
  @db.execute(query)
end
499
+
500
# Number of tracks per bit rate.
#
# Returns an Array of [count, bit_rate] rows ordered by bit rate.
def bitrate_histogram
  query = <<-SQL
    select count(bit_rate) as count, bit_rate from tracks
    where bit_rate not null
    group by bit_rate
    order by bit_rate
  SQL
  @db.execute(query)
end
506
+
507
# Number of tracks per genre.
#
# Returns an Array of [count, genre] rows, most common genre first.
def genre_histogram
  query = <<-SQL
    select count(genre) as count, genre from tracks
    where genre is not null
    group by genre
    order by count desc
  SQL
  @db.execute(query)
end
513
+
514
# Tracks without a track number, ignoring anything flagged as having video.
#
# Returns an Array of [artist, album, name] rows in that sort order.
def missing_track_numbers
  query = <<-SQL
    select artist,album,name from tracks
    where track_number is null and has_video is null
    order by artist,album,name
  SQL
  @db.execute(query)
end
519
+
520
# Tracks whose genre is NULL or blank.
#
# Returns an Array of [artist, album, name] rows in that sort order.
def missing_genre
  query = <<-SQL
    select artist,album,name from tracks
    where genre is null or genre = ''
    order by artist,album,name
  SQL
  @db.execute(query)
end
525
+
526
# The @top largest tracks by size.
#
# Returns an Array of [size, artist, album, name] rows, largest first.
def disk_pigs
  query = <<-SQL
    select size,artist,album,name from tracks
    where size is not null
    order by size desc limit #{@top}
  SQL
  @db.execute(query)
end
531
+
532
# Tracks that have been skipped at least as often as they were played.
#
# Returns an Array of [artist, album, name, skip_count, play_count] rows
# ordered by skip count, play count, artist, album and name.
def dj_itis_skip_happy
  query = <<-SQL
    select artist,album,name,skip_count,play_count from tracks
    where skip_count >= play_count
    order by skip_count,play_count,artist,album,name
  SQL
  @db.execute(query)
end
537
+
538
# Tracks whose name consists only of upper-case ASCII letters and
# whitespace. Works on the parsed @tracks list, not on the database.
#
# Returns an Array of [name, artist, album] triples (copies of the stored
# values); artist and album are nil when the track has none.
#
# Fix: artist and album were duplicated unconditionally, so a screaming
# track without an artist or album raised TypeError (nil.dup) on the Ruby
# versions this file targets. Missing values are now passed through as nil.
def screaming_names
  results = []
  @tracks.each do |track|
    name = track[:name]
    next unless name && name =~ /^[A-Z\s]*$/

    artist = track[:artist]
    album = track[:album]
    results.push([name.dup, artist && artist.dup, album && album.dup])
  end
  results
end
549
+
550
# Render the bit-rate histogram as an HTML treemap.
#
# Each histogram row becomes one child node sized by its track count and
# labelled with the bit rate. The page is written to
# /tmp/bitrate_treemap.html, replacing any previous file.
#
# Returns whatever File#write returns for the generated page.
def treemap_bitrate
  tree = Treemap::Node.new
  bitrate_histogram.each do |row|
    tree.new_child(:size => row[0].to_i, :label => "#{row[1]}")
  end

  renderer = Treemap::HtmlOutput.new do |page|
    page.width = 1024
    page.height = 768
    page.center_labels_at_depth = 1
  end

  File.open('/tmp/bitrate_treemap.html', 'w') do |io|
    io.write(renderer.to_html(tree))
  end
end
562
+
563
# Render the genre histogram as an HTML treemap.
#
# Each histogram row becomes one child node sized by its track count and
# labelled with the genre. The page is written to /tmp/genre_treemap.html,
# replacing any previous file.
#
# Returns whatever File#write returns for the generated page.
def treemap_genre
  tree = Treemap::Node.new
  genre_histogram.each do |row|
    tree.new_child(:size => row[0].to_i, :label => "#{row[1]}")
  end

  renderer = Treemap::HtmlOutput.new do |page|
    page.width = 1024
    page.height = 768
    page.center_labels_at_depth = 1
  end

  File.open('/tmp/genre_treemap.html', 'w') do |io|
    io.write(renderer.to_html(tree))
  end
end
575
+ end
576
+
577
# Raised by GrokITunes#valid_file? when the library file has no content.
class FileEmptyException < StandardError
end

# Raised by GrokITunes#valid_file? when the library file does not exist.
class FileNotFoundException < StandardError
end

# Raised by GrokITunes#fix_type for a type it does not know how to convert.
class UnknownDataTypeException < StandardError
end

# Raised by GrokITunes#fix_type for a boolean that is neither "true" nor "false".
class NonBooleanValueException < StandardError
end