grok-itunes 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
File without changes
@@ -0,0 +1,580 @@
1
+ #!/usr/bin/env ruby -w
2
+
3
+ require 'ftools'
4
+ require 'time'
5
+ require 'rubygems'
6
+ require 'sqlite3'
7
+ require 'scrobbler'
8
+ # I don't want to deal with RMagic on Mac OS X
9
+ require 'treemap/node'
10
+ require 'treemap/html_output'
11
+ require 'treemap/squarified_layout'
12
+ require 'treemap/gradient_color'
13
+ require 'treemap/rectangle'
14
+
15
+ class GrokITunes
16
+ VERSION = '0.1.2'
17
+ BENCHMARK = false
18
+
19
+ attr_accessor :db, :file, :tracks, :playlists, :now
20
+
21
# Sets up an analyzer for one iTunes library XML file.
#
# file - path to the library XML. It is stored as given and then checked by
#        valid_file?, so whatever that check raises propagates to the caller.
def initialize(file)
  @file      = file
  @db        = nil      # no database handle yet
  @tracks    = []
  @playlists = []
  @top       = 10       # row cap used by the "top N" style reports
  @now       = Time.now
  valid_file?
end
30
+
31
# Confirms that @file points at an existing, non-empty file.
#
# Returns true when the file is usable.
# Raises FileNotFoundException when the expanded path does not exist.
# Raises FileEmptyException when the file exists but has no content.
def valid_file?
  path = File.expand_path(@file)
  raise FileNotFoundException, "File not found: #{@file}" unless File.exist?(path)
  # File.size? answers nil for a zero-length file.
  raise FileEmptyException, "File has no content: #{@file}" if File.size?(path).nil?

  true
end
41
+
42
# Opens the SQLite database used by the reports and rebuilds its schema.
#
# The database file is the expanded @file path with ".db" appended. Any
# existing tracks_playlists, playlists and tracks tables are dropped before
# the three tables are created again, so every call starts from empty tables.
# Also refreshes @now and leaves the open handle in @db.
def init_db
  puts "init_db: start #{Time.now}" if BENCHMARK

  db_file = File.expand_path(@file) + ".db"

  @now = Time.now
  @db = SQLite3::Database.new("#{db_file}")
  @db.type_translation = true

  @db.execute( "drop table if exists tracks_playlists" )
  @db.execute( "drop table if exists playlists" )
  @db.execute( "drop table if exists tracks" )

  # Column order is significant: populate_db inserts into tracks and
  # playlists positionally ("insert into ... values ( ?, ?, ... )").
  create_tables = <<SQL
    create table tracks (
      track_id INTEGER PRIMARY KEY,
      name VARCHAR DEFAULT NULL,
      artist VARCHAR DEFAULT NULL,
      album_artist VARCHAR DEFAULT NULL,
      composer VARCHAR DEFAULT NULL,
      album VARCHAR DEFAULT NULL,
      grouping VARCHAR DEFAULT NULL,
      genre VARCHAR DEFAULT NULL,
      kind VARCHAR DEFAULT NULL,
      size INTEGER DEFAULT NULL,
      total_time INTEGER DEFAULT NULL,
      disc_number INTEGER DEFAULT NULL,
      disc_count INTEGER DEFAULT NULL,
      track_number INTEGER DEFAULT NULL,
      track_count INTEGER DEFAULT NULL,
      year INTEGER DEFAULT NULL,
      bpm INTEGER DEFAULT NULL,
      date_modified DATE DEFAULT NULL,
      date_added DATE DEFAULT NULL,
      bit_rate INTEGER DEFAULT NULL,
      sample_rate INTEGER DEFAULT NULL,
      volume_adjustment INTEGER DEFAULT NULL,
      part_of_gapless_album BOOLEAN DEFAULT NULL,
      equalizer VARCHAR DEFAULT NULL,
      comments VARCHAR DEFAULT NULL,
      play_count INTEGER DEFAULT NULL,
      play_date INTEGER DEFAULT NULL,
      play_date_utc DATE DEFAULT NULL,
      skip_count INTEGER DEFAULT NULL,
      skip_date DATE DEFAULT NULL,
      release_date DATE DEFAULT NULL,
      rating INTEGER DEFAULT NULL,
      album_rating INTEGER DEFAULT NULL,
      album_rating_computed BOOLEAN DEFAULT NULL,
      compilation BOOLEAN DEFAULT NULL,
      series VARCHAR DEFAULT NULL,
      season INTEGER DEFAULT NULL,
      episode VARCHAR DEFAULT NULL,
      episode_order INTEGER DEFAULT NULL,
      sort_album VARCHAR DEFAULT NULL,
      sort_album_artist VARCHAR DEFAULT NULL,
      sort_artist VARCHAR DEFAULT NULL,
      sort_composer VARCHAR DEFAULT NULL,
      sort_name VARCHAR DEFAULT NULL,
      sort_series VARCHAR DEFAULT NULL,
      artwork_count INTEGER DEFAULT NULL,
      persistent_id VARCHAR DEFAULT NULL,
      disabled BOOLEAN DEFAULT NULL,
      clean BOOLEAN DEFAULT NULL,
      explicit BOOLEAN DEFAULT NULL,
      track_type VARCHAR DEFAULT NULL,
      podcast BOOLEAN DEFAULT NULL,
      has_video BOOLEAN DEFAULT NULL,
      hd BOOLEAN DEFAULT NULL,
      video_width INTEGER DEFAULT NULL,
      video_height INTEGER DEFAULT NULL,
      movie BOOLEAN DEFAULT NULL,
      tv_show BOOLEAN DEFAULT NULL,
      protected BOOLEAN DEFAULT NULL,
      purchased BOOLEAN DEFAULT NULL,
      unplayed BOOLEAN DEFAULT NULL,
      file_type INTEGER DEFAULT NULL,
      file_creator INTEGER DEFAULT NULL,
      location VARCHAR DEFAULT NULL,
      file_folder_count INTEGER DEFAULT NULL,
      library_folder_count INTEGER DEFAULT NULL
    );

    create table playlists (
      playlist_id INTEGER PRIMARY KEY,
      master BOOLEAN DEFAULT NULL,
      name VARCHAR DEFAULT NULL,
      playlist_persistent_id VARCHAR DEFAULT NULL,
      distinguished_kind INTEGER DEFAULT NULL,
      folder BOOLEAN DEFAULT NULL,
      visible BOOLEAN DEFAULT NULL,
      audiobooks BOOLEAN DEFAULT NULL,
      movies BOOLEAN DEFAULT NULL,
      music BOOLEAN DEFAULT NULL,
      party_shuffle BOOLEAN DEFAULT NULL,
      podcasts BOOLEAN DEFAULT NULL,
      purchased_music BOOLEAN DEFAULT NULL,
      tv_shows BOOLEAN DEFAULT NULL,
      all_items BOOLEAN DEFAULT NULL,
      genius_track_id BOOLEAN DEFAULT NULL,
      smart_info BLOB DEFAULT NULL,
      smart_criteria BLOB DEFAULT NULL
    );

    create table tracks_playlists (
      track_id INTEGER,
      playlist_id INTEGER
    );
SQL

  @db.execute_batch( create_tables )
  puts "init_db: stop #{Time.now}" if BENCHMARK
end
155
+
156
# Copies the parsed @tracks and @playlists into the database held in @db.
#
# All inserts run inside a single transaction using three prepared
# statements: one row per track, one row per playlist, and one
# tracks_playlists row per entry in a playlist's item list.
#
# Values are bound positionally, so the argument order below has to follow
# the column order of the tables created in init_db.
def populate_db
  puts "populate_db: start #{Time.now}" if BENCHMARK
  @db.transaction do |db|
    db.prepare(
      "insert into tracks values ( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?,
      ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?,
      ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?,
      ?, ?, ?, ?, ?, ? )"
    ) do |stmt|
      @tracks.each do |t|
        # Values wrapped in "#{...}" are bound as strings, so a missing key is
        # stored as '' rather than NULL; unwrapped values are bound as-is.
        stmt.execute t[:track_id], "#{t[:name]}", "#{t[:artist]}",
          "#{t[:album_artist]}", "#{t[:composer]}", "#{t[:album]}",
          "#{t[:grouping]}", "#{t[:genre]}", "#{t[:kind]}", t[:size],
          t[:total_time], t[:disc_number], t[:disc_count], t[:track_number],
          t[:track_count], t[:year], t[:bpm], t[:date_modified], t[:date_added],
          t[:bit_rate], t[:sample_rate], t[:volume_adjustment],
          t[:part_of_gapless_album], "#{t[:equalizer]}", "#{t[:comments]}",
          t[:play_count], t[:play_date], t[:play_date_utc], t[:skip_count],
          t[:skip_date], t[:release_date], t[:rating], t[:album_rating],
          t[:album_rating_computed], t[:compilation], t[:series], t[:season],
          t[:episode], t[:episode_order], "#{t[:sort_album]}",
          "#{t[:sort_album_artist]}", "#{t[:sort_artist]}", "#{t[:sort_composer]}",
          "#{t[:sort_name]}", "#{t[:sort_series]}", t[:artwork_count],
          "#{t[:persistent_id]}", t[:disabled], t[:clean], t[:explicit],
          "#{t[:track_type]}", t[:podcast], t[:has_video], t[:hd], t[:video_width],
          t[:video_height], t[:movie], t[:tv_show], t[:protected], t[:purchased],
          t[:unplayed], t[:file_type], t[:file_creator], t[:location],
          t[:file_folder_count], t[:library_folder_count]
      end
    end

    db.prepare(
      "insert into playlists values ( ?, ?, ?, ?, ?, ?, ?, ?, ?,
      ?, ?, ?, ?, ?, ?, ?, ?, ?)"
    ) do |stmt|
      # Each @playlists entry is a pair: p[0] is the attribute hash and p[1]
      # the list of track ids (the shape parse_xml pushes).
      @playlists.each do |p|
        stmt.execute p[0][:playlist_id], p[0][:master], "#{p[0][:name]}",
          "#{p[0][:playlist_persistent_id]}", p[0][:distinguished_kind], p[0][:folder],
          p[0][:visible], p[0][:audiobooks], p[0][:movies], p[0][:music], p[0][:party_shuffle],
          p[0][:podcasts], p[0][:purchased_music], p[0][:tv_shows], p[0][:all_items],
          p[0][:genius_track_id], p[0][:smart_info], p[0][:smart_criteria]
      end
    end

    db.prepare(
      "insert into tracks_playlists values ( ?, ? )"
    ) do |stmt|
      @playlists.each do |p|
        p[1].each do |tracks|
          # NOTE(review): the table declares (track_id, playlist_id) but the
          # playlist id is bound first here - confirm which order is intended.
          stmt.execute p[0][:playlist_id], tracks
        end
      end
    end
  end
  puts "populate_db: stop #{Time.now}" if BENCHMARK
end
212
+
213
# Based on seconds_fraction_to_time http://snippets.dzone.com/posts/show/5958
#
# Formats a duration as "DDD:HH:MM:SS" (zero-padded days, hours, minutes,
# seconds).
#
# seconds - duration in seconds; Integer or Float. Any fractional part is
#           truncated, for every magnitude (previously values under one
#           minute were printed unconverted, e.g. "000:00:00:59.9").
#
# Returns the formatted String.
def human_clock_time(seconds)
  whole = seconds.to_i
  # Values below one minute (including zero and negatives) carry nothing into
  # the larger units and are shown in the seconds field as they are.
  mins, secs  = whole >= 60 ? whole.divmod(60) : [0, whole]
  hours, mins = mins.divmod(60)
  days, hours = hours.divmod(24)
  format("%03d:%02d:%02d:%02d", days, hours, mins, secs)
end
232
+
233
# Converts a raw String taken from the library XML into the Ruby value that
# gets stored for it.
#
# value - the raw text.
# type  - type name; it is matched against /date/, /integer/, /string/ and
#         /boolean/ in that order.
#
# Returns a String (date with the first "Z" removed, or string), an Integer,
# or true/false.
# Raises NonBooleanValueException for a boolean that is not "true"/"false".
# Raises UnknownDataTypeException when no type pattern matches.
def fix_type(value, type)
  case type
  when /date/
    # sqlite3 doesn't understand the ISO 8601 "Z" suffix, so it is dropped.
    value.sub(/Z/, '').to_s
  when /integer/
    value.to_i
  when /string/
    value.to_s
  when /boolean/
    case value
    when "true"  then true
    when "false" then false
    else raise NonBooleanValueException, "Really now? True or False?: #{value}"
    end
  else
    raise UnknownDataTypeException, "I don't know how to treat type: #{type}"
  end
end
255
+
256
+ # Based on work by zenspider @ http://snippets.dzone.com/posts/show/3700
257
+
258
# Reads the library XML at @file line by line and fills @tracks and
# @playlists.
#
# This is a line-oriented scan keyed on tab depth, not a real XML parse:
# * after "\t<key>Tracks</key>", three-tab "<key>..</key><type>..</type>"
#   lines become entries of the current track hash (key lower-cased, spaces
#   turned into "_", interned) and a two-tab "</dict>" closes the track;
# * after "\t<key>Playlists</key>", the same three-tab lines describe the
#   playlist, five-tab "Track ID" lines are its items, and a two-tab
#   "</dict>" pushes [attributes, track_ids] onto @playlists.
# Values are converted with fix_type. Lines matching nothing are skipped.
def parse_xml
  puts "parse_xml: start #{Time.now}" if BENCHMARK

  typed_entry  = /^\t\t\t<key>(.*)<\/key><(date|integer|string)>(.*)<\/.*>$/
  flag_entry   = /^\t\t\t<key>(.*)<\/key><(true|false)\/>$/
  item_entry   = /^\t\t\t\t\t<key>Track ID<\/key><integer>(.*)<\/integer>$/
  record_close = /^\t\t<\/dict>$/

  to_key = lambda { |label| label.downcase.split.join("_").intern }

  in_tracks = false
  in_playlists = false
  current_track = {}
  current_playlist = {}
  current_items = []

  IO.foreach(File.expand_path(@file)) do |line|
    if line =~ /^\t<key>Tracks<\/key>$/
      in_tracks = true
    elsif in_tracks
      # While inside the Tracks section no other section header is looked at.
      case line
      when typed_entry
        label, kind, raw = Regexp.last_match.captures
        current_track[to_key.call(label)] = fix_type(raw, kind)
      when flag_entry
        label, raw = Regexp.last_match.captures
        current_track[to_key.call(label)] = fix_type(raw, "boolean")
      when record_close
        @tracks.push(current_track.dup)
        current_track.clear
      when /^\t<\/dict>$/
        in_tracks = false
      end
    elsif line =~ /^\t<key>Playlists<\/key>$/
      in_playlists = true
    elsif in_playlists
      case line
      when typed_entry
        label, kind, raw = Regexp.last_match.captures
        current_playlist[to_key.call(label)] = fix_type(raw, kind)
      when flag_entry
        label, raw = Regexp.last_match.captures
        current_playlist[to_key.call(label)] = fix_type(raw, "boolean")
      when item_entry
        current_items.push(Regexp.last_match(1).to_i)
      when record_close
        @playlists.push([current_playlist.dup, current_items.dup])
        current_playlist.clear
        current_items.clear
      when /^\t<\/array>$/
        in_playlists = false
      end
    end
  end

  puts "parse_xml: stop #{Time.now}" if BENCHMARK
end
327
+
328
# Counts the distinct artist values in the tracks table, ignoring NULL and
# empty-string artists.
#
# Returns an Integer.
def count_artists
  query = "select count(distinct artist) as count from tracks
    where artist not null and artist != ''"
  first_row = @db.execute(query)[0]
  first_row.first.to_i
end
332
+
333
# Counts the distinct album values in the tracks table, ignoring NULL and
# empty-string albums.
#
# Returns an Integer.
def count_albums
  query = "select count(distinct album) as count from tracks
    where album not null and album != ''"
  first_row = @db.execute(query)[0]
  first_row.first.to_i
end
337
+
338
# Counts every row in the tracks table.
#
# Returns an Integer.
def count_tracks
  first_row = @db.execute("select count(*) from tracks")[0]
  first_row.first.to_i
end
341
+
342
# Sums total_time over all tracks that have one and formats the result.
#
# The summed value is divided by 1000.00 before being handed to
# human_clock_time, whose String result is returned.
def total_time
  rows = @db.execute("select sum(total_time) from tracks
    where total_time not null")
  summed = rows[0].first.to_i
  human_clock_time(summed / 1000.00)
end
348
+
349
# Lists the artists with the most tracks.
#
# populate_db binds the artist as "#{t[:artist]}", so a track without an
# artist is stored with '' rather than NULL. The blank artist is therefore
# excluded explicitly, the same way count_artists does; otherwise it is
# grouped like a real artist and can lead the list.
#
# Returns at most @top rows of [count, artist], highest count first.
def top_artists
  @db.execute("select count(artist) as count, artist from tracks
    where artist not null and artist != ''
    group by artist
    order by count desc").first(@top)
end
355
+
356
+
357
# Lists the genres with the most tracks, ties ordered by genre name.
#
# The row cap (@top) is applied by the query's LIMIT clause.
# Returns rows of [count, genre].
def top_genres
  query = "select count(genre) as count, genre from tracks
    group by genre order by count desc, genre limit #{@top}"
  @db.execute(query)
end
361
+
362
# Lists the tracks with the earliest play_date_utc values.
#
# Tracks with a NULL or empty play_date_utc are left out.
# Returns at most @top rows of [datetime, name], earliest first.
def oldest_tracks
  query = "select datetime(play_date_utc), name from tracks
    where play_date_utc not null and play_date_utc != ''
    order by play_date_utc"
  rows = @db.execute(query)
  rows.first(@top)
end
367
+
368
# Ranks tracks by rating multiplied by play_count.
#
# Returns at most @top rows of [rating*play_count, name, artist, datetime of
# play_date_utc], highest product first.
def top_tracks_on_rating_times_playcount
  query = "select rating*play_count as count, name, artist,
    datetime(play_date_utc) from tracks
    order by count desc"
  rows = @db.execute(query)
  rows.first(@top)
end
373
+
374
# Based on work by zenspider @ http://snippets.dzone.com/posts/show/3700
#
# Ranks tracks by rating * play_count * ln(days since last played), so that
# well-rated, often-played tracks that have not been heard for a while float
# to the top. Only tracks with rating, play_count, name, artist and
# play_date_utc all present are considered. The age is measured from @now.
#
# Returns at most @top rows of [score, name, artist, last_played], highest
# score first.
def top_tracks_aging_well
  records = @db.execute("select rating, play_count, name, artist,
    datetime(play_date_utc) from tracks
    where rating not null and play_count not null and name not null
    and artist not null and play_date_utc not null
    order by rating, play_count")
  results = records.map do |rec|
    # NOTE(review): datetime() output carries no zone and Time.parse reads it
    # as local time - confirm whether a UTC parse is wanted here.
    age_in_days = ((@now - Time.parse(rec[4])) / 86400.0).to_i
    # Math.log(0) is -Infinity (its to_i raises FloatDomainError) and the log
    # of a negative number raises, so anything played less than a day before
    # @now counts as one day old, which scores 0.
    age_in_days = 1 if age_in_days < 1
    score = rec[0].to_i * rec[1].to_i * Math.log(age_in_days)
    [score.to_i, rec[2], rec[3], rec[4]]
  end
  results.sort.reverse.first(@top)
end
390
+
391
+ #
392
+ # I don't trust this function yet since it always passes and Scrobbler kicks this error msg
393
+ # /Library/Ruby/Gems/1.8/gems/scrobbler-0.2.0/lib/scrobbler/base.rb:21: warning: instance variable @similar not initialized
394
+ #
395
+ # Works from irb and is way cool!
396
+ #
397
# Looks up artists similar to the given one through Scrobbler.
#
# artist - artist name handed to Scrobbler::Artist.new.
#
# Returns at most @top pairs of [match, name], both as Strings, in the order
# Scrobbler reports them.
def related_artists_to(artist)
  lookup = Scrobbler::Artist.new(artist)
  pairs = lookup.similar.map do |neighbour|
    ["#{neighbour.match.dup}", "#{neighbour.name.dup}"]
  end
  pairs.first(@top)
end
405
+
406
+ # Shame, scrobbler only does last.fm API Version 1.0
407
+ #def related_tracks_to(track,artist)
408
+ #end
409
+
410
# Breaks the library down by rating.
#
# Returns one row per distinct rating value:
#   [rating, count(rating), count(name), @tracks.size, share]
# where share is count(name) divided by @tracks.size, rounded through a
# "%.4f" format. A NULL rating is reported as 0 (nil.to_i).
def rating_fanatic
  library_size = @tracks.size
  rows = @db.execute("select rating, count(rating), count(name) from tracks
    group by rating")
  rows.map do |rating, rated, named|
    share = "%.4f" % (named.to_f / library_size.to_f)
    [rating.to_i, rated.to_i, named.to_i, library_size, share.to_f]
  end
end
420
+
421
# Lists tracks whose file size is at or below a threshold.
#
# size - upper bound for the size column. It is passed as a bind variable
#        instead of being spliced into the SQL text, so arbitrary input
#        cannot alter the statement.
#
# Returns rows of [size, name, artist, location], smallest first.
def small_files(size)
  @db.execute("select size,name,artist,location from tracks
    where size <= ?
    order by size", [size])
end
426
+
427
# Finds tracks whose name starts with a doubled two-digit number ('01 01 '
# through '09 09 ').
#
# Background from the original author: iTunes has a bug that replicates a
# leading number - '01 01 01 somefile.mp3' turns into
# '01 01 01 01 somefile.mp3' on every click - and renaming the file back to
# '01 somefile.mp3' stops it.
#
# Returns rows of [artist, album, name, location] ordered by artist, album,
# name.
def find_buggy_itunes_files
  query = "select artist,album,name,location from tracks
    where name like '01 01 %'
    or name like '02 02 %'
    or name like '03 03 %'
    or name like '04 04 %'
    or name like '05 05 %'
    or name like '06 06 %'
    or name like '07 07 %'
    or name like '08 08 %'
    or name like '09 09 %'
    order by artist, album, name"
  @db.execute(query)
end
444
+
445
# Finds values of one tracks column that end in a space.
#
# thing - name of the tracks column to inspect.
#
# Returns single-column rows ordered by that column.
def find_trailing_spaces(thing)
  # NOTE(review): `thing` is spliced into the SQL verbatim (identifiers cannot
  # be bound), so it must only ever come from trusted code, never user input.
  query = "select #{thing} from tracks
    where #{thing} like '% '
    order by #{thing}"
  @db.execute(query)
end
450
+
451
# Finds non-podcast tracks whose name ends with the given suffix (for
# example a file extension left in the title).
#
# suffix - text the name must end with. It is bound as the LIKE pattern
#          "%<suffix>" instead of being pasted into a quoted SQL literal, so
#          a quote character in the suffix no longer breaks or alters the
#          statement. LIKE wildcards inside the suffix still act as wildcards.
#
# Returns rows of [name, location] ordered by name.
def bad_track_names(suffix)
  @db.execute("select name, location from tracks
    where lower(name) like ?
    and podcast is null
    order by name", ["%#{suffix}"])
end
457
+
458
# Lists tracks whose clean column equals 'true'.
#
# Returns rows of [name, artist, album, clean] ordered by artist, album, name.
def clean_tracks
  query = "select name, artist, album, clean from tracks
    where clean = 'true'
    order by artist, album, name"
  @db.execute(query)
end
463
+
464
# Lists tracks whose explicit column equals 'true'.
#
# Returns rows of [name, artist, album, explicit] ordered by artist, album,
# name.
def explicit_tracks
  query = "select name, artist, album, explicit from tracks
    where explicit = 'true'
    order by artist, album, name"
  @db.execute(query)
end
469
+
470
# Lists tracks that have no rating (rating is NULL).
#
# Returns rows of [album, artist, name] ordered by album, artist, name.
def tracks_not_rated
  query = "select album,artist,name from tracks
    where rating is null
    order by album,artist,name"
  @db.execute(query)
end
475
+
476
# Lists podcasts that have no genre (NULL or empty string).
#
# The genre test is parenthesised on purpose: AND binds tighter than OR in
# SQL, so without the parentheses every track with an empty genre matched,
# podcast or not.
#
# Returns rows of [name, artist] ordered by name.
def podcast_without_genre
  @db.execute("select name,artist from tracks
    where podcast = 'true' and (genre is null or genre = '')
    order by name")
end
481
+
482
# Finds tracks whose name, artist or album contains 'http:' or 'www.'
# (compared after lower-casing the column).
#
# Returns rows of [artist, album, name] ordered by artist, album, name.
def spam_tunes
  query = "select artist,album,name from tracks
    where lower(name) like '%http:%'
    or lower(name) like '%www.%'
    or lower(artist) like '%http:%'
    or lower(artist) like '%www.%'
    or lower(album) like '%http:%'
    or lower(album) like '%www.%'
    order by artist,album,name"
  @db.execute(query)
end
492
+
493
# Lists tracks that have neither a play count nor a skip count.
#
# (Open question from the original author: how can you rate something you
# never played?)
#
# Returns rows of [artist, album, name] ordered by artist, album, name.
def never_played
  query = "select artist,album,name from tracks
    where play_count is null and skip_count is null
    order by artist,album,name"
  @db.execute(query)
end
499
+
500
# Counts tracks per bit rate, skipping tracks without one.
#
# Returns rows of [count, bit_rate] ordered by bit_rate.
def bitrate_histogram
  query = "select count(bit_rate) as count, bit_rate from tracks
    where bit_rate not null
    group by bit_rate
    order by bit_rate"
  @db.execute(query)
end
506
+
507
# Counts tracks per genre, skipping NULL genres.
#
# Returns rows of [count, genre], largest count first.
def genre_histogram
  query = "select count(genre) as count, genre from tracks
    where genre is not null
    group by genre
    order by count desc"
  @db.execute(query)
end
513
+
514
# Lists tracks that have no track number and no has_video value.
#
# Returns rows of [artist, album, name] ordered by artist, album, name.
def missing_track_numbers
  query = "select artist,album,name from tracks
    where track_number is null and has_video is null
    order by artist,album,name"
  @db.execute(query)
end
519
+
520
# Lists tracks whose genre is NULL or an empty string.
#
# Returns rows of [artist, album, name] ordered by artist, album, name.
def missing_genre
  query = "select artist,album,name from tracks
    where genre is null or genre = ''
    order by artist,album,name"
  @db.execute(query)
end
525
+
526
# Lists the largest tracks by file size.
#
# The row cap (@top) is applied by the query's LIMIT clause.
# Returns rows of [size, artist, album, name], largest first.
def disk_pigs
  query = "select size,artist,album,name from tracks
    where size is not null
    order by size desc limit #{@top}"
  @db.execute(query)
end
531
+
532
# Lists tracks that have been skipped at least as often as they were played.
#
# Returns rows of [artist, album, name, skip_count, play_count] ordered by
# skip_count, play_count, artist, album, name.
def dj_itis_skip_happy
  query = "select artist,album,name,skip_count,play_count from tracks
    where skip_count >= play_count
    order by skip_count,play_count,artist,album,name"
  @db.execute(query)
end
537
+
538
# Finds tracks in @tracks whose name is written entirely in capital letters
# (A-Z) and whitespace.
#
# A name has to contain at least one capital letter, so empty or
# whitespace-only names are not reported. A missing artist or album is
# returned as nil instead of being dup'ed, which raises on older Rubies.
#
# Returns an Array of [name, artist, album] copies.
def screaming_names
  shouted = @tracks.select do |track|
    track[:name] && track[:name] =~ /^[A-Z\s]*[A-Z][A-Z\s]*$/
  end
  shouted.map do |track|
    [track[:name], track[:artist], track[:album]].map { |field| field && field.dup }
  end
end
549
+
550
# Renders the bitrate histogram as an HTML treemap.
#
# Each bitrate_histogram row becomes one child node (size = track count,
# label = bit rate). The 1024x768 page is written to
# /tmp/bitrate_treemap.html, replacing any previous file.
def treemap_bitrate
  tree = Treemap::Node.new
  bitrate_histogram.each do |row|
    tree.new_child(:size => row[0].to_i, :label => "#{row[1]}")
  end

  renderer = Treemap::HtmlOutput.new do |page|
    page.width = 1024
    page.height = 768
    page.center_labels_at_depth = 1
  end

  File.open('/tmp/bitrate_treemap.html', 'w') do |io|
    io.write(renderer.to_html(tree))
  end
end
562
+
563
# Renders the genre histogram as an HTML treemap.
#
# Each genre_histogram row becomes one child node (size = track count,
# label = genre). The 1024x768 page is written to /tmp/genre_treemap.html,
# replacing any previous file.
def treemap_genre
  tree = Treemap::Node.new
  genre_histogram.each do |row|
    tree.new_child(:size => row[0].to_i, :label => "#{row[1]}")
  end

  renderer = Treemap::HtmlOutput.new do |page|
    page.width = 1024
    page.height = 768
    page.center_labels_at_depth = 1
  end

  File.open('/tmp/genre_treemap.html', 'w') do |io|
    io.write(renderer.to_html(tree))
  end
end
575
+ end
576
+
577
# Raised by GrokITunes#valid_file? when the library file has no content.
class FileEmptyException < StandardError
end

# Raised by GrokITunes#valid_file? when the library file does not exist.
class FileNotFoundException < StandardError
end

# Raised by GrokITunes#fix_type for a type name it does not recognise.
class UnknownDataTypeException < StandardError
end

# Raised by GrokITunes#fix_type for a boolean that is neither "true" nor "false".
class NonBooleanValueException < StandardError
end