PheldItunesReporter 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +6 -0
- data/Manifest.txt +8 -0
- data/README.txt +70 -0
- data/Rakefile +20 -0
- data/bin/pheld_itunes_reporter +175 -0
- data/lib/pheld_itunes_data_miner.rb +405 -0
- data/lib/pheld_itunes_track.rb +27 -0
- data/test/test_pheld_itunes_data_miner.rb +647 -0
- metadata +103 -0
data/History.txt
ADDED
data/Manifest.txt
ADDED
data/README.txt
ADDED
@@ -0,0 +1,70 @@
|
|
1
|
+
= PheldItunesReporter
|
2
|
+
|
3
|
+
* http://rubyforge.org/projects/uwruby
|
4
|
+
* http://uwruby.rubyforge.org/pheld_itunes_reporter
|
5
|
+
|
6
|
+
== DESCRIPTION:
|
7
|
+
|
8
|
+
PheldItunesReporter provides a library and corresponding executable script that calculate and report statistics for a given iTunes library file. When executed, the script prints a text summary of all of the statistics it is capable of generating. It also creates four .PNG image files with graphs of key statistics.
|
9
|
+
|
10
|
+
== FEATURES/PROBLEMS:
|
11
|
+
|
12
|
+
The included library can generate the following statistics about the tracks in the iTunes library:
|
13
|
+
* total play time
|
14
|
+
* total number of tracks
|
15
|
+
* total number of genres
|
16
|
+
* total number of artists
|
17
|
+
* total number of albums
|
18
|
+
* average year
|
19
|
+
* N most popular genres
|
20
|
+
* N most popular artists
|
21
|
+
* N most popular years
|
22
|
+
* number of tracks added for each year
|
23
|
+
* correlation between bitrate and play count
|
24
|
+
* correlation between rating and play count
|
25
|
+
* guesstimated age of library owner
|
26
|
+
|
27
|
+
== SYNOPSIS:
|
28
|
+
|
29
|
+
To run the reporter script, navigate to the root of the source folder and type "./bin/pheld_itunes_reporter <path_to_itunes_library.xml>". All supported statistics are then printed and four image files with graphs are written to the current directory.
|
30
|
+
|
31
|
+
The PheldItunesDataMiner library is used to parse the iTunes library XML and generate statistics. No parameters are passed to the 'new' method. To get statistics, create a new data miner instance, then call its 'parse_file' method, which returns the list of tracks. That list is then passed to the other methods to generate the individual statistics.
|
32
|
+
|
33
|
+
Example:
|
34
|
+
data_miner = PheldItunesDataMiner.new
|
35
|
+
tracks = data_miner.parse_file('iTunes\ Music\ Library.xml')
|
36
|
+
ten_most_popular_artists = data_miner.get_most_popular_artists(tracks, 10)
|
37
|
+
|
38
|
+
== REQUIREMENTS:
|
39
|
+
|
40
|
+
* Nokogiri >= 1.0.6
|
41
|
+
* RMagick >= 2.8.0
* Gruff >= 0.3.4
|
42
|
+
|
43
|
+
== INSTALL:
|
44
|
+
|
45
|
+
sudo gem install pheld_itunes_reporter
|
46
|
+
|
47
|
+
== LICENSE:
|
48
|
+
|
49
|
+
(The MIT License)
|
50
|
+
|
51
|
+
Copyright (c) 2008 Peter Held
|
52
|
+
|
53
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
54
|
+
a copy of this software and associated documentation files (the
|
55
|
+
'Software'), to deal in the Software without restriction, including
|
56
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
57
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
58
|
+
permit persons to whom the Software is furnished to do so, subject to
|
59
|
+
the following conditions:
|
60
|
+
|
61
|
+
The above copyright notice and this permission notice shall be
|
62
|
+
included in all copies or substantial portions of the Software.
|
63
|
+
|
64
|
+
THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
65
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
66
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
67
|
+
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
68
|
+
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
69
|
+
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
70
|
+
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/Rakefile
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
# -*- ruby -*-
|
2
|
+
|
3
|
+
require 'rubygems'
|
4
|
+
require 'hoe'
|
5
|
+
require 'nokogiri'
|
6
|
+
require 'rmagick'
|
7
|
+
require 'gruff'
|
8
|
+
require './lib/pheld_itunes_data_miner.rb'
|
9
|
+
|
10
|
+
# Gem specification for PheldItunesReporter, declared through Hoe.
# The gem version is taken from the data miner library's VERSION constant.
Hoe.new('PheldItunesReporter', PheldItunesDataMiner::VERSION) do |hoe|
  hoe.rubyforge_name = 'uwruby'
  hoe.developer('Peter Held', 'peter.t.held@gmail.com')
  hoe.need_zip = true

  # Gems this package depends on, with their minimum versions.
  [
    ['nokogiri', '>= 1.0.6'],
    ['rmagick', '>= 2.8.0'],
    ['gruff', '>= 0.3.4']
  ].each { |dependency| hoe.extra_deps << dependency }
end
|
19
|
+
|
20
|
+
# vim: syntax=Ruby
|
@@ -0,0 +1,175 @@
|
|
1
|
+
#!/usr/bin/ruby
|
2
|
+
|
3
|
+
require 'rubygems'
|
4
|
+
require 'optparse' # for taking input
|
5
|
+
require 'gruff' # for generating graphs - requires RMagick...sorry!
|
6
|
+
require 'lib/pheld_itunes_data_miner.rb'
|
7
|
+
|
8
|
+
##
|
9
|
+
# Student Name: Peter Held
|
10
|
+
# Homework Week: 8
|
11
|
+
#
|
12
|
+
# This class outputs 4 PNG files. Please see them for the graphs. RMagick is required :(
|
13
|
+
#
|
14
|
+
|
15
|
+
|
16
|
+
##
# Command-line reporter for an iTunes library file.
#
# Parses the library with PheldItunesDataMiner, writes four PNG graphs
# (top artists, top years, top genres, tracks added by year) to the current
# directory using Gruff, and prints the text statistics.
class PheldItunesReporter
  VERSION = '1.0.0'

  # options::             object responding to +number_of_popular+ (how many
  #                       entries each "top N" graph shows)
  # itunes_library_file:: path of the iTunes library XML file to parse
  def initialize(options, itunes_library_file)
    @number_of_popular = options.number_of_popular
    @data_miner = PheldItunesDataMiner.new
    @tracks = @data_miner.parse_file(itunes_library_file)
  end

  # Entry point of the script.
  #
  # args:: command-line arguments; option flags are removed from this array
  #        by the option parser, and the first remaining entry is used as the
  #        library file.
  #
  # Does nothing when no library file is given. Prints a message to standard
  # error and returns when the given path is not an existing file, instead of
  # failing later on a nil track list.
  def self.run(args)
    options = ItunesReporterParser.parse(args)

    # Use the arguments we were handed rather than the global ARGV so the
    # method also works when called with any other array.
    library_file = args[0]
    return if library_file.nil?

    unless File.file?(library_file)
      warn "iTunes library file not found: #{library_file}"
      return
    end

    reporter = PheldItunesReporter.new(options, library_file)
    reporter.make_top_artists_graph
    reporter.make_top_years_graph
    reporter.make_top_genres_graph
    reporter.make_tracks_added_by_year_graph
    reporter.print_stats
  end

  # Prints every statistic the data miner supports to standard output.
  def print_stats
    @data_miner.print_stats(@tracks)
  end

  # Prints the rating/play-count and bit-rate/play-count correlations.
  def print_correlation
    rating_playcount_correlation = @data_miner.get_rating_playcount_correlation(@tracks)
    puts "The correlation between rating and play count is: #{rating_playcount_correlation}."
    bitrate_playcount_correlation = @data_miner.get_bitrate_playcount_correlation(@tracks)
    puts "The correlation between bit rate and play count is: #{bitrate_playcount_correlation}."
  end

  # Writes "top_artists.png": a side-bar graph of the most frequent artists.
  def make_top_artists_graph
    graph = Gruff::SideBar.new
    graph.title = "Top #{@number_of_popular} Artists"
    ranking = @data_miner.get_most_popular_artists(@tracks, @number_of_popular)
    write_graph(graph, "Tracks per Artist", ranking, "top_artists.png")
  end

  # Writes "top_years.png": a side-bar graph of the most frequent years.
  def make_top_years_graph
    graph = Gruff::SideBar.new
    graph.title = "Top #{@number_of_popular} Years"
    ranking = @data_miner.get_most_popular_years(@tracks, @number_of_popular)
    write_graph(graph, "Tracks per year", ranking, "top_years.png")
  end

  # Writes "top_genres.png": a side-bar graph of the most frequent genres.
  def make_top_genres_graph
    graph = Gruff::SideBar.new
    graph.title = "Top #{@number_of_popular} Genres"
    ranking = @data_miner.get_most_popular_genres(@tracks, @number_of_popular)
    write_graph(graph, "Tracks per genre", ranking, "top_genres.png")
  end

  # Writes "tracks_added_by_year.png": a bar graph of tracks added per year.
  def make_tracks_added_by_year_graph
    graph = Gruff::Bar.new
    graph.title = "Tracks Added by Year"
    tracks_added_by_year = @data_miner.get_tracks_added_by_year(@tracks)
    write_graph(graph, "Tracks Added", tracks_added_by_year, "tracks_added_by_year.png")
  end

  private

  # Fills +graph+ with one data series and writes it to +file_name+.
  #
  # graph::       an already titled Gruff graph
  # series_name:: legend name of the data series
  # pairs::       array of [label, count] pairs, in display order
  # file_name::   name of the image file to write
  def write_graph(graph, series_name, pairs, file_name)
    counts = pairs.map { |pair| pair[1] }
    graph.data(series_name, counts)

    # Gruff expects labels as a hash of data index => label text.
    labels = {}
    pairs.each_with_index { |pair, index| labels[index] = pair[0].to_s }
    graph.labels = labels

    # Upper axis bound: the largest count rounded down to a multiple of ten,
    # plus ten. An empty list falls back to 0 so it no longer raises.
    largest = counts.max || 0
    graph.maximum_value = ((largest / 10) * 10) + 10
    graph.minimum_value = 0

    graph.write(file_name)
  end
end
|
150
|
+
|
151
|
+
# Parser class to handle option flags
|
152
|
+
# Parses the command-line option flags of the reporter script.
class ItunesReporterParser
  # Parses +args+ and returns the resulting options.
  #
  # args:: array of command-line arguments. Recognised option flags are
  #        removed from the array; everything else is left in place.
  #
  # Returns an OpenStruct whose +number_of_popular+ is the value given with
  # -n/--number, or 10 when the option is absent or given without a value.
  def self.parse(args)
    options = OpenStruct.new
    options.number_of_popular = 10 # default to listing the top 10 popular things

    parser = OptionParser.new do |opts|
      opts.banner = "Usage: pheld_itunes_reporter [options] <itunes_library_file>"

      opts.separator ""
      opts.separator "Specific options:"

      opts.on("-n", "--number [NUMBER]", Integer,
              "Number of entries to list in each 'most popular' ranking.") do |number|
        # The value is optional, so the block may receive nil; keep the
        # default in that case instead of overwriting it with nil.
        options.number_of_popular = number unless number.nil?
      end
    end

    parser.parse!(args)
    options
  end
end
|
174
|
+
|
175
|
+
PheldItunesReporter.run(ARGV) if $0 == __FILE__
|
@@ -0,0 +1,405 @@
|
|
1
|
+
#!/usr/bin/ruby
|
2
|
+
|
3
|
+
require 'rubygems'
require 'nokogiri'  # Nokogiri (XML parsing)
require 'date'      # Date
require 'ostruct'   # OpenStruct
require 'ftools'    # File
require 'set'
require 'lib/pheld_itunes_track.rb'
|
9
|
+
|
10
|
+
##
|
11
|
+
# Student Name: Peter Held
|
12
|
+
# Homework Week: 8
|
13
|
+
#
|
14
|
+
#
|
15
|
+
|
16
|
+
##
# Parses an iTunes library XML file and computes statistics over its tracks.
#
# Usage: create an instance, call #parse_file (or #parse_library) to obtain an
# array of PheldItunesTrack objects, then pass that array to any of the
# statistics methods. The statistics methods do not modify the array they are
# given, so the same array can be reused for every statistic.
class PheldItunesDataMiner
  VERSION = '1.0.0'

  # Parses the library at +file_name+ and prints all statistics.
  # Prints a message to standard error and returns when the file is missing.
  def self.run(file_name)
    idm = PheldItunesDataMiner.new
    tracks = idm.parse_file(file_name)

    if tracks.nil?
      warn "iTunes library file not found: #{file_name.inspect}"
      return
    end

    idm.print_stats(tracks)
  end

  # Prints every supported statistic for +tracks+ to standard output.
  def print_stats(tracks)
    # totals
    puts "Track count: #{tracks.length}"
    puts "Number of artists: #{get_number_of_artists(tracks)}"
    puts "Number of albums: #{get_number_of_albums(tracks)}"
    days, hours, minutes, seconds = get_total_playtime(tracks)
    puts "Total playtime: #{days} days, #{hours} hours, #{minutes} minutes, #{seconds} seconds"

    # popular stuff
    print_ranking("Ten most popular artists:", get_most_popular_artists(tracks, 10))
    print_ranking("Ten most popular years:", get_most_popular_years(tracks, 10))
    print_ranking("Ten most popular genres:", get_most_popular_genres(tracks, 10))

    # other interesting statistics
    print_ranking("The number of tracks added each year was:", get_tracks_added_by_year(tracks))
    bitrate_playcount_correlation = get_bitrate_playcount_correlation(tracks)
    puts "The correlation between bit rate and play count is: #{bitrate_playcount_correlation}."
    rating_playcount_correlation = get_rating_playcount_correlation(tracks)
    puts "The correlation between rating and play count is: #{rating_playcount_correlation}."
    age_guess = guess_age(tracks)
    age_text = age_guess.nil? ? 'unknown' : age_guess.to_s
    puts "According to my calculations/assumptions and other peoples' research, your age is #{age_text}."
  end

  # Reads the file at +file_name+ and parses it with #parse_library.
  # Returns nil when +file_name+ is nil or does not name an existing file.
  def parse_file(file_name)
    return nil if file_name.nil? || !File.file?(file_name)

    # File.read opens, reads and closes the file in one step.
    parse_library(File.read(file_name))
  end

  # Parses iTunes library XML text and returns an array with one
  # PheldItunesTrack per /plist/dict/dict/dict element.
  #
  # Each track element is read as a sequence of <key> elements, each followed
  # by the element holding that key's value. Only the keys handled in
  # #assign_track_field are stored; all others are ignored.
  def parse_library(library_xml)
    doc = Nokogiri::XML.parse(library_xml)

    doc.xpath('/plist/dict/dict/dict').map do |track_xml|
      track = PheldItunesTrack.new
      last_key = nil # reset per track so a key never carries over

      track_xml.children.each do |element|
        # Skip non-element nodes (e.g. whitespace between tags) so they are
        # never mistaken for a key's value.
        next unless element.element?

        if element.name == 'key'
          last_key = element.text
        else
          assign_track_field(track, last_key, element.text)
          last_key = nil # the value for this key has been consumed
        end
      end

      track
    end
  end

  # Number of distinct artists among +tracks+ (tracks without artist ignored).
  def get_number_of_artists(tracks)
    count_distinct(tracks) { |track| track.artist }
  end

  # Number of distinct albums among +tracks+ (tracks without album ignored).
  def get_number_of_albums(tracks)
    count_distinct(tracks) { |track| track.album }
  end

  # Number of distinct genres among +tracks+ (tracks without genre ignored).
  def get_number_of_genres(tracks)
    count_distinct(tracks) { |track| track.genre }
  end

  # Number of tracks.
  def get_number_of_tracks(tracks)
    tracks.length
  end

  # Total play time of +tracks+ as [days, hours, minutes, seconds].
  # Tracks without a total time are ignored.
  def get_total_playtime(tracks)
    total_milliseconds = tracks.inject(0) do |total, track|
      track.total_time.nil? ? total : total + track.total_time
    end

    # takes seconds, but iTunes stores in milliseconds
    seconds_fraction_to_time(total_milliseconds / 1000)
  end

  # Returns [[year, track_count], ...] sorted by year, counting each track
  # under the year of its date added. Tracks without that date are ignored.
  def get_tracks_added_by_year(tracks)
    counts = count_occurrences(tracks) do |track|
      track.date_added && track.date_added.year
    end

    counts.sort # sort by year
  end

  # Returns up to +list_depth+ [artist, track_count] pairs, most frequent
  # first. Returns [] when +list_depth+ is not positive.
  def get_most_popular_artists(tracks, list_depth)
    top_entries(count_occurrences(tracks) { |track| track.artist }, list_depth)
  end

  # Returns up to +list_depth+ [year, track_count] pairs, most frequent
  # first. Returns [] when +list_depth+ is not positive.
  def get_most_popular_years(tracks, list_depth)
    top_entries(count_occurrences(tracks) { |track| track.year }, list_depth)
  end

  # Returns up to +list_depth+ [genre, track_count] pairs, most frequent
  # first. Returns [] when +list_depth+ is not positive.
  def get_most_popular_genres(tracks, list_depth)
    top_entries(count_occurrences(tracks) { |track| track.genre }, list_depth)
  end

  # Correlation between bit rate and age-normalized play count, computed over
  # the tracks that have a bit rate, a play count and a date added.
  def get_bitrate_playcount_correlation(tracks)
    usable_tracks = tracks.reject do |track|
      track.bit_rate.nil? || track.play_count.nil? || track.date_added.nil?
    end

    bit_rates = get_bit_rate(usable_tracks)
    play_counts = get_play_counts_normalized_for_date_added(usable_tracks)

    correlation(bit_rates, play_counts)
  end

  # Correlation between age-normalized play count and rating, computed over
  # the tracks that have a play count, a rating and a date added.
  def get_rating_playcount_correlation(tracks)
    usable_tracks = tracks.reject do |track|
      track.play_count.nil? || track.rating.nil? || track.date_added.nil?
    end

    play_counts = get_play_counts_normalized_for_date_added(usable_tracks)
    ratings = get_rating(usable_tracks)

    correlation(play_counts, ratings)
  end

  # Guesses the library owner's age as (current year - average track year
  # + 14). Returns nil when no track has a year.
  # see http://www.mcgill.ca/reporter/39/01/expert/
  def guess_age(tracks)
    average_year = get_average_year(tracks)
    return nil if average_year.nil?

    Time.now.year - average_year + 14
  end

  # Average of the tracks' years, using integer division.
  # Tracks without a year are ignored; returns nil when none has one.
  def get_average_year(tracks)
    years = tracks.map { |track| track.year }.compact
    return nil if years.empty?

    sum(years) / years.length
  end

  # Converts seconds to an array with days, hours, minutes and seconds.
  # Values below 60 are returned unchanged in the seconds position.
  def seconds_fraction_to_time(seconds)
    return [0, 0, 0, seconds] unless seconds >= 60

    mins = (seconds / 60).to_i
    seconds = (seconds % 60).to_i
    hours, mins = mins.divmod(60)
    days, hours = hours.divmod(24)

    [days, hours, mins, seconds]
  end

  # Play count of every track, in order.
  def get_play_count(tracks)
    tracks.map { |track| track.play_count }
  end

  # Age-normalized play count of every track, in order.
  def get_play_counts_normalized_for_date_added(tracks)
    tracks.map { |track| get_playcount_normalized_for_date_added(track) }
  end

  # Rating of every track, in order.
  def get_rating(tracks)
    tracks.map { |track| track.rating }
  end

  # Bit rate of every track, in order.
  def get_bit_rate(tracks)
    tracks.map { |track| track.bit_rate }
  end

  # Play count per day since the track was added, multiplied by 10000 and
  # rounded so that the values aren't fractions and the correlation() method
  # can use them.
  def get_playcount_normalized_for_date_added(track)
    age_in_days = Date.today - track.date_added
    # A track added today (or dated in the future) counts as one day old;
    # this avoids dividing by zero.
    age_in_days = 1 if age_in_days < 1

    (10000 * track.play_count / age_in_days).round
  end

  # Correlation coefficient of the equally long numeric lists +x+ and +y+.
  # Returns NaN when the lists are empty or when either list has no variance.
  # From http://blog.trevorberg.com/2008/08/13/standard-deviation-and-correlation-coefficient-in-ruby/
  def correlation(x, y)
    n = x.size
    return 0.0 / 0.0 if n.zero? # NaN: undefined without data

    sum_x = sum(x)
    sum_y = sum(y)

    sum_x_squared = sum(x.map { |item| item * item })
    sum_y_squared = sum(y.map { |item| item * item })

    products = []
    x.each_with_index { |value, index| products << value * y[index] }
    sum_xy = sum(products)

    # Calculate the correlation value
    left = n * sum_xy - sum_x * sum_y
    right = ((n * sum_x_squared - sum_x**2) * (n * sum_y_squared - sum_y**2)) ** 0.5

    left / right
  end

  # Sum of the items of +list+; nil for an empty list.
  # From http://blog.trevorberg.com/2008/08/13/standard-deviation-and-correlation-coefficient-in-ruby/
  def sum(list)
    total = nil
    list.each { |value| total = total ? total + value : value }
    total
  end

  private

  # Stores +text+ in the track attribute that belongs to the iTunes key +key+.
  # Keys other than the ones listed are ignored.
  def assign_track_field(track, key, text)
    case key
    when "Track ID"   then track.track_id   = text.to_i
    when "Name"       then track.name       = text.strip
    when "Artist"     then track.artist     = text.strip
    when "Album"      then track.album      = text.strip
    when "Total Time" then track.total_time = text.to_i
    when "Year"       then track.year       = text.to_i
    when "Bit Rate"   then track.bit_rate   = text.to_i
    when "Play Count" then track.play_count = text.to_i
    when "Rating"     then track.rating     = text.to_i
    when "Date Added" then track.date_added = Date.parse(text)
    when "Genre"      then track.genre      = text.strip
    end
  end

  # Number of distinct non-nil values the block yields for +tracks+.
  def count_distinct(tracks)
    tracks.map { |track| yield track }.compact.uniq.length
  end

  # Hash of value => number of tracks for which the block yields that value.
  # Tracks for which the block yields nil are not counted.
  def count_occurrences(tracks)
    counts = Hash.new(0)
    tracks.each do |track|
      value = yield track
      counts[value] += 1 unless value.nil?
    end
    counts
  end

  # The +list_depth+ [value, count] pairs with the highest counts.
  def top_entries(counts, list_depth)
    return [] unless list_depth > 0

    counts.sort { |a, b| b[1] <=> a[1] }.first(list_depth)
  end

  # Prints +heading+ followed by one tab-indented line per [name, count] pair.
  def print_ranking(heading, pairs)
    puts heading
    pairs.each { |name, count| puts "\t\"#{name}\" - #{count} tracks" }
  end
end
|
404
|
+
|
405
|
+
# When this file is executed directly, report on the library file named by the
# first command-line argument.
PheldItunesDataMiner.run(ARGV[0]) if $0 == __FILE__
|