pandata 0.1.0.pre

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 7f73c416a585d7b27a59799982502ab3425c0830
4
+ data.tar.gz: 88921adfb03392ac5af5f56402e1ef58e6cee767
5
+ SHA512:
6
+ metadata.gz: 17a42cf2bf107425067d52db7139abe3bb96321cdd9cc16e9041ebe8b4641264e21dbbe8040f9a2fd5cf3df8af3c161f1d51aa60ccbefc5980ab7616b12806d3
7
+ data.tar.gz: 682286a820a8f4314fe399f201e6d5362929d4b823e3b0b2c44a2cb2c09f84f1441cc7d1c10271ddedb51978103c664d492292e74e5d209c1716b0e064d67844
data/bin/pandata ADDED
@@ -0,0 +1,86 @@
1
#!/usr/bin/env ruby

# Command-line entry point for Pandata.
#
# Parses ARGV, looks up the requested Pandora user, downloads every requested
# data type and writes the result (plain text or JSON) either to standard
# output or to the file given with -o/--output_file.

require_relative '../lib/pandata'
require_relative '../lib/pandata/argv_parser'
require_relative '../lib/pandata/data_formatter'

options = Pandata::ArgvParser.parse(ARGV)

# ArgvParser.parse shifts the user ID off ARGV, so an empty ARGV at this point
# means no data flags were given. Print the usage help before touching any
# output file.
if ARGV.empty?
  puts options[:opts]
  exit
end

output_file = options[:output_file]

# Output goes through a local lambda instead of a method defined on Object,
# so nothing global is patched.
if output_file
  # Start from a clean file; every write below appends to it.
  File.delete(output_file) if File.exist?(output_file)

  write = lambda do |string|
    File.open(output_file, 'a') { |file| file.write(string) }
  end
else
  write = lambda { |string| puts string }
end

scraper = Pandata::Scraper.get(options[:user_id])
formatter = Pandata::DataFormatter.new

# If scraper is an array, a Pandora user could not be found with certainty.
# In this case, scraper will contain webnames similar to options[:user_id].
if scraper.kind_of?(Array)
  puts "No exact match for '#{options[:user_id]}'."

  unless scraper.empty?
    puts "\nWebname results for '#{options[:user_id]}':"
    puts formatter.list(scraper)
  end

  exit
end

scraper_data = {}
options[:data_to_get].each do |data_type|
  match = /(bookmark|like)e?d_(.*)/.match(data_type.to_s)

  if match
    method = "#{match[1]}s"      # 'likes' or 'bookmarks'
    argument = match[2].to_sym   # :tracks, :artists, :stations or :albums
    scraper_data[data_type] = scraper.public_send(method, argument)
  else
    scraper_data[data_type] = scraper.public_send(data_type)
  end
end

if options[:return_as_json]
  require 'json'
  write.call(JSON.generate(scraper_data))
  exit
end

scraper_data.each do |key, value|
  # Capitalize each word in the key symbol.
  # e.g. :liked_tracks becomes 'Liked Tracks:'
  title = key.to_s.split('_').map(&:capitalize).join(' ') << ':'

  output =
    if value.empty?
      ' ** No Data **'
    else
      # The branch order matters: the regexes are tested top to bottom.
      case key
      when /playing_station|recent_activity/
        formatter.list(value)
      when /liked_tracks|bookmarked_tracks/
        formatter.tracks(value)
      when /liked_artists|bookmarked_artists|stations|liked_stations/
        formatter.sort_list(value)
      when :liked_albums
        formatter.albums(value)
      when /following|followers/
        formatter.followx(value)
      end
    end

  write.call("#{ title }\n#{ output }")
end
data/lib/pandata.rb ADDED
@@ -0,0 +1,18 @@
1
+ # encoding: utf-8
2
+
3
+ require_relative 'pandata/data_urls'
4
+ require_relative 'pandata/downloader'
5
+ require_relative 'pandata/parser'
6
+ require_relative 'pandata/scraper'
7
+
8
module Pandata
  # Version components of the gem and the dotted version string built from them.
  module Version
    MAJOR, MINOR, PATCH, BUILD = 0, 1, 0, 'pre'

    # Components that are nil (e.g. no BUILD suffix) are left out of the string.
    STRING = [MAJOR, MINOR, PATCH, BUILD].reject(&:nil?).join('.')
  end
end
18
+
@@ -0,0 +1,131 @@
1
+ require 'optparse'
2
+ require_relative '../pandata'
3
+
4
module Pandata

  # Parses command-line input.
  class ArgvParser
    # Prevent instances
    private_class_method :new

    # Every data type, in the order used when --all is given.
    ALL_DATA_TYPES = [
      :recent_activity,
      :playing_station,
      :stations,
      :bookmarked_tracks,
      :bookmarked_artists,
      :liked_tracks,
      :liked_artists,
      :liked_albums,
      :liked_stations,
      :followers,
      :following
    ].freeze

    # Takes an ARGV (array) argument.
    #
    # The first element of argv is taken as the user ID and is removed from
    # the array (argv is mutated). -h/--help and --version print and exit.
    #
    # Returns a hash with:
    # - :opts           (OptionParser object)
    # - :user_id        (string, or nil when argv is empty)
    # - :output_file    (string, only present when -o is given)
    # - :data_to_get    (array of symbols, without duplicates)
    # - :get_all_data   (boolean)
    # - :return_as_json (boolean)
    def self.parse(argv)
      options = { data_to_get: [], get_all_data: false, return_as_json: false }

      options[:opts] = OptionParser.new do |opts|
        # Registers a flag that requests one data type. The long flag name is
        # always the data type's own name.
        data_flag = lambda do |short, data_type, description|
          opts.on(short, "--#{data_type}", description) do
            options[:data_to_get] << data_type
          end
        end

        opts.banner = 'Pandata: A tool for downloading Pandora.com data (likes, bookmarks, stations, etc.)'
        opts.define_head 'Usage: pandata <email|webname> [options]'
        opts.separator <<-END

Examples:
pandata john@example.com --liked_tracks
pandata my_webname --all -o my_pandora_data.txt
pandata my_webname -lLb --json

Options:
        END

        # Registration order is the order shown in the help text.
        opts.on('--all', 'Get all data') do
          options[:get_all_data] = true
        end

        data_flag.call('-a', :recent_activity, 'Get recent activity')
        data_flag.call('-B', :bookmarked_artists, 'Get all bookmarked artists')
        data_flag.call('-b', :bookmarked_tracks, 'Get all bookmarked tracks')
        data_flag.call('-F', :followers, "Get all user's followers")
        data_flag.call('-f', :following, 'Get all users being followed by user')

        opts.on('-j', '--json', 'Return the results as JSON') do
          options[:return_as_json] = true
        end

        data_flag.call('-L', :liked_artists, 'Get all liked artists')
        data_flag.call('-l', :liked_tracks, 'Get all liked tracks')
        data_flag.call('-m', :liked_albums, 'Get all liked albums')
        data_flag.call('-n', :liked_stations, 'Get all liked stations')

        opts.on('-o', '--output_file PATH', 'File to output the data into') do |path|
          options[:output_file] = path
        end

        data_flag.call('-S', :playing_station, 'Get currently playing station')
        data_flag.call('-s', :stations, 'Get all stations')

        opts.on_tail("-h", "--help", "Show this message") do
          puts opts
          exit
        end

        opts.on_tail("--version", "Show version") do
          puts Pandata::Version::STRING
          exit
        end
      end

      # Non-destructive parse: argv itself is only changed by the shift below.
      options[:opts].parse(argv)

      # User ID is the first argument.
      options[:user_id] = argv.shift

      if options[:get_all_data]
        options[:data_to_get] = ALL_DATA_TYPES.dup
      else
        # Remove any duplicates caused by supplying flags multiple times.
        options[:data_to_get].uniq!
      end

      options
    end
  end
end
@@ -0,0 +1,105 @@
1
+ require 'set'
2
+
3
module Pandata

  # Sorts and formats Pandata::Scraper data as a string for printing.
  class DataFormatter

    # Takes an array or a single item and returns a string with each item on
    # its own line. Returns an empty string for an empty array.
    #--
    #
    # Example output:
    # - item1
    # - item2
    # - item3
    #
    #++
    def list(data)
      items = data.kind_of?(Array) ? data : [data]
      items.map { |item| " - #{item}\n" }.join
    end

    # Identical to #list but sorts alphabetically ignoring a leading 'the'.
    def sort_list(data)
      list custom_sort(data)
    end

    # Takes an array of hashes with :artist and :track keys.
    # Returns the tracks grouped by artist (see #artists_items).
    def tracks(tracks)
      artists_items(tracks, :track)
    end

    # Takes an array of hashes with :artist and :album keys.
    # Returns the albums grouped by artist (see #artists_items).
    def albums(albums)
      artists_items(albums, :album)
    end

    # Takes an array of hashes with :name, :webname and :href keys.
    # Returns one three-line entry per user, sorted case-insensitively by
    # webname.
    def followx(data)
      sorted_users = data.sort_by { |user| user[:webname].downcase }

      sorted_users.map do |user|
        " - name: #{user[:name]}\n" \
        " webname: #{user[:webname]}\n" \
        " href: #{user[:href]}\n"
      end.join
    end

    private

    # Takes an array, set or hash (hashes are sorted by key).
    # Sorts alphabetically ignoring the initial 'The' when sorting strings.
    # Also case-insensitive to prevent lowercase names from being sorted last.
    #
    # Returns a hash when given a hash, otherwise an array.
    def custom_sort(enumerable)
      sorted_array = enumerable.sort_by do |key, _|
        # Only strip 'the' when it is a whole leading word, so names such as
        # 'Thelonious Monk' keep their full sort key. to_s keeps a missing
        # (nil) name from raising; it sorts as an empty string.
        key.to_s.sub(/\Athe\s+/i, '').downcase
      end

      # sort_by() returns an array of pairs when called on hashes, so rebuild
      # the hash in sorted order.
      enumerable.kind_of?(Hash) ? Hash[sorted_array] : sorted_array
    end

    # Takes an array of hashes with :artist and another key belonging to an
    # artist (e.g. :track or :album).
    # Returns a string with each artist name on a line with the artist's items
    # listed below. Artists and items are sorted with #custom_sort and
    # duplicate items per artist are dropped.
    #--
    #
    # Example output:
    # - Artist1
    # - item2
    # - item3
    # - Artist2
    # - item1
    #
    #++
    def artists_items(data, item_name)
      items_by_artist = {}

      data.each do |hash|
        # A Set per artist removes duplicate items.
        (items_by_artist[hash[:artist]] ||= Set.new) << hash[item_name]
      end

      str = ''
      custom_sort(items_by_artist).each do |artist_name, items|
        str << " - #{artist_name}\n"
        custom_sort(items).each { |item| str << " - #{item}\n" }
      end
      str
    end

  end
end
@@ -0,0 +1,20 @@
1
module Pandata
  # Number of results to get from a feeds.pandora.com URL.
  MAX_RESULTS = 100_000 # Get everything...

  # URLs to Pandora's data!
  #
  # Each value is a format string: fill the %{...} placeholders with
  # String#% and a hash (e.g. url % { webname: 'bob' }). The table and its
  # strings are frozen because they are shared, read-only configuration.
  DATA_FEED_URLS = {
    user_search: 'http://www.pandora.com/content/connect?searchString=%{searchString}',
    recent_activity: 'http://feeds.pandora.com/feeds/people/%{webname}/recentactivity.xml',
    playing_station: 'http://feeds.pandora.com/feeds/people/%{webname}/nowplaying.xml',
    stations: "http://feeds.pandora.com/feeds/people/%{webname}/stations.xml?max=#{MAX_RESULTS}",
    bookmarked_tracks: "http://feeds.pandora.com/feeds/people/%{webname}/favorites.xml?max=#{MAX_RESULTS}",
    bookmarked_artists: "http://feeds.pandora.com/feeds/people/%{webname}/favoriteartists.xml?max=#{MAX_RESULTS}",
    liked_tracks: 'http://www.pandora.com/content/tracklikes?likeStartIndex=%{nextLikeStartIndex}&thumbStartIndex=%{nextThumbStartIndex}&webname=%{webname}',
    liked_artists: 'http://www.pandora.com/content/artistlikes?artistStartIndex=%{nextStartIndex}&webname=%{webname}',
    liked_stations: 'http://www.pandora.com/content/stationlikes?stationStartIndex=%{nextStartIndex}&webname=%{webname}',
    liked_albums: 'http://www.pandora.com/content/albumlikes?albumStartIndex=%{nextStartIndex}&webname=%{webname}',
    following: 'http://www.pandora.com/content/following?startIndex=%{nextStartIndex}&webname=%{webname}',
    followers: 'http://www.pandora.com/content/followers?startIndex=%{nextStartIndex}&webname=%{webname}'
  }.each_value(&:freeze).freeze
end
@@ -0,0 +1,56 @@
1
+ require 'json'
2
+ require 'open-uri'
3
+
4
module Pandata
  # Custom Pandata error
  class PandataError < StandardError
  end

  # Retrieves data from Pandora and handles errors.
  class Downloader
    # A GitHub Gist that contains an updated cookie allowing access to 'login-only' visible data.
    CONFIG_URL = 'https://gist.github.com/ustasb/596f1ee96d03463fde77/raw/pandata_config.json'

    class << self
      # The cookie is stored on the class so that every Downloader instance
      # reuses it.
      attr_accessor :cookie
    end

    # Gets a Pandora cookie and returns a Downloader instance.
    # If a cookie is already stored on the class, no request is made.
    def initialize
      Downloader.cookie ||= get_cookie
    end

    # Downloads a page and returns its content as a string.
    # Raises PandataError when the server answers with an HTTP error.
    def read_page(url)
      download(url, Downloader.cookie).read
    end

    private

    # Downloads a page and handles errors.
    # Returns the IO-like object produced by open-uri.
    # Prints a diagnostic and raises PandataError on an HTTP error response.
    def download(url, cookie = '')
      request_options = { 'Cookie' => cookie, :read_timeout => 5 }

      begin
        open_url(escape_url(url), request_options)
      rescue OpenURI::HTTPError => error
        puts "The network request for:\n #{url}\nreturned an error:\n #{error.message}"
        puts "Please try again later or update Pandata. Sorry about that!\n\nFull error:"
        raise PandataError
      end
    end

    # Percent-encodes characters that are not allowed in a URL.
    # URI.escape was removed in Ruby 3.0; the default parser offers the same
    # escaping.
    def escape_url(url)
      URI::DEFAULT_PARSER.escape(url)
    end

    # Opens a URL with open-uri. Kernel#open no longer opens URLs since
    # Ruby 3.0, so URI.open is used where it exists, with Kernel#open as the
    # fallback for older Rubies.
    def open_url(url, request_options)
      if URI.respond_to?(:open)
        URI.open(url, request_options)
      else
        open(url, request_options)
      end
    end

    # Downloads the JSON config at CONFIG_URL and returns its 'cookie' value.
    # Raises PandataError when the running Pandata version is not newer than
    # the config's 'required_update_for' version.
    def get_cookie
      config = JSON.parse download(CONFIG_URL).read

      if Gem::Version.new(Pandata::Version::STRING) <= Gem::Version.new(config['required_update_for'])
        raise PandataError, 'Pandora.com has changed something and you need to update Pandata!'
      end

      config['cookie']
    end
  end
end
@@ -0,0 +1,191 @@
1
+ require 'nokogiri'
2
+
3
module Pandata

  # Parses HTML/XML pages from Pandora for relevant data.
  class Parser

    # Returns an array of webnames read from the 'webname' attribute of each
    # '.user_name a' link.
    def get_webnames_from_search(html)
      Nokogiri::HTML(html).css('.user_name a').map { |link| link['webname'] }
    end

    # Returns the query parameters necessary to get the next page of data
    # from Pandora.
    #
    # Returns false when the page has no '.show_more' element. Otherwise
    # returns a hash that contains an integer for each of :nextStartIndex,
    # :nextLikeStartIndex and :nextThumbStartIndex that is present as a
    # data attribute on that element.
    def get_next_data_indices(html)
      show_more = Nokogiri::HTML(html).css('.show_more')[0]
      return false unless show_more

      next_indices = {}
      ['nextStartIndex', 'nextLikeStartIndex', 'nextThumbStartIndex'].each do |attr_name|
        # The attributes are looked up under their lowercased names.
        attr = show_more.attributes['data-' + attr_name.downcase]
        next_indices[attr_name.to_sym] = attr.value.to_i if attr
      end
      next_indices
    end

    # Returns an array of recent activities.
    def get_recent_activity(xml)
      get_item_titles(xml)
    end

    # Returns an array of station names.
    def get_stations(xml)
      get_item_titles(xml)
    end

    # Returns the currently playing station name, or an empty string when the
    # feed has no items.
    def get_playing_station(xml)
      station = ''

      xml_each_item(xml) do |title|
        station = title # First title is the station name.
        break
      end

      station
    end

    # Returns an array of hashes with :artist and :track keys.
    #
    # Item titles look like 'Track by Artist'. The title is split at the LAST
    # ' by ' so that track names containing ' by ' (e.g. 'Stand by Me') stay
    # intact. When a title contains no ' by ', the whole title is the track
    # and :artist is nil.
    def get_bookmarked_tracks(xml)
      tracks = []

      xml_each_item(xml) do |title|
        track, separator, artist = title.rpartition(' by ')

        if separator.empty?
          tracks << { artist: nil, track: title }
        else
          tracks << { artist: artist, track: track }
        end
      end

      tracks
    end

    # Returns an array of artist names.
    def get_bookmarked_artists(xml)
      get_item_titles(xml)
    end

    # Returns an array of hashes with :artist and :track keys.
    def get_liked_tracks(html)
      tracks = []

      infobox_each_link(html) do |title, subtitle|
        tracks << { track: title, artist: subtitle }
      end

      tracks
    end

    # Returns an array of artist names.
    def get_liked_artists(html)
      get_infobox_titles(html)
    end

    # Returns an array of station names.
    def get_liked_stations(html)
      get_infobox_titles(html)
    end

    # Returns an array of hashes with :artist and :album keys.
    def get_liked_albums(html)
      albums = []

      infobox_each_link(html) do |title, subtitle|
        albums << { album: title, artist: subtitle }
      end

      albums
    end

    # Returns an array of hashes with :name, :webname and :href keys.
    def get_following(html)
      get_followx_users(html)
    end

    # Returns an array of hashes with :name, :webname and :href keys.
    def get_followers(html)
      get_followx_users(html)
    end

    private

    # Loops over each 'item' tag and yields the title and description.
    def xml_each_item(xml)
      Nokogiri::XML(xml).css('item').each do |item|
        title = item.at_css('title').text
        desc = item.at_css('description').text
        yield(title, desc)
      end
    end

    # Returns an array with the title of every 'item' tag.
    def get_item_titles(xml)
      titles = []
      xml_each_item(xml) { |title| titles << title }
      titles
    end

    # Loops over each .infobox container and yields the title and subtitle.
    # The subtitle is nil when the infobox has no 'p a' link.
    def infobox_each_link(html)
      Nokogiri::HTML(html).css('.infobox').each do |infobox|
        infobox_body = infobox.css('.infobox-body')

        title_link = infobox_body.css('h3 a').text.strip
        subtitle_link = infobox_body.css('p a').first
        subtitle_link = subtitle_link.text.strip if subtitle_link

        yield(title_link, subtitle_link)
      end
    end

    # Returns an array of titles from #infobox_each_link.
    def get_infobox_titles(html)
      titles = []
      infobox_each_link(html) { |title| titles << title }
      titles
    end

    # Loops over each .follow_section container and returns an array of
    # hashes with :name, :webname and :href keys.
    def get_followx_users(html)
      users = []

      Nokogiri::HTML(html).css('.follow_section').each do |section|
        listener_name = section.css('.listener_name').first
        webname = listener_name['webname']

        # Remove any 'spans with a space' that sometimes appear with special characters.
        listener_name.css('span').each(&:remove)
        name = listener_name.text.strip

        href = section.css('a').first['href']

        users << { name: name, webname: webname, href: href }
      end

      users
    end

  end
end
@@ -0,0 +1,165 @@
1
+ require_relative 'data_urls'
2
+ require_relative 'parser'
3
+ require_relative 'downloader'
4
+
5
module Pandata

  # Downloads a user's Pandora.com data.
  # A user's profile must be public for Pandata to download its data.
  class Scraper

    # What Pandora uses to identify a user and it remains constant even if
    # the user ties a new email address to their Pandora account.
    attr_reader :webname

    # Takes either an email or a webname string.
    # Returns either:
    # - a new scraper object for the supplied user ID.
    # - an array of similar webnames because a matching Pandora user could not be found.
    def self.get(user_id)
      search_url = DATA_FEED_URLS[:user_search] % { searchString: user_id }
      html = Downloader.new.read_page(search_url)
      webnames = Parser.new.get_webnames_from_search(html)

      if webnames.include?(user_id)
        new(user_id)
      # If user_id looks like an email and still gets a result.
      elsif webnames.size == 1 && /.*@.*\..*/ =~ user_id
        new(webnames.first)
      else
        webnames
      end
    end

    # Instances are only created through Scraper.get.
    private_class_method :new
    def initialize(webname)
      @downloader = Downloader.new
      @parser = Parser.new
      @webname = webname
    end

    # Returns an array of the user's recent activity.
    def recent_activity
      scrape_for(:recent_activity, :get_recent_activity)
    end

    # Returns the user's currently playing station.
    def playing_station
      scrape_for(:playing_station, :get_playing_station).first
    end

    # Returns an array of the user's stations.
    def stations
      scrape_for(:stations, :get_stations)
    end

    # Returns a user's bookmarked data.
    #
    # Bookmark types:
    # - :artists - Returns an array of artist names.
    # - :tracks  - Returns an array of hashes with :artist and :track keys.
    # - :all     - Returns a hash with all bookmarked data.
    #
    # Returns nil for any other bookmark type.
    def bookmarks(bookmark_type = :all)
      case bookmark_type
      when :tracks
        scrape_for(:bookmarked_tracks, :get_bookmarked_tracks)
      when :artists
        scrape_for(:bookmarked_artists, :get_bookmarked_artists)
      when :all
        { artists: bookmarks(:artists),
          tracks: bookmarks(:tracks) }
      end
    end

    # Returns a user's liked data. (The results from giving a 'thumbs up.')
    #
    # Like types:
    # - :artists  - Returns an array of artist names.
    # - :albums   - Returns an array of hashes with :artist and :album keys.
    # - :stations - Returns an array of station names.
    # - :tracks   - Returns an array of hashes with :artist and :track keys.
    # - :all      - Returns a hash with all liked data.
    #
    # Returns nil for any other like type.
    def likes(like_type = :all)
      case like_type
      when :tracks
        scrape_for(:liked_tracks, :get_liked_tracks)
      when :artists
        scrape_for(:liked_artists, :get_liked_artists)
      when :stations
        scrape_for(:liked_stations, :get_liked_stations)
      when :albums
        scrape_for(:liked_albums, :get_liked_albums)
      when :all
        { artists: likes(:artists),
          albums: likes(:albums),
          stations: likes(:stations),
          tracks: likes(:tracks) }
      end
    end

    # Returns the *public* users being followed by the user.
    #
    # Returns an array of hashes with keys:
    # - :name    - Profile name
    # - :webname - Unique Pandora ID
    # - :href    - URL to online Pandora profile.
    def following
      scrape_for(:following, :get_following)
    end

    # Returns the user's followers in a format identical to #following.
    def followers
      scrape_for(:followers, :get_followers)
    end

    private

    # Downloads all data for a given type, calls the supplied Pandata::Parser
    # method on every page and removes any duplicates.
    def scrape_for(data_type, parser_method)
      results = []

      url = get_url(data_type)
      download_all_data(url) do |html, next_data_indices|
        new_data = @parser.public_send(parser_method, html)

        if new_data.kind_of?(Array)
          results.concat(new_data)
        else
          results.push(new_data)
        end

        # The block's value is the URL of the next page (nil on the last one).
        get_url(data_type, next_data_indices) if next_data_indices
      end

      # Pandora data often contains duplicates--get rid of them.
      results.uniq
    end

    # Downloads all data given a starting URL. Some Pandora feeds only return
    # 5 - 10 items per page but contain a link to the next set of data. Threads
    # cannot be used because page A must be visited to know how to obtain page B.
    #
    # Yields each page's content and its next-page indices (false on the last
    # page); the block must return the URL of the next page.
    def download_all_data(url)
      loop do
        html = @downloader.read_page(url)
        next_data_indices = @parser.get_next_data_indices(html)

        # An empty set of indices carries no position for a next page. Treat
        # it as the last page; otherwise get_url would fall back to index 0
        # and the first page would be downloaded forever.
        next_data_indices = false if next_data_indices && next_data_indices.empty?

        url = yield(html, next_data_indices)
        break unless next_data_indices
      end
    end

    # Grabs a URL from DATA_FEED_URLS and formats it appropriately.
    # With no indices given, every start index defaults to 0.
    def get_url(data_name, next_data_indices = {})
      if next_data_indices.empty?
        next_data_indices = {
          nextStartIndex: 0,
          nextLikeStartIndex: 0,
          nextThumbStartIndex: 0
        }
      end

      # merge leaves the caller's hash untouched.
      DATA_FEED_URLS[data_name] % next_data_indices.merge(webname: @webname)
    end

  end
end
metadata ADDED
@@ -0,0 +1,81 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: pandata
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0.pre
5
+ platform: ruby
6
+ authors:
7
+ - Brian Ustas
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2013-03-10 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: nokogiri
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ~>
18
+ - !ruby/object:Gem::Version
19
+ version: 1.5.6
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ~>
25
+ - !ruby/object:Gem::Version
26
+ version: 1.5.6
27
+ - !ruby/object:Gem::Dependency
28
+ name: rspec
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ~>
32
+ - !ruby/object:Gem::Version
33
+ version: 2.12.2
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ~>
39
+ - !ruby/object:Gem::Version
40
+ version: 2.12.2
41
+ description: A library and tool for downloading Pandora.com data (likes, bookmarks,
42
+ stations, etc.)
43
+ email: brianustas@gmail.com
44
+ executables:
45
+ - pandata
46
+ extensions: []
47
+ extra_rdoc_files: []
48
+ files:
49
+ - lib/pandata/argv_parser.rb
50
+ - lib/pandata/data_formatter.rb
51
+ - lib/pandata/data_urls.rb
52
+ - lib/pandata/downloader.rb
53
+ - lib/pandata/parser.rb
54
+ - lib/pandata/scraper.rb
55
+ - lib/pandata.rb
56
+ - bin/pandata
57
+ homepage: https://github.com/ustasb/pandata
58
+ licenses:
59
+ - MIT
60
+ metadata: {}
61
+ post_install_message:
62
+ rdoc_options: []
63
+ require_paths:
64
+ - lib
65
+ required_ruby_version: !ruby/object:Gem::Requirement
66
+ requirements:
67
+ - - '>='
68
+ - !ruby/object:Gem::Version
69
+ version: 1.9.1
70
+ required_rubygems_version: !ruby/object:Gem::Requirement
71
+ requirements:
72
+ - - '>'
73
+ - !ruby/object:Gem::Version
74
+ version: 1.3.1
75
+ requirements: []
76
+ rubyforge_project:
77
+ rubygems_version: 2.0.2
78
+ signing_key:
79
+ specification_version: 4
80
+ summary: A Pandora.com web scraper
81
+ test_files: []