pandata 0.1.0.pre

checksums.yaml ADDED
@@ -0,0 +1,7 @@
+ ---
+ SHA1:
+   metadata.gz: 7f73c416a585d7b27a59799982502ab3425c0830
+   data.tar.gz: 88921adfb03392ac5af5f56402e1ef58e6cee767
+ SHA512:
+   metadata.gz: 17a42cf2bf107425067d52db7139abe3bb96321cdd9cc16e9041ebe8b4641264e21dbbe8040f9a2fd5cf3df8af3c161f1d51aa60ccbefc5980ab7616b12806d3
+   data.tar.gz: 682286a820a8f4314fe399f201e6d5362929d4b823e3b0b2c44a2cb2c09f84f1441cc7d1c10271ddedb51978103c664d492292e74e5d209c1716b0e064d67844
data/bin/pandata ADDED
@@ -0,0 +1,86 @@
+ #!/usr/bin/env ruby
+
+ require_relative '../lib/pandata'
+ require_relative '../lib/pandata/argv_parser'
+ require_relative '../lib/pandata/data_formatter'
+
+ options = Pandata::ArgvParser.parse(ARGV)
+
+ output_file = options[:output_file]
+ if output_file
+   File.delete(output_file) if File.exists?(output_file)
+
+   Object.send(:define_method, :write) do |string|
+     File.open(output_file, 'a') do |file|
+       file.write(string)
+     end
+   end
+ else
+   def write(string)
+     puts string
+   end
+ end
+
+ if ARGV.empty?
+   # Print command-line usage help.
+   puts options[:opts]
+   exit
+ end
+
+ scraper = Pandata::Scraper.get(options[:user_id])
+ formatter = Pandata::DataFormatter.new
+
+ # If scraper is an array, a Pandora user could not be found with certainty.
+ # In this case, scraper will contain webnames similar to options[:user_id].
+ if scraper.kind_of?(Array)
+   puts "No exact match for '#{options[:user_id]}'."
+
+   unless scraper.empty?
+     puts "\nWebname results for '#{options[:user_id]}':"
+     puts formatter.list(scraper)
+   end
+
+   exit
+ end
+
+ scraper_data = {}
+ options[:data_to_get].each do |data_type|
+   if /(bookmark|like)e?d_(.*)/ =~ data_type
+     method = $1 << 's'    # 'likes' or 'bookmarks'
+     argument = $2.to_sym  # :tracks, :artists, :stations or :albums
+     scraper_data[data_type] = scraper.public_send(method, argument)
+   else
+     scraper_data[data_type] = scraper.public_send(data_type)
+   end
+ end
+
+ if options[:return_as_json]
+   require 'json'
+   write JSON.generate(scraper_data)
+   exit
+ end
+
+ scraper_data.each do |key, value|
+   # Capitalize each word in the key symbol.
+   # e.g. :liked_tracks becomes 'Liked Tracks:'
+   title = key.to_s.split('_').map(&:capitalize).join(' ') << ':'
+
+   if value.empty?
+     output = ' ** No Data **'
+   else
+     output = case key
+     when /playing_station|recent_activity/
+       formatter.list(value)
+     when /liked_tracks|bookmarked_tracks/
+       formatter.tracks(value)
+     when /liked_artists|bookmarked_artists|stations|liked_stations/
+       formatter.sort_list(value)
+     when :liked_albums
+       formatter.albums(value)
+     when /following|followers/
+       formatter.followx(value)
+     end
+   end
+
+   write "#{ title }\n#{ output }"
+ end
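A small, hypothetical walk-through (not shipped with the gem) of the regex dispatch above: a data type such as :liked_tracks is split into a Scraper method name and its argument. The example matches against the symbol's string form for clarity; the script itself matches the symbol directly.

    data_type = :liked_tracks
    /(bookmark|like)e?d_(.*)/ =~ data_type.to_s
    method   = $1 + 's'    # => "likes"
    argument = $2.to_sym   # => :tracks
    # The script then calls scraper.public_send(method, argument),
    # i.e. scraper.likes(:tracks).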
data/lib/pandata.rb ADDED
@@ -0,0 +1,18 @@
+ # encoding: utf-8
+
+ require_relative 'pandata/data_urls'
+ require_relative 'pandata/downloader'
+ require_relative 'pandata/parser'
+ require_relative 'pandata/scraper'
+
+ module Pandata
+   module Version
+     MAJOR = 0
+     MINOR = 1
+     PATCH = 0
+     BUILD = 'pre'
+
+     STRING = [MAJOR, MINOR, PATCH, BUILD].compact.join('.')
+   end
+ end
+
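The Version module builds the gem's version string by joining the parts; compact would drop BUILD if it were nil (for a final release). A quick check:

    require 'pandata'
    Pandata::Version::STRING  # => "0.1.0.pre"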
data/lib/pandata/argv_parser.rb ADDED
@@ -0,0 +1,130 @@
+ require 'optparse'
+ require_relative '../pandata'
+
+ module Pandata
+
+   # Parses command-line input.
+   class ArgvParser
+     # Prevent instantiation.
+     private_class_method :new
+
+     # Takes an ARGV (array) argument.
+     #
+     # Returns a hash with:
+     # - :opts (OptionParser object)
+     # - :user_id (string)
+     # - :output_file (string)
+     # - :data_to_get (array)
+     # - :return_as_json (boolean)
+     def self.parse(argv)
+       options = { data_to_get: [] }
+       get_all_data = false
+
+       options[:opts] = OptionParser.new do |opts|
+         opts.banner = 'Pandata: A tool for downloading Pandora.com data (likes, bookmarks, stations, etc.)'
+         opts.define_head 'Usage: pandata <email|webname> [options]'
+         opts.separator <<-END
+
+ Examples:
+   pandata john@example.com --liked_tracks
+   pandata my_webname --all -o my_pandora_data.txt
+   pandata my_webname -lLb --json
+
+ Options:
+         END
+
+         opts.on('--all', 'Get all data') do
+           get_all_data = true
+         end
+
+         opts.on('-a', '--recent_activity', 'Get recent activity') do
+           options[:data_to_get] << :recent_activity
+         end
+
+         opts.on('-B', '--bookmarked_artists', 'Get all bookmarked artists') do
+           options[:data_to_get] << :bookmarked_artists
+         end
+
+         opts.on('-b', '--bookmarked_tracks', 'Get all bookmarked tracks') do
+           options[:data_to_get] << :bookmarked_tracks
+         end
+
+         opts.on('-F', '--followers', "Get all user's followers") do
+           options[:data_to_get] << :followers
+         end
+
+         opts.on('-f', '--following', 'Get all users being followed by user') do
+           options[:data_to_get] << :following
+         end
+
+         opts.on('-j', '--json', 'Return the results as JSON') do
+           options[:return_as_json] = true
+         end
+
+         opts.on('-L', '--liked_artists', 'Get all liked artists') do
+           options[:data_to_get] << :liked_artists
+         end
+
+         opts.on('-l', '--liked_tracks', 'Get all liked tracks') do
+           options[:data_to_get] << :liked_tracks
+         end
+
+         opts.on('-m', '--liked_albums', 'Get all liked albums') do
+           options[:data_to_get] << :liked_albums
+         end
+
+         opts.on('-n', '--liked_stations', 'Get all liked stations') do
+           options[:data_to_get] << :liked_stations
+         end
+
+         opts.on('-o', '--output_file PATH', 'File to output the data into') do |path|
+           options[:output_file] = path
+         end
+
+         opts.on('-S', '--playing_station', 'Get currently playing station') do
+           options[:data_to_get] << :playing_station
+         end
+
+         opts.on('-s', '--stations', 'Get all stations') do
+           options[:data_to_get] << :stations
+         end
+
+         opts.on_tail("-h", "--help", "Show this message") do
+           puts opts
+           exit
+         end
+
+         opts.on_tail("--version", "Show version") do
+           puts Pandata::Version::STRING
+           exit
+         end
+       end
+
+       options[:opts].parse(argv)
+
+       # User ID is the first argument.
+       options[:user_id] = argv.shift
+
+       if get_all_data
+         options[:data_to_get] = [
+           :recent_activity,
+           :playing_station,
+           :stations,
+           :bookmarked_tracks,
+           :bookmarked_artists,
+           :liked_tracks,
+           :liked_artists,
+           :liked_albums,
+           :liked_stations,
+           :followers,
+           :following
+         ]
+       else
+         # Remove any duplicates caused by supplying flags multiple times.
+         options[:data_to_get].uniq!
+       end
+
+       options
+     end
+   end
+ end
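A hypothetical usage sketch (the webname is a placeholder) showing the hash the parser returns for a typical invocation:

    require 'pandata/argv_parser'

    options = Pandata::ArgvParser.parse(%w[my_webname --liked_tracks --json])
    options[:user_id]        # => "my_webname" (the first non-option argument)
    options[:data_to_get]    # => [:liked_tracks]
    options[:return_as_json] # => true
    options[:opts]           # the OptionParser itself, printed by bin/pandata as help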
data/lib/pandata/data_formatter.rb ADDED
@@ -0,0 +1,105 @@
+ require 'set'
+
+ module Pandata
+
+   # Sorts and formats Pandata::Scraper data as a string for printing.
+   class DataFormatter
+
+     # Takes an array or string and returns a string with each item on its own line.
+     #--
+     #
+     # Example output:
+     #  - item1
+     #  - item2
+     #  - item3
+     #
+     #++
+     def list(data)
+       data = [data] unless data.kind_of?(Array)
+       str = ''
+       data.each { |item| str << " - #{item}\n" }
+       str
+     end
+
+     # Identical to #list but sorts alphabetically ignoring 'the'.
+     def sort_list(data)
+       list custom_sort(data)
+     end
+
+     # Takes an array of hashes with :artist and :track keys.
+     def tracks(tracks)
+       artists_items(tracks, :track)
+     end
+
+     # Takes an array of hashes with :artist and :album keys.
+     def albums(albums)
+       artists_items(albums, :album)
+     end
+
+     # Takes an array of hashes with :name, :webname and :href keys.
+     def followx(data)
+       str = ''
+       data.sort_by { |item| item[:webname].downcase }.each do |hash|
+         str << " - name: #{hash[:name]}\n"
+         str << "   webname: #{hash[:webname]}\n"
+         str << "   href: #{hash[:href]}\n"
+       end
+       str
+     end
+
+     private
+
+     # Takes an array or hash.
+     # Sorts alphabetically ignoring the initial 'The' when sorting strings.
+     # Also case-insensitive to prevent lowercase names from being sorted last.
+     def custom_sort(enumerable)
+       sorted_array = enumerable.sort_by { |key, _| key.sub(/^the\s*/i, '').downcase }
+
+       # sort_by() returns an array when called on hashes.
+       if enumerable.kind_of?(Hash)
+         # Rebuild the hash.
+         sorted_hash = {}
+         sorted_array.each { |item| sorted_hash[item[0]] = item[1] }
+         sorted_hash
+       else
+         sorted_array
+       end
+     end
+
+     # Takes an array of hashes with :artist and another key belonging to an
+     # artist (e.g. :track or :album).
+     # Returns a string with each artist name on a line with the artist's items
+     # listed and indented below. Sorts the output, too.
+     #--
+     #
+     # Example output:
+     #  - Artist1:
+     #      - item2
+     #      - item3
+     #  - Artist2:
+     #      - item1
+     #      - item1
+     #
+     #++
+     def artists_items(data, item_name)
+       artists_items = {}
+
+       data.each do |hash|
+         artist_name = hash[:artist]
+         (artists_items[artist_name] ||= Set.new) << hash[item_name]
+       end
+
+       artists_items = custom_sort(artists_items)
+
+       str = ''
+       artists_items.each do |artist_name, items|
+         str << " - #{artist_name}\n"
+         custom_sort(items).each do |item|
+           str << "     - #{item}\n"
+         end
+       end
+       str
+     end
+
+   end
+ end
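A sketch of the formatter's output with made-up sample data (not taken from the gem's specs):

    require 'pandata/data_formatter'

    formatter = Pandata::DataFormatter.new

    formatter.sort_list(['The Beatles', 'Air'])
    # => " - Air\n - The Beatles\n"   (the leading 'The' is ignored while sorting)

    formatter.tracks([{ artist: 'Air', track: 'Cherry Blossom Girl' },
                      { artist: 'Air', track: 'Alone in Kyoto' }])
    # => a single " - Air" line with both tracks sorted and indented beneath it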
data/lib/pandata/data_urls.rb ADDED
@@ -0,0 +1,20 @@
+ module Pandata
+   # Number of results to get from a feeds.pandora.com URL.
+   MAX_RESULTS = 100000 # Get everything...
+
+   # URLs to Pandora's data!
+   DATA_FEED_URLS = {
+     user_search: 'http://www.pandora.com/content/connect?searchString=%{searchString}',
+     recent_activity: 'http://feeds.pandora.com/feeds/people/%{webname}/recentactivity.xml',
+     playing_station: 'http://feeds.pandora.com/feeds/people/%{webname}/nowplaying.xml',
+     stations: "http://feeds.pandora.com/feeds/people/%{webname}/stations.xml?max=#{MAX_RESULTS}",
+     bookmarked_tracks: "http://feeds.pandora.com/feeds/people/%{webname}/favorites.xml?max=#{MAX_RESULTS}",
+     bookmarked_artists: "http://feeds.pandora.com/feeds/people/%{webname}/favoriteartists.xml?max=#{MAX_RESULTS}",
+     liked_tracks: 'http://www.pandora.com/content/tracklikes?likeStartIndex=%{nextLikeStartIndex}&thumbStartIndex=%{nextThumbStartIndex}&webname=%{webname}',
+     liked_artists: 'http://www.pandora.com/content/artistlikes?artistStartIndex=%{nextStartIndex}&webname=%{webname}',
+     liked_stations: 'http://www.pandora.com/content/stationlikes?stationStartIndex=%{nextStartIndex}&webname=%{webname}',
+     liked_albums: 'http://www.pandora.com/content/albumlikes?albumStartIndex=%{nextStartIndex}&webname=%{webname}',
+     following: 'http://www.pandora.com/content/following?startIndex=%{nextStartIndex}&webname=%{webname}',
+     followers: 'http://www.pandora.com/content/followers?startIndex=%{nextStartIndex}&webname=%{webname}'
+   }
+ end
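These templates are filled in with String#% and named references; for example ('my_webname' is a placeholder):

    require 'pandata/data_urls'

    Pandata::DATA_FEED_URLS[:stations] % { webname: 'my_webname' }
    # => "http://feeds.pandora.com/feeds/people/my_webname/stations.xml?max=100000"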
data/lib/pandata/downloader.rb ADDED
@@ -0,0 +1,56 @@
+ require 'json'
+ require 'open-uri'
+
+ module Pandata
+   # Custom Pandata error
+   class PandataError < StandardError
+   end
+
+   # Retrieves data from Pandora and handles errors.
+   class Downloader
+     # A GitHub Gist that contains an updated cookie allowing access to 'login-only' visible data.
+     CONFIG_URL = 'https://gist.github.com/ustasb/596f1ee96d03463fde77/raw/pandata_config.json'
+
+     class << self
+       attr_accessor :cookie
+     end
+
+     # Gets a Pandora cookie and returns a Downloader instance.
+     def initialize
+       # If we already have a cookie, don't get another.
+       unless Downloader.cookie
+         Downloader.cookie = get_cookie
+       end
+     end
+
+     # Downloads a page and returns its content as a string.
+     def read_page(url)
+       download(url, Downloader.cookie).read
+     end
+
+     private
+
+     # Downloads a page and handles errors.
+     def download(url, cookie = '')
+       escaped_url = URI.escape(url)
+
+       begin
+         open(escaped_url, 'Cookie' => cookie, :read_timeout => 5)
+       rescue OpenURI::HTTPError => error
+         puts "The network request for:\n #{url}\nreturned an error:\n #{error.message}"
+         puts "Please try again later or update Pandata. Sorry about that!\n\nFull error:"
+         raise PandataError
+       end
+     end
+
+     def get_cookie
+       config = JSON.parse download(CONFIG_URL).read
+
+       if Gem::Version.new(Pandata::Version::STRING) <= Gem::Version.new(config['required_update_for'])
+         raise PandataError, 'Pandora.com has changed something and you need to update Pandata!'
+       end
+
+       config['cookie']
+     end
+   end
+ end
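A minimal usage sketch mirroring how Pandata::Scraper uses this class; the URL is illustrative, and both calls hit the network (the Gist config and the page itself):

    require 'pandata'

    downloader = Pandata::Downloader.new   # fetches and caches the shared cookie once
    html = downloader.read_page('http://www.pandora.com/content/connect?searchString=foo')
    # Later Downloader instances reuse Downloader.cookie instead of
    # downloading the Gist config again.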
data/lib/pandata/parser.rb ADDED
@@ -0,0 +1,191 @@
+ require 'nokogiri'
+
+ module Pandata
+
+   # Parses HTML/XML pages from Pandora for relevant data.
+   class Parser
+
+     # Returns an array of webnames.
+     def get_webnames_from_search(html)
+       user_links = Nokogiri::HTML(html).css('.user_name a')
+       webnames = []
+
+       user_links.each do |link|
+         webnames << link['webname']
+       end
+
+       webnames
+     end
+
+     # Returns the query parameters necessary to get the next page of data
+     # from Pandora.
+     def get_next_data_indices(html)
+       show_more = Nokogiri::HTML(html).css('.show_more')[0]
+
+       if show_more
+         next_indices = {}
+         data_attributes = ['nextStartIndex', 'nextLikeStartIndex', 'nextThumbStartIndex']
+         data_attributes.each do |attr_name|
+           attr = show_more.attributes['data-' + attr_name.downcase]
+           next_indices[attr_name.to_sym] = attr.value.to_i if attr
+         end
+
+         next_indices
+       else
+         false
+       end
+     end
+
+     # Returns an array of recent activities.
+     def get_recent_activity(xml)
+       activity_names = []
+
+       xml_each_item(xml) do |title|
+         activity_names << title
+       end
+
+       activity_names
+     end
+
+     # Returns an array of station names.
+     def get_stations(xml)
+       stations = []
+
+       xml_each_item(xml) do |title|
+         stations << title
+       end
+
+       stations
+     end
+
+     # Returns the currently playing station name.
+     def get_playing_station(xml)
+       station = ''
+
+       xml_each_item(xml) do |title|
+         station = title # First title is the station name.
+         break
+       end
+
+       station
+     end
+
+     # Returns an array of hashes with :artist and :track keys.
+     def get_bookmarked_tracks(xml)
+       tracks = []
+
+       xml_each_item(xml) do |title|
+         track, artist = title.split(' by ')
+         tracks << { artist: artist, track: track }
+       end
+
+       tracks
+     end
+
+     # Returns an array of artist names.
+     def get_bookmarked_artists(xml)
+       artists = []
+
+       xml_each_item(xml) do |title|
+         artists << title
+       end
+
+       artists
+     end
+
+     # Returns an array of hashes with :artist and :track keys.
+     def get_liked_tracks(html)
+       tracks = []
+
+       infobox_each_link(html) do |title, subtitle|
+         tracks << { track: title, artist: subtitle }
+       end
+
+       tracks
+     end
+
+     # Returns an array of artist names.
+     def get_liked_artists(html)
+       get_infobox_titles(html)
+     end
+
+     # Returns an array of station names.
+     def get_liked_stations(html)
+       get_infobox_titles(html)
+     end
+
+     # Returns an array of hashes with :artist and :album keys.
+     def get_liked_albums(html)
+       albums = []
+
+       infobox_each_link(html) do |title, subtitle|
+         albums << { album: title, artist: subtitle }
+       end
+
+       albums
+     end
+
+     # Returns an array of hashes with :name, :webname and :href keys.
+     def get_following(html)
+       get_followx_users(html)
+     end
+
+     # Returns an array of hashes with :name, :webname and :href keys.
+     def get_followers(html)
+       get_followx_users(html)
+     end
+
+     private
+
+     # Loops over each 'item' tag and yields the title and description.
+     def xml_each_item(xml)
+       Nokogiri::XML(xml).css('item').each do |item|
+         title = item.at_css('title').text
+         desc = item.at_css('description').text
+         yield(title, desc)
+       end
+     end
+
+     # Loops over each .infobox container and yields the title and subtitle.
+     def infobox_each_link(html)
+       Nokogiri::HTML(html).css('.infobox').each do |infobox|
+         infobox_body = infobox.css('.infobox-body')
+
+         title_link = infobox_body.css('h3 a').text.strip
+         subtitle_link = infobox_body.css('p a').first
+         subtitle_link = subtitle_link.text.strip if subtitle_link
+
+         yield(title_link, subtitle_link)
+       end
+     end
+
+     # Returns an array of titles from #infobox_each_link.
+     def get_infobox_titles(html)
+       titles = []
+       infobox_each_link(html) { |title| titles << title }
+       titles
+     end
+
+     # Loops over each .follow_section container and returns an array of hashes
+     # with :name, :webname and :href keys.
+     def get_followx_users(html)
+       users = []
+
+       Nokogiri::HTML(html).css('.follow_section').each do |section|
+         listener_name = section.css('.listener_name').first
+         webname = listener_name['webname']
+
+         # Remove any 'spans with a space' that sometimes appear with special characters.
+         listener_name.css('span').each(&:remove)
+         name = listener_name.text.strip
+
+         href = section.css('a').first['href']
+
+         users << { name: name, webname: webname, href: href }
+       end
+
+       users
+     end
+
+   end
+ end
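A hypothetical snippet run through the parser; the markup shape is inferred from the CSS selectors above, not captured from Pandora itself:

    require 'pandata/parser'

    html = <<-HTML
      <div class="user_name"><a webname="my_webname">my_webname</a></div>
    HTML

    Pandata::Parser.new.get_webnames_from_search(html)
    # => ["my_webname"]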
data/lib/pandata/scraper.rb ADDED
@@ -0,0 +1,165 @@
+ require_relative 'data_urls'
+ require_relative 'parser'
+ require_relative 'downloader'
+
+ module Pandata
+
+   # Downloads a user's Pandora.com data.
+   # A user's profile must be public for Pandata to download its data.
+   class Scraper
+
+     # What Pandora uses to identify a user; it remains constant even if
+     # the user ties a new email address to their Pandora account.
+     attr_reader :webname
+
+     # Takes either an email or a webname string.
+     # Returns either:
+     # - a new scraper object for the supplied user ID.
+     # - an array of similar webnames because a matching Pandora user could not be found.
+     def self.get(user_id)
+       search_url = DATA_FEED_URLS[:user_search] % { searchString: user_id }
+       html = Downloader.new.read_page(search_url)
+       webnames = Parser.new.get_webnames_from_search(html)
+
+       if webnames.include?(user_id)
+         new(user_id)
+       # If user_id looks like an email and still gets a result.
+       elsif webnames.size == 1 && /.*@.*\..*/ =~ user_id
+         new(webnames.first)
+       else
+         webnames
+       end
+     end
+
+     private_class_method :new
+     def initialize(webname)
+       @downloader = Downloader.new
+       @parser = Parser.new
+       @webname = webname
+     end
+
+     # Returns an array of the user's recent activity.
+     def recent_activity
+       scrape_for(:recent_activity, :get_recent_activity)
+     end
+
+     # Returns the user's currently playing station.
+     def playing_station
+       scrape_for(:playing_station, :get_playing_station).first
+     end
+
+     # Returns an array of the user's stations.
+     def stations
+       scrape_for(:stations, :get_stations)
+     end
+
+     # Returns a user's bookmarked data.
+     #
+     # Bookmark types:
+     # - :artists - Returns an array of artist names.
+     # - :tracks - Returns an array of hashes with :artist and :track keys.
+     # - :all - Returns a hash with all bookmarked data.
+     def bookmarks(bookmark_type = :all)
+       case bookmark_type
+       when :tracks
+         scrape_for(:bookmarked_tracks, :get_bookmarked_tracks)
+       when :artists
+         scrape_for(:bookmarked_artists, :get_bookmarked_artists)
+       when :all
+         { artists: bookmarks(:artists),
+           tracks: bookmarks(:tracks) }
+       end
+     end
+
+     # Returns a user's liked data. (The results from giving a 'thumbs up.')
+     #
+     # Like types:
+     # - :artists - Returns an array of artist names.
+     # - :albums - Returns an array of album names.
+     # - :stations - Returns an array of station names.
+     # - :tracks - Returns an array of hashes with :artist and :track keys.
+     # - :all - Returns a hash with all liked data.
+     def likes(like_type = :all)
+       case like_type
+       when :tracks
+         scrape_for(:liked_tracks, :get_liked_tracks)
+       when :artists
+         scrape_for(:liked_artists, :get_liked_artists)
+       when :stations
+         scrape_for(:liked_stations, :get_liked_stations)
+       when :albums
+         scrape_for(:liked_albums, :get_liked_albums)
+       when :all
+         { artists: likes(:artists),
+           albums: likes(:albums),
+           stations: likes(:stations),
+           tracks: likes(:tracks) }
+       end
+     end
+
+     # Returns the *public* users being followed by the user.
+     #
+     # Returns an array of hashes with keys:
+     # - :name - Profile name
+     # - :webname - Unique Pandora ID
+     # - :href - URL to online Pandora profile.
+     def following
+       scrape_for(:following, :get_following)
+     end
+
+     # Returns the user's followers in a format identical to #following.
+     def followers
+       scrape_for(:followers, :get_followers)
+     end
+
+     private
+
+     # Downloads all data for a given type, calls the supplied Pandata::Parser
+     # method and removes any duplicates.
+     def scrape_for(data_type, parser_method)
+       results = []
+
+       url = get_url(data_type)
+       download_all_data(url) do |html, next_data_indices|
+         new_data = @parser.public_send(parser_method, html)
+
+         if new_data.kind_of?(Array)
+           results.concat(new_data)
+         else
+           results.push(new_data)
+         end
+
+         get_url(data_type, next_data_indices) if next_data_indices
+       end
+
+       # Pandora data often contains duplicates--get rid of them.
+       results.uniq
+     end
+
+     # Downloads all data given a starting URL. Some Pandora feeds only return
+     # 5 - 10 items per page but contain a link to the next set of data. Threads
+     # cannot be used because page A must be visited to know how to obtain page B.
+     def download_all_data(url)
+       next_data_indices = {}
+
+       while next_data_indices
+         html = @downloader.read_page(url)
+         next_data_indices = @parser.get_next_data_indices(html)
+         url = yield(html, next_data_indices)
+       end
+     end
+
+     # Grabs a URL from DATA_FEED_URLS and formats it appropriately.
+     def get_url(data_name, next_data_indices = {})
+       next_data_indices = {
+         nextStartIndex: 0,
+         nextLikeStartIndex: 0,
+         nextThumbStartIndex: 0
+       } if next_data_indices.empty?
+
+       next_data_indices[:webname] = @webname
+       DATA_FEED_URLS[data_name] % next_data_indices
+     end
+
+   end
+ end
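A hedged end-to-end sketch mirroring bin/pandata ('my_webname' is a placeholder and the calls hit Pandora over the network):

    require 'pandata'

    scraper = Pandata::Scraper.get('my_webname')

    if scraper.kind_of?(Array)
      # No exact match; the array holds similar webnames instead.
      puts scraper
    else
      scraper.likes(:tracks)      # => [{ artist: '...', track: '...' }, ...]
      scraper.bookmarks(:artists) # => ['...', ...]
      scraper.stations            # => ['...', ...]
    end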
metadata ADDED
@@ -0,0 +1,81 @@
+ --- !ruby/object:Gem::Specification
+ name: pandata
+ version: !ruby/object:Gem::Version
+   version: 0.1.0.pre
+ platform: ruby
+ authors:
+ - Brian Ustas
+ autorequire:
+ bindir: bin
+ cert_chain: []
+ date: 2013-03-10 00:00:00.000000000 Z
+ dependencies:
+ - !ruby/object:Gem::Dependency
+   name: nokogiri
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - ~>
+       - !ruby/object:Gem::Version
+         version: 1.5.6
+   type: :runtime
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - ~>
+       - !ruby/object:Gem::Version
+         version: 1.5.6
+ - !ruby/object:Gem::Dependency
+   name: rspec
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - ~>
+       - !ruby/object:Gem::Version
+         version: 2.12.2
+   type: :development
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - ~>
+       - !ruby/object:Gem::Version
+         version: 2.12.2
+ description: A library and tool for downloading Pandora.com data (likes, bookmarks,
+   stations, etc.)
+ email: brianustas@gmail.com
+ executables:
+ - pandata
+ extensions: []
+ extra_rdoc_files: []
+ files:
+ - lib/pandata/argv_parser.rb
+ - lib/pandata/data_formatter.rb
+ - lib/pandata/data_urls.rb
+ - lib/pandata/downloader.rb
+ - lib/pandata/parser.rb
+ - lib/pandata/scraper.rb
+ - lib/pandata.rb
+ - bin/pandata
+ homepage: https://github.com/ustasb/pandata
+ licenses:
+ - MIT
+ metadata: {}
+ post_install_message:
+ rdoc_options: []
+ require_paths:
+ - lib
+ required_ruby_version: !ruby/object:Gem::Requirement
+   requirements:
+   - - '>='
+     - !ruby/object:Gem::Version
+       version: 1.9.1
+ required_rubygems_version: !ruby/object:Gem::Requirement
+   requirements:
+   - - '>'
+     - !ruby/object:Gem::Version
+       version: 1.3.1
+ requirements: []
+ rubyforge_project:
+ rubygems_version: 2.0.2
+ signing_key:
+ specification_version: 4
+ summary: A Pandora.com web scraper
+ test_files: []