pandata 0.1.2 → 0.2.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +2 -1
- data/bin/pandata +4 -82
- data/lib/pandata.rb +4 -2
- data/lib/pandata/argv_parser.rb +5 -5
- data/lib/pandata/cli.rb +144 -0
- data/lib/pandata/data_urls.rb +1 -1
- data/lib/pandata/downloader.rb +19 -23
- data/lib/pandata/scraper.rb +7 -3
- metadata +48 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7e2bc9075307ca70cb1e11c34ca8e817d3fea79c
|
4
|
+
data.tar.gz: bca7a313c16dd995cfd0f6094e7cb35b5f539c69
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 833e93f00606f5aac3c4a3a297f3fd7ee7f61614de1868b69d9cbbcb68fe218a43275c4087fada150e016d2da02e6bc2ef2dfb08bf368b7a389574140a41f867
|
7
|
+
data.tar.gz: 1ba8b3230c426ec828952376c79e1c4cb649bd3f936db3564ff774f51d0398b1eb9fbbc8beb229a0683ac4dda3e8ff0406cd8266f5a1f5e8af34835afb7347dc
|
data/README.md
CHANGED
@@ -57,7 +57,7 @@ Next, start scraping!
|
|
57
57
|
# Get all followers
|
58
58
|
followers = johns_scraper.followers
|
59
59
|
|
60
|
-
For more information, see the documentation for Pandata::Scraper.
|
60
|
+
For more information, see the [documentation][2] for Pandata::Scraper.
|
61
61
|
|
62
62
|
### As a Command-Line Tool
|
63
63
|
|
@@ -82,3 +82,4 @@ For an up-to-date list, check out:
|
|
82
82
|
pandata my_webname --all -o my_pandora_data.txt
|
83
83
|
|
84
84
|
[1]: http://www.pandora.com/feeds
|
85
|
+
[2]: http://rubydoc.info/gems/pandata/frames
|
data/bin/pandata
CHANGED
@@ -1,86 +1,8 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
|
3
|
-
require_relative '../lib/pandata'
|
4
|
-
require_relative '../lib/pandata/argv_parser'
|
5
|
-
require_relative '../lib/pandata/data_formatter'
|
3
|
+
require_relative '../lib/pandata/cli'
|
6
4
|
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
if output_file
|
11
|
-
File.delete(output_file) if File.exists?(output_file)
|
12
|
-
|
13
|
-
Object.send(:define_method, :write) do |string|
|
14
|
-
File.open(output_file, 'a') do |file|
|
15
|
-
file.puts string
|
16
|
-
end
|
17
|
-
end
|
18
|
-
else
|
19
|
-
def write(string)
|
20
|
-
puts string
|
21
|
-
end
|
22
|
-
end
|
23
|
-
|
24
|
-
if ARGV.empty?
|
25
|
-
# Print command-line usage help.
|
26
|
-
puts options[:opts]
|
27
|
-
exit
|
28
|
-
end
|
29
|
-
|
30
|
-
scraper = Pandata::Scraper.get(options[:user_id])
|
31
|
-
formatter = Pandata::DataFormatter.new
|
32
|
-
|
33
|
-
# If scraper is an array, a Pandora user could not be found with certainty.
|
34
|
-
# In this case, scraper will contain webnames similar to options[:user_id].
|
35
|
-
if scraper.kind_of?(Array)
|
36
|
-
puts "No exact match for '#{options[:user_id]}'."
|
37
|
-
|
38
|
-
unless scraper.empty?
|
39
|
-
puts "\nWebname results for '#{options[:user_id]}':"
|
40
|
-
puts formatter.list(scraper)
|
41
|
-
end
|
42
|
-
|
43
|
-
exit
|
44
|
-
end
|
45
|
-
|
46
|
-
scraper_data = {}
|
47
|
-
options[:data_to_get].each do |data_type|
|
48
|
-
if /(bookmark|like)e?d_(.*)/ =~ data_type
|
49
|
-
method = $1 << 's' # 'likes' or 'bookmarks'
|
50
|
-
argument = $2.to_sym # :tracks, :artists, :stations or :albums
|
51
|
-
scraper_data[data_type] = scraper.public_send(method, argument)
|
52
|
-
else
|
53
|
-
scraper_data[data_type] = scraper.public_send(data_type)
|
54
|
-
end
|
55
|
-
end
|
56
|
-
|
57
|
-
if options[:return_as_json]
|
58
|
-
require 'json'
|
59
|
-
write JSON.generate(scraper_data)
|
60
|
-
exit
|
61
|
-
end
|
62
|
-
|
63
|
-
scraper_data.each do |key, value|
|
64
|
-
# Capitalize each word in the key symbol.
|
65
|
-
# e.g. :liked_tracks becomes 'Liked Tracks:'
|
66
|
-
title = key.to_s.split('_').map(&:capitalize).join(' ') << ':'
|
67
|
-
|
68
|
-
if value.empty?
|
69
|
-
output = ' ** No Data **'
|
70
|
-
else
|
71
|
-
output = case key
|
72
|
-
when /playing_station|recent_activity/
|
73
|
-
formatter.list(value)
|
74
|
-
when /liked_tracks|bookmarked_tracks/
|
75
|
-
formatter.tracks(value)
|
76
|
-
when /liked_artists|bookmarked_artists|stations|liked_stations/
|
77
|
-
formatter.sort_list(value)
|
78
|
-
when :liked_albums
|
79
|
-
formatter.albums(value)
|
80
|
-
when /following|followers/
|
81
|
-
formatter.followx(value)
|
82
|
-
end
|
83
|
-
end
|
84
|
-
|
85
|
-
write "#{ title }\n#{ output }"
|
5
|
+
begin
|
6
|
+
Pandata::CLI.scrape(ARGV)
|
7
|
+
rescue Pandata::PandataError
|
86
8
|
end
|
data/lib/pandata.rb
CHANGED
@@ -6,10 +6,12 @@ require_relative 'pandata/parser'
|
|
6
6
|
require_relative 'pandata/scraper'
|
7
7
|
|
8
8
|
module Pandata
|
9
|
+
class PandataError < StandardError; end
|
10
|
+
|
9
11
|
module Version
|
10
12
|
MAJOR = 0
|
11
|
-
MINOR = 1
|
12
|
-
PATCH = 2
|
13
|
+
MINOR = 2
|
14
|
+
PATCH = 1
|
13
15
|
BUILD = nil
|
14
16
|
|
15
17
|
STRING = [MAJOR, MINOR, PATCH, BUILD].compact.join('.')
|
data/lib/pandata/argv_parser.rb
CHANGED
@@ -15,6 +15,8 @@ module Pandata
|
|
15
15
|
# - :output_file [String]
|
16
16
|
# - :data_to_get [Array]
|
17
17
|
# - :get_all_data [Boolean]
|
18
|
+
# - :help [Boolean]
|
19
|
+
# - :version [Boolean]
|
18
20
|
# - :return_as_json [Boolean]
|
19
21
|
def self.parse(argv)
|
20
22
|
options = { data_to_get: [] }
|
@@ -90,20 +92,18 @@ Options:
|
|
90
92
|
end
|
91
93
|
|
92
94
|
opts.on_tail("-h", "--help", "Show this message") do
|
93
|
-
|
94
|
-
exit
|
95
|
+
options[:help] = true
|
95
96
|
end
|
96
97
|
|
97
98
|
opts.on_tail("--version", "Show version") do
|
98
|
-
|
99
|
-
exit
|
99
|
+
options[:version] = true
|
100
100
|
end
|
101
101
|
end
|
102
102
|
|
103
103
|
options[:opts].parse(argv)
|
104
104
|
|
105
105
|
# User ID is the first argument.
|
106
|
-
options[:user_id] = argv
|
106
|
+
options[:user_id] = argv[0]
|
107
107
|
|
108
108
|
if get_all_data
|
109
109
|
options[:data_to_get] = [
|
data/lib/pandata/cli.rb
ADDED
@@ -0,0 +1,144 @@
|
|
1
|
+
require 'json'
|
2
|
+
require 'ruby-progressbar'
|
3
|
+
require_relative '../pandata'
|
4
|
+
require_relative 'argv_parser'
|
5
|
+
require_relative 'data_formatter'
|
6
|
+
|
7
|
+
module Pandata
|
8
|
+
|
9
|
+
# Pandata command-line interface
|
10
|
+
class CLI
|
11
|
+
|
12
|
+
def self.scrape(argv)
|
13
|
+
options = Pandata::ArgvParser.parse(argv)
|
14
|
+
|
15
|
+
if argv.empty? || options[:help]
|
16
|
+
puts options[:opts].to_s # Log usage information
|
17
|
+
elsif options[:version]
|
18
|
+
puts Pandata::Version::STRING
|
19
|
+
else
|
20
|
+
new(options).download_and_output
|
21
|
+
end
|
22
|
+
end
|
23
|
+
|
24
|
+
def initialize(options)
|
25
|
+
@data_to_get = options[:data_to_get]
|
26
|
+
@output_file = options[:output_file]
|
27
|
+
@return_as_json = options[:return_as_json]
|
28
|
+
|
29
|
+
@scraper = scraper_for(options[:user_id])
|
30
|
+
@scraper.download_cb = method(:update_progress)
|
31
|
+
end
|
32
|
+
|
33
|
+
def update_progress(num_data)
|
34
|
+
progressbar.progress += num_data
|
35
|
+
end
|
36
|
+
|
37
|
+
def download_and_output
|
38
|
+
output_data format_data(download_data, @return_as_json)
|
39
|
+
end
|
40
|
+
|
41
|
+
private
|
42
|
+
|
43
|
+
def progressbar
|
44
|
+
@progressbar ||= ProgressBar.create(
|
45
|
+
title: 'Data Downloaded',
|
46
|
+
format: '%t: %c',
|
47
|
+
total: nil
|
48
|
+
)
|
49
|
+
end
|
50
|
+
|
51
|
+
def formatter
|
52
|
+
@formatter ||= DataFormatter.new
|
53
|
+
end
|
54
|
+
|
55
|
+
def log(msg)
|
56
|
+
puts msg
|
57
|
+
end
|
58
|
+
|
59
|
+
# Writes the data to STDOUT or a file.
|
60
|
+
# @param formatted_data [String]
|
61
|
+
def output_data(formatted_data)
|
62
|
+
@progressbar.stop if @progressbar
|
63
|
+
|
64
|
+
if @output_file
|
65
|
+
File.write(@output_file, formatted_data)
|
66
|
+
else
|
67
|
+
log formatted_data
|
68
|
+
end
|
69
|
+
end
|
70
|
+
|
71
|
+
# Formats data as a string list or JSON.
|
72
|
+
# @param data [Hash]
|
73
|
+
# @param json [Boolean]
|
74
|
+
# @return [String]
|
75
|
+
def format_data(data, json = false)
|
76
|
+
if json
|
77
|
+
JSON.generate(data)
|
78
|
+
else
|
79
|
+
data.map do |category, cat_data|
|
80
|
+
# Capitalize each word in the category symbol.
|
81
|
+
# e.g. :liked_tracks becomes 'Liked Tracks'
|
82
|
+
title = category.to_s.split('_').map(&:capitalize).join(' ')
|
83
|
+
|
84
|
+
output = if cat_data.empty?
|
85
|
+
" ** No Data **\n"
|
86
|
+
else
|
87
|
+
case category
|
88
|
+
when /playing_station|recent_activity/
|
89
|
+
formatter.list(cat_data)
|
90
|
+
when /liked_tracks|bookmarked_tracks/
|
91
|
+
formatter.tracks(cat_data)
|
92
|
+
when /liked_artists|bookmarked_artists|stations|liked_stations/
|
93
|
+
formatter.sort_list(cat_data)
|
94
|
+
when :liked_albums
|
95
|
+
formatter.albums(cat_data)
|
96
|
+
when /following|followers/
|
97
|
+
formatter.followx(cat_data)
|
98
|
+
end
|
99
|
+
end
|
100
|
+
|
101
|
+
"#{title}:\n#{output}"
|
102
|
+
end.join
|
103
|
+
end
|
104
|
+
end
|
105
|
+
|
106
|
+
# Downloads the user's desired data.
|
107
|
+
# @return [Hash]
|
108
|
+
def download_data
|
109
|
+
scraper_data = {}
|
110
|
+
|
111
|
+
@data_to_get.each do |data_category|
|
112
|
+
if /(bookmark|like)e?d_(.*)/ =~ data_category
|
113
|
+
method = $1 << 's' # 'likes' or 'bookmarks'
|
114
|
+
argument = $2.to_sym # :tracks, :artists, :stations or :albums
|
115
|
+
scraper_data[data_category] = @scraper.public_send(method, argument)
|
116
|
+
else
|
117
|
+
scraper_data[data_category] = @scraper.public_send(data_category)
|
118
|
+
end
|
119
|
+
end
|
120
|
+
|
121
|
+
scraper_data
|
122
|
+
end
|
123
|
+
|
124
|
+
# Returns a scraper for the user's id.
|
125
|
+
# @param user_id [String] webname or email
|
126
|
+
# @return [Pandata::Scraper]
|
127
|
+
def scraper_for(user_id)
|
128
|
+
scraper = Pandata::Scraper.get(user_id)
|
129
|
+
|
130
|
+
if scraper.kind_of?(Array)
|
131
|
+
log "No exact match for '#{user_id}'."
|
132
|
+
|
133
|
+
unless scraper.empty?
|
134
|
+
log "\nWebname results for '#{user_id}':\n#{formatter.list(scraper)}"
|
135
|
+
end
|
136
|
+
|
137
|
+
raise PandataError, "Could not create a scraper for '#{user_id}'."
|
138
|
+
end
|
139
|
+
|
140
|
+
scraper
|
141
|
+
end
|
142
|
+
|
143
|
+
end
|
144
|
+
end
|
data/lib/pandata/data_urls.rb
CHANGED
data/lib/pandata/downloader.rb
CHANGED
@@ -1,30 +1,23 @@
|
|
1
1
|
require 'json'
|
2
2
|
require 'open-uri'
|
3
|
+
require_relative '../pandata'
|
3
4
|
|
4
5
|
module Pandata
|
5
|
-
class PandataError < StandardError; end
|
6
6
|
|
7
|
-
# Retrieves data from Pandora.com and handles errors.
|
7
|
+
# Retrieves data from Pandora.com and handles network errors.
|
8
8
|
class Downloader
|
9
|
+
|
9
10
|
# A GitHub Gist that contains an updated cookie allowing access to 'login-only' visible data.
|
10
11
|
CONFIG_URL = 'https://gist.github.com/ustasb/596f1ee96d03463fde77/raw/pandata_config.json'
|
11
12
|
|
12
|
-
|
13
|
-
|
14
|
-
end
|
15
|
-
|
16
|
-
# Gets a Pandora cookie and returns a Downloader instance.
|
17
|
-
def initialize
|
18
|
-
unless Downloader.cookie
|
19
|
-
Downloader.cookie = get_cookie
|
20
|
-
end
|
21
|
-
end
|
13
|
+
# The cached cookie.
|
14
|
+
@@cookie = nil
|
22
15
|
|
23
16
|
# Downloads and reads a page from a URL.
|
24
17
|
# @param url [String]
|
25
18
|
# @return [String] contents of page
|
26
|
-
def read_page(url)
|
27
|
-
download(url, Downloader.cookie).read
|
19
|
+
def self.read_page(url)
|
20
|
+
download(url, get_cookie).read
|
28
21
|
end
|
29
22
|
|
30
23
|
private
|
@@ -33,19 +26,21 @@ module Pandata
|
|
33
26
|
# @param url [String]
|
34
27
|
# @param cookie [String]
|
35
28
|
# @return [File]
|
36
|
-
def download(url, cookie = '')
|
29
|
+
def self.download(url, cookie = '')
|
37
30
|
escaped_url = URI.escape(url)
|
38
31
|
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
32
|
+
open(escaped_url, 'Cookie' => cookie, :read_timeout => 5)
|
33
|
+
rescue OpenURI::HTTPError => error
|
34
|
+
puts "The network request for:\n #{url}\nreturned an error:\n #{error.message}"
|
35
|
+
puts "Please try again later or update Pandata. Sorry about that!\n\nFull error:"
|
36
|
+
raise PandataError
|
37
|
+
end
|
38
|
+
|
39
|
+
def self.get_cookie
|
40
|
+
@@cookie ||= download_cookie
|
46
41
|
end
|
47
42
|
|
48
|
-
def get_cookie
|
43
|
+
def self.download_cookie
|
49
44
|
config = JSON.parse download(CONFIG_URL).read
|
50
45
|
|
51
46
|
if Gem::Version.new(Pandata::Version::STRING) <= Gem::Version.new(config['required_update_for'])
|
@@ -54,5 +49,6 @@ module Pandata
|
|
54
49
|
|
55
50
|
config['cookie']
|
56
51
|
end
|
52
|
+
|
57
53
|
end
|
58
54
|
end
|
data/lib/pandata/scraper.rb
CHANGED
@@ -12,6 +12,9 @@ module Pandata
|
|
12
12
|
# the user ties a new email address to their Pandora account.
|
13
13
|
attr_reader :webname
|
14
14
|
|
15
|
+
# A Proc that gets called after some data has been downloaded.
|
16
|
+
attr_accessor :download_cb
|
17
|
+
|
15
18
|
# If possible, get a Scraper instance for the user_id otherwise return
|
16
19
|
# an array of similar webnames.
|
17
20
|
# @param user_id [String] email or webname
|
@@ -19,7 +22,7 @@ module Pandata
|
|
19
22
|
# @return [Array] array of similar webnames
|
20
23
|
def self.get(user_id)
|
21
24
|
search_url = DATA_FEED_URLS[:user_search] % { searchString: user_id }
|
22
|
-
html = Downloader.new.read_page(search_url)
|
25
|
+
html = Downloader.read_page(search_url)
|
23
26
|
webnames = Parser.new.get_webnames_from_search(html)
|
24
27
|
|
25
28
|
if webnames.include?(user_id)
|
@@ -34,7 +37,6 @@ module Pandata
|
|
34
37
|
|
35
38
|
private_class_method :new
|
36
39
|
def initialize(webname)
|
37
|
-
@downloader = Downloader.new
|
38
40
|
@parser = Parser.new
|
39
41
|
@webname = webname
|
40
42
|
end
|
@@ -134,6 +136,8 @@ module Pandata
|
|
134
136
|
results.push(new_data)
|
135
137
|
end
|
136
138
|
|
139
|
+
@download_cb[new_data.size] if @download_cb
|
140
|
+
|
137
141
|
get_url(data_type, next_data_indices) if next_data_indices
|
138
142
|
end
|
139
143
|
|
@@ -149,7 +153,7 @@ module Pandata
|
|
149
153
|
next_data_indices = {}
|
150
154
|
|
151
155
|
while next_data_indices
|
152
|
-
html = @downloader.read_page(url)
|
156
|
+
html = Downloader.read_page(url)
|
153
157
|
next_data_indices = @parser.get_next_data_indices(html)
|
154
158
|
url = yield(html, next_data_indices)
|
155
159
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: pandata
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.2
|
4
|
+
version: 0.2.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Brian Ustas
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2013-
|
11
|
+
date: 2013-09-16 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|
@@ -24,20 +24,62 @@ dependencies:
|
|
24
24
|
- - ~>
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: 1.5.6
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: ruby-progressbar
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ~>
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: 1.2.0
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ~>
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: 1.2.0
|
27
41
|
- !ruby/object:Gem::Dependency
|
28
42
|
name: rspec
|
29
43
|
requirement: !ruby/object:Gem::Requirement
|
30
44
|
requirements:
|
31
45
|
- - ~>
|
32
46
|
- !ruby/object:Gem::Version
|
33
|
-
version: 2.
|
47
|
+
version: 2.14.0
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ~>
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: 2.14.0
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: vcr
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ~>
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: 2.5.0
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ~>
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: 2.5.0
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: webmock
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - ~>
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: 1.13.0
|
34
76
|
type: :development
|
35
77
|
prerelease: false
|
36
78
|
version_requirements: !ruby/object:Gem::Requirement
|
37
79
|
requirements:
|
38
80
|
- - ~>
|
39
81
|
- !ruby/object:Gem::Version
|
40
|
-
version:
|
82
|
+
version: 1.13.0
|
41
83
|
- !ruby/object:Gem::Dependency
|
42
84
|
name: yard
|
43
85
|
requirement: !ruby/object:Gem::Requirement
|
@@ -63,6 +105,7 @@ extra_rdoc_files:
|
|
63
105
|
- README.md
|
64
106
|
files:
|
65
107
|
- lib/pandata/argv_parser.rb
|
108
|
+
- lib/pandata/cli.rb
|
66
109
|
- lib/pandata/data_formatter.rb
|
67
110
|
- lib/pandata/data_urls.rb
|
68
111
|
- lib/pandata/downloader.rb
|
@@ -92,7 +135,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
92
135
|
version: '0'
|
93
136
|
requirements: []
|
94
137
|
rubyforge_project:
|
95
|
-
rubygems_version: 2.0.
|
138
|
+
rubygems_version: 2.0.3
|
96
139
|
signing_key:
|
97
140
|
specification_version: 4
|
98
141
|
summary: A Pandora.com web scraper
|