pandata 0.1.2 → 0.2.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: b110672f1843569186fff6c0c073defe047b0932
4
- data.tar.gz: bf753689e76d89c1b0ce7a92e1c2491f6d021392
3
+ metadata.gz: 7e2bc9075307ca70cb1e11c34ca8e817d3fea79c
4
+ data.tar.gz: bca7a313c16dd995cfd0f6094e7cb35b5f539c69
5
5
  SHA512:
6
- metadata.gz: 0aa2816fa28182cf3bce1f1a813663624279095ff2de56922afa58b110b3e26faa0b661fc1a33df416d90d64c08b3e741174806521d7d65fedf03afa293bb438
7
- data.tar.gz: 79cd4ce5b6a65d08585164040d39dd6a952cb8a5fd65ab55023ab4aaf6aaa402e063af7105eb977fd904ee12d6a90ce7bc10d388525c00070840392abac91129
6
+ metadata.gz: 833e93f00606f5aac3c4a3a297f3fd7ee7f61614de1868b69d9cbbcb68fe218a43275c4087fada150e016d2da02e6bc2ef2dfb08bf368b7a389574140a41f867
7
+ data.tar.gz: 1ba8b3230c426ec828952376c79e1c4cb649bd3f936db3564ff774f51d0398b1eb9fbbc8beb229a0683ac4dda3e8ff0406cd8266f5a1f5e8af34835afb7347dc
data/README.md CHANGED
@@ -57,7 +57,7 @@ Next, start scraping!
57
57
  # Get all followers
58
58
  followers = johns_scraper.followers
59
59
 
60
- For more information, see the documentation for Pandata::Scraper.
60
+ For more information, see the [documentation][2] for Pandata::Scraper.
61
61
 
62
62
  ### As a Command-Line Tool
63
63
 
@@ -82,3 +82,4 @@ For an up-to-date list, check out:
82
82
  pandata my_webname --all -o my_pandora_data.txt
83
83
 
84
84
  [1]: http://www.pandora.com/feeds
85
+ [2]: http://rubydoc.info/gems/pandata/frames
data/bin/pandata CHANGED
@@ -1,86 +1,8 @@
1
1
  #!/usr/bin/env ruby
2
2
 
3
- require_relative '../lib/pandata'
4
- require_relative '../lib/pandata/argv_parser'
5
- require_relative '../lib/pandata/data_formatter'
3
+ require_relative '../lib/pandata/cli'
6
4
 
7
- options = Pandata::ArgvParser.parse(ARGV)
8
-
9
- output_file = options[:output_file]
10
- if output_file
11
- File.delete(output_file) if File.exists?(output_file)
12
-
13
- Object.send(:define_method, :write) do |string|
14
- File.open(output_file, 'a') do |file|
15
- file.puts string
16
- end
17
- end
18
- else
19
- def write(string)
20
- puts string
21
- end
22
- end
23
-
24
- if ARGV.empty?
25
- # Print command-line usage help.
26
- puts options[:opts]
27
- exit
28
- end
29
-
30
- scraper = Pandata::Scraper.get(options[:user_id])
31
- formatter = Pandata::DataFormatter.new
32
-
33
- # If scraper is an array, a Pandora user could not be found with certainty.
34
- # In this case, scraper will contain webnames similar to options[:user_id].
35
- if scraper.kind_of?(Array)
36
- puts "No exact match for '#{options[:user_id]}'."
37
-
38
- unless scraper.empty?
39
- puts "\nWebname results for '#{options[:user_id]}':"
40
- puts formatter.list(scraper)
41
- end
42
-
43
- exit
44
- end
45
-
46
- scraper_data = {}
47
- options[:data_to_get].each do |data_type|
48
- if /(bookmark|like)e?d_(.*)/ =~ data_type
49
- method = $1 << 's' # 'likes' or 'bookmarks'
50
- argument = $2.to_sym # :tracks, :artists, :stations or :albums
51
- scraper_data[data_type] = scraper.public_send(method, argument)
52
- else
53
- scraper_data[data_type] = scraper.public_send(data_type)
54
- end
55
- end
56
-
57
- if options[:return_as_json]
58
- require 'json'
59
- write JSON.generate(scraper_data)
60
- exit
61
- end
62
-
63
- scraper_data.each do |key, value|
64
- # Capitalize each word in the key symbol.
65
- # e.g. :liked_tracks becomes 'Liked Tracks:'
66
- title = key.to_s.split('_').map(&:capitalize).join(' ') << ':'
67
-
68
- if value.empty?
69
- output = ' ** No Data **'
70
- else
71
- output = case key
72
- when /playing_station|recent_activity/
73
- formatter.list(value)
74
- when /liked_tracks|bookmarked_tracks/
75
- formatter.tracks(value)
76
- when /liked_artists|bookmarked_artists|stations|liked_stations/
77
- formatter.sort_list(value)
78
- when :liked_albums
79
- formatter.albums(value)
80
- when /following|followers/
81
- formatter.followx(value)
82
- end
83
- end
84
-
85
- write "#{ title }\n#{ output }"
5
+ begin
6
+ Pandata::CLI.scrape(ARGV)
7
+ rescue Pandata::PandataError
86
8
  end
data/lib/pandata.rb CHANGED
@@ -6,10 +6,12 @@ require_relative 'pandata/parser'
6
6
  require_relative 'pandata/scraper'
7
7
 
8
8
  module Pandata
9
+ class PandataError < StandardError; end
10
+
9
11
  module Version
10
12
  MAJOR = 0
11
- MINOR = 1
12
- PATCH = 2
13
+ MINOR = 2
14
+ PATCH = 1
13
15
  BUILD = nil
14
16
 
15
17
  STRING = [MAJOR, MINOR, PATCH, BUILD].compact.join('.')
@@ -15,6 +15,8 @@ module Pandata
15
15
  # - :output_file [String]
16
16
  # - :data_to_get [Array]
17
17
  # - :get_all_data [Boolean]
18
+ # - :help [Boolean]
19
+ # - :version [Boolean]
18
20
  # - :return_as_json [Boolean]
19
21
  def self.parse(argv)
20
22
  options = { data_to_get: [] }
@@ -90,20 +92,18 @@ Options:
90
92
  end
91
93
 
92
94
  opts.on_tail("-h", "--help", "Show this message") do
93
- puts opts
94
- exit
95
+ options[:help] = true
95
96
  end
96
97
 
97
98
  opts.on_tail("--version", "Show version") do
98
- puts Pandata::Version::STRING
99
- exit
99
+ options[:version] = true
100
100
  end
101
101
  end
102
102
 
103
103
  options[:opts].parse(argv)
104
104
 
105
105
  # User ID is the first argument.
106
- options[:user_id] = argv.shift
106
+ options[:user_id] = argv[0]
107
107
 
108
108
  if get_all_data
109
109
  options[:data_to_get] = [
@@ -0,0 +1,144 @@
1
+ require 'json'
2
+ require 'ruby-progressbar'
3
+ require_relative '../pandata'
4
+ require_relative 'argv_parser'
5
+ require_relative 'data_formatter'
6
+
7
+ module Pandata
8
+
9
+ # Pandata command-line interface
10
+ class CLI
11
+
12
+ def self.scrape(argv)
13
+ options = Pandata::ArgvParser.parse(argv)
14
+
15
+ if argv.empty? || options[:help]
16
+ puts options[:opts].to_s # Log usage information
17
+ elsif options[:version]
18
+ puts Pandata::Version::STRING
19
+ else
20
+ new(options).download_and_output
21
+ end
22
+ end
23
+
24
+ def initialize(options)
25
+ @data_to_get = options[:data_to_get]
26
+ @output_file = options[:output_file]
27
+ @return_as_json = options[:return_as_json]
28
+
29
+ @scraper = scraper_for(options[:user_id])
30
+ @scraper.download_cb = method(:update_progress)
31
+ end
32
+
33
+ def update_progress(num_data)
34
+ progressbar.progress += num_data
35
+ end
36
+
37
+ def download_and_output
38
+ output_data format_data(download_data, @return_as_json)
39
+ end
40
+
41
+ private
42
+
43
+ def progressbar
44
+ @progressbar ||= ProgressBar.create(
45
+ title: 'Data Downloaded',
46
+ format: '%t: %c',
47
+ total: nil
48
+ )
49
+ end
50
+
51
+ def formatter
52
+ @formatter ||= DataFormatter.new
53
+ end
54
+
55
+ def log(msg)
56
+ puts msg
57
+ end
58
+
59
+ # Writes the data to STDOUT or a file.
60
+ # @param formatted_data [String]
61
+ def output_data(formatted_data)
62
+ @progressbar.stop if @progressbar
63
+
64
+ if @output_file
65
+ File.write(@output_file, formatted_data)
66
+ else
67
+ log formatted_data
68
+ end
69
+ end
70
+
71
+ # Formats data as a string list or JSON.
72
+ # @param data [Hash]
73
+ # @param json [Boolean]
74
+ # @return [String]
75
+ def format_data(data, json = false)
76
+ if json
77
+ JSON.generate(data)
78
+ else
79
+ data.map do |category, cat_data|
80
+ # Capitalize each word in the category symbol.
81
+ # e.g. :liked_tracks becomes 'Liked Tracks'
82
+ title = category.to_s.split('_').map(&:capitalize).join(' ')
83
+
84
+ output = if cat_data.empty?
85
+ " ** No Data **\n"
86
+ else
87
+ case category
88
+ when /playing_station|recent_activity/
89
+ formatter.list(cat_data)
90
+ when /liked_tracks|bookmarked_tracks/
91
+ formatter.tracks(cat_data)
92
+ when /liked_artists|bookmarked_artists|stations|liked_stations/
93
+ formatter.sort_list(cat_data)
94
+ when :liked_albums
95
+ formatter.albums(cat_data)
96
+ when /following|followers/
97
+ formatter.followx(cat_data)
98
+ end
99
+ end
100
+
101
+ "#{title}:\n#{output}"
102
+ end.join
103
+ end
104
+ end
105
+
106
+ # Downloads the user's desired data.
107
+ # @return [Hash]
108
+ def download_data
109
+ scraper_data = {}
110
+
111
+ @data_to_get.each do |data_category|
112
+ if /(bookmark|like)e?d_(.*)/ =~ data_category
113
+ method = $1 << 's' # 'likes' or 'bookmarks'
114
+ argument = $2.to_sym # :tracks, :artists, :stations or :albums
115
+ scraper_data[data_category] = @scraper.public_send(method, argument)
116
+ else
117
+ scraper_data[data_category] = @scraper.public_send(data_category)
118
+ end
119
+ end
120
+
121
+ scraper_data
122
+ end
123
+
124
+ # Returns a scraper for the user's id.
125
+ # @param user_id [String] webname or email
126
+ # @return [Pandata::Scraper]
127
+ def scraper_for(user_id)
128
+ scraper = Pandata::Scraper.get(user_id)
129
+
130
+ if scraper.kind_of?(Array)
131
+ log "No exact match for '#{user_id}'."
132
+
133
+ unless scraper.empty?
134
+ log "\nWebname results for '#{user_id}':\n#{formatter.list(scraper)}"
135
+ end
136
+
137
+ raise PandataError, "Could not create a scraper for '#{user_id}'."
138
+ end
139
+
140
+ scraper
141
+ end
142
+
143
+ end
144
+ end
@@ -1,6 +1,6 @@
1
1
  module Pandata
2
2
  # Number of results to get from a feeds.pandora.com URL.
3
- MAX_RESULTS = 100000 # Get everything...
3
+ MAX_RESULTS = 100_000 # Get everything...
4
4
 
5
5
  # URLs to Pandora's data!
6
6
  DATA_FEED_URLS = {
@@ -1,30 +1,23 @@
1
1
  require 'json'
2
2
  require 'open-uri'
3
+ require_relative '../pandata'
3
4
 
4
5
  module Pandata
5
- class PandataError < StandardError; end
6
6
 
7
- # Retrieves data from Pandora.com and handles errors.
7
+ # Retrieves data from Pandora.com and handles network errors.
8
8
  class Downloader
9
+
9
10
  # A GitHub Gist that contains an updated cookie allowing access to 'login-only' visible data.
10
11
  CONFIG_URL = 'https://gist.github.com/ustasb/596f1ee96d03463fde77/raw/pandata_config.json'
11
12
 
12
- class << self
13
- attr_accessor :cookie
14
- end
15
-
16
- # Gets a Pandora cookie and returns a Downloader instance.
17
- def initialize
18
- unless Downloader.cookie
19
- Downloader.cookie = get_cookie
20
- end
21
- end
13
+ # The cached cookie.
14
+ @@cookie = nil
22
15
 
23
16
  # Downloads and reads a page from a URL.
24
17
  # @param url [String]
25
18
  # @return [String] contents of page
26
- def read_page(url)
27
- download(url, Downloader.cookie).read
19
+ def self.read_page(url)
20
+ download(url, get_cookie).read
28
21
  end
29
22
 
30
23
  private
@@ -33,19 +26,21 @@ module Pandata
33
26
  # @param url [String]
34
27
  # @param cookie [String]
35
28
  # @return [File]
36
- def download(url, cookie = '')
29
+ def self.download(url, cookie = '')
37
30
  escaped_url = URI.escape(url)
38
31
 
39
- begin
40
- open(escaped_url, 'Cookie' => cookie, :read_timeout => 5)
41
- rescue OpenURI::HTTPError => error
42
- puts "The network request for:\n #{url}\nreturned an error:\n #{error.message}"
43
- puts "Please try again later or update Pandata. Sorry about that!\n\nFull error:"
44
- raise PandataError
45
- end
32
+ open(escaped_url, 'Cookie' => cookie, :read_timeout => 5)
33
+ rescue OpenURI::HTTPError => error
34
+ puts "The network request for:\n #{url}\nreturned an error:\n #{error.message}"
35
+ puts "Please try again later or update Pandata. Sorry about that!\n\nFull error:"
36
+ raise PandataError
37
+ end
38
+
39
+ def self.get_cookie
40
+ @@cookie ||= download_cookie
46
41
  end
47
42
 
48
- def get_cookie
43
+ def self.download_cookie
49
44
  config = JSON.parse download(CONFIG_URL).read
50
45
 
51
46
  if Gem::Version.new(Pandata::Version::STRING) <= Gem::Version.new(config['required_update_for'])
@@ -54,5 +49,6 @@ module Pandata
54
49
 
55
50
  config['cookie']
56
51
  end
52
+
57
53
  end
58
54
  end
@@ -12,6 +12,9 @@ module Pandata
12
12
  # the user ties a new email address to their Pandora account.
13
13
  attr_reader :webname
14
14
 
15
+ # A Proc that gets called after some data has been downloaded.
16
+ attr_accessor :download_cb
17
+
15
18
  # If possible, get a Scraper instance for the user_id otherwise return
16
19
  # an array of similar webnames.
17
20
  # @param user_id [String] email or webname
@@ -19,7 +22,7 @@ module Pandata
19
22
  # @return [Array] array of similar webnames
20
23
  def self.get(user_id)
21
24
  search_url = DATA_FEED_URLS[:user_search] % { searchString: user_id }
22
- html = Downloader.new.read_page(search_url)
25
+ html = Downloader.read_page(search_url)
23
26
  webnames = Parser.new.get_webnames_from_search(html)
24
27
 
25
28
  if webnames.include?(user_id)
@@ -34,7 +37,6 @@ module Pandata
34
37
 
35
38
  private_class_method :new
36
39
  def initialize(webname)
37
- @downloader = Downloader.new
38
40
  @parser = Parser.new
39
41
  @webname = webname
40
42
  end
@@ -134,6 +136,8 @@ module Pandata
134
136
  results.push(new_data)
135
137
  end
136
138
 
139
+ @download_cb[new_data.size] if @download_cb
140
+
137
141
  get_url(data_type, next_data_indices) if next_data_indices
138
142
  end
139
143
 
@@ -149,7 +153,7 @@ module Pandata
149
153
  next_data_indices = {}
150
154
 
151
155
  while next_data_indices
152
- html = @downloader.read_page(url)
156
+ html = Downloader.read_page(url)
153
157
  next_data_indices = @parser.get_next_data_indices(html)
154
158
  url = yield(html, next_data_indices)
155
159
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pandata
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.2
4
+ version: 0.2.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Brian Ustas
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2013-03-18 00:00:00.000000000 Z
11
+ date: 2013-09-16 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri
@@ -24,20 +24,62 @@ dependencies:
24
24
  - - ~>
25
25
  - !ruby/object:Gem::Version
26
26
  version: 1.5.6
27
+ - !ruby/object:Gem::Dependency
28
+ name: ruby-progressbar
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ~>
32
+ - !ruby/object:Gem::Version
33
+ version: 1.2.0
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ~>
39
+ - !ruby/object:Gem::Version
40
+ version: 1.2.0
27
41
  - !ruby/object:Gem::Dependency
28
42
  name: rspec
29
43
  requirement: !ruby/object:Gem::Requirement
30
44
  requirements:
31
45
  - - ~>
32
46
  - !ruby/object:Gem::Version
33
- version: 2.12.2
47
+ version: 2.14.0
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ~>
53
+ - !ruby/object:Gem::Version
54
+ version: 2.14.0
55
+ - !ruby/object:Gem::Dependency
56
+ name: vcr
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ~>
60
+ - !ruby/object:Gem::Version
61
+ version: 2.5.0
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ~>
67
+ - !ruby/object:Gem::Version
68
+ version: 2.5.0
69
+ - !ruby/object:Gem::Dependency
70
+ name: webmock
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ~>
74
+ - !ruby/object:Gem::Version
75
+ version: 1.13.0
34
76
  type: :development
35
77
  prerelease: false
36
78
  version_requirements: !ruby/object:Gem::Requirement
37
79
  requirements:
38
80
  - - ~>
39
81
  - !ruby/object:Gem::Version
40
- version: 2.12.2
82
+ version: 1.13.0
41
83
  - !ruby/object:Gem::Dependency
42
84
  name: yard
43
85
  requirement: !ruby/object:Gem::Requirement
@@ -63,6 +105,7 @@ extra_rdoc_files:
63
105
  - README.md
64
106
  files:
65
107
  - lib/pandata/argv_parser.rb
108
+ - lib/pandata/cli.rb
66
109
  - lib/pandata/data_formatter.rb
67
110
  - lib/pandata/data_urls.rb
68
111
  - lib/pandata/downloader.rb
@@ -92,7 +135,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
92
135
  version: '0'
93
136
  requirements: []
94
137
  rubyforge_project:
95
- rubygems_version: 2.0.2
138
+ rubygems_version: 2.0.3
96
139
  signing_key:
97
140
  specification_version: 4
98
141
  summary: A Pandora.com web scraper