pandata 0.1.2 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: b110672f1843569186fff6c0c073defe047b0932
4
- data.tar.gz: bf753689e76d89c1b0ce7a92e1c2491f6d021392
3
+ metadata.gz: 7e2bc9075307ca70cb1e11c34ca8e817d3fea79c
4
+ data.tar.gz: bca7a313c16dd995cfd0f6094e7cb35b5f539c69
5
5
  SHA512:
6
- metadata.gz: 0aa2816fa28182cf3bce1f1a813663624279095ff2de56922afa58b110b3e26faa0b661fc1a33df416d90d64c08b3e741174806521d7d65fedf03afa293bb438
7
- data.tar.gz: 79cd4ce5b6a65d08585164040d39dd6a952cb8a5fd65ab55023ab4aaf6aaa402e063af7105eb977fd904ee12d6a90ce7bc10d388525c00070840392abac91129
6
+ metadata.gz: 833e93f00606f5aac3c4a3a297f3fd7ee7f61614de1868b69d9cbbcb68fe218a43275c4087fada150e016d2da02e6bc2ef2dfb08bf368b7a389574140a41f867
7
+ data.tar.gz: 1ba8b3230c426ec828952376c79e1c4cb649bd3f936db3564ff774f51d0398b1eb9fbbc8beb229a0683ac4dda3e8ff0406cd8266f5a1f5e8af34835afb7347dc
data/README.md CHANGED
@@ -57,7 +57,7 @@ Next, start scraping!
57
57
  # Get all followers
58
58
  followers = johns_scraper.followers
59
59
 
60
- For more information, see the documentation for Pandata::Scraper.
60
+ For more information, see the [documentation][2] for Pandata::Scraper.
61
61
 
62
62
  ### As a Command-Line Tool
63
63
 
@@ -82,3 +82,4 @@ For an up-to-date list, check out:
82
82
  pandata my_webname --all -o my_pandora_data.txt
83
83
 
84
84
  [1]: http://www.pandora.com/feeds
85
+ [2]: http://rubydoc.info/gems/pandata/frames
data/bin/pandata CHANGED
@@ -1,86 +1,8 @@
1
1
  #!/usr/bin/env ruby
2
2
 
3
- require_relative '../lib/pandata'
4
- require_relative '../lib/pandata/argv_parser'
5
- require_relative '../lib/pandata/data_formatter'
3
+ require_relative '../lib/pandata/cli'
6
4
 
7
- options = Pandata::ArgvParser.parse(ARGV)
8
-
9
- output_file = options[:output_file]
10
- if output_file
11
- File.delete(output_file) if File.exists?(output_file)
12
-
13
- Object.send(:define_method, :write) do |string|
14
- File.open(output_file, 'a') do |file|
15
- file.puts string
16
- end
17
- end
18
- else
19
- def write(string)
20
- puts string
21
- end
22
- end
23
-
24
- if ARGV.empty?
25
- # Print command-line usage help.
26
- puts options[:opts]
27
- exit
28
- end
29
-
30
- scraper = Pandata::Scraper.get(options[:user_id])
31
- formatter = Pandata::DataFormatter.new
32
-
33
- # If scraper is an array, a Pandora user could not be found with certainty.
34
- # In this case, scraper will contain webnames similar to options[:user_id].
35
- if scraper.kind_of?(Array)
36
- puts "No exact match for '#{options[:user_id]}'."
37
-
38
- unless scraper.empty?
39
- puts "\nWebname results for '#{options[:user_id]}':"
40
- puts formatter.list(scraper)
41
- end
42
-
43
- exit
44
- end
45
-
46
- scraper_data = {}
47
- options[:data_to_get].each do |data_type|
48
- if /(bookmark|like)e?d_(.*)/ =~ data_type
49
- method = $1 << 's' # 'likes' or 'bookmarks'
50
- argument = $2.to_sym # :tracks, :artists, :stations or :albums
51
- scraper_data[data_type] = scraper.public_send(method, argument)
52
- else
53
- scraper_data[data_type] = scraper.public_send(data_type)
54
- end
55
- end
56
-
57
- if options[:return_as_json]
58
- require 'json'
59
- write JSON.generate(scraper_data)
60
- exit
61
- end
62
-
63
- scraper_data.each do |key, value|
64
- # Capitalize each word in the key symbol.
65
- # e.g. :liked_tracks becomes 'Liked Tracks:'
66
- title = key.to_s.split('_').map(&:capitalize).join(' ') << ':'
67
-
68
- if value.empty?
69
- output = ' ** No Data **'
70
- else
71
- output = case key
72
- when /playing_station|recent_activity/
73
- formatter.list(value)
74
- when /liked_tracks|bookmarked_tracks/
75
- formatter.tracks(value)
76
- when /liked_artists|bookmarked_artists|stations|liked_stations/
77
- formatter.sort_list(value)
78
- when :liked_albums
79
- formatter.albums(value)
80
- when /following|followers/
81
- formatter.followx(value)
82
- end
83
- end
84
-
85
- write "#{ title }\n#{ output }"
5
+ begin
6
+ Pandata::CLI.scrape(ARGV)
7
+ rescue Pandata::PandataError
86
8
  end
data/lib/pandata.rb CHANGED
@@ -6,10 +6,12 @@ require_relative 'pandata/parser'
6
6
  require_relative 'pandata/scraper'
7
7
 
8
8
  module Pandata
9
+ class PandataError < StandardError; end
10
+
9
11
  module Version
10
12
  MAJOR = 0
11
- MINOR = 1
12
- PATCH = 2
13
+ MINOR = 2
14
+ PATCH = 1
13
15
  BUILD = nil
14
16
 
15
17
  STRING = [MAJOR, MINOR, PATCH, BUILD].compact.join('.')
data/lib/pandata/argv_parser.rb CHANGED
@@ -15,6 +15,8 @@ module Pandata
15
15
  # - :output_file [String]
16
16
  # - :data_to_get [Array]
17
17
  # - :get_all_data [Boolean]
18
+ # - :help [Boolean]
19
+ # - :version [Boolean]
18
20
  # - :return_as_json [Boolean]
19
21
  def self.parse(argv)
20
22
  options = { data_to_get: [] }
@@ -90,20 +92,18 @@ Options:
90
92
  end
91
93
 
92
94
  opts.on_tail("-h", "--help", "Show this message") do
93
- puts opts
94
- exit
95
+ options[:help] = true
95
96
  end
96
97
 
97
98
  opts.on_tail("--version", "Show version") do
98
- puts Pandata::Version::STRING
99
- exit
99
+ options[:version] = true
100
100
  end
101
101
  end
102
102
 
103
103
  options[:opts].parse(argv)
104
104
 
105
105
  # User ID is the first argument.
106
- options[:user_id] = argv.shift
106
+ options[:user_id] = argv[0]
107
107
 
108
108
  if get_all_data
109
109
  options[:data_to_get] = [
data/lib/pandata/cli.rb ADDED
@@ -0,0 +1,144 @@
1
+ require 'json'
2
+ require 'ruby-progressbar'
3
+ require_relative '../pandata'
4
+ require_relative 'argv_parser'
5
+ require_relative 'data_formatter'
6
+
7
+ module Pandata
8
+
9
+ # Pandata command-line interface
10
+ class CLI
11
+
12
+ def self.scrape(argv)
13
+ options = Pandata::ArgvParser.parse(argv)
14
+
15
+ if argv.empty? || options[:help]
16
+ puts options[:opts].to_s # Log usage information
17
+ elsif options[:version]
18
+ puts Pandata::Version::STRING
19
+ else
20
+ new(options).download_and_output
21
+ end
22
+ end
23
+
24
+ def initialize(options)
25
+ @data_to_get = options[:data_to_get]
26
+ @output_file = options[:output_file]
27
+ @return_as_json = options[:return_as_json]
28
+
29
+ @scraper = scraper_for(options[:user_id])
30
+ @scraper.download_cb = method(:update_progress)
31
+ end
32
+
33
+ def update_progress(num_data)
34
+ progressbar.progress += num_data
35
+ end
36
+
37
+ def download_and_output
38
+ output_data format_data(download_data, @return_as_json)
39
+ end
40
+
41
+ private
42
+
43
+ def progressbar
44
+ @progressbar ||= ProgressBar.create(
45
+ title: 'Data Downloaded',
46
+ format: '%t: %c',
47
+ total: nil
48
+ )
49
+ end
50
+
51
+ def formatter
52
+ @formatter ||= DataFormatter.new
53
+ end
54
+
55
+ def log(msg)
56
+ puts msg
57
+ end
58
+
59
+ # Writes the data to STDOUT or a file.
60
+ # @param formatted_data [String]
61
+ def output_data(formatted_data)
62
+ @progressbar.stop if @progressbar
63
+
64
+ if @output_file
65
+ File.write(@output_file, formatted_data)
66
+ else
67
+ log formatted_data
68
+ end
69
+ end
70
+
71
+ # Formats data as a string list or JSON.
72
+ # @param data [Hash]
73
+ # @param json [Boolean]
74
+ # @return [String]
75
+ def format_data(data, json = false)
76
+ if json
77
+ JSON.generate(data)
78
+ else
79
+ data.map do |category, cat_data|
80
+ # Capitalize each word in the category symbol.
81
+ # e.g. :liked_tracks becomes 'Liked Tracks'
82
+ title = category.to_s.split('_').map(&:capitalize).join(' ')
83
+
84
+ output = if cat_data.empty?
85
+ " ** No Data **\n"
86
+ else
87
+ case category
88
+ when /playing_station|recent_activity/
89
+ formatter.list(cat_data)
90
+ when /liked_tracks|bookmarked_tracks/
91
+ formatter.tracks(cat_data)
92
+ when /liked_artists|bookmarked_artists|stations|liked_stations/
93
+ formatter.sort_list(cat_data)
94
+ when :liked_albums
95
+ formatter.albums(cat_data)
96
+ when /following|followers/
97
+ formatter.followx(cat_data)
98
+ end
99
+ end
100
+
101
+ "#{title}:\n#{output}"
102
+ end.join
103
+ end
104
+ end
105
+
106
+ # Downloads the user's desired data.
107
+ # @return [Hash]
108
+ def download_data
109
+ scraper_data = {}
110
+
111
+ @data_to_get.each do |data_category|
112
+ if /(bookmark|like)e?d_(.*)/ =~ data_category
113
+ method = $1 << 's' # 'likes' or 'bookmarks'
114
+ argument = $2.to_sym # :tracks, :artists, :stations or :albums
115
+ scraper_data[data_category] = @scraper.public_send(method, argument)
116
+ else
117
+ scraper_data[data_category] = @scraper.public_send(data_category)
118
+ end
119
+ end
120
+
121
+ scraper_data
122
+ end
123
+
124
+ # Returns a scraper for the user's id.
125
+ # @param user_id [String] webname or email
126
+ # @return [Pandata::Scraper]
127
+ def scraper_for(user_id)
128
+ scraper = Pandata::Scraper.get(user_id)
129
+
130
+ if scraper.kind_of?(Array)
131
+ log "No exact match for '#{user_id}'."
132
+
133
+ unless scraper.empty?
134
+ log "\nWebname results for '#{user_id}':\n#{formatter.list(scraper)}"
135
+ end
136
+
137
+ raise PandataError, "Could not create a scraper for '#{user_id}'."
138
+ end
139
+
140
+ scraper
141
+ end
142
+
143
+ end
144
+ end
data/lib/pandata/data_urls.rb CHANGED
@@ -1,6 +1,6 @@
1
1
  module Pandata
2
2
  # Number of results to get from a feeds.pandora.com URL.
3
- MAX_RESULTS = 100000 # Get everything...
3
+ MAX_RESULTS = 100_000 # Get everything...
4
4
 
5
5
  # URLs to Pandora's data!
6
6
  DATA_FEED_URLS = {
data/lib/pandata/downloader.rb CHANGED
@@ -1,30 +1,23 @@
1
1
  require 'json'
2
2
  require 'open-uri'
3
+ require_relative '../pandata'
3
4
 
4
5
  module Pandata
5
- class PandataError < StandardError; end
6
6
 
7
- # Retrieves data from Pandora.com and handles errors.
7
+ # Retrieves data from Pandora.com and handles network errors.
8
8
  class Downloader
9
+
9
10
  # A GitHub Gist that contains an updated cookie allowing access to 'login-only' visible data.
10
11
  CONFIG_URL = 'https://gist.github.com/ustasb/596f1ee96d03463fde77/raw/pandata_config.json'
11
12
 
12
- class << self
13
- attr_accessor :cookie
14
- end
15
-
16
- # Gets a Pandora cookie and returns a Downloader instance.
17
- def initialize
18
- unless Downloader.cookie
19
- Downloader.cookie = get_cookie
20
- end
21
- end
13
+ # The cached cookie.
14
+ @@cookie = nil
22
15
 
23
16
  # Downloads and reads a page from a URL.
24
17
  # @param url [String]
25
18
  # @return [String] contents of page
26
- def read_page(url)
27
- download(url, Downloader.cookie).read
19
+ def self.read_page(url)
20
+ download(url, get_cookie).read
28
21
  end
29
22
 
30
23
  private
@@ -33,19 +26,21 @@ module Pandata
33
26
  # @param url [String]
34
27
  # @param cookie [String]
35
28
  # @return [File]
36
- def download(url, cookie = '')
29
+ def self.download(url, cookie = '')
37
30
  escaped_url = URI.escape(url)
38
31
 
39
- begin
40
- open(escaped_url, 'Cookie' => cookie, :read_timeout => 5)
41
- rescue OpenURI::HTTPError => error
42
- puts "The network request for:\n #{url}\nreturned an error:\n #{error.message}"
43
- puts "Please try again later or update Pandata. Sorry about that!\n\nFull error:"
44
- raise PandataError
45
- end
32
+ open(escaped_url, 'Cookie' => cookie, :read_timeout => 5)
33
+ rescue OpenURI::HTTPError => error
34
+ puts "The network request for:\n #{url}\nreturned an error:\n #{error.message}"
35
+ puts "Please try again later or update Pandata. Sorry about that!\n\nFull error:"
36
+ raise PandataError
37
+ end
38
+
39
+ def self.get_cookie
40
+ @@cookie ||= download_cookie
46
41
  end
47
42
 
48
- def get_cookie
43
+ def self.download_cookie
49
44
  config = JSON.parse download(CONFIG_URL).read
50
45
 
51
46
  if Gem::Version.new(Pandata::Version::STRING) <= Gem::Version.new(config['required_update_for'])
@@ -54,5 +49,6 @@ module Pandata
54
49
 
55
50
  config['cookie']
56
51
  end
52
+
57
53
  end
58
54
  end
data/lib/pandata/scraper.rb CHANGED
@@ -12,6 +12,9 @@ module Pandata
12
12
  # the user ties a new email address to their Pandora account.
13
13
  attr_reader :webname
14
14
 
15
+ # A Proc that gets called after some data has been downloaded.
16
+ attr_accessor :download_cb
17
+
15
18
  # If possible, get a Scraper instance for the user_id otherwise return
16
19
  # an array of similar webnames.
17
20
  # @param user_id [String] email or webname
@@ -19,7 +22,7 @@ module Pandata
19
22
  # @return [Array] array of similar webnames
20
23
  def self.get(user_id)
21
24
  search_url = DATA_FEED_URLS[:user_search] % { searchString: user_id }
22
- html = Downloader.new.read_page(search_url)
25
+ html = Downloader.read_page(search_url)
23
26
  webnames = Parser.new.get_webnames_from_search(html)
24
27
 
25
28
  if webnames.include?(user_id)
@@ -34,7 +37,6 @@ module Pandata
34
37
 
35
38
  private_class_method :new
36
39
  def initialize(webname)
37
- @downloader = Downloader.new
38
40
  @parser = Parser.new
39
41
  @webname = webname
40
42
  end
@@ -134,6 +136,8 @@ module Pandata
134
136
  results.push(new_data)
135
137
  end
136
138
 
139
+ @download_cb[new_data.size] if @download_cb
140
+
137
141
  get_url(data_type, next_data_indices) if next_data_indices
138
142
  end
139
143
 
@@ -149,7 +153,7 @@ module Pandata
149
153
  next_data_indices = {}
150
154
 
151
155
  while next_data_indices
152
- html = @downloader.read_page(url)
156
+ html = Downloader.read_page(url)
153
157
  next_data_indices = @parser.get_next_data_indices(html)
154
158
  url = yield(html, next_data_indices)
155
159
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pandata
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.2
4
+ version: 0.2.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Brian Ustas
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2013-03-18 00:00:00.000000000 Z
11
+ date: 2013-09-16 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri
@@ -24,20 +24,62 @@ dependencies:
24
24
  - - ~>
25
25
  - !ruby/object:Gem::Version
26
26
  version: 1.5.6
27
+ - !ruby/object:Gem::Dependency
28
+ name: ruby-progressbar
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ~>
32
+ - !ruby/object:Gem::Version
33
+ version: 1.2.0
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ~>
39
+ - !ruby/object:Gem::Version
40
+ version: 1.2.0
27
41
  - !ruby/object:Gem::Dependency
28
42
  name: rspec
29
43
  requirement: !ruby/object:Gem::Requirement
30
44
  requirements:
31
45
  - - ~>
32
46
  - !ruby/object:Gem::Version
33
- version: 2.12.2
47
+ version: 2.14.0
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ~>
53
+ - !ruby/object:Gem::Version
54
+ version: 2.14.0
55
+ - !ruby/object:Gem::Dependency
56
+ name: vcr
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ~>
60
+ - !ruby/object:Gem::Version
61
+ version: 2.5.0
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ~>
67
+ - !ruby/object:Gem::Version
68
+ version: 2.5.0
69
+ - !ruby/object:Gem::Dependency
70
+ name: webmock
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ~>
74
+ - !ruby/object:Gem::Version
75
+ version: 1.13.0
34
76
  type: :development
35
77
  prerelease: false
36
78
  version_requirements: !ruby/object:Gem::Requirement
37
79
  requirements:
38
80
  - - ~>
39
81
  - !ruby/object:Gem::Version
40
- version: 2.12.2
82
+ version: 1.13.0
41
83
  - !ruby/object:Gem::Dependency
42
84
  name: yard
43
85
  requirement: !ruby/object:Gem::Requirement
@@ -63,6 +105,7 @@ extra_rdoc_files:
63
105
  - README.md
64
106
  files:
65
107
  - lib/pandata/argv_parser.rb
108
+ - lib/pandata/cli.rb
66
109
  - lib/pandata/data_formatter.rb
67
110
  - lib/pandata/data_urls.rb
68
111
  - lib/pandata/downloader.rb
@@ -92,7 +135,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
92
135
  version: '0'
93
136
  requirements: []
94
137
  rubyforge_project:
95
- rubygems_version: 2.0.2
138
+ rubygems_version: 2.0.3
96
139
  signing_key:
97
140
  specification_version: 4
98
141
  summary: A Pandora.com web scraper