steam_scraper 1.1.6 → 1.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: d35d39b0ec4e9ee65403d6f213f02818db2bc493
4
- data.tar.gz: ff9f3ec8ec11b965da0a3d8cf3d958f99536af83
3
+ metadata.gz: 2b682929fb38924b8ad5b928ad27f98b4cc666f1
4
+ data.tar.gz: b1b40cebd77d5202362d006af504e47ed5f8b473
5
5
  SHA512:
6
- metadata.gz: 1c464a5c2d986221968cb6cbc1ab6ecfcab3c6e11a93a732eda686ef09722d1585e5649c6e399a53ee4486939a5c83e5c65ddcf5c87037c9bc2cf1840e91a8cd
7
- data.tar.gz: c98b76070ddefc042c4871b328f27b6a7e416b8aaada35b451eedc16c1cfb0e4f1d475d14523b7bfa3ded45c9563c4b29e608ccd5210f9f93da2616032d6ffeb
6
+ metadata.gz: d88b7769ea0294bb94ac839f8597c4f461f3d9eb6bb6cf9af26c6cde55b2c8cbb931a3016c44c60417669962228b03f12ccbf9aec92f9d96baeca7421e79f2e2
7
+ data.tar.gz: edf0a0f2980c07e5802b4823392fc5c40e56e85dbe328299938948e22b739d1144ba9a52f548322a1cc67d161aa1617a3c2f0499e694316a70c5b5843275b5db
@@ -1,5 +1,5 @@
1
1
  require_relative './page_retriever.rb'
2
-
2
+ require 'parallel'
3
3
  # Core scraping class
4
4
  class GameListScraper
5
5
  def initialize(*_args)
@@ -27,19 +27,21 @@ class GameListScraper
27
27
  def scrape(first_page = 1, last_page = nil)
28
28
  last_page ||= @last_page_num
29
29
  # scrape each search page
30
- puts 'Scraping Steam Store pages ' + [first_page..last_page].join(' to ')
31
- (first_page..last_page).each do |page|
32
- puts 'Scraping Page ' + page.to_s
30
+ result = Parallel.map(first_page..last_page,
31
+ progress: 'Scraping Steam Store pages ' + [first_page..last_page].join(' to ')) do |page|
33
32
  items_on_page = search_results(page)
34
33
  scrape_page(items_on_page)
35
34
  end
36
- @game_list
35
+ @game_list.push(result).flatten!
37
36
  end
38
37
 
39
38
  def scrape_page(current_page)
39
+ entries = []
40
40
  current_page.each do |entry|
41
- scrape_entry(entry)
41
+ entries.push(scrape_entry(entry))
42
42
  end
43
+
44
+ entries
43
45
  end
44
46
 
45
47
  def scrape_url(entry)
@@ -65,7 +67,6 @@ class GameListScraper
65
67
  begin
66
68
  date = Date.parse(date_node_text)
67
69
  rescue ArgumentError
68
- puts 'Invalid date found. Probably in the future. Date set to nil'
69
70
  end
70
71
 
71
72
  date
@@ -119,6 +120,6 @@ class GameListScraper
119
120
  new_game[:icon_url] = scrape_icon_url(entry)
120
121
  new_game[:review_score] = scrape_review_score(entry)
121
122
  new_game[:number_of_reviews] = scrape_number_of_reviews(entry)
122
- @game_list.push new_game
123
+ new_game
123
124
  end
124
125
  end
@@ -1,4 +1,5 @@
1
1
  require_relative './page_retriever.rb'
2
+ require 'Parallel'
2
3
 
3
4
  # Class that scrapes a games actual page
4
5
  class GamePageScraper
@@ -7,16 +8,21 @@ class GamePageScraper
7
8
  end
8
9
 
9
10
  def scrape(games_hash)
10
- games_hash.map! do |game|
11
+ result = Parallel.map(games_hash, progress: 'Scraping additional per game data') do |game|
11
12
  url = game[:url]
12
- scrape_game!(game, url) unless url.nil?
13
+ scrape_game(game, url) unless url.nil?
13
14
  end
14
- games_hash
15
+ games_hash.push(result).flatten!
15
16
  end
16
17
 
17
- def scrape_game!(game, url)
18
- puts 'Scraping additional data for ' + game[:name]
18
+ def scrape_game(game, url)
19
19
  page_contents = get_page_contents(url)
20
+ game = scrape_game_with_valid_contents(game, page_contents) unless page_contents.nil?
21
+
22
+ game
23
+ end
24
+
25
+ def scrape_game_with_valid_contents(game, page_contents)
20
26
  game[:metacritic] = scrape_metacritic(page_contents)
21
27
  game[:tags] = scrape_tags(page_contents)
22
28
  game[:genres] = scrape_genres(page_contents)
@@ -7,5 +7,8 @@ class PageRetriever
7
7
  def retrieve(url)
8
8
  page_contents = HTTParty.get(url)
9
9
  Nokogiri::HTML(page_contents)
10
+ rescue Error => e
11
+ puts e
12
+ nil
10
13
  end
11
14
  end
@@ -1,3 +1,3 @@
1
1
  module SteamScraper
2
- VERSION = '1.1.6'.freeze
2
+ VERSION = '1.1.7'.freeze
3
3
  end
@@ -24,6 +24,10 @@ Gem::Specification.new do |spec|
24
24
  spec.add_development_dependency 'pry'
25
25
  spec.add_development_dependency 'nokogiri', '~> 1.6.8'
26
26
  spec.add_development_dependency 'httparty', '~> 0.14.0'
27
+ spec.add_development_dependency 'parallel', '~> 1.9.0'
28
+ spec.add_development_dependency 'ruby-progressbar', '~> 1.8.1'
29
+ spec.add_runtime_dependency 'ruby-progressbar', '~> 1.8.1'
30
+ spec.add_runtime_dependency 'parallel', '~> 1.9.0'
27
31
  spec.add_runtime_dependency 'nokogiri', '~> 1.6.8'
28
32
  spec.add_runtime_dependency 'httparty', '~> 0.14.0'
29
33
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: steam_scraper
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.6
4
+ version: 1.1.7
5
5
  platform: ruby
6
6
  authors:
7
7
  - Robert Gardner
@@ -80,6 +80,62 @@ dependencies:
80
80
  - - "~>"
81
81
  - !ruby/object:Gem::Version
82
82
  version: 0.14.0
83
+ - !ruby/object:Gem::Dependency
84
+ name: parallel
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - "~>"
88
+ - !ruby/object:Gem::Version
89
+ version: 1.9.0
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - "~>"
95
+ - !ruby/object:Gem::Version
96
+ version: 1.9.0
97
+ - !ruby/object:Gem::Dependency
98
+ name: ruby-progressbar
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - "~>"
102
+ - !ruby/object:Gem::Version
103
+ version: 1.8.1
104
+ type: :development
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - "~>"
109
+ - !ruby/object:Gem::Version
110
+ version: 1.8.1
111
+ - !ruby/object:Gem::Dependency
112
+ name: ruby-progressbar
113
+ requirement: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - "~>"
116
+ - !ruby/object:Gem::Version
117
+ version: 1.8.1
118
+ type: :runtime
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ requirements:
122
+ - - "~>"
123
+ - !ruby/object:Gem::Version
124
+ version: 1.8.1
125
+ - !ruby/object:Gem::Dependency
126
+ name: parallel
127
+ requirement: !ruby/object:Gem::Requirement
128
+ requirements:
129
+ - - "~>"
130
+ - !ruby/object:Gem::Version
131
+ version: 1.9.0
132
+ type: :runtime
133
+ prerelease: false
134
+ version_requirements: !ruby/object:Gem::Requirement
135
+ requirements:
136
+ - - "~>"
137
+ - !ruby/object:Gem::Version
138
+ version: 1.9.0
83
139
  - !ruby/object:Gem::Dependency
84
140
  name: nokogiri
85
141
  requirement: !ruby/object:Gem::Requirement