steam_scraper 1.1.6 → 1.1.7

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: d35d39b0ec4e9ee65403d6f213f02818db2bc493
4
- data.tar.gz: ff9f3ec8ec11b965da0a3d8cf3d958f99536af83
3
+ metadata.gz: 2b682929fb38924b8ad5b928ad27f98b4cc666f1
4
+ data.tar.gz: b1b40cebd77d5202362d006af504e47ed5f8b473
5
5
  SHA512:
6
- metadata.gz: 1c464a5c2d986221968cb6cbc1ab6ecfcab3c6e11a93a732eda686ef09722d1585e5649c6e399a53ee4486939a5c83e5c65ddcf5c87037c9bc2cf1840e91a8cd
7
- data.tar.gz: c98b76070ddefc042c4871b328f27b6a7e416b8aaada35b451eedc16c1cfb0e4f1d475d14523b7bfa3ded45c9563c4b29e608ccd5210f9f93da2616032d6ffeb
6
+ metadata.gz: d88b7769ea0294bb94ac839f8597c4f461f3d9eb6bb6cf9af26c6cde55b2c8cbb931a3016c44c60417669962228b03f12ccbf9aec92f9d96baeca7421e79f2e2
7
+ data.tar.gz: edf0a0f2980c07e5802b4823392fc5c40e56e85dbe328299938948e22b739d1144ba9a52f548322a1cc67d161aa1617a3c2f0499e694316a70c5b5843275b5db
@@ -1,5 +1,5 @@
1
1
  require_relative './page_retriever.rb'
2
-
2
+ require 'parallel'
3
3
  # Core scraping class
4
4
  class GameListScraper
5
5
  def initialize(*_args)
@@ -27,19 +27,21 @@ class GameListScraper
27
27
  def scrape(first_page = 1, last_page = nil)
28
28
  last_page ||= @last_page_num
29
29
  # scrape each search page
30
- puts 'Scraping Steam Store pages ' + [first_page..last_page].join(' to ')
31
- (first_page..last_page).each do |page|
32
- puts 'Scraping Page ' + page.to_s
30
+ result = Parallel.map(first_page..last_page,
31
+ progress: 'Scraping Steam Store pages ' + [first_page..last_page].join(' to ')) do |page|
33
32
  items_on_page = search_results(page)
34
33
  scrape_page(items_on_page)
35
34
  end
36
- @game_list
35
+ @game_list.push(result).flatten!
37
36
  end
38
37
 
39
38
  def scrape_page(current_page)
39
+ entries = []
40
40
  current_page.each do |entry|
41
- scrape_entry(entry)
41
+ entries.push(scrape_entry(entry))
42
42
  end
43
+
44
+ entries
43
45
  end
44
46
 
45
47
  def scrape_url(entry)
@@ -65,7 +67,6 @@ class GameListScraper
65
67
  begin
66
68
  date = Date.parse(date_node_text)
67
69
  rescue ArgumentError
68
- puts 'Invalid date found. Probably in the future. Date set to nil'
69
70
  end
70
71
 
71
72
  date
@@ -119,6 +120,6 @@ class GameListScraper
119
120
  new_game[:icon_url] = scrape_icon_url(entry)
120
121
  new_game[:review_score] = scrape_review_score(entry)
121
122
  new_game[:number_of_reviews] = scrape_number_of_reviews(entry)
122
- @game_list.push new_game
123
+ new_game
123
124
  end
124
125
  end
@@ -1,4 +1,5 @@
1
1
  require_relative './page_retriever.rb'
2
+ require 'Parallel'
2
3
 
3
4
  # Class that scrapes a games actual page
4
5
  class GamePageScraper
@@ -7,16 +8,21 @@ class GamePageScraper
7
8
  end
8
9
 
9
10
  def scrape(games_hash)
10
- games_hash.map! do |game|
11
+ result = Parallel.map(games_hash, progress: 'Scraping additional per game data') do |game|
11
12
  url = game[:url]
12
- scrape_game!(game, url) unless url.nil?
13
+ scrape_game(game, url) unless url.nil?
13
14
  end
14
- games_hash
15
+ games_hash.push(result).flatten!
15
16
  end
16
17
 
17
- def scrape_game!(game, url)
18
- puts 'Scraping additional data for ' + game[:name]
18
+ def scrape_game(game, url)
19
19
  page_contents = get_page_contents(url)
20
+ game = scrape_game_with_valid_contents(game, page_contents) unless page_contents.nil?
21
+
22
+ game
23
+ end
24
+
25
+ def scrape_game_with_valid_contents(game, page_contents)
20
26
  game[:metacritic] = scrape_metacritic(page_contents)
21
27
  game[:tags] = scrape_tags(page_contents)
22
28
  game[:genres] = scrape_genres(page_contents)
@@ -7,5 +7,8 @@ class PageRetriever
7
7
  def retrieve(url)
8
8
  page_contents = HTTParty.get(url)
9
9
  Nokogiri::HTML(page_contents)
10
+ rescue Error => e
11
+ puts e
12
+ nil
10
13
  end
11
14
  end
@@ -1,3 +1,3 @@
1
1
  module SteamScraper
2
- VERSION = '1.1.6'.freeze
2
+ VERSION = '1.1.7'.freeze
3
3
  end
@@ -24,6 +24,10 @@ Gem::Specification.new do |spec|
24
24
  spec.add_development_dependency 'pry'
25
25
  spec.add_development_dependency 'nokogiri', '~> 1.6.8'
26
26
  spec.add_development_dependency 'httparty', '~> 0.14.0'
27
+ spec.add_development_dependency 'parallel', '~> 1.9.0'
28
+ spec.add_development_dependency 'ruby-progressbar', '~> 1.8.1'
29
+ spec.add_runtime_dependency 'ruby-progressbar', '~> 1.8.1'
30
+ spec.add_runtime_dependency 'parallel', '~> 1.9.0'
27
31
  spec.add_runtime_dependency 'nokogiri', '~> 1.6.8'
28
32
  spec.add_runtime_dependency 'httparty', '~> 0.14.0'
29
33
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: steam_scraper
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.6
4
+ version: 1.1.7
5
5
  platform: ruby
6
6
  authors:
7
7
  - Robert Gardner
@@ -80,6 +80,62 @@ dependencies:
80
80
  - - "~>"
81
81
  - !ruby/object:Gem::Version
82
82
  version: 0.14.0
83
+ - !ruby/object:Gem::Dependency
84
+ name: parallel
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - "~>"
88
+ - !ruby/object:Gem::Version
89
+ version: 1.9.0
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - "~>"
95
+ - !ruby/object:Gem::Version
96
+ version: 1.9.0
97
+ - !ruby/object:Gem::Dependency
98
+ name: ruby-progressbar
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - "~>"
102
+ - !ruby/object:Gem::Version
103
+ version: 1.8.1
104
+ type: :development
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - "~>"
109
+ - !ruby/object:Gem::Version
110
+ version: 1.8.1
111
+ - !ruby/object:Gem::Dependency
112
+ name: ruby-progressbar
113
+ requirement: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - "~>"
116
+ - !ruby/object:Gem::Version
117
+ version: 1.8.1
118
+ type: :runtime
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ requirements:
122
+ - - "~>"
123
+ - !ruby/object:Gem::Version
124
+ version: 1.8.1
125
+ - !ruby/object:Gem::Dependency
126
+ name: parallel
127
+ requirement: !ruby/object:Gem::Requirement
128
+ requirements:
129
+ - - "~>"
130
+ - !ruby/object:Gem::Version
131
+ version: 1.9.0
132
+ type: :runtime
133
+ prerelease: false
134
+ version_requirements: !ruby/object:Gem::Requirement
135
+ requirements:
136
+ - - "~>"
137
+ - !ruby/object:Gem::Version
138
+ version: 1.9.0
83
139
  - !ruby/object:Gem::Dependency
84
140
  name: nokogiri
85
141
  requirement: !ruby/object:Gem::Requirement