steam_scraper 1.1.6 → 1.1.7
Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2b682929fb38924b8ad5b928ad27f98b4cc666f1
|
4
|
+
data.tar.gz: b1b40cebd77d5202362d006af504e47ed5f8b473
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d88b7769ea0294bb94ac839f8597c4f461f3d9eb6bb6cf9af26c6cde55b2c8cbb931a3016c44c60417669962228b03f12ccbf9aec92f9d96baeca7421e79f2e2
|
7
|
+
data.tar.gz: edf0a0f2980c07e5802b4823392fc5c40e56e85dbe328299938948e22b739d1144ba9a52f548322a1cc67d161aa1617a3c2f0499e694316a70c5b5843275b5db
|
@@ -1,5 +1,5 @@
|
|
1
1
|
require_relative './page_retriever.rb'
|
2
|
-
|
2
|
+
require 'parallel'
|
3
3
|
# Core scraping class
|
4
4
|
class GameListScraper
|
5
5
|
def initialize(*_args)
|
@@ -27,19 +27,21 @@ class GameListScraper
|
|
27
27
|
def scrape(first_page = 1, last_page = nil)
|
28
28
|
last_page ||= @last_page_num
|
29
29
|
# scrape each search page
|
30
|
-
|
31
|
-
|
32
|
-
puts 'Scraping Page ' + page.to_s
|
30
|
+
result = Parallel.map(first_page..last_page,
|
31
|
+
progress: 'Scraping Steam Store pages ' + [first_page..last_page].join(' to ')) do |page|
|
33
32
|
items_on_page = search_results(page)
|
34
33
|
scrape_page(items_on_page)
|
35
34
|
end
|
36
|
-
@game_list
|
35
|
+
@game_list.push(result).flatten!
|
37
36
|
end
|
38
37
|
|
39
38
|
def scrape_page(current_page)
|
39
|
+
entries = []
|
40
40
|
current_page.each do |entry|
|
41
|
-
scrape_entry(entry)
|
41
|
+
entries.push(scrape_entry(entry))
|
42
42
|
end
|
43
|
+
|
44
|
+
entries
|
43
45
|
end
|
44
46
|
|
45
47
|
def scrape_url(entry)
|
@@ -65,7 +67,6 @@ class GameListScraper
|
|
65
67
|
begin
|
66
68
|
date = Date.parse(date_node_text)
|
67
69
|
rescue ArgumentError
|
68
|
-
puts 'Invalid date found. Probably in the future. Date set to nil'
|
69
70
|
end
|
70
71
|
|
71
72
|
date
|
@@ -119,6 +120,6 @@ class GameListScraper
|
|
119
120
|
new_game[:icon_url] = scrape_icon_url(entry)
|
120
121
|
new_game[:review_score] = scrape_review_score(entry)
|
121
122
|
new_game[:number_of_reviews] = scrape_number_of_reviews(entry)
|
122
|
-
|
123
|
+
new_game
|
123
124
|
end
|
124
125
|
end
|
@@ -1,4 +1,5 @@
|
|
1
1
|
require_relative './page_retriever.rb'
|
2
|
+
require 'Parallel'
|
2
3
|
|
3
4
|
# Class that scrapes a games actual page
|
4
5
|
class GamePageScraper
|
@@ -7,16 +8,21 @@ class GamePageScraper
|
|
7
8
|
end
|
8
9
|
|
9
10
|
def scrape(games_hash)
|
10
|
-
|
11
|
+
result = Parallel.map(games_hash, progress: 'Scraping additional per game data') do |game|
|
11
12
|
url = game[:url]
|
12
|
-
scrape_game
|
13
|
+
scrape_game(game, url) unless url.nil?
|
13
14
|
end
|
14
|
-
games_hash
|
15
|
+
games_hash.push(result).flatten!
|
15
16
|
end
|
16
17
|
|
17
|
-
def scrape_game
|
18
|
-
puts 'Scraping additional data for ' + game[:name]
|
18
|
+
def scrape_game(game, url)
|
19
19
|
page_contents = get_page_contents(url)
|
20
|
+
game = scrape_game_with_valid_contents(game, page_contents) unless page_contents.nil?
|
21
|
+
|
22
|
+
game
|
23
|
+
end
|
24
|
+
|
25
|
+
def scrape_game_with_valid_contents(game, page_contents)
|
20
26
|
game[:metacritic] = scrape_metacritic(page_contents)
|
21
27
|
game[:tags] = scrape_tags(page_contents)
|
22
28
|
game[:genres] = scrape_genres(page_contents)
|
data/steam_scraper.gemspec
CHANGED
@@ -24,6 +24,10 @@ Gem::Specification.new do |spec|
|
|
24
24
|
spec.add_development_dependency 'pry'
|
25
25
|
spec.add_development_dependency 'nokogiri', '~> 1.6.8'
|
26
26
|
spec.add_development_dependency 'httparty', '~> 0.14.0'
|
27
|
+
spec.add_development_dependency 'parallel', '~> 1.9.0'
|
28
|
+
spec.add_development_dependency 'ruby-progressbar', '~> 1.8.1'
|
29
|
+
spec.add_runtime_dependency 'ruby-progressbar', '~> 1.8.1'
|
30
|
+
spec.add_runtime_dependency 'parallel', '~> 1.9.0'
|
27
31
|
spec.add_runtime_dependency 'nokogiri', '~> 1.6.8'
|
28
32
|
spec.add_runtime_dependency 'httparty', '~> 0.14.0'
|
29
33
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: steam_scraper
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.1.6
|
4
|
+
version: 1.1.7
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Robert Gardner
|
@@ -80,6 +80,62 @@ dependencies:
|
|
80
80
|
- - "~>"
|
81
81
|
- !ruby/object:Gem::Version
|
82
82
|
version: 0.14.0
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: parallel
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - "~>"
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: 1.9.0
|
90
|
+
type: :development
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - "~>"
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: 1.9.0
|
97
|
+
- !ruby/object:Gem::Dependency
|
98
|
+
name: ruby-progressbar
|
99
|
+
requirement: !ruby/object:Gem::Requirement
|
100
|
+
requirements:
|
101
|
+
- - "~>"
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
version: 1.8.1
|
104
|
+
type: :development
|
105
|
+
prerelease: false
|
106
|
+
version_requirements: !ruby/object:Gem::Requirement
|
107
|
+
requirements:
|
108
|
+
- - "~>"
|
109
|
+
- !ruby/object:Gem::Version
|
110
|
+
version: 1.8.1
|
111
|
+
- !ruby/object:Gem::Dependency
|
112
|
+
name: ruby-progressbar
|
113
|
+
requirement: !ruby/object:Gem::Requirement
|
114
|
+
requirements:
|
115
|
+
- - "~>"
|
116
|
+
- !ruby/object:Gem::Version
|
117
|
+
version: 1.8.1
|
118
|
+
type: :runtime
|
119
|
+
prerelease: false
|
120
|
+
version_requirements: !ruby/object:Gem::Requirement
|
121
|
+
requirements:
|
122
|
+
- - "~>"
|
123
|
+
- !ruby/object:Gem::Version
|
124
|
+
version: 1.8.1
|
125
|
+
- !ruby/object:Gem::Dependency
|
126
|
+
name: parallel
|
127
|
+
requirement: !ruby/object:Gem::Requirement
|
128
|
+
requirements:
|
129
|
+
- - "~>"
|
130
|
+
- !ruby/object:Gem::Version
|
131
|
+
version: 1.9.0
|
132
|
+
type: :runtime
|
133
|
+
prerelease: false
|
134
|
+
version_requirements: !ruby/object:Gem::Requirement
|
135
|
+
requirements:
|
136
|
+
- - "~>"
|
137
|
+
- !ruby/object:Gem::Version
|
138
|
+
version: 1.9.0
|
83
139
|
- !ruby/object:Gem::Dependency
|
84
140
|
name: nokogiri
|
85
141
|
requirement: !ruby/object:Gem::Requirement
|