steam_scraper 1.1.6 → 1.1.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2b682929fb38924b8ad5b928ad27f98b4cc666f1
|
4
|
+
data.tar.gz: b1b40cebd77d5202362d006af504e47ed5f8b473
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d88b7769ea0294bb94ac839f8597c4f461f3d9eb6bb6cf9af26c6cde55b2c8cbb931a3016c44c60417669962228b03f12ccbf9aec92f9d96baeca7421e79f2e2
|
7
|
+
data.tar.gz: edf0a0f2980c07e5802b4823392fc5c40e56e85dbe328299938948e22b739d1144ba9a52f548322a1cc67d161aa1617a3c2f0499e694316a70c5b5843275b5db
|
@@ -1,5 +1,5 @@
|
|
1
1
|
require_relative './page_retriever.rb'
|
2
|
-
|
2
|
+
require 'parallel'
|
3
3
|
# Core scraping class
|
4
4
|
class GameListScraper
|
5
5
|
def initialize(*_args)
|
@@ -27,19 +27,21 @@ class GameListScraper
|
|
27
27
|
def scrape(first_page = 1, last_page = nil)
|
28
28
|
last_page ||= @last_page_num
|
29
29
|
# scrape each search page
|
30
|
-
|
31
|
-
|
32
|
-
puts 'Scraping Page ' + page.to_s
|
30
|
+
result = Parallel.map(first_page..last_page,
|
31
|
+
progress: 'Scraping Steam Store pages ' + [first_page..last_page].join(' to ')) do |page|
|
33
32
|
items_on_page = search_results(page)
|
34
33
|
scrape_page(items_on_page)
|
35
34
|
end
|
36
|
-
@game_list
|
35
|
+
@game_list.push(result).flatten!
|
37
36
|
end
|
38
37
|
|
39
38
|
def scrape_page(current_page)
|
39
|
+
entries = []
|
40
40
|
current_page.each do |entry|
|
41
|
-
scrape_entry(entry)
|
41
|
+
entries.push(scrape_entry(entry))
|
42
42
|
end
|
43
|
+
|
44
|
+
entries
|
43
45
|
end
|
44
46
|
|
45
47
|
def scrape_url(entry)
|
@@ -65,7 +67,6 @@ class GameListScraper
|
|
65
67
|
begin
|
66
68
|
date = Date.parse(date_node_text)
|
67
69
|
rescue ArgumentError
|
68
|
-
puts 'Invalid date found. Probably in the future. Date set to nil'
|
69
70
|
end
|
70
71
|
|
71
72
|
date
|
@@ -119,6 +120,6 @@ class GameListScraper
|
|
119
120
|
new_game[:icon_url] = scrape_icon_url(entry)
|
120
121
|
new_game[:review_score] = scrape_review_score(entry)
|
121
122
|
new_game[:number_of_reviews] = scrape_number_of_reviews(entry)
|
122
|
-
|
123
|
+
new_game
|
123
124
|
end
|
124
125
|
end
|
@@ -1,4 +1,5 @@
|
|
1
1
|
require_relative './page_retriever.rb'
|
2
|
+
require 'Parallel'
|
2
3
|
|
3
4
|
# Class that scrapes a games actual page
|
4
5
|
class GamePageScraper
|
@@ -7,16 +8,21 @@ class GamePageScraper
|
|
7
8
|
end
|
8
9
|
|
9
10
|
def scrape(games_hash)
|
10
|
-
|
11
|
+
result = Parallel.map(games_hash, progress: 'Scraping additional per game data') do |game|
|
11
12
|
url = game[:url]
|
12
|
-
scrape_game
|
13
|
+
scrape_game(game, url) unless url.nil?
|
13
14
|
end
|
14
|
-
games_hash
|
15
|
+
games_hash.push(result).flatten!
|
15
16
|
end
|
16
17
|
|
17
|
-
def scrape_game
|
18
|
-
puts 'Scraping additional data for ' + game[:name]
|
18
|
+
def scrape_game(game, url)
|
19
19
|
page_contents = get_page_contents(url)
|
20
|
+
game = scrape_game_with_valid_contents(game, page_contents) unless page_contents.nil?
|
21
|
+
|
22
|
+
game
|
23
|
+
end
|
24
|
+
|
25
|
+
def scrape_game_with_valid_contents(game, page_contents)
|
20
26
|
game[:metacritic] = scrape_metacritic(page_contents)
|
21
27
|
game[:tags] = scrape_tags(page_contents)
|
22
28
|
game[:genres] = scrape_genres(page_contents)
|
data/steam_scraper.gemspec
CHANGED
@@ -24,6 +24,10 @@ Gem::Specification.new do |spec|
|
|
24
24
|
spec.add_development_dependency 'pry'
|
25
25
|
spec.add_development_dependency 'nokogiri', '~> 1.6.8'
|
26
26
|
spec.add_development_dependency 'httparty', '~> 0.14.0'
|
27
|
+
spec.add_development_dependency 'parallel', '~> 1.9.0'
|
28
|
+
spec.add_development_dependency 'ruby-progressbar', '~> 1.8.1'
|
29
|
+
spec.add_runtime_dependency 'ruby-progressbar', '~> 1.8.1'
|
30
|
+
spec.add_runtime_dependency 'parallel', '~> 1.9.0'
|
27
31
|
spec.add_runtime_dependency 'nokogiri', '~> 1.6.8'
|
28
32
|
spec.add_runtime_dependency 'httparty', '~> 0.14.0'
|
29
33
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: steam_scraper
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.1.6
|
4
|
+
version: 1.1.7
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Robert Gardner
|
@@ -80,6 +80,62 @@ dependencies:
|
|
80
80
|
- - "~>"
|
81
81
|
- !ruby/object:Gem::Version
|
82
82
|
version: 0.14.0
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: parallel
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - "~>"
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: 1.9.0
|
90
|
+
type: :development
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - "~>"
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: 1.9.0
|
97
|
+
- !ruby/object:Gem::Dependency
|
98
|
+
name: ruby-progressbar
|
99
|
+
requirement: !ruby/object:Gem::Requirement
|
100
|
+
requirements:
|
101
|
+
- - "~>"
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
version: 1.8.1
|
104
|
+
type: :development
|
105
|
+
prerelease: false
|
106
|
+
version_requirements: !ruby/object:Gem::Requirement
|
107
|
+
requirements:
|
108
|
+
- - "~>"
|
109
|
+
- !ruby/object:Gem::Version
|
110
|
+
version: 1.8.1
|
111
|
+
- !ruby/object:Gem::Dependency
|
112
|
+
name: ruby-progressbar
|
113
|
+
requirement: !ruby/object:Gem::Requirement
|
114
|
+
requirements:
|
115
|
+
- - "~>"
|
116
|
+
- !ruby/object:Gem::Version
|
117
|
+
version: 1.8.1
|
118
|
+
type: :runtime
|
119
|
+
prerelease: false
|
120
|
+
version_requirements: !ruby/object:Gem::Requirement
|
121
|
+
requirements:
|
122
|
+
- - "~>"
|
123
|
+
- !ruby/object:Gem::Version
|
124
|
+
version: 1.8.1
|
125
|
+
- !ruby/object:Gem::Dependency
|
126
|
+
name: parallel
|
127
|
+
requirement: !ruby/object:Gem::Requirement
|
128
|
+
requirements:
|
129
|
+
- - "~>"
|
130
|
+
- !ruby/object:Gem::Version
|
131
|
+
version: 1.9.0
|
132
|
+
type: :runtime
|
133
|
+
prerelease: false
|
134
|
+
version_requirements: !ruby/object:Gem::Requirement
|
135
|
+
requirements:
|
136
|
+
- - "~>"
|
137
|
+
- !ruby/object:Gem::Version
|
138
|
+
version: 1.9.0
|
83
139
|
- !ruby/object:Gem::Dependency
|
84
140
|
name: nokogiri
|
85
141
|
requirement: !ruby/object:Gem::Requirement
|