steam_scraper 1.1.7 → 1.1.8
Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 3eb0595031ddaac9c22e6501cc1aecba8d22a8bf
|
4
|
+
data.tar.gz: 399dbd713d76fb4f28b3a81bd20ee2fe978facf1
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d6827c7c7ba1c5127ef85ff87065cb8061ec4e7166ef856bc245f018a68ec453bbb4271fc398e9b7daf5f351d06aac0b512c5cfb66910e91206f06ee5057d447
|
7
|
+
data.tar.gz: a70456ea7d52266e01025b25b46e0ef0142f75f8ddacfcdb02523dbe6471f02e101af3f918e2def8a04189a00858d8e5ec888e83d182330e1d6799b859b85767
|
data/lib/steam_scraper.rb
CHANGED
@@ -13,6 +13,7 @@ module SteamScraper
|
|
13
13
|
|
14
14
|
def scrape(first_page = 1, last_page = nil)
|
15
15
|
scraped_game_list = @game_list_scraper.scrape(first_page, last_page)
|
16
|
+
puts 'Initial scrape complete. Found ' + scraped_game_list.length.to_s + ' games.'
|
16
17
|
final_game_list = @game_page_scraper.scrape(scraped_game_list)
|
17
18
|
|
18
19
|
final_game_list
|
@@ -5,10 +5,16 @@ class GameListScraper
|
|
5
5
|
def initialize(*_args)
|
6
6
|
@game_list = []
|
7
7
|
@page_retriever = PageRetriever.new
|
8
|
+
init_last_page_num
|
9
|
+
end
|
10
|
+
|
11
|
+
def init_last_page_num
|
8
12
|
current_page_contents = get_page_contents(site + 1.to_s)
|
9
|
-
|
10
|
-
|
11
|
-
|
13
|
+
@last_page_num = current_page_contents.xpath("//div[contains(@class, 'search_pagination_right')]")
|
14
|
+
.text.scan(/(\d+)/i).flatten.last.to_i
|
15
|
+
rescue StandardError => e
|
16
|
+
puts 'Could not connect to Steam Store'
|
17
|
+
raise e
|
12
18
|
end
|
13
19
|
|
14
20
|
def site
|
@@ -22,13 +28,15 @@ class GameListScraper
|
|
22
28
|
def search_results(page_number)
|
23
29
|
current_page_contents = get_page_contents(site + page_number.to_s)
|
24
30
|
current_page_contents.xpath("//div[@id='search_result_container']/div/a")
|
31
|
+
rescue
|
32
|
+
nil
|
25
33
|
end
|
26
34
|
|
27
35
|
def scrape(first_page = 1, last_page = nil)
|
28
36
|
last_page ||= @last_page_num
|
29
|
-
# scrape each search page
|
30
37
|
result = Parallel.map(first_page..last_page,
|
31
|
-
progress: 'Scraping Steam Store pages ' + [first_page..last_page].join(' to ')
|
38
|
+
progress: 'Scraping Steam Store pages ' + [first_page..last_page].join(' to '),
|
39
|
+
in_processes: 8) do |page|
|
32
40
|
items_on_page = search_results(page)
|
33
41
|
scrape_page(items_on_page)
|
34
42
|
end
|
@@ -40,7 +48,6 @@ class GameListScraper
|
|
40
48
|
current_page.each do |entry|
|
41
49
|
entries.push(scrape_entry(entry))
|
42
50
|
end
|
43
|
-
|
44
51
|
entries
|
45
52
|
end
|
46
53
|
|
@@ -54,22 +61,13 @@ class GameListScraper
|
|
54
61
|
|
55
62
|
def scrape_price(entry)
|
56
63
|
entry.xpath(".//div[contains(@class, 'search_price')
|
57
|
-
and not(contains(@class, 'search_price_discount_combined'))]")
|
58
|
-
.text
|
59
|
-
.strip
|
60
|
-
.split('$')
|
61
|
-
.last
|
64
|
+
and not(contains(@class, 'search_price_discount_combined'))]").text.strip.split('$').last
|
62
65
|
end
|
63
66
|
|
64
67
|
def scrape_release_date(entry)
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
date = Date.parse(date_node_text)
|
69
|
-
rescue ArgumentError
|
70
|
-
end
|
71
|
-
|
72
|
-
date
|
68
|
+
Date.parse(entry.xpath(".//div[contains(@class, 'search_released')]").text)
|
69
|
+
rescue
|
70
|
+
nil
|
73
71
|
end
|
74
72
|
|
75
73
|
def scrape_platforms(entry)
|
@@ -78,7 +76,6 @@ class GameListScraper
|
|
78
76
|
platforms.push('macOS') unless entry.xpath(".//span[contains(@class, 'mac')]").empty?
|
79
77
|
platforms.push('Linux') unless entry.xpath(".//span[contains(@class, 'linux')]").empty?
|
80
78
|
platforms.push('Steamplay') unless entry.xpath(".//span[contains(@class, 'steamplay')]").empty?
|
81
|
-
|
82
79
|
platforms
|
83
80
|
end
|
84
81
|
|
@@ -90,7 +87,6 @@ class GameListScraper
|
|
90
87
|
node = entry.xpath(".//span[contains(@class, 'search_review_summary')]")
|
91
88
|
result = nil
|
92
89
|
result = node.attribute('data-store-tooltip').value unless node.empty?
|
93
|
-
|
94
90
|
result
|
95
91
|
end
|
96
92
|
|
@@ -98,7 +94,6 @@ class GameListScraper
|
|
98
94
|
review_string = get_review_contents(entry)
|
99
95
|
matches = /.*(\d\d)[%]/i.match(review_string)
|
100
96
|
review_percentage = matches[1] unless matches.nil?
|
101
|
-
|
102
97
|
review_percentage
|
103
98
|
end
|
104
99
|
|
@@ -106,7 +101,6 @@ class GameListScraper
|
|
106
101
|
review_string = get_review_contents(entry)
|
107
102
|
matches = /.*\d\d[%] of the ([0-9,]*) user/i.match(review_string)
|
108
103
|
num_reviews = matches[1] unless matches.nil?
|
109
|
-
|
110
104
|
num_reviews
|
111
105
|
end
|
112
106
|
|
@@ -8,7 +8,9 @@ class GamePageScraper
|
|
8
8
|
end
|
9
9
|
|
10
10
|
def scrape(games_hash)
|
11
|
-
result = Parallel.map(games_hash,
|
11
|
+
result = Parallel.map(games_hash,
|
12
|
+
progress: 'Scraping additional per game data',
|
13
|
+
in_processes: 8) do |game|
|
12
14
|
url = game[:url]
|
13
15
|
scrape_game(game, url) unless url.nil?
|
14
16
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: steam_scraper
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.1.7
|
4
|
+
version: 1.1.8
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Robert Gardner
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-09-
|
11
|
+
date: 2016-09-12 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|