steam_scraper 1.1.7 → 1.1.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 3eb0595031ddaac9c22e6501cc1aecba8d22a8bf
|
4
|
+
data.tar.gz: 399dbd713d76fb4f28b3a81bd20ee2fe978facf1
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d6827c7c7ba1c5127ef85ff87065cb8061ec4e7166ef856bc245f018a68ec453bbb4271fc398e9b7daf5f351d06aac0b512c5cfb66910e91206f06ee5057d447
|
7
|
+
data.tar.gz: a70456ea7d52266e01025b25b46e0ef0142f75f8ddacfcdb02523dbe6471f02e101af3f918e2def8a04189a00858d8e5ec888e83d182330e1d6799b859b85767
|
data/lib/steam_scraper.rb
CHANGED
@@ -13,6 +13,7 @@ module SteamScraper
|
|
13
13
|
|
14
14
|
def scrape(first_page = 1, last_page = nil)
|
15
15
|
scraped_game_list = @game_list_scraper.scrape(first_page, last_page)
|
16
|
+
puts 'Initial scrape complete. Found ' + scraped_game_list.length.to_s + ' games.'
|
16
17
|
final_game_list = @game_page_scraper.scrape(scraped_game_list)
|
17
18
|
|
18
19
|
final_game_list
|
@@ -5,10 +5,16 @@ class GameListScraper
|
|
5
5
|
def initialize(*_args)
|
6
6
|
@game_list = []
|
7
7
|
@page_retriever = PageRetriever.new
|
8
|
+
init_last_page_num
|
9
|
+
end
|
10
|
+
|
11
|
+
def init_last_page_num
|
8
12
|
current_page_contents = get_page_contents(site + 1.to_s)
|
9
|
-
|
10
|
-
|
11
|
-
|
13
|
+
@last_page_num = current_page_contents.xpath("//div[contains(@class, 'search_pagination_right')]")
|
14
|
+
.text.scan(/(\d+)/i).flatten.last.to_i
|
15
|
+
rescue StandardError => e
|
16
|
+
puts 'Could not connect to Steam Store'
|
17
|
+
raise e
|
12
18
|
end
|
13
19
|
|
14
20
|
def site
|
@@ -22,13 +28,15 @@ class GameListScraper
|
|
22
28
|
def search_results(page_number)
|
23
29
|
current_page_contents = get_page_contents(site + page_number.to_s)
|
24
30
|
current_page_contents.xpath("//div[@id='search_result_container']/div/a")
|
31
|
+
rescue
|
32
|
+
nil
|
25
33
|
end
|
26
34
|
|
27
35
|
def scrape(first_page = 1, last_page = nil)
|
28
36
|
last_page ||= @last_page_num
|
29
|
-
# scrape each search page
|
30
37
|
result = Parallel.map(first_page..last_page,
|
31
|
-
progress: 'Scraping Steam Store pages ' + [first_page..last_page].join(' to ')
|
38
|
+
progress: 'Scraping Steam Store pages ' + [first_page..last_page].join(' to '),
|
39
|
+
in_processes: 8) do |page|
|
32
40
|
items_on_page = search_results(page)
|
33
41
|
scrape_page(items_on_page)
|
34
42
|
end
|
@@ -40,7 +48,6 @@ class GameListScraper
|
|
40
48
|
current_page.each do |entry|
|
41
49
|
entries.push(scrape_entry(entry))
|
42
50
|
end
|
43
|
-
|
44
51
|
entries
|
45
52
|
end
|
46
53
|
|
@@ -54,22 +61,13 @@ class GameListScraper
|
|
54
61
|
|
55
62
|
def scrape_price(entry)
|
56
63
|
entry.xpath(".//div[contains(@class, 'search_price')
|
57
|
-
and not(contains(@class, 'search_price_discount_combined'))]")
|
58
|
-
.text
|
59
|
-
.strip
|
60
|
-
.split('$')
|
61
|
-
.last
|
64
|
+
and not(contains(@class, 'search_price_discount_combined'))]").text.strip.split('$').last
|
62
65
|
end
|
63
66
|
|
64
67
|
def scrape_release_date(entry)
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
date = Date.parse(date_node_text)
|
69
|
-
rescue ArgumentError
|
70
|
-
end
|
71
|
-
|
72
|
-
date
|
68
|
+
Date.parse(entry.xpath(".//div[contains(@class, 'search_released')]").text)
|
69
|
+
rescue
|
70
|
+
nil
|
73
71
|
end
|
74
72
|
|
75
73
|
def scrape_platforms(entry)
|
@@ -78,7 +76,6 @@ class GameListScraper
|
|
78
76
|
platforms.push('macOS') unless entry.xpath(".//span[contains(@class, 'mac')]").empty?
|
79
77
|
platforms.push('Linux') unless entry.xpath(".//span[contains(@class, 'linux')]").empty?
|
80
78
|
platforms.push('Steamplay') unless entry.xpath(".//span[contains(@class, 'steamplay')]").empty?
|
81
|
-
|
82
79
|
platforms
|
83
80
|
end
|
84
81
|
|
@@ -90,7 +87,6 @@ class GameListScraper
|
|
90
87
|
node = entry.xpath(".//span[contains(@class, 'search_review_summary')]")
|
91
88
|
result = nil
|
92
89
|
result = node.attribute('data-store-tooltip').value unless node.empty?
|
93
|
-
|
94
90
|
result
|
95
91
|
end
|
96
92
|
|
@@ -98,7 +94,6 @@ class GameListScraper
|
|
98
94
|
review_string = get_review_contents(entry)
|
99
95
|
matches = /.*(\d\d)[%]/i.match(review_string)
|
100
96
|
review_percentage = matches[1] unless matches.nil?
|
101
|
-
|
102
97
|
review_percentage
|
103
98
|
end
|
104
99
|
|
@@ -106,7 +101,6 @@ class GameListScraper
|
|
106
101
|
review_string = get_review_contents(entry)
|
107
102
|
matches = /.*\d\d[%] of the ([0-9,]*) user/i.match(review_string)
|
108
103
|
num_reviews = matches[1] unless matches.nil?
|
109
|
-
|
110
104
|
num_reviews
|
111
105
|
end
|
112
106
|
|
@@ -8,7 +8,9 @@ class GamePageScraper
|
|
8
8
|
end
|
9
9
|
|
10
10
|
def scrape(games_hash)
|
11
|
-
result = Parallel.map(games_hash,
|
11
|
+
result = Parallel.map(games_hash,
|
12
|
+
progress: 'Scraping additional per game data',
|
13
|
+
in_processes: 8) do |game|
|
12
14
|
url = game[:url]
|
13
15
|
scrape_game(game, url) unless url.nil?
|
14
16
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: steam_scraper
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.1.7
|
4
|
+
version: 1.1.8
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Robert Gardner
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-09-
|
11
|
+
date: 2016-09-12 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|