steam_scraper 1.1.7 → 1.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 2b682929fb38924b8ad5b928ad27f98b4cc666f1
4
- data.tar.gz: b1b40cebd77d5202362d006af504e47ed5f8b473
3
+ metadata.gz: 3eb0595031ddaac9c22e6501cc1aecba8d22a8bf
4
+ data.tar.gz: 399dbd713d76fb4f28b3a81bd20ee2fe978facf1
5
5
  SHA512:
6
- metadata.gz: d88b7769ea0294bb94ac839f8597c4f461f3d9eb6bb6cf9af26c6cde55b2c8cbb931a3016c44c60417669962228b03f12ccbf9aec92f9d96baeca7421e79f2e2
7
- data.tar.gz: edf0a0f2980c07e5802b4823392fc5c40e56e85dbe328299938948e22b739d1144ba9a52f548322a1cc67d161aa1617a3c2f0499e694316a70c5b5843275b5db
6
+ metadata.gz: d6827c7c7ba1c5127ef85ff87065cb8061ec4e7166ef856bc245f018a68ec453bbb4271fc398e9b7daf5f351d06aac0b512c5cfb66910e91206f06ee5057d447
7
+ data.tar.gz: a70456ea7d52266e01025b25b46e0ef0142f75f8ddacfcdb02523dbe6471f02e101af3f918e2def8a04189a00858d8e5ec888e83d182330e1d6799b859b85767
@@ -13,6 +13,7 @@ module SteamScraper
13
13
 
14
14
  def scrape(first_page = 1, last_page = nil)
15
15
  scraped_game_list = @game_list_scraper.scrape(first_page, last_page)
16
+ puts 'Initial scrape complete. Found ' + scraped_game_list.length.to_s + ' games.'
16
17
  final_game_list = @game_page_scraper.scrape(scraped_game_list)
17
18
 
18
19
  final_game_list
@@ -5,10 +5,16 @@ class GameListScraper
5
5
  def initialize(*_args)
6
6
  @game_list = []
7
7
  @page_retriever = PageRetriever.new
8
+ init_last_page_num
9
+ end
10
+
11
+ def init_last_page_num
8
12
  current_page_contents = get_page_contents(site + 1.to_s)
9
- pagination_contents = current_page_contents.xpath("//div[contains(@class, 'search_pagination_right')]") .text
10
- last_page_number = pagination_contents.scan(/(\d+)/i) .flatten.last
11
- @last_page_num = last_page_number.to_i
13
+ @last_page_num = current_page_contents.xpath("//div[contains(@class, 'search_pagination_right')]")
14
+ .text.scan(/(\d+)/i).flatten.last.to_i
15
+ rescue StandardError => e
16
+ puts 'Could not connect to Steam Store'
17
+ raise e
12
18
  end
13
19
 
14
20
  def site
@@ -22,13 +28,15 @@ class GameListScraper
22
28
  def search_results(page_number)
23
29
  current_page_contents = get_page_contents(site + page_number.to_s)
24
30
  current_page_contents.xpath("//div[@id='search_result_container']/div/a")
31
+ rescue
32
+ nil
25
33
  end
26
34
 
27
35
  def scrape(first_page = 1, last_page = nil)
28
36
  last_page ||= @last_page_num
29
- # scrape each search page
30
37
  result = Parallel.map(first_page..last_page,
31
- progress: 'Scraping Steam Store pages ' + [first_page..last_page].join(' to ')) do |page|
38
+ progress: 'Scraping Steam Store pages ' + [first_page..last_page].join(' to '),
39
+ in_processes: 8) do |page|
32
40
  items_on_page = search_results(page)
33
41
  scrape_page(items_on_page)
34
42
  end
@@ -40,7 +48,6 @@ class GameListScraper
40
48
  current_page.each do |entry|
41
49
  entries.push(scrape_entry(entry))
42
50
  end
43
-
44
51
  entries
45
52
  end
46
53
 
@@ -54,22 +61,13 @@ class GameListScraper
54
61
 
55
62
  def scrape_price(entry)
56
63
  entry.xpath(".//div[contains(@class, 'search_price')
57
- and not(contains(@class, 'search_price_discount_combined'))]")
58
- .text
59
- .strip
60
- .split('$')
61
- .last
64
+ and not(contains(@class, 'search_price_discount_combined'))]").text.strip.split('$').last
62
65
  end
63
66
 
64
67
  def scrape_release_date(entry)
65
- date_node_text = entry.xpath(".//div[contains(@class, 'search_released')]").text
66
- date = nil
67
- begin
68
- date = Date.parse(date_node_text)
69
- rescue ArgumentError
70
- end
71
-
72
- date
68
+ Date.parse(entry.xpath(".//div[contains(@class, 'search_released')]").text)
69
+ rescue
70
+ nil
73
71
  end
74
72
 
75
73
  def scrape_platforms(entry)
@@ -78,7 +76,6 @@ class GameListScraper
78
76
  platforms.push('macOS') unless entry.xpath(".//span[contains(@class, 'mac')]").empty?
79
77
  platforms.push('Linux') unless entry.xpath(".//span[contains(@class, 'linux')]").empty?
80
78
  platforms.push('Steamplay') unless entry.xpath(".//span[contains(@class, 'steamplay')]").empty?
81
-
82
79
  platforms
83
80
  end
84
81
 
@@ -90,7 +87,6 @@ class GameListScraper
90
87
  node = entry.xpath(".//span[contains(@class, 'search_review_summary')]")
91
88
  result = nil
92
89
  result = node.attribute('data-store-tooltip').value unless node.empty?
93
-
94
90
  result
95
91
  end
96
92
 
@@ -98,7 +94,6 @@ class GameListScraper
98
94
  review_string = get_review_contents(entry)
99
95
  matches = /.*(\d\d)[%]/i.match(review_string)
100
96
  review_percentage = matches[1] unless matches.nil?
101
-
102
97
  review_percentage
103
98
  end
104
99
 
@@ -106,7 +101,6 @@ class GameListScraper
106
101
  review_string = get_review_contents(entry)
107
102
  matches = /.*\d\d[%] of the ([0-9,]*) user/i.match(review_string)
108
103
  num_reviews = matches[1] unless matches.nil?
109
-
110
104
  num_reviews
111
105
  end
112
106
 
@@ -8,7 +8,9 @@ class GamePageScraper
8
8
  end
9
9
 
10
10
  def scrape(games_hash)
11
- result = Parallel.map(games_hash, progress: 'Scraping additional per game data') do |game|
11
+ result = Parallel.map(games_hash,
12
+ progress: 'Scraping additional per game data',
13
+ in_processes: 8) do |game|
12
14
  url = game[:url]
13
15
  scrape_game(game, url) unless url.nil?
14
16
  end
@@ -7,8 +7,7 @@ class PageRetriever
7
7
  def retrieve(url)
8
8
  page_contents = HTTParty.get(url)
9
9
  Nokogiri::HTML(page_contents)
10
- rescue Error => e
11
- puts e
10
+ rescue
12
11
  nil
13
12
  end
14
13
  end
@@ -1,3 +1,3 @@
1
1
  module SteamScraper
2
- VERSION = '1.1.7'.freeze
2
+ VERSION = '1.1.8'.freeze
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: steam_scraper
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.7
4
+ version: 1.1.8
5
5
  platform: ruby
6
6
  authors:
7
7
  - Robert Gardner
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2016-09-11 00:00:00.000000000 Z
11
+ date: 2016-09-12 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler