steam_scraper 1.1.7 → 1.1.8

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 2b682929fb38924b8ad5b928ad27f98b4cc666f1
4
- data.tar.gz: b1b40cebd77d5202362d006af504e47ed5f8b473
3
+ metadata.gz: 3eb0595031ddaac9c22e6501cc1aecba8d22a8bf
4
+ data.tar.gz: 399dbd713d76fb4f28b3a81bd20ee2fe978facf1
5
5
  SHA512:
6
- metadata.gz: d88b7769ea0294bb94ac839f8597c4f461f3d9eb6bb6cf9af26c6cde55b2c8cbb931a3016c44c60417669962228b03f12ccbf9aec92f9d96baeca7421e79f2e2
7
- data.tar.gz: edf0a0f2980c07e5802b4823392fc5c40e56e85dbe328299938948e22b739d1144ba9a52f548322a1cc67d161aa1617a3c2f0499e694316a70c5b5843275b5db
6
+ metadata.gz: d6827c7c7ba1c5127ef85ff87065cb8061ec4e7166ef856bc245f018a68ec453bbb4271fc398e9b7daf5f351d06aac0b512c5cfb66910e91206f06ee5057d447
7
+ data.tar.gz: a70456ea7d52266e01025b25b46e0ef0142f75f8ddacfcdb02523dbe6471f02e101af3f918e2def8a04189a00858d8e5ec888e83d182330e1d6799b859b85767
@@ -13,6 +13,7 @@ module SteamScraper
13
13
 
14
14
  def scrape(first_page = 1, last_page = nil)
15
15
  scraped_game_list = @game_list_scraper.scrape(first_page, last_page)
16
+ puts 'Initial scrape complete. Found ' + scraped_game_list.length.to_s + ' games.'
16
17
  final_game_list = @game_page_scraper.scrape(scraped_game_list)
17
18
 
18
19
  final_game_list
@@ -5,10 +5,16 @@ class GameListScraper
5
5
  def initialize(*_args)
6
6
  @game_list = []
7
7
  @page_retriever = PageRetriever.new
8
+ init_last_page_num
9
+ end
10
+
11
+ def init_last_page_num
8
12
  current_page_contents = get_page_contents(site + 1.to_s)
9
- pagination_contents = current_page_contents.xpath("//div[contains(@class, 'search_pagination_right')]") .text
10
- last_page_number = pagination_contents.scan(/(\d+)/i) .flatten.last
11
- @last_page_num = last_page_number.to_i
13
+ @last_page_num = current_page_contents.xpath("//div[contains(@class, 'search_pagination_right')]")
14
+ .text.scan(/(\d+)/i).flatten.last.to_i
15
+ rescue StandardError => e
16
+ puts 'Could not connect to Steam Store'
17
+ raise e
12
18
  end
13
19
 
14
20
  def site
@@ -22,13 +28,15 @@ class GameListScraper
22
28
  def search_results(page_number)
23
29
  current_page_contents = get_page_contents(site + page_number.to_s)
24
30
  current_page_contents.xpath("//div[@id='search_result_container']/div/a")
31
+ rescue
32
+ nil
25
33
  end
26
34
 
27
35
  def scrape(first_page = 1, last_page = nil)
28
36
  last_page ||= @last_page_num
29
- # scrape each search page
30
37
  result = Parallel.map(first_page..last_page,
31
- progress: 'Scraping Steam Store pages ' + [first_page..last_page].join(' to ')) do |page|
38
+ progress: 'Scraping Steam Store pages ' + [first_page..last_page].join(' to '),
39
+ in_processes: 8) do |page|
32
40
  items_on_page = search_results(page)
33
41
  scrape_page(items_on_page)
34
42
  end
@@ -40,7 +48,6 @@ class GameListScraper
40
48
  current_page.each do |entry|
41
49
  entries.push(scrape_entry(entry))
42
50
  end
43
-
44
51
  entries
45
52
  end
46
53
 
@@ -54,22 +61,13 @@ class GameListScraper
54
61
 
55
62
  def scrape_price(entry)
56
63
  entry.xpath(".//div[contains(@class, 'search_price')
57
- and not(contains(@class, 'search_price_discount_combined'))]")
58
- .text
59
- .strip
60
- .split('$')
61
- .last
64
+ and not(contains(@class, 'search_price_discount_combined'))]").text.strip.split('$').last
62
65
  end
63
66
 
64
67
  def scrape_release_date(entry)
65
- date_node_text = entry.xpath(".//div[contains(@class, 'search_released')]").text
66
- date = nil
67
- begin
68
- date = Date.parse(date_node_text)
69
- rescue ArgumentError
70
- end
71
-
72
- date
68
+ Date.parse(entry.xpath(".//div[contains(@class, 'search_released')]").text)
69
+ rescue
70
+ nil
73
71
  end
74
72
 
75
73
  def scrape_platforms(entry)
@@ -78,7 +76,6 @@ class GameListScraper
78
76
  platforms.push('macOS') unless entry.xpath(".//span[contains(@class, 'mac')]").empty?
79
77
  platforms.push('Linux') unless entry.xpath(".//span[contains(@class, 'linux')]").empty?
80
78
  platforms.push('Steamplay') unless entry.xpath(".//span[contains(@class, 'steamplay')]").empty?
81
-
82
79
  platforms
83
80
  end
84
81
 
@@ -90,7 +87,6 @@ class GameListScraper
90
87
  node = entry.xpath(".//span[contains(@class, 'search_review_summary')]")
91
88
  result = nil
92
89
  result = node.attribute('data-store-tooltip').value unless node.empty?
93
-
94
90
  result
95
91
  end
96
92
 
@@ -98,7 +94,6 @@ class GameListScraper
98
94
  review_string = get_review_contents(entry)
99
95
  matches = /.*(\d\d)[%]/i.match(review_string)
100
96
  review_percentage = matches[1] unless matches.nil?
101
-
102
97
  review_percentage
103
98
  end
104
99
 
@@ -106,7 +101,6 @@ class GameListScraper
106
101
  review_string = get_review_contents(entry)
107
102
  matches = /.*\d\d[%] of the ([0-9,]*) user/i.match(review_string)
108
103
  num_reviews = matches[1] unless matches.nil?
109
-
110
104
  num_reviews
111
105
  end
112
106
 
@@ -8,7 +8,9 @@ class GamePageScraper
8
8
  end
9
9
 
10
10
  def scrape(games_hash)
11
- result = Parallel.map(games_hash, progress: 'Scraping additional per game data') do |game|
11
+ result = Parallel.map(games_hash,
12
+ progress: 'Scraping additional per game data',
13
+ in_processes: 8) do |game|
12
14
  url = game[:url]
13
15
  scrape_game(game, url) unless url.nil?
14
16
  end
@@ -7,8 +7,7 @@ class PageRetriever
7
7
  def retrieve(url)
8
8
  page_contents = HTTParty.get(url)
9
9
  Nokogiri::HTML(page_contents)
10
- rescue Error => e
11
- puts e
10
+ rescue
12
11
  nil
13
12
  end
14
13
  end
@@ -1,3 +1,3 @@
1
1
  module SteamScraper
2
- VERSION = '1.1.7'.freeze
2
+ VERSION = '1.1.8'.freeze
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: steam_scraper
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.7
4
+ version: 1.1.8
5
5
  platform: ruby
6
6
  authors:
7
7
  - Robert Gardner
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2016-09-11 00:00:00.000000000 Z
11
+ date: 2016-09-12 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler