GDNewsScraper 3.0.4 → 3.0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/GDNewsScraper/scrapers/polygon_com/news.rb +201 -142
- data/lib/GDNewsScraper/string.rb +5 -0
- data/lib/GDNewsScraper/version.rb +6 -2
- metadata +3 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA1:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: c37e3bbf8420be9d2bf182d091eb5b8713e46679
|
|
4
|
+
data.tar.gz: c20ce1f2ee2b57757bd0d9d87b3bdc34fd751cf7
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 74639942afca8966f6602c642a89cb31979e896a4656c8ed6dbf20d5e8764a642ab62f6a05566265b75531f685c542d1ff0daaa8ce0f474e3b882355a1f9afc3
|
|
7
|
+
data.tar.gz: 1783aa15b5339131027ecf0aa0d2776d4959664157cee1305eca2acf206e43551f9079e4a60a322fe7a99f95f1513c302d8e13cf9529c95a97aa6e2476215b49
|
|
@@ -7,49 +7,31 @@ module GDNewsScraper::Scrapers
|
|
|
7
7
|
"User-Agent" => "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36"
|
|
8
8
|
}
|
|
9
9
|
|
|
10
|
-
|
|
10
|
+
URL ||= 'https://www.polygon.com'
|
|
11
11
|
|
|
12
12
|
WHITELIST ||= {
|
|
13
13
|
default: ['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'p', 'figure', 'blockquote', 'ul', 'ol'],
|
|
14
14
|
inner: ['strong', 'em', 'li']
|
|
15
15
|
}
|
|
16
16
|
|
|
17
|
-
DOM ||= {
|
|
18
|
-
article: {
|
|
19
|
-
wrapper: '.c-compact-river',
|
|
20
|
-
container: '.c-compact-river__entry',
|
|
21
|
-
inner_container: '.c-entry-box--compact',
|
|
22
|
-
inner_container_video: '.c-entry-box--compact--video',
|
|
23
|
-
title: '.c-entry-box--compact__title',
|
|
24
|
-
cover: '.c-entry-box--compact__image',
|
|
25
|
-
meta: '.c-byline'
|
|
26
|
-
},
|
|
27
|
-
|
|
28
|
-
pagination: {
|
|
29
|
-
previous: '.c-pagination__prev',
|
|
30
|
-
info: '.c-pagination__text',
|
|
31
|
-
next: '.c-pagination__next'
|
|
32
|
-
}
|
|
33
|
-
}
|
|
34
|
-
|
|
35
17
|
class News
|
|
36
18
|
attr_accessor :stream
|
|
37
19
|
|
|
38
|
-
def initialize(offset =
|
|
20
|
+
def initialize(offset = nil)
|
|
39
21
|
unless offset.nil?
|
|
40
|
-
uri = "#{ GDNewsScraper::Scrapers::PolygonCOM::
|
|
22
|
+
uri = "#{ GDNewsScraper::Scrapers::PolygonCOM::URL }/news/archives/#{ offset }"
|
|
41
23
|
|
|
42
24
|
@page = Nokogiri::HTML(open(uri, GDNewsScraper::Scrapers::PolygonCOM::HEADERS))
|
|
43
25
|
@stream = Hash.new
|
|
44
26
|
|
|
45
27
|
stream[:stream] = Hash.new
|
|
46
|
-
stream[:stream][:size] = @page.
|
|
47
|
-
stream[:stream][:pages] = @page.
|
|
48
|
-
stream[:stream][:prev] = @page.
|
|
49
|
-
stream[:stream][:next] = @page.
|
|
28
|
+
stream[:stream][:size] = @page.at('.c-pagination__text').text.split.first.to_num
|
|
29
|
+
stream[:stream][:pages] = @page.at('.c-pagination__text').text.split.last.to_num
|
|
30
|
+
stream[:stream][:prev] = @page.at('.c-pagination__prev')&.attr('href')&.split('/')&.last.to_i
|
|
31
|
+
stream[:stream][:next] = @page.at('.c-pagination__next')&.attr('href')&.split('/')&.last.to_i
|
|
50
32
|
|
|
51
33
|
stream[:feed] = Hash.new
|
|
52
|
-
stream[:feed][:url] = GDNewsScraper::Scrapers::PolygonCOM::
|
|
34
|
+
stream[:feed][:url] = GDNewsScraper::Scrapers::PolygonCOM::URL
|
|
53
35
|
stream[:feed][:source] = 'polygon'
|
|
54
36
|
stream[:feed][:label] = 'Polygon'
|
|
55
37
|
|
|
@@ -60,8 +42,8 @@ module GDNewsScraper::Scrapers
|
|
|
60
42
|
end
|
|
61
43
|
|
|
62
44
|
def perform
|
|
63
|
-
@page.css(
|
|
64
|
-
stream[:articles]
|
|
45
|
+
@page.css('.c-compact-river__entry').first(2).each do |article|
|
|
46
|
+
stream[:articles].push(parse(article))
|
|
65
47
|
end
|
|
66
48
|
end
|
|
67
49
|
|
|
@@ -72,38 +54,74 @@ module GDNewsScraper::Scrapers
|
|
|
72
54
|
def parse(article)
|
|
73
55
|
pulse = Hash.new
|
|
74
56
|
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
57
|
+
# This allows the Parser to get its data from the Index page, when the
|
|
58
|
+
# article is a Nokogiri::XML or from the Article page when the article
|
|
59
|
+
# is a URL.
|
|
60
|
+
#
|
|
61
|
+
# Passing a URL is mainly for debugging in case an Article fails to
|
|
62
|
+
# parse and should only be used as such..
|
|
63
|
+
#
|
|
64
|
+
if article.is_a?(String)
|
|
65
|
+
begin
|
|
66
|
+
article_page = Nokogiri::HTML(open(article, GDNewsScraper::Scrapers::PolygonCOM::HEADERS))
|
|
67
|
+
|
|
68
|
+
is_a_video = article_page.at('.c-video-embed').nil?
|
|
69
|
+
|
|
70
|
+
key = article_page.at('span[data-content-admin-id]').attr('data-content-admin-id').to_i
|
|
71
|
+
url = article
|
|
72
|
+
title = strip(article_page.css('.c-entry-hero').at('.c-page-title'))
|
|
73
|
+
cover = (is_a_video ? nil : article_page.css('.l-col__main').at('.e-image__image').attr('data-original'))
|
|
74
|
+
author = strip(article_page.css('.c-entry-hero').at('.c-byline').css('.c-byline__item > a').children[0])
|
|
75
|
+
|
|
76
|
+
begin
|
|
77
|
+
article_date = strip(article_page.css('.c-entry-hero').at('.c-byline').css('time.c-byline__item'))
|
|
78
|
+
parsed_date = DateTime.parse(article_date)
|
|
79
|
+
|
|
80
|
+
date = parsed_date.to_time.to_i
|
|
81
|
+
|
|
82
|
+
# Never failed so not entirely sure what to rescue from, but with
|
|
83
|
+
# dates it's always risky not to rescue
|
|
84
|
+
#
|
|
85
|
+
# TODO: When it fails, find out why and rescue from that instead
|
|
86
|
+
# of rescuing from 'everything' ..
|
|
87
|
+
#
|
|
88
|
+
rescue
|
|
89
|
+
date = nil
|
|
90
|
+
end
|
|
91
|
+
rescue TypeError
|
|
92
|
+
raise ArgumentError.new('Invalid URL')
|
|
93
|
+
end
|
|
94
|
+
elsif article.is_a?(Nokogiri::XML::Element)
|
|
95
|
+
is_a_video = !article.at('.c-entry-box--compact--video').nil?
|
|
96
|
+
|
|
97
|
+
key = article.at('.c-entry-box--compact--article').attr('data-chorus-optimize-id').to_i
|
|
98
|
+
url = article.at('.c-entry-box--compact__title').at('> a').attr('href')
|
|
99
|
+
title = strip(article.at('.c-entry-box--compact__title'))
|
|
100
|
+
cover = (article.at('.c-entry-box--compact__image').at('noscript').at('img').attr('src') rescue nil)
|
|
101
|
+
author = strip(article.at('.c-byline').css('.c-byline__item > a').children[0])
|
|
102
|
+
date = JSON.parse(article.at('.c-byline').attr('data-cdata'))['timestamp'].to_i
|
|
103
|
+
else
|
|
104
|
+
raise ArgumentError.new("Make sure the 'article' argument is either a Hash containing the article's initial metadata or a String which is the article's URL")
|
|
88
105
|
end
|
|
89
|
-
|
|
106
|
+
|
|
107
|
+
pulse[:id] = key
|
|
108
|
+
pulse[:hash] = ::Base64.encode64("#{ title } - #{ key }")
|
|
109
|
+
pulse[:cover] = cover
|
|
90
110
|
pulse[:url] = url
|
|
91
111
|
pulse[:title] = title
|
|
92
|
-
pulse[:author] =
|
|
93
|
-
pulse[:date] =
|
|
112
|
+
pulse[:author] = author
|
|
113
|
+
pulse[:date] = date
|
|
94
114
|
pulse[:content] = parse_article_body(url, is_a_video)
|
|
95
115
|
pulse[:tags] = title.downcase.split
|
|
96
116
|
|
|
97
117
|
return pulse
|
|
98
118
|
rescue => e
|
|
99
|
-
"There was a problem while parsing Article
|
|
119
|
+
"There was a problem while parsing this Article: #{ e }"
|
|
100
120
|
end
|
|
101
121
|
|
|
102
|
-
private
|
|
103
|
-
|
|
104
122
|
def parse_article_body(article_url, is_a_video = false)
|
|
105
123
|
article_page = Nokogiri::HTML(open(article_url, GDNewsScraper::Scrapers::PolygonCOM::HEADERS))
|
|
106
|
-
article_container = article_page.
|
|
124
|
+
article_container = article_page.at('.c-entry-content')
|
|
107
125
|
|
|
108
126
|
article_body = {
|
|
109
127
|
galleries: { },
|
|
@@ -118,7 +136,7 @@ module GDNewsScraper::Scrapers
|
|
|
118
136
|
|
|
119
137
|
if is_a_video
|
|
120
138
|
iframe = article_page.at('.c-video-embed--media').at('iframe')
|
|
121
|
-
iframe_id =
|
|
139
|
+
iframe_id = unique_id
|
|
122
140
|
|
|
123
141
|
article_body[:videos][iframe_id] = {}
|
|
124
142
|
article_body[:videos][iframe_id][:url] = iframe.attr('src')
|
|
@@ -127,148 +145,189 @@ module GDNewsScraper::Scrapers
|
|
|
127
145
|
end
|
|
128
146
|
|
|
129
147
|
article_container.children.each do |node|
|
|
130
|
-
|
|
148
|
+
content = node.content.strip.empty?
|
|
149
|
+
text = node.text.strip.empty?
|
|
150
|
+
attributes = node.attributes.empty?
|
|
151
|
+
children = node.children.empty?
|
|
131
152
|
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
153
|
+
if content && text && attributes && children
|
|
154
|
+
node.remove
|
|
155
|
+
else
|
|
156
|
+
if node.name == 'div'
|
|
135
157
|
|
|
136
|
-
|
|
137
|
-
|
|
158
|
+
# Check to see if the div contains an embedded video
|
|
159
|
+
#
|
|
160
|
+
iframe = node.at('iframe')
|
|
138
161
|
|
|
139
|
-
|
|
140
|
-
|
|
162
|
+
if iframe # YouTube videos
|
|
163
|
+
iframe_id = unique_id
|
|
141
164
|
|
|
142
|
-
|
|
143
|
-
|
|
165
|
+
article_body[:videos][iframe_id] = {}
|
|
166
|
+
article_body[:videos][iframe_id][:url] = iframe.attr('src')
|
|
144
167
|
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
gallery = node.at('.c-image-gallery')
|
|
168
|
+
article_body[:body] << iframe.replace("{{video:#{ iframe_id }}}").to_html
|
|
169
|
+
end
|
|
148
170
|
|
|
149
|
-
|
|
150
|
-
|
|
171
|
+
# Check to see if the Article has a video by Polygon, which is
|
|
172
|
+
# embedded differently than a YouTube video..
|
|
173
|
+
#
|
|
174
|
+
polygon_video = node.attributes['data-volume-uuid']
|
|
175
|
+
|
|
176
|
+
unless polygon_video.nil?
|
|
177
|
+
id = unique_id
|
|
178
|
+
|
|
179
|
+
article_body[:videos][id] = {}
|
|
180
|
+
article_body[:videos][id][:label] = node.attr('data-analytics-label').split('|').first.strip
|
|
181
|
+
article_body[:videos][id][:url] = "https://volume.vox-cdn.com/embed/#{ node.attr('data-volume-uuid') }"
|
|
182
|
+
|
|
183
|
+
article_body[:body] << node.replace("{{video:#{ id }}}").to_html
|
|
184
|
+
end
|
|
185
|
+
|
|
186
|
+
# Check to see if the div contains a gallery
|
|
187
|
+
#
|
|
188
|
+
gallery = node.at('.c-image-gallery')
|
|
189
|
+
|
|
190
|
+
if gallery
|
|
191
|
+
gallery_container = gallery.at('.c-image-gallery__thumbs-viewport')
|
|
151
192
|
|
|
152
|
-
|
|
153
|
-
|
|
193
|
+
gallery_id = unique_id
|
|
194
|
+
article_body[:galleries][gallery_id] = []
|
|
154
195
|
|
|
155
|
-
|
|
156
|
-
|
|
196
|
+
gallery_container.children.children.each do |image_container|
|
|
197
|
+
image = image_container.at('a')
|
|
157
198
|
|
|
158
|
-
|
|
159
|
-
|
|
199
|
+
if image
|
|
200
|
+
article_body[:galleries][gallery_id] << image.attr('href')
|
|
201
|
+
end
|
|
160
202
|
end
|
|
203
|
+
|
|
204
|
+
article_body[:body] << gallery.replace("{{gallery:#{ gallery_id }}}").to_html
|
|
161
205
|
end
|
|
162
206
|
|
|
163
|
-
|
|
164
|
-
end
|
|
207
|
+
twitdget = node.at('.twitter-tweet')
|
|
165
208
|
|
|
166
|
-
|
|
209
|
+
if twitdget
|
|
210
|
+
article_body[:body] << twitdget.to_html
|
|
211
|
+
end
|
|
167
212
|
|
|
168
|
-
|
|
169
|
-
|
|
213
|
+
redditget = node.at('.reddit-card')
|
|
214
|
+
|
|
215
|
+
if redditget
|
|
216
|
+
article_body[:body] << redditget.to_html
|
|
217
|
+
end
|
|
170
218
|
end
|
|
171
219
|
|
|
172
|
-
|
|
220
|
+
# First ensure the node is an actual element. This removes random HTML elements
|
|
221
|
+
#
|
|
222
|
+
# => node.element?
|
|
223
|
+
#
|
|
224
|
+
# Secondly, ensure the node is what we actually want. We don't want <div>'s
|
|
225
|
+
# which are usually used for placing inline advertisements or content-specific
|
|
226
|
+
# only to that website
|
|
227
|
+
#
|
|
228
|
+
# => WHITELIST[:default].include?(node.name)
|
|
229
|
+
#
|
|
230
|
+
if node.element? && GDNewsScraper::Scrapers::PolygonCOM::WHITELIST[:default].include?(node.name)
|
|
231
|
+
case node.name
|
|
232
|
+
when 'figure'
|
|
173
233
|
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
end
|
|
177
|
-
end
|
|
234
|
+
image = node.at('.e-image__image')
|
|
235
|
+
image_url = image.attr('data-original')
|
|
178
236
|
|
|
179
|
-
# First ensure the node is an actual element. This removes random HTML elements
|
|
180
|
-
#
|
|
181
|
-
# => node.element?
|
|
182
|
-
#
|
|
183
|
-
# Secondly, ensure the node is what we actually want. We don't want <div>'s
|
|
184
|
-
# which are usually used for placing inline advertisements or content-specific
|
|
185
|
-
# only to that website
|
|
186
|
-
#
|
|
187
|
-
# => WHITELIST[:default].include?(node.name)
|
|
188
|
-
#
|
|
189
|
-
if node.element? && GDNewsScraper::Scrapers::PolygonCOM::WHITELIST[:default].include?(node.name)
|
|
190
|
-
case node.name
|
|
191
|
-
when 'figure'
|
|
192
|
-
|
|
193
|
-
image = node.css('.e-image__image').first
|
|
194
|
-
image_url = image.attr('data-original')
|
|
195
|
-
|
|
196
|
-
begin
|
|
197
237
|
if image_url.split('.').last == 'gif'
|
|
198
|
-
|
|
238
|
+
id = unique_id
|
|
199
239
|
|
|
200
|
-
article_body[:images][
|
|
201
|
-
article_body[:images][
|
|
240
|
+
article_body[:images][id] = { }
|
|
241
|
+
article_body[:images][id][:url] = image_url
|
|
202
242
|
|
|
203
|
-
article_body[:body] << node.replace("{{image:#{
|
|
243
|
+
article_body[:body] << node.replace("{{image:#{ id }}}").to_html
|
|
204
244
|
else
|
|
205
|
-
|
|
206
|
-
|
|
245
|
+
id = unique_id
|
|
246
|
+
|
|
247
|
+
figure(article_body, id, node, image, image_url)
|
|
248
|
+
|
|
249
|
+
article_body[:body] << node.replace("{{figure:#{ id }}}").to_html
|
|
250
|
+
end
|
|
251
|
+
|
|
252
|
+
else
|
|
207
253
|
|
|
208
|
-
|
|
254
|
+
node.children.each do |inner_node|
|
|
255
|
+
case inner_node.name
|
|
256
|
+
when 'a'
|
|
257
|
+
id = unique_id
|
|
258
|
+
|
|
259
|
+
article_body[:anchors][id] = {
|
|
260
|
+
text: inner_node.children.text,
|
|
261
|
+
url: inner_node.attr('href')
|
|
262
|
+
}
|
|
209
263
|
|
|
210
|
-
|
|
264
|
+
inner_node.replace("{{anchor:#{ id }}}")
|
|
265
|
+
when 'figure'
|
|
266
|
+
id = unique_id
|
|
211
267
|
|
|
212
|
-
|
|
268
|
+
image = node.at('.e-image__image')
|
|
269
|
+
image_url = image.attr('data-original')
|
|
213
270
|
|
|
214
|
-
|
|
215
|
-
article_body[:figures][figure_id][:title] = image_title
|
|
216
|
-
article_body[:figures][figure_id][:alt] = image_alt
|
|
271
|
+
figure(article_body, id, node, image, image_url)
|
|
217
272
|
|
|
218
|
-
|
|
219
|
-
article_body[:
|
|
220
|
-
article_body[:figures][figure_id][:cite] = image_meta.first.at('cite')&.text
|
|
273
|
+
node = node.replace("{{figure:#{ id }}}").to_html
|
|
274
|
+
article_body[:body] << node
|
|
221
275
|
end
|
|
222
|
-
|
|
223
|
-
article_body[:body] << node.replace("{{figure:#{ figure_id }}}").to_html
|
|
224
276
|
end
|
|
225
|
-
rescue
|
|
226
|
-
raise 'Unknown format, please review.'
|
|
227
|
-
end
|
|
228
|
-
else
|
|
229
277
|
|
|
230
|
-
node.children.each do |url|
|
|
231
278
|
begin
|
|
232
|
-
if url.name == 'a'
|
|
233
|
-
url_id = random_string
|
|
234
|
-
|
|
235
|
-
article_body[:anchors][url_id] = {
|
|
236
|
-
text: url.children.text,
|
|
237
|
-
url: url.attributes['href'].value
|
|
238
|
-
}
|
|
239
279
|
|
|
240
|
-
|
|
241
|
-
|
|
280
|
+
# Remove all attributes
|
|
281
|
+
#
|
|
282
|
+
parsed_node = node.xpath('.//@*').remove
|
|
283
|
+
|
|
284
|
+
# Return clean HTML, including HTML elements and text
|
|
285
|
+
#
|
|
286
|
+
parsed_node = node.to_html
|
|
287
|
+
|
|
242
288
|
rescue
|
|
243
|
-
|
|
289
|
+
|
|
244
290
|
end
|
|
245
291
|
end
|
|
246
292
|
|
|
247
|
-
|
|
248
|
-
#
|
|
249
|
-
parsed_node = node.xpath('.//@*').remove
|
|
250
|
-
|
|
251
|
-
# Return clean HTML, including HTML elements and text
|
|
252
|
-
#
|
|
253
|
-
parsed_node = node.to_html
|
|
293
|
+
article_body[:body] << parsed_node unless parsed_node.nil?
|
|
254
294
|
end
|
|
255
|
-
|
|
256
|
-
article_body[:body] << parsed_node
|
|
257
295
|
end
|
|
258
296
|
end
|
|
259
297
|
|
|
260
298
|
return article_body
|
|
261
299
|
rescue => e
|
|
262
|
-
"There was a problem while parsing
|
|
300
|
+
"There was a problem while parsing this Article: #{ e }"
|
|
263
301
|
end
|
|
264
302
|
|
|
303
|
+
def figure(article_body, id, node, image, image_url)
|
|
304
|
+
article_body[:figures][id] = { }
|
|
305
|
+
|
|
306
|
+
article_body[:figures][id][:image] = image_url
|
|
307
|
+
article_body[:figures][id][:title] = image.at('img').attr('title')
|
|
308
|
+
article_body[:figures][id][:alt] = image.at('img').attr('alt')
|
|
309
|
+
|
|
310
|
+
image_meta = node.at('.e-image__meta')
|
|
311
|
+
|
|
312
|
+
unless image_meta.nil?
|
|
313
|
+
article_body[:figures][id][:caption] = strip(image_meta.at('figcaption'))
|
|
314
|
+
article_body[:figures][id][:cite] = strip(image_meta.at('cite'))
|
|
315
|
+
end
|
|
316
|
+
end
|
|
317
|
+
|
|
318
|
+
private
|
|
319
|
+
|
|
265
320
|
def attr(attribute)
|
|
266
321
|
attributes&.fetch(attribute, nil)&.value
|
|
267
322
|
end
|
|
268
323
|
|
|
269
|
-
def
|
|
324
|
+
def strip(string)
|
|
325
|
+
string&.text&.strip
|
|
326
|
+
end
|
|
327
|
+
|
|
328
|
+
def unique_id
|
|
270
329
|
(0...50).map { (65 + rand(25)).chr }.join.to_sym
|
|
271
330
|
end
|
|
272
331
|
end # News
|
|
273
332
|
end # PolygonCOM
|
|
274
|
-
end # GDNewsScraper::Scrapers
|
|
333
|
+
end # GDNewsScraper::Scrapers
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
module GDNewsScraper
|
|
2
|
-
VERSION ||= '3.0.
|
|
2
|
+
VERSION ||= '3.0.6'
|
|
3
3
|
|
|
4
4
|
# => major: A new Source has been added or removed
|
|
5
5
|
# => minor: A Source code has changed drastically to a point where it's not
|
|
@@ -31,5 +31,9 @@ module GDNewsScraper
|
|
|
31
31
|
# v3.0.3 - Added a new method which will refresh the content of an Article
|
|
32
32
|
# v3.0.4 - Fixed an issue caused by Featured Articles which have a different
|
|
33
33
|
# DOM structure
|
|
34
|
-
#
|
|
34
|
+
# v3.0.5 - Adds the possibility to parse an article from its URL rather than
|
|
35
|
+
# having to go through the index page to get its metadata
|
|
36
|
+
# v3.0.6 - Small refactor of the code which also improved parsing speed by
|
|
37
|
+
# about 10% on average! :)
|
|
38
|
+
#
|
|
35
39
|
end
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: GDNewsScraper
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 3.0.
|
|
4
|
+
version: 3.0.6
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Vlad Radulescu
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: exe
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2017-11-
|
|
11
|
+
date: 2017-11-30 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: nokogiri
|
|
@@ -74,6 +74,7 @@ files:
|
|
|
74
74
|
- lib/GDNewsScraper.rb
|
|
75
75
|
- lib/GDNewsScraper/scrapers/polygon_com/news.rb
|
|
76
76
|
- lib/GDNewsScraper/scrapers/polygon_com/reviews.rb
|
|
77
|
+
- lib/GDNewsScraper/string.rb
|
|
77
78
|
- lib/GDNewsScraper/version.rb
|
|
78
79
|
homepage: https://github.com/games-directory/scraper
|
|
79
80
|
licenses:
|