GDNewsScraper 2.0.0 → 2.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/GDNewsScraper/scrapers/polygon_com/news.rb +14 -3
- data/lib/GDNewsScraper/version.rb +7 -2
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ff2c27e527cd142f2d76e22a35169da1c0a41d43
|
4
|
+
data.tar.gz: 01c0550669575b38a91afbe85641301ab252e45f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e276b6b87a7fd1041e63ee6771dc85ad6b71af7257f11a9bd4052300d0e9309040f690cf5ffaa9a0c74ec4f431dd19a0d1723cd8176a12939a87d1455ed369e2
|
7
|
+
data.tar.gz: d2368e5da859420f830ffcbd40c14d74d01087b727a69209bb7ef93c368efc9ca5dc5dadaf0321c8737a7be2ef9e3f5dfcfec288ba4916d30cca3c68989c857b
|
@@ -34,15 +34,26 @@ module GDNewsScraper
|
|
34
34
|
article_id = article.attributes['data-entry-id'].value.to_i
|
35
35
|
|
36
36
|
news[article_id] = HashWithIndifferentAccess.new
|
37
|
-
news[article_id][:id] =
|
37
|
+
news[article_id][:id] = article_id
|
38
38
|
news[article_id][:hash] = Base64.encode64(body.children[3].children[1].children.children.text)
|
39
|
-
news[article_id][:cover] = "https://cdn#{body.children[1].attributes['data-original'].value.split('/cdn').last}"
|
40
39
|
news[article_id][:url] = body.children[1].attributes['href'].value
|
41
40
|
news[article_id][:title] = body.children[3].children[1].children.children.text
|
42
41
|
news[article_id][:author] = body.children[3].children[3].children[1].children.text
|
43
42
|
news[article_id][:content] = body.children[3].css('.copy').text
|
44
|
-
news[article_id][:tags] = body.children[3].children[1].children.children.text.scan(/[[:alpha:]]{
|
43
|
+
news[article_id][:tags] = body.children[3].children[1].children.children.text.split('/').last.scan(/[[:alpha:]]{4,}/).uniq
|
45
44
|
news[article_id][:date] = body.children[3].children[3].css('.long_date').children.text.strip
|
45
|
+
|
46
|
+
begin
|
47
|
+
cover = body.children[1].attributes['data-original']
|
48
|
+
|
49
|
+
if cover.nil?
|
50
|
+
news[article_id][:cover] = "https://cdn#{body.cover.value.split('/cdn').last}"
|
51
|
+
else
|
52
|
+
news[article_id][:cover] = "https://cdn#{body.children[1].cover.value.split('/cdn').last}"
|
53
|
+
end
|
54
|
+
rescue
|
55
|
+
news[article_id][:cover] = nil
|
56
|
+
end
|
46
57
|
end
|
47
58
|
|
48
59
|
return news
|
@@ -1,11 +1,16 @@
|
|
1
1
|
module GDNewsScraper
|
2
|
-
VERSION = "2.0.0"
|
2
|
+
VERSION = "2.0.1"
|
3
3
|
|
4
4
|
# CHANGELOG
|
5
5
|
#
|
6
6
|
# v1.0.0 - Initial Gem Setup
|
7
7
|
# v1.1.0 - Add Polygon.com News Scrapper
|
8
8
|
# v1.2.0 - Add Polygon.com Reviews Scrapper
|
9
|
-
# v1.2.1 - Remove date parse from the Gem and let the Application handle the
|
9
|
+
# v1.2.1 - Remove date parse from the Gem and let the Application handle the
|
10
|
+
# values
|
10
11
|
# v2.0.0 - Rename the gem from GDNewsScrapper to GDNewsScraper
|
12
|
+
# v2.0.1 - Fix a bug in PolygonCOM News scrapper where a gallery of images
|
13
|
+
# would cause the script to fail when requesting the photo for the
|
14
|
+
# Article
|
15
|
+
|
11
16
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: GDNewsScraper
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.0.0
|
4
|
+
version: 2.0.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Vlad Radulescu
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-06-
|
11
|
+
date: 2016-06-22 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|