GDNewsScraper 2.0.0 → 2.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/GDNewsScraper/scrapers/polygon_com/news.rb +14 -3
- data/lib/GDNewsScraper/version.rb +7 -2
- metadata +2 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA1:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: ff2c27e527cd142f2d76e22a35169da1c0a41d43
|
|
4
|
+
data.tar.gz: 01c0550669575b38a91afbe85641301ab252e45f
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: e276b6b87a7fd1041e63ee6771dc85ad6b71af7257f11a9bd4052300d0e9309040f690cf5ffaa9a0c74ec4f431dd19a0d1723cd8176a12939a87d1455ed369e2
|
|
7
|
+
data.tar.gz: d2368e5da859420f830ffcbd40c14d74d01087b727a69209bb7ef93c368efc9ca5dc5dadaf0321c8737a7be2ef9e3f5dfcfec288ba4916d30cca3c68989c857b
|
|
@@ -34,15 +34,26 @@ module GDNewsScraper
|
|
|
34
34
|
article_id = article.attributes['data-entry-id'].value.to_i
|
|
35
35
|
|
|
36
36
|
news[article_id] = HashWithIndifferentAccess.new
|
|
37
|
-
news[article_id][:id] =
|
|
37
|
+
news[article_id][:id] = article_id
|
|
38
38
|
news[article_id][:hash] = Base64.encode64(body.children[3].children[1].children.children.text)
|
|
39
|
-
news[article_id][:cover] = "https://cdn#{body.children[1].attributes['data-original'].value.split('/cdn').last}"
|
|
40
39
|
news[article_id][:url] = body.children[1].attributes['href'].value
|
|
41
40
|
news[article_id][:title] = body.children[3].children[1].children.children.text
|
|
42
41
|
news[article_id][:author] = body.children[3].children[3].children[1].children.text
|
|
43
42
|
news[article_id][:content] = body.children[3].css('.copy').text
|
|
44
|
-
news[article_id][:tags] = body.children[3].children[1].children.children.text.scan(/[[:alpha:]]{
|
|
43
|
+
news[article_id][:tags] = body.children[3].children[1].children.children.text.split('/').last.scan(/[[:alpha:]]{4,}/).uniq
|
|
45
44
|
news[article_id][:date] = body.children[3].children[3].css('.long_date').children.text.strip
|
|
45
|
+
|
|
46
|
+
begin
|
|
47
|
+
cover = body.children[1].attributes['data-original']
|
|
48
|
+
|
|
49
|
+
if cover.nil?
|
|
50
|
+
news[article_id][:cover] = "https://cdn#{body.cover.value.split('/cdn').last}"
|
|
51
|
+
else
|
|
52
|
+
news[article_id][:cover] = "https://cdn#{body.children[1].cover.value.split('/cdn').last}"
|
|
53
|
+
end
|
|
54
|
+
rescue
|
|
55
|
+
news[article_id][:cover] = nil
|
|
56
|
+
end
|
|
46
57
|
end
|
|
47
58
|
|
|
48
59
|
return news
|
|
@@ -1,11 +1,16 @@
|
|
|
1
1
|
module GDNewsScraper
|
|
2
|
-
VERSION = "2.0.0"
|
|
2
|
+
VERSION = "2.0.1"
|
|
3
3
|
|
|
4
4
|
# CHANGELOG
|
|
5
5
|
#
|
|
6
6
|
# v1.0.0 - Initial Gem Setup
|
|
7
7
|
# v1.1.0 - Add Polygon.com News Scrapper
|
|
8
8
|
# v1.2.0 - Add Polygon.com Reviews Scrapper
|
|
9
|
-
# v1.2.1 - Remove date parse from the Gem and let the Application handle the
|
|
9
|
+
# v1.2.1 - Remove date parse from the Gem and let the Application handle the
|
|
10
|
+
# values
|
|
10
11
|
# v2.0.0 - Rename the gem from GDNewsScrapper to GDNewsScraper
|
|
12
|
+
# v2.0.1 - Fix a bug in PolygonCOM News scrapper where a gallery of images
|
|
13
|
+
# would cause the script to fail when requesting the photo for the
|
|
14
|
+
# Article
|
|
15
|
+
|
|
11
16
|
end
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: GDNewsScraper
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 2.0.0
|
|
4
|
+
version: 2.0.1
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Vlad Radulescu
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: exe
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2016-06-
|
|
11
|
+
date: 2016-06-22 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: nokogiri
|