GDNewsScraper 2.0.0 → 2.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 79d8fb56daf1b68157a0855f189fd53779daa18a
4
- data.tar.gz: 0d70629b443af5e485300b0e28103fd538a840c5
3
+ metadata.gz: ff2c27e527cd142f2d76e22a35169da1c0a41d43
4
+ data.tar.gz: 01c0550669575b38a91afbe85641301ab252e45f
5
5
  SHA512:
6
- metadata.gz: 39192776e25199e211fc6183668af639b65a47ca9e857fc1710629dab206e2d904ed1893c53965d4ece42c7eebda6376df5012859d011ee959cb2949884c702a
7
- data.tar.gz: ab50744e78b5f7d9bd9c4d0c593ff666b0b72d059fdc0c0b67476897f4b4c3c0772169ed56d76e7cceeb7ee6d3ceb3d9bbfb5a6d8cf1139266660d50e684d5f8
6
+ metadata.gz: e276b6b87a7fd1041e63ee6771dc85ad6b71af7257f11a9bd4052300d0e9309040f690cf5ffaa9a0c74ec4f431dd19a0d1723cd8176a12939a87d1455ed369e2
7
+ data.tar.gz: d2368e5da859420f830ffcbd40c14d74d01087b727a69209bb7ef93c368efc9ca5dc5dadaf0321c8737a7be2ef9e3f5dfcfec288ba4916d30cca3c68989c857b
@@ -34,15 +34,26 @@ module GDNewsScraper
34
34
  article_id = article.attributes['data-entry-id'].value.to_i
35
35
 
36
36
  news[article_id] = HashWithIndifferentAccess.new
37
- news[article_id][:id] = article.attributes['data-entry-id'].value.to_i
37
+ news[article_id][:id] = article_id
38
38
  news[article_id][:hash] = Base64.encode64(body.children[3].children[1].children.children.text)
39
- news[article_id][:cover] = "https://cdn#{body.children[1].attributes['data-original'].value.split('/cdn').last}"
40
39
  news[article_id][:url] = body.children[1].attributes['href'].value
41
40
  news[article_id][:title] = body.children[3].children[1].children.children.text
42
41
  news[article_id][:author] = body.children[3].children[3].children[1].children.text
43
42
  news[article_id][:content] = body.children[3].css('.copy').text
44
- news[article_id][:tags] = body.children[3].children[1].children.children.text.scan(/[[:alpha:]]{3,}/).uniq
43
+ news[article_id][:tags] = body.children[3].children[1].children.children.text.split('/').last.scan(/[[:alpha:]]{4,}/).uniq
45
44
  news[article_id][:date] = body.children[3].children[3].css('.long_date').children.text.strip
45
+
46
+ begin
47
+ cover = body.children[1].attributes['data-original']
48
+
49
+ if cover.nil?
50
+ news[article_id][:cover] = "https://cdn#{body.cover.value.split('/cdn').last}"
51
+ else
52
+ news[article_id][:cover] = "https://cdn#{body.children[1].cover.value.split('/cdn').last}"
53
+ end
54
+ rescue
55
+ news[article_id][:cover] = nil
56
+ end
46
57
  end
47
58
 
48
59
  return news
@@ -1,11 +1,16 @@
1
1
  module GDNewsScraper
2
- VERSION = "2.0.0"
2
+ VERSION = "2.0.1"
3
3
 
4
4
  # CHANGELOG
5
5
  #
6
6
  # v1.0.0 - Initial Gem Setup
7
7
  # v1.1.0 - Add Polygon.com News Scrapper
8
8
  # v1.2.0 - Add Polygon.com Reviews Scrapper
9
- # v1.2.1 - Remove date parse from the Gem and let the Application handle the values
9
+ # v1.2.1 - Remove date parse from the Gem and let the Application handle the
10
+ # values
10
11
  # v2.0.0 - Rename the gem from GDNewsScrapper to GDNewsScraper
12
+ # v2.0.1 - Fix a bug in PolygonCOM News scrapper where a gallery of images
13
+ # would cause the script to fail when requesting the photo for the
14
+ # Article
15
+
11
16
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: GDNewsScraper
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.0.0
4
+ version: 2.0.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Vlad Radulescu
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2016-06-21 00:00:00.000000000 Z
11
+ date: 2016-06-22 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri