hongkong-news-scrapers 0.2.0 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b73252f05a33fcd69fb0a821cb04eef25c32878f
|
4
|
+
data.tar.gz: 61c0353950c4be8ba70280b3d8c8c79cd523fd6f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 1af171147a715dd963dfe19bd5e26128e515591ea0da194e8496faa94027c5baaffd1dd2bbda02c1671adabd69031f1d06796c4ef94272bba0ebb33bcf4bd643
|
7
|
+
data.tar.gz: f3865cbf0a6020c83e2f7312ad1a72738474eaedb366a84faf4eff7154b16b953f44a802f985af0558e9d11113c85f301fd337ce787d6c28240bb96e1940af91
|
@@ -1,4 +1,5 @@
|
|
1
1
|
require_relative './phantom_scraper'
|
2
|
+
require 'uri'
|
2
3
|
|
3
4
|
module Hongkong
|
4
5
|
module News
|
@@ -6,15 +7,17 @@ module Hongkong
|
|
6
7
|
class MingpaoScraper
|
7
8
|
include PhantomScraper
|
8
9
|
|
10
|
+
LIST_URL = "http://news.mingpao.com/pns/%E6%96%B0%E8%81%9E%E7%B8%BD%E8%A6%BD/web_tc/archive/latest"
|
11
|
+
|
9
12
|
# Extract all news links from Mingpao
|
10
|
-
def
|
13
|
+
def news_links
|
11
14
|
new_session
|
12
|
-
visit
|
15
|
+
visit LIST_URL
|
13
16
|
|
14
17
|
all(".listing ul li a").collect do |anchor|
|
15
18
|
link = Link.new
|
16
19
|
link.title = anchor.text
|
17
|
-
link.url = anchor["href"]
|
20
|
+
link.url = URI::join(LIST_URL, anchor["href"]).to_s
|
18
21
|
link
|
19
22
|
end
|
20
23
|
end
|