hongkong-news-scrapers 0.4.1 → 0.4.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 95d75b7d38e8eb25ea08a743393301fb76f59179
4
- data.tar.gz: 18944ea56db252f3f3a21f475cf03ec43db4eca5
3
+ metadata.gz: bd2a3a978a37ffcf1f0edafaca7b69d379f81983
4
+ data.tar.gz: 69974d9afb715fda8b72b6a8901e274fbd081dba
5
5
  SHA512:
6
- metadata.gz: 92e25a760477e6b7d3f74cc33f327b8eb22212befaaa22a74bd8d5b2b3f614c8430ebf517e9bd47827eb3f40a944b9ff81ee21c58f7b1cf3834af2f89474ff37
7
- data.tar.gz: 96683750052447cd9f724c2fa10a2a5842aad7365ba70e5302c992d6dc705168d16f7cf3f1c5fa28701dee18a910417750dd0ede2b234e57dc211fa97cc0dda1
6
+ metadata.gz: 71e08113fbe2282382b4c6260d4b3dcf7c0ec89797f98bfd886d409a5840d516e0cc5660ffdf80e15e2bab05764598800882ff68b5c7c48d52771b9bd8abf2d6
7
+ data.tar.gz: 46e8d11e0aaa8c875a18a7f723ac07e90b634734d51ca2e096ef61cf6c663e3da65aca36df4d42f606ad11dab2244adcd1b7cc76787448d75b831d4db98507b7
@@ -14,12 +14,15 @@ module Hongkong
14
14
  def news_links
15
15
  visit "http://hk.apple.nextmedia.com/"
16
16
 
17
- all("#article_ddl option").collect do |option|
17
+ links = all("#article_ddl option").collect do |option|
18
18
  link = Link.new
19
19
  link.title = option.text
20
20
  link.url = option["value"]
21
21
  link
22
22
  end.reject { |l| l.url.nil? }
23
+
24
+ cleanup
25
+ links
23
26
  end
24
27
 
25
28
  # Extract article from page from Apple Daily
@@ -34,6 +37,8 @@ module Hongkong
34
37
  document.content = page.evaluate_script("HongKongNews.getInnerText('#masterContent')")
35
38
  document.screenshot_data = screenshot_data
36
39
  document.image_url = doc.search("//meta[@property='og:image']/@content").first.text rescue nil
40
+
41
+ cleanup
37
42
  document
38
43
  end
39
44
  end
@@ -17,12 +17,15 @@ module Hongkong
17
17
  def news_links
18
18
  visit LIST_URL
19
19
 
20
- all(".listing ul li a").collect do |anchor|
20
+ links = all(".listing ul li a").collect do |anchor|
21
21
  link = Link.new
22
22
  link.title = anchor.text
23
23
  link.url = URI::join(LIST_URL, anchor["href"]).to_s
24
24
  link
25
25
  end
26
+
27
+ cleanup
28
+ links
26
29
  end
27
30
 
28
31
  # Extract article from page from Mingpao
@@ -40,6 +43,8 @@ module Hongkong
40
43
  document.content = page.evaluate_script("HongKongNews.getInnerText('article')")
41
44
  document.screenshot_data = screenshot_data
42
45
  document.image_url = doc.search("//meta[@property='og:image']/@content").first.text rescue nil
46
+
47
+ cleanup
43
48
  document
44
49
  end
45
50
  end
@@ -17,12 +17,15 @@ module Hongkong
17
17
  def news_links
18
18
  visit LIST_URL
19
19
 
20
- all("#articleListSELECT option").collect do |option|
20
+ links = all("#articleListSELECT option").collect do |option|
21
21
  link = Link.new
22
22
  link.title = option.text
23
23
  link.url = URI::join(LIST_URL, option["value"]).to_s
24
24
  link
25
25
  end.reject { |l| l.url.to_s.end_with?("#") }
26
+
27
+ cleanup
28
+ links
26
29
  end
27
30
 
28
31
  # Extract article from page
@@ -43,6 +46,7 @@ module Hongkong
43
46
  image = doc.search("#contentCTN .photo img").first
44
47
  document.image_url = URI::join(url, image["src"]).to_s if image
45
48
 
49
+ cleanup
46
50
  document
47
51
  end
48
52
  end
@@ -58,6 +58,30 @@ module Hongkong
58
58
  end
59
59
  @doc
60
60
  end
61
+
62
+ # Waits for pending AJAX requests, then resets the driver; call once a scrape has finished.
63
+ def cleanup
64
+ wait_for_ajax
65
+ page.driver.reset!
66
+ end
67
+
68
+ private
69
+
70
+ # Workaround for PhantomJS hanging: wait (up to Capybara.default_wait_time) until all jQuery AJAX requests have finished.
71
+ def wait_for_ajax
72
+ Timeout.timeout(Capybara.default_wait_time) do
73
+ loop until finished_all_ajax_requests?
74
+ end
75
+ end
76
+
77
+ def finished_all_ajax_requests?
78
+ begin
79
+ page.evaluate_script("(typeof jQuery !== \"undefined\") ? jQuery.active : 0").zero?
80
+ rescue Exception => e
81
+ puts "ignored excpetion wiating ajax: #{e}"
82
+ end
83
+ end
84
+
61
85
  end
62
86
  end
63
87
  end
@@ -1,7 +1,7 @@
1
1
  module Hongkong
2
2
  module News
3
3
  module Scrapers
4
- VERSION = "0.4.1"
4
+ VERSION = "0.4.2"
5
5
  end
6
6
  end
7
7
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: hongkong-news-scrapers
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.1
4
+ version: 0.4.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Francis Chong