hongkong-news-scrapers 0.3.0 → 0.3.1
Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c55a3d970d1d5352121e572030ebca5de8ba0700
|
4
|
+
data.tar.gz: fb6f758b612e98e23de79f03285dd55dbc760fd7
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 609c19b1a287c3f339d1902692a1f883862ddda4474d3c1b49d933a6e82da298998291032a72c6700bddfe73492b189b1c29edd6dc8426f442bb1437680893f3
|
7
|
+
data.tar.gz: 02e67ae4872db3dd33bce4d5f09dbcf0a34dd3ea5e765f2a9343d4615bb22118a84602000c2680f34996ae48a58fa1b763220547d7ef4c7b0024c9294eb61946
|
@@ -8,7 +8,6 @@ module Hongkong
|
|
8
8
|
|
9
9
|
# Extract all news links from Apple Daily
|
10
10
|
def news_links
|
11
|
-
new_session
|
12
11
|
visit "http://hk.apple.nextmedia.com/"
|
13
12
|
|
14
13
|
all("#article_ddl option").collect do |option|
|
@@ -21,7 +20,6 @@ module Hongkong
|
|
21
20
|
|
22
21
|
# Extract article from page from Apple Daily
|
23
22
|
def news(url)
|
24
|
-
new_session
|
25
23
|
visit url
|
26
24
|
|
27
25
|
document = Document.new
|
@@ -11,7 +11,6 @@ module Hongkong
|
|
11
11
|
|
12
12
|
# Extract all news links from Mingpao
|
13
13
|
def news_links
|
14
|
-
new_session
|
15
14
|
visit LIST_URL
|
16
15
|
|
17
16
|
all(".listing ul li a").collect do |anchor|
|
@@ -24,7 +23,6 @@ module Hongkong
|
|
24
23
|
|
25
24
|
# Extract article from page from Mingpao
|
26
25
|
def news(url)
|
27
|
-
new_session
|
28
26
|
visit url
|
29
27
|
|
30
28
|
# wait for content to be loaded
|
@@ -4,11 +4,8 @@ require 'tempfile'
|
|
4
4
|
module Hongkong
|
5
5
|
module News
|
6
6
|
module Scrapers
|
7
|
-
module
|
8
|
-
|
9
|
-
|
10
|
-
# on initialize, setup capybara to use poltergeist
|
11
|
-
def self.included(mod)
|
7
|
+
module Initializer
|
8
|
+
def self.configure
|
12
9
|
Capybara.register_driver :poltergeist do |app|
|
13
10
|
extensions = [
|
14
11
|
File.expand_path("../phantom_scraper_extension.js", __FILE__)
|
@@ -18,7 +15,7 @@ module Hongkong
|
|
18
15
|
js_errors: false,
|
19
16
|
phantomjs: ENV['PHANTOMJS_PATH'])
|
20
17
|
end
|
21
|
-
|
18
|
+
|
22
19
|
Capybara.default_wait_time = 5
|
23
20
|
Capybara.configure do |config|
|
24
21
|
config.default_driver = :poltergeist
|
@@ -26,18 +23,11 @@ module Hongkong
|
|
26
23
|
config.run_server = false
|
27
24
|
end
|
28
25
|
end
|
26
|
+
self.configure
|
27
|
+
end
|
29
28
|
|
30
|
-
|
31
|
-
|
32
|
-
@session = Capybara::Session.new(:poltergeist)
|
33
|
-
@session.driver.headers = {'User-Agent' => 'Mozilla/5.0 (Macintosh; Intel Mac OS X)'}
|
34
|
-
@session
|
35
|
-
end
|
36
|
-
|
37
|
-
# use created session
|
38
|
-
def page
|
39
|
-
@session
|
40
|
-
end
|
29
|
+
module PhantomScraper
|
30
|
+
include Capybara::DSL
|
41
31
|
|
42
32
|
def screenshot_data(filename='screenshot.gif')
|
43
33
|
data = nil
|
@@ -59,3 +49,4 @@ module Hongkong
|
|
59
49
|
end
|
60
50
|
end
|
61
51
|
end
|
52
|
+
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: hongkong-news-scrapers
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.0
|
4
|
+
version: 0.3.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Francis Chong
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-07-
|
11
|
+
date: 2015-07-23 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|