jekyll-artisync 0.2 → 0.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/jekyll-artisync.rb +9 -7
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 31d37fc6ffdd79cb4d259d1db001b0abea1f2f4d8be3d17b035e2db5e1a7cc80
|
4
|
+
data.tar.gz: 1b5b26500b61c8e271d0c2a2605b28662c041572a1428469e9d30209c718a0c5
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5d643be4d2d2c4831251e06e6f092ff419148b7c21fbdc1d274642d7dad5cf7de741b635af6e556221e47090b61dd0a28cc4614b9adae8a829062c354ce71468
|
7
|
+
data.tar.gz: 7334aeea98d30013f3ba030caaeafbb3fbd542d74998f00efb152d1873bbdd83542b9213b0cc041836b2e9becb62622d01e894da0e5fc003b44dbc2a576cb79b
|
data/lib/jekyll-artisync.rb
CHANGED
@@ -6,8 +6,8 @@ require "jekyll"
|
|
6
6
|
# user agent is necessary otherwise certain sites such as Zhihu throws 400
|
7
7
|
USER_AGENT = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.86 Safari/537.36'
|
8
8
|
|
9
|
-
|
10
|
-
'zhihu' => '//div[contains(@class, "Post-RichText") and contains(@class, "ztext")]',
|
9
|
+
HOST_TO_ARTICLE_XPATH = {
|
10
|
+
'zhuanlan.zhihu.com' => '//div[contains(@class, "Post-RichText") and contains(@class, "ztext")]',
|
11
11
|
}
|
12
12
|
|
13
13
|
class ArticleSyncEmbed < Liquid::Tag
|
@@ -17,8 +17,7 @@ class ArticleSyncEmbed < Liquid::Tag
|
|
17
17
|
@content = content
|
18
18
|
end
|
19
19
|
|
20
|
-
def _fetch_html(
|
21
|
-
uri = URI(url)
|
20
|
+
def _fetch_html(uri)
|
22
21
|
res = Net::HTTP.start(uri.hostname, uri.port, :use_ssl => true) do |http|
|
23
22
|
# :use_ssl => true for the uri is https
|
24
23
|
http.request(Net::HTTP::Get.new(uri, {'User-Agent' => USER_AGENT}))
|
@@ -42,9 +41,12 @@ class ArticleSyncEmbed < Liquid::Tag
|
|
42
41
|
end
|
43
42
|
|
44
43
|
def render(context)
|
45
|
-
url
|
46
|
-
|
47
|
-
|
44
|
+
url = @content.strip
|
45
|
+
uri = URI(url)
|
46
|
+
page_host = uri.hostname
|
47
|
+
puts page_host
|
48
|
+
page_html = self._fetch_html(uri)
|
49
|
+
article = Nokogiri::HTML(page_html).xpath(HOST_TO_ARTICLE_XPATH[page_host])
|
48
50
|
content = []
|
49
51
|
article.children.each do |node|
|
50
52
|
content.append self._handle_node(node)
|