jekyll-artisync 0.2 → 0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/jekyll-artisync.rb +9 -7
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 31d37fc6ffdd79cb4d259d1db001b0abea1f2f4d8be3d17b035e2db5e1a7cc80
|
4
|
+
data.tar.gz: 1b5b26500b61c8e271d0c2a2605b28662c041572a1428469e9d30209c718a0c5
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5d643be4d2d2c4831251e06e6f092ff419148b7c21fbdc1d274642d7dad5cf7de741b635af6e556221e47090b61dd0a28cc4614b9adae8a829062c354ce71468
|
7
|
+
data.tar.gz: 7334aeea98d30013f3ba030caaeafbb3fbd542d74998f00efb152d1873bbdd83542b9213b0cc041836b2e9becb62622d01e894da0e5fc003b44dbc2a576cb79b
|
data/lib/jekyll-artisync.rb
CHANGED
@@ -6,8 +6,8 @@ require "jekyll"
|
|
6
6
|
# user agent is necessary otherwise certain sites such as Zhihu throws 400
|
7
7
|
USER_AGENT = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.86 Safari/537.36'
|
8
8
|
|
9
|
-
|
10
|
-
'zhihu' => '//div[contains(@class, "Post-RichText") and contains(@class, "ztext")]',
|
9
|
+
HOST_TO_ARTICLE_XPATH = {
|
10
|
+
'zhuanlan.zhihu.com' => '//div[contains(@class, "Post-RichText") and contains(@class, "ztext")]',
|
11
11
|
}
|
12
12
|
|
13
13
|
class ArticleSyncEmbed < Liquid::Tag
|
@@ -17,8 +17,7 @@ class ArticleSyncEmbed < Liquid::Tag
|
|
17
17
|
@content = content
|
18
18
|
end
|
19
19
|
|
20
|
-
def _fetch_html(url)
|
21
|
-
uri = URI(url)
|
20
|
+
def _fetch_html(uri)
|
22
21
|
res = Net::HTTP.start(uri.hostname, uri.port, :use_ssl => true) do |http|
|
23
22
|
# :use_ssl => true for the uri is https
|
24
23
|
http.request(Net::HTTP::Get.new(uri, {'User-Agent' => USER_AGENT}))
|
@@ -42,9 +41,12 @@ class ArticleSyncEmbed < Liquid::Tag
|
|
42
41
|
end
|
43
42
|
|
44
43
|
def render(context)
|
45
|
-
url
|
46
|
-
|
47
|
-
|
44
|
+
url = @content.strip
|
45
|
+
uri = URI(url)
|
46
|
+
page_host = uri.hostname
|
47
|
+
puts page_host
|
48
|
+
page_html = self._fetch_html(uri)
|
49
|
+
article = Nokogiri::HTML(page_html).xpath(HOST_TO_ARTICLE_XPATH[page_host])
|
48
50
|
content = []
|
49
51
|
article.children.each do |node|
|
50
52
|
content.append self._handle_node(node)
|