jekyll-artisync 0.4 → 0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/jekyll-artisync.rb +3 -41
- data/lib/syncers/abstract_syncer.rb +31 -0
- data/lib/syncers/syncer_factory.rb +18 -0
- data/lib/syncers/zhihu_syncer.rb +35 -0
- metadata +5 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: '0606097a6fb1063a731eb03353da74882e9d5aca8edf40143f7c6182e189c91a'
|
4
|
+
data.tar.gz: 7bac92d7cae8be329f1f31c782efa8faced9f280450164f110f81f7f96865e36
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 897c6f5173ed4af5541c0784609a136e7f09d3a22e174ea38f4fbc908471a7228a311c670d61557611bbce5d62cb62831f667506f25188f4938562d989d8b207
|
7
|
+
data.tar.gz: 6b51c4520cb6a6005994184a478073dac60387e3e363f11432363bd9f38a59312f47763488d2fbdc54b4bb03db4933394ab0852fc5d73f72d75dd5e9d911d977
|
data/lib/jekyll-artisync.rb
CHANGED
@@ -1,14 +1,6 @@
|
|
1
|
-
require 'net/http'
|
2
|
-
require 'nokogiri'
|
3
|
-
|
1
|
+
require 'syncers/syncer_factory'
|
4
2
|
require "jekyll"
|
5
3
|
|
6
|
-
# user agent is necessary otherwise certain sites such as Zhihu throws 400
|
7
|
-
USER_AGENT = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.86 Safari/537.36'
|
8
|
-
|
9
|
-
HOST_TO_ARTICLE_XPATH = {
|
10
|
-
'zhuanlan.zhihu.com' => '//div[contains(@class, "Post-RichText") and contains(@class, "ztext")]',
|
11
|
-
}
|
12
4
|
|
13
5
|
class ArticleSyncEmbed < Liquid::Tag
|
14
6
|
|
@@ -17,42 +9,12 @@ class ArticleSyncEmbed < Liquid::Tag
|
|
17
9
|
@content = content
|
18
10
|
end
|
19
11
|
|
20
|
-
def _fetch_html(uri)
|
21
|
-
res = Net::HTTP.start(uri.hostname, uri.port, :use_ssl => true) do |http|
|
22
|
-
# :use_ssl => true for the uri is https
|
23
|
-
http.request(Net::HTTP::Get.new(uri, {'User-Agent' => USER_AGENT}))
|
24
|
-
end
|
25
|
-
|
26
|
-
res.body
|
27
|
-
end
|
28
|
-
|
29
|
-
def _handle_node(node)
|
30
|
-
case node.name
|
31
|
-
when 'figure'
|
32
|
-
img_node = node.children[1]
|
33
|
-
|
34
|
-
img_url = img_node['data-actualsrc']
|
35
|
-
img_url['_b.jpg'] = '_720w.jpg'
|
36
|
-
img_url['/v2'] = "/80/v2"
|
37
|
-
|
38
|
-
img_node['src'] = img_url
|
39
|
-
end
|
40
|
-
node.to_html
|
41
|
-
end
|
42
|
-
|
43
12
|
def render(context)
|
44
13
|
url = @content.strip
|
45
14
|
uri = URI(url)
|
46
15
|
page_host = uri.hostname
|
47
|
-
|
48
|
-
|
49
|
-
article = Nokogiri::HTML(page_html).xpath(HOST_TO_ARTICLE_XPATH[page_host])
|
50
|
-
content = []
|
51
|
-
article.children.each do |node|
|
52
|
-
content.append self._handle_node(node)
|
53
|
-
end
|
54
|
-
|
55
|
-
content.join("\n")
|
16
|
+
syncer = SyncerFactory.get_syncer(uri)
|
17
|
+
return syncer.gen_html
|
56
18
|
end
|
57
19
|
|
58
20
|
Liquid::Template.register_tag "artisync", self
|
data/lib/syncers/abstract_syncer.rb
ADDED
@@ -0,0 +1,31 @@
|
|
1
|
+
require 'net/http'
|
2
|
+
|
3
|
+
|
4
|
+
# user agent is necessary otherwise certain sites such as Zhihu throws 400
|
5
|
+
USER_AGENT = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.86 Safari/537.36'
|
6
|
+
|
7
|
+
MESSAGE = "AbstractSyncer::[%s]: override required."
|
8
|
+
|
9
|
+
class AbstractSyncer
|
10
|
+
def initialize(uri)
|
11
|
+
@uri = uri
|
12
|
+
end
|
13
|
+
|
14
|
+
def _fetch_html
|
15
|
+
uri = @uri
|
16
|
+
res = Net::HTTP.start(uri.hostname, uri.port, :use_ssl => true) do |http|
|
17
|
+
# :use_ssl => true for the uri is https
|
18
|
+
http.request(Net::HTTP::Get.new(uri, {'User-Agent' => USER_AGENT}))
|
19
|
+
end
|
20
|
+
|
21
|
+
res.body
|
22
|
+
end
|
23
|
+
|
24
|
+
def get_article_xpath
|
25
|
+
raise MESSAGE % [__method__]
|
26
|
+
end
|
27
|
+
|
28
|
+
def gen_html
|
29
|
+
raise MESSAGE % [__method__]
|
30
|
+
end
|
31
|
+
end
|
data/lib/syncers/syncer_factory.rb
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
require_relative 'zhihu_syncer'
|
2
|
+
|
3
|
+
|
4
|
+
ZHIHU_ZHUANLAN = 'zhuanlan.zhihu.com'
|
5
|
+
|
6
|
+
class SyncerFactory
|
7
|
+
def self.get_syncer(uri)
|
8
|
+
host_name = uri.host
|
9
|
+
|
10
|
+
case host_name
|
11
|
+
when ZHIHU_ZHUANLAN
|
12
|
+
return ZhihuSyncer.new(uri)
|
13
|
+
else
|
14
|
+
raise "Not supported website for host: #{host_name}"
|
15
|
+
end
|
16
|
+
end
|
17
|
+
end
|
18
|
+
|
data/lib/syncers/zhihu_syncer.rb
ADDED
@@ -0,0 +1,35 @@
|
|
1
|
+
require_relative 'abstract_syncer'
|
2
|
+
require 'nokogiri'
|
3
|
+
|
4
|
+
|
5
|
+
class ZhihuSyncer < AbstractSyncer
|
6
|
+
def get_article_xpath
|
7
|
+
'//div[contains(@class, "Post-RichText") and contains(@class, "ztext")]'
|
8
|
+
end
|
9
|
+
|
10
|
+
def _handle_node(node)
|
11
|
+
case node.name
|
12
|
+
when 'figure'
|
13
|
+
img_node = node.children[1]
|
14
|
+
|
15
|
+
img_url = img_node['data-actualsrc']
|
16
|
+
img_url['_b.jpg'] = '_720w.jpg'
|
17
|
+
img_url['/v2'] = "/80/v2"
|
18
|
+
|
19
|
+
img_node['src'] = img_url
|
20
|
+
end
|
21
|
+
node.to_html
|
22
|
+
end
|
23
|
+
|
24
|
+
def gen_html
|
25
|
+
page_html = self._fetch_html
|
26
|
+
article = Nokogiri::HTML(page_html).xpath(self.get_article_xpath)
|
27
|
+
content = []
|
28
|
+
article.children.each do |node|
|
29
|
+
content.append self._handle_node(node)
|
30
|
+
end
|
31
|
+
|
32
|
+
content.join("\n")
|
33
|
+
end
|
34
|
+
|
35
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: jekyll-artisync
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: '0.4'
|
4
|
+
version: '0.5'
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Junhan
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-05-
|
11
|
+
date: 2020-05-27 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: Take an article from a given site and display on current Jekyll page.
|
14
14
|
email:
|
@@ -18,6 +18,9 @@ extensions: []
|
|
18
18
|
extra_rdoc_files: []
|
19
19
|
files:
|
20
20
|
- lib/jekyll-artisync.rb
|
21
|
+
- lib/syncers/abstract_syncer.rb
|
22
|
+
- lib/syncers/syncer_factory.rb
|
23
|
+
- lib/syncers/zhihu_syncer.rb
|
21
24
|
homepage: https://github.com/junhan-z/jekyll-artisync
|
22
25
|
licenses:
|
23
26
|
- MIT
|