jekyll-artisync 0.4 → 0.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/jekyll-artisync.rb +3 -41
- data/lib/syncers/abstract_syncer.rb +31 -0
- data/lib/syncers/syncer_factory.rb +18 -0
- data/lib/syncers/zhihu_syncer.rb +35 -0
- metadata +5 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: '0606097a6fb1063a731eb03353da74882e9d5aca8edf40143f7c6182e189c91a'
|
4
|
+
data.tar.gz: 7bac92d7cae8be329f1f31c782efa8faced9f280450164f110f81f7f96865e36
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 897c6f5173ed4af5541c0784609a136e7f09d3a22e174ea38f4fbc908471a7228a311c670d61557611bbce5d62cb62831f667506f25188f4938562d989d8b207
|
7
|
+
data.tar.gz: 6b51c4520cb6a6005994184a478073dac60387e3e363f11432363bd9f38a59312f47763488d2fbdc54b4bb03db4933394ab0852fc5d73f72d75dd5e9d911d977
|
data/lib/jekyll-artisync.rb
CHANGED
@@ -1,14 +1,6 @@
|
|
1
|
-
require 'net/http'
|
2
|
-
require 'nokogiri'
|
3
|
-
|
1
|
+
require 'syncers/syncer_factory'
|
4
2
|
require "jekyll"
|
5
3
|
|
6
|
-
# user agent is necessary otherwise certain sites such as Zhihu throws 400
|
7
|
-
USER_AGENT = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.86 Safari/537.36'
|
8
|
-
|
9
|
-
HOST_TO_ARTICLE_XPATH = {
|
10
|
-
'zhuanlan.zhihu.com' => '//div[contains(@class, "Post-RichText") and contains(@class, "ztext")]',
|
11
|
-
}
|
12
4
|
|
13
5
|
class ArticleSyncEmbed < Liquid::Tag
|
14
6
|
|
@@ -17,42 +9,12 @@ class ArticleSyncEmbed < Liquid::Tag
|
|
17
9
|
@content = content
|
18
10
|
end
|
19
11
|
|
20
|
-
def _fetch_html(uri)
|
21
|
-
res = Net::HTTP.start(uri.hostname, uri.port, :use_ssl => true) do |http|
|
22
|
-
# :use_ssl => true for the uri is https
|
23
|
-
http.request(Net::HTTP::Get.new(uri, {'User-Agent' => USER_AGENT}))
|
24
|
-
end
|
25
|
-
|
26
|
-
res.body
|
27
|
-
end
|
28
|
-
|
29
|
-
def _handle_node(node)
|
30
|
-
case node.name
|
31
|
-
when 'figure'
|
32
|
-
img_node = node.children[1]
|
33
|
-
|
34
|
-
img_url = img_node['data-actualsrc']
|
35
|
-
img_url['_b.jpg'] = '_720w.jpg'
|
36
|
-
img_url['/v2'] = "/80/v2"
|
37
|
-
|
38
|
-
img_node['src'] = img_url
|
39
|
-
end
|
40
|
-
node.to_html
|
41
|
-
end
|
42
|
-
|
43
12
|
def render(context)
|
44
13
|
url = @content.strip
|
45
14
|
uri = URI(url)
|
46
15
|
page_host = uri.hostname
|
47
|
-
|
48
|
-
|
49
|
-
article = Nokogiri::HTML(page_html).xpath(HOST_TO_ARTICLE_XPATH[page_host])
|
50
|
-
content = []
|
51
|
-
article.children.each do |node|
|
52
|
-
content.append self._handle_node(node)
|
53
|
-
end
|
54
|
-
|
55
|
-
content.join("\n")
|
16
|
+
syncer = SyncerFactory.get_syncer(uri)
|
17
|
+
return syncer.gen_html
|
56
18
|
end
|
57
19
|
|
58
20
|
Liquid::Template.register_tag "artisync", self
|
@@ -0,0 +1,31 @@
|
|
1
|
+
require 'net/http'
|
2
|
+
|
3
|
+
|
4
|
+
# user agent is necessary otherwise certain sites such as Zhihu throws 400
|
5
|
+
USER_AGENT = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.86 Safari/537.36'
|
6
|
+
|
7
|
+
MESSAGE = "AbstractSyncer::[%s]: override required."
|
8
|
+
|
9
|
+
class AbstractSyncer
|
10
|
+
def initialize(uri)
|
11
|
+
@uri = uri
|
12
|
+
end
|
13
|
+
|
14
|
+
def _fetch_html
|
15
|
+
uri = @uri
|
16
|
+
res = Net::HTTP.start(uri.hostname, uri.port, :use_ssl => true) do |http|
|
17
|
+
# :use_ssl => true for the uri is https
|
18
|
+
http.request(Net::HTTP::Get.new(uri, {'User-Agent' => USER_AGENT}))
|
19
|
+
end
|
20
|
+
|
21
|
+
res.body
|
22
|
+
end
|
23
|
+
|
24
|
+
def get_article_xpath
|
25
|
+
raise MESSAGE % [__method__]
|
26
|
+
end
|
27
|
+
|
28
|
+
def gen_html
|
29
|
+
raise MESSAGE % [__method__]
|
30
|
+
end
|
31
|
+
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
require_relative 'zhihu_syncer'
|
2
|
+
|
3
|
+
|
4
|
+
ZHIHU_ZHUANLAN = 'zhuanlan.zhihu.com'
|
5
|
+
|
6
|
+
class SyncerFactory
|
7
|
+
def self.get_syncer(uri)
|
8
|
+
host_name = uri.host
|
9
|
+
|
10
|
+
case host_name
|
11
|
+
when ZHIHU_ZHUANLAN
|
12
|
+
return ZhihuSyncer.new(uri)
|
13
|
+
else
|
14
|
+
raise "Not supported website for host: #{host_name}"
|
15
|
+
end
|
16
|
+
end
|
17
|
+
end
|
18
|
+
|
@@ -0,0 +1,35 @@
|
|
1
|
+
require_relative 'abstract_syncer'
|
2
|
+
require 'nokogiri'
|
3
|
+
|
4
|
+
|
5
|
+
class ZhihuSyncer < AbstractSyncer
|
6
|
+
def get_article_xpath
|
7
|
+
'//div[contains(@class, "Post-RichText") and contains(@class, "ztext")]'
|
8
|
+
end
|
9
|
+
|
10
|
+
def _handle_node(node)
|
11
|
+
case node.name
|
12
|
+
when 'figure'
|
13
|
+
img_node = node.children[1]
|
14
|
+
|
15
|
+
img_url = img_node['data-actualsrc']
|
16
|
+
img_url['_b.jpg'] = '_720w.jpg'
|
17
|
+
img_url['/v2'] = "/80/v2"
|
18
|
+
|
19
|
+
img_node['src'] = img_url
|
20
|
+
end
|
21
|
+
node.to_html
|
22
|
+
end
|
23
|
+
|
24
|
+
def gen_html
|
25
|
+
page_html = self._fetch_html
|
26
|
+
article = Nokogiri::HTML(page_html).xpath(self.get_article_xpath)
|
27
|
+
content = []
|
28
|
+
article.children.each do |node|
|
29
|
+
content.append self._handle_node(node)
|
30
|
+
end
|
31
|
+
|
32
|
+
content.join("\n")
|
33
|
+
end
|
34
|
+
|
35
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: jekyll-artisync
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: '0.4'
|
4
|
+
version: '0.5'
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Junhan
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-05-
|
11
|
+
date: 2020-05-27 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: Take an article from a given site and display on current Jekyll page.
|
14
14
|
email:
|
@@ -18,6 +18,9 @@ extensions: []
|
|
18
18
|
extra_rdoc_files: []
|
19
19
|
files:
|
20
20
|
- lib/jekyll-artisync.rb
|
21
|
+
- lib/syncers/abstract_syncer.rb
|
22
|
+
- lib/syncers/syncer_factory.rb
|
23
|
+
- lib/syncers/zhihu_syncer.rb
|
21
24
|
homepage: https://github.com/junhan-z/jekyll-artisync
|
22
25
|
licenses:
|
23
26
|
- MIT
|