jekyll-artisync 0.2 → 0.8
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/jekyll-artisync.rb +5 -42
- data/lib/syncers/abstract_syncer.rb +35 -0
- data/lib/syncers/csdn_syncer.rb +14 -0
- data/lib/syncers/medium_syncer.rb +31 -0
- data/lib/syncers/per_node_syncer.rb +34 -0
- data/lib/syncers/syncer_factory.rb +30 -0
- data/lib/syncers/weixin_syncer.rb +21 -0
- data/lib/syncers/zhihu_syncer.rb +23 -0
- metadata +25 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 01afb17eccd2097c95f4071c0dce0074ec1156b70e455b78ec8c0c961e6aaf3e
|
4
|
+
data.tar.gz: 72720da82e6a39b40b0682f427c4d75397559cd7ecc23e802e9088675db60616
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: fd53ace05990ec40ebe945eebc5981cbddf8c8270417f7885297e8a827dada3a1fd8fb7c7f74dae13efa5a2a4beb0b704a795c752041df16da816e3c249bbd0e
|
7
|
+
data.tar.gz: e9df8042e29b3301787bb4247e826f0b8b6ae8d247a61e31e2a9978383b0e5f39a89d66981ea558140fe2e166f03721f19faf99757c9604d43544fae9d16899b
|
data/lib/jekyll-artisync.rb
CHANGED
@@ -1,14 +1,6 @@
|
|
1
|
-
require '
|
2
|
-
require 'nokogiri'
|
3
|
-
|
1
|
+
require 'syncers/syncer_factory'
|
4
2
|
require "jekyll"
|
5
3
|
|
6
|
-
# user agent is necessary otherwise certain sites such as Zhihu throws 400
|
7
|
-
USER_AGENT = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.86 Safari/537.36'
|
8
|
-
|
9
|
-
SITE_TO_ARTICLE_XPATH = {
|
10
|
-
'zhihu' => '//div[contains(@class, "Post-RichText") and contains(@class, "ztext")]',
|
11
|
-
}
|
12
4
|
|
13
5
|
class ArticleSyncEmbed < Liquid::Tag
|
14
6
|
|
@@ -17,40 +9,11 @@ class ArticleSyncEmbed < Liquid::Tag
|
|
17
9
|
@content = content
|
18
10
|
end
|
19
11
|
|
20
|
-
def _fetch_html(url)
|
21
|
-
uri = URI(url)
|
22
|
-
res = Net::HTTP.start(uri.hostname, uri.port, :use_ssl => true) do |http|
|
23
|
-
# :use_ssl => true for the uri is https
|
24
|
-
http.request(Net::HTTP::Get.new(uri, {'User-Agent' => USER_AGENT}))
|
25
|
-
end
|
26
|
-
|
27
|
-
res.body
|
28
|
-
end
|
29
|
-
|
30
|
-
def _handle_node(node)
|
31
|
-
case node.name
|
32
|
-
when 'figure'
|
33
|
-
img_node = node.children[1]
|
34
|
-
|
35
|
-
img_url = img_node['data-actualsrc']
|
36
|
-
img_url['_b.jpg'] = '_720w.jpg'
|
37
|
-
img_url['/v2'] = "/80/v2"
|
38
|
-
|
39
|
-
img_node['src'] = img_url
|
40
|
-
end
|
41
|
-
node.to_html
|
42
|
-
end
|
43
|
-
|
44
12
|
def render(context)
|
45
|
-
url
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
article.children.each do |node|
|
50
|
-
content.append self._handle_node(node)
|
51
|
-
end
|
52
|
-
|
53
|
-
content.join("\n")
|
13
|
+
url = Liquid::Template.parse(@content).render(context).strip
|
14
|
+
uri = URI(url)
|
15
|
+
syncer = SyncerFactory.get_syncer(uri)
|
16
|
+
return syncer.gen_html
|
54
17
|
end
|
55
18
|
|
56
19
|
Liquid::Template.register_tag "artisync", self
|
@@ -0,0 +1,35 @@
|
|
1
|
+
require 'net/http'
|
2
|
+
|
3
|
+
|
4
|
+
# A browser-like user agent is sent with every request; otherwise certain
# sites such as Zhihu answer with HTTP 400.
USER_AGENT = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.86 Safari/537.36'.freeze

# Template of the error raised by hooks that a concrete syncer must override.
MESSAGE = "AbstractSyncer::[%s]: override required.".freeze

# Base class for site-specific article syncers.
#
# It stores the article URI and knows how to download the page; the
# extraction hooks (+get_article_xpath+, +get_article_nodes+, +gen_html+)
# raise until a subclass overrides them.
class AbstractSyncer
  # @param uri [URI::Generic] address of the article page
  def initialize(uri)
    @uri = uri
  end

  # Downloads the page at the stored URI, sending USER_AGENT as the
  # User-Agent header.
  #
  # @return [String] the response body as returned by Net::HTTP
  def _fetch_html
    uri = @uri
    # Enable TLS only when the URI is https, so plain http URIs work too.
    res = Net::HTTP.start(uri.hostname, uri.port, :use_ssl => uri.scheme == 'https') do |http|
      http.request(Net::HTTP::Get.new(uri, {'User-Agent' => USER_AGENT}))
    end

    res.body
  end

  # Hook: XPath expression locating the article container.
  #
  # @raise [RuntimeError] always, unless overridden
  def get_article_xpath
    raise format(MESSAGE, __method__)
  end

  # Hook: the nodes that make up the article.
  #
  # @raise [RuntimeError] always, unless overridden
  def get_article_nodes
    raise format(MESSAGE, __method__)
  end

  # Hook: the HTML to embed for the article.
  #
  # @raise [RuntimeError] always, unless overridden
  def gen_html
    raise format(MESSAGE, __method__)
  end
end
|
@@ -0,0 +1,31 @@
|
|
1
|
+
require_relative 'per_node_syncer'
|
2
|
+
require 'nokogiri'
|
3
|
+
|
4
|
+
|
5
|
+
# Syncer for articles hosted on Medium.
class MediumSyncer < PerNodeSyncer
  # @return [String] XPath of the container holding the article body
  def get_article_xpath
    '//article/div/section/div/div'
  end

  # Overrides the parent to skip the first child of the article container.
  #
  # @return [Array] the article's child nodes without the first one; empty
  #   when the container has no children or was not found
  def get_article_nodes
    page_html = _fetch_html
    article = Nokogiri::HTML(page_html).xpath(get_article_xpath)

    # Medium embeds an Author section first, which is not needed for the
    # article. drop(1) yields an empty list instead of nil when there is
    # nothing to skip, so gen_html can always iterate the result.
    article.children.drop(1)
  end

  # Renders one article node as HTML. A figure is replaced by its last
  # <img>, stripped of fixed dimensions; a figure without any <img> is
  # rendered unchanged.
  #
  # @param node [Nokogiri::XML::Node]
  # @return [String]
  def _handle_node(node)
    if node.name == 'figure'
      img = node.css('img').last
      if img
        node = img
        # to make sure image scales right
        node.remove_attribute('width')
        node.remove_attribute('height')
      end
    end
    node.to_html
  end
end
|
@@ -0,0 +1,34 @@
|
|
1
|
+
require_relative 'abstract_syncer'
|
2
|
+
require 'nokogiri'
|
3
|
+
|
4
|
+
# Helpers for stripping presentational attributes from scraped nodes.
module NodeAttrModule
  # Attribute names removed from every synced node.
  ATTRS = %w[class id].freeze

  # Removes each attribute listed in ATTRS from +node+ when it is set.
  #
  # @param node [#[], #remove_attribute] node to clean in place
  # @return [Array<String>] ATTRS
  def self.remove_common_attr(node)
    ATTRS.each { |attr| node.remove_attribute(attr) if node[attr] }
  end
end
|
12
|
+
|
13
|
+
|
14
|
+
# Syncer that renders an article one child node at a time. Subclasses
# supply +get_article_xpath+ and +_handle_node+.
class PerNodeSyncer < AbstractSyncer
  include NodeAttrModule

  # Fetches the page and returns the children of the node set matched by
  # the subclass's article XPath.
  def get_article_nodes
    document = Nokogiri::HTML(_fetch_html)
    document.xpath(get_article_xpath).children
  end

  # Cleans every article node, renders it through +_handle_node+ and joins
  # the rendered pieces with newlines.
  #
  # @return [String]
  def gen_html
    rendered = get_article_nodes.map do |node|
      NodeAttrModule.remove_common_attr(node)
      _handle_node(node)
    end

    rendered.join("\n")
  end
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
require_relative 'zhihu_syncer'
|
2
|
+
require_relative 'weixin_syncer'
|
3
|
+
require_relative 'medium_syncer'
|
4
|
+
require_relative 'csdn_syncer'
|
5
|
+
|
6
|
+
|
7
|
+
# Host names of the supported sites (lower case).
ZHIHU_ZHUANLAN = 'zhuanlan.zhihu.com'.freeze
WEIXIN = 'mp.weixin.qq.com'.freeze
MEDIUM = 'medium.com'.freeze
CSDN = 'blog.csdn.net'.freeze

# Chooses the syncer implementation for an article URI based on its host.
class SyncerFactory
  # @param uri [URI::Generic] address of the article page
  # @return [AbstractSyncer] a syncer built for +uri+
  # @raise [RuntimeError] when the host is not one of the supported sites
  def self.get_syncer(uri)
    host_name = uri.host

    # Host names are case-insensitive, so compare in lower case; to_s keeps
    # a URI without a host on the "not supported" path.
    case host_name.to_s.downcase
    when ZHIHU_ZHUANLAN
      ZhihuSyncer.new(uri)
    when WEIXIN
      WeixinSyncer.new(uri)
    when MEDIUM
      MediumSyncer.new(uri)
    when CSDN
      CSDNSyncer.new(uri)
    else
      raise "Not supported website for host: #{host_name}"
    end
  end
end
|
30
|
+
|
@@ -0,0 +1,21 @@
|
|
1
|
+
require_relative 'per_node_syncer'
|
2
|
+
require 'nokogiri'
|
3
|
+
|
4
|
+
|
5
|
+
# Syncer for articles hosted on WeChat (mp.weixin.qq.com).
class WeixinSyncer < PerNodeSyncer
  # @return [String] XPath of the container holding the article body
  def get_article_xpath
    '//div[contains(@class, "rich_media_content") and contains(@id, "js_content")]'
  end

  # Renders one article node as HTML after promoting the data-src of its
  # direct <img> children to src.
  #
  # @param node [Nokogiri::XML::Node]
  # @return [String]
  def _handle_node(node)
    node.children.each do |child_node|
      next unless child_node.name == 'img'

      lazy_src = child_node['data-src']
      # Without a data-src there is nothing to promote; leave src untouched.
      next unless lazy_src

      child_node['src'] = lazy_src
      # Drop the attribute outright rather than assigning nil to it.
      child_node.remove_attribute('data-src')
    end
    node.to_html
  end
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
require_relative 'per_node_syncer'
|
2
|
+
require 'nokogiri'
|
3
|
+
|
4
|
+
|
5
|
+
# Syncer for articles hosted on Zhihu Zhuanlan.
class ZhihuSyncer < PerNodeSyncer
  # @return [String] XPath of the container holding the article body
  def get_article_xpath
    '//div[contains(@class, "Post-RichText") and contains(@class, "ztext")]'
  end

  # Renders one article node as HTML. A figure is replaced by its last
  # <img>, whose src is taken from data-actualsrc when that is present; a
  # figure without any <img> is rendered unchanged.
  #
  # @param node [Nokogiri::XML::Node]
  # @return [String]
  def _handle_node(node)
    if node.name == 'figure'
      img_node = node.css('img').last
      if img_node
        node = img_node
        actual_src = node['data-actualsrc']
        # Keep the existing src when no data-actualsrc is provided.
        node['src'] = actual_src if actual_src
      end
    end

    node.to_html
  end
end
|
metadata
CHANGED
@@ -1,15 +1,29 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: jekyll-artisync
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: '0.
|
4
|
+
version: '0.8'
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
|
-
- Junhan
|
7
|
+
- Junhan Zhu
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-
|
12
|
-
dependencies:
|
11
|
+
date: 2020-06-06 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: nokogiri
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.10'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.10'
|
13
27
|
description: Take an article from a given site and display on current Jekyll page.
|
14
28
|
email:
|
15
29
|
- junhanoct@gmail.com
|
@@ -18,6 +32,13 @@ extensions: []
|
|
18
32
|
extra_rdoc_files: []
|
19
33
|
files:
|
20
34
|
- lib/jekyll-artisync.rb
|
35
|
+
- lib/syncers/abstract_syncer.rb
|
36
|
+
- lib/syncers/csdn_syncer.rb
|
37
|
+
- lib/syncers/medium_syncer.rb
|
38
|
+
- lib/syncers/per_node_syncer.rb
|
39
|
+
- lib/syncers/syncer_factory.rb
|
40
|
+
- lib/syncers/weixin_syncer.rb
|
41
|
+
- lib/syncers/zhihu_syncer.rb
|
21
42
|
homepage: https://github.com/junhan-z/jekyll-artisync
|
22
43
|
licenses:
|
23
44
|
- MIT
|