jekyll-artisync 0.2 → 0.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/jekyll-artisync.rb +5 -42
- data/lib/syncers/abstract_syncer.rb +35 -0
- data/lib/syncers/csdn_syncer.rb +14 -0
- data/lib/syncers/medium_syncer.rb +31 -0
- data/lib/syncers/per_node_syncer.rb +34 -0
- data/lib/syncers/syncer_factory.rb +30 -0
- data/lib/syncers/weixin_syncer.rb +21 -0
- data/lib/syncers/zhihu_syncer.rb +23 -0
- metadata +25 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 01afb17eccd2097c95f4071c0dce0074ec1156b70e455b78ec8c0c961e6aaf3e
|
4
|
+
data.tar.gz: 72720da82e6a39b40b0682f427c4d75397559cd7ecc23e802e9088675db60616
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: fd53ace05990ec40ebe945eebc5981cbddf8c8270417f7885297e8a827dada3a1fd8fb7c7f74dae13efa5a2a4beb0b704a795c752041df16da816e3c249bbd0e
|
7
|
+
data.tar.gz: e9df8042e29b3301787bb4247e826f0b8b6ae8d247a61e31e2a9978383b0e5f39a89d66981ea558140fe2e166f03721f19faf99757c9604d43544fae9d16899b
|
data/lib/jekyll-artisync.rb
CHANGED
@@ -1,14 +1,6 @@
|
|
1
|
-
require 'net/http'
|
2
|
-
require 'nokogiri'
|
3
|
-
|
1
|
+
require 'syncers/syncer_factory'
|
4
2
|
require "jekyll"
|
5
3
|
|
6
|
-
# user agent is necessary otherwise certain sites such as Zhihu throws 400
|
7
|
-
USER_AGENT = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.86 Safari/537.36'
|
8
|
-
|
9
|
-
SITE_TO_ARTICLE_XPATH = {
|
10
|
-
'zhihu' => '//div[contains(@class, "Post-RichText") and contains(@class, "ztext")]',
|
11
|
-
}
|
12
4
|
|
13
5
|
class ArticleSyncEmbed < Liquid::Tag
|
14
6
|
|
@@ -17,40 +9,11 @@ class ArticleSyncEmbed < Liquid::Tag
|
|
17
9
|
@content = content
|
18
10
|
end
|
19
11
|
|
20
|
-
def _fetch_html(url)
|
21
|
-
uri = URI(url)
|
22
|
-
res = Net::HTTP.start(uri.hostname, uri.port, :use_ssl => true) do |http|
|
23
|
-
# :use_ssl => true for the uri is https
|
24
|
-
http.request(Net::HTTP::Get.new(uri, {'User-Agent' => USER_AGENT}))
|
25
|
-
end
|
26
|
-
|
27
|
-
res.body
|
28
|
-
end
|
29
|
-
|
30
|
-
def _handle_node(node)
|
31
|
-
case node.name
|
32
|
-
when 'figure'
|
33
|
-
img_node = node.children[1]
|
34
|
-
|
35
|
-
img_url = img_node['data-actualsrc']
|
36
|
-
img_url['_b.jpg'] = '_720w.jpg'
|
37
|
-
img_url['/v2'] = "/80/v2"
|
38
|
-
|
39
|
-
img_node['src'] = img_url
|
40
|
-
end
|
41
|
-
node.to_html
|
42
|
-
end
|
43
|
-
|
44
12
|
def render(context)
|
45
|
-
url
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
article.children.each do |node|
|
50
|
-
content.append self._handle_node(node)
|
51
|
-
end
|
52
|
-
|
53
|
-
content.join("\n")
|
13
|
+
url = Liquid::Template.parse(@content).render(context).strip
|
14
|
+
uri = URI(url)
|
15
|
+
syncer = SyncerFactory.get_syncer(uri)
|
16
|
+
return syncer.gen_html
|
54
17
|
end
|
55
18
|
|
56
19
|
Liquid::Template.register_tag "artisync", self
|
data/lib/syncers/abstract_syncer.rb
ADDED
@@ -0,0 +1,35 @@
|
|
1
|
+
require 'net/http'
|
2
|
+
|
3
|
+
|
4
|
+
# user agent is necessary otherwise certain sites such as Zhihu throws 400
|
5
|
+
USER_AGENT = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.86 Safari/537.36'
|
6
|
+
|
7
|
+
MESSAGE = "AbstractSyncer::[%s]: override required."
|
8
|
+
|
9
|
+
class AbstractSyncer
|
10
|
+
def initialize(uri)
|
11
|
+
@uri = uri
|
12
|
+
end
|
13
|
+
|
14
|
+
def _fetch_html
|
15
|
+
uri = @uri
|
16
|
+
res = Net::HTTP.start(uri.hostname, uri.port, :use_ssl => true) do |http|
|
17
|
+
# :use_ssl => true for the uri is https
|
18
|
+
http.request(Net::HTTP::Get.new(uri, {'User-Agent' => USER_AGENT}))
|
19
|
+
end
|
20
|
+
|
21
|
+
res.body
|
22
|
+
end
|
23
|
+
|
24
|
+
def get_article_xpath
|
25
|
+
raise MESSAGE % [__method__]
|
26
|
+
end
|
27
|
+
|
28
|
+
def get_article_nodes
|
29
|
+
raise MESSAGE % [__method__]
|
30
|
+
end
|
31
|
+
|
32
|
+
def gen_html
|
33
|
+
raise MESSAGE % [__method__]
|
34
|
+
end
|
35
|
+
end
|
data/lib/syncers/medium_syncer.rb
ADDED
@@ -0,0 +1,31 @@
|
|
1
|
+
require_relative 'per_node_syncer'
|
2
|
+
require 'nokogiri'
|
3
|
+
|
4
|
+
|
5
|
+
class MediumSyncer < PerNodeSyncer
|
6
|
+
def get_article_xpath
|
7
|
+
'//article/div/section/div/div'
|
8
|
+
end
|
9
|
+
|
10
|
+
# override
|
11
|
+
def get_article_nodes
|
12
|
+
page_html = self._fetch_html
|
13
|
+
article = Nokogiri::HTML(page_html).xpath(self.get_article_xpath)
|
14
|
+
|
15
|
+
# Medium embeds Author section, which is not needed for article
|
16
|
+
article.children[1..]
|
17
|
+
end
|
18
|
+
|
19
|
+
def _handle_node(node)
|
20
|
+
case node.name
|
21
|
+
when "figure"
|
22
|
+
img_nodes = node.css('img')
|
23
|
+
node = img_nodes[-1] if img_nodes
|
24
|
+
# to make sure image scales right
|
25
|
+
node.remove_attribute('width')
|
26
|
+
node.remove_attribute('height')
|
27
|
+
end
|
28
|
+
node.to_html
|
29
|
+
end
|
30
|
+
|
31
|
+
end
|
data/lib/syncers/per_node_syncer.rb
ADDED
@@ -0,0 +1,34 @@
|
|
1
|
+
require_relative 'abstract_syncer'
|
2
|
+
require 'nokogiri'
|
3
|
+
|
4
|
+
module NodeAttrModule
|
5
|
+
ATTRS = ['class', 'id']
|
6
|
+
def NodeAttrModule.remove_common_attr(node)
|
7
|
+
ATTRS.each do |attr|
|
8
|
+
node.remove_attribute(attr) if node[attr]
|
9
|
+
end
|
10
|
+
end
|
11
|
+
end
|
12
|
+
|
13
|
+
|
14
|
+
class PerNodeSyncer < AbstractSyncer
|
15
|
+
include NodeAttrModule
|
16
|
+
|
17
|
+
def get_article_nodes
|
18
|
+
page_html = self._fetch_html
|
19
|
+
article = Nokogiri::HTML(page_html).xpath(self.get_article_xpath)
|
20
|
+
article.children
|
21
|
+
end
|
22
|
+
|
23
|
+
def gen_html
|
24
|
+
article_nodes = self.get_article_nodes
|
25
|
+
content = []
|
26
|
+
article_nodes.each do |node|
|
27
|
+
NodeAttrModule.remove_common_attr(node)
|
28
|
+
content.append self._handle_node(node)
|
29
|
+
end
|
30
|
+
|
31
|
+
content.join("\n")
|
32
|
+
end
|
33
|
+
|
34
|
+
end
|
data/lib/syncers/syncer_factory.rb
ADDED
@@ -0,0 +1,30 @@
|
|
1
|
+
require_relative 'zhihu_syncer'
|
2
|
+
require_relative 'weixin_syncer'
|
3
|
+
require_relative 'medium_syncer'
|
4
|
+
require_relative 'csdn_syncer'
|
5
|
+
|
6
|
+
|
7
|
+
ZHIHU_ZHUANLAN = 'zhuanlan.zhihu.com'
|
8
|
+
WEIXIN = 'mp.weixin.qq.com'
|
9
|
+
MEDIUM = 'medium.com'
|
10
|
+
CSDN = 'blog.csdn.net'
|
11
|
+
|
12
|
+
class SyncerFactory
|
13
|
+
def self.get_syncer(uri)
|
14
|
+
host_name = uri.host
|
15
|
+
|
16
|
+
case host_name
|
17
|
+
when ZHIHU_ZHUANLAN
|
18
|
+
return ZhihuSyncer.new(uri)
|
19
|
+
when WEIXIN
|
20
|
+
return WeixinSyncer.new(uri)
|
21
|
+
when MEDIUM
|
22
|
+
return MediumSyncer.new(uri)
|
23
|
+
when CSDN
|
24
|
+
return CSDNSyncer.new(uri)
|
25
|
+
else
|
26
|
+
raise "Not supported website for host: #{host_name}"
|
27
|
+
end
|
28
|
+
end
|
29
|
+
end
|
30
|
+
|
data/lib/syncers/weixin_syncer.rb
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
require_relative 'per_node_syncer'
|
2
|
+
require 'nokogiri'
|
3
|
+
|
4
|
+
|
5
|
+
class WeixinSyncer < PerNodeSyncer
|
6
|
+
def get_article_xpath
|
7
|
+
'//div[contains(@class, "rich_media_content") and contains(@id, "js_content")]'
|
8
|
+
end
|
9
|
+
|
10
|
+
def _handle_node(node)
|
11
|
+
node.children.each do |child_node|
|
12
|
+
case child_node.name
|
13
|
+
when 'img'
|
14
|
+
child_node['src'] = child_node['data-src']
|
15
|
+
child_node['data-src'] = nil
|
16
|
+
end
|
17
|
+
end
|
18
|
+
node.to_html
|
19
|
+
end
|
20
|
+
|
21
|
+
end
|
data/lib/syncers/zhihu_syncer.rb
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
require_relative 'per_node_syncer'
|
2
|
+
require 'nokogiri'
|
3
|
+
|
4
|
+
|
5
|
+
class ZhihuSyncer < PerNodeSyncer
|
6
|
+
def get_article_xpath
|
7
|
+
'//div[contains(@class, "Post-RichText") and contains(@class, "ztext")]'
|
8
|
+
end
|
9
|
+
|
10
|
+
def _handle_node(node)
|
11
|
+
case node.name
|
12
|
+
when 'figure'
|
13
|
+
img_node = node.css('img')[-1]
|
14
|
+
if img_node
|
15
|
+
node = img_node
|
16
|
+
node['src'] = node['data-actualsrc']
|
17
|
+
end
|
18
|
+
end
|
19
|
+
|
20
|
+
node.to_html
|
21
|
+
end
|
22
|
+
|
23
|
+
end
|
metadata
CHANGED
@@ -1,15 +1,29 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: jekyll-artisync
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: '0.2'
|
4
|
+
version: '0.8'
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
|
-
- Junhan
|
7
|
+
- Junhan Zhu
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-
|
12
|
-
dependencies: []
|
11
|
+
date: 2020-06-06 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: nokogiri
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.10'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.10'
|
13
27
|
description: Take an article from a given site and display on current Jekyll page.
|
14
28
|
email:
|
15
29
|
- junhanoct@gmail.com
|
@@ -18,6 +32,13 @@ extensions: []
|
|
18
32
|
extra_rdoc_files: []
|
19
33
|
files:
|
20
34
|
- lib/jekyll-artisync.rb
|
35
|
+
- lib/syncers/abstract_syncer.rb
|
36
|
+
- lib/syncers/csdn_syncer.rb
|
37
|
+
- lib/syncers/medium_syncer.rb
|
38
|
+
- lib/syncers/per_node_syncer.rb
|
39
|
+
- lib/syncers/syncer_factory.rb
|
40
|
+
- lib/syncers/weixin_syncer.rb
|
41
|
+
- lib/syncers/zhihu_syncer.rb
|
21
42
|
homepage: https://github.com/junhan-z/jekyll-artisync
|
22
43
|
licenses:
|
23
44
|
- MIT
|