jekyll-artisync 0.2 → 0.8

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: bdd1c95dc86f0e265d5e748dd9559cff17968ad55570e7d610ec2441a7339382
4
- data.tar.gz: 184d76a6fe6174d9c0888dbe8126b2bf16a1e6306f28dcd836b347dd2698527a
3
+ metadata.gz: 01afb17eccd2097c95f4071c0dce0074ec1156b70e455b78ec8c0c961e6aaf3e
4
+ data.tar.gz: 72720da82e6a39b40b0682f427c4d75397559cd7ecc23e802e9088675db60616
5
5
  SHA512:
6
- metadata.gz: 6eead02cfc58bfb5bb24d3860e222a6e04ba3883a9a40a8a9c8e673efc94b29b22f851631e103dbc93ca1ff0a9e43ccefeca1eec1bbf0839c395baa0d2c86a94
7
- data.tar.gz: 7c34719be25ff7b9ab5501c2a2ef5512d71678a4c3ea918ff7b6d53ae8f874adc7683790d8468e1afaf177e167584ae7fe111d8d35f9ea1f432665aa1244cb97
6
+ metadata.gz: fd53ace05990ec40ebe945eebc5981cbddf8c8270417f7885297e8a827dada3a1fd8fb7c7f74dae13efa5a2a4beb0b704a795c752041df16da816e3c249bbd0e
7
+ data.tar.gz: e9df8042e29b3301787bb4247e826f0b8b6ae8d247a61e31e2a9978383b0e5f39a89d66981ea558140fe2e166f03721f19faf99757c9604d43544fae9d16899b
@@ -1,14 +1,6 @@
1
- require 'net/http'
2
- require 'nokogiri'
3
-
1
+ require 'syncers/syncer_factory'
4
2
  require "jekyll"
5
3
 
6
- # user agent is necessary otherwise certain sites such as Zhihu throws 400
7
- USER_AGENT = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.86 Safari/537.36'
8
-
9
- SITE_TO_ARTICLE_XPATH = {
10
- 'zhihu' => '//div[contains(@class, "Post-RichText") and contains(@class, "ztext")]',
11
- }
12
4
 
13
5
  class ArticleSyncEmbed < Liquid::Tag
14
6
 
@@ -17,40 +9,11 @@ class ArticleSyncEmbed < Liquid::Tag
17
9
  @content = content
18
10
  end
19
11
 
20
- def _fetch_html(url)
21
- uri = URI(url)
22
- res = Net::HTTP.start(uri.hostname, uri.port, :use_ssl => true) do |http|
23
- # :use_ssl => true for the uri is https
24
- http.request(Net::HTTP::Get.new(uri, {'User-Agent' => USER_AGENT}))
25
- end
26
-
27
- res.body
28
- end
29
-
30
- def _handle_node(node)
31
- case node.name
32
- when 'figure'
33
- img_node = node.children[1]
34
-
35
- img_url = img_node['data-actualsrc']
36
- img_url['_b.jpg'] = '_720w.jpg'
37
- img_url['/v2'] = "/80/v2"
38
-
39
- img_node['src'] = img_url
40
- end
41
- node.to_html
42
- end
43
-
44
12
  def render(context)
45
- url, site = @content.strip.split
46
- page_html = self._fetch_html(url)
47
- article = Nokogiri::HTML(page_html).xpath(SITE_TO_ARTICLE_XPATH[site])
48
- content = []
49
- article.children.each do |node|
50
- content.append self._handle_node(node)
51
- end
52
-
53
- content.join("\n")
13
+ url = Liquid::Template.parse(@content).render(context).strip
14
+ uri = URI(url)
15
+ syncer = SyncerFactory.get_syncer(uri)
16
+ return syncer.gen_html
54
17
  end
55
18
 
56
19
  Liquid::Template.register_tag "artisync", self
@@ -0,0 +1,35 @@
1
+ require 'net/http'
2
+
3
+
4
# A browser-like User-Agent is required: without it certain sites, such as
# Zhihu, answer the request with HTTP 400.
USER_AGENT = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.86 Safari/537.36'.freeze

# Format string for the error raised by an abstract method that was not
# overridden; `%s` receives the name of the offending method.
MESSAGE = "AbstractSyncer::[%s]: override required.".freeze
8
+
9
# Base class for the site-specific article syncers.
#
# It stores the article URI and provides the shared page-download helper.
# The three template methods below only raise; a subclass is expected to
# override them.
class AbstractSyncer
  # @param uri [URI::Generic] location of the article; must respond to
  #   `hostname`, `port` and `scheme`
  def initialize(uri)
    @uri = uri
  end

  # Downloads the page at the stored URI, sending USER_AGENT as the
  # User-Agent header.
  #
  # TLS is switched on only for https URIs, so plain http URLs can be
  # fetched too (previously TLS was forced for every request).
  #
  # @return the body of the HTTP response
  def _fetch_html
    uri = @uri
    response = Net::HTTP.start(uri.hostname, uri.port, use_ssl: uri.scheme == 'https') do |http|
      http.request(Net::HTTP::Get.new(uri, { 'User-Agent' => USER_AGENT }))
    end

    response.body
  end

  # Abstract: XPath expression selecting the article container.
  #
  # @raise [RuntimeError] always, unless overridden
  def get_article_xpath
    raise MESSAGE % [__method__]
  end

  # Abstract: the nodes that make up the article.
  #
  # @raise [RuntimeError] always, unless overridden
  def get_article_nodes
    raise MESSAGE % [__method__]
  end

  # Abstract: the final HTML for the article.
  #
  # @raise [RuntimeError] always, unless overridden
  def gen_html
    raise MESSAGE % [__method__]
  end
end
@@ -0,0 +1,14 @@
1
+ require_relative 'per_node_syncer'
2
+ require 'nokogiri'
3
+
4
+
5
# Syncer for CSDN articles; every article node is emitted unchanged.
class CSDNSyncer < PerNodeSyncer
  # XPath selecting any <div> whose id contains "content_views".
  def get_article_xpath
    %q{//div[contains(@id, "content_views")]}
  end

  # No per-node rewriting is done here: the node is serialized as-is.
  def _handle_node(node)
    node.to_html
  end
end
@@ -0,0 +1,31 @@
1
+ require_relative 'per_node_syncer'
2
+ require 'nokogiri'
3
+
4
+
5
# Syncer for Medium articles.
class MediumSyncer < PerNodeSyncer
  # XPath selecting the container that holds the article body.
  def get_article_xpath
    '//article/div/section/div/div'
  end

  # Overrides the default node lookup to drop the first child of the
  # article container: Medium embeds an author section there, which is not
  # needed for the article.
  #
  # @return the remaining child nodes (empty when the container has no
  #   children or was not found)
  def get_article_nodes
    page_html = _fetch_html
    children = Nokogiri::HTML(page_html).xpath(get_article_xpath).children

    # The range slice may come back nil when there is nothing to slice;
    # fall back to the (empty) node set so callers can still iterate.
    children[1..] || children
  end

  # Serializes one article node.
  #
  # A <figure> is replaced by its last <img> when it contains one; a figure
  # without any image (the node set is empty, never nil) is kept as it is
  # instead of crashing. In both cases width/height are stripped so the
  # image scales with the page.
  def _handle_node(node)
    if node.name == 'figure'
      img_node = node.css('img').last
      node = img_node if img_node
      # to make sure image scales right
      node.remove_attribute('width')
      node.remove_attribute('height')
    end

    node.to_html
  end
end
@@ -0,0 +1,34 @@
1
+ require_relative 'abstract_syncer'
2
+ require 'nokogiri'
3
+
4
# Helper for stripping presentation-related attributes from a node.
module NodeAttrModule
  # Attribute names removed from every node; frozen so the shared list
  # cannot be modified by accident.
  ATTRS = %w[class id].freeze

  # Removes each attribute listed in ATTRS from +node+ when it is set.
  #
  # @param node an object responding to `[]` and `remove_attribute`
  # @return [Array<String>] ATTRS
  def self.remove_common_attr(node)
    ATTRS.each do |attr|
      node.remove_attribute(attr) if node[attr]
    end
  end
end
12
+
13
+
14
# Syncer that builds the article HTML one node at a time. Subclasses supply
# `get_article_xpath` and `_handle_node`.
class PerNodeSyncer < AbstractSyncer
  include NodeAttrModule

  # Fetches the page, parses it and returns the children of whatever the
  # article XPath matches.
  def get_article_nodes
    document = Nokogiri::HTML(_fetch_html)
    document.xpath(get_article_xpath).children
  end

  # Strips the common attributes from every article node, lets the subclass
  # render it, and joins the rendered pieces with newlines.
  def gen_html
    rendered = get_article_nodes.map do |node|
      NodeAttrModule.remove_common_attr(node)
      _handle_node(node)
    end

    rendered.join("\n")
  end
end
@@ -0,0 +1,30 @@
1
+ require_relative 'zhihu_syncer'
2
+ require_relative 'weixin_syncer'
3
+ require_relative 'medium_syncer'
4
+ require_relative 'csdn_syncer'
5
+
6
+
7
# Host names of the supported sites (lower case); frozen so the shared
# strings cannot be mutated.
ZHIHU_ZHUANLAN = 'zhuanlan.zhihu.com'.freeze
WEIXIN = 'mp.weixin.qq.com'.freeze
MEDIUM = 'medium.com'.freeze
CSDN = 'blog.csdn.net'.freeze
11
+
12
# Picks the syncer implementation that matches an article's host.
class SyncerFactory
  # Builds the syncer for +uri+ based on its host name.
  #
  # Host names are case-insensitive and `URI#host` returns the host exactly
  # as written, so the comparison is done on a lower-cased copy.
  #
  # @param uri [URI::Generic] article location
  # @return a new syncer instance constructed with +uri+
  # @raise [RuntimeError] when the host is not one of the supported sites
  def self.get_syncer(uri)
    host_name = uri.host

    case host_name.to_s.downcase
    when ZHIHU_ZHUANLAN then ZhihuSyncer.new(uri)
    when WEIXIN then WeixinSyncer.new(uri)
    when MEDIUM then MediumSyncer.new(uri)
    when CSDN then CSDNSyncer.new(uri)
    else
      raise "Not supported website for host: #{host_name}"
    end
  end
end
30
+
@@ -0,0 +1,21 @@
1
+ require_relative 'per_node_syncer'
2
+ require 'nokogiri'
3
+
4
+
5
# Syncer for Weixin (mp.weixin.qq.com) articles.
class WeixinSyncer < PerNodeSyncer
  # XPath selecting the <div> whose class contains "rich_media_content" and
  # whose id contains "js_content".
  def get_article_xpath
    '//div[contains(@class, "rich_media_content") and contains(@id, "js_content")]'
  end

  # Serializes one article node after making its images displayable.
  #
  # Every <img> in the node (the node itself or any descendant, not only
  # direct children) that carries a non-empty `data-src` gets that value
  # copied into `src`, and the `data-src` attribute is then removed.
  # Images without `data-src` keep whatever `src` they already have.
  def _handle_node(node)
    node.xpath('descendant-or-self::img').each do |img_node|
      lazy_src = img_node['data-src']
      next if lazy_src.nil? || lazy_src.empty?

      img_node['src'] = lazy_src
      img_node.remove_attribute('data-src')
    end

    node.to_html
  end
end
@@ -0,0 +1,23 @@
1
+ require_relative 'per_node_syncer'
2
+ require 'nokogiri'
3
+
4
+
5
# Syncer for Zhihu column (zhuanlan) articles.
class ZhihuSyncer < PerNodeSyncer
  # XPath selecting the <div> whose class contains both "Post-RichText" and
  # "ztext".
  def get_article_xpath
    '//div[contains(@class, "Post-RichText") and contains(@class, "ztext")]'
  end

  # Serializes one article node.
  #
  # A <figure> containing images is replaced by its last <img>, whose `src`
  # is taken from `data-actualsrc`. When that attribute is missing or empty
  # the existing `src` is left untouched instead of being blanked out.
  # Figures without an image and all other nodes are emitted unchanged.
  def _handle_node(node)
    if node.name == 'figure'
      img_node = node.css('img').last
      if img_node
        node = img_node
        actual_src = node['data-actualsrc']
        node['src'] = actual_src if actual_src && !actual_src.empty?
      end
    end

    node.to_html
  end
end
metadata CHANGED
@@ -1,15 +1,29 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: jekyll-artisync
3
3
  version: !ruby/object:Gem::Version
4
- version: '0.2'
4
+ version: '0.8'
5
5
  platform: ruby
6
6
  authors:
7
- - Junhan
7
+ - Junhan Zhu
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-05-24 00:00:00.000000000 Z
12
- dependencies: []
11
+ date: 2020-06-06 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: nokogiri
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.10'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.10'
13
27
  description: Take an article from a given site and display on current Jekyll page.
14
28
  email:
15
29
  - junhanoct@gmail.com
@@ -18,6 +32,13 @@ extensions: []
18
32
  extra_rdoc_files: []
19
33
  files:
20
34
  - lib/jekyll-artisync.rb
35
+ - lib/syncers/abstract_syncer.rb
36
+ - lib/syncers/csdn_syncer.rb
37
+ - lib/syncers/medium_syncer.rb
38
+ - lib/syncers/per_node_syncer.rb
39
+ - lib/syncers/syncer_factory.rb
40
+ - lib/syncers/weixin_syncer.rb
41
+ - lib/syncers/zhihu_syncer.rb
21
42
  homepage: https://github.com/junhan-z/jekyll-artisync
22
43
  licenses:
23
44
  - MIT