jekyll-artisync 0.2 → 0.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: bdd1c95dc86f0e265d5e748dd9559cff17968ad55570e7d610ec2441a7339382
4
- data.tar.gz: 184d76a6fe6174d9c0888dbe8126b2bf16a1e6306f28dcd836b347dd2698527a
3
+ metadata.gz: 01afb17eccd2097c95f4071c0dce0074ec1156b70e455b78ec8c0c961e6aaf3e
4
+ data.tar.gz: 72720da82e6a39b40b0682f427c4d75397559cd7ecc23e802e9088675db60616
5
5
  SHA512:
6
- metadata.gz: 6eead02cfc58bfb5bb24d3860e222a6e04ba3883a9a40a8a9c8e673efc94b29b22f851631e103dbc93ca1ff0a9e43ccefeca1eec1bbf0839c395baa0d2c86a94
7
- data.tar.gz: 7c34719be25ff7b9ab5501c2a2ef5512d71678a4c3ea918ff7b6d53ae8f874adc7683790d8468e1afaf177e167584ae7fe111d8d35f9ea1f432665aa1244cb97
6
+ metadata.gz: fd53ace05990ec40ebe945eebc5981cbddf8c8270417f7885297e8a827dada3a1fd8fb7c7f74dae13efa5a2a4beb0b704a795c752041df16da816e3c249bbd0e
7
+ data.tar.gz: e9df8042e29b3301787bb4247e826f0b8b6ae8d247a61e31e2a9978383b0e5f39a89d66981ea558140fe2e166f03721f19faf99757c9604d43544fae9d16899b
@@ -1,14 +1,6 @@
1
- require 'net/http'
2
- require 'nokogiri'
3
-
1
+ require 'syncers/syncer_factory'
4
2
  require "jekyll"
5
3
 
6
- # user agent is necessary otherwise certain sites such as Zhihu throws 400
7
- USER_AGENT = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.86 Safari/537.36'
8
-
9
- SITE_TO_ARTICLE_XPATH = {
10
- 'zhihu' => '//div[contains(@class, "Post-RichText") and contains(@class, "ztext")]',
11
- }
12
4
 
13
5
  class ArticleSyncEmbed < Liquid::Tag
14
6
 
@@ -17,40 +9,11 @@ class ArticleSyncEmbed < Liquid::Tag
17
9
  @content = content
18
10
  end
19
11
 
20
- def _fetch_html(url)
21
- uri = URI(url)
22
- res = Net::HTTP.start(uri.hostname, uri.port, :use_ssl => true) do |http|
23
- # :use_ssl => true for the uri is https
24
- http.request(Net::HTTP::Get.new(uri, {'User-Agent' => USER_AGENT}))
25
- end
26
-
27
- res.body
28
- end
29
-
30
- def _handle_node(node)
31
- case node.name
32
- when 'figure'
33
- img_node = node.children[1]
34
-
35
- img_url = img_node['data-actualsrc']
36
- img_url['_b.jpg'] = '_720w.jpg'
37
- img_url['/v2'] = "/80/v2"
38
-
39
- img_node['src'] = img_url
40
- end
41
- node.to_html
42
- end
43
-
44
12
  def render(context)
45
- url, site = @content.strip.split
46
- page_html = self._fetch_html(url)
47
- article = Nokogiri::HTML(page_html).xpath(SITE_TO_ARTICLE_XPATH[site])
48
- content = []
49
- article.children.each do |node|
50
- content.append self._handle_node(node)
51
- end
52
-
53
- content.join("\n")
13
+ url = Liquid::Template.parse(@content).render(context).strip
14
+ uri = URI(url)
15
+ syncer = SyncerFactory.get_syncer(uri)
16
+ return syncer.gen_html
54
17
  end
55
18
 
56
19
  Liquid::Template.register_tag "artisync", self
@@ -0,0 +1,35 @@
1
+ require 'net/http'
2
+
3
+
4
+ # user agent is necessary otherwise certain sites such as Zhihu throws 400
5
+ USER_AGENT = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.86 Safari/537.36'
6
+
7
+ MESSAGE = "AbstractSyncer::[%s]: override required."
8
+
9
+ class AbstractSyncer
10
+ def initialize(uri)
11
+ @uri = uri
12
+ end
13
+
14
+ def _fetch_html
15
+ uri = @uri
16
+ res = Net::HTTP.start(uri.hostname, uri.port, :use_ssl => true) do |http|
17
+ # :use_ssl => true for the uri is https
18
+ http.request(Net::HTTP::Get.new(uri, {'User-Agent' => USER_AGENT}))
19
+ end
20
+
21
+ res.body
22
+ end
23
+
24
+ def get_article_xpath
25
+ raise MESSAGE % [__method__]
26
+ end
27
+
28
+ def get_article_nodes
29
+ raise MESSAGE % [__method__]
30
+ end
31
+
32
+ def gen_html
33
+ raise MESSAGE % [__method__]
34
+ end
35
+ end
@@ -0,0 +1,14 @@
1
+ require_relative 'per_node_syncer'
2
+ require 'nokogiri'
3
+
4
+
5
+ class CSDNSyncer < PerNodeSyncer
6
+ def get_article_xpath
7
+ '//div[contains(@id, "content_views")]'
8
+ end
9
+
10
+ def _handle_node(node)
11
+ node.to_html
12
+ end
13
+
14
+ end
@@ -0,0 +1,31 @@
1
+ require_relative 'per_node_syncer'
2
+ require 'nokogiri'
3
+
4
+
5
+ class MediumSyncer < PerNodeSyncer
6
+ def get_article_xpath
7
+ '//article/div/section/div/div'
8
+ end
9
+
10
+ # override
11
+ def get_article_nodes
12
+ page_html = self._fetch_html
13
+ article = Nokogiri::HTML(page_html).xpath(self.get_article_xpath)
14
+
15
+ # Medium embeds Author section, which is not needed for article
16
+ article.children[1..]
17
+ end
18
+
19
+ def _handle_node(node)
20
+ case node.name
21
+ when "figure"
22
+ img_nodes = node.css('img')
23
+ node = img_nodes[-1] if img_nodes
24
+ # to make sure image scales right
25
+ node.remove_attribute('width')
26
+ node.remove_attribute('height')
27
+ end
28
+ node.to_html
29
+ end
30
+
31
+ end
@@ -0,0 +1,34 @@
1
+ require_relative 'abstract_syncer'
2
+ require 'nokogiri'
3
+
4
+ module NodeAttrModule
5
+ ATTRS = ['class', 'id']
6
+ def NodeAttrModule.remove_common_attr(node)
7
+ ATTRS.each do |attr|
8
+ node.remove_attribute(attr) if node[attr]
9
+ end
10
+ end
11
+ end
12
+
13
+
14
+ class PerNodeSyncer < AbstractSyncer
15
+ include NodeAttrModule
16
+
17
+ def get_article_nodes
18
+ page_html = self._fetch_html
19
+ article = Nokogiri::HTML(page_html).xpath(self.get_article_xpath)
20
+ article.children
21
+ end
22
+
23
+ def gen_html
24
+ article_nodes = self.get_article_nodes
25
+ content = []
26
+ article_nodes.each do |node|
27
+ NodeAttrModule.remove_common_attr(node)
28
+ content.append self._handle_node(node)
29
+ end
30
+
31
+ content.join("\n")
32
+ end
33
+
34
+ end
@@ -0,0 +1,30 @@
1
+ require_relative 'zhihu_syncer'
2
+ require_relative 'weixin_syncer'
3
+ require_relative 'medium_syncer'
4
+ require_relative 'csdn_syncer'
5
+
6
+
7
+ ZHIHU_ZHUANLAN = 'zhuanlan.zhihu.com'
8
+ WEIXIN = 'mp.weixin.qq.com'
9
+ MEDIUM = 'medium.com'
10
+ CSDN = 'blog.csdn.net'
11
+
12
+ class SyncerFactory
13
+ def self.get_syncer(uri)
14
+ host_name = uri.host
15
+
16
+ case host_name
17
+ when ZHIHU_ZHUANLAN
18
+ return ZhihuSyncer.new(uri)
19
+ when WEIXIN
20
+ return WeixinSyncer.new(uri)
21
+ when MEDIUM
22
+ return MediumSyncer.new(uri)
23
+ when CSDN
24
+ return CSDNSyncer.new(uri)
25
+ else
26
+ raise "Not supported website for host: #{host_name}"
27
+ end
28
+ end
29
+ end
30
+
@@ -0,0 +1,21 @@
1
+ require_relative 'per_node_syncer'
2
+ require 'nokogiri'
3
+
4
+
5
+ class WeixinSyncer < PerNodeSyncer
6
+ def get_article_xpath
7
+ '//div[contains(@class, "rich_media_content") and contains(@id, "js_content")]'
8
+ end
9
+
10
+ def _handle_node(node)
11
+ node.children.each do |child_node|
12
+ case child_node.name
13
+ when 'img'
14
+ child_node['src'] = child_node['data-src']
15
+ child_node['data-src'] = nil
16
+ end
17
+ end
18
+ node.to_html
19
+ end
20
+
21
+ end
@@ -0,0 +1,23 @@
1
+ require_relative 'per_node_syncer'
2
+ require 'nokogiri'
3
+
4
+
5
+ class ZhihuSyncer < PerNodeSyncer
6
+ def get_article_xpath
7
+ '//div[contains(@class, "Post-RichText") and contains(@class, "ztext")]'
8
+ end
9
+
10
+ def _handle_node(node)
11
+ case node.name
12
+ when 'figure'
13
+ img_node = node.css('img')[-1]
14
+ if img_node
15
+ node = img_node
16
+ node['src'] = node['data-actualsrc']
17
+ end
18
+ end
19
+
20
+ node.to_html
21
+ end
22
+
23
+ end
metadata CHANGED
@@ -1,15 +1,29 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: jekyll-artisync
3
3
  version: !ruby/object:Gem::Version
4
- version: '0.2'
4
+ version: '0.8'
5
5
  platform: ruby
6
6
  authors:
7
- - Junhan
7
+ - Junhan Zhu
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-05-24 00:00:00.000000000 Z
12
- dependencies: []
11
+ date: 2020-06-06 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: nokogiri
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.10'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.10'
13
27
  description: Take an article from a given site and display on current Jekyll page.
14
28
  email:
15
29
  - junhanoct@gmail.com
@@ -18,6 +32,13 @@ extensions: []
18
32
  extra_rdoc_files: []
19
33
  files:
20
34
  - lib/jekyll-artisync.rb
35
+ - lib/syncers/abstract_syncer.rb
36
+ - lib/syncers/csdn_syncer.rb
37
+ - lib/syncers/medium_syncer.rb
38
+ - lib/syncers/per_node_syncer.rb
39
+ - lib/syncers/syncer_factory.rb
40
+ - lib/syncers/weixin_syncer.rb
41
+ - lib/syncers/zhihu_syncer.rb
21
42
  homepage: https://github.com/junhan-z/jekyll-artisync
22
43
  licenses:
23
44
  - MIT