jekyll-artisync 0.4 → 0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 31d37fc6ffdd79cb4d259d1db001b0abea1f2f4d8be3d17b035e2db5e1a7cc80
4
- data.tar.gz: 1b5b26500b61c8e271d0c2a2605b28662c041572a1428469e9d30209c718a0c5
3
+ metadata.gz: '0606097a6fb1063a731eb03353da74882e9d5aca8edf40143f7c6182e189c91a'
4
+ data.tar.gz: 7bac92d7cae8be329f1f31c782efa8faced9f280450164f110f81f7f96865e36
5
5
  SHA512:
6
- metadata.gz: 5d643be4d2d2c4831251e06e6f092ff419148b7c21fbdc1d274642d7dad5cf7de741b635af6e556221e47090b61dd0a28cc4614b9adae8a829062c354ce71468
7
- data.tar.gz: 7334aeea98d30013f3ba030caaeafbb3fbd542d74998f00efb152d1873bbdd83542b9213b0cc041836b2e9becb62622d01e894da0e5fc003b44dbc2a576cb79b
6
+ metadata.gz: 897c6f5173ed4af5541c0784609a136e7f09d3a22e174ea38f4fbc908471a7228a311c670d61557611bbce5d62cb62831f667506f25188f4938562d989d8b207
7
+ data.tar.gz: 6b51c4520cb6a6005994184a478073dac60387e3e363f11432363bd9f38a59312f47763488d2fbdc54b4bb03db4933394ab0852fc5d73f72d75dd5e9d911d977
@@ -1,14 +1,6 @@
1
- require 'net/http'
2
- require 'nokogiri'
3
-
1
+ require 'syncers/syncer_factory'
4
2
  require "jekyll"
5
3
 
6
- # user agent is necessary otherwise certain sites such as Zhihu throws 400
7
- USER_AGENT = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.86 Safari/537.36'
8
-
9
- HOST_TO_ARTICLE_XPATH = {
10
- 'zhuanlan.zhihu.com' => '//div[contains(@class, "Post-RichText") and contains(@class, "ztext")]',
11
- }
12
4
 
13
5
  class ArticleSyncEmbed < Liquid::Tag
14
6
 
@@ -17,42 +9,12 @@ class ArticleSyncEmbed < Liquid::Tag
17
9
  @content = content
18
10
  end
19
11
 
20
- def _fetch_html(uri)
21
- res = Net::HTTP.start(uri.hostname, uri.port, :use_ssl => true) do |http|
22
- # :use_ssl => true for the uri is https
23
- http.request(Net::HTTP::Get.new(uri, {'User-Agent' => USER_AGENT}))
24
- end
25
-
26
- res.body
27
- end
28
-
29
- def _handle_node(node)
30
- case node.name
31
- when 'figure'
32
- img_node = node.children[1]
33
-
34
- img_url = img_node['data-actualsrc']
35
- img_url['_b.jpg'] = '_720w.jpg'
36
- img_url['/v2'] = "/80/v2"
37
-
38
- img_node['src'] = img_url
39
- end
40
- node.to_html
41
- end
42
-
43
12
  def render(context)
44
13
  url = @content.strip
45
14
  uri = URI(url)
46
15
  page_host = uri.hostname
47
- puts page_host
48
- page_html = self._fetch_html(uri)
49
- article = Nokogiri::HTML(page_html).xpath(HOST_TO_ARTICLE_XPATH[page_host])
50
- content = []
51
- article.children.each do |node|
52
- content.append self._handle_node(node)
53
- end
54
-
55
- content.join("\n")
16
+ syncer = SyncerFactory.get_syncer(uri)
17
+ return syncer.gen_html
56
18
  end
57
19
 
58
20
  Liquid::Template.register_tag "artisync", self
@@ -0,0 +1,31 @@
1
+ require 'net/http'
2
+
3
+
4
# A browser-like User-Agent is required; otherwise certain sites such as
# Zhihu answer the request with HTTP 400.
USER_AGENT = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.86 Safari/537.36'.freeze

# Template for the error raised by AbstractSyncer methods that a subclass
# has to override; %s receives the name of the method that was called.
MESSAGE = "AbstractSyncer::[%s]: override required.".freeze

# Base class for site-specific article syncers. It stores the article URI and
# provides the shared page download; subclasses supply get_article_xpath and
# gen_html.
class AbstractSyncer
  # @param uri [URI] address of the article page to sync
  def initialize(uri)
    @uri = uri
  end

  # Downloads the page at @uri with a GET request that carries USER_AGENT.
  #
  # TLS is switched on only when the URI scheme is https, so plain http
  # article URLs no longer fail with a TLS handshake error.
  #
  # @return [String] the response body
  def _fetch_html
    uri = @uri
    response = Net::HTTP.start(uri.hostname, uri.port, use_ssl: uri.scheme == 'https') do |http|
      http.request(Net::HTTP::Get.new(uri, { 'User-Agent' => USER_AGENT }))
    end

    response.body
  end

  # XPath that selects the article container of the page. Must be overridden.
  #
  # @raise [RuntimeError] always, in this base class
  def get_article_xpath
    raise MESSAGE % [__method__]
  end

  # Produces the HTML to embed for the article. Must be overridden.
  #
  # @raise [RuntimeError] always, in this base class
  def gen_html
    raise MESSAGE % [__method__]
  end
end
@@ -0,0 +1,18 @@
1
+ require_relative 'zhihu_syncer'
2
+
3
+
4
# Host name that is routed to ZhihuSyncer.
ZHIHU_ZHUANLAN = 'zhuanlan.zhihu.com'.freeze

# Chooses the syncer implementation for an article URI based on its host.
class SyncerFactory
  # @param uri [URI] address of the article to sync
  # @return [ZhihuSyncer] a syncer constructed with +uri+
  # @raise [RuntimeError] when no syncer is registered for the URI's host
  def self.get_syncer(uri)
    host_name = uri.host

    # Host names are case-insensitive, but URI keeps them as written, so the
    # comparison is done on a lower-cased copy. to_s covers a URI without a
    # host (nil), which then falls through to the "not supported" error.
    case host_name.to_s.downcase
    when ZHIHU_ZHUANLAN
      ZhihuSyncer.new(uri)
    else
      raise "Not supported website for host: #{host_name}"
    end
  end
end
18
+
@@ -0,0 +1,35 @@
1
+ require_relative 'abstract_syncer'
2
+ require 'nokogiri'
3
+
4
+
5
# Syncer for articles hosted on zhuanlan.zhihu.com: downloads the page,
# selects the article container and returns its children as HTML.
class ZhihuSyncer < AbstractSyncer
  # @return [String] XPath matching the div that carries both the
  #   "Post-RichText" and "ztext" classes
  def get_article_xpath
    '//div[contains(@class, "Post-RichText") and contains(@class, "ztext")]'
  end

  # Serializes one child node of the article. <figure> nodes get their image
  # source rewritten first; every other node is emitted unchanged.
  #
  # @param node [Nokogiri::XML::Node]
  # @return [String] HTML of the node
  def _handle_node(node)
    _rewrite_figure_image(node) if node.name == 'figure'
    node.to_html
  end

  # Fetches the page and renders every child of the matched article element.
  #
  # @return [String] the children's HTML joined with newlines
  def gen_html
    page_html = _fetch_html
    article = Nokogiri::HTML(page_html).xpath(get_article_xpath)
    article.children.map { |node| _handle_node(node) }.join("\n")
  end

  private

  # Sets the src of a figure's image from its data-actualsrc attribute.
  #
  # The image is taken to be the figure's second child (index 1), as the
  # markup is expected to look — NOTE(review): confirm against current Zhihu
  # pages. Figures without that child or without data-actualsrc are left as
  # they are instead of raising.
  def _rewrite_figure_image(figure)
    img_node = figure.children[1]
    img_url = img_node && img_node['data-actualsrc']
    return unless img_url

    # sub replaces the first occurrence like the former String#[]= calls did,
    # but leaves the URL untouched (rather than raising IndexError) when a
    # pattern is absent. Presumably this selects a 720px-wide rendition —
    # TODO confirm.
    img_node['src'] = img_url.sub('_b.jpg', '_720w.jpg').sub('/v2', '/80/v2')
  end
end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: jekyll-artisync
3
3
  version: !ruby/object:Gem::Version
4
- version: '0.4'
4
+ version: '0.5'
5
5
  platform: ruby
6
6
  authors:
7
7
  - Junhan
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-05-24 00:00:00.000000000 Z
11
+ date: 2020-05-27 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: Take an article from a given site and display on current Jekyll page.
14
14
  email:
@@ -18,6 +18,9 @@ extensions: []
18
18
  extra_rdoc_files: []
19
19
  files:
20
20
  - lib/jekyll-artisync.rb
21
+ - lib/syncers/abstract_syncer.rb
22
+ - lib/syncers/syncer_factory.rb
23
+ - lib/syncers/zhihu_syncer.rb
21
24
  homepage: https://github.com/junhan-z/jekyll-artisync
22
25
  licenses:
23
26
  - MIT