jekyll-artisync 0.4 → 0.5

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 31d37fc6ffdd79cb4d259d1db001b0abea1f2f4d8be3d17b035e2db5e1a7cc80
4
- data.tar.gz: 1b5b26500b61c8e271d0c2a2605b28662c041572a1428469e9d30209c718a0c5
3
+ metadata.gz: '0606097a6fb1063a731eb03353da74882e9d5aca8edf40143f7c6182e189c91a'
4
+ data.tar.gz: 7bac92d7cae8be329f1f31c782efa8faced9f280450164f110f81f7f96865e36
5
5
  SHA512:
6
- metadata.gz: 5d643be4d2d2c4831251e06e6f092ff419148b7c21fbdc1d274642d7dad5cf7de741b635af6e556221e47090b61dd0a28cc4614b9adae8a829062c354ce71468
7
- data.tar.gz: 7334aeea98d30013f3ba030caaeafbb3fbd542d74998f00efb152d1873bbdd83542b9213b0cc041836b2e9becb62622d01e894da0e5fc003b44dbc2a576cb79b
6
+ metadata.gz: 897c6f5173ed4af5541c0784609a136e7f09d3a22e174ea38f4fbc908471a7228a311c670d61557611bbce5d62cb62831f667506f25188f4938562d989d8b207
7
+ data.tar.gz: 6b51c4520cb6a6005994184a478073dac60387e3e363f11432363bd9f38a59312f47763488d2fbdc54b4bb03db4933394ab0852fc5d73f72d75dd5e9d911d977
@@ -1,14 +1,6 @@
1
- require 'net/http'
2
- require 'nokogiri'
3
-
1
+ require 'syncers/syncer_factory'
4
2
  require "jekyll"
5
3
 
6
- # user agent is necessary otherwise certain sites such as Zhihu throws 400
7
- USER_AGENT = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.86 Safari/537.36'
8
-
9
- HOST_TO_ARTICLE_XPATH = {
10
- 'zhuanlan.zhihu.com' => '//div[contains(@class, "Post-RichText") and contains(@class, "ztext")]',
11
- }
12
4
 
13
5
  class ArticleSyncEmbed < Liquid::Tag
14
6
 
@@ -17,42 +9,12 @@ class ArticleSyncEmbed < Liquid::Tag
17
9
  @content = content
18
10
  end
19
11
 
20
- def _fetch_html(uri)
21
- res = Net::HTTP.start(uri.hostname, uri.port, :use_ssl => true) do |http|
22
- # :use_ssl => true for the uri is https
23
- http.request(Net::HTTP::Get.new(uri, {'User-Agent' => USER_AGENT}))
24
- end
25
-
26
- res.body
27
- end
28
-
29
- def _handle_node(node)
30
- case node.name
31
- when 'figure'
32
- img_node = node.children[1]
33
-
34
- img_url = img_node['data-actualsrc']
35
- img_url['_b.jpg'] = '_720w.jpg'
36
- img_url['/v2'] = "/80/v2"
37
-
38
- img_node['src'] = img_url
39
- end
40
- node.to_html
41
- end
42
-
43
12
  def render(context)
44
13
  url = @content.strip
45
14
  uri = URI(url)
46
15
  page_host = uri.hostname
47
- puts page_host
48
- page_html = self._fetch_html(uri)
49
- article = Nokogiri::HTML(page_html).xpath(HOST_TO_ARTICLE_XPATH[page_host])
50
- content = []
51
- article.children.each do |node|
52
- content.append self._handle_node(node)
53
- end
54
-
55
- content.join("\n")
16
+ syncer = SyncerFactory.get_syncer(uri)
17
+ return syncer.gen_html
56
18
  end
57
19
 
58
20
  Liquid::Template.register_tag "artisync", self
@@ -0,0 +1,31 @@
1
+ require 'net/http'
2
+
3
+
4
+ # user agent is necessary otherwise certain sites such as Zhihu throws 400
5
+ USER_AGENT = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.86 Safari/537.36'
6
+
7
+ MESSAGE = "AbstractSyncer::[%s]: override required."
8
+
9
+ class AbstractSyncer
10
+ def initialize(uri)
11
+ @uri = uri
12
+ end
13
+
14
+ def _fetch_html
15
+ uri = @uri
16
+ res = Net::HTTP.start(uri.hostname, uri.port, :use_ssl => true) do |http|
17
+ # :use_ssl => true for the uri is https
18
+ http.request(Net::HTTP::Get.new(uri, {'User-Agent' => USER_AGENT}))
19
+ end
20
+
21
+ res.body
22
+ end
23
+
24
+ def get_article_xpath
25
+ raise MESSAGE % [__method__]
26
+ end
27
+
28
+ def gen_html
29
+ raise MESSAGE % [__method__]
30
+ end
31
+ end
@@ -0,0 +1,18 @@
1
+ require_relative 'zhihu_syncer'
2
+
3
+
4
+ ZHIHU_ZHUANLAN = 'zhuanlan.zhihu.com'
5
+
6
+ class SyncerFactory
7
+ def self.get_syncer(uri)
8
+ host_name = uri.host
9
+
10
+ case host_name
11
+ when ZHIHU_ZHUANLAN
12
+ return ZhihuSyncer.new(uri)
13
+ else
14
+ raise "Not supported website for host: #{host_name}"
15
+ end
16
+ end
17
+ end
18
+
@@ -0,0 +1,35 @@
1
+ require_relative 'abstract_syncer'
2
+ require 'nokogiri'
3
+
4
+
5
+ class ZhihuSyncer < AbstractSyncer
6
+ def get_article_xpath
7
+ '//div[contains(@class, "Post-RichText") and contains(@class, "ztext")]'
8
+ end
9
+
10
+ def _handle_node(node)
11
+ case node.name
12
+ when 'figure'
13
+ img_node = node.children[1]
14
+
15
+ img_url = img_node['data-actualsrc']
16
+ img_url['_b.jpg'] = '_720w.jpg'
17
+ img_url['/v2'] = "/80/v2"
18
+
19
+ img_node['src'] = img_url
20
+ end
21
+ node.to_html
22
+ end
23
+
24
+ def gen_html
25
+ page_html = self._fetch_html
26
+ article = Nokogiri::HTML(page_html).xpath(self.get_article_xpath)
27
+ content = []
28
+ article.children.each do |node|
29
+ content.append self._handle_node(node)
30
+ end
31
+
32
+ content.join("\n")
33
+ end
34
+
35
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: jekyll-artisync
3
3
  version: !ruby/object:Gem::Version
4
- version: '0.4'
4
+ version: '0.5'
5
5
  platform: ruby
6
6
  authors:
7
7
  - Junhan
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-05-24 00:00:00.000000000 Z
11
+ date: 2020-05-27 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: Take an article from a given site and display on current Jekyll page.
14
14
  email:
@@ -18,6 +18,9 @@ extensions: []
18
18
  extra_rdoc_files: []
19
19
  files:
20
20
  - lib/jekyll-artisync.rb
21
+ - lib/syncers/abstract_syncer.rb
22
+ - lib/syncers/syncer_factory.rb
23
+ - lib/syncers/zhihu_syncer.rb
21
24
  homepage: https://github.com/junhan-z/jekyll-artisync
22
25
  licenses:
23
26
  - MIT