panchira 0.2.0 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: f2715a3395e43d5ad43f35bedae84dbfe25a4cd533f964cbcc4cdaf953bc0c4b
4
- data.tar.gz: '059e23e1ca4831bc58c62a4a7ccd4ed87010fee75b7e8997560fe49f43486f01'
3
+ metadata.gz: e0715fad65f1b2f6654761d9d1132217ce644c49b621a94aff1f0a6372dd327b
4
+ data.tar.gz: 265c9ab995d2c2913465e0dd17e69c008389c1c06e3b8fbeb798e24dcdced89d
5
5
  SHA512:
6
- metadata.gz: e5ed936514fec2e05dfcaeb727189d1bcc6328e1a27559bd925acba7dc3037c26c57c99fece2c88bad95c7d0d7ae7ffd6840f9e33dde58aef81db81ae600d829
7
- data.tar.gz: 8383db6bdc9c78e2e845651e7206d702f5a8566475b8161c9e464364da7b6aa9c5f9886125771c636e0465e8eec7f1ee1dda3c0865a1f0d478131510451c4a74
6
+ metadata.gz: 231b3b600e88c20f3479943c93efd3b8d720c7052c45c26301818b88a860ac8387610fdd7006c2c6211ecf482a76b24389e4e409290c8817869a3a19af0b0966
7
+ data.tar.gz: a69dfa20c85d5ef15c12d825dd791450835c7ac9edf511a2b3b5592621706ba0c80d94cf258fd513dc8568e5972270fd5e7271a3c7219c0a3ad8724e4a9a0b23
@@ -4,6 +4,15 @@ All notable changes to this project will be documented in this file.
4
4
  The format is based on [Keep a Changelog](http://keepachangelog.com/)
5
5
  and this project adheres to [Semantic Versioning](http://semver.org/).
6
6
 
7
+ ## 0.3.0 - 2020-06-04
8
+ ### Added
9
+ - You can now register and use your own Resolver with this gem. (see Panchira::Extensions.register)
10
+ - Added support for new Twitter UI.
11
+
12
+ ### Changed
13
+ - Panchira::fetch now returns an instance of PanchiraResult instead of a hash.
14
+ - Changed default User-Agent slightly.
15
+
7
16
  ## 0.2.0 - 2020-05-18
8
17
  ### Added
9
18
  - Added support for Shousetsuka Ni Narou (novel18.syosetu.com).
@@ -18,6 +27,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/).
18
27
  ### Added
19
28
  - Released Panchira gem. At this time we can parse only 5 websites.
20
29
 
30
+ [0.3.0]: https://github.com/nuita/panchira/releases/tag/v0.3.0
21
31
  [0.2.0]: https://github.com/nuita/panchira/releases/tag/v0.2.0
22
32
  [0.1.1]: https://github.com/nuita/panchira/releases/tag/v0.1.1
23
33
  [0.1.0]: https://github.com/nuita/panchira/releases/tag/v0.1.0
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- panchira (0.2.0)
4
+ panchira (0.3.0)
5
5
  fastimage (~> 2.1.7)
6
6
  nokogiri (~> 1.10.9)
7
7
 
@@ -6,6 +6,7 @@ require 'fastimage'
6
6
  require 'json'
7
7
 
8
8
  require_relative 'panchira/version'
9
+ require_relative 'panchira/panchira_result'
9
10
  require_relative 'panchira/resolvers/resolver'
10
11
  require_relative 'panchira/extensions'
11
12
 
@@ -13,9 +14,10 @@ project_root = File.dirname(File.absolute_path(__FILE__))
13
14
  Dir.glob(project_root + '/panchira/resolvers/*_resolver.rb').sort.each { |file| require file }
14
15
 
15
16
  # Main Panchira code goes here.
17
+ # If you simply want to get data from your URL, then ::Panchira::fetch() will do.
16
18
  module Panchira
17
19
  class << self
18
- # Fetch the given URL and returns a hash that contains attributes of hentai.
20
+ # Return a PanchiraResult that contains the attributes of the given URL.
19
21
  def fetch(url)
20
22
  resolver = select_resolver(url)
21
23
 
@@ -1,15 +1,18 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Panchira
4
+ # This Module manages Resolver classes.
5
+ # To enable your own Resolver, you need to call Extensions::register().
4
6
  module Extensions
5
7
  @resolvers = []
6
8
 
7
9
  class << self
8
- # Register a resolver class which extends Panchira::Resolver.
10
+ # Register the given Resolver in Extensions.resolvers (duplicates are ignored).
9
11
  def register(resolver)
10
12
  @resolvers.push(resolver) unless @resolvers.include?(resolver)
11
13
  end
12
14
 
15
+ # Panchira::fetch will find a correct Resolver based on this list.
13
16
  attr_reader :resolvers
14
17
  end
15
18
  end
@@ -0,0 +1,13 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Panchira
4
+ # Image attributes in PanchiraResult.
5
+ class PanchiraImage
6
+ attr_accessor :url, :width, :height
7
+ end
8
+
9
+ # Result class for Panchira.fetch.
10
+ class PanchiraResult
11
+ attr_accessor :canonical_url, :title, :description, :image
12
+ end
13
+ end
@@ -30,7 +30,7 @@ module Panchira
30
30
 
31
31
  parent = @json['content']['parents'][0]['data']['title']
32
32
  description = '著: ' + author if author
33
- description += " / #{parent}" if parent
33
+ description + " / #{parent}" if parent
34
34
  end
35
35
 
36
36
  def parse_canonical_url
@@ -1,39 +1,42 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- # Resolver is a class that actually GET url and resolve attributes.
4
- # This class is the default resolver for pages,
5
- # and is inherited by the other resolvers.
6
3
  module Panchira
4
+ # Resolver is a class that actually gets attributes by fetching the designated URL.
5
+ # This class is the default resolver for pages. <br>
6
+ # To create your own resolver, first you make a class that extends Resolver,
7
+ # and then register it by ::Panchira::Extensions::register().
8
+ # Then ::Panchira::fetch will pick up your resolver when Resolver::applicable?() is true.
7
9
  class Resolver
8
- # The URL pattern that this resolver tries to resolve.
9
- # Should be redefined in subclasses.
10
+ # URL pattern that a resolver tries to resolve.
11
+ # You must override this in subclasses to limit which urls to resolve.
10
12
  URL_REGEXP = URI::DEFAULT_PARSER.make_regexp
11
13
 
12
- USER_AGENT = "Mozilla/5.0 (compatible; Panchira/#{VERSION}; +https://github.com/nuita/panchira)"
14
+ USER_AGENT = "Mozilla/5.0 (compatible; PanchiraBot/#{VERSION}; +https://github.com/nuita/panchira)"
13
15
 
14
16
  def initialize(url)
15
17
  @url = url
16
18
  end
17
19
 
20
+ # This function is called right after this Resolver instance is made.
21
+ # Fetch page from @url and return PanchiraResult.
18
22
  def fetch
19
- attributes = {}
23
+ result = PanchiraResult.new
20
24
 
21
25
  @page = fetch_page(@url)
22
- attributes[:canonical_url] = parse_canonical_url
26
+ result.canonical_url = parse_canonical_url
23
27
 
24
- if @url != attributes[:canonical_url]
25
- @page = fetch_page(attributes[:canonical_url])
26
- end
28
+ @page = fetch_page(result.canonical_url) if @url != result.canonical_url
27
29
 
28
- attributes[:title] = parse_title
29
- attributes[:description] = parse_description
30
- attributes[:image] = parse_image
30
+ result.title = parse_title
31
+ result.description = parse_description
32
+ result.image = parse_image
31
33
 
32
- attributes
34
+ result
33
35
  end
34
36
 
35
37
  class << self
36
38
  # Tell whether the url is applicable for this resolver.
39
+ # ::Panchira::fetch uses this method to choose a Resolver for a URL.
37
40
  def applicable?(url)
38
41
  url =~ self::URL_REGEXP
39
42
  end
@@ -42,16 +45,28 @@ module Panchira
42
45
  private
43
46
 
44
47
  def fetch_page(url)
45
- raw_page = URI.parse(url).read('User-Agent' => USER_AGENT)
48
+ raw_page = URI.parse(url).read('User-Agent' => self.class::USER_AGENT)
46
49
  charset = raw_page.charset
47
50
  Nokogiri::HTML.parse(raw_page, url, charset)
48
51
  end
49
52
 
50
53
  def parse_canonical_url
51
- if (canonical_url = @page.css('//link[rel="canonical"]/@href')).any?
52
- canonical_url.to_s
53
- else
54
- @url
54
+ history = []
55
+
56
+ # fetch page and refresh canonical_url until canonical_url converges.
57
+ loop do
58
+ url_in_res = @page.css('//link[rel="canonical"]/@href').to_s
59
+
60
+ if url_in_res.empty?
61
+ return history.last || @url
62
+ else
63
+ if history.include?(url_in_res) || history.length > 5
64
+ return url_in_res
65
+ else
66
+ history.push(url_in_res)
67
+ @page = fetch_page(url_in_res)
68
+ end
69
+ end
55
70
  end
56
71
  end
57
72
 
@@ -72,9 +87,9 @@ module Panchira
72
87
  end
73
88
 
74
89
  def parse_image
75
- image = {}
76
- image[:url] = parse_image_url
77
- image[:width], image[:height] = FastImage.size(image[:url])
90
+ image = PanchiraImage.new
91
+ image.url = parse_image_url
92
+ image.width, image.height = FastImage.size(image.url)
78
93
 
79
94
  image
80
95
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Panchira
4
- VERSION = '0.2.0'
4
+ VERSION = '0.3.0'
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: panchira
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - kyp
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-05-18 00:00:00.000000000 Z
11
+ date: 2020-06-04 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -101,6 +101,7 @@ files:
101
101
  - bin/setup
102
102
  - lib/panchira.rb
103
103
  - lib/panchira/extensions.rb
104
+ - lib/panchira/panchira_result.rb
104
105
  - lib/panchira/resolvers/dlsite_resolver.rb
105
106
  - lib/panchira/resolvers/komiflo_resolver.rb
106
107
  - lib/panchira/resolvers/melonbooks_resolver.rb