panchira 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: f2715a3395e43d5ad43f35bedae84dbfe25a4cd533f964cbcc4cdaf953bc0c4b
4
- data.tar.gz: '059e23e1ca4831bc58c62a4a7ccd4ed87010fee75b7e8997560fe49f43486f01'
3
+ metadata.gz: e0715fad65f1b2f6654761d9d1132217ce644c49b621a94aff1f0a6372dd327b
4
+ data.tar.gz: 265c9ab995d2c2913465e0dd17e69c008389c1c06e3b8fbeb798e24dcdced89d
5
5
  SHA512:
6
- metadata.gz: e5ed936514fec2e05dfcaeb727189d1bcc6328e1a27559bd925acba7dc3037c26c57c99fece2c88bad95c7d0d7ae7ffd6840f9e33dde58aef81db81ae600d829
7
- data.tar.gz: 8383db6bdc9c78e2e845651e7206d702f5a8566475b8161c9e464364da7b6aa9c5f9886125771c636e0465e8eec7f1ee1dda3c0865a1f0d478131510451c4a74
6
+ metadata.gz: 231b3b600e88c20f3479943c93efd3b8d720c7052c45c26301818b88a860ac8387610fdd7006c2c6211ecf482a76b24389e4e409290c8817869a3a19af0b0966
7
+ data.tar.gz: a69dfa20c85d5ef15c12d825dd791450835c7ac9edf511a2b3b5592621706ba0c80d94cf258fd513dc8568e5972270fd5e7271a3c7219c0a3ad8724e4a9a0b23
@@ -4,6 +4,15 @@ All notable changes to this project will be documented in this file.
4
4
  The format is based on [Keep a Changelog](http://keepachangelog.com/)
5
5
  and this project adheres to [Semantic Versioning](http://semver.org/).
6
6
 
7
+ ## 0.3.0 - 2020-06-04
8
+ ### Added
9
+ - You can now register and use your own Resolver with this gem. (see Panchira::Extensions.register)
10
+ - Added support for new Twitter UI.
11
+
12
+ ### Changed
13
+ - Panchira::fetch now returns an instance of PanchiraResult instead of a hash.
14
+ - Changed default User-Agent slightly.
15
+
7
16
  ## 0.2.0 - 2020-05-18
8
17
  ### Added
9
18
  - Added support for Shousetsuka Ni Narou (novel18.syosetu.com).
@@ -18,6 +27,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/).
18
27
  ### Added
19
28
  - Released Panchira gem. At this time we can parse only 5 websites.
20
29
 
30
+ [0.3.0]: https://github.com/nuita/panchira/releases/tag/v0.3.0
21
31
  [0.2.0]: https://github.com/nuita/panchira/releases/tag/v0.2.0
22
32
  [0.1.1]: https://github.com/nuita/panchira/releases/tag/v0.1.1
23
33
  [0.1.0]: https://github.com/nuita/panchira/releases/tag/v0.1.0
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- panchira (0.2.0)
4
+ panchira (0.3.0)
5
5
  fastimage (~> 2.1.7)
6
6
  nokogiri (~> 1.10.9)
7
7
 
@@ -6,6 +6,7 @@ require 'fastimage'
6
6
  require 'json'
7
7
 
8
8
  require_relative 'panchira/version'
9
+ require_relative 'panchira/panchira_result'
9
10
  require_relative 'panchira/resolvers/resolver'
10
11
  require_relative 'panchira/extensions'
11
12
 
@@ -13,9 +14,10 @@ project_root = File.dirname(File.absolute_path(__FILE__))
13
14
  Dir.glob(project_root + '/panchira/resolvers/*_resolver.rb').sort.each { |file| require file }
14
15
 
15
16
  # Main Panchira code goes here.
17
+ # If you simply want to get data from your URL, then ::Panchira::fetch() will do.
16
18
  module Panchira
17
19
  class << self
18
- # Fetch the given URL and returns a hash that contains attributes of hentai.
20
+ # Return a PanchiraResult that contains the attributes of the given URL.
19
21
  def fetch(url)
20
22
  resolver = select_resolver(url)
21
23
 
@@ -1,15 +1,18 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Panchira
4
+ # This Module manages Resolver classes.
5
+ # To enable your own Resolver, you need to call Extensions::register().
4
6
  module Extensions
5
7
  @resolvers = []
6
8
 
7
9
  class << self
8
- # Register a resolver class which extends Panchira::Resolver.
10
+ # Register the given Resolver in Extensions.resolvers (a Resolver that is already registered is not added again).
9
11
  def register(resolver)
10
12
  @resolvers.push(resolver) unless @resolvers.include?(resolver)
11
13
  end
12
14
 
15
+ # Panchira::fetch will find a correct Resolver based on this list.
13
16
  attr_reader :resolvers
14
17
  end
15
18
  end
@@ -0,0 +1,13 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Panchira
4
+ # Image attributes in PanchiraResult.
5
+ class PanchiraImage
6
+ attr_accessor :url, :width, :height
7
+ end
8
+
9
+ # Result class for Panchira.fetch.
10
+ class PanchiraResult
11
+ attr_accessor :canonical_url, :title, :description, :image
12
+ end
13
+ end
@@ -30,7 +30,7 @@ module Panchira
30
30
 
31
31
  parent = @json['content']['parents'][0]['data']['title']
32
32
  description = '著: ' + author if author
33
- description += " / #{parent}" if parent
33
+ description + " / #{parent}" if parent
34
34
  end
35
35
 
36
36
  def parse_canonical_url
@@ -1,39 +1,42 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- # Resolver is a class that actually GET url and resolve attributes.
4
- # This class is the default resolver for pages,
5
- # and is inherited by the other resolvers.
6
3
  module Panchira
4
+ # Resolver is a class that actually gets attributes by fetching the designated URL.
5
+ # This class is the default resolver for pages. <br>
6
+ # To create your own resolver, first you make a class that extends Resolver,
7
+ # and then register it by ::Panchira::Extensions::register().
8
+ # Then ::Panchira::fetch will pick up your resolver when Resolver::applicable?() is true.
7
9
  class Resolver
8
- # The URL pattern that this resolver tries to resolve.
9
- # Should be redefined in subclasses.
10
+ # URL pattern that a resolver tries to resolve.
11
+ # You must override this in subclasses to limit which urls to resolve.
10
12
  URL_REGEXP = URI::DEFAULT_PARSER.make_regexp
11
13
 
12
- USER_AGENT = "Mozilla/5.0 (compatible; Panchira/#{VERSION}; +https://github.com/nuita/panchira)"
14
+ USER_AGENT = "Mozilla/5.0 (compatible; PanchiraBot/#{VERSION}; +https://github.com/nuita/panchira)"
13
15
 
14
16
  def initialize(url)
15
17
  @url = url
16
18
  end
17
19
 
20
+ # This function is called right after this Resolver instance is made.
21
+ # Fetch page from @url and return PanchiraResult.
18
22
  def fetch
19
- attributes = {}
23
+ result = PanchiraResult.new
20
24
 
21
25
  @page = fetch_page(@url)
22
- attributes[:canonical_url] = parse_canonical_url
26
+ result.canonical_url = parse_canonical_url
23
27
 
24
- if @url != attributes[:canonical_url]
25
- @page = fetch_page(attributes[:canonical_url])
26
- end
28
+ @page = fetch_page(result.canonical_url) if @url != result.canonical_url
27
29
 
28
- attributes[:title] = parse_title
29
- attributes[:description] = parse_description
30
- attributes[:image] = parse_image
30
+ result.title = parse_title
31
+ result.description = parse_description
32
+ result.image = parse_image
31
33
 
32
- attributes
34
+ result
33
35
  end
34
36
 
35
37
  class << self
36
38
  # Tell whether the url is applicable for this resolver.
39
+ # ::Panchira::fetch uses this method to choose a Resolver for a URL.
37
40
  def applicable?(url)
38
41
  url =~ self::URL_REGEXP
39
42
  end
@@ -42,16 +45,28 @@ module Panchira
42
45
  private
43
46
 
44
47
  def fetch_page(url)
45
- raw_page = URI.parse(url).read('User-Agent' => USER_AGENT)
48
+ raw_page = URI.parse(url).read('User-Agent' => self.class::USER_AGENT)
46
49
  charset = raw_page.charset
47
50
  Nokogiri::HTML.parse(raw_page, url, charset)
48
51
  end
49
52
 
50
53
  def parse_canonical_url
51
- if (canonical_url = @page.css('//link[rel="canonical"]/@href')).any?
52
- canonical_url.to_s
53
- else
54
- @url
54
+ history = []
55
+
56
+ # fetch page and refresh canonical_url until canonical_url converges.
57
+ loop do
58
+ url_in_res = @page.css('//link[rel="canonical"]/@href').to_s
59
+
60
+ if url_in_res.empty?
61
+ return history.last || @url
62
+ else
63
+ if history.include?(url_in_res) || history.length > 5
64
+ return url_in_res
65
+ else
66
+ history.push(url_in_res)
67
+ @page = fetch_page(url_in_res)
68
+ end
69
+ end
55
70
  end
56
71
  end
57
72
 
@@ -72,9 +87,9 @@ module Panchira
72
87
  end
73
88
 
74
89
  def parse_image
75
- image = {}
76
- image[:url] = parse_image_url
77
- image[:width], image[:height] = FastImage.size(image[:url])
90
+ image = PanchiraImage.new
91
+ image.url = parse_image_url
92
+ image.width, image.height = FastImage.size(image.url)
78
93
 
79
94
  image
80
95
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Panchira
4
- VERSION = '0.2.0'
4
+ VERSION = '0.3.0'
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: panchira
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - kyp
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-05-18 00:00:00.000000000 Z
11
+ date: 2020-06-04 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -101,6 +101,7 @@ files:
101
101
  - bin/setup
102
102
  - lib/panchira.rb
103
103
  - lib/panchira/extensions.rb
104
+ - lib/panchira/panchira_result.rb
104
105
  - lib/panchira/resolvers/dlsite_resolver.rb
105
106
  - lib/panchira/resolvers/komiflo_resolver.rb
106
107
  - lib/panchira/resolvers/melonbooks_resolver.rb