panchira 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +10 -0
- data/Gemfile.lock +1 -1
- data/lib/panchira.rb +3 -1
- data/lib/panchira/extensions.rb +4 -1
- data/lib/panchira/panchira_result.rb +13 -0
- data/lib/panchira/resolvers/komiflo_resolver.rb +1 -1
- data/lib/panchira/resolvers/resolver.rb +38 -23
- data/lib/panchira/version.rb +1 -1
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e0715fad65f1b2f6654761d9d1132217ce644c49b621a94aff1f0a6372dd327b
|
4
|
+
data.tar.gz: 265c9ab995d2c2913465e0dd17e69c008389c1c06e3b8fbeb798e24dcdced89d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 231b3b600e88c20f3479943c93efd3b8d720c7052c45c26301818b88a860ac8387610fdd7006c2c6211ecf482a76b24389e4e409290c8817869a3a19af0b0966
|
7
|
+
data.tar.gz: a69dfa20c85d5ef15c12d825dd791450835c7ac9edf511a2b3b5592621706ba0c80d94cf258fd513dc8568e5972270fd5e7271a3c7219c0a3ad8724e4a9a0b23
|
data/CHANGELOG.md
CHANGED
@@ -4,6 +4,15 @@ All notable changes to this project will be documented in this file.
|
|
4
4
|
The format is based on [Keep a Changelog](http://keepachangelog.com/)
|
5
5
|
and this project adheres to [Semantic Versioning](http://semver.org/).
|
6
6
|
|
7
|
+
## 0.3.0 - 2020-06-04
|
8
|
+
### Added
|
9
|
+
- You can now register and use your own Resolver with this gem. (see Panchira::Extensions#register)
|
10
|
+
- Added support for new Twitter UI.
|
11
|
+
|
12
|
+
### Changed
|
13
|
+
- Panchira::fetch now returns an instance of PanchiraResult instead of a hash.
|
14
|
+
- Changed default User-Agent slightly.
|
15
|
+
|
7
16
|
## 0.2.0 - 2020-05-18
|
8
17
|
### Added
|
9
18
|
- Added support for Shousetsuka Ni Narou (novel18.syosetu.com).
|
@@ -18,6 +27,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/).
|
|
18
27
|
### Added
|
19
28
|
- Released Panchira gem. At this time we can parse only 5 websites.
|
20
29
|
|
30
|
+
[0.3.0]: https://github.com/nuita/panchira/releases/tag/v0.3.0
|
21
31
|
[0.2.0]: https://github.com/nuita/panchira/releases/tag/v0.2.0
|
22
32
|
[0.1.1]: https://github.com/nuita/panchira/releases/tag/v0.1.1
|
23
33
|
[0.1.0]: https://github.com/nuita/panchira/releases/tag/v0.1.0
|
data/Gemfile.lock
CHANGED
data/lib/panchira.rb
CHANGED
@@ -6,6 +6,7 @@ require 'fastimage'
|
|
6
6
|
require 'json'
|
7
7
|
|
8
8
|
require_relative 'panchira/version'
|
9
|
+
require_relative 'panchira/panchira_result'
|
9
10
|
require_relative 'panchira/resolvers/resolver'
|
10
11
|
require_relative 'panchira/extensions'
|
11
12
|
|
@@ -13,9 +14,10 @@ project_root = File.dirname(File.absolute_path(__FILE__))
|
|
13
14
|
Dir.glob(project_root + '/panchira/resolvers/*_resolver.rb').sort.each { |file| require file }
|
14
15
|
|
15
16
|
# Main Panchira code goes here.
|
17
|
+
# If you simply want to get data from your URL, then ::Panchira::fetch() will do.
|
16
18
|
module Panchira
|
17
19
|
class << self
|
18
|
-
#
|
20
|
+
# Return a PanchiraResult that contains the attributes of the given URL.
|
19
21
|
def fetch(url)
|
20
22
|
resolver = select_resolver(url)
|
21
23
|
|
data/lib/panchira/extensions.rb
CHANGED
@@ -1,15 +1,18 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
module Panchira
|
4
|
+
# This Module manages Resolver classes.
|
5
|
+
# To enable your own Resolver, you need to call Extensions::register().
|
4
6
|
module Extensions
|
5
7
|
@resolvers = []
|
6
8
|
|
7
9
|
class << self
|
8
|
-
# Register a
|
10
|
+
# Register the given Resolver in Extensions::resolvers (no-op if it is already registered).
|
9
11
|
def register(resolver)
|
10
12
|
@resolvers.push(resolver) unless @resolvers.include?(resolver)
|
11
13
|
end
|
12
14
|
|
15
|
+
# Panchira::fetch will find a correct Resolver based on this list.
|
13
16
|
attr_reader :resolvers
|
14
17
|
end
|
15
18
|
end
|
@@ -0,0 +1,13 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Panchira
|
4
|
+
# Image attributes in PanchiraResult.
|
5
|
+
class PanchiraImage
|
6
|
+
attr_accessor :url, :width, :height
|
7
|
+
end
|
8
|
+
|
9
|
+
# Result class for Panchira.fetch.
|
10
|
+
class PanchiraResult
|
11
|
+
attr_accessor :canonical_url, :title, :description, :image
|
12
|
+
end
|
13
|
+
end
|
@@ -1,39 +1,42 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
# Resolver is a class that actually GET url and resolve attributes.
|
4
|
-
# This class is the default resolver for pages,
|
5
|
-
# and is inherited by the other resolvers.
|
6
3
|
module Panchira
|
4
|
+
# Resolver is a class that actually gets attributes by fetching the designated URL.
|
5
|
+
# This class is the default resolver for pages. <br>
|
6
|
+
# To create your own resolver, first you make a class that extends Resolver,
|
7
|
+
# and then register it by ::Panchira::Extensions::register().
|
8
|
+
# Then ::Panchira::fetch will pick up your resolver when Resolver::applicable?() is true.
|
7
9
|
class Resolver
|
8
|
-
#
|
9
|
-
#
|
10
|
+
# URL pattern that a resolver tries to resolve.
|
11
|
+
# You must override this in subclasses to limit which urls to resolve.
|
10
12
|
URL_REGEXP = URI::DEFAULT_PARSER.make_regexp
|
11
13
|
|
12
|
-
USER_AGENT = "Mozilla/5.0 (compatible;
|
14
|
+
USER_AGENT = "Mozilla/5.0 (compatible; PanchiraBot/#{VERSION}; +https://github.com/nuita/panchira)"
|
13
15
|
|
14
16
|
def initialize(url)
|
15
17
|
@url = url
|
16
18
|
end
|
17
19
|
|
20
|
+
# This function is called right after this Resolver instance is made.
|
21
|
+
# Fetch page from @url and return PanchiraResult.
|
18
22
|
def fetch
|
19
|
-
|
23
|
+
result = PanchiraResult.new
|
20
24
|
|
21
25
|
@page = fetch_page(@url)
|
22
|
-
|
26
|
+
result.canonical_url = parse_canonical_url
|
23
27
|
|
24
|
-
if @url !=
|
25
|
-
@page = fetch_page(attributes[:canonical_url])
|
26
|
-
end
|
28
|
+
@page = fetch_page(result.canonical_url) if @url != result.canonical_url
|
27
29
|
|
28
|
-
|
29
|
-
|
30
|
-
|
30
|
+
result.title = parse_title
|
31
|
+
result.description = parse_description
|
32
|
+
result.image = parse_image
|
31
33
|
|
32
|
-
|
34
|
+
result
|
33
35
|
end
|
34
36
|
|
35
37
|
class << self
|
36
38
|
# Tell whether the url is applicable for this resolver.
|
39
|
+
# ::Panchira::fetch uses this method to choose a Resolver for a URL.
|
37
40
|
def applicable?(url)
|
38
41
|
url =~ self::URL_REGEXP
|
39
42
|
end
|
@@ -42,16 +45,28 @@ module Panchira
|
|
42
45
|
private
|
43
46
|
|
44
47
|
def fetch_page(url)
|
45
|
-
raw_page = URI.parse(url).read('User-Agent' => USER_AGENT)
|
48
|
+
raw_page = URI.parse(url).read('User-Agent' => self.class::USER_AGENT)
|
46
49
|
charset = raw_page.charset
|
47
50
|
Nokogiri::HTML.parse(raw_page, url, charset)
|
48
51
|
end
|
49
52
|
|
50
53
|
def parse_canonical_url
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
54
|
+
history = []
|
55
|
+
|
56
|
+
# fetch page and refresh canonical_url until canonical_url converges.
|
57
|
+
loop do
|
58
|
+
url_in_res = @page.css('//link[rel="canonical"]/@href').to_s
|
59
|
+
|
60
|
+
if url_in_res.empty?
|
61
|
+
return history.last || @url
|
62
|
+
else
|
63
|
+
if history.include?(url_in_res) || history.length > 5
|
64
|
+
return url_in_res
|
65
|
+
else
|
66
|
+
history.push(url_in_res)
|
67
|
+
@page = fetch_page(url_in_res)
|
68
|
+
end
|
69
|
+
end
|
55
70
|
end
|
56
71
|
end
|
57
72
|
|
@@ -72,9 +87,9 @@ module Panchira
|
|
72
87
|
end
|
73
88
|
|
74
89
|
def parse_image
|
75
|
-
image =
|
76
|
-
image
|
77
|
-
image
|
90
|
+
image = PanchiraImage.new
|
91
|
+
image.url = parse_image_url
|
92
|
+
image.width, image.height = FastImage.size(image.url)
|
78
93
|
|
79
94
|
image
|
80
95
|
end
|
data/lib/panchira/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: panchira
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- kyp
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-
|
11
|
+
date: 2020-06-04 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -101,6 +101,7 @@ files:
|
|
101
101
|
- bin/setup
|
102
102
|
- lib/panchira.rb
|
103
103
|
- lib/panchira/extensions.rb
|
104
|
+
- lib/panchira/panchira_result.rb
|
104
105
|
- lib/panchira/resolvers/dlsite_resolver.rb
|
105
106
|
- lib/panchira/resolvers/komiflo_resolver.rb
|
106
107
|
- lib/panchira/resolvers/melonbooks_resolver.rb
|