panchira 0.2.0 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +10 -0
- data/Gemfile.lock +1 -1
- data/lib/panchira.rb +3 -1
- data/lib/panchira/extensions.rb +4 -1
- data/lib/panchira/panchira_result.rb +13 -0
- data/lib/panchira/resolvers/komiflo_resolver.rb +1 -1
- data/lib/panchira/resolvers/resolver.rb +38 -23
- data/lib/panchira/version.rb +1 -1
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e0715fad65f1b2f6654761d9d1132217ce644c49b621a94aff1f0a6372dd327b
|
4
|
+
data.tar.gz: 265c9ab995d2c2913465e0dd17e69c008389c1c06e3b8fbeb798e24dcdced89d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 231b3b600e88c20f3479943c93efd3b8d720c7052c45c26301818b88a860ac8387610fdd7006c2c6211ecf482a76b24389e4e409290c8817869a3a19af0b0966
|
7
|
+
data.tar.gz: a69dfa20c85d5ef15c12d825dd791450835c7ac9edf511a2b3b5592621706ba0c80d94cf258fd513dc8568e5972270fd5e7271a3c7219c0a3ad8724e4a9a0b23
|
data/CHANGELOG.md
CHANGED
@@ -4,6 +4,15 @@ All notable changes to this project will be documented in this file.
|
|
4
4
|
The format is based on [Keep a Changelog](http://keepachangelog.com/)
|
5
5
|
and this project adheres to [Semantic Versioning](http://semver.org/).
|
6
6
|
|
7
|
+
## 0.3.0 - 2020-06-04
|
8
|
+
### Added
|
9
|
+
- You can now register and use your own Resolver with this gem. (see Panchira::Extensions#register)
|
10
|
+
- Added support for new Twitter UI.
|
11
|
+
|
12
|
+
### Changed
|
13
|
+
- Panchira::fetch now returns an instance of PanchiraResult instead of a hash.
|
14
|
+
- Changed default User-Agent slightly.
|
15
|
+
|
7
16
|
## 0.2.0 - 2020-05-18
|
8
17
|
### Added
|
9
18
|
- Added support for Shousetsuka Ni Narou (novel18.syosetu.com).
|
@@ -18,6 +27,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/).
|
|
18
27
|
### Added
|
19
28
|
- Released Panchira gem. At this time we can parse only 5 websites.
|
20
29
|
|
30
|
+
[0.3.0]: https://github.com/nuita/panchira/releases/tag/v0.3.0
|
21
31
|
[0.2.0]: https://github.com/nuita/panchira/releases/tag/v0.2.0
|
22
32
|
[0.1.1]: https://github.com/nuita/panchira/releases/tag/v0.1.1
|
23
33
|
[0.1.0]: https://github.com/nuita/panchira/releases/tag/v0.1.0
|
data/Gemfile.lock
CHANGED
data/lib/panchira.rb
CHANGED
@@ -6,6 +6,7 @@ require 'fastimage'
|
|
6
6
|
require 'json'
|
7
7
|
|
8
8
|
require_relative 'panchira/version'
|
9
|
+
require_relative 'panchira/panchira_result'
|
9
10
|
require_relative 'panchira/resolvers/resolver'
|
10
11
|
require_relative 'panchira/extensions'
|
11
12
|
|
@@ -13,9 +14,10 @@ project_root = File.dirname(File.absolute_path(__FILE__))
|
|
13
14
|
Dir.glob(project_root + '/panchira/resolvers/*_resolver.rb').sort.each { |file| require file }
|
14
15
|
|
15
16
|
# Main Panchira code goes here.
|
17
|
+
# If you simply want to get data from your URL, then ::Panchira::fetch() will do.
|
16
18
|
module Panchira
|
17
19
|
class << self
|
18
|
-
#
|
20
|
+
# Return a PanchiraResult that contains the attributes of given url.
|
19
21
|
def fetch(url)
|
20
22
|
resolver = select_resolver(url)
|
21
23
|
|
data/lib/panchira/extensions.rb
CHANGED
@@ -1,15 +1,18 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
module Panchira
|
4
|
+
# This Module manages Resolver classes.
|
5
|
+
# To enable your own Resolver, you need to call Extensions::register().
|
4
6
|
module Extensions
|
5
7
|
@resolvers = []
|
6
8
|
|
7
9
|
class << self
|
8
|
-
# Register a
|
10
|
+
# Register a given Resolver to Extensions::Resolvers.
|
9
11
|
def register(resolver)
|
10
12
|
@resolvers.push(resolver) unless @resolvers.include?(resolver)
|
11
13
|
end
|
12
14
|
|
15
|
+
# Panchira::fetch will find a correct Resolver based on this list.
|
13
16
|
attr_reader :resolvers
|
14
17
|
end
|
15
18
|
end
|
@@ -0,0 +1,13 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Panchira
|
4
|
+
# Image attributes in PanchiraResult.
|
5
|
+
class PanchiraImage
|
6
|
+
attr_accessor :url, :width, :height
|
7
|
+
end
|
8
|
+
|
9
|
+
# Result class for Panchira.fetch.
|
10
|
+
class PanchiraResult
|
11
|
+
attr_accessor :canonical_url, :title, :description, :image
|
12
|
+
end
|
13
|
+
end
|
@@ -1,39 +1,42 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
# Resolver is a class that actually GET url and resolve attributes.
|
4
|
-
# This class is the default resolver for pages,
|
5
|
-
# and is inherited by the other resolvers.
|
6
3
|
module Panchira
|
4
|
+
# Resolver is a class that actually gets attributes by fetching the designated URL.
|
5
|
+
# This class is the default resolver for pages. <br>
|
6
|
+
# To create your own resolver, first you make a class that extends Resolver,
|
7
|
+
# and then register it by ::Panchira::Extensions::register().
|
8
|
+
# Then ::Panchira::fetch will pick up your resolver when Resolver::applicable?() is true.
|
7
9
|
class Resolver
|
8
|
-
#
|
9
|
-
#
|
10
|
+
# URL pattern that a resolver tries to resolve.
|
11
|
+
# You must override this in subclasses to limit which urls to resolve.
|
10
12
|
URL_REGEXP = URI::DEFAULT_PARSER.make_regexp
|
11
13
|
|
12
|
-
USER_AGENT = "Mozilla/5.0 (compatible;
|
14
|
+
USER_AGENT = "Mozilla/5.0 (compatible; PanchiraBot/#{VERSION}; +https://github.com/nuita/panchira)"
|
13
15
|
|
14
16
|
def initialize(url)
|
15
17
|
@url = url
|
16
18
|
end
|
17
19
|
|
20
|
+
# This function is called right after this Resolver instance is made.
|
21
|
+
# Fetch page from @url and return PanchiraResult.
|
18
22
|
def fetch
|
19
|
-
|
23
|
+
result = PanchiraResult.new
|
20
24
|
|
21
25
|
@page = fetch_page(@url)
|
22
|
-
|
26
|
+
result.canonical_url = parse_canonical_url
|
23
27
|
|
24
|
-
if @url !=
|
25
|
-
@page = fetch_page(attributes[:canonical_url])
|
26
|
-
end
|
28
|
+
@page = fetch_page(result.canonical_url) if @url != result.canonical_url
|
27
29
|
|
28
|
-
|
29
|
-
|
30
|
-
|
30
|
+
result.title = parse_title
|
31
|
+
result.description = parse_description
|
32
|
+
result.image = parse_image
|
31
33
|
|
32
|
-
|
34
|
+
result
|
33
35
|
end
|
34
36
|
|
35
37
|
class << self
|
36
38
|
# Tell whether the url is applicable for this resolver.
|
39
|
+
# ::Panchira::fetch uses this method to choose a Resolver for a URL.
|
37
40
|
def applicable?(url)
|
38
41
|
url =~ self::URL_REGEXP
|
39
42
|
end
|
@@ -42,16 +45,28 @@ module Panchira
|
|
42
45
|
private
|
43
46
|
|
44
47
|
def fetch_page(url)
|
45
|
-
raw_page = URI.parse(url).read('User-Agent' => USER_AGENT)
|
48
|
+
raw_page = URI.parse(url).read('User-Agent' => self.class::USER_AGENT)
|
46
49
|
charset = raw_page.charset
|
47
50
|
Nokogiri::HTML.parse(raw_page, url, charset)
|
48
51
|
end
|
49
52
|
|
50
53
|
def parse_canonical_url
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
54
|
+
history = []
|
55
|
+
|
56
|
+
# fetch page and refresh canonical_url until canonical_url converges.
|
57
|
+
loop do
|
58
|
+
url_in_res = @page.css('//link[rel="canonical"]/@href').to_s
|
59
|
+
|
60
|
+
if url_in_res.empty?
|
61
|
+
return history.last || @url
|
62
|
+
else
|
63
|
+
if history.include?(url_in_res) || history.length > 5
|
64
|
+
return url_in_res
|
65
|
+
else
|
66
|
+
history.push(url_in_res)
|
67
|
+
@page = fetch_page(url_in_res)
|
68
|
+
end
|
69
|
+
end
|
55
70
|
end
|
56
71
|
end
|
57
72
|
|
@@ -72,9 +87,9 @@ module Panchira
|
|
72
87
|
end
|
73
88
|
|
74
89
|
def parse_image
|
75
|
-
image =
|
76
|
-
image
|
77
|
-
image
|
90
|
+
image = PanchiraImage.new
|
91
|
+
image.url = parse_image_url
|
92
|
+
image.width, image.height = FastImage.size(image.url)
|
78
93
|
|
79
94
|
image
|
80
95
|
end
|
data/lib/panchira/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: panchira
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.0
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- kyp
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-
|
11
|
+
date: 2020-06-04 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -101,6 +101,7 @@ files:
|
|
101
101
|
- bin/setup
|
102
102
|
- lib/panchira.rb
|
103
103
|
- lib/panchira/extensions.rb
|
104
|
+
- lib/panchira/panchira_result.rb
|
104
105
|
- lib/panchira/resolvers/dlsite_resolver.rb
|
105
106
|
- lib/panchira/resolvers/komiflo_resolver.rb
|
106
107
|
- lib/panchira/resolvers/melonbooks_resolver.rb
|