panchira 0.1.1 → 1.1.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/ruby.yml +33 -0
- data/CHANGELOG.md +44 -0
- data/Gemfile.lock +2 -2
- data/README.md +7 -5
- data/lib/panchira.rb +14 -19
- data/lib/panchira/extensions.rb +19 -0
- data/lib/panchira/panchira_result.rb +13 -0
- data/lib/panchira/resolvers/dlsite_resolver.rb +8 -0
- data/lib/panchira/resolvers/fanza_resolver.rb +48 -0
- data/lib/panchira/resolvers/image_resolver.rb +15 -0
- data/lib/panchira/resolvers/komiflo_resolver.rb +12 -4
- data/lib/panchira/resolvers/melonbooks_resolver.rb +9 -1
- data/lib/panchira/resolvers/narou_resolver.rb +44 -0
- data/lib/panchira/resolvers/nijie_resolver.rb +7 -0
- data/lib/panchira/resolvers/pixiv_resolver.rb +14 -4
- data/lib/panchira/resolvers/resolver.rb +64 -20
- data/lib/panchira/version.rb +3 -1
- metadata +8 -3
- data/.travis.yml +0 -9
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: '084250abfb3df8cee5d16db7e0312347663fac26ce27b88a0a8f35c12b8eed41'
|
4
|
+
data.tar.gz: 6d427b512e89d5c1797b7d713f70ceda702cd0397da22dc4b8ceefaf01a2f474
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 269c296c822ed831714ba70b771f3ec37f03f7b9910115ba6e2b435cfe26838a6dc9a30dd658458c754bd7a20c5270e59d4c7de57d83d2d84638e4e385b7ed20
|
7
|
+
data.tar.gz: 6f7f9884dca58326d32c1f95256193570c97625c579ec118919cde6cc0e83e758ea9b6ecdc4c390a45629ac1de0d95ff47d9126023d94cdfe3316a1b14234f8a
|
@@ -0,0 +1,33 @@
|
|
1
|
+
# This workflow uses actions that are not certified by GitHub.
|
2
|
+
# They are provided by a third-party and are governed by
|
3
|
+
# separate terms of service, privacy policy, and support
|
4
|
+
# documentation.
|
5
|
+
# This workflow will download a prebuilt Ruby version, install dependencies and run tests with Rake
|
6
|
+
# For more information see: https://github.com/marketplace/actions/setup-ruby-jruby-and-truffleruby
|
7
|
+
|
8
|
+
name: Ruby
|
9
|
+
|
10
|
+
on:
|
11
|
+
push:
|
12
|
+
branches: [ master ]
|
13
|
+
pull_request:
|
14
|
+
branches: [ master ]
|
15
|
+
|
16
|
+
jobs:
|
17
|
+
test:
|
18
|
+
|
19
|
+
runs-on: ubuntu-latest
|
20
|
+
|
21
|
+
steps:
|
22
|
+
- uses: actions/checkout@v2
|
23
|
+
- name: Set up Ruby
|
24
|
+
# To automatically get bug fixes and new Ruby versions for ruby/setup-ruby,
|
25
|
+
# change this to (see https://github.com/ruby/setup-ruby#versioning):
|
26
|
+
# uses: ruby/setup-ruby@v1
|
27
|
+
uses: ruby/setup-ruby@ec106b438a1ff6ff109590de34ddc62c540232e0
|
28
|
+
with:
|
29
|
+
ruby-version: 2.6
|
30
|
+
- name: Install dependencies
|
31
|
+
run: bundle install
|
32
|
+
- name: Run tests
|
33
|
+
run: bundle exec rake test
|
data/CHANGELOG.md
CHANGED
@@ -4,6 +4,45 @@ All notable changes to this project will be documented in this file.
|
|
4
4
|
The format is based on [Keep a Changelog](http://keepachangelog.com/)
|
5
5
|
and this project adheres to [Semantic Versioning](http://semver.org/).
|
6
6
|
|
7
|
+
## 1.1.1 - 2020-08-09
|
8
|
+
### Added
|
9
|
+
- Added support for Fanza Doujin.
|
10
|
+
- Added support for description in Fanza Book.
|
11
|
+
|
12
|
+
### Fixed
|
13
|
+
- Fixed an issue where fetching the image did not work for Fanza Book.
|
14
|
+
|
15
|
+
## 1.1.0 - 2020-08-06
|
16
|
+
### Added
|
17
|
+
- Added support for Fanza Books.
|
18
|
+
- Added support for direct links to an image.
|
19
|
+
- You can now set cookie by overriding Resolver#cookie in individual resolvers.
|
20
|
+
|
21
|
+
### Changed
|
22
|
+
- Resolver::USER_AGENT changed to Resolver#user_agent.
|
23
|
+
|
24
|
+
## 1.0.0 - 2020-06-23
|
25
|
+
### Added
|
26
|
+
- Added support for tags.
|
27
|
+
|
28
|
+
### Fixed
|
29
|
+
- Fixed some outdated documents.
|
30
|
+
|
31
|
+
## 0.3.0 - 2020-06-04
|
32
|
+
### Added
|
33
|
+
- You can now register and use your own Resolver with this gem. (see Panchira::Extensions#register)
|
34
|
+
- Added support for new Twitter UI.
|
35
|
+
|
36
|
+
### Changed
|
37
|
+
- Panchira::fetch now returns an instance of PanchiraResult instead of a hash.
|
38
|
+
- Changed default User-Agent slightly.
|
39
|
+
|
40
|
+
## 0.2.0 - 2020-05-18
|
41
|
+
### Added
|
42
|
+
- Added support for Shousetsuka Ni Narou (novel18.syosetu.com).
|
43
|
+
- Added support for external resolvers.
|
44
|
+
- Added method Panchira::Resolvers#applicable?.
|
45
|
+
|
7
46
|
## 0.1.1 - 2020-05-13
|
8
47
|
### Fixed
|
9
48
|
- Fix serious requirement issue and make this gem at least executable.
|
@@ -12,4 +51,9 @@ and this project adheres to [Semantic Versioning](http://semver.org/).
|
|
12
51
|
### Added
|
13
52
|
- Released Panchira gem. At this time we can parse only 5 websites.
|
14
53
|
|
54
|
+
[1.1.0]: https://github.com/nuita/panchira/releases/tag/v1.1.0
|
55
|
+
[1.0.0]: https://github.com/nuita/panchira/releases/tag/v1.0.0
|
56
|
+
[0.3.0]: https://github.com/nuita/panchira/releases/tag/v0.3.0
|
57
|
+
[0.2.0]: https://github.com/nuita/panchira/releases/tag/v0.2.0
|
58
|
+
[0.1.1]: https://github.com/nuita/panchira/releases/tag/v0.1.1
|
15
59
|
[0.1.0]: https://github.com/nuita/panchira/releases/tag/v0.1.0
|
data/Gemfile.lock
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
panchira (
|
4
|
+
panchira (1.1.1)
|
5
5
|
fastimage (~> 2.1.7)
|
6
6
|
nokogiri (~> 1.10.9)
|
7
7
|
|
@@ -11,7 +11,7 @@ GEM
|
|
11
11
|
fastimage (2.1.7)
|
12
12
|
mini_portile2 (2.4.0)
|
13
13
|
minitest (5.14.0)
|
14
|
-
nokogiri (1.10.
|
14
|
+
nokogiri (1.10.10)
|
15
15
|
mini_portile2 (~> 2.4.0)
|
16
16
|
rake (12.3.3)
|
17
17
|
|
data/README.md
CHANGED
@@ -1,12 +1,12 @@
|
|
1
1
|
[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
|
2
|
-
|
2
|
+
![Ruby](https://github.com/nuita/panchira/workflows/Ruby/badge.svg)
|
3
3
|
[![Gem Version](https://badge.fury.io/rb/panchira.svg)](https://badge.fury.io/rb/panchira)
|
4
4
|
|
5
5
|
# Panchira
|
6
6
|
|
7
7
|
Due to some legal or ethical issues, most hentai and NSFW platforms don't clarify their content on meta tags. As a result, most hentai platforms are rendered poorly on the card previews on social media.
|
8
8
|
|
9
|
-
To solve this issue, Panchira is made to parse correct and uncensored metadata from such web platforms (at this time we cover **DLSite, Komiflo, Melonbooks, Nijie and
|
9
|
+
To solve this issue, Panchira is made to parse correct and uncensored metadata from such web platforms (at this time we cover **DLSite, Komiflo, Melonbooks, Nijie, Pixiv, Shousetsuka ni narou, Fanza and Twitter**).
|
10
10
|
|
11
11
|
If you need card previews of hentai on your web application, but can't get them by simply parsing meta tags, then it is time for Panchira.
|
12
12
|
|
@@ -16,7 +16,7 @@ This gem is derived from the [Nuita](https://github.com/nuita/nuita) project.
|
|
16
16
|
|
17
17
|
**Please use this gem with appropriate censoring and age-restricting. Never violate local laws and copyrights.**
|
18
18
|
|
19
|
-
If you are running one of the websites we cover and feel negative about
|
19
|
+
If you are running one of the websites we cover and feel negative about this gem, please contact the community or the author ([@kypkyp](https://github.com/kypkyp)).
|
20
20
|
|
21
21
|
## Installation
|
22
22
|
|
@@ -39,10 +39,12 @@ Or install it yourself as:
|
|
39
39
|
```
|
40
40
|
> Panchira.fetch("https://www.pixiv.net/artworks/61711172")
|
41
41
|
|
42
|
-
=>
|
42
|
+
=> #<Panchira::PanchiraResult:0x00007fb95d2c53f8 @canonical_url="https://pixiv.net/member_illust.php?mode=medium&illust_id=61711172", @title="#輿水幸子 すずしい顔で締め切りを破る幸子 - むらためのイラスト - pixiv", @description="(UTF16の)Pietで実行すると「すずしい」と出力する幸子(5色+白Pietカラーゴルフ)。解説記事は http://chy72.hatenablog.com/entry/2016/12/24/1", @image=#<Panchira::PanchiraImage:0x00007fb95f126ea0 @url="https://pixiv.cat/61711172.jpg", @width=810, @height=500>, @tags=["輿水幸子", "Piet", "プログラミング"]>
|
43
43
|
```
|
44
44
|
|
45
|
-
Panchira is
|
45
|
+
In most situations you would call `Panchira.fetch`. It is a singleton method that takes a URL and returns an instance of `PanchiraResult`, which is a simple class that stores the website's information, such as title, description and so on.
|
46
|
+
|
47
|
+
Panchira has a special treatment for each website. `Resolver` classes are where those treatments take place, and you can use your own `Resolver` classes by registering them with Panchira. See the `Panchira::Extensions` documentation in the source code for further details.
|
46
48
|
|
47
49
|
## Development
|
48
50
|
|
data/lib/panchira.rb
CHANGED
@@ -6,17 +6,21 @@ require 'fastimage'
|
|
6
6
|
require 'json'
|
7
7
|
|
8
8
|
require_relative 'panchira/version'
|
9
|
+
require_relative 'panchira/panchira_result'
|
9
10
|
require_relative 'panchira/resolvers/resolver'
|
10
|
-
require_relative 'panchira/
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
11
|
+
require_relative 'panchira/extensions'
|
12
|
+
|
13
|
+
project_root = File.dirname(File.absolute_path(__FILE__))
|
14
|
+
Dir.glob(project_root + '/panchira/resolvers/*_resolver.rb').sort.each { |file| require file }
|
15
|
+
|
16
|
+
# register fallback ImageResolver at the end. (resolver is selected by registration order)
|
17
|
+
::Panchira::Extensions.register(Panchira::ImageResolver)
|
15
18
|
|
16
19
|
# Main Panchira code goes here.
|
20
|
+
# If you simply want to get data from your URL, then ::Panchira::fetch() will do.
|
17
21
|
module Panchira
|
18
22
|
class << self
|
19
|
-
#
|
23
|
+
# Return a PanchiraResult that contains the attributes of given url.
|
20
24
|
def fetch(url)
|
21
25
|
resolver = select_resolver(url)
|
22
26
|
|
@@ -26,20 +30,11 @@ module Panchira
|
|
26
30
|
private
|
27
31
|
|
28
32
|
def select_resolver(url)
|
29
|
-
|
30
|
-
|
31
|
-
Panchira::KomifloResolver
|
32
|
-
when %r{melonbooks.co.jp/detail/detail.php\?product_id=(\d+)}
|
33
|
-
Panchira::MelonbooksResolver
|
34
|
-
when %r{pixiv\.net/(member_illust.php?.*illust_id=|artworks/)(\d+)}
|
35
|
-
Panchira::PixivResolver
|
36
|
-
when /nijie.*view.*id=\d+/
|
37
|
-
Panchira::NijieResolver
|
38
|
-
when /dlsite/
|
39
|
-
Panchira::DlsiteResolver
|
40
|
-
else
|
41
|
-
Panchira::Resolver
|
33
|
+
Panchira::Extensions.resolvers.each do |resolver|
|
34
|
+
return resolver if resolver.applicable?(url)
|
42
35
|
end
|
36
|
+
|
37
|
+
Panchira::Resolver
|
43
38
|
end
|
44
39
|
end
|
45
40
|
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Panchira
|
4
|
+
# This Module manages Resolver classes.
|
5
|
+
# To enable your own Resolver, you need to call Extensions::register().
|
6
|
+
module Extensions
|
7
|
+
@resolvers = []
|
8
|
+
|
9
|
+
class << self
|
10
|
+
# Register a given Resolver to Extensions::Resolvers.
|
11
|
+
def register(resolver)
|
12
|
+
@resolvers.push(resolver) unless @resolvers.include?(resolver)
|
13
|
+
end
|
14
|
+
|
15
|
+
# Panchira::fetch will find a correct Resolver based on this list.
|
16
|
+
attr_reader :resolvers
|
17
|
+
end
|
18
|
+
end
|
19
|
+
end
|
@@ -0,0 +1,13 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Panchira
|
4
|
+
# Image attributes in PanchiraResult.
|
5
|
+
class PanchiraImage
|
6
|
+
attr_accessor :url, :width, :height
|
7
|
+
end
|
8
|
+
|
9
|
+
# Result class for Panchira.fetch.
|
10
|
+
class PanchiraResult
|
11
|
+
attr_accessor :canonical_url, :title, :description, :image, :tags
|
12
|
+
end
|
13
|
+
end
|
@@ -2,10 +2,18 @@
|
|
2
2
|
|
3
3
|
module Panchira
|
4
4
|
class DlsiteResolver < Resolver
|
5
|
+
URL_REGEXP = /dlsite/.freeze
|
6
|
+
|
5
7
|
private
|
6
8
|
|
7
9
|
def parse_image_url
|
8
10
|
@page.css('//meta[property="og:image"]/@content').first.to_s.sub(/sam/, 'main')
|
9
11
|
end
|
12
|
+
|
13
|
+
def parse_tags
|
14
|
+
@page.css('.main_genre').children.children.map(&:text)
|
15
|
+
end
|
10
16
|
end
|
17
|
+
|
18
|
+
::Panchira::Extensions.register(Panchira::DlsiteResolver)
|
11
19
|
end
|
@@ -0,0 +1,48 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'net/https'
|
4
|
+
|
5
|
+
module Panchira
|
6
|
+
module Fanza
|
7
|
+
FANZA_COOKIE = 'age_check_done=1;'
|
8
|
+
|
9
|
+
class FanzaResolver < Resolver
|
10
|
+
private
|
11
|
+
|
12
|
+
def cookie
|
13
|
+
::Panchira::Fanza::FANZA_COOKIE
|
14
|
+
end
|
15
|
+
end
|
16
|
+
|
17
|
+
class FanzaBookResolver < FanzaResolver
|
18
|
+
URL_REGEXP = %r{book\.dmm\.co\.jp\/}.freeze
|
19
|
+
|
20
|
+
private
|
21
|
+
|
22
|
+
def parse_image_url
|
23
|
+
@page.css('.m-imgDetailProductPack/@src').first.to_s
|
24
|
+
end
|
25
|
+
|
26
|
+
def parse_tags
|
27
|
+
@page.css('.m-boxDetailProductInfo__list__description__item > a').map(&:text)
|
28
|
+
end
|
29
|
+
|
30
|
+
def parse_description
|
31
|
+
@page.css('.m-boxDetailProduct__info__story').first&.text.to_s.gsub(/[\n\t]/, '')
|
32
|
+
end
|
33
|
+
end
|
34
|
+
|
35
|
+
class FanzaDoujinResolver < FanzaResolver
|
36
|
+
URL_REGEXP = %r{dmm\.co\.jp\/dc\/doujin\/}.freeze
|
37
|
+
|
38
|
+
private
|
39
|
+
|
40
|
+
def parse_tags
|
41
|
+
@page.css('.genreTag__item').map { |t| t.text.strip }
|
42
|
+
end
|
43
|
+
end
|
44
|
+
end
|
45
|
+
|
46
|
+
::Panchira::Extensions.register(Panchira::Fanza::FanzaBookResolver)
|
47
|
+
::Panchira::Extensions.register(Panchira::Fanza::FanzaDoujinResolver)
|
48
|
+
end
|
@@ -0,0 +1,15 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Panchira
|
4
|
+
class ImageResolver < Resolver
|
5
|
+
URL_REGEXP = %r{\.(png|gif|jpg|jpeg|webp)$}.freeze
|
6
|
+
|
7
|
+
def fetch
|
8
|
+
result = PanchiraResult.new
|
9
|
+
result.canonical_url = @url
|
10
|
+
result.image = PanchiraImage.new
|
11
|
+
result.image.url = @url
|
12
|
+
result
|
13
|
+
end
|
14
|
+
end
|
15
|
+
end
|
@@ -4,12 +4,14 @@ module Panchira
|
|
4
4
|
# KomifloResolver resolves Komiflo.
|
5
5
|
# Komiflo has its API server, so we can utilize it.
|
6
6
|
class KomifloResolver < Resolver
|
7
|
+
URL_REGEXP = %r{komiflo\.com(?:/#!)?/comics/(\d+)}.freeze
|
8
|
+
|
7
9
|
def initialize(url)
|
8
10
|
@url = url
|
9
11
|
|
10
|
-
@id = url.slice(
|
11
|
-
|
12
|
-
@json = JSON.parse(
|
12
|
+
@id = url.slice(URL_REGEXP, 1)
|
13
|
+
raw_json = URI.parse("https://api.komiflo.com/content/id/#{@id}").read('User-Agent' => user_agent)
|
14
|
+
@json = JSON.parse(raw_json)
|
13
15
|
end
|
14
16
|
|
15
17
|
private
|
@@ -28,12 +30,18 @@ module Panchira
|
|
28
30
|
|
29
31
|
parent = @json['content']['parents'][0]['data']['title']
|
30
32
|
description = '著: ' + author if author
|
31
|
-
description
|
33
|
+
description + " / #{parent}" if parent
|
32
34
|
end
|
33
35
|
|
34
36
|
def parse_canonical_url
|
35
37
|
id = @url.slice(%r{komiflo\.com(?:/#!)?/comics/(\d+)}, 1)
|
36
38
|
'https://komiflo.com/comics/' + id
|
37
39
|
end
|
40
|
+
|
41
|
+
def parse_tags
|
42
|
+
@json['content']['attributes']['tags']['children'].map { |content| content['data']['name'] }
|
43
|
+
end
|
38
44
|
end
|
45
|
+
|
46
|
+
::Panchira::Extensions.register(Panchira::KomifloResolver)
|
39
47
|
end
|
@@ -2,10 +2,12 @@
|
|
2
2
|
|
3
3
|
module Panchira
|
4
4
|
class MelonbooksResolver < Resolver
|
5
|
+
URL_REGEXP = %r{melonbooks\.co\.jp/detail/detail\.php\?product_id=(\d+)}.freeze # Group 1 captures the product id; dots are escaped so they only match a literal ".".
|
6
|
+
|
5
7
|
private
|
6
8
|
|
7
9
|
def parse_canonical_url
|
8
|
-
product_id = @url.slice(
|
10
|
+
product_id = @url.slice(URL_REGEXP, 1)
|
9
11
|
'https://www.melonbooks.co.jp/detail/detail.php?product_id=' + product_id + '&adult_view=1'
|
10
12
|
end
|
11
13
|
|
@@ -23,5 +25,11 @@ module Panchira
|
|
23
25
|
def parse_image_url
|
24
26
|
@page.css('//meta[property="og:image"]/@content').first.to_s.sub(/&c=1/, '')
|
25
27
|
end
|
28
|
+
|
29
|
+
def parse_tags
|
30
|
+
@page.css('#related_tags .clearfix').children.children.map(&:text)
|
31
|
+
end
|
26
32
|
end
|
33
|
+
|
34
|
+
::Panchira::Extensions.register(Panchira::MelonbooksResolver)
|
27
35
|
end
|
@@ -0,0 +1,44 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'net/https'
|
4
|
+
|
5
|
+
module Panchira
|
6
|
+
module Narou
|
7
|
+
class Novel18Resolver < Resolver
|
8
|
+
URL_REGEXP = %r{novel18\.syosetu\.com/}.freeze
|
9
|
+
ID_REGEXP = %r{novel18\.syosetu\.com/(?<id>[^/]+)}.freeze # A real Regexp (not a %{} String) so the dots stay literal; :id captures the first path segment.
|
10
|
+
|
11
|
+
def fetch_page(uri)
|
12
|
+
u = URI.parse(uri)
|
13
|
+
http = Net::HTTP.new(u.host, u.port)
|
14
|
+
http.use_ssl = u.port == 443
|
15
|
+
res = http.get u.request_uri, { 'cookie' => 'over18=yes;' }
|
16
|
+
|
17
|
+
Nokogiri::HTML.parse(res.body, uri)
|
18
|
+
end
|
19
|
+
|
20
|
+
def parse_tags
|
21
|
+
id = @url.match(ID_REGEXP)[:id]
|
22
|
+
return [] unless id
|
23
|
+
|
24
|
+
desc = fetch_page("https://novel18.syosetu.com/novelview/infotop/ncode/#{id}/")
|
25
|
+
desc.xpath('//*[@id="noveltable1"]/tr[3]')&.text&.split("\n\n\n")&.dig(1)&.split(' ') # つらい。
|
26
|
+
end
|
27
|
+
end
|
28
|
+
class NcodeResolver < Resolver
|
29
|
+
URL_REGEXP = %r{ncode\.syosetu\.com}.freeze
|
30
|
+
ID_REGEXP = %r{ncode\.syosetu\.com/(?<id>[^/]+)}.freeze # A real Regexp (not a %{} String) so the dots stay literal; :id captures the first path segment.
|
31
|
+
|
32
|
+
def parse_tags
|
33
|
+
id = @url.match(ID_REGEXP)[:id]
|
34
|
+
return [] unless id
|
35
|
+
|
36
|
+
desc = fetch_page("https://ncode.syosetu.com/novelview/infotop/ncode/#{id}/")
|
37
|
+
desc.xpath('//*[@id="noveltable1"]/tr[3]')&.text&.split("\n\n\n")&.dig(1)&.delete("\u00A0")&.split(' ')&.grep_v('') # めっちゃつらい。
|
38
|
+
end
|
39
|
+
end
|
40
|
+
end
|
41
|
+
|
42
|
+
::Panchira::Extensions.register(Panchira::Narou::NcodeResolver)
|
43
|
+
::Panchira::Extensions.register(Panchira::Narou::Novel18Resolver)
|
44
|
+
end
|
@@ -2,6 +2,7 @@
|
|
2
2
|
|
3
3
|
module Panchira
|
4
4
|
class NijieResolver < Resolver
|
5
|
+
URL_REGEXP = /nijie.*view.*id=\d+/.freeze
|
5
6
|
|
6
7
|
private
|
7
8
|
|
@@ -23,5 +24,11 @@ module Panchira
|
|
23
24
|
@page.css('//meta[property="og:image"]/@content').first.to_s
|
24
25
|
end
|
25
26
|
end
|
27
|
+
|
28
|
+
def parse_tags
|
29
|
+
@page.css('#view-tag span.tag_name').map(&:text)
|
30
|
+
end
|
26
31
|
end
|
32
|
+
|
33
|
+
::Panchira::Extensions.register(Panchira::NijieResolver)
|
27
34
|
end
|
@@ -2,21 +2,25 @@
|
|
2
2
|
|
3
3
|
module Panchira
|
4
4
|
class PixivResolver < Resolver
|
5
|
+
URL_REGEXP = %r{pixiv\.net/(member_illust\.php\?.*illust_id=|artworks/)(\d+)}.freeze # Group 2 captures the illust id; "." and "?" in "member_illust.php?" are escaped to match literally.
|
6
|
+
|
5
7
|
def initialize(url)
|
6
8
|
super(url)
|
7
|
-
@illust_id = url.slice(
|
9
|
+
@illust_id = url.slice(URL_REGEXP, 2)
|
10
|
+
|
11
|
+
raw_json = URI.parse("https://www.pixiv.net/ajax/illust/#{@illust_id}").read('User-Agent' => user_agent)
|
12
|
+
@json = JSON.parse(raw_json)
|
8
13
|
end
|
9
14
|
|
10
15
|
private
|
11
16
|
|
12
17
|
def parse_canonical_url
|
13
|
-
|
14
|
-
'https://pixiv.net/member_illust.php?mode=medium&illust_id=' + illust_id
|
18
|
+
'https://pixiv.net/member_illust.php?mode=medium&illust_id=' + @illust_id
|
15
19
|
end
|
16
20
|
|
17
21
|
def parse_image_url
|
18
22
|
proxy_url = "https://pixiv.cat/#{@illust_id}.jpg"
|
19
|
-
|
23
|
+
|
20
24
|
case Net::HTTP.get_response(URI.parse(proxy_url))
|
21
25
|
when Net::HTTPNotFound
|
22
26
|
proxy_url = "https://pixiv.cat/#{@illust_id}-1.jpg"
|
@@ -26,5 +30,11 @@ module Panchira
|
|
26
30
|
rescue StandardError
|
27
31
|
@page.css('//meta[property="og:image"]/@content').first.to_s
|
28
32
|
end
|
33
|
+
|
34
|
+
def parse_tags
|
35
|
+
@json['body']['tags']['tags'].map{|content| content['tag']}
|
36
|
+
end
|
29
37
|
end
|
38
|
+
|
39
|
+
::Panchira::Extensions.register(Panchira::PixivResolver)
|
30
40
|
end
|
@@ -1,44 +1,76 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
# Resolver is a class that actually GET url and resolve attributes.
|
4
|
-
# This class is the default resolver for pages,
|
5
|
-
# and is inherited by the other resolvers.
|
6
3
|
module Panchira
|
4
|
+
# Resolver is the class that actually gets attributes by fetching the designated URL.
|
5
|
+
# This class is the default resolver for pages. <br>
|
6
|
+
# To create your own resolver, first you make a class that extends Resolver,
|
7
|
+
# and then register it by ::Panchira::Extensions::register().
|
8
|
+
# Then ::Panchira::fetch will pick up your resolver when Resolver::applicable?() is true.
|
7
9
|
class Resolver
|
10
|
+
# URL pattern that a resolver tries to resolve.
|
11
|
+
# You must override this in subclasses to limit which urls to resolve.
|
12
|
+
URL_REGEXP = URI::DEFAULT_PARSER.make_regexp
|
13
|
+
|
8
14
|
def initialize(url)
|
9
15
|
@url = url
|
10
16
|
end
|
11
17
|
|
18
|
+
# This function is called right after this Resolver instance is made.
|
19
|
+
# Fetch page from @url and return PanchiraResult.
|
12
20
|
def fetch
|
13
|
-
|
21
|
+
result = PanchiraResult.new
|
14
22
|
|
15
23
|
@page = fetch_page(@url)
|
16
|
-
|
24
|
+
result.canonical_url = parse_canonical_url
|
17
25
|
|
18
|
-
if @url !=
|
19
|
-
@page = fetch_page(attributes[:canonical_url])
|
20
|
-
end
|
26
|
+
@page = fetch_page(result.canonical_url) if @url != result.canonical_url
|
21
27
|
|
22
|
-
|
23
|
-
|
24
|
-
|
28
|
+
result.title = parse_title
|
29
|
+
result.description = parse_description
|
30
|
+
result.image = parse_image
|
31
|
+
result.tags = parse_tags
|
32
|
+
|
33
|
+
result
|
34
|
+
end
|
25
35
|
|
26
|
-
|
36
|
+
class << self
|
37
|
+
# Tell whether the url is applicable for this resolver.
|
38
|
+
# ::Panchira::fetch uses this method to choose a Resolver for a URL.
|
39
|
+
def applicable?(url)
|
40
|
+
url =~ self::URL_REGEXP
|
41
|
+
end
|
27
42
|
end
|
28
43
|
|
29
44
|
private
|
30
45
|
|
31
46
|
def fetch_page(url)
|
32
|
-
|
47
|
+
read_options = {
|
48
|
+
'User-Agent' => user_agent,
|
49
|
+
'Cookie' => cookie
|
50
|
+
}
|
51
|
+
|
52
|
+
raw_page = URI.parse(url).read(read_options)
|
33
53
|
charset = raw_page.charset
|
34
54
|
Nokogiri::HTML.parse(raw_page, url, charset)
|
35
55
|
end
|
36
56
|
|
37
57
|
def parse_canonical_url
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
58
|
+
history = []
|
59
|
+
|
60
|
+
# fetch page and refresh canonical_url until canonical_url converges.
|
61
|
+
loop do
|
62
|
+
url_in_res = @page.css('//link[rel="canonical"]/@href').to_s
|
63
|
+
|
64
|
+
if url_in_res.empty?
|
65
|
+
return history.last || @url
|
66
|
+
else
|
67
|
+
if history.include?(url_in_res) || history.length > 5
|
68
|
+
return url_in_res
|
69
|
+
else
|
70
|
+
history.push(url_in_res)
|
71
|
+
@page = fetch_page(url_in_res)
|
72
|
+
end
|
73
|
+
end
|
42
74
|
end
|
43
75
|
end
|
44
76
|
|
@@ -59,9 +91,9 @@ module Panchira
|
|
59
91
|
end
|
60
92
|
|
61
93
|
def parse_image
|
62
|
-
image =
|
63
|
-
image
|
64
|
-
image
|
94
|
+
image = PanchiraImage.new
|
95
|
+
image.url = parse_image_url
|
96
|
+
image.width, image.height = FastImage.size(image.url)
|
65
97
|
|
66
98
|
image
|
67
99
|
end
|
@@ -69,5 +101,17 @@ module Panchira
|
|
69
101
|
def parse_image_url
|
70
102
|
@page.css('//meta[property="og:image"]/@content').first.to_s
|
71
103
|
end
|
104
|
+
|
105
|
+
def parse_tags
|
106
|
+
[]
|
107
|
+
end
|
108
|
+
|
109
|
+
def cookie
|
110
|
+
''
|
111
|
+
end
|
112
|
+
|
113
|
+
def user_agent
|
114
|
+
"Mozilla/5.0 (compatible; PanchiraBot/#{VERSION}; +https://github.com/nuita/panchira)"
|
115
|
+
end
|
72
116
|
end
|
73
117
|
end
|
data/lib/panchira/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: panchira
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 1.1.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- kyp
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-
|
11
|
+
date: 2020-08-09 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -89,8 +89,8 @@ executables: []
|
|
89
89
|
extensions: []
|
90
90
|
extra_rdoc_files: []
|
91
91
|
files:
|
92
|
+
- ".github/workflows/ruby.yml"
|
92
93
|
- ".gitignore"
|
93
|
-
- ".travis.yml"
|
94
94
|
- CHANGELOG.md
|
95
95
|
- Gemfile
|
96
96
|
- Gemfile.lock
|
@@ -100,9 +100,14 @@ files:
|
|
100
100
|
- bin/console
|
101
101
|
- bin/setup
|
102
102
|
- lib/panchira.rb
|
103
|
+
- lib/panchira/extensions.rb
|
104
|
+
- lib/panchira/panchira_result.rb
|
103
105
|
- lib/panchira/resolvers/dlsite_resolver.rb
|
106
|
+
- lib/panchira/resolvers/fanza_resolver.rb
|
107
|
+
- lib/panchira/resolvers/image_resolver.rb
|
104
108
|
- lib/panchira/resolvers/komiflo_resolver.rb
|
105
109
|
- lib/panchira/resolvers/melonbooks_resolver.rb
|
110
|
+
- lib/panchira/resolvers/narou_resolver.rb
|
106
111
|
- lib/panchira/resolvers/nijie_resolver.rb
|
107
112
|
- lib/panchira/resolvers/pixiv_resolver.rb
|
108
113
|
- lib/panchira/resolvers/resolver.rb
|