panchira 1.1.1 → 1.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +7 -0
- data/Gemfile.lock +2 -2
- data/lib/panchira/panchira_result.rb +1 -1
- data/lib/panchira/resolvers/dlsite_resolver.rb +26 -0
- data/lib/panchira/resolvers/fanza_resolver.rb +8 -0
- data/lib/panchira/resolvers/komiflo_resolver.rb +6 -7
- data/lib/panchira/resolvers/melonbooks_resolver.rb +33 -0
- data/lib/panchira/resolvers/narou_resolver.rb +32 -13
- data/lib/panchira/resolvers/nijie_resolver.rb +15 -0
- data/lib/panchira/resolvers/pixiv_resolver.rb +9 -1
- data/lib/panchira/resolvers/resolver.rb +10 -0
- data/lib/panchira/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 066440e461b75b84a9df04fd76f1960243785b26bc7f4c61289029248e0a8bd9
|
4
|
+
data.tar.gz: 1fc1f712c6a8d88363cf3c4162be2681e08631c515ffbe6631fba3fd204b91c0
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 63a914d286eaf909f4a2ab7c128f3725a96a6badbac71a878362e4a09a4e29f720f1f81fab2fa4b1f0ddeb513fac04b5c00597132012f5dbe42d783f54b221b2
|
7
|
+
data.tar.gz: af6085627c05532b7019a7134da472329c52b0f61b3329079694a2f59115e52f1c7b0bc0acc2c9cc3ea19814a33c3e2cd9116fcd7f692278e2150de7874bb424
|
data/CHANGELOG.md
CHANGED
@@ -4,6 +4,13 @@ All notable changes to this project will be documented in this file.
|
|
4
4
|
The format is based on [Keep a Changelog](http://keepachangelog.com/)
|
5
5
|
and this project adheres to [Semantic Versioning](http://semver.org/).
|
6
6
|
|
7
|
+
## 1.2.0 - 2020-10-31
|
8
|
+
### Added
|
9
|
+
- You can now fetch author and circle name in resolvers (Resolver#fetch_author, Resolver#fetch_circle).
|
10
|
+
|
11
|
+
### Changed
|
12
|
+
- Resolver#fetch_title returns the title of the content (not the original title of the page).
|
13
|
+
|
7
14
|
## 1.1.1 - 2020-08-09
|
8
15
|
### Added
|
9
16
|
- Added support for Fanza Doujin.
|
data/Gemfile.lock
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
panchira (1.1.1)
|
4
|
+
panchira (1.2.0)
|
5
5
|
fastimage (~> 2.1.7)
|
6
6
|
nokogiri (~> 1.10.9)
|
7
7
|
|
@@ -10,7 +10,7 @@ GEM
|
|
10
10
|
specs:
|
11
11
|
fastimage (2.1.7)
|
12
12
|
mini_portile2 (2.4.0)
|
13
|
-
minitest (5.14.
|
13
|
+
minitest (5.14.2)
|
14
14
|
nokogiri (1.10.10)
|
15
15
|
mini_portile2 (~> 2.4.0)
|
16
16
|
rake (12.3.3)
|
@@ -6,6 +6,32 @@ module Panchira
|
|
6
6
|
|
7
7
|
private
|
8
8
|
|
9
|
+
# DLSiteのタイトルの[]に含まれている値はtitleタグだとサークル名 or 出版社名だが、
|
10
|
+
# Panchiraが優先するog:titleではサークル名 or 著者名 となる。
|
11
|
+
# 取得に際しては、以下の3パターンを考慮する必要があるため、titleタグとtableの解析が必要となる:
|
12
|
+
# 1) 同人系の一部, 特に音声など。タイトル[サークル名]. 本文中に著者・作者の記載なし
|
13
|
+
# 2) 同人系の一部, 特に一部の同人誌など。タイトル[サークル名]. 本文中に「作者」の記載あり
|
14
|
+
# 3) 商業系。タイトル[著者名] サークル名なし
|
15
|
+
# 込み入った実装になってしまったため、parse自体をいじる必要があるかも
|
16
|
+
def parse_title
|
17
|
+
@title_md = super.match(/(.+) \[(\S+)\] \|.+/)
|
18
|
+
@title_md[1]
|
19
|
+
end
|
20
|
+
|
21
|
+
def parse_author
|
22
|
+
@page.css('table[id*="work_"] tr').each do |tr|
|
23
|
+
if tr.css('th').text =~ /(作|著)者/
|
24
|
+
return @author = tr.css('td > a').first.text.strip
|
25
|
+
end
|
26
|
+
end
|
27
|
+
|
28
|
+
@author = nil
|
29
|
+
end
|
30
|
+
|
31
|
+
def parse_circle
|
32
|
+
@title_md[2] if @author != @title_md[2]
|
33
|
+
end
|
34
|
+
|
9
35
|
def parse_image_url
|
10
36
|
@page.css('//meta[property="og:image"]/@content').first.to_s.sub(/sam/, 'main')
|
11
37
|
end
|
@@ -19,6 +19,10 @@ module Panchira
|
|
19
19
|
|
20
20
|
private
|
21
21
|
|
22
|
+
def parse_author
|
23
|
+
@page.css('.m-boxDetailProductInfoMainList__description__list__item > a').first&.text.to_s
|
24
|
+
end
|
25
|
+
|
22
26
|
def parse_image_url
|
23
27
|
@page.css('.m-imgDetailProductPack/@src').first.to_s
|
24
28
|
end
|
@@ -37,6 +41,10 @@ module Panchira
|
|
37
41
|
|
38
42
|
private
|
39
43
|
|
44
|
+
def parse_circle
|
45
|
+
@page.css('a.circleName__txt').first.text
|
46
|
+
end
|
47
|
+
|
40
48
|
def parse_tags
|
41
49
|
@page.css('.genreTag__item').map { |t| t.text.strip }
|
42
50
|
end
|
@@ -17,20 +17,19 @@ module Panchira
|
|
17
17
|
private
|
18
18
|
|
19
19
|
def parse_title
|
20
|
-
|
21
|
-
"#{comic_title} | Komiflo"
|
20
|
+
@json['content']['data']['title']
|
22
21
|
end
|
23
22
|
|
24
23
|
def parse_image_url
|
25
24
|
'https://t.komiflo.com/564_mobile_large_3x/' + @json['content']['named_imgs']['cover']['filename']
|
26
25
|
end
|
27
26
|
|
28
|
-
def
|
29
|
-
|
27
|
+
def parse_author
|
28
|
+
@json['content']['attributes']['artists']['children'][0]['data']['name']
|
29
|
+
end
|
30
30
|
|
31
|
-
|
32
|
-
|
33
|
-
description + " / #{parent}" if parent
|
31
|
+
def parse_description
|
32
|
+
@json['content']['parents'][0]['data']['title']
|
34
33
|
end
|
35
34
|
|
36
35
|
def parse_canonical_url
|
@@ -4,8 +4,41 @@ module Panchira
|
|
4
4
|
class MelonbooksResolver < Resolver
|
5
5
|
URL_REGEXP = %r{melonbooks.co.jp/detail/detail.php\?product_id=(\d+)}.freeze
|
6
6
|
|
7
|
+
def fetch
|
8
|
+
result = PanchiraResult.new
|
9
|
+
|
10
|
+
@page = fetch_page(@url)
|
11
|
+
result.canonical_url = parse_canonical_url
|
12
|
+
|
13
|
+
@page = fetch_page(result.canonical_url) if @url != result.canonical_url
|
14
|
+
|
15
|
+
result.title, result.author, result.circle = parse_table
|
16
|
+
result.description = parse_description
|
17
|
+
result.image = parse_image
|
18
|
+
result.tags = parse_tags
|
19
|
+
|
20
|
+
result
|
21
|
+
end
|
22
|
+
|
7
23
|
private
|
8
24
|
|
25
|
+
def parse_table
|
26
|
+
title, author, circle = nil, nil, nil
|
27
|
+
|
28
|
+
@page.css('#description > table.stripe > tr').each do |tr|
|
29
|
+
case tr.css('th').text
|
30
|
+
when 'タイトル'
|
31
|
+
title = tr.css('td').text.strip
|
32
|
+
when 'サークル名'
|
33
|
+
circle = tr.css('td > a').text.match(/^(.+)\W\(作品数:/)&.values_at(1)[0]
|
34
|
+
when '作家名'
|
35
|
+
author = tr.css('td > a').text.strip
|
36
|
+
end
|
37
|
+
end
|
38
|
+
|
39
|
+
[title, author, circle]
|
40
|
+
end
|
41
|
+
|
9
42
|
def parse_canonical_url
|
10
43
|
product_id = @url.slice(URL_REGEXP, 1)
|
11
44
|
'https://www.melonbooks.co.jp/detail/detail.php?product_id=' + product_id + '&adult_view=1'
|
@@ -6,7 +6,15 @@ module Panchira
|
|
6
6
|
module Narou
|
7
7
|
class Novel18Resolver < Resolver
|
8
8
|
URL_REGEXP = %r{novel18\.syosetu\.com/}.freeze
|
9
|
-
ID_REGEXP = %{novel18\.syosetu\.com/(?<id>[^/]+)}
|
9
|
+
ID_REGEXP = %{novel18\.syosetu\.com/(?<id>[^/]+)}
|
10
|
+
|
11
|
+
def initialize(url)
|
12
|
+
super(url)
|
13
|
+
|
14
|
+
if id = @url.match(ID_REGEXP)[:id]
|
15
|
+
@desc = fetch_page("https://novel18.syosetu.com/novelview/infotop/ncode/#{id}/")
|
16
|
+
end
|
17
|
+
end
|
10
18
|
|
11
19
|
def fetch_page(uri)
|
12
20
|
u = URI.parse(uri)
|
@@ -17,24 +25,35 @@ module Panchira
|
|
17
25
|
Nokogiri::HTML.parse(res.body, uri)
|
18
26
|
end
|
19
27
|
|
20
|
-
def
|
21
|
-
|
22
|
-
|
28
|
+
def parse_author
|
29
|
+
@desc&.xpath('//*[@id="noveltable1"]/tr[2]/td')&.text&.strip
|
30
|
+
end
|
23
31
|
|
24
|
-
|
25
|
-
|
32
|
+
def parse_tags
|
33
|
+
# つらい。
|
34
|
+
@desc&.xpath('//*[@id="noveltable1"]/tr[3]')&.text&.split("\n\n\n")&.dig(1)&.split(' ')
|
26
35
|
end
|
27
36
|
end
|
37
|
+
|
28
38
|
class NcodeResolver < Resolver
|
29
|
-
URL_REGEXP =
|
30
|
-
ID_REGEXP = %{ncode\.syosetu\.com/(?<id>[^/]+)}
|
39
|
+
URL_REGEXP = /ncode\.syosetu\.com/.freeze
|
40
|
+
ID_REGEXP = %{ncode\.syosetu\.com/(?<id>[^/]+)}
|
31
41
|
|
32
|
-
def
|
33
|
-
|
34
|
-
|
42
|
+
def initialize(url)
|
43
|
+
super(url)
|
44
|
+
|
45
|
+
if id = @url.match(ID_REGEXP)[:id]
|
46
|
+
@desc = fetch_page("https://novel18.syosetu.com/novelview/infotop/ncode/#{id}/")
|
47
|
+
end
|
48
|
+
end
|
35
49
|
|
36
|
-
|
37
|
-
desc
|
50
|
+
def parse_author
|
51
|
+
@desc&.xpath('//*[@id="noveltable1"]/tr[2]/td')&.text&.strip
|
52
|
+
end
|
53
|
+
|
54
|
+
def parse_tags
|
55
|
+
# めっちゃつらい。
|
56
|
+
@desc&.xpath('//*[@id="noveltable1"]/tr[3]')&.text&.split("\n\n\n")&.dig(1)&.delete("\u00A0")&.split(' ')&.grep_v('')
|
38
57
|
end
|
39
58
|
end
|
40
59
|
end
|
@@ -6,6 +6,21 @@ module Panchira
|
|
6
6
|
|
7
7
|
private
|
8
8
|
|
9
|
+
def parse_title
|
10
|
+
full_title = super
|
11
|
+
@md = full_title.match(/\A(?<title>.+) \| (?<author>.+)\z/)
|
12
|
+
|
13
|
+
@md[:title]
|
14
|
+
end
|
15
|
+
|
16
|
+
def parse_author
|
17
|
+
@md[:author]
|
18
|
+
end
|
19
|
+
|
20
|
+
def parse_description
|
21
|
+
@page.css('p.illust_description')&.first&.text&.strip
|
22
|
+
end
|
23
|
+
|
9
24
|
def parse_canonical_url
|
10
25
|
@url.sub(/sp.nijie/, 'nijie').sub(/view_popup/, 'view')
|
11
26
|
end
|
@@ -14,6 +14,14 @@ module Panchira
|
|
14
14
|
|
15
15
|
private
|
16
16
|
|
17
|
+
def parse_title
|
18
|
+
@json['body']['title']
|
19
|
+
end
|
20
|
+
|
21
|
+
def parse_author
|
22
|
+
@json['body']['userName']
|
23
|
+
end
|
24
|
+
|
17
25
|
def parse_canonical_url
|
18
26
|
'https://pixiv.net/member_illust.php?mode=medium&illust_id=' + @illust_id
|
19
27
|
end
|
@@ -32,7 +40,7 @@ module Panchira
|
|
32
40
|
end
|
33
41
|
|
34
42
|
def parse_tags
|
35
|
-
@json['body']['tags']['tags'].map{|content| content['tag']}
|
43
|
+
@json['body']['tags']['tags'].map { |content| content['tag'] }
|
36
44
|
end
|
37
45
|
end
|
38
46
|
|
@@ -29,6 +29,8 @@ module Panchira
|
|
29
29
|
result.description = parse_description
|
30
30
|
result.image = parse_image
|
31
31
|
result.tags = parse_tags
|
32
|
+
result.author = parse_author
|
33
|
+
result.circle = parse_circle
|
32
34
|
|
33
35
|
result
|
34
36
|
end
|
@@ -110,6 +112,14 @@ module Panchira
|
|
110
112
|
''
|
111
113
|
end
|
112
114
|
|
115
|
+
def parse_author
|
116
|
+
@page.css('//meta[name="author"]/@content').first.to_s
|
117
|
+
end
|
118
|
+
|
119
|
+
def parse_circle
|
120
|
+
nil
|
121
|
+
end
|
122
|
+
|
113
123
|
def user_agent
|
114
124
|
"Mozilla/5.0 (compatible; PanchiraBot/#{VERSION}; +https://github.com/nuita/panchira)"
|
115
125
|
end
|
data/lib/panchira/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: panchira
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.1.1
|
4
|
+
version: 1.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- kyp
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-
|
11
|
+
date: 2020-10-31 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|