panchira 1.1.1 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +7 -0
- data/Gemfile.lock +2 -2
- data/lib/panchira/panchira_result.rb +1 -1
- data/lib/panchira/resolvers/dlsite_resolver.rb +26 -0
- data/lib/panchira/resolvers/fanza_resolver.rb +8 -0
- data/lib/panchira/resolvers/komiflo_resolver.rb +6 -7
- data/lib/panchira/resolvers/melonbooks_resolver.rb +33 -0
- data/lib/panchira/resolvers/narou_resolver.rb +32 -13
- data/lib/panchira/resolvers/nijie_resolver.rb +15 -0
- data/lib/panchira/resolvers/pixiv_resolver.rb +9 -1
- data/lib/panchira/resolvers/resolver.rb +10 -0
- data/lib/panchira/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 066440e461b75b84a9df04fd76f1960243785b26bc7f4c61289029248e0a8bd9
|
4
|
+
data.tar.gz: 1fc1f712c6a8d88363cf3c4162be2681e08631c515ffbe6631fba3fd204b91c0
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 63a914d286eaf909f4a2ab7c128f3725a96a6badbac71a878362e4a09a4e29f720f1f81fab2fa4b1f0ddeb513fac04b5c00597132012f5dbe42d783f54b221b2
|
7
|
+
data.tar.gz: af6085627c05532b7019a7134da472329c52b0f61b3329079694a2f59115e52f1c7b0bc0acc2c9cc3ea19814a33c3e2cd9116fcd7f692278e2150de7874bb424
|
data/CHANGELOG.md
CHANGED
@@ -4,6 +4,13 @@ All notable changes to this project will be documented in this file.
|
|
4
4
|
The format is based on [Keep a Changelog](http://keepachangelog.com/)
|
5
5
|
and this project adheres to [Semantic Versioning](http://semver.org/).
|
6
6
|
|
7
|
+
## 1.2.0 - 2020-10-31
|
8
|
+
### Added
|
9
|
+
- You can now fetch author and circle name in resolvers (Resolver#fetch_author, Resolver#fetch_circle).
|
10
|
+
|
11
|
+
### Changed
|
12
|
+
- Resolver#fetch_title returns the title of the content (not the original title of the page).
|
13
|
+
|
7
14
|
## 1.1.1 - 2020-08-09
|
8
15
|
### Added
|
9
16
|
- Added support for Fanza Doujin.
|
data/Gemfile.lock
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
panchira (1.1.1)
|
4
|
+
panchira (1.2.0)
|
5
5
|
fastimage (~> 2.1.7)
|
6
6
|
nokogiri (~> 1.10.9)
|
7
7
|
|
@@ -10,7 +10,7 @@ GEM
|
|
10
10
|
specs:
|
11
11
|
fastimage (2.1.7)
|
12
12
|
mini_portile2 (2.4.0)
|
13
|
-
minitest (5.14.
|
13
|
+
minitest (5.14.2)
|
14
14
|
nokogiri (1.10.10)
|
15
15
|
mini_portile2 (~> 2.4.0)
|
16
16
|
rake (12.3.3)
|
@@ -6,6 +6,32 @@ module Panchira
|
|
6
6
|
|
7
7
|
private
|
8
8
|
|
9
|
+
# DLSiteのタイトルの[]に含まれている値はtitleタグだとサークル名 or 出版社名だが、
|
10
|
+
# Panchiraが優先するog:titleではサークル名 or 著者名 となる。
|
11
|
+
# 取得に際しては、以下の3パターンを考慮する必要があるため、titleタグとtableの解析が必要となる:
|
12
|
+
# 1) 同人系の一部, 特に音声など。タイトル[サークル名]. 本文中に著者・作者の記載なし
|
13
|
+
# 2) 同人系の一部, 特に一部の同人誌など。タイトル[サークル名]. 本文中に「作者」の記載あり
|
14
|
+
# 3) 商業系。タイトル[著者名] サークル名なし
|
15
|
+
# 込み入った実装になってしまったため、parse自体をいじる必要があるかも
|
16
|
+
def parse_title
|
17
|
+
@title_md = super.match(/(.+) \[(\S+)\] \|.+/)
|
18
|
+
@title_md[1]
|
19
|
+
end
|
20
|
+
|
21
|
+
def parse_author
|
22
|
+
@page.css('table[id*="work_"] tr').each do |tr|
|
23
|
+
if tr.css('th').text =~ /(作|著)者/
|
24
|
+
return @author = tr.css('td > a').first.text.strip
|
25
|
+
end
|
26
|
+
end
|
27
|
+
|
28
|
+
@author = nil
|
29
|
+
end
|
30
|
+
|
31
|
+
def parse_circle
|
32
|
+
@title_md[2] if @author != @title_md[2]
|
33
|
+
end
|
34
|
+
|
9
35
|
def parse_image_url
|
10
36
|
@page.css('//meta[property="og:image"]/@content').first.to_s.sub(/sam/, 'main')
|
11
37
|
end
|
@@ -19,6 +19,10 @@ module Panchira
|
|
19
19
|
|
20
20
|
private
|
21
21
|
|
22
|
+
def parse_author
|
23
|
+
@page.css('.m-boxDetailProductInfoMainList__description__list__item > a').first&.text.to_s
|
24
|
+
end
|
25
|
+
|
22
26
|
def parse_image_url
|
23
27
|
@page.css('.m-imgDetailProductPack/@src').first.to_s
|
24
28
|
end
|
@@ -37,6 +41,10 @@ module Panchira
|
|
37
41
|
|
38
42
|
private
|
39
43
|
|
44
|
+
def parse_circle
|
45
|
+
@page.css('a.circleName__txt').first.text
|
46
|
+
end
|
47
|
+
|
40
48
|
def parse_tags
|
41
49
|
@page.css('.genreTag__item').map { |t| t.text.strip }
|
42
50
|
end
|
@@ -17,20 +17,19 @@ module Panchira
|
|
17
17
|
private
|
18
18
|
|
19
19
|
def parse_title
|
20
|
-
|
21
|
-
"#{comic_title} | Komiflo"
|
20
|
+
@json['content']['data']['title']
|
22
21
|
end
|
23
22
|
|
24
23
|
def parse_image_url
|
25
24
|
'https://t.komiflo.com/564_mobile_large_3x/' + @json['content']['named_imgs']['cover']['filename']
|
26
25
|
end
|
27
26
|
|
28
|
-
def
|
29
|
-
|
27
|
+
def parse_author
|
28
|
+
@json['content']['attributes']['artists']['children'][0]['data']['name']
|
29
|
+
end
|
30
30
|
|
31
|
-
|
32
|
-
|
33
|
-
description + " / #{parent}" if parent
|
31
|
+
def parse_description
|
32
|
+
@json['content']['parents'][0]['data']['title']
|
34
33
|
end
|
35
34
|
|
36
35
|
def parse_canonical_url
|
@@ -4,8 +4,41 @@ module Panchira
|
|
4
4
|
class MelonbooksResolver < Resolver
|
5
5
|
URL_REGEXP = %r{melonbooks.co.jp/detail/detail.php\?product_id=(\d+)}.freeze
|
6
6
|
|
7
|
+
def fetch
|
8
|
+
result = PanchiraResult.new
|
9
|
+
|
10
|
+
@page = fetch_page(@url)
|
11
|
+
result.canonical_url = parse_canonical_url
|
12
|
+
|
13
|
+
@page = fetch_page(result.canonical_url) if @url != result.canonical_url
|
14
|
+
|
15
|
+
result.title, result.author, result.circle = parse_table
|
16
|
+
result.description = parse_description
|
17
|
+
result.image = parse_image
|
18
|
+
result.tags = parse_tags
|
19
|
+
|
20
|
+
result
|
21
|
+
end
|
22
|
+
|
7
23
|
private
|
8
24
|
|
25
|
+
def parse_table
|
26
|
+
title, author, circle = nil, nil, nil
|
27
|
+
|
28
|
+
@page.css('#description > table.stripe > tr').each do |tr|
|
29
|
+
case tr.css('th').text
|
30
|
+
when 'タイトル'
|
31
|
+
title = tr.css('td').text.strip
|
32
|
+
when 'サークル名'
|
33
|
+
circle = tr.css('td > a').text.match(/^(.+)\W\(作品数:/)&.values_at(1)[0]
|
34
|
+
when '作家名'
|
35
|
+
author = tr.css('td > a').text.strip
|
36
|
+
end
|
37
|
+
end
|
38
|
+
|
39
|
+
[title, author, circle]
|
40
|
+
end
|
41
|
+
|
9
42
|
def parse_canonical_url
|
10
43
|
product_id = @url.slice(URL_REGEXP, 1)
|
11
44
|
'https://www.melonbooks.co.jp/detail/detail.php?product_id=' + product_id + '&adult_view=1'
|
@@ -6,7 +6,15 @@ module Panchira
|
|
6
6
|
module Narou
|
7
7
|
class Novel18Resolver < Resolver
|
8
8
|
URL_REGEXP = %r{novel18\.syosetu\.com/}.freeze
|
9
|
-
ID_REGEXP = %{novel18\.syosetu\.com/(?<id>[^/]+)}
|
9
|
+
ID_REGEXP = %{novel18\.syosetu\.com/(?<id>[^/]+)}
|
10
|
+
|
11
|
+
def initialize(url)
|
12
|
+
super(url)
|
13
|
+
|
14
|
+
if id = @url.match(ID_REGEXP)[:id]
|
15
|
+
@desc = fetch_page("https://novel18.syosetu.com/novelview/infotop/ncode/#{id}/")
|
16
|
+
end
|
17
|
+
end
|
10
18
|
|
11
19
|
def fetch_page(uri)
|
12
20
|
u = URI.parse(uri)
|
@@ -17,24 +25,35 @@ module Panchira
|
|
17
25
|
Nokogiri::HTML.parse(res.body, uri)
|
18
26
|
end
|
19
27
|
|
20
|
-
def
|
21
|
-
|
22
|
-
|
28
|
+
def parse_author
|
29
|
+
@desc&.xpath('//*[@id="noveltable1"]/tr[2]/td')&.text&.strip
|
30
|
+
end
|
23
31
|
|
24
|
-
|
25
|
-
|
32
|
+
def parse_tags
|
33
|
+
# つらい。
|
34
|
+
@desc&.xpath('//*[@id="noveltable1"]/tr[3]')&.text&.split("\n\n\n")&.dig(1)&.split(' ')
|
26
35
|
end
|
27
36
|
end
|
37
|
+
|
28
38
|
class NcodeResolver < Resolver
|
29
|
-
URL_REGEXP =
|
30
|
-
ID_REGEXP = %{ncode\.syosetu\.com/(?<id>[^/]+)}
|
39
|
+
URL_REGEXP = /ncode\.syosetu\.com/.freeze
|
40
|
+
ID_REGEXP = %{ncode\.syosetu\.com/(?<id>[^/]+)}
|
31
41
|
|
32
|
-
def
|
33
|
-
|
34
|
-
|
42
|
+
def initialize(url)
|
43
|
+
super(url)
|
44
|
+
|
45
|
+
if id = @url.match(ID_REGEXP)[:id]
|
46
|
+
@desc = fetch_page("https://novel18.syosetu.com/novelview/infotop/ncode/#{id}/")
|
47
|
+
end
|
48
|
+
end
|
35
49
|
|
36
|
-
|
37
|
-
desc
|
50
|
+
def parse_author
|
51
|
+
@desc&.xpath('//*[@id="noveltable1"]/tr[2]/td')&.text&.strip
|
52
|
+
end
|
53
|
+
|
54
|
+
def parse_tags
|
55
|
+
# めっちゃつらい。
|
56
|
+
@desc&.xpath('//*[@id="noveltable1"]/tr[3]')&.text&.split("\n\n\n")&.dig(1)&.delete("\u00A0")&.split(' ')&.grep_v('')
|
38
57
|
end
|
39
58
|
end
|
40
59
|
end
|
@@ -6,6 +6,21 @@ module Panchira
|
|
6
6
|
|
7
7
|
private
|
8
8
|
|
9
|
+
def parse_title
|
10
|
+
full_title = super
|
11
|
+
@md = full_title.match(/\A(?<title>.+) \| (?<author>.+)\z/)
|
12
|
+
|
13
|
+
@md[:title]
|
14
|
+
end
|
15
|
+
|
16
|
+
def parse_author
|
17
|
+
@md[:author]
|
18
|
+
end
|
19
|
+
|
20
|
+
def parse_description
|
21
|
+
@page.css('p.illust_description')&.first&.text&.strip
|
22
|
+
end
|
23
|
+
|
9
24
|
def parse_canonical_url
|
10
25
|
@url.sub(/sp.nijie/, 'nijie').sub(/view_popup/, 'view')
|
11
26
|
end
|
@@ -14,6 +14,14 @@ module Panchira
|
|
14
14
|
|
15
15
|
private
|
16
16
|
|
17
|
+
def parse_title
|
18
|
+
@json['body']['title']
|
19
|
+
end
|
20
|
+
|
21
|
+
def parse_author
|
22
|
+
@json['body']['userName']
|
23
|
+
end
|
24
|
+
|
17
25
|
def parse_canonical_url
|
18
26
|
'https://pixiv.net/member_illust.php?mode=medium&illust_id=' + @illust_id
|
19
27
|
end
|
@@ -32,7 +40,7 @@ module Panchira
|
|
32
40
|
end
|
33
41
|
|
34
42
|
def parse_tags
|
35
|
-
@json['body']['tags']['tags'].map{|content| content['tag']}
|
43
|
+
@json['body']['tags']['tags'].map { |content| content['tag'] }
|
36
44
|
end
|
37
45
|
end
|
38
46
|
|
@@ -29,6 +29,8 @@ module Panchira
|
|
29
29
|
result.description = parse_description
|
30
30
|
result.image = parse_image
|
31
31
|
result.tags = parse_tags
|
32
|
+
result.author = parse_author
|
33
|
+
result.circle = parse_circle
|
32
34
|
|
33
35
|
result
|
34
36
|
end
|
@@ -110,6 +112,14 @@ module Panchira
|
|
110
112
|
''
|
111
113
|
end
|
112
114
|
|
115
|
+
def parse_author
|
116
|
+
@page.css('//meta[name="author"]/@content').first.to_s
|
117
|
+
end
|
118
|
+
|
119
|
+
def parse_circle
|
120
|
+
nil
|
121
|
+
end
|
122
|
+
|
113
123
|
def user_agent
|
114
124
|
"Mozilla/5.0 (compatible; PanchiraBot/#{VERSION}; +https://github.com/nuita/panchira)"
|
115
125
|
end
|
data/lib/panchira/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: panchira
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.1.1
|
4
|
+
version: 1.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- kyp
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-
|
11
|
+
date: 2020-10-31 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|