panchira 1.1.1 → 1.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: '084250abfb3df8cee5d16db7e0312347663fac26ce27b88a0a8f35c12b8eed41'
4
- data.tar.gz: 6d427b512e89d5c1797b7d713f70ceda702cd0397da22dc4b8ceefaf01a2f474
3
+ metadata.gz: 066440e461b75b84a9df04fd76f1960243785b26bc7f4c61289029248e0a8bd9
4
+ data.tar.gz: 1fc1f712c6a8d88363cf3c4162be2681e08631c515ffbe6631fba3fd204b91c0
5
5
  SHA512:
6
- metadata.gz: 269c296c822ed831714ba70b771f3ec37f03f7b9910115ba6e2b435cfe26838a6dc9a30dd658458c754bd7a20c5270e59d4c7de57d83d2d84638e4e385b7ed20
7
- data.tar.gz: 6f7f9884dca58326d32c1f95256193570c97625c579ec118919cde6cc0e83e758ea9b6ecdc4c390a45629ac1de0d95ff47d9126023d94cdfe3316a1b14234f8a
6
+ metadata.gz: 63a914d286eaf909f4a2ab7c128f3725a96a6badbac71a878362e4a09a4e29f720f1f81fab2fa4b1f0ddeb513fac04b5c00597132012f5dbe42d783f54b221b2
7
+ data.tar.gz: af6085627c05532b7019a7134da472329c52b0f61b3329079694a2f59115e52f1c7b0bc0acc2c9cc3ea19814a33c3e2cd9116fcd7f692278e2150de7874bb424
@@ -4,6 +4,13 @@ All notable changes to this project will be documented in this file.
4
4
  The format is based on [Keep a Changelog](http://keepachangelog.com/)
5
5
  and this project adheres to [Semantic Versioning](http://semver.org/).
6
6
 
7
+ ## 1.2.0 - 2020-10-31
8
+ ### Added
9
+ - You can now fetch author and circle name in resolvers (Resolver#fetch_author, Resolver#fetch_circle).
10
+
11
+ ### Changed
12
+ - Resolver#fetch_title returns the title of the content (not the original title of the page).
13
+
7
14
  ## 1.1.1 - 2020-08-09
8
15
  ### Added
9
16
  - Added support for Fanza Doujin.
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- panchira (1.1.1)
4
+ panchira (1.2.0)
5
5
  fastimage (~> 2.1.7)
6
6
  nokogiri (~> 1.10.9)
7
7
 
@@ -10,7 +10,7 @@ GEM
10
10
  specs:
11
11
  fastimage (2.1.7)
12
12
  mini_portile2 (2.4.0)
13
- minitest (5.14.0)
13
+ minitest (5.14.2)
14
14
  nokogiri (1.10.10)
15
15
  mini_portile2 (~> 2.4.0)
16
16
  rake (12.3.3)
@@ -8,6 +8,6 @@ module Panchira
8
8
 
9
9
  # Result class for Panchira.fetch.
10
10
  class PanchiraResult
11
- attr_accessor :canonical_url, :title, :description, :image, :tags
11
+ attr_accessor :canonical_url, :title, :description, :image, :tags, :author, :circle
12
12
  end
13
13
  end
@@ -6,6 +6,32 @@ module Panchira
6
6
 
7
7
  private
8
8
 
9
+ # DLSiteのタイトルの[]に含まれている値はtitleタグだとサークル名 or 出版社名だが、
10
+ # Panchiraが優先するog:titleではサークル名 or 著者名 となる。
11
+ # 取得に際しては、以下の3パターンを考慮する必要があるため、titleタグとtableの解析が必要となる:
12
+ # 1) 同人系の一部, 特に音声など。タイトル[サークル名]. 本文中に著者・作者の記載なし
13
+ # 2) 同人系の一部, 特に一部の同人誌など。タイトル[サークル名]. 本文中に「作者」の記載あり
14
+ # 3) 商業系。タイトル[著者名] サークル名なし
15
+ # 込み入った実装になってしまったため、parse自体をいじる必要があるかも
16
+ def parse_title
17
+ @title_md = super.match(/(.+) \[(\S+)\] \|.+/)
18
+ @title_md[1]
19
+ end
20
+
21
+ def parse_author
22
+ @page.css('table[id*="work_"] tr').each do |tr|
23
+ if tr.css('th').text =~ /(作|著)者/
24
+ return @author = tr.css('td > a').first.text.strip
25
+ end
26
+ end
27
+
28
+ @author = nil
29
+ end
30
+
31
+ def parse_circle
32
+ @title_md[2] if @author != @title_md[2]
33
+ end
34
+
9
35
  def parse_image_url
10
36
  @page.css('//meta[property="og:image"]/@content').first.to_s.sub(/sam/, 'main')
11
37
  end
@@ -19,6 +19,10 @@ module Panchira
19
19
 
20
20
  private
21
21
 
22
+ def parse_author
23
+ @page.css('.m-boxDetailProductInfoMainList__description__list__item > a').first&.text.to_s
24
+ end
25
+
22
26
  def parse_image_url
23
27
  @page.css('.m-imgDetailProductPack/@src').first.to_s
24
28
  end
@@ -37,6 +41,10 @@ module Panchira
37
41
 
38
42
  private
39
43
 
44
+ def parse_circle
45
+ @page.css('a.circleName__txt').first.text
46
+ end
47
+
40
48
  def parse_tags
41
49
  @page.css('.genreTag__item').map { |t| t.text.strip }
42
50
  end
@@ -17,20 +17,19 @@ module Panchira
17
17
  private
18
18
 
19
19
  def parse_title
20
- comic_title = @json['content']['data']['title']
21
- "#{comic_title} | Komiflo"
20
+ @json['content']['data']['title']
22
21
  end
23
22
 
24
23
  def parse_image_url
25
24
  'https://t.komiflo.com/564_mobile_large_3x/' + @json['content']['named_imgs']['cover']['filename']
26
25
  end
27
26
 
28
- def parse_description
29
- author = @json['content']['attributes']['artists']['children'][0]['data']['name']
27
+ def parse_author
28
+ @json['content']['attributes']['artists']['children'][0]['data']['name']
29
+ end
30
30
 
31
- parent = @json['content']['parents'][0]['data']['title']
32
- description = '著: ' + author if author
33
- description + " / #{parent}" if parent
31
+ def parse_description
32
+ @json['content']['parents'][0]['data']['title']
34
33
  end
35
34
 
36
35
  def parse_canonical_url
@@ -4,8 +4,41 @@ module Panchira
4
4
  class MelonbooksResolver < Resolver
5
5
  URL_REGEXP = %r{melonbooks.co.jp/detail/detail.php\?product_id=(\d+)}.freeze
6
6
 
7
+ def fetch
8
+ result = PanchiraResult.new
9
+
10
+ @page = fetch_page(@url)
11
+ result.canonical_url = parse_canonical_url
12
+
13
+ @page = fetch_page(result.canonical_url) if @url != result.canonical_url
14
+
15
+ result.title, result.author, result.circle = parse_table
16
+ result.description = parse_description
17
+ result.image = parse_image
18
+ result.tags = parse_tags
19
+
20
+ result
21
+ end
22
+
7
23
  private
8
24
 
25
+ def parse_table
26
+ title, author, circle = nil, nil, nil
27
+
28
+ @page.css('#description > table.stripe > tr').each do |tr|
29
+ case tr.css('th').text
30
+ when 'タイトル'
31
+ title = tr.css('td').text.strip
32
+ when 'サークル名'
33
+ circle = tr.css('td > a').text.match(/^(.+)\W\(作品数:/)&.values_at(1)[0]
34
+ when '作家名'
35
+ author = tr.css('td > a').text.strip
36
+ end
37
+ end
38
+
39
+ [title, author, circle]
40
+ end
41
+
9
42
  def parse_canonical_url
10
43
  product_id = @url.slice(URL_REGEXP, 1)
11
44
  'https://www.melonbooks.co.jp/detail/detail.php?product_id=' + product_id + '&adult_view=1'
@@ -6,7 +6,15 @@ module Panchira
6
6
  module Narou
7
7
  class Novel18Resolver < Resolver
8
8
  URL_REGEXP = %r{novel18\.syosetu\.com/}.freeze
9
- ID_REGEXP = %{novel18\.syosetu\.com/(?<id>[^/]+)}.freeze
9
+ ID_REGEXP = %{novel18\.syosetu\.com/(?<id>[^/]+)}
10
+
11
+ def initialize(url)
12
+ super(url)
13
+
14
+ if id = @url.match(ID_REGEXP)[:id]
15
+ @desc = fetch_page("https://novel18.syosetu.com/novelview/infotop/ncode/#{id}/")
16
+ end
17
+ end
10
18
 
11
19
  def fetch_page(uri)
12
20
  u = URI.parse(uri)
@@ -17,24 +25,35 @@ module Panchira
17
25
  Nokogiri::HTML.parse(res.body, uri)
18
26
  end
19
27
 
20
- def parse_tags
21
- id = @url.match(ID_REGEXP)[:id]
22
- return [] unless id
28
+ def parse_author
29
+ @desc&.xpath('//*[@id="noveltable1"]/tr[2]/td')&.text&.strip
30
+ end
23
31
 
24
- desc = fetch_page("https://novel18.syosetu.com/novelview/infotop/ncode/#{id}/")
25
- desc.xpath('//*[@id="noveltable1"]/tr[3]')&.text&.split("\n\n\n")&.dig(1)&.split(' ') # つらい。
32
+ def parse_tags
33
+ # つらい。
34
+ @desc&.xpath('//*[@id="noveltable1"]/tr[3]')&.text&.split("\n\n\n")&.dig(1)&.split(' ')
26
35
  end
27
36
  end
37
+
28
38
  class NcodeResolver < Resolver
29
- URL_REGEXP = %r{ncode\.syosetu\.com}.freeze
30
- ID_REGEXP = %{ncode\.syosetu\.com/(?<id>[^/]+)}.freeze
39
+ URL_REGEXP = /ncode\.syosetu\.com/.freeze
40
+ ID_REGEXP = %{ncode\.syosetu\.com/(?<id>[^/]+)}
31
41
 
32
- def parse_tags
33
- id = @url.match(ID_REGEXP)[:id]
34
- return [] unless id
42
+ def initialize(url)
43
+ super(url)
44
+
45
+ if id = @url.match(ID_REGEXP)[:id]
46
+ @desc = fetch_page("https://novel18.syosetu.com/novelview/infotop/ncode/#{id}/")
47
+ end
48
+ end
35
49
 
36
- desc = fetch_page("https://ncode.syosetu.com/novelview/infotop/ncode/#{id}/")
37
- desc.xpath('//*[@id="noveltable1"]/tr[3]')&.text&.split("\n\n\n")&.dig(1)&.delete("\u00A0")&.split(' ')&.grep_v('') # めっちゃつらい。
50
+ def parse_author
51
+ @desc&.xpath('//*[@id="noveltable1"]/tr[2]/td')&.text&.strip
52
+ end
53
+
54
+ def parse_tags
55
+ # めっちゃつらい。
56
+ @desc&.xpath('//*[@id="noveltable1"]/tr[3]')&.text&.split("\n\n\n")&.dig(1)&.delete("\u00A0")&.split(' ')&.grep_v('')
38
57
  end
39
58
  end
40
59
  end
@@ -6,6 +6,21 @@ module Panchira
6
6
 
7
7
  private
8
8
 
9
+ def parse_title
10
+ full_title = super
11
+ @md = full_title.match(/\A(?<title>.+) \| (?<author>.+)\z/)
12
+
13
+ @md[:title]
14
+ end
15
+
16
+ def parse_author
17
+ @md[:author]
18
+ end
19
+
20
+ def parse_description
21
+ @page.css('p.illust_description')&.first&.text&.strip
22
+ end
23
+
9
24
  def parse_canonical_url
10
25
  @url.sub(/sp.nijie/, 'nijie').sub(/view_popup/, 'view')
11
26
  end
@@ -14,6 +14,14 @@ module Panchira
14
14
 
15
15
  private
16
16
 
17
+ def parse_title
18
+ @json['body']['title']
19
+ end
20
+
21
+ def parse_author
22
+ @json['body']['userName']
23
+ end
24
+
17
25
  def parse_canonical_url
18
26
  'https://pixiv.net/member_illust.php?mode=medium&illust_id=' + @illust_id
19
27
  end
@@ -32,7 +40,7 @@ module Panchira
32
40
  end
33
41
 
34
42
  def parse_tags
35
- @json['body']['tags']['tags'].map{|content| content['tag']}
43
+ @json['body']['tags']['tags'].map { |content| content['tag'] }
36
44
  end
37
45
  end
38
46
 
@@ -29,6 +29,8 @@ module Panchira
29
29
  result.description = parse_description
30
30
  result.image = parse_image
31
31
  result.tags = parse_tags
32
+ result.author = parse_author
33
+ result.circle = parse_circle
32
34
 
33
35
  result
34
36
  end
@@ -110,6 +112,14 @@ module Panchira
110
112
  ''
111
113
  end
112
114
 
115
+ def parse_author
116
+ @page.css('//meta[name="author"]/@content').first.to_s
117
+ end
118
+
119
+ def parse_circle
120
+ nil
121
+ end
122
+
113
123
  def user_agent
114
124
  "Mozilla/5.0 (compatible; PanchiraBot/#{VERSION}; +https://github.com/nuita/panchira)"
115
125
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Panchira
4
- VERSION = '1.1.1'
4
+ VERSION = '1.2.0'
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: panchira
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.1
4
+ version: 1.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - kyp
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-08-09 00:00:00.000000000 Z
11
+ date: 2020-10-31 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler