panchira 1.1.1 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: '084250abfb3df8cee5d16db7e0312347663fac26ce27b88a0a8f35c12b8eed41'
4
- data.tar.gz: 6d427b512e89d5c1797b7d713f70ceda702cd0397da22dc4b8ceefaf01a2f474
3
+ metadata.gz: 066440e461b75b84a9df04fd76f1960243785b26bc7f4c61289029248e0a8bd9
4
+ data.tar.gz: 1fc1f712c6a8d88363cf3c4162be2681e08631c515ffbe6631fba3fd204b91c0
5
5
  SHA512:
6
- metadata.gz: 269c296c822ed831714ba70b771f3ec37f03f7b9910115ba6e2b435cfe26838a6dc9a30dd658458c754bd7a20c5270e59d4c7de57d83d2d84638e4e385b7ed20
7
- data.tar.gz: 6f7f9884dca58326d32c1f95256193570c97625c579ec118919cde6cc0e83e758ea9b6ecdc4c390a45629ac1de0d95ff47d9126023d94cdfe3316a1b14234f8a
6
+ metadata.gz: 63a914d286eaf909f4a2ab7c128f3725a96a6badbac71a878362e4a09a4e29f720f1f81fab2fa4b1f0ddeb513fac04b5c00597132012f5dbe42d783f54b221b2
7
+ data.tar.gz: af6085627c05532b7019a7134da472329c52b0f61b3329079694a2f59115e52f1c7b0bc0acc2c9cc3ea19814a33c3e2cd9116fcd7f692278e2150de7874bb424
@@ -4,6 +4,13 @@ All notable changes to this project will be documented in this file.
4
4
  The format is based on [Keep a Changelog](http://keepachangelog.com/)
5
5
  and this project adheres to [Semantic Versioning](http://semver.org/).
6
6
 
7
+ ## 1.2.0 - 2020-10-31
8
+ ### Added
9
+ - You can now fetch author and circle name in resolvers (Resolver#fetch_author, Resolver#fetch_circle).
10
+
11
+ ### Changed
12
+ - Resolver#fetch_title returns the title of the content (not the original title of the page).
13
+
7
14
  ## 1.1.1 - 2020-08-09
8
15
  ### Added
9
16
  - Added support for Fanza Doujin.
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- panchira (1.1.1)
4
+ panchira (1.2.0)
5
5
  fastimage (~> 2.1.7)
6
6
  nokogiri (~> 1.10.9)
7
7
 
@@ -10,7 +10,7 @@ GEM
10
10
  specs:
11
11
  fastimage (2.1.7)
12
12
  mini_portile2 (2.4.0)
13
- minitest (5.14.0)
13
+ minitest (5.14.2)
14
14
  nokogiri (1.10.10)
15
15
  mini_portile2 (~> 2.4.0)
16
16
  rake (12.3.3)
@@ -8,6 +8,6 @@ module Panchira
8
8
 
9
9
  # Result class for Panchira.fetch.
10
10
  class PanchiraResult
11
- attr_accessor :canonical_url, :title, :description, :image, :tags
11
+ attr_accessor :canonical_url, :title, :description, :image, :tags, :author, :circle
12
12
  end
13
13
  end
@@ -6,6 +6,32 @@ module Panchira
6
6
 
7
7
  private
8
8
 
9
+ # DLSiteのタイトルの[]に含まれている値はtitleタグだとサークル名 or 出版社名だが、
10
+ # Panchiraが優先するog:titleではサークル名 or 著者名 となる。
11
+ # 取得に際しては、以下の3パターンを考慮する必要があるため、titleタグとtableの解析が必要となる:
12
+ # 1) 同人系の一部, 特に音声など。タイトル[サークル名]. 本文中に著者・作者の記載なし
13
+ # 2) 同人系の一部, 特に一部の同人誌など。タイトル[サークル名]. 本文中に「作者」の記載あり
14
+ # 3) 商業系。タイトル[著者名] サークル名なし
15
+ # 込み入った実装になってしまったため、parse自体をいじる必要があるかも
16
+ def parse_title
17
+ @title_md = super.match(/(.+) \[(\S+)\] \|.+/)
18
+ @title_md[1]
19
+ end
20
+
21
+ def parse_author
22
+ @page.css('table[id*="work_"] tr').each do |tr|
23
+ if tr.css('th').text =~ /(作|著)者/
24
+ return @author = tr.css('td > a').first.text.strip
25
+ end
26
+ end
27
+
28
+ @author = nil
29
+ end
30
+
31
+ def parse_circle
32
+ @title_md[2] if @author != @title_md[2]
33
+ end
34
+
9
35
  def parse_image_url
10
36
  @page.css('//meta[property="og:image"]/@content').first.to_s.sub(/sam/, 'main')
11
37
  end
@@ -19,6 +19,10 @@ module Panchira
19
19
 
20
20
  private
21
21
 
22
+ def parse_author
23
+ @page.css('.m-boxDetailProductInfoMainList__description__list__item > a').first&.text.to_s
24
+ end
25
+
22
26
  def parse_image_url
23
27
  @page.css('.m-imgDetailProductPack/@src').first.to_s
24
28
  end
@@ -37,6 +41,10 @@ module Panchira
37
41
 
38
42
  private
39
43
 
44
+ def parse_circle
45
+ @page.css('a.circleName__txt').first.text
46
+ end
47
+
40
48
  def parse_tags
41
49
  @page.css('.genreTag__item').map { |t| t.text.strip }
42
50
  end
@@ -17,20 +17,19 @@ module Panchira
17
17
  private
18
18
 
19
19
  def parse_title
20
- comic_title = @json['content']['data']['title']
21
- "#{comic_title} | Komiflo"
20
+ @json['content']['data']['title']
22
21
  end
23
22
 
24
23
  def parse_image_url
25
24
  'https://t.komiflo.com/564_mobile_large_3x/' + @json['content']['named_imgs']['cover']['filename']
26
25
  end
27
26
 
28
- def parse_description
29
- author = @json['content']['attributes']['artists']['children'][0]['data']['name']
27
+ def parse_author
28
+ @json['content']['attributes']['artists']['children'][0]['data']['name']
29
+ end
30
30
 
31
- parent = @json['content']['parents'][0]['data']['title']
32
- description = '著: ' + author if author
33
- description + " / #{parent}" if parent
31
+ def parse_description
32
+ @json['content']['parents'][0]['data']['title']
34
33
  end
35
34
 
36
35
  def parse_canonical_url
@@ -4,8 +4,41 @@ module Panchira
4
4
  class MelonbooksResolver < Resolver
5
5
  URL_REGEXP = %r{melonbooks.co.jp/detail/detail.php\?product_id=(\d+)}.freeze
6
6
 
7
+ def fetch
8
+ result = PanchiraResult.new
9
+
10
+ @page = fetch_page(@url)
11
+ result.canonical_url = parse_canonical_url
12
+
13
+ @page = fetch_page(result.canonical_url) if @url != result.canonical_url
14
+
15
+ result.title, result.author, result.circle = parse_table
16
+ result.description = parse_description
17
+ result.image = parse_image
18
+ result.tags = parse_tags
19
+
20
+ result
21
+ end
22
+
7
23
  private
8
24
 
25
+ def parse_table
26
+ title, author, circle = nil, nil, nil
27
+
28
+ @page.css('#description > table.stripe > tr').each do |tr|
29
+ case tr.css('th').text
30
+ when 'タイトル'
31
+ title = tr.css('td').text.strip
32
+ when 'サークル名'
33
+ circle = tr.css('td > a').text.match(/^(.+)\W\(作品数:/)&.values_at(1)[0]
34
+ when '作家名'
35
+ author = tr.css('td > a').text.strip
36
+ end
37
+ end
38
+
39
+ [title, author, circle]
40
+ end
41
+
9
42
  def parse_canonical_url
10
43
  product_id = @url.slice(URL_REGEXP, 1)
11
44
  'https://www.melonbooks.co.jp/detail/detail.php?product_id=' + product_id + '&adult_view=1'
@@ -6,7 +6,15 @@ module Panchira
6
6
  module Narou
7
7
  class Novel18Resolver < Resolver
8
8
  URL_REGEXP = %r{novel18\.syosetu\.com/}.freeze
9
- ID_REGEXP = %{novel18\.syosetu\.com/(?<id>[^/]+)}.freeze
9
+ ID_REGEXP = %{novel18\.syosetu\.com/(?<id>[^/]+)}
10
+
11
+ def initialize(url)
12
+ super(url)
13
+
14
+ if id = @url.match(ID_REGEXP)[:id]
15
+ @desc = fetch_page("https://novel18.syosetu.com/novelview/infotop/ncode/#{id}/")
16
+ end
17
+ end
10
18
 
11
19
  def fetch_page(uri)
12
20
  u = URI.parse(uri)
@@ -17,24 +25,35 @@ module Panchira
17
25
  Nokogiri::HTML.parse(res.body, uri)
18
26
  end
19
27
 
20
- def parse_tags
21
- id = @url.match(ID_REGEXP)[:id]
22
- return [] unless id
28
+ def parse_author
29
+ @desc&.xpath('//*[@id="noveltable1"]/tr[2]/td')&.text&.strip
30
+ end
23
31
 
24
- desc = fetch_page("https://novel18.syosetu.com/novelview/infotop/ncode/#{id}/")
25
- desc.xpath('//*[@id="noveltable1"]/tr[3]')&.text&.split("\n\n\n")&.dig(1)&.split(' ') # つらい。
32
+ def parse_tags
33
+ # つらい。
34
+ @desc&.xpath('//*[@id="noveltable1"]/tr[3]')&.text&.split("\n\n\n")&.dig(1)&.split(' ')
26
35
  end
27
36
  end
37
+
28
38
  class NcodeResolver < Resolver
29
- URL_REGEXP = %r{ncode\.syosetu\.com}.freeze
30
- ID_REGEXP = %{ncode\.syosetu\.com/(?<id>[^/]+)}.freeze
39
+ URL_REGEXP = /ncode\.syosetu\.com/.freeze
40
+ ID_REGEXP = %{ncode\.syosetu\.com/(?<id>[^/]+)}
31
41
 
32
- def parse_tags
33
- id = @url.match(ID_REGEXP)[:id]
34
- return [] unless id
42
+ def initialize(url)
43
+ super(url)
44
+
45
+ if id = @url.match(ID_REGEXP)[:id]
46
+ @desc = fetch_page("https://novel18.syosetu.com/novelview/infotop/ncode/#{id}/")
47
+ end
48
+ end
35
49
 
36
- desc = fetch_page("https://ncode.syosetu.com/novelview/infotop/ncode/#{id}/")
37
- desc.xpath('//*[@id="noveltable1"]/tr[3]')&.text&.split("\n\n\n")&.dig(1)&.delete("\u00A0")&.split(' ')&.grep_v('') # めっちゃつらい。
50
+ def parse_author
51
+ @desc&.xpath('//*[@id="noveltable1"]/tr[2]/td')&.text&.strip
52
+ end
53
+
54
+ def parse_tags
55
+ # めっちゃつらい。
56
+ @desc&.xpath('//*[@id="noveltable1"]/tr[3]')&.text&.split("\n\n\n")&.dig(1)&.delete("\u00A0")&.split(' ')&.grep_v('')
38
57
  end
39
58
  end
40
59
  end
@@ -6,6 +6,21 @@ module Panchira
6
6
 
7
7
  private
8
8
 
9
+ def parse_title
10
+ full_title = super
11
+ @md = full_title.match(/\A(?<title>.+) \| (?<author>.+)\z/)
12
+
13
+ @md[:title]
14
+ end
15
+
16
+ def parse_author
17
+ @md[:author]
18
+ end
19
+
20
+ def parse_description
21
+ @page.css('p.illust_description')&.first&.text&.strip
22
+ end
23
+
9
24
  def parse_canonical_url
10
25
  @url.sub(/sp.nijie/, 'nijie').sub(/view_popup/, 'view')
11
26
  end
@@ -14,6 +14,14 @@ module Panchira
14
14
 
15
15
  private
16
16
 
17
+ def parse_title
18
+ @json['body']['title']
19
+ end
20
+
21
+ def parse_author
22
+ @json['body']['userName']
23
+ end
24
+
17
25
  def parse_canonical_url
18
26
  'https://pixiv.net/member_illust.php?mode=medium&illust_id=' + @illust_id
19
27
  end
@@ -32,7 +40,7 @@ module Panchira
32
40
  end
33
41
 
34
42
  def parse_tags
35
- @json['body']['tags']['tags'].map{|content| content['tag']}
43
+ @json['body']['tags']['tags'].map { |content| content['tag'] }
36
44
  end
37
45
  end
38
46
 
@@ -29,6 +29,8 @@ module Panchira
29
29
  result.description = parse_description
30
30
  result.image = parse_image
31
31
  result.tags = parse_tags
32
+ result.author = parse_author
33
+ result.circle = parse_circle
32
34
 
33
35
  result
34
36
  end
@@ -110,6 +112,14 @@ module Panchira
110
112
  ''
111
113
  end
112
114
 
115
+ def parse_author
116
+ @page.css('//meta[name="author"]/@content').first.to_s
117
+ end
118
+
119
+ def parse_circle
120
+ nil
121
+ end
122
+
113
123
  def user_agent
114
124
  "Mozilla/5.0 (compatible; PanchiraBot/#{VERSION}; +https://github.com/nuita/panchira)"
115
125
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Panchira
4
- VERSION = '1.1.1'
4
+ VERSION = '1.2.0'
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: panchira
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.1
4
+ version: 1.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - kyp
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-08-09 00:00:00.000000000 Z
11
+ date: 2020-10-31 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler