media_arts_db 0.1.1 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.travis.yml +1 -0
- data/README.md +73 -44
- data/lib/media_arts_db.rb +3 -7
- data/lib/media_arts_db/comic/component.rb +138 -0
- data/lib/media_arts_db/comic/parse.rb +693 -0
- data/lib/media_arts_db/comic/retrieve.rb +343 -0
- data/lib/media_arts_db/http_base.rb +5 -6
- data/lib/media_arts_db/uri.rb +4 -4
- data/lib/media_arts_db/version.rb +1 -1
- metadata +5 -4
- data/lib/media_arts_db/comic.rb +0 -870
- data/lib/media_arts_db/comic/comic.rb +0 -29
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b2bd665b5beb4778975fc945232946e266b7e4f1
|
4
|
+
data.tar.gz: 3be0f992dc1ee1e21f1adc8986353f56632b99d1
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 79a8f6dfd6ff7a07a80ef8fd79b74567b54f4c1741eccdcf38c1e8b28681c981280b876d5fdd5ce4513df81397d37d760f0443af763ff2953c2cf0c80be4fb3c
|
7
|
+
data.tar.gz: e29c11ad92f728774feb3c8faea848b6661c99820c2426cbd02417fe48a30d00c88940d1b6819336db130617c05748d00b83f41e4bb675503f5c2b085ff5f6e9
|
data/.travis.yml
CHANGED
data/README.md
CHANGED
@@ -2,6 +2,8 @@
|
|
2
2
|
|
3
3
|
media_arts_db is a RubyGem for scraping the Media Arts Database (メディア芸術データベース: http://mediaarts-db.jp/).
|
4
4
|
|
5
|
+
[![Build Status](https://travis-ci.org/bisque33/media_arts_db.svg?branch=master)](https://travis-ci.org/bisque33/media_arts_db)
|
6
|
+
|
5
7
|
## Installation
|
6
8
|
|
7
9
|
Add this line to your application's Gemfile:
|
@@ -23,65 +25,92 @@ Or install it yourself as:
|
|
23
25
|
### Comic
|
24
26
|
|
25
27
|
Search and find the information from the Comic Database.
|
26
|
-
|
28
|
+
|
29
|
+
検索方法
|
27
30
|
|
28
31
|
```ruby
|
29
32
|
# 作品名(TITLE)で検索
|
30
|
-
|
31
|
-
|
33
|
+
search = MediaArtsDb::Comic::SearchWork.new('カードキャプター')
|
34
|
+
search.execute
|
35
|
+
# => 検索結果は、ComicWork(マンガ単行本作品情報)とMagazineWork(マンガ雑誌作品情報)が混在する配列が返る
|
32
36
|
|
33
37
|
# 雑誌名(MAGAZINE)で検索
|
34
|
-
|
35
|
-
|
38
|
+
search = MediaArtsDb::Comic::SearchMagazine.new('なかよし', per: 10, page: 2)
|
39
|
+
search.execute
|
40
|
+
# => 検索結果は、MagazineTitle(マンガ雑誌基本情報)の配列が返る(11-20件目)
|
36
41
|
|
37
42
|
# 著者名(AUTHOR)で検索
|
38
|
-
|
39
|
-
|
40
|
-
|
43
|
+
search = MediaArtsDb::Comic::SearchAuthor.new('CLAMP')
|
44
|
+
search.execute
|
45
|
+
# => 検索結果は、Author(著者情報)の配列が返る
|
41
46
|
|
42
|
-
```ruby
|
43
47
|
# 単行本・雑誌・資料(SOURCE)で検索
|
44
|
-
#
|
45
|
-
#
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
#
|
51
|
-
|
52
|
-
|
53
|
-
#
|
54
|
-
# -
|
48
|
+
# まず検索条件を指定するためにSearchOptionBuilderクラスにパラメータを設定する
|
49
|
+
# .target_xxxはどの検索結果を取得するかの設定で、必須項目
|
50
|
+
# .option_xxxは検索条件で、サイトの制限により最大5個まで設定できる。また、条件を削除する場合はnilを代入する
|
51
|
+
option = MediaArtsDb::Comic::SearchOptionBuilder.new
|
52
|
+
option.target_comic
|
53
|
+
option.option_title = 'さくら'
|
54
|
+
# SearchクラスにSearchOptionBuilderを渡す。per:, page:の指定も可能
|
55
|
+
search = MediaArtsDb::Comic::Search.new(option)
|
56
|
+
result = search.execute
|
57
|
+
# => 検索結果は、targetの設定により以下が返る
|
58
|
+
# - option.target_comicの場合、Comic(マンガ単行本情報)の配列が返る
|
59
|
+
# - option.target_magazineの場合、Magazine(マンガ雑誌情報)の配列が返る
|
60
|
+
# - option.target_materialの場合、Material(資料情報)の配列が返る
|
61
|
+
# - option.target_original_pictureの場合、OriginalPicture(マンガ原画情報)の配列が返る
|
62
|
+
# - option.target_bookletの場合、Booklet(その他冊子情報)の配列が返る
|
55
63
|
```
|
56
64
|
|
65
|
+
検索結果の取得
|
66
|
+
|
57
67
|
```ruby
|
58
|
-
#
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
#
|
65
|
-
|
66
|
-
#
|
67
|
-
|
68
|
-
|
69
|
-
result = MediaArtsDb::Comic.find_magazine(magazine_id)
|
70
|
-
# book(著者情報)の詳細情報取得
|
71
|
-
result = MediaArtsDb::Comic.find_author(author_id)
|
72
|
-
# book(資料情報)の詳細情報取得
|
73
|
-
result = MediaArtsDb::Comic.find_material(material_id)
|
74
|
-
# book(原画情報)の詳細情報取得
|
75
|
-
result = MediaArtsDb::Comic.find_original_picture(original_picture_id)
|
76
|
-
# book(その他冊子情報)の詳細情報取得
|
77
|
-
result = MediaArtsDb::Comic.find_booklet(booklet_id)
|
68
|
+
# 検索
|
69
|
+
search = MediaArtsDb::Comic::SearchWork.new('カードキャプター')
|
70
|
+
results = search.execute
|
71
|
+
# 値の取得
|
72
|
+
results.first.title
|
73
|
+
results.first[:title] # キーワードでも取得可能
|
74
|
+
# すべての値の取得
|
75
|
+
results.first.content # 詳細ページから全ての情報を取得して返す
|
76
|
+
results.first.content_cache # 検索結果で得られた情報のみを返す
|
77
|
+
# 結果のネスト
|
78
|
+
results.first.comic_titles[0].comics[0].published_date
|
78
79
|
```
|
79
80
|
|
81
|
+
個別要素の検索
|
82
|
+
|
80
83
|
```ruby
|
81
|
-
#
|
82
|
-
|
83
|
-
|
84
|
-
|
84
|
+
# ComicWork(マンガ単行本作品情報)の詳細情報取得
|
85
|
+
finder = MediaArtsDb::Comic::FindComicWork.new(comic_work_id)
|
86
|
+
finder.execute
|
87
|
+
# ComicTitle(マンガ単行本全巻情報)の詳細情報取得
|
88
|
+
finder = MediaArtsDb::Comic::FindComicTitle.new(comic_title_id)
|
89
|
+
finder.execute
|
90
|
+
# Comic(マンガ単行本情報)の詳細情報取得
|
91
|
+
finder = MediaArtsDb::Comic::FindComic.new(comic_id)
|
92
|
+
finder.execute
|
93
|
+
# MagazineWork(マンガ雑誌作品情報)の詳細情報取得
|
94
|
+
finder = MediaArtsDb::Comic::FindMagazineWork.new(magazine_works_id)
|
95
|
+
finder.execute
|
96
|
+
# MagazineTitle(マンガ雑誌全巻情報)の詳細情報取得
|
97
|
+
finder = MediaArtsDb::Comic::FindMagazineTitle.new(magazine_titles_id)
|
98
|
+
finder.execute
|
99
|
+
# Magazine(マンガ雑誌情報)の詳細情報取得
|
100
|
+
finder = MediaArtsDb::Comic::FindMagazine.new(magazine_id)
|
101
|
+
finder.execute
|
102
|
+
# Author(著者情報)の詳細情報取得
|
103
|
+
finder = MediaArtsDb::Comic::FindAuthor.new(author_id)
|
104
|
+
finder.execute
|
105
|
+
# Material(資料情報)の詳細情報取得
|
106
|
+
finder = MediaArtsDb::Comic::FindMaterial.new(material_id)
|
107
|
+
finder.execute
|
108
|
+
# OriginalPicture(原画情報)の詳細情報取得
|
109
|
+
finder = MediaArtsDb::Comic::FindOriginalPicture.new(original_picture_id)
|
110
|
+
finder.execute
|
111
|
+
# Booklet(その他冊子情報)の詳細情報取得
|
112
|
+
finder = MediaArtsDb::Comic::FindBooklet.new(booklet_id)
|
113
|
+
finder.execute
|
85
114
|
```
|
86
115
|
|
87
116
|
### Animation
|
data/lib/media_arts_db.rb
CHANGED
@@ -1,10 +1,6 @@
|
|
1
1
|
require "media_arts_db/version"
|
2
2
|
require "media_arts_db/uri"
|
3
3
|
require "media_arts_db/http_base"
|
4
|
-
require "media_arts_db/comic"
|
5
|
-
|
6
|
-
require "
|
7
|
-
|
8
|
-
module MediaArtsDb
|
9
|
-
|
10
|
-
end
|
4
|
+
require "media_arts_db/comic/retrieve"
|
5
|
+
require "media_arts_db/comic/component"
|
6
|
+
require "media_arts_db/comic/parse"
|
@@ -0,0 +1,138 @@
|
|
1
|
+
module MediaArtsDb
  module Comic
    # Base class for every record scraped from the comic database.
    #
    # A component holds an id plus a hash of attributes that are already
    # known (for example from a search result row). When an attribute that is
    # not in the hash is requested, the component asks its retriever
    # (+@retriever+, set by the subclasses) for the full record once and
    # merges the result into the hash.
    class Component
      attr_reader :id

      # @param id        identifier handed to the retriever by the subclasses
      # @param content   [Hash] attributes that are already known
      # @param retrieved [Boolean] true when +content+ is already complete and
      #   the retriever must not be called
      def initialize(id, content = {}, retrieved = false)
        @id = id
        @content = content
        @retrieved = retrieved
      end

      # Returns the attribute stored under +key+.
      #
      # A key missing from the cache triggers the one-time retrieval; nil is
      # returned when the key is still unknown afterwards.
      def [](key)
        retrieve_details unless @content.key?(key)
        # fetch with an explicit nil ignores any default set on the hash
        @content.fetch(key, nil)
      end

      # Exposes every attribute as a reader method (component.title).
      # Unknown names fall through to #[] and therefore yield nil.
      def method_missing(name, *_args)
        self[name.to_sym]
      end

      # Keeps respond_to? consistent with method_missing for attributes that
      # are already cached. Only the cache is consulted, so asking
      # respond_to? never triggers a retrieval.
      def respond_to_missing?(name, include_private = false)
        @content.key?(name.to_sym) || super
      end

      # Returns all attributes, running the retriever first if that has not
      # happened yet.
      def content
        retrieve_details
        @content
      end

      # Returns only the attributes known so far; never calls the retriever.
      def content_cache
        @content
      end

      private

      def retrieved?
        @retrieved
      end

      # Merges the retriever's result into the cache; does nothing once the
      # component is marked as retrieved.
      def retrieve_details
        return if retrieved?

        @content.merge!(@retriever.execute.content)
        @retrieved = true
      end
    end

    # NOTE:
    # Every component could be represented by the Component class alone, but
    # separate classes are the most obvious way to tell component kinds
    # apart, so one (admittedly redundant) class is defined per kind.

    class ComicWork < Component
      def initialize(id, content = {}, retrieved = false)
        super
        @retriever = FindComicWork.new(@id)
      end
    end

    class ComicTitle < Component
      def initialize(id, content = {}, retrieved = false)
        super
        @retriever = FindComicTitle.new(@id)
      end
    end

    class Comic < Component
      def initialize(id, content = {}, retrieved = false)
        super
        @retriever = FindComic.new(@id)
      end

      # Not implemented (YAGNI); always returns nil.
      def next
      end

      # Not implemented (YAGNI); always returns nil.
      def prev
      end
    end

    class MagazineWork < Component
      def initialize(id, content = {}, retrieved = false)
        super
        @retriever = FindMagazineWork.new(@id)
      end
    end

    class MagazineTitle < Component
      def initialize(id, content = {}, retrieved = false)
        super
        @retriever = FindMagazineTitle.new(@id)
      end
    end

    class Magazine < Component
      def initialize(id, content = {}, retrieved = false)
        super
        @retriever = FindMagazine.new(@id)
      end

      # Not implemented (YAGNI); always returns nil.
      def next
      end

      # Not implemented (YAGNI); always returns nil.
      def prev
      end
    end

    class Author < Component
      def initialize(id, content = {}, retrieved = false)
        super
        @retriever = FindAuthor.new(@id)
      end
    end

    class Material < Component
      def initialize(id, content = {}, retrieved = false)
        super
        @retriever = FindMaterial.new(@id)
      end
    end

    class OriginalPicture < Component
      def initialize(id, content = {}, retrieved = false)
        super
        @retriever = FindOriginalPicture.new(@id)
      end
    end

    class Booklet < Component
      def initialize(id, content = {}, retrieved = false)
        super
        @retriever = FindBooklet.new(@id)
      end
    end
  end
end
|
@@ -0,0 +1,693 @@
|
|
1
|
+
require "nokogiri"
|
2
|
+
|
3
|
+
module MediaArtsDb
|
4
|
+
module Comic
|
5
|
+
class Parse
|
6
|
+
class << self
|
7
|
+
# Parses the result table of a search by work title (div.resultTabA).
#
# Returns an array that may mix ComicWork and MagazineWork objects, one per
# row whose first-column link matches either kind; other rows are dropped.
def parse_search_title(response_body)
  rows = Nokogiri::HTML.parse(response_body).css('div.resultTabA table > tbody > tr')
  rows.each_with_object([]) do |row, works|
    attributes = {
      title: clip_text(row, 1),                     # work title
      author: clip_text(row, 2),                    # author name
      tags: clip_text(row, 3),                      # tags
      comic_title_quantity: clip_text(row, 4),      # complete comic sets
      magazine_work_quantity: clip_text(row, 5),    # works published in magazines
      material_quantity: clip_text(row, 6),         # materials (no class support)
      original_picture_quantity: clip_text(row, 7), # original pictures (no class support)
      booklet_quantity: clip_text(row, 8)           # others (no class support)
    }

    # The link points either to comic_works or to magazine_works.
    # TODO: confirm that the link URL is correct
    work_class =
      case clip_uri(row, 1)
      when /comic_works/ then ComicWork
      when /magazine_works/ then MagazineWork
      end
    works << work_class.new(clip_id(row, 1), attributes) if work_class
  end
end
|
32
|
+
|
33
|
+
# Parses the result table of a search by magazine name (div.resultTabB)
# and returns one MagazineTitle per table row.
def parse_search_magazine(response_body)
  rows = Nokogiri::HTML.parse(response_body).css('div.resultTabB table > tbody > tr')
  rows.map do |row|
    attributes = {
      title: clip_text(row, 1),
      publisher: clip_text(row, 2),
      published_interval: clip_text(row, 3),
      published_start_date: clip_text(row, 4),
      published_end_date: clip_text(row, 5),
      tags: clip_text(row, 6)
    }
    MagazineTitle.new(clip_id(row, 1), attributes)
  end
end
|
49
|
+
|
50
|
+
# Parses the result table of a search by author name (div.resultTabC)
# and returns one Author per author row.
def parse_search_author(response_body)
  rows = Nokogiri::HTML.parse(response_body).css('div.resultTabC table > tbody > tr')
  rows.each_with_object([]) do |row, authors|
    # NOTE:
    # The result mixes author rows with rows for magazine work names.
    # Only authors are wanted here, so rows without an id are skipped.
    next unless has_id?(row, 1)

    attributes = {
      name: clip_text(row, 1),
      name_kana: clip_text(row, 2),
      related_authors: clip_authors(row, 3),
      comic_work_quantity: clip_text(row, 4)
      # magazine_works_name: clip_text(row, 5)
    }
    authors << Author.new(clip_id(row, 1), attributes)
  end
end
|
70
|
+
|
71
|
+
# Parses the comic tab of a detailed search (div.resultTabD_subA) and
# returns one Comic per table row.
def parse_search_target_comic(response_body)
  rows = Nokogiri::HTML.parse(response_body).css('div.resultTabD_subA > div > table > tbody > tr')
  rows.map do |row|
    attributes = {
      isbn10: clip_isbn10(row, 1),       # ISBN10
      isbn13: clip_isbn13(row, 1),       # ISBN13
      title: clip_text(row, 2),          # comic name
      label: clip_text(row, 3),          # comic label
      volume: clip_text(row, 4),         # volume
      author: clip_text(row, 5),         # author name
      publisher: clip_text(row, 6),      # publisher
      published_date: clip_text(row, 7)  # publication year and month
    }
    Comic.new(clip_id(row, 2), attributes)
  end
end
|
89
|
+
|
90
|
+
# Parses the magazine tab of a detailed search (div.resultTabD_subB) and
# returns one Magazine per table row.
def parse_search_target_magazine(response_body)
  rows = Nokogiri::HTML.parse(response_body).css('div.resultTabD_subB > div > table > tbody > tr')
  rows.map do |row|
    attributes = {
      title: clip_text(row, 2),              # magazine name
      volume: clip_text(row, 3),             # volume / combined / running number
      display_volume: clip_text(row, 4),     # displayed issue number
      display_sub_volume: clip_text(row, 5), # auxiliary issue number
      publisher: clip_text(row, 6),          # publisher
      published_date: clip_text(row, 7)      # displayed year and month
    }
    Magazine.new(clip_id(row, 2), attributes)
  end
end
|
106
|
+
|
107
|
+
# Parses the material tab of a detailed search (div.resultTabD_subC) and
# returns one Material per table row.
def parse_search_target_material(response_body)
  rows = Nokogiri::HTML.parse(response_body).css('div.resultTabD_subC > div > table > tbody > tr')
  rows.map do |row|
    attributes = {
      title: clip_text(row, 2),                  # material name
      category: clip_text(row, 3),               # classification / category
      number: clip_text(row, 4),                 # order
      author: clip_text(row, 5),                 # author name
      related_material_title: clip_text(row, 6), # related item
      published_date: clip_text(row, 7)          # period
    }
    Material.new(clip_id(row, 2), attributes)
  end
end
|
123
|
+
|
124
|
+
# Parses the original-picture tab of a detailed search
# (div.resultTabD_subD) and returns one OriginalPicture per table row.
def parse_search_target_original_picture(response_body)
  rows = Nokogiri::HTML.parse(response_body).css('div.resultTabD_subD > div > table > tbody > tr')
  rows.map do |row|
    attributes = {
      title: clip_text(row, 2),          # original picture work name
      recorded: clip_text(row, 3),       # recorded in
      number: clip_text(row, 4),         # order
      quantity: clip_text(row, 5),       # number of sheets
      author: clip_text(row, 6),         # author name
      published_date: clip_text(row, 7), # first appearance
      writing_time: clip_text(row, 8)    # writing period
    }
    OriginalPicture.new(clip_id(row, 2), attributes)
  end
end
|
141
|
+
|
142
|
+
# Parses the booklet tab of a detailed search (div.resultTabD_subE) and
# returns one Booklet per table row.
def parse_search_target_booklet(response_body)
  rows = Nokogiri::HTML.parse(response_body).css('div.resultTabD_subE > div > table > tbody > tr')
  rows.map do |row|
    attributes = {
      title: clip_text(row, 2),         # booklet name
      series: clip_text(row, 3),        # series
      volume: clip_text(row, 4),        # volume
      author: clip_text(row, 5),        # author name
      publisher: clip_text(row, 6),     # publisher / circle name
      published_date: clip_text(row, 7) # publication year and month
    }
    Booklet.new(clip_id(row, 2), attributes)
  end
end
|
158
|
+
|
159
|
+
# Parses a comic work detail page into an attribute hash.
#
# Besides the scalar fields of the main table, the hash carries
# :comic_titles (ComicTitle objects) and :magazine_works (MagazineWork
# objects) built from the first and second sections of div.sub.
def parse_comic_work(response_body)
  doc = Nokogiri::HTML.parse(response_body)
  main = doc.css('body > article > div.main > section > table > tbody')
  # NOTE: the element index has to be shifted because of an error in the
  # HTML structure.
  shift = 1
  content = {
    title: clip_text(main, 2, 2 + shift),           # manga work title
    title_kana: clip_text(main, 2, 3 + shift),      # manga work title (reading)
    sub_title: clip_text(main, 2, 4 + shift),       # alternative / sub / original title
    title_alphabet: clip_text(main, 2, 5 + shift),  # romanized title
    # author: clip_text(main, 2, 6 + shift),        # author (statement of responsibility); disabled because author.name is used instead
    authors: clip_authors(main, 2, 7 + shift),      # author authority ID
    published_date: clip_text(main, 2, 8 + shift),  # publication period
    source: clip_text(main, 2, 9 + shift),          # source (first appearance)
    introduction: clip_text(main, 2, 10 + shift),   # introduction / commentary
    category: clip_text(main, 2, 11 + shift),       # classification
    tags: clip_text(main, 2, 12 + shift),           # tags
    rating: clip_text(main, 2, 13 + shift)          # rating
  }

  # Complete comic sets
  content[:comic_title_quantity] = doc.css('body > article > div.sub > section:nth-child(1) > h3 > span').text
  title_rows = doc.css('body > article > div.sub > section:nth-child(1) table')
                  .flat_map { |table| table.css('tr').to_a }
                  .reject { |row| row.css('td').empty? }
  content[:comic_titles] = title_rows.map { |row|
    attributes = {
      title: clip_text(row, 1),
      author: clip_text(row, 2),
      total_comic_volume: clip_text(row, 3)
    }
    ComicTitle.new(clip_id(row, 1), attributes)
  }

  # Works published in magazines
  content[:magazine_work_quantity] = doc.css('body > article > div.sub > section:nth-child(2) > h3 > span').text
  work_rows = doc.css('body > article > div.sub > section:nth-child(2) table')
                 .flat_map { |table| table.css('tr').to_a }
                 .reject { |row| row.css('td').empty? }
  content[:magazine_works] = work_rows.map { |row|
    attributes = {
      title: clip_text(row, 1),
      author: clip_text(row, 2),
      magazine_title: clip_text(row, 3),
      published_date: clip_text(row, 4)
    }
    MagazineWork.new(clip_id(row, 1), attributes)
  }

  # Materials, original pictures, other booklets and related manga works
  # are not implemented because no sample page could be found.

  content
end
|
209
|
+
|
210
|
+
# Parses a comic title (complete set of volumes) detail page into an
# attribute hash. :comic_work holds a ComicWork and :comics holds one
# Comic per volume row of the first div.sub section.
#
# The keys :origina, :origina_kana and :langage are spelled exactly as
# callers currently receive them.
def parse_comic_title(response_body)
  doc = Nokogiri::HTML.parse(response_body)
  main = doc.css('body > article > div.main > section > table > tbody')
  # NOTE: the element index has to be shifted because of an error in the
  # HTML structure.
  shift = 1
  result = {
    comic_work: ComicWork.new(clip_id(main, 2, 2 + shift)),
    title: clip_text(main, 2, 3 + shift),               # set name
    title_kana: clip_text(main, 2, 4 + shift),          # set name (reading)
    title_append: clip_text(main, 2, 5 + shift),        # set name addition
    title_append_kana: clip_text(main, 2, 6 + shift),   # set name addition (reading)
    title_other: clip_text(main, 2, 7 + shift),         # set name, other edition
    # total_comic_volume: clip_text(main, 2, 8 + shift) # number of volumes; disabled, same value as comic_quantity
    responsible: clip_text(main, 2, 9 + shift),         # statement of responsibility
    author: clip_text(main, 2, 10 + shift),             # author
    author_kana: clip_text(main, 2, 11 + shift),        # author (reading)
    origina: clip_text(main, 2, 12 + shift),            # original story / concept
    origina_kana: clip_text(main, 2, 13 + shift),       # original story / concept (reading)
    collaborator: clip_text(main, 2, 14 + shift),       # collaborator
    collaborator_kana: clip_text(main, 2, 15 + shift),  # collaborator (reading)
    headings: clip_text(main, 2, 16 + shift),           # heading
    authors: clip_authors(main, 2, 17 + shift),         # author authority ID
    label: clip_text(main, 2, 18 + shift),              # comic label
    label_kana: clip_text(main, 2, 19 + shift),         # comic label (reading)
    series: clip_text(main, 2, 21 + shift),             # series
    series_kana: clip_text(main, 2, 22 + shift),        # series (reading)
    publisher: clip_text(main, 2, 23 + shift),          # publisher name
    published_area: clip_text(main, 2, 25 + shift),     # place of publication
    size: clip_text(main, 2, 26 + shift),               # height x width
    isbn: clip_text(main, 2, 27 + shift),               # set code such as ISBN
    langage: clip_text(main, 2, 28 + shift),            # language
    category: clip_text(main, 2, 29 + shift),           # classification
    rating: clip_text(main, 2, 30 + shift),             # rating
    introduction: clip_text(main, 2, 31 + shift),       # set introduction
    tags: clip_text(main, 2, 32 + shift),               # set tags
    note: clip_text(main, 2, 33 + shift)                # set remarks
  }

  # Individual volumes
  result[:comic_quantity] = doc.css('body > article > div.sub > section > nav > h3 > span').text
  volume_rows = doc.css('body > article > div.sub > section:nth-child(1) > table > tbody > tr')
                   .reject { |row| row.css('td').empty? }
  result[:comics] = volume_rows.map { |row|
    attributes = {
      title: clip_text(row, 1),
      title_append: clip_text(row, 2),
      volume: clip_text(row, 3)
    }
    Comic.new(clip_id(row, 1), attributes)
  }

  result
end
|
260
|
+
|
261
|
+
# Parses a comic (single volume) detail page into an attribute hash.
#
# Returns an empty hash when the first main section contains no table.
# Otherwise the hash holds :prev_comic / :next_comic (Comic or nil),
# :comic_title (ComicTitle) and the fields of the four main sections.
#
# NOTE(review): the keys :auhtor, :auhtor_kana and :langage are misspelled
# but are kept as they are because callers read them under these names.
def parse_comic(response_body)
  result = {}
  doc = Nokogiri::HTML.parse(response_body)
  return result if doc.css('body > article > div.main > section:nth-child(1) > table').empty?

  # Previous / next volume
  prev_id = clip_id(doc.css('body > article > header > ul > li.bookSkip > ul > li.prv > a'))
  result[:prev_comic] = prev_id ? Comic.new(prev_id) : nil
  next_id = clip_id(doc.css('body > article > header > ul > li.bookSkip > ul > li.nxt > a'))
  result[:next_comic] = next_id ? Comic.new(next_id) : nil

  # Basic information
  tbody = doc.css('body > article > div.main > section:nth-child(1) > table > tbody')
  result[:comic_title] = ComicTitle.new(clip_id(tbody, 4, 1))
  result[:title] = clip_text(tbody, 2, 2)
  result[:title_kana] = clip_text(tbody, 2, 3)
  result[:title_append] = clip_text(tbody, 2, 4)
  result[:title_append_kana] = clip_text(tbody, 2, 5)
  result[:volume] = clip_text(tbody, 2, 6)
  result[:volume_sort_number] = clip_text(tbody, 4, 6)
  result[:volume_other_number] = clip_text(tbody, 2, 7)
  result[:introduction] = clip_text(tbody, 2, 8)

  # Author statement
  tbody = doc.css('body > article > div.main > section:nth-child(2) > table > tbody')
  result[:responsible] = clip_text(tbody, 2, 1)
  result[:authors] = clip_authors(tbody, 2, 2)
  result[:headings] = clip_text(tbody, 2, 3)
  result[:auhtor] = clip_text(tbody, 2, 4)
  result[:auhtor_kana] = clip_text(tbody, 4, 4)
  result[:original_title] = clip_text(tbody, 2, 5)
  result[:original_title_kana] = clip_text(tbody, 4, 5)
  # The collaborator shares row 6 with its reading (:collaborator_kana);
  # row 5 is the original-title row and was read here by mistake.
  result[:collaborator] = clip_text(tbody, 2, 6)
  result[:collaborator_kana] = clip_text(tbody, 4, 6)

  # Publisher and label
  tbody = doc.css('body > article > div.main > section:nth-child(3) > table > tbody')
  result[:publisher] = clip_text(tbody, 2, 1)
  result[:label] = clip_text(tbody, 2, 3)
  result[:label_kana] = clip_text(tbody, 2, 4)
  result[:label_number] = clip_text(tbody, 2, 5)
  result[:series] = clip_text(tbody, 2, 6)
  result[:series_kana] = clip_text(tbody, 4, 6)

  # Other
  tbody = doc.css('body > article > div.main > section:nth-child(4) > table > tbody')
  result[:published_date] = clip_text(tbody, 2, 1)
  result[:first_price] = clip_text(tbody, 4, 1)
  result[:isbn10] = clip_isbn10(tbody, 2, 2)
  result[:isbn13] = clip_isbn13(tbody, 2, 2)
  result[:japan_book_number] = clip_text(tbody, 2, 3)
  result[:total_page] = clip_text(tbody, 2, 4)
  result[:size] = clip_text(tbody, 4, 4)
  result[:langage] = clip_text(tbody, 2, 5)
  result[:published_area] = clip_text(tbody, 4, 5)
  result[:rating] = clip_text(tbody, 2, 6)
  result[:category] = clip_text(tbody, 2, 7)
  result[:tags] = clip_text(tbody, 2, 8)
  result[:note] = clip_text(tbody, 2, 9)

  result
end
|
323
|
+
|
324
|
+
# Parses a magazine work detail page into an attribute hash.
# :magazines holds one Magazine per issue row found in div.sub.
def parse_magazine_work(response_body)
  doc = Nokogiri::HTML.parse(response_body)
  main = doc.css('body > article > div.main > section > table > tbody')
  # NOTE: the element index has to be shifted because of an error in the
  # HTML structure.
  shift = 1
  result = {
    title: clip_text(main, 2, 2 + shift),             # magazine work title
    title_kana: clip_text(main, 2, 3 + shift),        # magazine work title (reading)
    author: clip_text(main, 2, 4 + shift),            # author
    author_kana: clip_text(main, 2, 5 + shift),       # author (reading)
    original: clip_text(main, 2, 6 + shift),          # original story / concept
    original_kana: clip_text(main, 2, 7 + shift),     # original story / concept (reading)
    collaborator: clip_text(main, 2, 8 + shift),      # collaborator
    collaborator_kana: clip_text(main, 2, 9 + shift), # collaborator (reading)
    tags: clip_text(main, 2, 10 + shift),             # tags
    note: clip_text(main, 2, 11 + shift)              # remarks
  }

  # Magazine issues
  issue_rows = doc.css('body > article > div.sub > section > div.moreBlock table.infoTbl2 tbody tr')
                  .reject { |row| row.css('td').empty? }
  result[:magazines] = issue_rows.map { |row|
    attributes = {
      title: clip_text(row, 1),
      published_date: clip_text(row, 2),
      display_volume: clip_text(row, 3),
      display_sub_volume: clip_text(row, 4)
    }
    Magazine.new(clip_id(row, 1), attributes)
  }

  result
end
|
354
|
+
|
355
|
+
# Parses a magazine title detail page into an attribute hash.
# :magazines holds one Magazine per issue row found in div.sub.
#
# The key :langage is spelled exactly as callers currently receive it.
def parse_magazine_title(response_body)
  doc = Nokogiri::HTML.parse(response_body)
  main = doc.css('body > article > div.main > section > table > tbody')
  # NOTE: the element index has to be shifted because of an error in the
  # HTML structure around the "magazine abbreviation ID" part.
  id_shift = 1
  # NOTE: a further shift is needed after the "ISSN" part, again because of
  # an error in the HTML structure.
  issn_shift = 2
  result = {
    title: clip_text(main, 2, 2 + id_shift),                 # magazine name
    title_kana: clip_text(main, 2, 3 + id_shift),            # magazine name (reading)
    publisher: clip_text(main, 2, 4 + id_shift),             # publisher name
    published_area: clip_text(main, 2, 6 + id_shift),        # place of publication
    published_interval: clip_text(main, 2, 7 + id_shift),    # publication frequency
    history: clip_text(main, 2, 8 + id_shift),               # history of changes
    introduction: clip_text(main, 2, 9 + id_shift),          # introduction
    published_start_date: clip_text(main, 2, 10 + id_shift), # first issue date
    published_end_date: clip_text(main, 2, 11 + id_shift),   # final issue date
    display_last_volume: clip_text(main, 2, 12 + id_shift),  # displayed number of final issue
    last_volume: clip_text(main, 2, 13 + id_shift),          # final issue
    volume: clip_text(main, 4, 13 + id_shift),               # issue
    volume2: clip_text(main, 6, 13 + id_shift),              # volume and issue
    # issn: clip_text(main, 2, 14 + id_shift)                # ISSN
    japan_book_number: clip_text(main, 2, 15 + issn_shift),  # national bibliography number
    osaka_title_code: clip_text(main, 2, 16 + issn_shift),   # Osaka title code
    langage: clip_text(main, 2, 17 + issn_shift),            # language
    tags: clip_text(main, 2, 18 + issn_shift),               # tags
    note: clip_text(main, 2, 19 + issn_shift)                # remarks
  }

  # Magazine issues
  issue_rows = doc.css('body > article > div.sub > section > div.moreBlock table.infoTbl2 tbody tr')
                  .reject { |row| row.css('td').empty? }
  result[:magazines] = issue_rows.map { |row|
    attributes = {
      title: clip_text(row, 1),
      published_date: clip_text(row, 2),
      display_volume: clip_text(row, 3),
      display_sub_volume: clip_text(row, 4)
    }
    Magazine.new(clip_id(row, 1), attributes)
  }

  result
end
|
396
|
+
|
397
|
+
# Parses a magazine issue detail page into an attribute hash.
#
# Returns an empty hash when the first main section contains no table.
# Otherwise the hash holds :prev_magazine / :next_magazine (Magazine or
# nil), the fields of the two main sections and :contents, an array with
# one plain hash per table-of-contents row.
def parse_magazine(response_body)
  result = {}
  doc = Nokogiri::HTML.parse(response_body)
  return result if doc.css('body > article > div.main > section:nth-child(1) > table').empty?

  # Next/Prev
  prev_id = clip_id(doc.css('body > article > header > ul > li.bookSkip > ul > li.prv > a'))
  result[:prev_magazine] = prev_id ? Magazine.new(prev_id) : nil
  next_id = clip_id(doc.css('body > article > header > ul > li.bookSkip > ul > li.nxt > a'))
  result[:next_magazine] = next_id ? Magazine.new(next_id) : nil

  # Basic information
  basic = doc.css('body > article > div.main > section:nth-child(1) > table > tbody')
  # NOTE: the element index has to be shifted because of an error in the
  # HTML structure around the "magazine abbreviation ID" part.
  offset = 2
  result.merge!(
    sub_title: clip_text(basic, 2, 3 + offset),              # subtitle
    sub_title_kana: clip_text(basic, 2, 4 + offset),         # subtitle (reading)
    display_date: clip_text(basic, 2, 5 + offset),           # displayed date
    display_date_merger: clip_text(basic, 2, 6 + offset),    # displayed date (combined issue)
    published_date: clip_text(basic, 2, 7 + offset),         # publication date
    published_date_merger: clip_text(basic, 2, 8 + offset),  # publication date (combined issue)
    release_date: clip_text(basic, 2, 9 + offset),           # release date
    display_volume: clip_text(basic, 2, 10 + offset),        # displayed issue number
    display_merger_volume: clip_text(basic, 2, 11 + offset), # displayed combined issue number
    display_sub_volume: clip_text(basic, 2, 12 + offset),    # auxiliary issue number
    volume: clip_text(basic.css("tr:nth-child(#{13 + offset}) table > tbody > td:nth-child(2)")),  # volume
    volume2: clip_text(basic.css("tr:nth-child(#{13 + offset}) table > tbody > td:nth-child(4)")), # issue
    volume3: clip_text(basic.css("tr:nth-child(#{13 + offset}) table > tbody > td:nth-child(6)"))  # running number
  )

  # Publisher, page count, price
  publishing = doc.css('body > article > div.main > section:nth-child(2) > table > tbody')
  # NOTE: the element index has to be shifted because of an error in the
  # HTML structure around the "publisher name" part.
  offset = 1
  result.merge!(
    # publisher: clip_text(publishing, 2, 1)               # publisher name; cannot be extracted
    publisher2: clip_text(publishing, 2, 3 + offset),      # issuer
    publisher3: clip_text(publishing, 2, 4 + offset),      # editor
    total_page: clip_text(publishing, 2, 5 + offset),      # page count
    binding: clip_text(publishing, 2, 6 + offset),         # binding
    category: clip_text(publishing, 2, 7 + offset),        # classification
    rating: clip_text(publishing, 2, 8 + offset),          # rating
    size: clip_text(publishing, 2, 9 + offset),            # height x width
    price: clip_text(publishing, 2, 10 + offset),          # price
    magazine_code: clip_text(publishing, 2, 11 + offset),  # magazine code
    tags: clip_text(publishing, 2, 12 + offset),           # tags
    note: clip_text(publishing, 2, 13 + offset)            # remarks
  )

  # Table of contents
  toc_rows = doc.css('body > article > div.sub > section:nth-child(2) > table > tbody > tr')
                .reject { |row| row.css('td').empty? }
  result[:contents] = toc_rows.map { |row|
    {
      category: clip_text(row, 1),
      # title: clip_text(row.css('td:nth-child(2)')) # use magazine_work.title instead
      magazine_work: MagazineWork.new(clip_id(row, 2)),
      # author: clip_text(row, 3) # use magazine_work.author instead
      sub_title: clip_text(row, 4),
      start_page: clip_text(row, 5),
      total_page: clip_text(row, 6),
      note: clip_text(row, 7),
      format: clip_text(row, 8)
    }
  }

  result
end
|
462
|
+
|
463
|
+
# Parses an author detail page into a Hash.
#
# The returned Hash holds the author's profile fields, the quantity labels
# shown above the two work listings, and one ComicWork / ComicTitle object per
# listed table row.
#
# @param response_body [String] HTML handed to Nokogiri::HTML.parse
# @return [Hash] keys :headings, :name, :name_kana, :name_alphabet,
#   :reference_authors, :other_name, :birthday, :death_date,
#   :comic_work_quantity, :comic_works, :comic_title_quantity, :comic_titles
def parse_author(response_body)
  page = Nokogiri::HTML.parse(response_body)
  profile = page.css('body > article > div.main > section > table > tbody')
  works_section = 'body > article > div.sub > section:nth-child(1)'
  titles_section = 'body > article > div.sub > section:nth-child(2)'

  # NOTE: the markup around the "ID" row is malformed, so every row index has
  # to be shifted by one.
  shift = 1

  result = {
    headings: clip_text(profile, 2, 2 + shift),             # heading
    name: clip_text(profile, 2, 3 + shift),                 # name
    name_kana: clip_text(profile, 2, 4 + shift),            # reading (kana)
    name_alphabet: clip_text(profile, 2, 5 + shift),        # romanized name
    reference_authors: clip_authors(profile, 2, 6 + shift), # "see also" references
    other_name: clip_text(profile, 2, 7 + shift),           # aliases (spelling variants, real name, old/new kanji forms)
    birthday: clip_text(profile, 2, 8 + shift),             # date of birth (or formation date)
    death_date: clip_text(profile, 2, 9 + shift)            # date of death
  }

  # Works that were published in book form (manga works).
  result[:comic_work_quantity] = page.css("#{works_section} > h3 > span").text
  result[:comic_works] = []
  page.css("#{works_section} table").each do |table|
    table.css('tr').each do |row|
      # Rows without any <td> cell are skipped.
      result[:comic_works] << ComicWork.new(clip_id(row, 1), {}) unless row.css('td').empty?
    end
  end

  # Complete book volumes.
  result[:comic_title_quantity] = page.css("#{titles_section} > h3 > span").text
  result[:comic_titles] = []
  page.css("#{titles_section} table").each do |table|
    table.css('tr').each do |row|
      next if row.css('td').empty?

      # The author column (td 2) is intentionally not copied here.
      details = { title: clip_text(row, 1), comic_quantity: clip_text(row, 3) }
      result[:comic_titles] << ComicTitle.new(clip_id(row, 1), details)
    end
  end

  # Materials, original drawings, other booklets and related manga works are
  # not implemented because no sample page was found.

  result
end
|
506
|
+
|
507
|
+
# Parses a material detail page into a Hash of text fields.
#
# Every value is read with clip_text from the first table of the main section;
# the table below maps each result key to its [td, tr] position.
#
# @param response_body [String] HTML handed to Nokogiri::HTML.parse
# @return [Hash] one entry per field listed in +cells+, in that order
def parse_material(response_body)
  page = Nokogiri::HTML.parse(response_body)
  tbody = page.css('body > article > div.main > section > table > tbody')

  # [result key, td index, tr index]
  # NOTE(review): :discription, :cateogry and :langage look misspelled, but they
  # are the keys handed to callers, so they are left untouched here.
  # NOTE(review): row 10 is never read, and :created_date is taken from the row
  # the original comment labelled "place of creation/publication" - confirm
  # against the page markup.
  cells = [
    [:title, 2, 2],                  # material name
    [:title_kana, 2, 3],             # material name reading
    [:discription, 2, 4],            # detailed description
    [:number, 2, 5],                 # order
    [:number_sort, 4, 5],            # order sort key
    [:related_material_title, 2, 6], # related (linked) items
    [:cateogry, 2, 7],               # classification / category
    [:authority, 2, 8],              # statement of responsibility
    [:authority_kana, 2, 9],         # statement of responsibility reading
    [:published_date, 2, 11],        # period
    [:created_date, 2, 12],          # place of creation (publication)
    [:quantity, 2, 13],              # quantity
    [:size, 2, 14],                  # size
    [:condition, 2, 15],             # condition
    [:langage, 2, 16],               # language
    [:code, 2, 17],                  # code
    [:tags, 2, 18]                   # tags
  ]

  cells.each_with_object({}) do |(key, td, tr), fields|
    fields[key] = clip_text(tbody, td, tr)
  end
end
|
531
|
+
|
532
|
+
# Parses an original-drawing detail page into a Hash.
#
# Text fields are read with clip_text, linked authors with clip_authors, and
# the related work is wrapped in a ComicWork built from the id of a link.
#
# @param response_body [String] HTML handed to Nokogiri::HTML.parse
# @return [Hash] the fields listed below, in this order
def parse_original_picture(response_body)
  page = Nokogiri::HTML.parse(response_body)
  rows = page.css('body > article > div.main > section > table > tbody')

  {
    title: clip_text(rows, 2, 2),                    # drawing name
    title_kana: clip_text(rows, 2, 3),               # drawing name reading
    volume: clip_text(rows, 2, 4),                   # order
    volume_sort_number: clip_text(rows, 4, 4),       # order sort key
    total_page: clip_text(rows, 2, 5),               # number of sheets
    # NOTE(review): :discription and :langage look misspelled but are the keys
    # handed to callers, so they are kept as they are.
    discription: clip_text(rows, 2, 6),              # detailed description (introduction)
    first_magazine_published: clip_text(rows, 2, 7), # first magazine appearance
    recording: clip_text(rows, 2, 8),                # collected in
    authority: clip_text(rows, 2, 9),                # artist / co-author
    authority_kana: clip_text(rows, 2, 10),          # artist / co-author reading
    authors: clip_authors(rows, 2, 11),              # author authority IDs
    created_date: clip_text(rows, 2, 12),            # date of drawing
    size: clip_text(rows, 2, 13),                    # size
    color: clip_text(rows, 2, 14),                   # number of colours
    # NOTE(review): the original comment on this row said "condition", same as
    # the next row - presumably a copy/paste slip; confirm what row 15 holds.
    painting_material: clip_text(rows, 2, 15),
    condition: clip_text(rows, 2, 16),               # condition
    langage: clip_text(rows, 2, 17),                 # language
    tags: clip_text(rows, 2, 18),                    # tags
    # NOTE(review): the work id is read from row 18, the same row as :tags -
    # verify against the page markup whether another row was intended.
    comic_work: ComicWork.new(clip_id(rows, 2, 18))  # work ID
  }
end
|
558
|
+
|
559
|
+
# Parses a booklet detail page into a Hash.
#
# The page's main area is read as four consecutive sections; each section's
# table body is looked up right before its fields are extracted.
#
# @param response_body [String] HTML handed to Nokogiri::HTML.parse
# @return [Hash] the fields of all four sections, in page order
def parse_booklet(response_body)
  page = Nokogiri::HTML.parse(response_body)
  section_rows = lambda do |index|
    page.css("body > article > div.main > section:nth-child(#{index}) > table > tbody")
  end

  # Section 1: work reference and titles.
  rows = section_rows.call(1)
  result = {
    comic_work: ComicWork.new(clip_id(rows, 4, 1), {}), # (manga) work ID
    category: clip_text(rows, 2, 2),                    # classification
    title: clip_text(rows, 2, 3),                       # booklet name
    title_kana: clip_text(rows, 2, 4),                  # reading
    title_append: clip_text(rows, 2, 5),                # booklet name supplement
    title_append_kana: clip_text(rows, 2, 6),           # reading
    volume: clip_text(rows, 2, 7),                      # volume
    volume_sort_number: clip_text(rows, 4, 7),          # volume sort key
    title_other: clip_text(rows, 2, 8),                 # alternative edition title
    introduction: clip_text(rows, 2, 9)                 # introduction
  }

  # Section 2: responsibility and authors. The *_kana values come from the
  # fourth cell of the same row as their base field.
  rows = section_rows.call(2)
  result.update(
    authority: clip_text(rows, 2, 1),         # statement of responsibility
    authors: clip_authors(rows, 2, 2),        # author authority IDs
    author: clip_text(rows, 2, 3),            # author
    author_kana: clip_text(rows, 4, 3),
    original: clip_text(rows, 2, 4),          # original story / concept
    original_kana: clip_text(rows, 4, 4),
    collaborator: clip_text(rows, 2, 5),      # collaborator
    collaborator_kana: clip_text(rows, 4, 5),
    headings: clip_text(rows, 2, 6)           # heading
  )

  # Section 3: publisher and series. Row 2 is not read.
  rows = section_rows.call(3)
  result.update(
    publisher: clip_text(rows, 2, 1),       # publisher name (circle name)
    series: clip_text(rows, 2, 3),          # series
    series_kana: clip_text(rows, 2, 4),     # reading
    series_number: clip_text(rows, 2, 5),   # series number
    published_event: clip_text(rows, 2, 6)  # distribution event
  )

  # Section 4: publication details.
  # NOTE(review): :published_data / :published_data_note / :langage look like
  # misspellings, but they are the keys handed to callers and are kept as is.
  rows = section_rows.call(4)
  result.update(
    published_data: clip_text(rows, 2, 1),      # first edition publication date
    price: clip_text(rows, 4, 1),
    published_data_note: clip_text(rows, 2, 2), # publication date note
    japan_book_number: clip_text(rows, 2, 3),   # national bibliography number
    format: clip_text(rows, 2, 4),              # binding / book format
    total_page: clip_text(rows, 2, 5),          # number of pages
    size: clip_text(rows, 4, 5),
    published_area: clip_text(rows, 2, 6),      # place of publication
    publisher2: clip_text(rows, 4, 6),
    langage: clip_text(rows, 2, 7),             # language
    rating: clip_text(rows, 2, 8),              # rating
    tags: clip_text(rows, 2, 9),                # tags
    note: clip_text(rows, 2, 10)                # notes
  )

  result
end
|
610
|
+
|
611
|
+
private
|
612
|
+
|
613
|
+
# Extracts the cleaned-up text of a table cell (or of +node+ itself).
#
# When the selected node contains <a> elements, the text of those links is
# used; otherwise the node's own text is used. Newlines are removed and
# leading/trailing whitespace - ASCII whitespace as well as the full-width
# space U+3000 - is trimmed.
#
# @param node [#css, #text] node or node set to search
# @param td_number [Integer, nil] 1-based td position; nil uses +node+ as is
# @param tr_number [Integer, nil] 1-based tr position; only used together
#   with +td_number+
# @return [String, nil] the cleaned text, or nil if extraction raised an error
def clip_text(node, td_number = nil, tr_number = nil)
  if td_number && tr_number
    node = node.css("tr:nth-child(#{tr_number}) > td:nth-child(#{td_number})")
  elsif td_number
    node = node.css("td:nth-child(#{td_number})")
  end

  anchors = node.css('a')
  text = anchors.empty? ? node.text : anchors.text

  # Non-mutating calls are used on purpose: the bang variants (gsub!, strip!)
  # return nil when they change nothing, which must never become this method's
  # return value. The full-width space is spelled as \u3000 so that it stays
  # visible, and \A / \z anchor the whole string instead of single lines.
  text.delete("\n").gsub(/\A[\u3000\s]+|[\u3000\s]+\z/, '')
rescue
  # Best effort: a missing or unexpected node yields nil instead of an error.
  nil
end
|
627
|
+
|
628
|
+
# Returns the href value of a link found in a table cell (or of +node+ itself).
#
# @param node [#css, #attribute] node or node set to search
# @param td_number [Integer, nil] 1-based td position; nil uses +node+ as is
# @param tr_number [Integer, nil] 1-based tr position; only used together
#   with +td_number+
# @return [String, nil] the href value, or nil when it cannot be read
def clip_uri(node, td_number = nil, tr_number = nil)
  selector =
    if td_number && tr_number
      "tr:nth-child(#{tr_number}) > td:nth-child(#{td_number}) > a"
    elsif td_number
      "td:nth-child(#{td_number}) > a"
    end

  anchor = selector ? node.css(selector) : node
  anchor.attribute('href').value
rescue
  # A missing link or attribute raises here and is reported as nil.
  nil
end
|
638
|
+
|
639
|
+
# Extracts the numeric id at the end of a link's URI.
#
# The URI is obtained through clip_uri; a query string, if present, is cut off
# before the trailing digits are taken.
#
# @param node [Object] passed through to clip_uri
# @param td_number [Integer, nil] passed through to clip_uri
# @param tr_number [Integer, nil] passed through to clip_uri
# @return [String, nil] the trailing digits, or nil if there are none or the
#   URI could not be read
def clip_id(node, td_number = nil, tr_number = nil)
  path = clip_uri(node, td_number, tr_number)

  # Drop everything from the first "?" onwards.
  query_start = path.index('?')
  path = path[0...query_start] if query_start

  path[/[0-9]+$/]
rescue
  nil
end
|
647
|
+
|
648
|
+
# Tells whether clip_id finds an id for the given node / cell.
#
# @param node [Object] passed through to clip_id
# @param td_number [Integer, nil] passed through to clip_id
# @param tr_number [Integer, nil] passed through to clip_id
# @return [Boolean] true when clip_id returns a truthy value, false otherwise
def has_id?(node, td_number = nil, tr_number = nil)
  !!clip_id(node, td_number, tr_number)
end
|
655
|
+
|
656
|
+
# Builds one Author per link found in a table cell (or per element of +node+).
#
# Each Author is created from the id clip_id extracts for the link and an
# empty attribute Hash; the link text is deliberately not used as a name
# because it is not always one.
#
# @param node [#css, #each] node set to search, or the links themselves when
#   +td_number+ is nil
# @param td_number [Integer, nil] 1-based td position
# @param tr_number [Integer, nil] 1-based tr position; only used together
#   with +td_number+
# @return [Array<Author>] the authors, or [] if anything raised
def clip_authors(node, td_number = nil, tr_number = nil)
  anchors =
    if td_number && tr_number
      node.css("tr:nth-child(#{tr_number}) > td:nth-child(#{td_number}) > a")
    elsif td_number
      node.css("td:nth-child(#{td_number}) > a")
    else
      node
    end

  found = []
  anchors.each { |anchor| found << Author.new(clip_id(anchor), {}) }
  found
rescue
  []
end
|
672
|
+
|
673
|
+
# NOTE:
|
674
|
+
# ISBN10は旧フォーマット。10桁の数字(最後は「X」の場合もある)
|
675
|
+
# ISBN13は現行フォーマット。13桁の数字
|
676
|
+
|
677
|
+
# Extracts an ISBN-10 from the text of a cell.
#
# ISBN-10 is the older format: a standalone run of ten characters made of
# digits and "X" (the pattern accepts "X" in any position).
#
# @param node [Object] passed through to clip_text
# @param td_number [Integer, nil] passed through to clip_text
# @param tr_number [Integer, nil] passed through to clip_text
# @return [String, nil] the first match, "" when the text has none, or nil
#   when no text could be read
def clip_isbn10(node, td_number = nil, tr_number = nil)
  clip_text(node, td_number, tr_number)[/\b[0-9X]{10}\b/].to_s
rescue
  nil
end
|
683
|
+
|
684
|
+
# Extracts an ISBN-13 from the text of a cell.
#
# ISBN-13 is the current format: a standalone run of thirteen digits.
#
# @param node [Object] passed through to clip_text
# @param td_number [Integer, nil] passed through to clip_text
# @param tr_number [Integer, nil] passed through to clip_text
# @return [String, nil] the first match, "" when the text has none, or nil
#   when no text could be read
def clip_isbn13(node, td_number = nil, tr_number = nil)
  clip_text(node, td_number, tr_number)[/\b[0-9]{13}\b/].to_s
rescue
  nil
end
|
690
|
+
end
|
691
|
+
end
|
692
|
+
end
|
693
|
+
end
|