media_arts_db 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,870 @@
1
+ module MediaArtsDb
2
+
3
# Constants and helper lists describing what Comic.search_by_source accepts.
#
# Two historical misspellings (+AUHTORITY_ID+ and
# +enable_optins_for_time_range+) are part of the public interface and are
# kept; correctly spelled aliases are provided next to them.
module ComicSearchOption
  # Search targets (value sent as the 'msf[target][]' parameter).
  TARGET_COMIC = 1
  TARGET_MAGAZINE_VOLUME = 2
  TARGET_MATERIAL = 3
  TARGET_ORIGINAL_PICTURE = 4
  TARGET_BOOKLET = 5

  # Date-range option keys.
  START_YEAR = 'start_year' # date range: from (year)
  START_MONTH = 'start_month' # date range: from (month)
  END_YEAR = 'end_year' # date range: to (year)
  END_MONTH = 'end_month' # date range: to (month)

  # Field selectors (value sent as the 'msf[selectN]' parameter).
  ID = 1 # ID (ISBN etc.)
  TITLE = 2 # name
  VOLUME = 3 # volume / order
  PERSON_NAME = 4 # person name
  AUHTORITY_ID = 5 # authority ID (misspelled name kept for compatibility)
  AUTHORITY_ID = AUHTORITY_ID # correctly spelled alias
  PUBLISHER = 6 # publisher
  LABEL = 7 # label
  BOOK_FORMAT = 8 # book format etc.
  TAG = 9 # tag
  CATEGORY = 10 # category
  NOTE = 11 # note
  MAGAZINE_DISPLAY_VOLUME = 12 # [magazine volume] displayed issue number
  MAGAZINE_DISPLAY_SUB_VOLUME = 13 # [magazine volume] auxiliary issue number
  MAGAZINE_VOLUME = 14 # [magazine volume] volume / issue / serial number

  # @return [Array<Integer>] every supported search target
  def self.enable_targets
    [TARGET_COMIC, TARGET_MAGAZINE_VOLUME, TARGET_MATERIAL, TARGET_ORIGINAL_PICTURE, TARGET_BOOKLET]
  end

  # @return [Array<String>] the option keys that describe a date range
  def self.enable_options_for_time_range
    [START_YEAR, START_MONTH, END_YEAR, END_MONTH]
  end

  # Misspelled original name of enable_options_for_time_range, kept so
  # existing callers continue to work.
  #
  # @return [Array<String>] the option keys that describe a date range
  def self.enable_optins_for_time_range
    enable_options_for_time_range
  end

  # @return [Array<Integer>] field selectors usable with every target
  def self.enable_options
    [ID, TITLE, VOLUME, PERSON_NAME, AUHTORITY_ID, PUBLISHER, LABEL, BOOK_FORMAT, TAG, CATEGORY, NOTE]
  end

  # @return [Array<Integer>] the common selectors plus the magazine-only ones
  def self.enable_options_for_magazine
    enable_options + [MAGAZINE_DISPLAY_VOLUME, MAGAZINE_DISPLAY_SUB_VOLUME, MAGAZINE_VOLUME]
  end
end
46
+
47
+ class Comic < HttpBase
48
+
49
+ include MediaArtsDb
50
+ include MediaArtsDb::ComicSearchOption
51
+
52
+ class << self
53
+
54
# Runs a keyword search against the comic search URI.
#
# Only one keyword is used per call, with precedence title > magazine >
# author. With no keyword at all an empty Array is returned and no request
# is made.
#
# @param title keyword sent as :keyword_title
# @param magazine keyword sent as :keyword_magazine
# @param author keyword sent as :keyword_author
# @param per value sent as the :per parameter
# @param page value sent as the :page parameter
# @return the parsed result of the matching search, or [] without a keyword
def search_by_keyword(title: nil, magazine: nil, author: nil, per: 100, page: 1)
  endpoint = MediaArtsDb.comic_search_uri
  paging = { per: per, page: page }

  return parse_title_search_result(search_request(endpoint, paging.merge(keyword_title: title))) if title
  return parse_magazine_search_result(search_request(endpoint, paging.merge(keyword_magazine: magazine))) if magazine
  return parse_author_search_result(search_request(endpoint, paging.merge(keyword_author: author))) if author

  []
end
73
+
74
# Runs a detailed ("source") search for one target type.
#
# Keys of +options+ are either date-range keys (sent as 'msf[<key>]') or
# field selectors (sent as numbered 'msf[selectN]' / 'msf[textN]' pairs).
# At most five selector pairs are sent; further selectors are ignored.
# Magazine-only selectors are ignored unless the target is
# TARGET_MAGAZINE_VOLUME. Unknown keys are ignored.
#
# @param target one of ComicSearchOption.enable_targets
# @param options [Hash] option key => search value
# @param per value sent as the :per parameter
# @param page value sent as the :page parameter
# @return the parsed search result for the target, or [] for an unknown target
def search_by_source(target: ComicSearchOption::TARGET_COMIC, options: {}, per: 100, page: 1)
  return [] unless ComicSearchOption.enable_targets.include?(target)

  uri = MediaArtsDb.comic_search_uri
  params = { per: per, page: page }
  params['msf[target][]'] = target

  time_range_keys = ComicSearchOption.enable_optins_for_time_range
  common_keys = ComicSearchOption.enable_options
  magazine_keys = ComicSearchOption.enable_options_for_magazine
  magazine_target = target == ComicSearchOption::TARGET_MAGAZINE_VOLUME

  option_index = 1
  options.each do |key, value|
    if time_range_keys.include?(key)
      # The closing bracket was missing here, producing keys like 'msf[start_year'.
      params["msf[#{key}]"] = value
      next
    end

    selectable = common_keys.include?(key) || (magazine_target && magazine_keys.include?(key))
    next unless selectable
    next if option_index > 5 # only five select/text pairs are sent

    params["msf[select#{option_index}]"] = key
    params["msf[text#{option_index}]"] = value
    option_index += 1
  end

  res_body = search_request(uri, params)
  case target
  when ComicSearchOption::TARGET_COMIC
    parse_comic_search_result(res_body)
  when ComicSearchOption::TARGET_MAGAZINE_VOLUME
    parse_magazine_volume_search_result(res_body)
  when ComicSearchOption::TARGET_MATERIAL
    parse_material_search_result(res_body)
  when ComicSearchOption::TARGET_ORIGINAL_PICTURE
    parse_original_picture_search_result(res_body)
  when ComicSearchOption::TARGET_BOOKLET
    parse_booklet_search_result(res_body)
  end
end
112
+
113
+
114
# Fetches the comic-works page for +id+ and returns its parsed content.
#
# @param id identifier handed to MediaArtsDb.comic_comic_works_uri
# @return the value produced by parse_comic_works_result
def find_comic_works(id)
  parse_comic_works_result(http_get(MediaArtsDb.comic_comic_works_uri(id)))
end
119
+
120
# Fetches the comic-titles page for +id+ (with paging parameters) and
# returns its parsed content.
#
# @param id identifier handed to MediaArtsDb.comic_comic_titles_uri
# @param per value sent as the :per parameter
# @param page value sent as the :page parameter
# @return the value produced by parse_comic_titles_result
def find_comic_titles(id, per: 100, page: 1)
  paging = { per: per, page: page }
  parse_comic_titles_result(search_request(MediaArtsDb.comic_comic_titles_uri(id), paging))
end
126
+
127
# Fetches the page of a single comic volume and returns its parsed content.
#
# @param id identifier handed to MediaArtsDb.comic_comic_uri
# @return the value produced by parse_comic_result
def find_comic(id)
  parse_comic_result(http_get(MediaArtsDb.comic_comic_uri(id)))
end
132
+
133
# Fetches the magazine-works page for +id+ and returns its parsed content.
#
# @param id identifier handed to MediaArtsDb.comic_magazine_works_uri
# @return the value produced by parse_magazine_works_result
def find_magazine_works(id)
  parse_magazine_works_result(http_get(MediaArtsDb.comic_magazine_works_uri(id)))
end
138
+
139
# Fetches the magazine-titles page for +id+ and returns its parsed content.
#
# @param id identifier handed to MediaArtsDb.comic_magazine_titles_uri
# @return the value produced by parse_magazine_titles_result
def find_magazine_titles(id)
  parse_magazine_titles_result(http_get(MediaArtsDb.comic_magazine_titles_uri(id)))
end
144
+
145
# Fetches the page of a single magazine issue and returns its parsed content.
#
# @param id identifier handed to MediaArtsDb.comic_magazine_uri
# @return the value produced by parse_magazine_result
def find_magazine(id)
  parse_magazine_result(http_get(MediaArtsDb.comic_magazine_uri(id)))
end
150
+
151
# Fetches the author page for +id+ and returns its parsed content.
#
# @param id identifier handed to MediaArtsDb.comic_author_uri
# @return the value produced by parse_author_result
def find_author(id)
  parse_author_result(http_get(MediaArtsDb.comic_author_uri(id)))
end
156
+
157
# Fetches the material page for +id+ and returns its parsed content.
#
# @param id identifier handed to MediaArtsDb.comic_material_uri
# @return the value produced by parse_material_result
def find_material(id)
  parse_material_result(http_get(MediaArtsDb.comic_material_uri(id)))
end
162
+
163
# Fetches the original-picture page for +id+ and returns its parsed content.
#
# @param id identifier handed to MediaArtsDb.comic_original_picture_uri
# @return the value produced by parse_original_picture_result
def find_original_picture(id)
  parse_original_picture_result(http_get(MediaArtsDb.comic_original_picture_uri(id)))
end
168
+
169
# Fetches the booklet page for +id+ and returns its parsed content.
#
# @param id identifier handed to MediaArtsDb.comic_booklet_uri
# @return the value produced by parse_booklet_result
def find_booklet(id)
  parse_booklet_result(http_get(MediaArtsDb.comic_booklet_uri(id)))
end
174
+
175
+ private
176
+
177
# Parses the title-keyword search result table (div.resultTabA).
#
# Each table row becomes a Hash. The first-column link may point at either
# comic_works or magazine_works; :type and the matching id key are set
# accordingly. A row without such a link gets neither :type nor an id key
# instead of raising, so one odd row no longer aborts the whole parse.
#
# @param res_body [String] HTML of the search result page
# @return [Array<Hash>] one Hash per result row
def parse_title_search_result(res_body)
  doc = Nokogiri::HTML.parse(res_body)
  doc.css('div.resultTabA table > tbody > tr').map do |tr|
    row = {}
    anchor = tr.css('td:nth-child(1) > a')
    href_attr = anchor.empty? ? nil : anchor.attribute('href')
    link_url = href_attr ? href_attr.value : ''
    # The link may point at comic_works or at magazine_works.
    if link_url =~ /comic_works/
      row[:type] = 'comic_works'
      row[:comic_works_id] = clip_id(anchor)
    elsif link_url =~ /magazine_works/
      row[:type] = 'magazine_works'
      row[:magazine_works_id] = clip_id(anchor)
    end
    row[:title] = clip_text(tr.css('td:nth-child(1)')) # work title
    row[:author] = tr.css('td:nth-child(2)').text # author name
    row[:tags] = tr.css('td:nth-child(3)').text # tags
    row[:total_comic_volume] = tr.css('td:nth-child(4)').text # complete book sets
    row[:total_magazine_volume] = tr.css('td:nth-child(5)').text # works published in magazines
    row[:materials] = tr.css('td:nth-child(6)').text # materials
    row[:original_picture] = tr.css('td:nth-child(7)').text # original pictures
    row[:other] = tr.css('td:nth-child(8)').text # other
    row
  end
end
204
+
205
# Parses the magazine-keyword search result table (div.resultTabB).
#
# @param res_body [String] HTML of the search result page
# @return [Array<Hash>] one Hash per result row, each with :type
#   'magazine_titles'
def parse_magazine_search_result(res_body)
  rows = Nokogiri::HTML.parse(res_body).css('div.resultTabB table > tbody > tr')
  rows.map do |tr|
    cell = ->(n) { tr.css("td:nth-child(#{n})") }
    {
      type: 'magazine_titles',
      title: clip_text(cell.call(1)),
      magazine_titles_id: clip_id(tr.css('td:nth-child(1) > a')),
      publisher: cell.call(2).text,
      published_interval: cell.call(3).text,
      published_start_date: cell.call(4).text,
      published_end_date: cell.call(5).text,
      tags: cell.call(6).text
    }
  end
end
223
+
224
# Parses the author-keyword search result table (div.resultTabC).
#
# :type starts as 'none' because some records carry no values at all. It
# becomes 'author' when the first column holds a link, and 'magazine_works'
# when the fifth column holds a link (the latter wins when both are present).
#
# @param res_body [String] HTML of the search result page
# @return [Array<Hash>] one Hash per result row
def parse_author_search_result(res_body)
  Nokogiri::HTML.parse(res_body).css('div.resultTabC table > tbody > tr').map do |tr|
    record = { type: 'none' }

    # The link may point at an authority record or at magazine_works.
    author_link = tr.css('td:nth-child(1) > a')
    if author_link.empty?
      record[:author_name] = tr.css('td:nth-child(1)').text
    else
      record[:type] = 'author'
      record[:author_id] = clip_id(tr.css('td:nth-child(1) > a'))
      record[:author_name] = clip_text(tr.css('td:nth-child(1)'))
    end

    record[:author_name_kana] = tr.css('td:nth-child(2)').text
    record[:related_author_id] = clip_id(tr.css('td:nth-child(3) > a'))
    related_name = clip_text(tr.css('td:nth-child(3)'))
    record[:related_author_name] = related_name.gsub(/\n/, '').strip
    record[:comic_title_quantity] = tr.css('td:nth-child(4)').text

    works_link = tr.css('td:nth-child(5) > a')
    if works_link.empty?
      record[:magazine_works_name] = tr.css('td:nth-child(5)').text.gsub(/\n/, '').strip
    else
      record[:type] = 'magazine_works'
      record[:magazine_works_id] = clip_id(tr.css('td:nth-child(5) > a'))
      works_name = clip_text(tr.css('td:nth-child(5)'))
      record[:magazine_works_name] = works_name.gsub(/\n/, '').strip
    end

    record
  end
end
254
+
255
# Parses the comic (book volume) source-search result table
# (div.resultTabD_subA).
#
# The first column is split on <br>: with two or more parts the first is
# taken as the ISBN, otherwise :isbn is '-' so the key always exists.
# The split is done on the cell's inner HTML; splitting the plain text (as
# before) could never find a '<br>' because text extraction drops markup.
# NOTE(review): assumes the cell is laid out as "ISBN<br>other id" - confirm
# against the live page.
#
# @param res_body [String] HTML of the search result page
# @return [Array<Hash>] one Hash per result row, each with :type 'comic'
def parse_comic_search_result(res_body)
  doc = Nokogiri::HTML.parse(res_body)
  doc.css('div.resultTabD_subA > div > table > tbody > tr').map do |tr|
    row = {}
    row[:type] = 'comic'
    id_parts = tr.css('td:nth-child(1)').inner_html.split(/<br\s*\/?>/i).map(&:strip)
    isbn = id_parts.size > 1 ? id_parts.first : nil
    row[:isbn] = isbn.nil? || isbn.empty? ? '-' : isbn # key is created even without an ISBN
    row[:comic_title] = clip_text(tr.css('td:nth-child(2)')) # book title
    row[:comic_id] = clip_id(tr.css('td:nth-child(2) > a'))
    row[:label] = tr.css('td:nth-child(3)').text # book label
    row[:volume] = tr.css('td:nth-child(4)').text # volume
    row[:author] = tr.css('td:nth-child(5)').text # author name
    row[:publisher] = tr.css('td:nth-child(6)').text # publisher
    row[:published_date] = tr.css('td:nth-child(7)').text # publication year/month
    row
  end
end
279
+
280
# Parses the magazine-volume source-search result table
# (div.resultTabD_subB).
#
# @param res_body [String] HTML of the search result page
# @return [Array<Hash>] one Hash per result row, each with :type 'magazine'
def parse_magazine_volume_search_result(res_body)
  rows = Nokogiri::HTML.parse(res_body).css('div.resultTabD_subB > div > table > tbody > tr')
  rows.map do |tr|
    cell = ->(n) { tr.css("td:nth-child(#{n})") }
    {
      type: 'magazine',
      magazine_title: clip_text(cell.call(2)), # magazine name
      magazine_id: clip_id(tr.css('td:nth-child(2) > a')),
      volume: cell.call(3).text, # volume / issue / serial number
      display_volume: cell.call(4).text, # displayed issue number
      display_sub_volume: cell.call(5).text, # auxiliary issue number
      publisher: cell.call(6).text, # publisher
      published_date: cell.call(7).text # displayed year/month
    }
  end
end
298
+
299
# Parses the material source-search result table (div.resultTabD_subC).
#
# @param res_body [String] HTML of the search result page
# @return [Array<Hash>] one Hash per result row, each with :type 'material'
def parse_material_search_result(res_body)
  rows = Nokogiri::HTML.parse(res_body).css('div.resultTabD_subC > div > table > tbody > tr')
  rows.map do |tr|
    cell = ->(n) { tr.css("td:nth-child(#{n})") }
    {
      type: 'material',
      material_title: clip_text(cell.call(2)), # material name
      material_id: clip_id(tr.css('td:nth-child(2) > a')),
      category: cell.call(3).text, # classification / category
      number: cell.call(4).text, # order
      author: cell.call(5).text, # author name
      related_material_title: cell.call(6).text, # related items
      published_date: cell.call(7).text # period
    }
  end
end
317
+
318
# Parses the original-picture source-search result table
# (div.resultTabD_subD).
#
# @param res_body [String] HTML of the search result page
# @return [Array<Hash>] one Hash per result row, each with :type
#   'original_picture'
def parse_original_picture_search_result(res_body)
  rows = Nokogiri::HTML.parse(res_body).css('div.resultTabD_subD > div > table > tbody > tr')
  rows.map do |tr|
    cell = ->(n) { tr.css("td:nth-child(#{n})") }
    {
      type: 'original_picture',
      original_picture_title: clip_text(cell.call(2)), # original picture title
      original_picture_id: clip_id(tr.css('td:nth-child(2) > a')),
      recorded: cell.call(3).text, # recorded in
      number: cell.call(4).text, # order
      quantity: cell.call(5).text, # number of sheets
      author: cell.call(6).text, # author name
      published_date: cell.call(7).text, # first appearance
      writing_time: cell.call(8).text # writing period
    }
  end
end
337
+
338
# Parses the booklet source-search result table (div.resultTabD_subE).
#
# Column 5 of the table is not read.
# NOTE(review): whether skipping column 5 is intentional cannot be told from
# this code - confirm against the page layout.
#
# @param res_body [String] HTML of the search result page
# @return [Array<Hash>] one Hash per result row, each with :type 'booklet'
def parse_booklet_search_result(res_body)
  rows = Nokogiri::HTML.parse(res_body).css('div.resultTabD_subE > div > table > tbody > tr')
  rows.map do |tr|
    cell = ->(n) { tr.css("td:nth-child(#{n})") }
    {
      type: 'booklet',
      booklet_title: clip_text(cell.call(2)), # booklet name
      booklet_id: clip_id(tr.css('td:nth-child(2) > a')),
      series: cell.call(3).text, # series
      volume: cell.call(4).text, # volume
      author: cell.call(6).text, # author name
      publisher: cell.call(7).text, # publisher / circle name
      published_date: cell.call(8).text # publication year/month
    }
  end
end
356
+
357
# Parses a comic-works detail page.
#
# The main table is read row by row; the header cell text decides which
# result key receives the row's value. The "マンガID" row is not extracted
# (the original author notes it cannot be obtained because of an error in
# the page's HTML structure). When a work has several authors there are
# several author authority IDs; that case is not handled yet.
#
# Two sub lists are appended: :comic_titles (complete book sets) and
# :magazine_works (works published in magazines). Materials, original
# pictures, other booklets and related works are not implemented because no
# sample pages were found.
#
# @param res_body [String] HTML of the detail page
# @return [Hash] extracted fields plus :comic_titles and :magazine_works
def parse_comic_works_result(res_body)
  text_fields = {
    'マンガ作品名' => :title,
    'マンガ作品名ヨミ' => :title_kana,
    '別題・副題・原題' => :sub_title,
    'ローマ字表記' => :title_alphabet,
    '著者(責任表示)' => :author,
    '公表時期' => :published_date,
    '出典(初出)' => :source,
    'マンガ作品紹介文・解説' => :introduction,
    '分類' => :category,
    'タグ' => :tags,
    'レイティング' => :rating
  }

  doc = Nokogiri::HTML.parse(res_body)
  result = {}
  doc.css('body > article > div.main > section > table > tbody > tr').each do |tr|
    label = tr.css('th:nth-child(1)').text
    if label == '著者典拠ID'
      result[:author_id] = clip_id(tr.css('td > a'))
    elsif text_fields.key?(label)
      result[text_fields[label]] = tr.css('td').text
    end
  end

  # Collects the data rows (rows that contain at least one td) of every
  # table matched by the selector.
  data_rows = lambda do |selector|
    doc.css(selector).flat_map { |table| table.css('tr').to_a }.reject { |tr| tr.css('td').empty? }
  end

  # Complete book sets
  title_rows = data_rows.call('body > article > div.sub > section:nth-child(1) table')
  result[:comic_titles] = title_rows.map do |tr|
    {
      title: clip_text(tr.css('td:nth-child(1)')),
      comic_titles_id: clip_id(tr.css('td:nth-child(1) > a')),
      author: tr.css('td:nth-child(2)').text,
      total_comic_volume: tr.css('td:nth-child(3)').text
    }
  end

  # Works published in magazines
  works_rows = data_rows.call('body > article > div.sub > section:nth-child(2) table')
  result[:magazine_works] = works_rows.map do |tr|
    {
      title: clip_text(tr.css('td:nth-child(1)')),
      magazine_works_id: clip_id(tr.css('td:nth-child(1) > a')),
      author: tr.css('td:nth-child(2)').text,
      magazine_title: tr.css('td:nth-child(3)').text,
      published_date: tr.css('td:nth-child(4)').text
    }
  end

  result
end
411
+
412
# Parses a comic-titles (complete book set) detail page.
#
# The main table is read row by row; the header cell text decides which
# result key receives the row's value. Rows whose value is a link are
# reduced to an id with clip_id. The result keys :origina, :origina_kana and
# :langage are spelled this way in the returned Hash and are left unchanged.
#
# The individual volumes are appended under :comics.
#
# @param res_body [String] HTML of the detail page
# @return [Hash] extracted fields plus :comics
def parse_comic_titles_result(res_body)
  link_fields = {
    '作品ID' => :comic_works_id,
    '著者典拠ID' => :author_id
  }
  text_fields = {
    '単行本全巻名' => :title,
    '単行本全巻名 ヨミ' => :title_kana,
    '単行本全巻名 追記' => :title_append,
    '単行本全巻名 追記 ヨミ' => :title_append_kana,
    '単行本全巻名 別版表示' => :title_other,
    '単行本全巻数' => :total_comic_volume,
    '責任表示' => :responsible,
    '作者・著者' => :author,
    '作者・著者 ヨミ' => :author_kana,
    '原作・原案' => :origina,
    '原作・原案 ヨミ' => :origina_kana,
    '協力者' => :collaborator,
    '協力者 ヨミ' => :collaborator_kana,
    '標目' => :headings,
    '単行本レーベル' => :label,
    '単行本レーベル ヨミ' => :label_kana,
    'シリーズ' => :series,
    'シリーズ ヨミ' => :series_kana,
    '出版者名' => :publisher,
    '出版地' => :published_area,
    '縦の長さ×横の長さ' => :size,
    'ISBNなどのセットコード' => :isbn,
    '言語区分' => :langage,
    '分類' => :category,
    'レイティング' => :rating,
    '単行本全巻紹介文' => :introduction,
    '単行本全巻タグ' => :tags,
    '単行本全巻備考' => :note
  }

  doc = Nokogiri::HTML.parse(res_body)
  result = {}
  doc.css('body > article > div.main > section > table > tbody > tr').each do |tr|
    label = tr.css('th:nth-child(1)').text
    if link_fields.key?(label)
      result[link_fields[label]] = clip_id(tr.css('td > a'))
    elsif text_fields.key?(label)
      result[text_fields[label]] = tr.css('td').text
    end
  end

  # Individual book volumes; rows without any td are skipped.
  volume_rows = doc.css('body > article > div.sub > section:nth-child(1) table tbody tr')
  result[:comics] = volume_rows.reject { |tr| tr.css('td').empty? }.map do |tr|
    {
      title: clip_text(tr.css('td:nth-child(1)')),
      comic_id: clip_id(tr.css('td:nth-child(1) > a')),
      comic_title_append: tr.css('td:nth-child(2)').text,
      volume: tr.css('td:nth-child(3)').text
    }
  end

  result
end
463
+
464
# Parses the detail page of a single comic volume.
#
# Values are read by fixed row/column position from the four tables in
# div.main. If the first table is missing, the initial Hash (empty ids,
# nil sections) is returned unchanged. The result keys :comic_titles_is,
# :auhtor, :auhtor_kana and :langage are spelled this way in the returned
# Hash and are left unchanged.
#
# @param res_body [String] HTML of the detail page
# @return [Hash] :next_id, :prev_id and the four *_information Hashes
def parse_comic_result(res_body)
  result = {
    next_id: '',
    prev_id: '',
    basic_information: nil,
    author_information: nil,
    publisher_information: nil,
    other_information: nil
  }
  doc = Nokogiri::HTML.parse(res_body)
  return result if doc.css('body > article > div.main > section:nth-child(1) > table').empty?

  # Text of the td cell(s) at the given row; +col+ narrows to one td.
  text_at = lambda do |tbody, row, col = nil|
    selector = col ? "tr:nth-child(#{row}) > td:nth-child(#{col})" : "tr:nth-child(#{row}) > td"
    tbody.css(selector).text
  end
  section_body = ->(n) { doc.css("body > article > div.main > section:nth-child(#{n}) > table > tbody") }

  # Previous / next volume
  result[:prev_id] = clip_id(doc.css('body > article > header > ul > li.bookSkip > ul > li.prv > a'))
  result[:next_id] = clip_id(doc.css('body > article > header > ul > li.bookSkip > ul > li.nxt > a'))

  # Basic information
  basic = section_body.call(1)
  result[:basic_information] = {
    comic_titles_is: clip_id(basic.css('tr:nth-child(1) > td:nth-child(4) > a')),
    comic_title: text_at.call(basic, 2),
    comic_title_kana: text_at.call(basic, 3),
    comic_title_append: text_at.call(basic, 4),
    comic_title_append_kana: text_at.call(basic, 5),
    volume: text_at.call(basic, 6, 2),
    volume_sort_number: text_at.call(basic, 6, 4),
    volume_other_number: text_at.call(basic, 7),
    introduction: text_at.call(basic, 8)
  }

  # Author statement
  author = section_body.call(2)
  result[:author_information] = {
    responsible: text_at.call(author, 1),
    author_id: clip_id(author.css('tr:nth-child(2) > td > a:nth-child(1)')),
    headings: text_at.call(author, 3),
    auhtor: text_at.call(author, 4, 2),
    auhtor_kana: text_at.call(author, 4, 4),
    original_title: text_at.call(author, 5, 2),
    original_title_kana: text_at.call(author, 5, 4),
    collaborator: text_at.call(author, 6, 2),
    collaborator_kana: text_at.call(author, 6, 4)
  }

  # Publisher / label
  publisher = section_body.call(3)
  result[:publisher_information] = {
    publisher: text_at.call(publisher, 1),
    label: text_at.call(publisher, 3),
    label_kana: text_at.call(publisher, 4),
    label_number: text_at.call(publisher, 5, 2),
    series: text_at.call(publisher, 6, 2),
    series_kana: text_at.call(publisher, 6, 4)
  }

  # Other
  other = section_body.call(4)
  result[:other_information] = {
    published_date: text_at.call(other, 1, 2),
    first_price: text_at.call(other, 1, 4),
    isbn: text_at.call(other, 2),
    japan_book_number: text_at.call(other, 3),
    total_page: text_at.call(other, 4, 2),
    size: text_at.call(other, 4, 4),
    langage: text_at.call(other, 5, 2),
    published_area: text_at.call(other, 5, 4),
    rating: text_at.call(other, 6),
    category: text_at.call(other, 7),
    tags: text_at.call(other, 8),
    note: text_at.call(other, 9)
  }

  result
end
535
+
536
# Parses a magazine-works detail page.
#
# The main table is read row by row; the header cell text decides which
# result key receives the row's text. The magazine issues listed on the
# page are appended under :magazines.
#
# @param res_body [String] HTML of the detail page
# @return [Hash] extracted fields plus :magazines
def parse_magazine_works_result(res_body)
  text_fields = {
    '雑誌作品名' => :title,
    '雑誌作品名 ヨミ' => :title_kana,
    '作者・著者' => :author,
    '作者・著者 ヨミ' => :author_kana,
    '原作・原案' => :original,
    '原作・原案 ヨミ' => :original_kana,
    '協力者' => :collaborator,
    '協力者 ヨミ' => :collaborator_kana,
    'タグ' => :tags,
    '備考' => :note
  }

  doc = Nokogiri::HTML.parse(res_body)
  result = {}
  doc.css('body > article > div.main > section > table > tbody > tr').each do |tr|
    key = text_fields[tr.css('th:nth-child(1)').text]
    result[key] = tr.css('td').text if key
  end

  # Magazine issues; rows without any td are skipped.
  issue_rows = doc.css('body > article > div.sub > section > div.moreBlock table.infoTbl2 tbody tr')
  result[:magazines] = issue_rows.reject { |tr| tr.css('td').empty? }.map do |tr|
    {
      title: clip_text(tr.css('td:nth-child(1)')),
      magazine_id: clip_id(tr.css('td:nth-child(1) > a')),
      published_date: tr.css('td:nth-child(2)').text,
      display_volume: tr.css('td:nth-child(3)').text,
      display_sub_volume: tr.css('td:nth-child(4)').text
    }
  end

  result
end
568
+
569
# Parses a magazine-titles detail page.
#
# The main table is read row by row; the header cell text decides which
# result key receives the row's text. The '終刊巻' row spreads over three
# value cells (2nd, 4th and 6th cell) that go to :last_volume, :volume and
# :volume2. The result key :langage is spelled this way in the returned Hash
# and is left unchanged. The magazine issues listed on the page are appended
# under :magazines.
#
# @param res_body [String] HTML of the detail page
# @return [Hash] extracted fields plus :magazines
def parse_magazine_titles_result(res_body)
  text_fields = {
    '雑誌名' => :title,
    '雑誌名 ヨミ' => :title_kana,
    '出版者名' => :publisher,
    '出版地' => :published_area,
    '発行頻度' => :published_interval,
    '変遷' => :history,
    '紹介文' => :introduction,
    '創刊年月日' => :published_start_date,
    '終刊年月日' => :published_end_date,
    '終刊表示号数' => :display_last_volume,
    'ISSN' => :issn,
    '全国書誌番号' => :japan_book_number,
    '大阪タイトルコード' => :osaka_title_code,
    '言語区分' => :langage,
    'タグ' => :tags,
    '備考' => :note
  }

  doc = Nokogiri::HTML.parse(res_body)
  result = {}
  doc.css('body > article > div.main > section > table > tbody > tr').each do |tr|
    label = tr.css('th:nth-child(1)').text
    if label == '終刊巻'
      result[:last_volume] = tr.css('td:nth-child(2)').text
      result[:volume] = tr.css('td:nth-child(4)').text
      result[:volume2] = tr.css('td:nth-child(6)').text
    elsif text_fields.key?(label)
      result[text_fields[label]] = tr.css('td').text
    end
  end

  # Magazine issues; rows without any td are skipped.
  issue_rows = doc.css('body > article > div.sub > section > div.moreBlock table.infoTbl2 tbody tr')
  result[:magazines] = issue_rows.reject { |tr| tr.css('td').empty? }.map do |tr|
    {
      title: clip_text(tr.css('td:nth-child(1)')),
      magazine_id: clip_id(tr.css('td:nth-child(1) > a')),
      published_date: tr.css('td:nth-child(2)').text,
      display_volume: tr.css('td:nth-child(3)').text,
      display_sub_volume: tr.css('td:nth-child(4)').text
    }
  end

  result
end
611
+
612
# Parses the detail page of a single magazine issue.
#
# If the first table in div.main is missing, the initial Hash (empty ids,
# nil sections, empty :contents) is returned unchanged. Otherwise the first
# table fills :basic_information, the second fills :other_information, and
# the table in the sub area fills :contents.
#
# @param res_body [String] HTML of the detail page
# @return [Hash] :next_id, :prev_id, :basic_information, :other_information
#   and :contents
def parse_magazine_result(res_body)
  basic_fields = {
    'サブタイトル' => :sub_title,
    'サブタイトルヨミ' => :sub_title_kana,
    '表示年月日' => :display_date,
    '表示月日(合併)' => :display_date_merger,
    '発行年月日' => :published_date,
    '発行月日(合併)' => :published_date_merger,
    '発売年月日' => :release_date,
    '表示号数' => :display_volume,
    '補助号数' => :display_sub_volume
  }
  other_fields = {
    '出版者名' => :publisher,
    '発行人' => :publisher2,
    '編集人' => :publisher3,
    'ページ数' => :total_page,
    '製本' => :binding,
    '分類' => :category,
    'レイティング' => :rating,
    '縦の長さx横の長さ' => :size,
    '価格' => :price,
    '雑誌コード' => :magazine_code,
    'タグ' => :tags,
    '備考' => :note
  }

  result = {
    next_id: '',
    prev_id: '',
    basic_information: nil,
    other_information: nil,
    contents: []
  }
  doc = Nokogiri::HTML.parse(res_body)
  return result if doc.css('body > article > div.main > section:nth-child(1) > table').empty?

  # Next / previous issue
  result[:prev_id] = clip_id(doc.css('body > article > header > ul > li.bookSkip > ul > li.prv > a'))
  result[:next_id] = clip_id(doc.css('body > article > header > ul > li.bookSkip > ul > li.nxt > a'))

  # Basic information
  basic = {}
  doc.css('body > article > div.main > section:nth-child(1) > table > tbody > tr').each do |tr|
    label = tr.css('th:nth-child(1)').text
    if label == '表示合併号数'
      basic[:display_merger_volume] = tr.css('td').text.gsub(/\n/, '').strip
    elsif label == '巻'
      basic[:volume] = tr.css('td:nth-child(2)').text
      # The original author did not know suitable names for the next two values.
      basic[:volume2] = tr.css('td:nth-child(4)').text
      basic[:volume3] = tr.css('td:nth-child(6)').text
    elsif basic_fields.key?(label)
      basic[basic_fields[label]] = tr.css('td').text
    end
  end
  result[:basic_information] = basic

  # Publisher, page count, price
  other = {}
  doc.css('body > article > div.main > section:nth-child(2) > table > tbody > tr').each do |tr|
    key = other_fields[tr.css('th:nth-child(1)').text]
    other[key] = tr.css('td').text if key
  end
  result[:other_information] = other

  # Contents of the issue; rows without any td are skipped.
  content_rows = doc.css('body > article > div.sub > section:nth-child(2) > table > tbody > tr')
  content_rows.reject { |tr| tr.css('td').empty? }.each do |tr|
    result[:contents] << {
      category: tr.css('td:nth-child(1)').text,
      title: clip_text(tr.css('td:nth-child(2)')),
      magazine_works_id: clip_id(tr.css('td:nth-child(2) > a')),
      author: tr.css('td:nth-child(3)').text,
      sub_title: tr.css('td:nth-child(4)').text,
      start_page: tr.css('td:nth-child(5)').text,
      total_page: tr.css('td:nth-child(6)').text,
      note: tr.css('td:nth-child(7)').text,
      format: tr.css('td:nth-child(8)').text
    }
  end

  result
end
687
+
688
# Parses an author (authority record) detail page.
#
# The main table is read row by row; the header cell text decides which
# result key receives the row's text. The alias row
# ('別名(...)') is stored under :other_name as the id clipped from its link.
# The "マンガID" row is not extracted (the original author notes it cannot
# be obtained because of an error in the page's HTML structure), and
# multiple authority IDs for multiple authors are not handled yet.
#
# Two sub lists are appended: :comic_works (works published as books) and
# :comic_titles (complete book sets). Materials, original pictures, other
# booklets and related works are not implemented because no sample pages
# were found.
#
# @param res_body [String] HTML of the detail page
# @return [Hash] extracted fields plus :comic_works and :comic_titles
def parse_author_result(res_body)
  text_fields = {
    '標目' => :headings,
    '名称' => :name,
    'ヨミ' => :name_kana,
    'ローマ字' => :name_alphabet,
    'をも見よ参照' => :reference,
    '生年月日(結成年月日)' => :birthday,
    '没年月日' => :death_date
  }

  doc = Nokogiri::HTML.parse(res_body)
  result = {}
  doc.css('body > article > div.main > section > table > tbody > tr').each do |tr|
    label = tr.css('th:nth-child(1)').text
    if label == '別名(表記ミス・ユレ、本名、新字旧字など)'
      result[:other_name] = clip_id(tr.css('td > a'))
    elsif text_fields.key?(label)
      result[text_fields[label]] = tr.css('td').text
    end
  end

  # Collects the data rows (rows that contain at least one td) of every
  # table matched by the selector.
  data_rows = lambda do |selector|
    doc.css(selector).flat_map { |table| table.css('tr').to_a }.reject { |tr| tr.css('td').empty? }
  end

  # Works published as books (manga works)
  works_rows = data_rows.call('body > article > div.sub > section:nth-child(1) table')
  result[:comic_works] = works_rows.map do |tr|
    {
      title: clip_text(tr.css('td:nth-child(1)')),
      comic_works_id: clip_id(tr.css('td:nth-child(1) > a')),
      author: tr.css('td:nth-child(2)').text
    }
  end

  # Complete book sets
  title_rows = data_rows.call('body > article > div.sub > section:nth-child(2) table')
  result[:comic_titles] = title_rows.map do |tr|
    {
      title: clip_text(tr.css('td:nth-child(1)')),
      comic_titles_id: clip_id(tr.css('td:nth-child(1) > a')),
      author: tr.css('td:nth-child(2)').text,
      total_comic_volume: tr.css('td:nth-child(3)').text
    }
  end

  result
end
736
+
737
# Placeholder parser for a material detail page: not implemented yet, so the
# response body is ignored.
#
# @param res_body [String] HTML of the detail page (unused)
# @return [Hash] always an empty Hash
def parse_material_result(res_body)
  Hash.new
end
741
+
742
# Placeholder parser for an original-picture detail page: not implemented
# yet, so the response body is ignored.
#
# @param res_body [String] HTML of the detail page (unused)
# @return [Hash] always an empty Hash
def parse_original_picture_result(res_body)
  Hash.new
end
746
+
747
# Parses a booklet detail page into a nested Hash.
#
# The page is read as four consecutive <section> tables under
# `body > article > div.main`. Each table row is identified by the text of its
# first <th>; rows with an unrecognised header are ignored, so every sub-hash
# contains only the keys whose rows were actually present.
#
# Some key names are misspelled (:langage, :published_data,
# :published_data_note). They are kept as-is because they are part of the
# returned structure that callers read.
#
# @param res_body [String] HTML document handed to Nokogiri::HTML.parse
# @return [Hash] with the keys :basic_information, :author_information,
#   :publisher_information and :other_information, each mapping to a Hash
def parse_booklet_result(res_body)
  doc = Nokogiri::HTML.parse(res_body)

  basic_information = {}
  doc.css('body > article > div.main > section:nth-child(1) > table > tbody > tr').each do |tr|
    case tr.css('th:nth-child(1)').text
    when 'その他の冊子ID'
      # NOTE(review): the ID is clipped from the link in the 4th cell of the
      # booklet-ID row and stored under :comic_works_id. Presumably that cell
      # links to the related comic work; confirm against a real page.
      basic_information[:comic_works_id] = clip_id(tr.css('td:nth-child(4) > a'))
    when '分類'
      basic_information[:category] = tr.css('td').text
    when '冊子名'
      basic_information[:title] = tr.css('td').text.gsub(/\n/, '').strip
      # :title_kana is not populated: reading it via tr.next.css('td').text
      # was tried and does not work.
    when '冊子名追記'
      basic_information[:title_append] = tr.css('td').text
      # :title_append_kana is not populated for the same reason as :title_kana.
    when '巻'
      basic_information[:volume] = tr.css('td:nth-child(2)').text
      basic_information[:volume_sort_number] = tr.css('td:nth-child(4)').text
    when '冊子名別版表示'
      basic_information[:title_other] = tr.css('td').text
    when '紹介文'
      basic_information[:introduction] = tr.css('td').text
    end
  end

  author_information = {}
  doc.css('body > article > div.main > section:nth-child(2) > table > tbody > tr').each do |tr|
    case tr.css('th:nth-child(1)').text
    when '責任表示'
      author_information[:authority] = tr.css('td').text
    when '著者典拠ID'
      # clip_id needs the link node(s), not their text: it reads the href
      # attribute. Passing the text always produced an empty ID.
      # If the cell holds several links, only the first href is used.
      author_information[:author_id] = clip_id(tr.css('td a'))
    when '作者・著者'
      author_information[:author] = tr.css('td:nth-child(2)').text
      author_information[:author_kana] = tr.css('td:nth-child(4)').text
    when '原作・原案'
      author_information[:original] = tr.css('td:nth-child(2)').text
      author_information[:original_kana] = tr.css('td:nth-child(4)').text
    when '協力者'
      author_information[:collaborator] = tr.css('td:nth-child(2)').text
      author_information[:collaborator_kana] = tr.css('td:nth-child(4)').text
    when '標目'
      author_information[:headings] = tr.css('td').text
    end
  end

  publisher_information = {}
  doc.css('body > article > div.main > section:nth-child(3) > table > tbody > tr').each do |tr|
    case tr.css('th:nth-child(1)').text
    when '出版者名(サークル名)'
      publisher_information[:publisher] = tr.css('td').text
    when 'シリーズ'
      publisher_information[:series] = tr.css('td').text
    when 'カナ'
      publisher_information[:series_kana] = tr.css('td').text
    when 'シリーズ番号'
      publisher_information[:series_number] = tr.css('td').text
    when '頒布イベント'
      publisher_information[:published_event] = tr.css('td').text
    end
  end

  other_information = {}
  doc.css('body > article > div.main > section:nth-child(4) > table > tbody > tr').each do |tr|
    case tr.css('th:nth-child(1)').text
    when '初版発行年月日'
      other_information[:published_data] = tr.css('td:nth-child(2)').text
      other_information[:price] = tr.css('td:nth-child(4)').text
    when '発行日備考'
      other_information[:published_data_note] = tr.css('td').text
    when '全国書誌番号'
      other_information[:japan_book_number] = tr.css('td').text
    when '製本・造本形態'
      other_information[:format] = tr.css('td').text
    when 'ページ数'
      other_information[:total_page] = tr.css('td:nth-child(2)').text
      other_information[:size] = tr.css('td:nth-child(4)').text
    when '発行地'
      other_information[:published_area] = tr.css('td:nth-child(2)').text
      other_information[:publisher] = tr.css('td:nth-child(4)').text
    when '言語区分'
      other_information[:langage] = tr.css('td').text
    when 'レイティング'
      other_information[:rating] = tr.css('td').text
    when 'タグ'
      other_information[:tags] = tr.css('td').text
    when '備考'
      other_information[:note] = tr.css('td').text
    end
  end

  {
    basic_information: basic_information,
    author_information: author_information,
    publisher_information: publisher_information,
    other_information: other_information
  }
end
833
+
834
# Issues a search GET request through http_get.
#
# The fixed form fields (utf8 / commit) come first; every entry of +params+
# is then copied on top, so a caller-supplied key with the same name
# replaces the fixed value.
#
# @param uri [Object] request target, passed through to http_get unchanged
# @param params [Hash] additional query parameters
# @return whatever http_get returns
def search_request(uri, params)
  form_fields = params.each_key.each_with_object({ utf8: '✓', commit: '送信' }) do |name, fields|
    fields[name] = params[name]
  end
  http_get(uri, { query: form_fields })
end
844
+
845
# Extracts the display text of a node, preferring the text of its links.
#
# When the node contains at least one <a> descendant, the text of those
# links is returned; otherwise the node's own text is returned. This is
# best-effort: any StandardError (for example an argument that does not
# respond to #css) yields an empty string.
#
# @param node [#css, #text] typically a Nokogiri node or node set
# @return [String] the extracted text, or '' on failure
def clip_text(node)
  return node.text if node.css('a').empty?

  node.css('a').text
rescue StandardError
  ''
end
853
+
854
# Extracts the trailing numeric ID from a node's href attribute.
#
# Any query string (everything from the first '?') is removed before the
# trailing digits are looked up, so "/comic_works/123?page=2" yields "123".
# An href that starts with '?' has an empty path and therefore no ID; the
# previous slice `uri[0..index - 1]` wrapped around to the whole string in
# that case and returned a query value as if it were an ID.
#
# @param node [#attribute] typically a Nokogiri node or node set; for a node
#   set, the href of the first element is used
# @return [String, nil] the digits at the end of the href path; nil when the
#   path does not end in digits; '' when +node+ has no usable href
def clip_id(node)
  return '' unless node.respond_to?(:attribute)

  href_attr = node.attribute('href')
  return '' if href_attr.nil?

  # Keep only the part before the first '?'. This always matches, possibly
  # with an empty string.
  path = href_attr.value[/\A[^?]*/]
  path.slice(/[0-9]+$/)
rescue StandardError
  # Best-effort helper: callers rely on never getting an exception from here.
  ''
end
868
+ end
869
+ end
870
+ end