media_arts_db 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +9 -0
- data/.rspec +2 -0
- data/.travis.yml +3 -0
- data/CODE_OF_CONDUCT.md +13 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +21 -0
- data/README.md +111 -0
- data/Rakefile +7 -0
- data/bin/console +14 -0
- data/bin/setup +7 -0
- data/exe/media_arts_db +3 -0
- data/lib/media_arts_db/comic/comic.rb +29 -0
- data/lib/media_arts_db/comic.rb +870 -0
- data/lib/media_arts_db/http_base.rb +35 -0
- data/lib/media_arts_db/uri.rb +49 -0
- data/lib/media_arts_db/version.rb +3 -0
- data/lib/media_arts_db.rb +10 -0
- data/media_arts_db.gemspec +32 -0
- metadata +134 -0
@@ -0,0 +1,870 @@
|
|
1
|
+
module MediaArtsDb
|
2
|
+
|
3
|
+
# Constants and whitelists describing what the comic search accepts.
#
# Two identifiers were originally published with typos (AUHTORITY_ID and
# enable_optins_for_time_range). They are kept unchanged for existing
# callers; correctly spelled aliases are provided next to them.
module ComicSearchOption
  # Search targets.
  TARGET_COMIC = 1
  TARGET_MAGAZINE_VOLUME = 2
  TARGET_MATERIAL = 3
  TARGET_ORIGINAL_PICTURE = 4
  TARGET_BOOKLET = 5

  # Option keys for restricting the search to a date range.
  START_YEAR = 'start_year' # date range: from (year)
  START_MONTH = 'start_month' # date range: from (month)
  END_YEAR = 'end_year' # date range: to (year)
  END_MONTH = 'end_month' # date range: to (month)

  # Option keys selecting which field a search text applies to.
  ID = 1 # ID (ISBN etc.)
  TITLE = 2 # name / title
  VOLUME = 3 # volume / order
  PERSON_NAME = 4 # person name
  AUHTORITY_ID = 5 # authority ID (original, misspelled name)
  AUTHORITY_ID = AUHTORITY_ID # correctly spelled alias
  PUBLISHER = 6 # publisher
  LABEL = 7 # label
  BOOK_FORMAT = 8 # book format etc.
  TAG = 9 # tag
  CATEGORY = 10 # category
  NOTE = 11 # note
  MAGAZINE_DISPLAY_VOLUME = 12 # [magazine issue] displayed issue number
  MAGAZINE_DISPLAY_SUB_VOLUME = 13 # [magazine issue] supplementary issue number
  MAGAZINE_VOLUME = 14 # [magazine issue] volume / issue / running number

  # @return [Array<Integer>] every accepted search target
  def self.enable_targets
    [TARGET_COMIC, TARGET_MAGAZINE_VOLUME, TARGET_MATERIAL, TARGET_ORIGINAL_PICTURE, TARGET_BOOKLET]
  end

  # @return [Array<String>] option keys that describe the date range
  #   (original, misspelled method name)
  def self.enable_optins_for_time_range
    [START_YEAR, START_MONTH, END_YEAR, END_MONTH]
  end

  # Correctly spelled alias of {enable_optins_for_time_range}.
  #
  # @return [Array<String>]
  def self.enable_options_for_time_range
    enable_optins_for_time_range
  end

  # @return [Array<Integer>] field selectors valid for every target
  def self.enable_options
    [ID, TITLE, VOLUME, PERSON_NAME, AUHTORITY_ID, PUBLISHER, LABEL, BOOK_FORMAT, TAG, CATEGORY, NOTE]
  end

  # @return [Array<Integer>] the general field selectors plus the
  #   magazine-issue-specific ones
  def self.enable_options_for_magazine
    enable_options + [MAGAZINE_DISPLAY_VOLUME, MAGAZINE_DISPLAY_SUB_VOLUME, MAGAZINE_VOLUME]
  end
end
|
46
|
+
|
47
|
+
class Comic < HttpBase
|
48
|
+
|
49
|
+
include MediaArtsDb
|
50
|
+
include MediaArtsDb::ComicSearchOption
|
51
|
+
|
52
|
+
class << self
|
53
|
+
|
54
|
+
# Runs a keyword search and parses the matching result tab.
#
# Only one keyword is used per call; precedence is title, then magazine,
# then author. When none is given an empty array is returned and no
# request is made.
#
# @param title [String, nil] keyword sent as :keyword_title
# @param magazine [String, nil] keyword sent as :keyword_magazine
# @param author [String, nil] keyword sent as :keyword_author
# @param per [Integer] value of the :per request parameter
# @param page [Integer] value of the :page request parameter
# @return [Array<Hash>] rows produced by the parser for the chosen keyword
def search_by_keyword(title: nil, magazine: nil, author: nil, per: 100, page: 1)
  endpoint = MediaArtsDb.comic_search_uri
  query = { per: per, page: page }

  param_name, keyword, parser =
    if title
      [:keyword_title, title, :parse_title_search_result]
    elsif magazine
      [:keyword_magazine, magazine, :parse_magazine_search_result]
    elsif author
      [:keyword_author, author, :parse_author_search_result]
    end
  return [] unless param_name

  query[param_name] = keyword
  send(parser, search_request(endpoint, query))
end
|
73
|
+
|
74
|
+
# Runs a field-based ("source") search and parses the result table that
# belongs to +target+.
#
# @param target [Integer] one of ComicSearchOption.enable_targets; any
#   other value returns [] without issuing a request
# @param options [Hash] search conditions, handled per key:
#   * keys in ComicSearchOption.enable_optins_for_time_range are sent as
#     "msf[<key>]"
#   * keys in ComicSearchOption.enable_options fill the next free
#     "msf[selectN]" / "msf[textN]" pair
#   * the magazine-only keys of enable_options_for_magazine do the same,
#     but only when target is TARGET_MAGAZINE_VOLUME
#   * any other key is ignored
#   At most five select/text pairs are sent; further ones are dropped.
# @param per [Integer] value of the :per request parameter
# @param page [Integer] value of the :page request parameter
# @return [Array<Hash>] parsed result rows
def search_by_source(target: ComicSearchOption::TARGET_COMIC, options: {}, per: 100, page: 1)
  return [] unless ComicSearchOption.enable_targets.include?(target)

  uri = MediaArtsDb.comic_search_uri
  params = { per: per, page: page }
  params['msf[target][]'] = target
  option_index = 1
  options.each do |key, value|
    case key
    when *ComicSearchOption.enable_optins_for_time_range
      # Fixed: the name used to be built as "msf[#{key}" (no closing
      # bracket), unlike every other msf[...] parameter in this method.
      params["msf[#{key}]"] = value
    when *ComicSearchOption.enable_options
      next if option_index > 5
      params["msf[select#{option_index}]"] = key
      params["msf[text#{option_index}]"] = value
      option_index += 1
    when *ComicSearchOption.enable_options_for_magazine
      # The general keys were already consumed by the branch above, so only
      # the magazine-specific keys can reach this point.
      next unless target == ComicSearchOption::TARGET_MAGAZINE_VOLUME
      next if option_index > 5
      params["msf[select#{option_index}]"] = key
      params["msf[text#{option_index}]"] = value
      option_index += 1
    end
  end

  res_body = search_request(uri, params)
  case target
  when ComicSearchOption::TARGET_COMIC
    parse_comic_search_result(res_body)
  when ComicSearchOption::TARGET_MAGAZINE_VOLUME
    parse_magazine_volume_search_result(res_body)
  when ComicSearchOption::TARGET_MATERIAL
    parse_material_search_result(res_body)
  when ComicSearchOption::TARGET_ORIGINAL_PICTURE
    parse_original_picture_search_result(res_body)
  when ComicSearchOption::TARGET_BOOKLET
    parse_booklet_search_result(res_body)
  end
end
|
112
|
+
|
113
|
+
|
114
|
+
# Fetches the comic-works page for +id+ with http_get and returns the hash
# built by parse_comic_works_result.
def find_comic_works(id)
  page_body = http_get(MediaArtsDb.comic_comic_works_uri(id))
  parse_comic_works_result(page_body)
end
|
119
|
+
|
120
|
+
# Requests the comic-titles page for +id+ through search_request, passing
# +per+ and +page+ as request parameters, and returns the hash built by
# parse_comic_titles_result.
def find_comic_titles(id, per: 100, page: 1)
  endpoint = MediaArtsDb.comic_comic_titles_uri(id)
  page_body = search_request(endpoint, { per: per, page: page })
  parse_comic_titles_result(page_body)
end
|
126
|
+
|
127
|
+
# Fetches the comic page for +id+ with http_get and returns the hash built
# by parse_comic_result.
def find_comic(id)
  page_body = http_get(MediaArtsDb.comic_comic_uri(id))
  parse_comic_result(page_body)
end
|
132
|
+
|
133
|
+
# Fetches the magazine-works page for +id+ with http_get and returns the
# hash built by parse_magazine_works_result.
def find_magazine_works(id)
  page_body = http_get(MediaArtsDb.comic_magazine_works_uri(id))
  parse_magazine_works_result(page_body)
end
|
138
|
+
|
139
|
+
# Fetches the magazine-titles page for +id+ with http_get and returns the
# hash built by parse_magazine_titles_result.
def find_magazine_titles(id)
  page_body = http_get(MediaArtsDb.comic_magazine_titles_uri(id))
  parse_magazine_titles_result(page_body)
end
|
144
|
+
|
145
|
+
# Fetches the magazine page for +id+ with http_get and returns the hash
# built by parse_magazine_result.
def find_magazine(id)
  page_body = http_get(MediaArtsDb.comic_magazine_uri(id))
  parse_magazine_result(page_body)
end
|
150
|
+
|
151
|
+
# Fetches the author page for +id+ with http_get and returns the hash built
# by parse_author_result.
def find_author(id)
  page_body = http_get(MediaArtsDb.comic_author_uri(id))
  parse_author_result(page_body)
end
|
156
|
+
|
157
|
+
# Fetches the material page for +id+ with http_get and hands the body to
# parse_material_result.
def find_material(id)
  page_body = http_get(MediaArtsDb.comic_material_uri(id))
  parse_material_result(page_body)
end
|
162
|
+
|
163
|
+
# Fetches the original-picture page for +id+ with http_get and hands the
# body to parse_original_picture_result.
def find_original_picture(id)
  page_body = http_get(MediaArtsDb.comic_original_picture_uri(id))
  parse_original_picture_result(page_body)
end
|
168
|
+
|
169
|
+
# Fetches the booklet page for +id+ with http_get and hands the body to
# parse_booklet_result.
def find_booklet(id)
  page_body = http_get(MediaArtsDb.comic_booklet_uri(id))
  parse_booklet_result(page_body)
end
|
174
|
+
|
175
|
+
private
|
176
|
+
|
177
|
+
# Parses the title tab (div.resultTabA) of a keyword search page.
#
# @param res_body [String] HTML of the search result page
# @return [Array<Hash>] one hash per table row. :type and the matching
#   :comic_works_id / :magazine_works_id are only set when the link in the
#   first cell points at comic_works or magazine_works.
def parse_title_search_result(res_body)
  Nokogiri::HTML.parse(res_body).css('div.resultTabA table > tbody > tr').map do |tr|
    entry = {}
    title_link = tr.css('td:nth-child(1) > a')

    # The first cell links to either a comic_works or a magazine_works page.
    case title_link.attribute('href').value
    when /comic_works/
      entry[:type] = 'comic_works'
      entry[:comic_works_id] = clip_id(title_link)
    when /magazine_works/
      entry[:type] = 'magazine_works'
      entry[:magazine_works_id] = clip_id(title_link)
    end

    entry[:title] = clip_text(tr.css('td:nth-child(1)')) # work title
    # Columns 2..8: author, tags, all-volumes sets, magazine works,
    # materials, original pictures, other.
    %i[author tags total_comic_volume total_magazine_volume materials original_picture other]
      .each_with_index do |key, offset|
        entry[key] = tr.css("td:nth-child(#{offset + 2})").text
      end

    entry
  end
end
|
204
|
+
|
205
|
+
# Parses the magazine tab (div.resultTabB) of a keyword search page.
#
# @param res_body [String] HTML of the search result page
# @return [Array<Hash>] one hash per table row, :type is always
#   'magazine_titles'
def parse_magazine_search_result(res_body)
  Nokogiri::HTML.parse(res_body).css('div.resultTabB table > tbody > tr').map do |tr|
    column_text = ->(index) { tr.css("td:nth-child(#{index})").text }
    {
      type: 'magazine_titles',
      title: clip_text(tr.css('td:nth-child(1)')),
      magazine_titles_id: clip_id(tr.css('td:nth-child(1) > a')),
      publisher: column_text.call(2),
      published_interval: column_text.call(3),
      published_start_date: column_text.call(4),
      published_end_date: column_text.call(5),
      tags: column_text.call(6)
    }
  end
end
|
223
|
+
|
224
|
+
# Parses the author tab (div.resultTabC) of a keyword search page.
#
# @param res_body [String] HTML of the search result page
# @return [Array<Hash>] one hash per table row. :type starts as 'none'
#   (some records carry no link at all), becomes 'author' when the first
#   cell has a link, and is overwritten with 'magazine_works' when the
#   fifth cell has one.
def parse_author_search_result(res_body)
  Nokogiri::HTML.parse(res_body).css('div.resultTabC table > tbody > tr').map do |tr|
    entry = { type: 'none' }

    # Author name column: plain text or a link to the author record.
    name_cell = tr.css('td:nth-child(1)')
    name_link = tr.css('td:nth-child(1) > a')
    if name_link.empty?
      entry[:author_name] = name_cell.text
    else
      entry[:type] = 'author'
      entry[:author_id] = clip_id(name_link)
      entry[:author_name] = clip_text(name_cell)
    end

    entry[:author_name_kana] = tr.css('td:nth-child(2)').text
    entry[:related_author_id] = clip_id(tr.css('td:nth-child(3) > a'))
    entry[:related_author_name] = clip_text(tr.css('td:nth-child(3)')).gsub(/\n/, '').strip
    entry[:comic_title_quantity] = tr.css('td:nth-child(4)').text

    # Magazine works column: plain text or a link to the magazine work.
    works_cell = tr.css('td:nth-child(5)')
    works_link = tr.css('td:nth-child(5) > a')
    if works_link.empty?
      entry[:magazine_works_name] = works_cell.text.gsub(/\n/, '').strip
    else
      entry[:type] = 'magazine_works'
      entry[:magazine_works_id] = clip_id(works_link)
      entry[:magazine_works_name] = clip_text(works_cell).gsub(/\n/, '').strip
    end

    entry
  end
end
|
254
|
+
|
255
|
+
# Parses the comic table (div.resultTabD_subA) of a source search page.
#
# @param res_body [String] HTML of the search result page
# @return [Array<Hash>] one hash per table row; :type is always 'comic'.
#   :isbn is always present: it is the first text fragment of the first
#   cell when that cell holds more than one fragment, otherwise '-'.
def parse_comic_search_result(res_body)
  result = []
  doc = Nokogiri::HTML.parse(res_body)
  doc.css('div.resultTabD_subA > div > table > tbody > tr').each do |tr|
    row = {}
    row[:type] = 'comic'

    # Fixed: the previous code ran `.text.split('<br>')`. `.text` drops all
    # markup, so the string never contained '<br>' and the ISBN was never
    # picked up. Collecting the cell's text nodes splits at the <br>.
    # NOTE(review): presumes the cell is "<ISBN><br><other id>" when an ISBN
    # exists, as the original two-way branch implies - confirm against the
    # live page.
    id_parts = tr.css('td:nth-child(1)').xpath('.//text()')
                 .map { |node| node.text.strip }
                 .reject(&:empty?)
    row[:isbn] = id_parts.size > 1 ? id_parts.first : '-' # key exists even without an ISBN

    row[:comic_title] = clip_text(tr.css('td:nth-child(2)')) # book title
    row[:comic_id] = clip_id(tr.css('td:nth-child(2) > a'))
    row[:label] = tr.css('td:nth-child(3)').text # book label
    row[:volume] = tr.css('td:nth-child(4)').text # volume
    row[:author] = tr.css('td:nth-child(5)').text # author
    row[:publisher] = tr.css('td:nth-child(6)').text # publisher
    row[:published_date] = tr.css('td:nth-child(7)').text # publication year/month

    result << row
  end
  result
end
|
279
|
+
|
280
|
+
# Parses the magazine-issue table (div.resultTabD_subB) of a source search
# page.
#
# @param res_body [String] HTML of the search result page
# @return [Array<Hash>] one hash per table row, :type is always 'magazine'
def parse_magazine_volume_search_result(res_body)
  Nokogiri::HTML.parse(res_body).css('div.resultTabD_subB > div > table > tbody > tr').map do |tr|
    column_text = ->(index) { tr.css("td:nth-child(#{index})").text }
    {
      type: 'magazine',
      magazine_title: clip_text(tr.css('td:nth-child(2)')), # magazine name
      magazine_id: clip_id(tr.css('td:nth-child(2) > a')),
      volume: column_text.call(3),             # volume / issue / running number
      display_volume: column_text.call(4),     # displayed issue number
      display_sub_volume: column_text.call(5), # supplementary issue number
      publisher: column_text.call(6),          # publisher
      published_date: column_text.call(7)      # displayed year/month
    }
  end
end
|
298
|
+
|
299
|
+
# Parses the material table (div.resultTabD_subC) of a source search page.
#
# @param res_body [String] HTML of the search result page
# @return [Array<Hash>] one hash per table row, :type is always 'material'
def parse_material_search_result(res_body)
  Nokogiri::HTML.parse(res_body).css('div.resultTabD_subC > div > table > tbody > tr').map do |tr|
    column_text = ->(index) { tr.css("td:nth-child(#{index})").text }
    {
      type: 'material',
      material_title: clip_text(tr.css('td:nth-child(2)')), # material name
      material_id: clip_id(tr.css('td:nth-child(2) > a')),
      category: column_text.call(3),               # classification / category
      number: column_text.call(4),                 # order
      author: column_text.call(5),                 # author
      related_material_title: column_text.call(6), # related item
      published_date: column_text.call(7)          # period
    }
  end
end
|
317
|
+
|
318
|
+
# Parses the original-picture table (div.resultTabD_subD) of a source
# search page.
#
# @param res_body [String] HTML of the search result page
# @return [Array<Hash>] one hash per table row, :type is always
#   'original_picture'
def parse_original_picture_search_result(res_body)
  Nokogiri::HTML.parse(res_body).css('div.resultTabD_subD > div > table > tbody > tr').map do |tr|
    column_text = ->(index) { tr.css("td:nth-child(#{index})").text }
    {
      type: 'original_picture',
      original_picture_title: clip_text(tr.css('td:nth-child(2)')), # original picture title
      original_picture_id: clip_id(tr.css('td:nth-child(2) > a')),
      recorded: column_text.call(3),       # recorded in
      number: column_text.call(4),         # order
      quantity: column_text.call(5),       # number of sheets
      author: column_text.call(6),         # author
      published_date: column_text.call(7), # first appearance
      writing_time: column_text.call(8)    # writing period
    }
  end
end
|
337
|
+
|
338
|
+
# Parses the booklet table (div.resultTabD_subE) of a source search page.
#
# @param res_body [String] HTML of the search result page
# @return [Array<Hash>] one hash per table row, :type is always 'booklet'
def parse_booklet_search_result(res_body)
  Nokogiri::HTML.parse(res_body).css('div.resultTabD_subE > div > table > tbody > tr').map do |tr|
    column_text = ->(index) { tr.css("td:nth-child(#{index})").text }
    {
      type: 'booklet',
      booklet_title: clip_text(tr.css('td:nth-child(2)')), # booklet name
      booklet_id: clip_id(tr.css('td:nth-child(2) > a')),
      series: column_text.call(3),        # series
      volume: column_text.call(4),        # volume
      # NOTE(review): column 5 is not read - confirm that is intentional.
      author: column_text.call(6),        # author
      publisher: column_text.call(7),     # publisher / circle name
      published_date: column_text.call(8) # publication year/month
    }
  end
end
|
356
|
+
|
357
|
+
# Parses a comic-works detail page.
#
# The main table is read by its row header (th) text. The two sub sections
# are returned as arrays under :comic_titles (all-volume sets) and
# :magazine_works (works serialized in magazines).
#
# Known gaps carried over from the original implementation:
# * the 'マンガID' row is not read (the page's HTML structure is broken
#   there);
# * only a single author authority ID is read even when there are several
#   authors;
# * materials, original pictures, other booklets and related works are not
#   parsed because no sample page was available.
#
# @param res_body [String] HTML of the detail page
# @return [Hash]
def parse_comic_works_result(res_body)
  text_fields = {
    'マンガ作品名' => :title,
    'マンガ作品名ヨミ' => :title_kana,
    '別題・副題・原題' => :sub_title,
    'ローマ字表記' => :title_alphabet,
    '著者(責任表示)' => :author,
    '公表時期' => :published_date,
    '出典(初出)' => :source,
    'マンガ作品紹介文・解説' => :introduction,
    '分類' => :category,
    'タグ' => :tags,
    'レイティング' => :rating
  }

  doc = Nokogiri::HTML.parse(res_body)
  result = {}
  doc.css('body > article > div.main > section > table > tbody > tr').each do |tr|
    label = tr.css('th:nth-child(1)').text
    if label == '著者典拠ID'
      result[:author_id] = clip_id(tr.css('td > a'))
    elsif text_fields.key?(label)
      result[text_fields[label]] = tr.css('td').text
    end
  end

  # All-volume sets: every row with data cells, across all tables in the
  # first sub section.
  title_tables = doc.css('body > article > div.sub > section:nth-child(1) table')
  result[:comic_titles] = title_tables.flat_map do |table|
    data_rows = table.css('tr').reject { |tr| tr.css('td').empty? }
    data_rows.map do |tr|
      {
        title: clip_text(tr.css('td:nth-child(1)')),
        comic_titles_id: clip_id(tr.css('td:nth-child(1) > a')),
        author: tr.css('td:nth-child(2)').text,
        total_comic_volume: tr.css('td:nth-child(3)').text
      }
    end
  end

  # Works serialized in magazines: same shape, second sub section.
  works_tables = doc.css('body > article > div.sub > section:nth-child(2) table')
  result[:magazine_works] = works_tables.flat_map do |table|
    data_rows = table.css('tr').reject { |tr| tr.css('td').empty? }
    data_rows.map do |tr|
      {
        title: clip_text(tr.css('td:nth-child(1)')),
        magazine_works_id: clip_id(tr.css('td:nth-child(1) > a')),
        author: tr.css('td:nth-child(2)').text,
        magazine_title: tr.css('td:nth-child(3)').text,
        published_date: tr.css('td:nth-child(4)').text
      }
    end
  end

  result
end
|
411
|
+
|
412
|
+
# Parses a comic-titles (all-volume set) detail page.
#
# The main table is read by its row header (th) text; rows whose header is
# not listed below are ignored. The individual books of the set are
# returned as an array under :comics.
#
# The result keys :origina, :origina_kana and :langage are misspelled in
# the published interface and are reproduced unchanged.
#
# @param res_body [String] HTML of the detail page
# @return [Hash]
def parse_comic_titles_result(res_body)
  # Rows whose value is the id taken from the link inside the cell.
  link_fields = {
    '作品ID' => :comic_works_id,
    '著者典拠ID' => :author_id
  }
  # Rows whose value is the plain text of the cell.
  text_fields = {
    '単行本全巻名' => :title,
    '単行本全巻名 ヨミ' => :title_kana,
    '単行本全巻名 追記' => :title_append,
    '単行本全巻名 追記 ヨミ' => :title_append_kana,
    '単行本全巻名 別版表示' => :title_other,
    '単行本全巻数' => :total_comic_volume,
    '責任表示' => :responsible,
    '作者・著者' => :author,
    '作者・著者 ヨミ' => :author_kana,
    '原作・原案' => :origina,
    '原作・原案 ヨミ' => :origina_kana,
    '協力者' => :collaborator,
    '協力者 ヨミ' => :collaborator_kana,
    '標目' => :headings,
    '単行本レーベル' => :label,
    '単行本レーベル ヨミ' => :label_kana,
    'シリーズ' => :series,
    'シリーズ ヨミ' => :series_kana,
    '出版者名' => :publisher,
    '出版地' => :published_area,
    '縦の長さ×横の長さ' => :size,
    'ISBNなどのセットコード' => :isbn,
    '言語区分' => :langage,
    '分類' => :category,
    'レイティング' => :rating,
    '単行本全巻紹介文' => :introduction,
    '単行本全巻タグ' => :tags,
    '単行本全巻備考' => :note
  }

  doc = Nokogiri::HTML.parse(res_body)
  result = {}
  doc.css('body > article > div.main > section > table > tbody > tr').each do |tr|
    label = tr.css('th:nth-child(1)').text
    if link_fields.key?(label)
      result[link_fields[label]] = clip_id(tr.css('td > a'))
    elsif text_fields.key?(label)
      result[text_fields[label]] = tr.css('td').text
    end
  end

  # Individual books: every row with data cells in the first sub section.
  book_rows = doc.css('body > article > div.sub > section:nth-child(1) table tbody tr')
                 .reject { |tr| tr.css('td').empty? }
  result[:comics] = book_rows.map do |tr|
    {
      title: clip_text(tr.css('td:nth-child(1)')),
      comic_id: clip_id(tr.css('td:nth-child(1) > a')),
      comic_title_append: tr.css('td:nth-child(2)').text,
      volume: tr.css('td:nth-child(3)').text
    }
  end

  result
end
|
463
|
+
|
464
|
+
# Parses a single-book (comic) detail page.
#
# Values are read by fixed row/column position from the four tables in
# div.main. When the first table is missing, the skeleton result (empty
# ids, nil sections) is returned unchanged.
#
# The keys :comic_titles_is, :auhtor, :auhtor_kana and :langage are
# misspelled in the published interface and are reproduced unchanged.
#
# @param res_body [String] HTML of the detail page
# @return [Hash] with :next_id, :prev_id, :basic_information,
#   :author_information, :publisher_information and :other_information
def parse_comic_result(res_body)
  result = {
    next_id: '',
    prev_id: '',
    basic_information: nil,
    author_information: nil,
    publisher_information: nil,
    other_information: nil
  }
  doc = Nokogiri::HTML.parse(res_body)
  return result if doc.css('body > article > div.main > section:nth-child(1) > table').empty?

  # tbody of the n-th section table in div.main
  section_body = ->(n) { doc.css("body > article > div.main > section:nth-child(#{n}) > table > tbody") }
  # text of the td cell(s) in a row, optionally restricted to one column
  text_at = lambda do |tbody, row, col = nil|
    selector = "tr:nth-child(#{row}) > td"
    selector += ":nth-child(#{col})" if col
    tbody.css(selector).text
  end

  # previous / next volume links
  skip_list = 'body > article > header > ul > li.bookSkip > ul'
  result[:prev_id] = clip_id(doc.css("#{skip_list} > li.prv > a"))
  result[:next_id] = clip_id(doc.css("#{skip_list} > li.nxt > a"))

  # basic information
  tbody = section_body.call(1)
  result[:basic_information] = {
    comic_titles_is: clip_id(tbody.css('tr:nth-child(1) > td:nth-child(4) > a')),
    comic_title: text_at.call(tbody, 2),
    comic_title_kana: text_at.call(tbody, 3),
    comic_title_append: text_at.call(tbody, 4),
    comic_title_append_kana: text_at.call(tbody, 5),
    volume: text_at.call(tbody, 6, 2),
    volume_sort_number: text_at.call(tbody, 6, 4),
    volume_other_number: text_at.call(tbody, 7),
    introduction: text_at.call(tbody, 8)
  }

  # author information
  tbody = section_body.call(2)
  result[:author_information] = {
    responsible: text_at.call(tbody, 1),
    author_id: clip_id(tbody.css('tr:nth-child(2) > td > a:nth-child(1)')),
    headings: text_at.call(tbody, 3),
    auhtor: text_at.call(tbody, 4, 2),
    auhtor_kana: text_at.call(tbody, 4, 4),
    original_title: text_at.call(tbody, 5, 2),
    original_title_kana: text_at.call(tbody, 5, 4),
    collaborator: text_at.call(tbody, 6, 2),
    collaborator_kana: text_at.call(tbody, 6, 4)
  }

  # publisher / label (row 2 of this table is not read)
  tbody = section_body.call(3)
  result[:publisher_information] = {
    publisher: text_at.call(tbody, 1),
    label: text_at.call(tbody, 3),
    label_kana: text_at.call(tbody, 4),
    label_number: text_at.call(tbody, 5, 2),
    series: text_at.call(tbody, 6, 2),
    series_kana: text_at.call(tbody, 6, 4)
  }

  # other
  tbody = section_body.call(4)
  result[:other_information] = {
    published_date: text_at.call(tbody, 1, 2),
    first_price: text_at.call(tbody, 1, 4),
    isbn: text_at.call(tbody, 2),
    japan_book_number: text_at.call(tbody, 3),
    total_page: text_at.call(tbody, 4, 2),
    size: text_at.call(tbody, 4, 4),
    langage: text_at.call(tbody, 5, 2),
    published_area: text_at.call(tbody, 5, 4),
    rating: text_at.call(tbody, 6),
    category: text_at.call(tbody, 7),
    tags: text_at.call(tbody, 8),
    note: text_at.call(tbody, 9)
  }

  result
end
|
535
|
+
|
536
|
+
# Parses a magazine-works detail page.
#
# The main table is read by its row header (th) text; rows whose header is
# not listed below are ignored. The issues the work appeared in are
# returned as an array under :magazines.
#
# @param res_body [String] HTML of the detail page
# @return [Hash]
def parse_magazine_works_result(res_body)
  text_fields = {
    '雑誌作品名' => :title,
    '雑誌作品名 ヨミ' => :title_kana,
    '作者・著者' => :author,
    '作者・著者 ヨミ' => :author_kana,
    '原作・原案' => :original,
    '原作・原案 ヨミ' => :original_kana,
    '協力者' => :collaborator,
    '協力者 ヨミ' => :collaborator_kana,
    'タグ' => :tags,
    '備考' => :note
  }

  doc = Nokogiri::HTML.parse(res_body)
  result = {}
  doc.css('body > article > div.main > section > table > tbody > tr').each do |tr|
    label = tr.css('th:nth-child(1)').text
    result[text_fields[label]] = tr.css('td').text if text_fields.key?(label)
  end

  # Magazine issues: rows with data cells only.
  issue_rows = doc.css('body > article > div.sub > section > div.moreBlock table.infoTbl2 tbody tr')
                  .reject { |tr| tr.css('td').empty? }
  result[:magazines] = issue_rows.map do |tr|
    {
      title: clip_text(tr.css('td:nth-child(1)')),
      magazine_id: clip_id(tr.css('td:nth-child(1) > a')),
      published_date: tr.css('td:nth-child(2)').text,
      display_volume: tr.css('td:nth-child(3)').text,
      display_sub_volume: tr.css('td:nth-child(4)').text
    }
  end

  result
end
|
568
|
+
|
569
|
+
# Parses a magazine-titles detail page.
#
# The main table is read by its row header (th) text; rows whose header is
# not handled below are ignored. The '終刊巻' row carries three values in
# its 2nd, 4th and 6th cells. The magazine's issues are returned as an
# array under :magazines.
#
# The key :langage is misspelled in the published interface and is
# reproduced unchanged.
#
# @param res_body [String] HTML of the detail page
# @return [Hash]
def parse_magazine_titles_result(res_body)
  text_fields = {
    '雑誌名' => :title,
    '雑誌名 ヨミ' => :title_kana,
    '出版者名' => :publisher,
    '出版地' => :published_area,
    '発行頻度' => :published_interval,
    '変遷' => :history,
    '紹介文' => :introduction,
    '創刊年月日' => :published_start_date,
    '終刊年月日' => :published_end_date,
    '終刊表示号数' => :display_last_volume,
    'ISSN' => :issn,
    '全国書誌番号' => :japan_book_number,
    '大阪タイトルコード' => :osaka_title_code,
    '言語区分' => :langage,
    'タグ' => :tags,
    '備考' => :note
  }

  doc = Nokogiri::HTML.parse(res_body)
  result = {}
  doc.css('body > article > div.main > section > table > tbody > tr').each do |tr|
    label = tr.css('th:nth-child(1)').text
    if label == '終刊巻'
      result[:last_volume] = tr.css('td:nth-child(2)').text
      result[:volume] = tr.css('td:nth-child(4)').text
      result[:volume2] = tr.css('td:nth-child(6)').text
    elsif text_fields.key?(label)
      result[text_fields[label]] = tr.css('td').text
    end
  end

  # Magazine issues: rows with data cells only.
  issue_rows = doc.css('body > article > div.sub > section > div.moreBlock table.infoTbl2 tbody tr')
                  .reject { |tr| tr.css('td').empty? }
  result[:magazines] = issue_rows.map do |tr|
    {
      title: clip_text(tr.css('td:nth-child(1)')),
      magazine_id: clip_id(tr.css('td:nth-child(1) > a')),
      published_date: tr.css('td:nth-child(2)').text,
      display_volume: tr.css('td:nth-child(3)').text,
      display_sub_volume: tr.css('td:nth-child(4)').text
    }
  end

  result
end
|
611
|
+
|
612
|
+
# Parses a single magazine-issue detail page.
#
# When the first table in div.main is missing, the skeleton result (empty
# ids, nil sections, empty :contents) is returned unchanged. Otherwise the
# two main tables are read by row header (th) text and the table of
# contents is returned as an array under :contents.
#
# @param res_body [String] HTML of the detail page
# @return [Hash] with :next_id, :prev_id, :basic_information,
#   :other_information and :contents
def parse_magazine_result(res_body)
  result = {
    next_id: '',
    prev_id: '',
    basic_information: nil,
    other_information: nil,
    contents: []
  }
  doc = Nokogiri::HTML.parse(res_body)
  return result if doc.css('body > article > div.main > section:nth-child(1) > table').empty?

  # previous / next issue links
  skip_list = 'body > article > header > ul > li.bookSkip > ul'
  result[:prev_id] = clip_id(doc.css("#{skip_list} > li.prv > a"))
  result[:next_id] = clip_id(doc.css("#{skip_list} > li.nxt > a"))

  # basic information
  basic_fields = {
    'サブタイトル' => :sub_title,
    'サブタイトルヨミ' => :sub_title_kana,
    '表示年月日' => :display_date,
    '表示月日(合併)' => :display_date_merger,
    '発行年月日' => :published_date,
    '発行月日(合併)' => :published_date_merger,
    '発売年月日' => :release_date,
    '表示号数' => :display_volume,
    '補助号数' => :display_sub_volume
  }
  basic = {}
  doc.css('body > article > div.main > section:nth-child(1) > table > tbody > tr').each do |tr|
    label = tr.css('th:nth-child(1)').text
    if label == '表示合併号数'
      basic[:display_merger_volume] = tr.css('td').text.gsub(/\n/, '').strip
    elsif label == '巻'
      # Three values share this row; the original author did not know
      # proper names for the second and third.
      basic[:volume] = tr.css('td:nth-child(2)').text
      basic[:volume2] = tr.css('td:nth-child(4)').text
      basic[:volume3] = tr.css('td:nth-child(6)').text
    elsif basic_fields.key?(label)
      basic[basic_fields[label]] = tr.css('td').text
    end
  end
  result[:basic_information] = basic

  # publisher, page count, price
  other_fields = {
    '出版者名' => :publisher,
    '発行人' => :publisher2,
    '編集人' => :publisher3,
    'ページ数' => :total_page,
    '製本' => :binding,
    '分類' => :category,
    'レイティング' => :rating,
    '縦の長さx横の長さ' => :size,
    '価格' => :price,
    '雑誌コード' => :magazine_code,
    'タグ' => :tags,
    '備考' => :note
  }
  other = {}
  doc.css('body > article > div.main > section:nth-child(2) > table > tbody > tr').each do |tr|
    label = tr.css('th:nth-child(1)').text
    other[other_fields[label]] = tr.css('td').text if other_fields.key?(label)
  end
  result[:other_information] = other

  # table of contents: rows with data cells only
  doc.css('body > article > div.sub > section:nth-child(2) > table > tbody > tr').each do |tr|
    next if tr.css('td').empty?

    column_text = ->(index) { tr.css("td:nth-child(#{index})").text }
    result[:contents] << {
      category: column_text.call(1),
      title: clip_text(tr.css('td:nth-child(2)')),
      magazine_works_id: clip_id(tr.css('td:nth-child(2) > a')),
      author: column_text.call(3),
      sub_title: column_text.call(4),
      start_page: column_text.call(5),
      total_page: column_text.call(6),
      note: column_text.call(7),
      format: column_text.call(8)
    }
  end

  result
end
|
687
|
+
|
688
|
+
# Parses an author detail page into a Hash.
#
# The main table contributes the author's own attributes (each stored as the
# text of the row's <td>, except :other_name which is the ID clipped from the
# row's link). The two sub sections contribute :comic_works and
# :comic_titles, each an Array of Hashes (one per table row that has <td>s).
#
# @param res_body [String] HTML of the author page
# @return [Hash] parsed author information
def parse_author_result(res_body)
  # Row heading => result key, for rows whose value is plain cell text.
  # The 'マンガID' row is deliberately absent: it cannot be retrieved because
  # of an error in the page's HTML structure.
  text_fields = {
    '標目' => :headings,
    '名称' => :name,
    'ヨミ' => :name_kana,
    'ローマ字' => :name_alphabet,
    'をも見よ参照' => :reference,
    '生年月日(結成年月日)' => :birthday,
    '没年月日' => :death_date
  }

  page = Nokogiri::HTML.parse(res_body)
  result = {}

  page.css('body > article > div.main > section > table > tbody > tr').each do |row|
    heading = row.css('th:nth-child(1)').text
    if heading == '別名(表記ミス・ユレ、本名、新字旧字など)'
      # When there are several authors there are several author authority IDs
      # as well; that case is not implemented yet (only one ID is clipped).
      result[:other_name] = clip_id(row.css('td > a'))
    elsif text_fields.key?(heading)
      result[text_fields[heading]] = row.css('td').text
    end
  end

  # Works published as collected volumes (comic works).
  works_tables = page.css('body > article > div.sub > section:nth-child(1) table')
  result[:comic_works] = works_tables.each_with_object([]) do |table, works|
    table.css('tr').each do |row|
      next if row.css('td').empty? # header rows carry no <td>

      works << {
        title: clip_text(row.css('td:nth-child(1)')),
        comic_works_id: clip_id(row.css('td:nth-child(1) > a')),
        author: row.css('td:nth-child(2)').text
      }
    end
  end

  # All collected volumes.
  titles_tables = page.css('body > article > div.sub > section:nth-child(2) table')
  result[:comic_titles] = titles_tables.each_with_object([]) do |table, titles|
    table.css('tr').each do |row|
      next if row.css('td').empty?

      titles << {
        title: clip_text(row.css('td:nth-child(1)')),
        comic_titles_id: clip_id(row.css('td:nth-child(1) > a')),
        author: row.css('td:nth-child(2)').text,
        total_comic_volume: row.css('td:nth-child(3)').text
      }
    end
  end

  # Materials, original pictures, booklets and related comic works are not
  # implemented because no sample pages could be found.

  result
end
|
736
|
+
|
737
|
+
# Placeholder parser for material detail pages.
#
# Not implemented yet: +res_body+ is ignored and an empty Hash is returned.
#
# @param res_body [String] response body (currently unused)
# @return [Hash] always an empty Hash
def parse_material_result(res_body)
  {}
end
|
741
|
+
|
742
|
+
# Placeholder parser for original-picture detail pages.
#
# Not implemented yet: +res_body+ is ignored and an empty Hash is returned.
#
# @param res_body [String] response body (currently unused)
# @return [Hash] always an empty Hash
def parse_original_picture_result(res_body)
  {}
end
|
746
|
+
|
747
|
+
# Parses a booklet ("その他の冊子") detail page into a nested Hash.
#
# Each of the four <section>s under div.main is scanned row by row; the text
# of the row's first <th> selects which key(s) are filled from the row's
# <td> cells. Rows with an unknown heading are ignored, so a key is simply
# absent when its row is not on the page.
#
# @param res_body [String] HTML of the booklet page
# @return [Hash] with the keys :basic_information, :author_information,
#   :publisher_information and :other_information, each holding a Hash
def parse_booklet_result(res_body)
  result = {
    basic_information: nil,
    author_information: nil,
    publisher_information: nil,
    other_information: nil
  }
  doc = Nokogiri::HTML.parse(res_body)

  basic_information = {}
  doc.css('body > article > div.main > section:nth-child(1) > table > tbody > tr').each do |tr|
    case tr.css('th:nth-child(1)').text
    when 'その他の冊子ID' then basic_information[:comic_works_id] = clip_id(tr.css('td:nth-child(4) > a'))
    when '分類' then basic_information[:category] = tr.css('td').text
    when '冊子名'
      basic_information[:title] = tr.css('td').text.gsub(/\n/, '').strip
      # basic_information[:title_kana] = tr.next.css('td').text # cannot be obtained this way
    when '冊子名追記'
      basic_information[:title_append] = tr.css('td').text
      # basic_information[:title_append_kana] = tr.next.css('td').text # cannot be obtained this way
    when '巻'
      basic_information[:volume] = tr.css('td:nth-child(2)').text
      basic_information[:volume_sort_number] = tr.css('td:nth-child(4)').text
    when '冊子名別版表示' then basic_information[:title_other] = tr.css('td').text
    when '紹介文' then basic_information[:introduction] = tr.css('td').text
    end
  end
  result[:basic_information] = basic_information

  author_information = {}
  doc.css('body > article > div.main > section:nth-child(2) > table > tbody > tr').each do |tr|
    case tr.css('th:nth-child(1)').text
    when '責任表示' then author_information[:authority] = tr.css('td').text
    when '著者典拠ID'
      # Hand clip_id the link nodes themselves: it reads their href attribute,
      # so passing the link text (a String) always yielded ''.
      author_information[:author_id] = clip_id(tr.css('td a'))
    when '作者・著者'
      author_information[:author] = tr.css('td:nth-child(2)').text
      author_information[:author_kana] = tr.css('td:nth-child(4)').text
    when '原作・原案'
      author_information[:original] = tr.css('td:nth-child(2)').text
      author_information[:original_kana] = tr.css('td:nth-child(4)').text
    when '協力者'
      author_information[:collaborator] = tr.css('td:nth-child(2)').text
      author_information[:collaborator_kana] = tr.css('td:nth-child(4)').text
    when '標目' then author_information[:headings] = tr.css('td').text
    end
  end
  result[:author_information] = author_information

  publisher_information = {}
  doc.css('body > article > div.main > section:nth-child(3) > table > tbody > tr').each do |tr|
    case tr.css('th:nth-child(1)').text
    when '出版者名(サークル名)' then publisher_information[:publisher] = tr.css('td').text
    when 'シリーズ' then publisher_information[:series] = tr.css('td').text
    when 'カナ' then publisher_information[:series_kana] = tr.css('td').text
    when 'シリーズ番号' then publisher_information[:series_number] = tr.css('td').text
    when '頒布イベント' then publisher_information[:published_event] = tr.css('td').text
    end
  end
  result[:publisher_information] = publisher_information

  # NOTE(review): the keys :published_data, :published_data_note and :langage
  # look like misspellings of "date"/"language"; they are kept as-is because
  # callers may already read them.
  other_information = {}
  doc.css('body > article > div.main > section:nth-child(4) > table > tbody > tr').each do |tr|
    case tr.css('th:nth-child(1)').text
    when '初版発行年月日'
      other_information[:published_data] = tr.css('td:nth-child(2)').text
      other_information[:price] = tr.css('td:nth-child(4)').text
    when '発行日備考' then other_information[:published_data_note] = tr.css('td').text
    when '全国書誌番号' then other_information[:japan_book_number] = tr.css('td').text
    when '製本・造本形態' then other_information[:format] = tr.css('td').text
    when 'ページ数'
      other_information[:total_page] = tr.css('td:nth-child(2)').text
      other_information[:size] = tr.css('td:nth-child(4)').text
    when '発行地'
      other_information[:published_area] = tr.css('td:nth-child(2)').text
      other_information[:publisher] = tr.css('td:nth-child(4)').text
    when '言語区分' then other_information[:langage] = tr.css('td').text
    when 'レイティング' then other_information[:rating] = tr.css('td').text
    when 'タグ' then other_information[:tags] = tr.css('td').text
    when '備考' then other_information[:note] = tr.css('td').text
    end
  end
  result[:other_information] = other_information

  result
end
|
833
|
+
|
834
|
+
# Issues a search GET request through http_get.
#
# The caller's params are layered on top of the two fixed form fields
# (utf8 and commit); a caller-supplied value for the same key wins.
#
# @param uri [Object] target passed straight through to http_get
# @param params [Hash] additional query parameters
# @return [Object] whatever http_get returns
def search_request(uri, params)
  form_fields = { utf8: '✓', commit: '送信' }
  params.each_pair { |name, value| form_fields[name] = value }
  http_get(uri, { query: form_fields })
end
|
844
|
+
|
845
|
+
# Extracts the display text of a table cell.
#
# When the cell contains one or more <a> elements, the text of those links is
# returned; otherwise the text of the cell itself.
#
# @param node [#css, #text] a Nokogiri node or node set (anything else is
#   treated as "no text")
# @return [String] the clipped text, or '' when +node+ cannot be searched or
#   reading it fails
def clip_text(node)
  # Explicit guard instead of relying on a rescued NoMethodError for nil etc.
  return '' unless node.respond_to?(:css)

  anchors = node.css('a') # query once and reuse
  anchors.empty? ? node.text : anchors.text
rescue StandardError
  # Best effort: a cell that cannot be read yields an empty string.
  ''
end
|
853
|
+
|
854
|
+
# Extracts the numeric ID that ends the path of a link's href.
#
# Any query string ("?..." and everything after it) is removed first, then
# the run of digits at the very end of what remains is returned.
#
# @param node [#attribute] a Nokogiri node or node set whose 'href'
#   attribute holds the link (anything else is treated as "no link")
# @return [String] the ID digits, or '' when there is no href, the path does
#   not end in digits, or reading the node fails
def clip_id(node)
  return '' unless node.respond_to?(:attribute)

  href = node.attribute('href')
  return '' unless href

  # Keep only the part before the first '?'. Splitting (rather than slicing
  # up to index - 1) also handles an href that starts with '?', where the
  # path is empty and must not fall back to the whole string.
  path = href.value.to_s.strip.split('?', 2).first.to_s

  # \z anchors at the end of the string; '' keeps the "no ID" result
  # consistent with the other failure paths instead of returning nil.
  path[/[0-9]+\z/] || ''
rescue StandardError
  # Best effort: a link that cannot be read yields an empty string.
  ''
end
|
868
|
+
end
|
869
|
+
end
|
870
|
+
end
|