novel-downloader 1.4.1__py3-none-any.whl → 1.4.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51)
  1. novel_downloader/__init__.py +1 -1
  2. novel_downloader/cli/download.py +70 -11
  3. novel_downloader/config/adapter.py +43 -9
  4. novel_downloader/core/__init__.py +19 -1
  5. novel_downloader/core/downloaders/base.py +26 -29
  6. novel_downloader/core/downloaders/biquge.py +1 -3
  7. novel_downloader/core/downloaders/common.py +41 -7
  8. novel_downloader/core/downloaders/esjzone.py +1 -3
  9. novel_downloader/core/downloaders/linovelib.py +1 -3
  10. novel_downloader/core/downloaders/qianbi.py +1 -3
  11. novel_downloader/core/downloaders/qidian.py +61 -37
  12. novel_downloader/core/downloaders/sfacg.py +1 -3
  13. novel_downloader/core/downloaders/yamibo.py +1 -3
  14. novel_downloader/core/exporters/common/epub.py +153 -68
  15. novel_downloader/core/exporters/epub_util.py +1358 -0
  16. novel_downloader/core/exporters/linovelib/epub.py +147 -190
  17. novel_downloader/core/factory/downloader.py +3 -6
  18. novel_downloader/core/fetchers/base/browser.py +32 -12
  19. novel_downloader/core/fetchers/esjzone/browser.py +8 -6
  20. novel_downloader/core/fetchers/qidian/browser.py +62 -10
  21. novel_downloader/core/fetchers/yamibo/browser.py +3 -3
  22. novel_downloader/core/interfaces/downloader.py +13 -12
  23. novel_downloader/core/parsers/qidian/chapter_encrypted.py +11 -2
  24. novel_downloader/core/parsers/qidian/chapter_normal.py +8 -1
  25. novel_downloader/core/parsers/qidian/main_parser.py +7 -2
  26. novel_downloader/core/parsers/qidian/utils/__init__.py +2 -0
  27. novel_downloader/core/parsers/qidian/utils/helpers.py +9 -0
  28. novel_downloader/locales/en.json +2 -0
  29. novel_downloader/locales/zh.json +2 -0
  30. novel_downloader/models/__init__.py +2 -0
  31. novel_downloader/models/config.py +9 -0
  32. novel_downloader/resources/config/settings.toml +1 -0
  33. novel_downloader/tui/screens/home.py +13 -6
  34. novel_downloader/utils/constants.py +0 -29
  35. novel_downloader/utils/{model_loader.py → fontocr/model_loader.py} +2 -2
  36. novel_downloader/utils/fontocr/ocr_v1.py +2 -1
  37. novel_downloader/utils/fontocr/ocr_v2.py +2 -1
  38. novel_downloader/utils/text_utils/__init__.py +8 -1
  39. novel_downloader/utils/text_utils/text_cleaning.py +51 -0
  40. {novel_downloader-1.4.1.dist-info → novel_downloader-1.4.3.dist-info}/METADATA +5 -2
  41. {novel_downloader-1.4.1.dist-info → novel_downloader-1.4.3.dist-info}/RECORD +45 -50
  42. novel_downloader/core/exporters/epub_utils/__init__.py +0 -40
  43. novel_downloader/core/exporters/epub_utils/css_builder.py +0 -75
  44. novel_downloader/core/exporters/epub_utils/image_loader.py +0 -131
  45. novel_downloader/core/exporters/epub_utils/initializer.py +0 -100
  46. novel_downloader/core/exporters/epub_utils/text_to_html.py +0 -178
  47. novel_downloader/core/exporters/epub_utils/volume_intro.py +0 -60
  48. {novel_downloader-1.4.1.dist-info → novel_downloader-1.4.3.dist-info}/WHEEL +0 -0
  49. {novel_downloader-1.4.1.dist-info → novel_downloader-1.4.3.dist-info}/entry_points.txt +0 -0
  50. {novel_downloader-1.4.1.dist-info → novel_downloader-1.4.3.dist-info}/licenses/LICENSE +0 -0
  51. {novel_downloader-1.4.1.dist-info → novel_downloader-1.4.3.dist-info}/top_level.txt +0 -0
@@ -12,24 +12,17 @@ import html
12
12
  import json
13
13
  import re
14
14
  from pathlib import Path
15
- from typing import TYPE_CHECKING, Any
15
+ from typing import TYPE_CHECKING
16
16
 
17
- from ebooklib import epub
18
-
19
- from novel_downloader.core.exporters.epub_utils import (
20
- add_images_from_dir,
21
- add_images_from_list,
22
- chapter_txt_to_html,
23
- create_css_items,
24
- create_volume_intro,
25
- init_epub,
17
+ from novel_downloader.core.exporters.epub_util import (
18
+ Book,
19
+ Chapter,
20
+ StyleSheet,
21
+ Volume,
26
22
  )
27
23
  from novel_downloader.utils.constants import (
24
+ CSS_MAIN_PATH,
28
25
  DEFAULT_HEADERS,
29
- EPUB_IMAGE_FOLDER,
30
- EPUB_IMAGE_WRAPPER,
31
- EPUB_OPTIONS,
32
- EPUB_TEXT_FOLDER,
33
26
  )
34
27
  from novel_downloader.utils.file_utils import sanitize_filename
35
28
  from novel_downloader.utils.network import download_image
@@ -37,9 +30,15 @@ from novel_downloader.utils.network import download_image
37
30
  if TYPE_CHECKING:
38
31
  from .main_exporter import LinovelibExporter
39
32
 
33
+ _IMAGE_WRAPPER = (
34
+ '<div class="duokan-image-single illus"><img src="../Images/{filename}" /></div>'
35
+ )
40
36
  _IMG_TAG_PATTERN = re.compile(
41
37
  r'<img\s+[^>]*src=[\'"]([^\'"]+)[\'"][^>]*>', re.IGNORECASE
42
38
  )
39
+ _RAW_HTML_RE = re.compile(
40
+ r'^(<img\b[^>]*?\/>|<div class="duokan-image-single illus">.*?<\/div>)$', re.DOTALL
41
+ )
43
42
  _IMG_HEADERS = DEFAULT_HEADERS.copy()
44
43
  _IMG_HEADERS["Referer"] = "https://www.linovelib.com/"
45
44
 
@@ -79,12 +78,12 @@ def export_whole_book(
79
78
  return
80
79
 
81
80
  book_name = book_info.get("book_name", book_id)
81
+ book_author = book_info.get("author", "")
82
82
  exporter.logger.info(
83
83
  "%s Starting EPUB generation: %s (ID: %s)", TAG, book_name, book_id
84
84
  )
85
85
 
86
86
  # --- Generate intro + cover ---
87
- intro_html = _generate_intro_html(book_info)
88
87
  cover_path: Path | None = None
89
88
  cover_url = book_info.get("cover_url", "")
90
89
  if config.include_cover and cover_url:
@@ -99,63 +98,56 @@ def export_whole_book(
99
98
  exporter.logger.warning("Failed to download cover from %s", cover_url)
100
99
 
101
100
  # --- Initialize EPUB ---
102
- book, spine, toc_list = init_epub(
103
- book_info=book_info,
104
- book_id=book_id,
105
- intro_html=intro_html,
106
- book_cover_path=cover_path,
107
- include_toc=config.include_toc,
101
+ book = Book(
102
+ title=book_name,
103
+ author=book_author,
104
+ description=book_info.get("summary", ""),
105
+ cover_path=cover_path,
106
+ subject=book_info.get("subject", []),
107
+ serial_status=book_info.get("serial_status", ""),
108
+ word_count=book_info.get("word_count", ""),
109
+ uid=f"{exporter.site}_{book_id}",
108
110
  )
109
- for css in create_css_items(
110
- include_main=True,
111
- include_volume=True,
112
- ):
113
- book.add_item(css)
111
+ main_css = StyleSheet(
112
+ id="main_style",
113
+ content=CSS_MAIN_PATH.read_text(encoding="utf-8"),
114
+ filename="main.css",
115
+ )
116
+ book.add_stylesheet(main_css)
114
117
 
115
118
  # --- Compile chapters ---
116
119
  volumes = book_info.get("volumes", [])
117
120
  for vol_index, vol in enumerate(volumes, start=1):
118
- vol_name = vol.get("volume_name", "").strip() or f"Unknown Volume {vol_index}"
119
- vol_name = vol_name.replace(book_name, "").strip()
121
+ raw_vol_name = vol.get("volume_name", "")
122
+ raw_vol_name = raw_vol_name.replace(book_name, "").strip()
123
+ vol_name = raw_vol_name or f"Volume {vol_index}"
124
+ exporter.logger.info("Processing volume %d: %s", vol_index, vol_name)
125
+
120
126
  vol_cover_path: Path | None = None
121
127
  vol_cover_url = vol.get("volume_cover", "")
122
- if config.include_cover and vol_cover_url:
128
+ if vol_cover_url:
123
129
  vol_cover_path = download_image(
124
130
  vol_cover_url,
125
131
  img_dir,
126
- headers=_IMG_HEADERS,
127
132
  on_exist="skip",
128
133
  )
129
134
 
130
- exporter.logger.info("Processing volume %d: %s", vol_index, vol_name)
131
-
132
- # Volume intro
133
- vol_intro = epub.EpubHtml(
135
+ curr_vol = Volume(
136
+ id=f"vol_{vol_index}",
134
137
  title=vol_name,
135
- file_name=f"{EPUB_TEXT_FOLDER}/volume_intro_{vol_index}.xhtml",
136
- lang="zh",
138
+ intro=vol.get("volume_intro", ""),
139
+ cover=vol_cover_path,
137
140
  )
138
- vol_intro.content = _generate_vol_intro_html(
139
- vol_name,
140
- vol.get("volume_intro", ""),
141
- vol_cover_path,
142
- )
143
- vol_intro.add_link(
144
- href="../Styles/volume-intro.css",
145
- rel="stylesheet",
146
- type="text/css",
147
- )
148
- book.add_item(vol_intro)
149
- spine.append(vol_intro)
150
-
151
- section = epub.Section(vol_name, vol_intro.file_name)
152
- chapter_items: list[epub.EpubHtml] = []
153
141
 
154
142
  for chap in vol.get("chapters", []):
155
143
  chap_id = chap.get("chapterId")
156
144
  chap_title = chap.get("title", "")
157
145
  if not chap_id:
158
- exporter.logger.warning("%s Missing chapterId, skipping: %s", TAG, chap)
146
+ exporter.logger.warning(
147
+ "%s Missing chapterId, skipping: %s",
148
+ TAG,
149
+ chap,
150
+ )
159
151
  continue
160
152
 
161
153
  chapter_data = exporter._get_chapter(book_id, chap_id)
@@ -168,38 +160,30 @@ def export_whole_book(
168
160
  )
169
161
  continue
170
162
 
171
- title = chapter_data.get("title", "") or chap_id
163
+ title = chapter_data.get("title") or chap_id
172
164
  content: str = chapter_data.get("content", "")
173
- content, _ = _inline_remote_images(content, img_dir)
174
- chap_html = chapter_txt_to_html(
165
+ content, img_paths = _inline_remote_images(content, img_dir)
166
+ chap_html = _txt_to_html(
175
167
  chapter_title=title,
176
168
  chapter_text=content,
177
- author_say="",
169
+ extras={
170
+ "作者说": chapter_data.get("author_say", ""),
171
+ },
178
172
  )
179
-
180
- chap_path = f"{EPUB_TEXT_FOLDER}/{chap_id}.xhtml"
181
- item = epub.EpubHtml(title=chap_title, file_name=chap_path, lang="zh")
182
- item.content = chap_html
183
- item.add_link(
184
- href="../Styles/main.css",
185
- rel="stylesheet",
186
- type="text/css",
173
+ curr_vol.add_chapter(
174
+ Chapter(
175
+ id=f"c_{chap_id}",
176
+ title=title,
177
+ content=chap_html,
178
+ css=[main_css],
179
+ )
187
180
  )
188
- book.add_item(item)
189
- spine.append(item)
190
- chapter_items.append(item)
191
-
192
- toc_list.append((section, chapter_items))
181
+ for img_path in img_paths:
182
+ book.add_image(img_path)
193
183
 
194
- book = add_images_from_dir(book, img_dir)
184
+ book.add_volume(curr_vol)
195
185
 
196
186
  # --- 5. Finalize EPUB ---
197
- exporter.logger.info("%s Building TOC and spine...", TAG)
198
- book.toc = toc_list
199
- book.spine = spine
200
- book.add_item(epub.EpubNcx())
201
- book.add_item(epub.EpubNav())
202
-
203
187
  out_name = exporter.get_filename(
204
188
  title=book_name,
205
189
  author=book_info.get("author"),
@@ -208,7 +192,7 @@ def export_whole_book(
208
192
  out_path = out_dir / sanitize_filename(out_name)
209
193
 
210
194
  try:
211
- epub.write_epub(out_path, book, EPUB_OPTIONS)
195
+ book.export(out_path)
212
196
  exporter.logger.info("%s EPUB successfully written to %s", TAG, out_path)
213
197
  except Exception as e:
214
198
  exporter.logger.error("%s Failed to write EPUB to %s: %s", TAG, out_path, e)
@@ -243,18 +227,25 @@ def export_by_volume(
243
227
  return
244
228
 
245
229
  book_name = book_info.get("book_name", book_id)
230
+ book_author = book_info.get("author", "")
231
+ book_summary = book_info.get("summary", "")
246
232
  exporter.logger.info(
247
233
  "%s Starting EPUB generation: %s (ID: %s)", TAG, book_name, book_id
248
234
  )
249
- css_items = create_css_items(
250
- include_main=True,
251
- include_volume=True,
235
+
236
+ main_css = StyleSheet(
237
+ id="main_style",
238
+ content=CSS_MAIN_PATH.read_text(encoding="utf-8"),
239
+ filename="main.css",
252
240
  )
253
241
 
254
242
  # --- Compile columes ---
255
243
  volumes = book_info.get("volumes", [])
256
244
  for vol_index, vol in enumerate(volumes, start=1):
257
- vol_name = vol.get("volume_name", "").strip() or f"Unknown Volume {vol_index}"
245
+ raw_vol_name = vol.get("volume_name", "")
246
+ raw_vol_name = raw_vol_name.replace(book_name, "").strip()
247
+ vol_name = raw_vol_name or f"Volume {vol_index}"
248
+
258
249
  vol_cover_path: Path | None = None
259
250
  vol_cover_url = vol.get("volume_cover", "")
260
251
  if config.include_cover and vol_cover_url:
@@ -264,23 +255,28 @@ def export_by_volume(
264
255
  headers=_IMG_HEADERS,
265
256
  on_exist="skip",
266
257
  )
267
- intro_html = _generate_intro_html(vol)
268
-
269
- book, spine, toc_list = init_epub(
270
- book_info=vol,
271
- book_id=f"{book_id}_{vol_index}",
272
- intro_html=intro_html,
273
- book_cover_path=vol_cover_path,
274
- include_toc=config.include_toc,
258
+
259
+ book = Book(
260
+ title=vol_name,
261
+ author=book_author,
262
+ description=vol.get("volume_intro") or book_summary,
263
+ cover_path=vol_cover_path,
264
+ subject=book_info.get("subject", []),
265
+ serial_status=vol.get("serial_status", ""),
266
+ word_count=vol.get("word_count", ""),
267
+ uid=f"{exporter.site}_{book_id}_v{vol_index}",
275
268
  )
276
- for css in css_items:
277
- book.add_item(css)
269
+ book.add_stylesheet(main_css)
278
270
 
279
271
  for chap in vol.get("chapters", []):
280
272
  chap_id = chap.get("chapterId")
281
273
  chap_title = chap.get("title", "")
282
274
  if not chap_id:
283
- exporter.logger.warning("%s Missing chapterId, skipping: %s", TAG, chap)
275
+ exporter.logger.warning(
276
+ "%s Missing chapterId, skipping: %s",
277
+ TAG,
278
+ chap,
279
+ )
284
280
  continue
285
281
 
286
282
  chapter_data = exporter._get_chapter(book_id, chap_id)
@@ -296,29 +292,21 @@ def export_by_volume(
296
292
  title = chapter_data.get("title", "") or chap_id
297
293
  content: str = chapter_data.get("content", "")
298
294
  content, imgs = _inline_remote_images(content, img_dir)
299
- chap_html = chapter_txt_to_html(
295
+ chap_html = _txt_to_html(
300
296
  chapter_title=title,
301
297
  chapter_text=content,
302
- author_say="",
298
+ extras={},
303
299
  )
304
- add_images_from_list(book, imgs)
305
-
306
- chap_path = f"{EPUB_TEXT_FOLDER}/{chap_id}.xhtml"
307
- item = epub.EpubHtml(title=chap_title, file_name=chap_path, lang="zh")
308
- item.content = chap_html
309
- item.add_link(
310
- href="../Styles/main.css",
311
- rel="stylesheet",
312
- type="text/css",
300
+ book.add_chapter(
301
+ Chapter(
302
+ id=f"c_{chap_id}",
303
+ title=title,
304
+ content=chap_html,
305
+ css=[main_css],
306
+ )
313
307
  )
314
- book.add_item(item)
315
- spine.append(item)
316
- toc_list.append(item)
317
-
318
- book.toc = toc_list
319
- book.spine = spine
320
- book.add_item(epub.EpubNcx())
321
- book.add_item(epub.EpubNav())
308
+ for img_path in imgs:
309
+ book.add_image(img_path)
322
310
 
323
311
  out_name = exporter.get_filename(
324
312
  title=vol_name,
@@ -328,96 +316,20 @@ def export_by_volume(
328
316
  out_path = out_dir / sanitize_filename(out_name)
329
317
 
330
318
  try:
331
- epub.write_epub(out_path, book, EPUB_OPTIONS)
319
+ book.export(out_path)
332
320
  exporter.logger.info("%s EPUB successfully written to %s", TAG, out_path)
333
321
  except Exception as e:
334
322
  exporter.logger.error("%s Failed to write EPUB to %s: %s", TAG, out_path, e)
335
323
  return
336
324
 
337
325
 
338
- def _generate_intro_html(
339
- info: dict[str, Any],
340
- default_author: str = "",
341
- ) -> str:
342
- """
343
- Generate an HTML snippet containing book metadata and summary.
344
-
345
- :param info: A dict that may contain book info
346
- :param default_author: Fallback author name.
347
-
348
- :return: An HTML-formatted string.
349
- """
350
- title = info.get("book_name") or info.get("volume_name")
351
- author = info.get("author") or default_author
352
- status = info.get("serial_status")
353
- words = info.get("word_count")
354
- raw_summary = (info.get("summary") or info.get("volume_intro") or "").strip()
355
-
356
- html_parts = [
357
- "<h1>书籍简介</h1>",
358
- '<div class="list">',
359
- "<ul>",
360
- ]
361
- metadata = [
362
- ("书名", title),
363
- ("作者", author),
364
- ("状态", status),
365
- ("字数", words),
366
- ]
367
- for label, value in metadata:
368
- if value is not None and str(value).strip():
369
- safe = html.escape(str(value))
370
- if label == "书名":
371
- safe = f"《{safe}》"
372
- html_parts.append(f"<li>{label}: {safe}</li>")
373
-
374
- html_parts.extend(["</ul>", "</div>"])
375
-
376
- if raw_summary:
377
- html_parts.append('<p class="new-page-after"><br/></p>')
378
- html_parts.append("<h2>简介</h2>")
379
- for para in filter(None, (p.strip() for p in raw_summary.split("\n\n"))):
380
- safe_para = html.escape(para).replace("\n", "<br/>")
381
- html_parts.append(f"<p>{safe_para}</p>")
382
-
383
- return "\n".join(html_parts)
384
-
385
-
386
- def _generate_vol_intro_html(
387
- title: str,
388
- intro: str = "",
389
- cover_path: Path | None = None,
390
- ) -> str:
391
- """
392
- Generate the HTML snippet for a volume's introduction section.
393
-
394
- :param title: Title of the volume.
395
- :param intro: Optional introduction text for the volume.
396
- :param cover_path: Path of the volume cover.
397
- :return: HTML string representing the volume's intro section.
398
- """
399
- if cover_path is None:
400
- return create_volume_intro(title, intro)
401
-
402
- html_parts = [
403
- f'<h1 class="volume-title-line1">{title}</h1>',
404
- f'<img class="width100" src="../{EPUB_IMAGE_FOLDER}/{cover_path.name}" />',
405
- '<p class="new-page-after"><br/></p>',
406
- ]
407
-
408
- if intro.strip():
409
- html_parts.append(f'<p class="intro">{intro}</p>')
410
-
411
- return "\n".join(html_parts)
412
-
413
-
414
326
  def _inline_remote_images(
415
327
  content: str,
416
328
  image_dir: str | Path,
417
329
  ) -> tuple[str, list[Path]]:
418
330
  """
419
331
  Download every remote `<img src="...">` in `content` into `image_dir`,
420
- and replace the original tag with EPUB_IMAGE_WRAPPER
332
+ and replace the original tag with _IMAGE_WRAPPER
421
333
  pointing to the local filename.
422
334
 
423
335
  :param content: HTML/text of the chapter containing <img> tags.
@@ -441,9 +353,54 @@ def _inline_remote_images(
441
353
  return match.group(0)
442
354
 
443
355
  downloaded_images.append(local_path)
444
- return EPUB_IMAGE_WRAPPER.format(filename=local_path.name)
356
+ return _IMAGE_WRAPPER.format(filename=local_path.name)
445
357
  except Exception:
446
358
  return match.group(0)
447
359
 
448
360
  modified_content = _IMG_TAG_PATTERN.sub(_replace, content)
449
361
  return modified_content, downloaded_images
362
+
363
+
364
+ def _txt_to_html(
365
+ chapter_title: str,
366
+ chapter_text: str,
367
+ extras: dict[str, str] | None = None,
368
+ ) -> str:
369
+ """
370
+ Convert chapter text and author note to styled HTML.
371
+
372
+ :param chapter_title: Title of the chapter.
373
+ :param chapter_text: Main content of the chapter.
374
+ :param extras: Optional dict of titles and content, e.g. {"作者说": "text"}.
375
+ :return: Rendered HTML as a string.
376
+ """
377
+
378
+ def _render_block(text: str) -> str:
379
+ lines = (line.strip() for line in text.splitlines() if line.strip())
380
+ out = []
381
+ for line in lines:
382
+ # preserve raw HTML, otherwise wrap in <p>
383
+ if _RAW_HTML_RE.match(line):
384
+ out.append(line)
385
+ else:
386
+ out.append(f"<p>{html.escape(line)}</p>")
387
+ return "\n".join(out)
388
+
389
+ parts = []
390
+ parts.append(f"<h2>{html.escape(chapter_title)}</h2>")
391
+ parts.append(_render_block(chapter_text))
392
+
393
+ if extras:
394
+ for title, note in extras.items():
395
+ note = note.strip()
396
+ if not note:
397
+ continue
398
+ parts.extend(
399
+ [
400
+ "<hr />",
401
+ f"<p>{html.escape(title)}</p>",
402
+ _render_block(note),
403
+ ]
404
+ )
405
+
406
+ return "\n".join(parts)
@@ -22,14 +22,13 @@ from novel_downloader.core.downloaders import (
22
22
  )
23
23
  from novel_downloader.core.interfaces import (
24
24
  DownloaderProtocol,
25
- ExporterProtocol,
26
25
  FetcherProtocol,
27
26
  ParserProtocol,
28
27
  )
29
28
  from novel_downloader.models import DownloaderConfig
30
29
 
31
30
  DownloaderBuilder = Callable[
32
- [FetcherProtocol, ParserProtocol, ExporterProtocol, DownloaderConfig],
31
+ [FetcherProtocol, ParserProtocol, DownloaderConfig],
33
32
  DownloaderProtocol,
34
33
  ]
35
34
 
@@ -47,7 +46,6 @@ _site_map: dict[str, DownloaderBuilder] = {
47
46
  def get_downloader(
48
47
  fetcher: FetcherProtocol,
49
48
  parser: ParserProtocol,
50
- exporter: ExporterProtocol,
51
49
  site: str,
52
50
  config: DownloaderConfig,
53
51
  ) -> DownloaderProtocol:
@@ -56,7 +54,6 @@ def get_downloader(
56
54
 
57
55
  :param fetcher: Fetcher implementation
58
56
  :param parser: Parser implementation
59
- :param exporter: Exporter implementation
60
57
  :param site: Site name (e.g., 'qidian')
61
58
  :param config: Downloader configuration
62
59
 
@@ -66,11 +63,11 @@ def get_downloader(
66
63
 
67
64
  # site-specific
68
65
  if site_key in _site_map:
69
- return _site_map[site_key](fetcher, parser, exporter, config)
66
+ return _site_map[site_key](fetcher, parser, config)
70
67
 
71
68
  # fallback
72
69
  site_rules = load_site_rules()
73
70
  if site_key not in site_rules:
74
71
  raise ValueError(f"Unsupported site: {site}")
75
72
 
76
- return CommonDownloader(fetcher, parser, exporter, config, site_key)
73
+ return CommonDownloader(fetcher, parser, config, site_key)
@@ -201,19 +201,9 @@ class BaseBrowser(FetcherProtocol, abc.ABC):
201
201
  **kwargs: Any,
202
202
  ) -> str:
203
203
  if self._reuse_page:
204
- if not self._page:
205
- self._page = await self.context.new_page()
206
- page = self._page
204
+ return await self._fetch_with_reuse(url, wait_until, referer, **kwargs)
207
205
  else:
208
- page = await self.context.new_page()
209
-
210
- await page.goto(url, wait_until=wait_until, referer=referer)
211
- content = await page.content()
212
-
213
- if not self._reuse_page:
214
- await page.close()
215
-
216
- return str(content)
206
+ return await self._fetch_with_new(url, wait_until, referer, **kwargs)
217
207
 
218
208
  async def load_state(self) -> bool:
219
209
  """ """
@@ -286,6 +276,36 @@ class BaseBrowser(FetcherProtocol, abc.ABC):
286
276
  await self.init(headless=headless)
287
277
  self.logger.debug("[browser] Browser restarted (headless=%s).", headless)
288
278
 
279
+ async def _fetch_with_new(
280
+ self,
281
+ url: str,
282
+ wait_until: Literal["commit", "domcontentloaded", "load", "networkidle"]
283
+ | None = "load",
284
+ referer: str | None = None,
285
+ **kwargs: Any,
286
+ ) -> str:
287
+ page = await self.context.new_page()
288
+ try:
289
+ await page.goto(url, wait_until=wait_until, referer=referer, **kwargs)
290
+ html: str = await page.content()
291
+ return html
292
+ finally:
293
+ await page.close()
294
+
295
+ async def _fetch_with_reuse(
296
+ self,
297
+ url: str,
298
+ wait_until: Literal["commit", "domcontentloaded", "load", "networkidle"]
299
+ | None = "load",
300
+ referer: str | None = None,
301
+ **kwargs: Any,
302
+ ) -> str:
303
+ if not self._page:
304
+ self._page = await self.context.new_page()
305
+ await self._page.goto(url, wait_until=wait_until, referer=referer, **kwargs)
306
+ html: str = await self._page.content()
307
+ return html
308
+
289
309
  @property
290
310
  def hostname(self) -> str:
291
311
  return ""
@@ -49,15 +49,17 @@ class EsjzoneBrowser(BaseBrowser):
49
49
 
50
50
  login_page = await self.context.new_page()
51
51
 
52
- await login_page.goto(self.API_LOGIN_URL_1, wait_until="networkidle")
52
+ try:
53
+ await login_page.goto(self.API_LOGIN_URL_1, wait_until="networkidle")
53
54
 
54
- await login_page.fill('input[name="email"]', username)
55
- await login_page.fill('input[name="pwd"]', password)
55
+ await login_page.fill('input[name="email"]', username)
56
+ await login_page.fill('input[name="pwd"]', password)
56
57
 
57
- await login_page.click('a.btn-send[data-send="mem_login"]')
58
+ await login_page.click('a.btn-send[data-send="mem_login"]')
58
59
 
59
- await login_page.wait_for_load_state("networkidle")
60
- await login_page.close()
60
+ await login_page.wait_for_load_state("networkidle")
61
+ finally:
62
+ await login_page.close()
61
63
 
62
64
  self._is_logged_in = await self._check_login_status()
63
65