novel-downloader 1.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (115) hide show
  1. novel_downloader/__init__.py +14 -0
  2. novel_downloader/cli/__init__.py +14 -0
  3. novel_downloader/cli/clean.py +134 -0
  4. novel_downloader/cli/download.py +132 -0
  5. novel_downloader/cli/interactive.py +67 -0
  6. novel_downloader/cli/main.py +45 -0
  7. novel_downloader/cli/settings.py +177 -0
  8. novel_downloader/config/__init__.py +52 -0
  9. novel_downloader/config/adapter.py +153 -0
  10. novel_downloader/config/loader.py +177 -0
  11. novel_downloader/config/models.py +173 -0
  12. novel_downloader/config/site_rules.py +97 -0
  13. novel_downloader/core/__init__.py +25 -0
  14. novel_downloader/core/downloaders/__init__.py +22 -0
  15. novel_downloader/core/downloaders/base_async_downloader.py +157 -0
  16. novel_downloader/core/downloaders/base_downloader.py +187 -0
  17. novel_downloader/core/downloaders/common_asynb_downloader.py +207 -0
  18. novel_downloader/core/downloaders/common_downloader.py +191 -0
  19. novel_downloader/core/downloaders/qidian_downloader.py +208 -0
  20. novel_downloader/core/factory/__init__.py +33 -0
  21. novel_downloader/core/factory/downloader_factory.py +149 -0
  22. novel_downloader/core/factory/parser_factory.py +62 -0
  23. novel_downloader/core/factory/requester_factory.py +106 -0
  24. novel_downloader/core/factory/saver_factory.py +49 -0
  25. novel_downloader/core/interfaces/__init__.py +32 -0
  26. novel_downloader/core/interfaces/async_downloader_protocol.py +37 -0
  27. novel_downloader/core/interfaces/async_requester_protocol.py +68 -0
  28. novel_downloader/core/interfaces/downloader_protocol.py +37 -0
  29. novel_downloader/core/interfaces/parser_protocol.py +40 -0
  30. novel_downloader/core/interfaces/requester_protocol.py +65 -0
  31. novel_downloader/core/interfaces/saver_protocol.py +61 -0
  32. novel_downloader/core/parsers/__init__.py +28 -0
  33. novel_downloader/core/parsers/base_parser.py +96 -0
  34. novel_downloader/core/parsers/common_parser/__init__.py +14 -0
  35. novel_downloader/core/parsers/common_parser/helper.py +321 -0
  36. novel_downloader/core/parsers/common_parser/main_parser.py +86 -0
  37. novel_downloader/core/parsers/qidian_parser/__init__.py +20 -0
  38. novel_downloader/core/parsers/qidian_parser/browser/__init__.py +13 -0
  39. novel_downloader/core/parsers/qidian_parser/browser/chapter_encrypted.py +498 -0
  40. novel_downloader/core/parsers/qidian_parser/browser/chapter_normal.py +97 -0
  41. novel_downloader/core/parsers/qidian_parser/browser/chapter_router.py +70 -0
  42. novel_downloader/core/parsers/qidian_parser/browser/main_parser.py +110 -0
  43. novel_downloader/core/parsers/qidian_parser/session/__init__.py +13 -0
  44. novel_downloader/core/parsers/qidian_parser/session/chapter_encrypted.py +451 -0
  45. novel_downloader/core/parsers/qidian_parser/session/chapter_normal.py +119 -0
  46. novel_downloader/core/parsers/qidian_parser/session/chapter_router.py +67 -0
  47. novel_downloader/core/parsers/qidian_parser/session/main_parser.py +113 -0
  48. novel_downloader/core/parsers/qidian_parser/session/node_decryptor.py +164 -0
  49. novel_downloader/core/parsers/qidian_parser/shared/__init__.py +38 -0
  50. novel_downloader/core/parsers/qidian_parser/shared/book_info_parser.py +95 -0
  51. novel_downloader/core/parsers/qidian_parser/shared/helpers.py +133 -0
  52. novel_downloader/core/requesters/__init__.py +31 -0
  53. novel_downloader/core/requesters/base_async_session.py +297 -0
  54. novel_downloader/core/requesters/base_browser.py +210 -0
  55. novel_downloader/core/requesters/base_session.py +243 -0
  56. novel_downloader/core/requesters/common_requester/__init__.py +18 -0
  57. novel_downloader/core/requesters/common_requester/common_async_session.py +96 -0
  58. novel_downloader/core/requesters/common_requester/common_session.py +126 -0
  59. novel_downloader/core/requesters/qidian_requester/__init__.py +22 -0
  60. novel_downloader/core/requesters/qidian_requester/qidian_broswer.py +377 -0
  61. novel_downloader/core/requesters/qidian_requester/qidian_session.py +202 -0
  62. novel_downloader/core/savers/__init__.py +20 -0
  63. novel_downloader/core/savers/base_saver.py +169 -0
  64. novel_downloader/core/savers/common_saver/__init__.py +13 -0
  65. novel_downloader/core/savers/common_saver/common_epub.py +232 -0
  66. novel_downloader/core/savers/common_saver/common_txt.py +176 -0
  67. novel_downloader/core/savers/common_saver/main_saver.py +86 -0
  68. novel_downloader/core/savers/epub_utils/__init__.py +27 -0
  69. novel_downloader/core/savers/epub_utils/css_builder.py +68 -0
  70. novel_downloader/core/savers/epub_utils/initializer.py +98 -0
  71. novel_downloader/core/savers/epub_utils/text_to_html.py +132 -0
  72. novel_downloader/core/savers/epub_utils/volume_intro.py +61 -0
  73. novel_downloader/core/savers/qidian_saver.py +22 -0
  74. novel_downloader/locales/en.json +91 -0
  75. novel_downloader/locales/zh.json +91 -0
  76. novel_downloader/resources/config/rules.toml +196 -0
  77. novel_downloader/resources/config/settings.yaml +73 -0
  78. novel_downloader/resources/css_styles/main.css +104 -0
  79. novel_downloader/resources/css_styles/volume-intro.css +56 -0
  80. novel_downloader/resources/images/volume_border.png +0 -0
  81. novel_downloader/resources/js_scripts/qidian_decrypt_node.js +82 -0
  82. novel_downloader/resources/json/replace_word_map.json +4 -0
  83. novel_downloader/resources/text/blacklist.txt +22 -0
  84. novel_downloader/utils/__init__.py +0 -0
  85. novel_downloader/utils/cache.py +24 -0
  86. novel_downloader/utils/constants.py +158 -0
  87. novel_downloader/utils/crypto_utils.py +144 -0
  88. novel_downloader/utils/file_utils/__init__.py +43 -0
  89. novel_downloader/utils/file_utils/io.py +252 -0
  90. novel_downloader/utils/file_utils/normalize.py +68 -0
  91. novel_downloader/utils/file_utils/sanitize.py +77 -0
  92. novel_downloader/utils/fontocr/__init__.py +23 -0
  93. novel_downloader/utils/fontocr/ocr_v1.py +304 -0
  94. novel_downloader/utils/fontocr/ocr_v2.py +658 -0
  95. novel_downloader/utils/hash_store.py +288 -0
  96. novel_downloader/utils/hash_utils.py +103 -0
  97. novel_downloader/utils/i18n.py +41 -0
  98. novel_downloader/utils/logger.py +104 -0
  99. novel_downloader/utils/model_loader.py +72 -0
  100. novel_downloader/utils/network.py +287 -0
  101. novel_downloader/utils/state.py +156 -0
  102. novel_downloader/utils/text_utils/__init__.py +27 -0
  103. novel_downloader/utils/text_utils/chapter_formatting.py +46 -0
  104. novel_downloader/utils/text_utils/diff_display.py +75 -0
  105. novel_downloader/utils/text_utils/font_mapping.py +31 -0
  106. novel_downloader/utils/text_utils/text_cleaning.py +57 -0
  107. novel_downloader/utils/time_utils/__init__.py +22 -0
  108. novel_downloader/utils/time_utils/datetime_utils.py +146 -0
  109. novel_downloader/utils/time_utils/sleep_utils.py +49 -0
  110. novel_downloader-1.1.0.dist-info/METADATA +157 -0
  111. novel_downloader-1.1.0.dist-info/RECORD +115 -0
  112. novel_downloader-1.1.0.dist-info/WHEEL +5 -0
  113. novel_downloader-1.1.0.dist-info/entry_points.txt +2 -0
  114. novel_downloader-1.1.0.dist-info/licenses/LICENSE +21 -0
  115. novel_downloader-1.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,106 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ novel_downloader.core.factory.requester_factory
5
+ -----------------------------------------------
6
+
7
+ This module implements a factory function for retrieving requester instances
8
+ based on the target novel platform (site).
9
+
10
+ - get_async_requester -> returns AsyncRequesterProtocol
11
+ - get_sync_requester -> returns RequesterProtocol
12
+ - get_requester -> dispatches to one of the above based on config.mode
13
+
14
+ To add support for new sites or modes, extend the `_site_map` accordingly.
15
+ """
16
+
17
+ from typing import Callable, Union
18
+
19
+ from novel_downloader.config import RequesterConfig, load_site_rules
20
+ from novel_downloader.core.interfaces import AsyncRequesterProtocol, RequesterProtocol
21
+ from novel_downloader.core.requesters import (
22
+ CommonAsyncSession,
23
+ CommonSession,
24
+ QidianBrowser,
25
+ QidianSession,
26
+ )
27
+
28
# Registry of site-specific requester classes, keyed first by lowercase site
# name and then by requester mode. get_sync_requester consults this table
# before falling back to the rule-driven CommonSession.
_site_map: dict[
    str,
    dict[str, Callable[[RequesterConfig], RequesterProtocol]],
] = {
    "qidian": {
        "session": QidianSession,
        "browser": QidianBrowser,
    },
}
37
+
38
+
39
def get_async_requester(
    site: str,
    config: RequesterConfig,
) -> AsyncRequesterProtocol:
    """
    Build the asynchronous requester for a site described in the site rules.

    :param site: Site name (e.g., 'qidian'); matched case-insensitively.
    :param config: Configuration for the requester.
    :return: A CommonAsyncSession built from the site's "profile" rule entry.
    :raises ValueError: If the site has no entry in the loaded site rules.
    """
    normalized = site.lower()
    rule = load_site_rules().get(normalized)
    if rule is None:
        raise ValueError(f"Unsupported site: {site}")
    return CommonAsyncSession(config, normalized, rule["profile"])
57
+
58
+
59
def get_sync_requester(
    site: str,
    config: RequesterConfig,
) -> RequesterProtocol:
    """
    Return a synchronous requester for the given site.

    A site-specific class registered in ``_site_map`` for the configured mode
    takes precedence; otherwise the rule-driven CommonSession is used.

    :param site: Site name (e.g., 'qidian'); matched case-insensitively.
    :param config: Configuration for the requester; ``config.mode`` selects
                   the site-specific implementation (case-insensitive).
    :return: An instance of a requester class
    :raises ValueError: If no site-specific class matches and the site has
                        no entry in the loaded site rules.
    """
    site_key = site.lower()
    # get_requester lowercases the mode before dispatching here; normalize it
    # the same way so that e.g. "Browser" does not silently miss the
    # site-specific class and fall through to CommonSession.
    mode = config.mode.lower()

    # site-specific implementation for this mode
    requester_cls = _site_map.get(site_key, {}).get(mode)
    if requester_cls is not None:
        return requester_cls(config)

    # fallback to CommonSession
    site_rule = load_site_rules().get(site_key)
    if site_rule is None:
        raise ValueError(f"Unsupported site: {site}")
    return CommonSession(config, site_key, site_rule["profile"])
86
+
87
+
88
def get_requester(
    site: str,
    config: RequesterConfig,
) -> Union[AsyncRequesterProtocol, RequesterProtocol]:
    """
    Dispatch to get_async_requester or get_sync_requester based on config.mode.

    The mode is compared case-insensitively:

    - 'async' -> get_async_requester
    - 'browser' or 'session' -> get_sync_requester

    :param site: Site name (e.g., 'qidian')
    :param config: Configuration for the requester
    :return: An instance of a requester class
    :raises ValueError: If config.mode is none of the modes listed above.
    """
    mode = config.mode.lower()
    if mode == "async":
        return get_async_requester(site, config)
    if mode in ("browser", "session"):
        return get_sync_requester(site, config)
    # Report the mode exactly as configured, not the lowercased copy.
    raise ValueError(f"Unknown mode '{config.mode}' for site '{site}'")
@@ -0,0 +1,49 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ novel_downloader.core.factory.saver_factory
5
+ -------------------------------------------
6
+
7
+ This module implements a factory function for creating saver instances
8
+ based on the site name, falling back to CommonSaver for rule-defined sites.
9
+
10
+ Currently supported:
11
+ - Site: 'qidian'
12
+ - QidianSaver
13
+
14
+ To add support for new sites or modes, extend the `_site_map` accordingly.
15
+ """
16
+
17
+ from novel_downloader.config import SaverConfig, load_site_rules
18
+ from novel_downloader.core.interfaces import SaverProtocol
19
+ from novel_downloader.core.savers import (
20
+ CommonSaver,
21
+ QidianSaver,
22
+ )
23
+
24
# Sites that have a dedicated saver class, keyed by lowercase site name.
# get_saver falls back to CommonSaver for sites that are not listed here.
_site_map = {
    "qidian": QidianSaver,
    # "biquge": ...
}
28
+
29
+
30
def get_saver(site: str, config: SaverConfig) -> SaverProtocol:
    """
    Pick and instantiate the saver for a site.

    A dedicated class from ``_site_map`` wins; any other site must appear in
    the loaded site rules, in which case the generic CommonSaver is used.

    :param site: Site name (e.g., 'qidian'); matched case-insensitively.
    :param config: Configuration for the saver.
    :return: An instance of a saver class.
    :raises ValueError: If the site has neither a dedicated saver nor an
                        entry in the site rules.
    """
    key = site.lower()

    dedicated = _site_map.get(key)
    if dedicated:
        return dedicated(config)

    # No dedicated saver: only rule-defined sites get the generic one.
    if key in load_site_rules():
        return CommonSaver(config, key)

    raise ValueError(f"Unsupported site: {site}")
@@ -0,0 +1,32 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ novel_downloader.core.interfaces
5
+ --------------------------------
6
+
7
+ This package centralizes the protocol definitions used across the
8
+ system to promote interface-based design and type-safe dependency
9
+ injection.
10
+
11
+ Included protocols:
12
+ - DownloaderProtocol, AsyncDownloaderProtocol
13
+ - ParserProtocol
14
+ - RequesterProtocol, AsyncRequesterProtocol
15
+ - SaverProtocol
16
+ """
17
+
18
+ from .async_downloader_protocol import AsyncDownloaderProtocol
19
+ from .async_requester_protocol import AsyncRequesterProtocol
20
+ from .downloader_protocol import DownloaderProtocol
21
+ from .parser_protocol import ParserProtocol
22
+ from .requester_protocol import RequesterProtocol
23
+ from .saver_protocol import SaverProtocol
24
+
25
+ __all__ = [
26
+ "AsyncDownloaderProtocol",
27
+ "AsyncRequesterProtocol",
28
+ "DownloaderProtocol",
29
+ "ParserProtocol",
30
+ "RequesterProtocol",
31
+ "SaverProtocol",
32
+ ]
@@ -0,0 +1,37 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ novel_downloader.core.interfaces.async_downloader_protocol
5
+ ----------------------------------------------------------
6
+
7
+ This module defines the AsyncDownloaderProtocol, a structural interface
8
+ that outlines the expected behavior of any asynchronous downloader class.
9
+ """
10
+
11
+ from typing import List, Protocol
12
+
13
+
14
class AsyncDownloaderProtocol(Protocol):
    """
    Structural interface for fully-asynchronous downloader classes.

    An implementation exposes two coroutines: a batch entry point that takes
    a list of book IDs, and a single-book entry point.

    The class is not decorated with ``runtime_checkable``, so it is meant for
    static type checking only and cannot be used with ``isinstance``.
    """

    async def download(self, book_ids: List[str]) -> None:
        """
        Batch download entry point.

        :param book_ids: List of book IDs to download.
        """
        ...

    async def download_one(self, book_id: str) -> None:
        """
        Download logic for a single book.

        :param book_id: The identifier of the book.
        """
        ...
@@ -0,0 +1,68 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ novel_downloader.core.interfaces.async_requester_protocol
5
+ --------------------------------------------------------
6
+
7
+ Defines the AsyncRequesterProtocol interface for fetching raw HTML or JSON
8
+ for book info pages, individual chapters, managing request lifecycle,
9
+ and optionally retrieving a user's authenticated bookcase — all in async style.
10
+ """
11
+
12
+ from typing import Optional, Protocol, runtime_checkable
13
+
14
+
15
@runtime_checkable
class AsyncRequesterProtocol(Protocol):
    """
    Structural interface for asynchronous requesters.

    An async requester must be able to fetch raw HTML/data for:
    - a book's info page,
    - a specific chapter page,
    - the user's bookcase page,
    and manage login/shutdown asynchronously.

    Because the protocol is ``runtime_checkable``, ``isinstance`` checks only
    verify that the members exist, not their signatures.
    """

    async def login(self, max_retries: int = 3, manual_login: bool = False) -> bool:
        """
        Attempt to log in asynchronously.

        :param max_retries: Maximum number of login attempts.
        :param manual_login: Whether to use a manual login flow
                             (exact semantics are up to the implementation).
        :return: True if login succeeded.
        """
        ...

    async def get_book_info(self, book_id: str, wait_time: Optional[int] = None) -> str:
        """
        Fetch the raw HTML (or JSON) of the book info page asynchronously.

        :param book_id: The book identifier.
        :param wait_time: Base number of seconds to wait before returning content.
        :return: The page content as a string.
        """
        ...

    async def get_book_chapter(
        self, book_id: str, chapter_id: str, wait_time: Optional[int] = None
    ) -> str:
        """
        Fetch the raw HTML (or JSON) of a single chapter asynchronously.

        :param book_id: The book identifier.
        :param chapter_id: The chapter identifier.
        :param wait_time: Base number of seconds to wait before returning content.
        :return: The chapter content as a string.
        """
        ...

    async def get_bookcase(self, wait_time: Optional[int] = None) -> str:
        """
        Retrieve the HTML content of the authenticated user's bookcase page
        asynchronously.

        NOTE(review): originally described as "optional", yet it is a declared
        protocol member, so conforming classes are expected to define it.

        :param wait_time: Base number of seconds to wait before returning content.
        :return: The HTML markup of the bookcase page.
        """
        ...

    async def shutdown(self) -> None:
        """
        Shutdown and clean up any resources (e.g., close aiohttp session).
        """
        ...
@@ -0,0 +1,37 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ novel_downloader.core.interfaces.downloader_protocol
5
+ ----------------------------------------------------
6
+
7
+ This module defines the DownloaderProtocol, a structural interface
8
+ that outlines the expected behavior of any downloader class.
9
+ """
10
+
11
+ from typing import List, Protocol
12
+
13
+
14
class DownloaderProtocol(Protocol):
    """
    Structural interface for synchronous downloader classes.

    An implementation exposes a batch entry point that takes a list of book
    IDs and a single-book entry point.

    The class is not decorated with ``runtime_checkable``, so it is meant for
    static type checking only and cannot be used with ``isinstance``.
    """

    def download(self, book_ids: List[str]) -> None:
        """
        Batch download entry point.

        :param book_ids: List of book IDs to download.
        """
        ...

    def download_one(self, book_id: str) -> None:
        """
        Download logic for a single book.

        :param book_id: The identifier of the book.
        """
        ...
@@ -0,0 +1,40 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ novel_downloader.core.interfaces.parser_protocol
5
+ ------------------------------------------------
6
+
7
+ Defines the ParserProtocol interface for extracting book metadata,
8
+ parsing individual chapter content, and setting parser context via book_id.
9
+ """
10
+
11
+ from typing import Any, Dict, Protocol, runtime_checkable
12
+
13
+
14
@runtime_checkable
class ParserProtocol(Protocol):
    """
    Structural interface for parsers.

    A parser must be able to:
    - extract book metadata from an HTML string,
    - extract a single chapter's data from an HTML string.
    """

    def parse_book_info(self, html_str: str) -> Dict[str, Any]:
        """
        Parse and return a dictionary of book information from the raw HTML.

        :param html_str: The HTML of a book's info page.
        :return: A dict containing metadata like title, author, chapters list, etc.
        """
        ...

    def parse_chapter(self, html_str: str, chapter_id: str) -> Dict[str, Any]:
        """
        Parse one chapter page and return its content.

        :param html_str: The HTML of the chapter page.
        :param chapter_id: Identifier of the chapter being parsed.
        :return: A dict holding the parsed chapter data (the exact keys are
                 defined by the implementation).
        """
        ...
@@ -0,0 +1,65 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ novel_downloader.core.interfaces.requester_protocol
5
+ --------------------------------------------------
6
+
7
+ Defines the RequesterProtocol interface for fetching raw HTML or JSON
8
+ for book info pages, individual chapters, managing request lifecycle,
9
+ and optionally retrieving a user's authenticated bookcase.
10
+ """
11
+
12
+ from typing import Optional, Protocol, runtime_checkable
13
+
14
+
15
@runtime_checkable
class RequesterProtocol(Protocol):
    """
    Structural interface for synchronous requesters.

    A requester must be able to fetch raw HTML/data for:
    - a book's info page,
    - a specific chapter page,
    - the user's bookcase page,
    and manage login/shutdown.

    Because the protocol is ``runtime_checkable``, ``isinstance`` checks only
    verify that the members exist, not their signatures.
    """

    def login(self, max_retries: int = 3, manual_login: bool = False) -> bool:
        """
        Attempt to log in.

        :param max_retries: Maximum number of login attempts.
        :param manual_login: Whether to use a manual login flow
                             (exact semantics are up to the implementation).
        :return: Success flag; presumably True when login succeeded, as in
                 the async variant of this protocol (confirm with
                 implementations).
        """
        ...

    def get_book_info(self, book_id: str, wait_time: Optional[int] = None) -> str:
        """
        Fetch the raw HTML (or JSON) of the book info page.

        :param book_id: The book identifier.
        :param wait_time: Base number of seconds to wait before returning content.
        :return: The page content as a string.
        """
        ...

    def get_book_chapter(
        self, book_id: str, chapter_id: str, wait_time: Optional[int] = None
    ) -> str:
        """
        Fetch the raw HTML (or JSON) of a single chapter.

        :param book_id: The book identifier.
        :param chapter_id: The chapter identifier.
        :param wait_time: Base number of seconds to wait before returning content.
        :return: The chapter content as a string.
        """
        ...

    def shutdown(self) -> None:
        """
        Shut down and clean up resources.
        """
        ...

    def get_bookcase(self, wait_time: Optional[int] = None) -> str:
        """
        Retrieve the HTML content of the authenticated user's bookcase page.

        NOTE(review): originally described as "optional", yet it is a declared
        protocol member, so conforming classes are expected to define it.

        :param wait_time: Base number of seconds to wait before returning content.
        :return: The HTML markup of the bookcase page.
        """
        ...
@@ -0,0 +1,61 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ novel_downloader.core.interfaces.saver_protocol
5
+ ------------------------------------------------
6
+
7
+ Defines the SaverProtocol interface for persisting completed books in
8
+ TXT, EPUB, Markdown, and PDF formats.
9
+ """
10
+
11
+ from typing import Protocol, runtime_checkable
12
+
13
+
14
@runtime_checkable
class SaverProtocol(Protocol):
    """
    Structural interface for savers that persist a completed book.

    Declares a general ``save`` entry point plus one method per output
    format (TXT, EPUB, Markdown, PDF).

    NOTE(review): the non-TXT formats are described as optional, but every
    method below is a declared protocol member, so conforming classes are
    expected to define all of them.
    """

    def save(self, book_id: str) -> None:
        """
        Save the book in the formats specified in config.
        If a method is not implemented or fails, log the error and continue.

        :param book_id: The book identifier (used for filename, lookup, etc.)
        """
        ...

    def save_as_txt(self, book_id: str) -> None:
        """
        Persist the assembled book as a .txt file.

        :param book_id: The book identifier (used for filename or lookup).
        """
        ...

    def save_as_epub(self, book_id: str) -> None:
        """
        Optional: Persist the assembled book as an .epub file.

        :param book_id: The book identifier.
        """
        ...

    def save_as_md(self, book_id: str) -> None:
        """
        Optional: Persist the assembled book as a Markdown (.md) file.

        :param book_id: The book identifier.
        """
        ...

    def save_as_pdf(self, book_id: str) -> None:
        """
        Optional: Persist the assembled book as a PDF file.

        :param book_id: The book identifier.
        """
        ...
@@ -0,0 +1,28 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ novel_downloader.core.parsers
5
+ -----------------------------
6
+
7
+ This package defines all site-specific parsing modules
8
+ for the novel_downloader framework.
9
+
10
+ Currently supported:
11
+ - Qidian (起点中文网) via browser-mode and session-mode parsers.
12
+
13
+ Modules:
14
+ - qidian_parser
15
+ - common_parser
16
+ """
17
+
18
+ from .common_parser import CommonParser
19
+ from .qidian_parser import (
20
+ QidianBrowserParser,
21
+ QidianSessionParser,
22
+ )
23
+
24
+ __all__ = [
25
+ "CommonParser",
26
+ "QidianBrowserParser",
27
+ "QidianSessionParser",
28
+ ]
@@ -0,0 +1,96 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ novel_downloader.core.parsers.base_parser
5
+ -----------------------------------------
6
+
7
+ This module defines the BaseParser abstract class, which implements the
8
+ ParserProtocol interface and provides a structured foundation for
9
+ site-specific parsers.
10
+
11
+ BaseParser manages internal parser state and enforces
12
+ a standard parsing interface for:
13
+ - Book info pages (e.g. metadata, chapter list)
14
+ - Chapter pages (e.g. textual content)
15
+ """
16
+
17
+ import abc
18
+ from pathlib import Path
19
+ from typing import Any, Dict, Optional
20
+
21
+ from novel_downloader.config import ParserConfig
22
+ from novel_downloader.core.interfaces import ParserProtocol
23
+
24
+
25
class BaseParser(ParserProtocol, abc.ABC):
    """
    Abstract base class for site-specific parsers.

    Stores the parser configuration, the current book ID, and the base cache
    directory taken from the configuration. Subclasses must implement
    ``parse_book_info`` and ``parse_chapter``; they may override
    ``_on_book_id_set`` to react to a change of book.
    """

    def __init__(self, config: ParserConfig):
        """
        Initialize the parser with a configuration object.

        :param config: ParserConfig object controlling parsing behavior;
                       its ``cache_dir`` becomes the base cache directory.
        """
        self._config = config
        # No book is in context until the ``book_id`` setter is used.
        self._book_id: Optional[str] = None

        self._base_cache_dir = Path(config.cache_dir)

    @abc.abstractmethod
    def parse_book_info(self, html: str) -> Dict[str, Any]:
        """
        Parse a book info page and extract metadata and chapter structure.

        Depending on the site structure, the return dict may include a
        flat `chapters` list or nested `volumes` with chapter groups.

        NOTE(review): ParserProtocol names this parameter ``html_str``;
        confirm whether the two names are meant to match.

        :param html: Raw HTML of the book info page.
        :return: Parsed metadata and chapter structure as a dictionary.
        """
        ...

    @abc.abstractmethod
    def parse_chapter(self, html_str: str, chapter_id: str) -> Dict[str, Any]:
        """
        Parse a single chapter page and extract its content.

        :param html_str: Raw HTML of the chapter page.
        :param chapter_id: Identifier of the chapter being parsed.
        :return: A dictionary with the parsed chapter data (the exact keys
                 are defined by the subclass).
        """
        ...

    @property
    def book_id(self) -> Optional[str]:
        """
        Current book ID in context.

        :return: The current book identifier, or None if none has been set.
        """
        return self._book_id

    @book_id.setter
    def book_id(self, value: str) -> None:
        """
        Set the current book ID, then invoke the ``_on_book_id_set`` hook.

        :param value: Book identifier.
        """
        self._book_id = value
        # Let subclasses refresh any per-book state.
        self._on_book_id_set()

    def _on_book_id_set(self) -> None:
        """
        Hook called when a new book ID is set.
        Subclasses can override this to initialize
        book-related folders or states. The base implementation does nothing.
        """
        pass
@@ -0,0 +1,14 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ novel_downloader.core.parsers.common_parser
5
+ -------------------------------------------
6
+
7
+ This module provides a CommonParser class that implements
8
+ general-purpose parsing logic for extracting novel metadata
9
+ and chapter content based on site-specific rules.
10
+ """
11
+
12
+ from .main_parser import CommonParser
13
+
14
+ __all__ = ["CommonParser"]