novel-downloader 1.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (109) hide show
  1. novel_downloader/__init__.py +14 -0
  2. novel_downloader/cli/__init__.py +14 -0
  3. novel_downloader/cli/clean.py +134 -0
  4. novel_downloader/cli/download.py +98 -0
  5. novel_downloader/cli/interactive.py +67 -0
  6. novel_downloader/cli/main.py +45 -0
  7. novel_downloader/cli/settings.py +177 -0
  8. novel_downloader/config/__init__.py +52 -0
  9. novel_downloader/config/adapter.py +150 -0
  10. novel_downloader/config/loader.py +177 -0
  11. novel_downloader/config/models.py +170 -0
  12. novel_downloader/config/site_rules.py +97 -0
  13. novel_downloader/core/__init__.py +25 -0
  14. novel_downloader/core/downloaders/__init__.py +20 -0
  15. novel_downloader/core/downloaders/base_downloader.py +187 -0
  16. novel_downloader/core/downloaders/common_downloader.py +192 -0
  17. novel_downloader/core/downloaders/qidian_downloader.py +208 -0
  18. novel_downloader/core/factory/__init__.py +21 -0
  19. novel_downloader/core/factory/downloader_factory.py +62 -0
  20. novel_downloader/core/factory/parser_factory.py +62 -0
  21. novel_downloader/core/factory/requester_factory.py +62 -0
  22. novel_downloader/core/factory/saver_factory.py +49 -0
  23. novel_downloader/core/interfaces/__init__.py +28 -0
  24. novel_downloader/core/interfaces/downloader_protocol.py +37 -0
  25. novel_downloader/core/interfaces/parser_protocol.py +40 -0
  26. novel_downloader/core/interfaces/requester_protocol.py +65 -0
  27. novel_downloader/core/interfaces/saver_protocol.py +61 -0
  28. novel_downloader/core/parsers/__init__.py +28 -0
  29. novel_downloader/core/parsers/base_parser.py +96 -0
  30. novel_downloader/core/parsers/common_parser/__init__.py +14 -0
  31. novel_downloader/core/parsers/common_parser/helper.py +321 -0
  32. novel_downloader/core/parsers/common_parser/main_parser.py +86 -0
  33. novel_downloader/core/parsers/qidian_parser/__init__.py +20 -0
  34. novel_downloader/core/parsers/qidian_parser/browser/__init__.py +13 -0
  35. novel_downloader/core/parsers/qidian_parser/browser/chapter_encrypted.py +498 -0
  36. novel_downloader/core/parsers/qidian_parser/browser/chapter_normal.py +97 -0
  37. novel_downloader/core/parsers/qidian_parser/browser/chapter_router.py +70 -0
  38. novel_downloader/core/parsers/qidian_parser/browser/main_parser.py +110 -0
  39. novel_downloader/core/parsers/qidian_parser/session/__init__.py +13 -0
  40. novel_downloader/core/parsers/qidian_parser/session/chapter_encrypted.py +451 -0
  41. novel_downloader/core/parsers/qidian_parser/session/chapter_normal.py +119 -0
  42. novel_downloader/core/parsers/qidian_parser/session/chapter_router.py +67 -0
  43. novel_downloader/core/parsers/qidian_parser/session/main_parser.py +113 -0
  44. novel_downloader/core/parsers/qidian_parser/session/node_decryptor.py +164 -0
  45. novel_downloader/core/parsers/qidian_parser/shared/__init__.py +38 -0
  46. novel_downloader/core/parsers/qidian_parser/shared/book_info_parser.py +95 -0
  47. novel_downloader/core/parsers/qidian_parser/shared/helpers.py +133 -0
  48. novel_downloader/core/requesters/__init__.py +27 -0
  49. novel_downloader/core/requesters/base_browser.py +210 -0
  50. novel_downloader/core/requesters/base_session.py +243 -0
  51. novel_downloader/core/requesters/common_requester/__init__.py +14 -0
  52. novel_downloader/core/requesters/common_requester/common_session.py +126 -0
  53. novel_downloader/core/requesters/qidian_requester/__init__.py +22 -0
  54. novel_downloader/core/requesters/qidian_requester/qidian_broswer.py +377 -0
  55. novel_downloader/core/requesters/qidian_requester/qidian_session.py +202 -0
  56. novel_downloader/core/savers/__init__.py +20 -0
  57. novel_downloader/core/savers/base_saver.py +169 -0
  58. novel_downloader/core/savers/common_saver/__init__.py +13 -0
  59. novel_downloader/core/savers/common_saver/common_epub.py +232 -0
  60. novel_downloader/core/savers/common_saver/common_txt.py +176 -0
  61. novel_downloader/core/savers/common_saver/main_saver.py +86 -0
  62. novel_downloader/core/savers/epub_utils/__init__.py +27 -0
  63. novel_downloader/core/savers/epub_utils/css_builder.py +68 -0
  64. novel_downloader/core/savers/epub_utils/initializer.py +98 -0
  65. novel_downloader/core/savers/epub_utils/text_to_html.py +132 -0
  66. novel_downloader/core/savers/epub_utils/volume_intro.py +61 -0
  67. novel_downloader/core/savers/qidian_saver.py +22 -0
  68. novel_downloader/locales/en.json +91 -0
  69. novel_downloader/locales/zh.json +91 -0
  70. novel_downloader/resources/config/rules.toml +196 -0
  71. novel_downloader/resources/config/settings.yaml +70 -0
  72. novel_downloader/resources/css_styles/main.css +104 -0
  73. novel_downloader/resources/css_styles/volume-intro.css +56 -0
  74. novel_downloader/resources/images/volume_border.png +0 -0
  75. novel_downloader/resources/js_scripts/qidian_decrypt_node.js +82 -0
  76. novel_downloader/resources/json/replace_word_map.json +4 -0
  77. novel_downloader/resources/text/blacklist.txt +22 -0
  78. novel_downloader/utils/__init__.py +0 -0
  79. novel_downloader/utils/cache.py +24 -0
  80. novel_downloader/utils/constants.py +158 -0
  81. novel_downloader/utils/crypto_utils.py +144 -0
  82. novel_downloader/utils/file_utils/__init__.py +43 -0
  83. novel_downloader/utils/file_utils/io.py +252 -0
  84. novel_downloader/utils/file_utils/normalize.py +68 -0
  85. novel_downloader/utils/file_utils/sanitize.py +77 -0
  86. novel_downloader/utils/fontocr/__init__.py +23 -0
  87. novel_downloader/utils/fontocr/ocr_v1.py +304 -0
  88. novel_downloader/utils/fontocr/ocr_v2.py +658 -0
  89. novel_downloader/utils/hash_store.py +288 -0
  90. novel_downloader/utils/hash_utils.py +103 -0
  91. novel_downloader/utils/i18n.py +41 -0
  92. novel_downloader/utils/logger.py +104 -0
  93. novel_downloader/utils/model_loader.py +72 -0
  94. novel_downloader/utils/network.py +287 -0
  95. novel_downloader/utils/state.py +156 -0
  96. novel_downloader/utils/text_utils/__init__.py +27 -0
  97. novel_downloader/utils/text_utils/chapter_formatting.py +46 -0
  98. novel_downloader/utils/text_utils/diff_display.py +75 -0
  99. novel_downloader/utils/text_utils/font_mapping.py +31 -0
  100. novel_downloader/utils/text_utils/text_cleaning.py +57 -0
  101. novel_downloader/utils/time_utils/__init__.py +22 -0
  102. novel_downloader/utils/time_utils/datetime_utils.py +146 -0
  103. novel_downloader/utils/time_utils/sleep_utils.py +49 -0
  104. novel_downloader-1.1.1.dist-info/METADATA +137 -0
  105. novel_downloader-1.1.1.dist-info/RECORD +109 -0
  106. novel_downloader-1.1.1.dist-info/WHEEL +5 -0
  107. novel_downloader-1.1.1.dist-info/entry_points.txt +2 -0
  108. novel_downloader-1.1.1.dist-info/licenses/LICENSE +21 -0
  109. novel_downloader-1.1.1.dist-info/top_level.txt +1 -0
@@ -0,0 +1,62 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ novel_downloader.core.factory.parser_factory
5
+ --------------------------------------------
6
+
7
+ This module implements a factory function for creating parser instances
8
+ based on the site name and parser mode specified in the configuration.
9
+
10
+ Currently supported:
11
+ - Site: 'qidian'
12
+ - Modes:
13
+ - 'browser': QidianBrowserParser
14
+ - 'session': QidianSessionParser
15
+
16
+ To add support for new sites or modes, extend the `_site_map` accordingly.
17
+ """
18
+
19
+ from novel_downloader.config import ParserConfig, load_site_rules
20
+ from novel_downloader.core.interfaces import ParserProtocol
21
+ from novel_downloader.core.parsers import (
22
+ CommonParser,
23
+ QidianBrowserParser,
24
+ QidianSessionParser,
25
+ )
26
+
27
# Registry of sites that have dedicated parser classes.
# Shape: site key -> {mode name -> parser class}. get_parser() also accepts
# an entry that is a bare class instead of a per-mode dict.
_site_map = {
    "qidian": {
        "browser": QidianBrowserParser,
        "session": QidianSessionParser,
    },
    # "biquge": ...
}
34
+
35
+
36
def get_parser(site: str, config: ParserConfig) -> ParserProtocol:
    """
    Build the parser responsible for ``site``.

    Sites registered in ``_site_map`` receive their dedicated parser class
    (selected by ``config.mode`` when the registry entry is a per-mode
    dict). Any other site is looked up in the loaded site rules and, when
    found, is served by ``CommonParser``.

    :param site: Site name (e.g., 'qidian'); lower-cased before lookup.
    :param config: Configuration for the parser.
    :return: An instance of a parser class.
    :raises ValueError: If the site is registered but has no parser for
        ``config.mode``, or if the site is neither registered nor present
        in the site rules.
    """
    site_key = site.lower()
    registered = _site_map.get(site_key)

    if registered is None:
        # Not a specially handled site: fall back to rule-driven parsing.
        rule = load_site_rules().get(site_key)
        if rule is None:
            raise ValueError(f"Unsupported site: {site}")
        return CommonParser(config, site_key, rule)

    if not isinstance(registered, dict):
        # The entry is a single parser class used regardless of mode.
        return registered(config)

    chosen = registered.get(config.mode)
    if chosen is None:
        raise ValueError(f"Unsupported mode '{config.mode}' for site '{site}'")
    return chosen(config)
@@ -0,0 +1,62 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ novel_downloader.core.factory.requester_factory
5
+ -----------------------------------------------
6
+
7
+ This module implements a factory function for retrieving requester instances
8
+ based on the target novel platform (site).
9
+
10
+ Currently supported:
11
+ - Site: 'qidian'
12
+ - Modes:
13
+ - 'browser': QidianBrowser
14
+ - 'session': QidianSession
15
+
16
+ To add support for new sites or modes, extend the `_site_map` accordingly.
17
+ """
18
+
19
+ from novel_downloader.config import RequesterConfig, load_site_rules
20
+ from novel_downloader.core.interfaces import RequesterProtocol
21
+ from novel_downloader.core.requesters import (
22
+ CommonSession,
23
+ QidianBrowser,
24
+ QidianSession,
25
+ )
26
+
27
# Registry of sites that have dedicated requester classes.
# Shape: site key -> {mode name -> requester class}. get_requester() also
# accepts an entry that is a bare class instead of a per-mode dict.
_site_map = {
    "qidian": {
        "browser": QidianBrowser,
        "session": QidianSession,
    },
    # "biquge": ...
}
34
+
35
+
36
def get_requester(site: str, config: RequesterConfig) -> RequesterProtocol:
    """
    Build the requester responsible for ``site``.

    Sites registered in ``_site_map`` receive their dedicated requester
    class (selected by ``config.mode`` when the registry entry is a
    per-mode dict). Any other site is looked up in the loaded site rules
    and, when found, is served by ``CommonSession`` using the rule's
    ``"profile"`` entry.

    :param site: Site name (e.g., 'qidian'); lower-cased before lookup.
    :param config: Configuration for the requester.
    :return: An instance of a requester class.
    :raises ValueError: If the site is registered but has no requester for
        ``config.mode``, or if the site is neither registered nor present
        in the site rules.
    """
    site_key = site.lower()
    registered = _site_map.get(site_key)

    if not registered:
        # Not a specially handled site: fall back to the rule-driven session.
        rule = load_site_rules().get(site_key)
        if rule is None:
            raise ValueError(f"Unsupported site: {site}")
        return CommonSession(config, site_key, rule["profile"])

    if isinstance(registered, dict):
        requester_class = registered.get(config.mode)
    else:
        requester_class = registered

    if not requester_class:
        raise ValueError(f"Unsupported mode '{config.mode}' for site '{site}'")
    return requester_class(config)
@@ -0,0 +1,49 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ novel_downloader.core.factory.saver_factory
5
+ -------------------------------------------
6
+
7
+ This module implements a factory function for creating saver instances
8
+ based on the site name.
9
+
10
+ Currently supported:
11
+ - Site: 'qidian'
12
+ - QidianSaver
13
+
14
+ To add support for new sites or modes, extend the `_site_map` accordingly.
15
+ """
16
+
17
+ from novel_downloader.config import SaverConfig, load_site_rules
18
+ from novel_downloader.core.interfaces import SaverProtocol
19
+ from novel_downloader.core.savers import (
20
+ CommonSaver,
21
+ QidianSaver,
22
+ )
23
+
24
# Registry of sites that have a dedicated saver class (site key -> class).
# Sites not listed here are resolved through the site rules in get_saver().
_site_map = {
    "qidian": QidianSaver,
    # "biquge": ...
}
28
+
29
+
30
def get_saver(site: str, config: SaverConfig) -> SaverProtocol:
    """
    Build the saver responsible for ``site``.

    A site registered in ``_site_map`` gets its dedicated saver class.
    Otherwise the site must appear in the loaded site rules, in which case
    a ``CommonSaver`` is returned.

    :param site: Site name (e.g., 'qidian'); lower-cased before lookup.
    :param config: Configuration for the saver.
    :return: An instance of a saver class.
    :raises ValueError: If the site is neither registered nor present in
        the site rules.
    """
    site_key = site.lower()
    dedicated = _site_map.get(site_key)

    if not dedicated:
        # No dedicated saver: only sites described by the rules are allowed.
        if site_key not in load_site_rules():
            raise ValueError(f"Unsupported site: {site}")
        return CommonSaver(config, site_key)

    return dedicated(config)
@@ -0,0 +1,28 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ novel_downloader.core.interfaces
5
+ --------------------------------
6
+
7
+ This package centralizes the protocol definitions used across the
8
+ system to promote interface-based design and type-safe dependency
9
+ injection.
10
+
11
+ Included protocols:
12
+ - DownloaderProtocol
13
+ - ParserProtocol
14
+ - RequesterProtocol
15
+ - SaverProtocol
16
+ """
17
+
18
+ from .downloader_protocol import DownloaderProtocol
19
+ from .parser_protocol import ParserProtocol
20
+ from .requester_protocol import RequesterProtocol
21
+ from .saver_protocol import SaverProtocol
22
+
23
# Names re-exported by this package: the four protocol classes imported above.
__all__ = [
    "DownloaderProtocol",
    "ParserProtocol",
    "RequesterProtocol",
    "SaverProtocol",
]
@@ -0,0 +1,37 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ novel_downloader.core.interfaces.downloader_protocol
5
+ ----------------------------------------------------
6
+
7
+ This module defines the DownloaderProtocol, a structural interface
8
+ that outlines the expected behavior of any downloader class.
9
+ """
10
+
11
+ from typing import List, Protocol
12
+
13
+
14
class DownloaderProtocol(Protocol):
    """
    Structural interface every downloader is expected to satisfy.

    A conforming class exposes two entry points: one that takes a list of
    book IDs and one that takes a single book ID.
    """

    def download(self, book_ids: List[str]) -> None:
        """
        Download several books in one call.

        :param book_ids: Identifiers of the books to download.
        """
        ...

    def download_one(self, book_id: str) -> None:
        """
        Download exactly one book.

        :param book_id: Identifier of the book to download.
        """
        ...
@@ -0,0 +1,40 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ novel_downloader.core.interfaces.parser_protocol
5
+ ------------------------------------------------
6
+
7
+ Defines the ParserProtocol interface for extracting book metadata,
8
+ parsing individual chapter content, and setting parser context via book_id.
9
+ """
10
+
11
+ from typing import Any, Dict, Protocol, runtime_checkable
12
+
13
+
14
@runtime_checkable
class ParserProtocol(Protocol):
    """
    Structural interface every parser is expected to satisfy.

    A conforming class can:
    - turn the HTML of a book info page into a metadata dictionary,
    - turn the HTML of a chapter page into a dictionary for that chapter.

    The protocol is runtime-checkable, so ``isinstance(obj, ParserProtocol)``
    verifies that both methods are present.
    """

    def parse_book_info(self, html_str: str) -> Dict[str, Any]:
        """
        Extract book information from the raw HTML of an info page.

        :param html_str: The HTML of a book's info page.
        :return: A dict containing metadata like title, author, chapters list, etc.
        """
        ...

    def parse_chapter(self, html_str: str, chapter_id: str) -> Dict[str, Any]:
        """
        Extract the content of a single chapter from its raw HTML.

        :param html_str: The HTML of the chapter page.
        :param chapter_id: Identifier of the chapter being parsed.
        :return: A dict holding the parsed chapter data.
        """
        ...
@@ -0,0 +1,65 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ novel_downloader.core.interfaces.requester_protocol
5
+ --------------------------------------------------
6
+
7
+ Defines the RequesterProtocol interface for fetching raw HTML or JSON
8
+ for book info pages, individual chapters, managing request lifecycle,
9
+ and optionally retrieving a user's authenticated bookcase.
10
+ """
11
+
12
+ from typing import Optional, Protocol, runtime_checkable
13
+
14
+
15
@runtime_checkable
class RequesterProtocol(Protocol):
    """
    Structural interface every requester is expected to satisfy.

    A conforming class can log in, fetch the raw content of a book's info
    page and of an individual chapter page, fetch the user's bookcase page,
    and shut itself down.

    The protocol is runtime-checkable, so ``isinstance(obj, RequesterProtocol)``
    verifies that all of these methods are present.
    """

    def login(self, max_retries: int = 3, manual_login: bool = False) -> bool:
        """
        Try to log in.

        :param max_retries: Retry budget for the attempt (defaults to 3).
        :param manual_login: Flag requesting a manual login (defaults to False).
        :return: A boolean result of the attempt.
        """
        ...

    def get_book_info(self, book_id: str, wait_time: Optional[int] = None) -> str:
        """
        Retrieve the raw HTML (or JSON) of a book's info page.

        :param book_id: The book identifier.
        :param wait_time: Base number of seconds to wait before returning content.
        :return: The page content as a string.
        """
        ...

    def get_book_chapter(
        self, book_id: str, chapter_id: str, wait_time: Optional[int] = None
    ) -> str:
        """
        Retrieve the raw HTML (or JSON) of one chapter.

        :param book_id: The book identifier.
        :param chapter_id: The chapter identifier.
        :param wait_time: Base number of seconds to wait before returning content.
        :return: The chapter content as a string.
        """
        ...

    def shutdown(self) -> None:
        """
        Shut down the requester and clean up its resources.
        """
        ...

    def get_bookcase(self, wait_time: Optional[int] = None) -> str:
        """
        Optional: retrieve the HTML of the authenticated user's bookcase page.

        :param wait_time: Base number of seconds to wait before returning content.
        :return: The HTML markup of the bookcase page.
        """
        ...
@@ -0,0 +1,61 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ novel_downloader.core.interfaces.saver_protocol
5
+ ------------------------------------------------
6
+
7
+ Defines the SaverProtocol interface for persisting completed books in
8
+ TXT, EPUB, Markdown, and PDF formats.
9
+ """
10
+
11
+ from typing import Protocol, runtime_checkable
12
+
13
+
14
@runtime_checkable
class SaverProtocol(Protocol):
    """
    Structural interface every saver is expected to satisfy.

    A conforming class offers a general ``save`` entry point plus one method
    per output format (TXT, EPUB, Markdown, PDF). The formats other than
    plain text are described as optional by their own docstrings.

    The protocol is runtime-checkable, so ``isinstance(obj, SaverProtocol)``
    verifies that all of these methods are present.
    """

    def save(self, book_id: str) -> None:
        """
        Write the book out in every format requested by the configuration.

        If a method is not implemented or fails, log the error and continue.

        :param book_id: The book identifier (used for filename, lookup, etc.)
        """
        ...

    def save_as_txt(self, book_id: str) -> None:
        """
        Write the assembled book to a .txt file.

        :param book_id: The book identifier (used for filename or lookup).
        """
        ...

    def save_as_epub(self, book_id: str) -> None:
        """
        Optional: write the assembled book to an .epub file.

        :param book_id: The book identifier.
        """
        ...

    def save_as_md(self, book_id: str) -> None:
        """
        Optional: write the assembled book to a Markdown (.md) file.

        :param book_id: The book identifier.
        """
        ...

    def save_as_pdf(self, book_id: str) -> None:
        """
        Optional: write the assembled book to a PDF file.

        :param book_id: The book identifier.
        """
        ...
@@ -0,0 +1,28 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ novel_downloader.core.parsers
5
+ -----------------------------
6
+
7
+ This package defines all site-specific parsing modules
8
+ for the novel_downloader framework.
9
+
10
+ Currently supported:
11
+ - Qidian (起点中文网) via browser-rendered page parsing.
12
+
13
+ Modules:
14
+ - qidian_parser
15
+ - common_parser
16
+ """
17
+
18
+ from .common_parser import CommonParser
19
+ from .qidian_parser import (
20
+ QidianBrowserParser,
21
+ QidianSessionParser,
22
+ )
23
+
24
# Names re-exported by this package: the three parser classes imported above.
__all__ = [
    "CommonParser",
    "QidianBrowserParser",
    "QidianSessionParser",
]
@@ -0,0 +1,96 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ novel_downloader.core.parsers.base_parser
5
+ -----------------------------------------
6
+
7
+ This module defines the BaseParser abstract class, which implements the
8
+ ParserProtocol interface and provides a structured foundation for
9
+ site-specific parsers.
10
+
11
+ BaseParser manages internal parser state and enforces
12
+ a standard parsing interface for:
13
+ - Book info pages (e.g. metadata, chapter list)
14
+ - Chapter pages (e.g. textual content)
15
+ """
16
+
17
+ import abc
18
+ from pathlib import Path
19
+ from typing import Any, Dict, Optional
20
+
21
+ from novel_downloader.config import ParserConfig
22
+ from novel_downloader.core.interfaces import ParserProtocol
23
+
24
+
25
class BaseParser(ParserProtocol, abc.ABC):
    """
    Abstract foundation for site-specific parsers.

    The class stores the parser configuration, the cache directory derived
    from it, and the ID of the book currently being processed. The two
    parsing methods are abstract, so concrete subclasses must supply the
    actual extraction logic for their site.
    """

    def __init__(self, config: ParserConfig):
        """
        Store the configuration and reset the book context.

        :param config: ParserConfig object controlling parsing behavior;
            its ``cache_dir`` is kept as a ``Path``.
        """
        self._config = config
        # No book is in context until the ``book_id`` setter is used.
        self._book_id: Optional[str] = None

        self._base_cache_dir = Path(config.cache_dir)

    @abc.abstractmethod
    def parse_book_info(self, html: str) -> Dict[str, Any]:
        """
        Extract metadata and the chapter structure from a book info page.

        Depending on the site structure, the return dict may include a
        flat `chapters` list or nested `volumes` with chapter groups.

        :param html: Raw HTML of the book info page.
        :return: Parsed metadata and chapter structure as a dictionary.
        """
        ...

    @abc.abstractmethod
    def parse_chapter(self, html_str: str, chapter_id: str) -> Dict[str, Any]:
        """
        Extract the content of a single chapter page.

        :param html_str: Raw HTML of the chapter page.
        :param chapter_id: Identifier of the chapter being parsed.
        :return: A dictionary holding the parsed chapter data.
        """
        ...

    @property
    def book_id(self) -> Optional[str]:
        """
        The book ID currently in context.

        :return: The current book identifier, or None if none has been set.
        """
        return self._book_id

    @book_id.setter
    def book_id(self, value: str) -> None:
        """
        Switch the parser to a new book and fire the ``_on_book_id_set`` hook.

        :param value: Book identifier.
        """
        self._book_id = value
        self._on_book_id_set()

    def _on_book_id_set(self) -> None:
        """
        Hook invoked right after a new book ID has been stored.

        The base implementation does nothing; subclasses can override it
        to initialize book-related folders or states.
        """
@@ -0,0 +1,14 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ novel_downloader.core.parsers.common_parser
5
+ -------------------------------------------
6
+
7
+ This module provides a CommonParser class that implements
8
+ general-purpose parsing logic for extracting novel metadata
9
+ and chapter content based on site-specific rules.
10
+ """
11
+
12
+ from .main_parser import CommonParser
13
+
14
# Names re-exported by this package: only CommonParser, imported above.
__all__ = ["CommonParser"]