novel-downloader 1.3.1-py3-none-any.whl → 1.3.3-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (98)
  1. novel_downloader/__init__.py +1 -1
  2. novel_downloader/cli/download.py +1 -1
  3. novel_downloader/config/adapter.py +3 -0
  4. novel_downloader/config/models.py +3 -0
  5. novel_downloader/core/downloaders/__init__.py +23 -1
  6. novel_downloader/core/downloaders/biquge/__init__.py +2 -0
  7. novel_downloader/core/downloaders/biquge/biquge_async.py +27 -0
  8. novel_downloader/core/downloaders/biquge/biquge_sync.py +5 -3
  9. novel_downloader/core/downloaders/common/common_async.py +5 -11
  10. novel_downloader/core/downloaders/common/common_sync.py +18 -18
  11. novel_downloader/core/downloaders/esjzone/__init__.py +14 -0
  12. novel_downloader/core/downloaders/esjzone/esjzone_async.py +27 -0
  13. novel_downloader/core/downloaders/esjzone/esjzone_sync.py +27 -0
  14. novel_downloader/core/downloaders/qianbi/__init__.py +14 -0
  15. novel_downloader/core/downloaders/qianbi/qianbi_async.py +27 -0
  16. novel_downloader/core/downloaders/qianbi/qianbi_sync.py +27 -0
  17. novel_downloader/core/downloaders/qidian/qidian_sync.py +9 -14
  18. novel_downloader/core/downloaders/sfacg/__init__.py +14 -0
  19. novel_downloader/core/downloaders/sfacg/sfacg_async.py +27 -0
  20. novel_downloader/core/downloaders/sfacg/sfacg_sync.py +27 -0
  21. novel_downloader/core/downloaders/yamibo/__init__.py +14 -0
  22. novel_downloader/core/downloaders/yamibo/yamibo_async.py +27 -0
  23. novel_downloader/core/downloaders/yamibo/yamibo_sync.py +27 -0
  24. novel_downloader/core/factory/downloader.py +35 -7
  25. novel_downloader/core/factory/parser.py +23 -2
  26. novel_downloader/core/factory/requester.py +32 -7
  27. novel_downloader/core/factory/saver.py +14 -2
  28. novel_downloader/core/interfaces/async_requester.py +3 -3
  29. novel_downloader/core/interfaces/parser.py +7 -2
  30. novel_downloader/core/interfaces/sync_requester.py +3 -3
  31. novel_downloader/core/parsers/__init__.py +15 -5
  32. novel_downloader/core/parsers/base.py +7 -2
  33. novel_downloader/core/parsers/biquge/main_parser.py +13 -4
  34. novel_downloader/core/parsers/common/main_parser.py +13 -4
  35. novel_downloader/core/parsers/esjzone/__init__.py +10 -0
  36. novel_downloader/core/parsers/esjzone/main_parser.py +220 -0
  37. novel_downloader/core/parsers/qianbi/__init__.py +10 -0
  38. novel_downloader/core/parsers/qianbi/main_parser.py +142 -0
  39. novel_downloader/core/parsers/qidian/browser/main_parser.py +13 -4
  40. novel_downloader/core/parsers/qidian/session/main_parser.py +13 -4
  41. novel_downloader/core/parsers/sfacg/__init__.py +10 -0
  42. novel_downloader/core/parsers/sfacg/main_parser.py +166 -0
  43. novel_downloader/core/parsers/yamibo/__init__.py +10 -0
  44. novel_downloader/core/parsers/yamibo/main_parser.py +194 -0
  45. novel_downloader/core/requesters/__init__.py +33 -3
  46. novel_downloader/core/requesters/base/async_session.py +14 -10
  47. novel_downloader/core/requesters/base/browser.py +4 -7
  48. novel_downloader/core/requesters/base/session.py +25 -11
  49. novel_downloader/core/requesters/biquge/__init__.py +2 -0
  50. novel_downloader/core/requesters/biquge/async_session.py +71 -0
  51. novel_downloader/core/requesters/biquge/session.py +6 -6
  52. novel_downloader/core/requesters/common/async_session.py +4 -4
  53. novel_downloader/core/requesters/common/session.py +6 -6
  54. novel_downloader/core/requesters/esjzone/__init__.py +13 -0
  55. novel_downloader/core/requesters/esjzone/async_session.py +211 -0
  56. novel_downloader/core/requesters/esjzone/session.py +235 -0
  57. novel_downloader/core/requesters/qianbi/__init__.py +13 -0
  58. novel_downloader/core/requesters/qianbi/async_session.py +96 -0
  59. novel_downloader/core/requesters/qianbi/session.py +125 -0
  60. novel_downloader/core/requesters/qidian/broswer.py +9 -9
  61. novel_downloader/core/requesters/qidian/session.py +14 -11
  62. novel_downloader/core/requesters/sfacg/__init__.py +13 -0
  63. novel_downloader/core/requesters/sfacg/async_session.py +204 -0
  64. novel_downloader/core/requesters/sfacg/session.py +242 -0
  65. novel_downloader/core/requesters/yamibo/__init__.py +13 -0
  66. novel_downloader/core/requesters/yamibo/async_session.py +211 -0
  67. novel_downloader/core/requesters/yamibo/session.py +237 -0
  68. novel_downloader/core/savers/__init__.py +15 -3
  69. novel_downloader/core/savers/base.py +3 -7
  70. novel_downloader/core/savers/common/epub.py +21 -33
  71. novel_downloader/core/savers/common/main_saver.py +3 -1
  72. novel_downloader/core/savers/common/txt.py +1 -2
  73. novel_downloader/core/savers/epub_utils/__init__.py +14 -5
  74. novel_downloader/core/savers/epub_utils/css_builder.py +1 -0
  75. novel_downloader/core/savers/epub_utils/image_loader.py +89 -0
  76. novel_downloader/core/savers/epub_utils/initializer.py +1 -0
  77. novel_downloader/core/savers/epub_utils/text_to_html.py +48 -1
  78. novel_downloader/core/savers/epub_utils/volume_intro.py +1 -0
  79. novel_downloader/core/savers/esjzone.py +25 -0
  80. novel_downloader/core/savers/qianbi.py +25 -0
  81. novel_downloader/core/savers/sfacg.py +25 -0
  82. novel_downloader/core/savers/yamibo.py +25 -0
  83. novel_downloader/locales/en.json +1 -0
  84. novel_downloader/locales/zh.json +1 -0
  85. novel_downloader/resources/config/settings.toml +40 -4
  86. novel_downloader/utils/constants.py +4 -0
  87. novel_downloader/utils/file_utils/io.py +1 -1
  88. novel_downloader/utils/network.py +51 -38
  89. novel_downloader/utils/time_utils/__init__.py +2 -1
  90. novel_downloader/utils/time_utils/datetime_utils.py +3 -1
  91. novel_downloader/utils/time_utils/sleep_utils.py +44 -2
  92. {novel_downloader-1.3.1.dist-info → novel_downloader-1.3.3.dist-info}/METADATA +29 -24
  93. novel_downloader-1.3.3.dist-info/RECORD +166 -0
  94. novel_downloader-1.3.1.dist-info/RECORD +0 -127
  95. {novel_downloader-1.3.1.dist-info → novel_downloader-1.3.3.dist-info}/WHEEL +0 -0
  96. {novel_downloader-1.3.1.dist-info → novel_downloader-1.3.3.dist-info}/entry_points.txt +0 -0
  97. {novel_downloader-1.3.1.dist-info → novel_downloader-1.3.3.dist-info}/licenses/LICENSE +0 -0
  98. {novel_downloader-1.3.1.dist-info → novel_downloader-1.3.3.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,10 @@
+ #!/usr/bin/env python3
+ """
+ novel_downloader.core.parsers.qianbi
+ ------------------------------------
+
+ """
+
+ from .main_parser import QianbiParser
+
+ __all__ = ["QianbiParser"]
@@ -0,0 +1,142 @@
+ #!/usr/bin/env python3
+ """
+ novel_downloader.core.parsers.qianbi.main_parser
+ ------------------------------------------------
+
+ """
+
+ from datetime import datetime
+ from typing import Any
+
+ from lxml import etree
+
+ from novel_downloader.core.parsers.base import BaseParser
+ from novel_downloader.utils.chapter_storage import ChapterDict
+
+
+ class QianbiParser(BaseParser):
+     """ """
+
+     def parse_book_info(
+         self,
+         html_str: list[str],
+         **kwargs: Any,
+     ) -> dict[str, Any]:
+         """
+         Parse a book info page and extract metadata and chapter structure.
+
+         :param html_str: Raw HTML of the book info page.
+         :return: Parsed metadata and chapter structure as a dictionary.
+         """
+         if len(html_str) < 2:
+             return {}
+
+         info_tree = etree.HTML(html_str[0])
+         catalog_tree = etree.HTML(html_str[1])
+         result: dict[str, Any] = {}
+
+         title = info_tree.xpath('//h1[@class="page-title"]/text()')
+         result["book_name"] = title[0].strip() if title else ""
+
+         author = info_tree.xpath('//a[contains(@href,"/author/")]/@title')
+         result["author"] = author[0].strip() if author else ""
+
+         cover = info_tree.xpath('//div[@class="novel-cover"]//img/@data-src')
+         result["cover_url"] = cover[0].strip() if cover else ""
+
+         status = info_tree.xpath(
+             '//a[@class="tag-link" and (text()="完结" or text()="连载")]/text()'
+         )
+         result["serial_status"] = status[0] if status else ""
+
+         word_count_raw = info_tree.xpath('//span[contains(text(), "万字")]/text()')
+         result["word_count"] = word_count_raw[0].strip() if word_count_raw else ""
+
+         summary_node = info_tree.xpath(
+             '//div[@class="novel-info-item novel-info-content"]/span'
+         )
+         if summary_node and summary_node[0] is not None:
+             result["summary"] = etree.tostring(
+                 summary_node[0], encoding="unicode", method="text"
+             ).strip()
+         else:
+             result["summary"] = ""
+
+         result["update_time"] = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+
+         volumes: list[dict[str, Any]] = []
+         current_volume = None
+
+         for elem in catalog_tree.xpath('//div[@class="box"]/*'):
+             class_attr = elem.get("class", "")
+             class_list = class_attr.split()
+
+             if elem.tag == "h2" and "module-title" in class_list:
+                 if current_volume:
+                     volumes.append(current_volume)
+                 current_volume = {
+                     "volume_name": elem.text.strip() if elem.text else "",
+                     "chapters": [],
+                 }
+             elif (
+                 elem.tag == "div" and "module-row-info" in class_list and current_volume
+             ):
+                 a_tag = elem.xpath('.//a[@class="module-row-text"]')
+                 if a_tag:
+                     title = a_tag[0].xpath(".//span/text()")
+                     href = a_tag[0].attrib.get("href", "")
+                     chapter_id = (
+                         href.split("/")[-1].replace(".html", "") if href else ""
+                     )
+                     current_volume["chapters"].append(
+                         {
+                             "title": title[0].strip() if title else "",
+                             "url": href,
+                             "chapterId": chapter_id,
+                         }
+                     )
+
+         if current_volume:
+             volumes.append(current_volume)
+
+         result["volumes"] = volumes
+
+         return result
+
+     def parse_chapter(
+         self,
+         html_str: list[str],
+         chapter_id: str,
+         **kwargs: Any,
+     ) -> ChapterDict | None:
+         """
+         Parse a single chapter page and extract clean text or simplified HTML.
+
+         :param html_str: Raw HTML of the chapter page.
+         :param chapter_id: Identifier of the chapter being parsed.
+         :return: Cleaned chapter content as plain text or minimal HTML.
+         """
+         if not html_str:
+             return None
+         tree = etree.HTML(html_str[0])
+
+         paras = tree.xpath('//div[@class="article-content"]/p/text()')
+         content_text = "\n\n".join(p.strip() for p in paras if p.strip())
+         if not content_text:
+             return None
+
+         title = tree.xpath('//h1[@class="article-title"]/text()')
+         title_text = title[0].strip() if title else ""
+
+         volume = tree.xpath('//h3[@class="text-muted"]/text()')
+         volume_text = volume[0].strip() if volume else ""
+
+         return {
+             "id": chapter_id,
+             "title": title_text,
+             "content": content_text,
+             "extra": {
+                 "site": "qianbi",
+                 "volume": volume_text,
+             },
+         }
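
Note the list-based contract: parse_book_info expects a two-element html_str, the book info page first and the chapter catalog page second. The catalog walk above is a small state machine over the children of div.box (an h2.module-title opens a volume, each following div.module-row-info appends a chapter to it). A standalone sketch of that walk, using an invented HTML fixture rather than the library API:

# Sketch of the qianbi catalog walk; the fixture is invented for illustration.
from lxml import etree

catalog_html = """
<div class="box">
  <h2 class="module-title">第一卷</h2>
  <div class="module-row-info">
    <a class="module-row-text" href="/chapter/1234.html"><span>第1章</span></a>
  </div>
</div>
"""
tree = etree.HTML(catalog_html)
for elem in tree.xpath('//div[@class="box"]/*'):
    classes = elem.get("class", "").split()
    if elem.tag == "h2" and "module-title" in classes:
        print("volume:", elem.text.strip())          # opens a new volume
    elif elem.tag == "div" and "module-row-info" in classes:
        a = elem.xpath('.//a[@class="module-row-text"]')[0]
        href = a.get("href", "")
        chapter_id = href.split("/")[-1].replace(".html", "")
        print("chapter:", a.xpath(".//span/text()")[0], chapter_id)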
@@ -69,26 +69,35 @@ class QidianBrowserParser(BaseParser):
          self._font_debug_dir = self._base_cache_dir / "qidian" / "font_debug"
          self._font_debug_dir.mkdir(parents=True, exist_ok=True)

-     def parse_book_info(self, html_str: str) -> dict[str, Any]:
+     def parse_book_info(
+         self,
+         html_str: list[str],
+         **kwargs: Any,
+     ) -> dict[str, Any]:
          """
          Parse a book info page and extract metadata and chapter structure.

          :param html_str: Raw HTML of the book info page.
          :return: Parsed metadata and chapter structure as a dictionary.
          """
-         return parse_book_info(html_str)
+         if not html_str:
+             return {}
+         return parse_book_info(html_str[0])

      def parse_chapter(
          self,
-         html_str: str,
+         html_str: list[str],
          chapter_id: str,
+         **kwargs: Any,
      ) -> ChapterDict | None:
          """
          :param html: Raw HTML of the chapter page.
          :param chapter_id: Identifier of the chapter being parsed.
          :return: Cleaned chapter content as plain text.
          """
-         return parse_chapter(self, html_str, chapter_id)
+         if not html_str:
+             return None
+         return parse_chapter(self, html_str[0], chapter_id)

      def is_encrypted(self, html_str: str) -> bool:
          """
@@ -72,26 +72,35 @@ class QidianSessionParser(BaseParser):
          self._font_debug_dir = self._base_cache_dir / "qidian" / "font_debug"
          self._font_debug_dir.mkdir(parents=True, exist_ok=True)

-     def parse_book_info(self, html_str: str) -> dict[str, Any]:
+     def parse_book_info(
+         self,
+         html_str: list[str],
+         **kwargs: Any,
+     ) -> dict[str, Any]:
          """
          Parse a book info page and extract metadata and chapter structure.

          :param html_str: Raw HTML of the book info page.
          :return: Parsed metadata and chapter structure as a dictionary.
          """
-         return parse_book_info(html_str)
+         if not html_str:
+             return {}
+         return parse_book_info(html_str[0])

      def parse_chapter(
          self,
-         html_str: str,
+         html_str: list[str],
          chapter_id: str,
+         **kwargs: Any,
      ) -> ChapterDict | None:
          """
          :param html: Raw HTML of the chapter page.
          :param chapter_id: Identifier of the chapter being parsed.
          :return: Cleaned chapter content as plain text.
          """
-         return parse_chapter(self, html_str, chapter_id)
+         if not html_str:
+             return None
+         return parse_chapter(self, html_str[0], chapter_id)

      def is_encrypted(self, html_str: str) -> bool:
          """
@@ -0,0 +1,10 @@
+ #!/usr/bin/env python3
+ """
+ novel_downloader.core.parsers.sfacg
+ -----------------------------------
+
+ """
+
+ from .main_parser import SfacgParser
+
+ __all__ = ["SfacgParser"]
@@ -0,0 +1,166 @@
+ #!/usr/bin/env python3
+ """
+ novel_downloader.core.parsers.sfacg.main_parser
+ -----------------------------------------------
+
+ """
+
+ from typing import Any
+
+ from lxml import etree
+
+ from novel_downloader.core.parsers.base import BaseParser
+ from novel_downloader.utils.chapter_storage import ChapterDict
+
+
+ class SfacgParser(BaseParser):
+     """ """
+
+     # Book info XPaths
+     _BOOK_NAME_XPATH = '//ul[@class="book_info"]//span[@class="book_newtitle"]/text()'
+     _AUTHOR_INFO_XPATH = '//ul[@class="book_info"]//span[@class="book_info3"]/text()'
+     _UPDATE_TIME_XPATH = '//ul[@class="book_info"]//span[@class="book_info3"]/br/following-sibling::text()'  # noqa: E501
+     _COVER_URL_XPATH = '//ul[@class="book_info"]//li/img/@src'
+     _STATUS_XPATH = '//ul[@class="book_info"]//div[@class="book_info2"]/span/text()'
+     _SUMMARY_XPATH = '//ul[@class="book_profile"]/li[@class="book_bk_qs1"]/text()'
+
+     # Catalog XPaths
+     _VOLUME_TITLE_XPATH = '//div[@class="mulu"]/text()'
+     _VOLUME_CONTENT_XPATH = '//div[@class="Content_Frame"]'
+     _CHAPTER_LIST_XPATH = './/ul[@class="mulu_list"]/a'
+
+     # Chapter XPaths
+     _CHAPTER_TEXT_XPATH = (
+         '//div[@class="yuedu Content_Frame"]//div[@style="text-indent: 2em;"]/text()'
+     )
+     _CHAPTER_CONTENT_NODES_XPATH = (
+         '//div[@class="yuedu Content_Frame"]//div[@style="text-indent: 2em;"]/*'
+     )
+     _CHAPTER_TITLE_XPATH = '//ul[@class="menu_top_list book_view_top"]/li[2]/text()'
+
+     def parse_book_info(
+         self,
+         html_str: list[str],
+         **kwargs: Any,
+     ) -> dict[str, Any]:
+         """
+         Parse a book info page and extract metadata and chapter structure.
+
+         :param html_str: Raw HTML of the book info page.
+         :return: Parsed metadata and chapter structure as a dictionary.
+         """
+         if len(html_str) < 2:
+             return {}
+
+         info_tree = etree.HTML(html_str[0])
+         catalog_tree = etree.HTML(html_str[1])
+
+         result: dict[str, Any] = {}
+
+         # Book metadata
+         book_name = info_tree.xpath(self._BOOK_NAME_XPATH)
+         result["book_name"] = book_name[0].strip() if book_name else ""
+
+         book_info3 = info_tree.xpath(self._AUTHOR_INFO_XPATH)
+         result["author"] = book_info3[0].split("/")[0].strip() if book_info3 else ""
+         result["word_count"] = (
+             book_info3[0].split("/")[1].strip()
+             if book_info3 and len(book_info3[0].split("/")) > 1
+             else ""
+         )
+
+         book_info3_br = info_tree.xpath(self._UPDATE_TIME_XPATH)
+         result["update_time"] = book_info3_br[0].strip() if book_info3_br else ""
+
+         cover_url = info_tree.xpath(self._COVER_URL_XPATH)
+         result["cover_url"] = "https:" + cover_url[0] if cover_url else ""
+
+         serial_status = info_tree.xpath(self._STATUS_XPATH)
+         result["serial_status"] = next(
+             (s for s in serial_status if "完结" in s or "连载" in s), ""
+         )
+
+         summary = info_tree.xpath(self._SUMMARY_XPATH)
+         result["summary"] = "".join(summary).strip()
+
+         # Chapter structure
+         volume_titles = catalog_tree.xpath(self._VOLUME_TITLE_XPATH)
+         volume_blocks = catalog_tree.xpath(self._VOLUME_CONTENT_XPATH)
+
+         volumes = []
+         for vol_title, vol_block in zip(volume_titles, volume_blocks, strict=False):
+             chapters = []
+             for a in vol_block.xpath(self._CHAPTER_LIST_XPATH):
+                 href = a.xpath("./@href")[0] if a.xpath("./@href") else ""
+                 title = "".join(a.xpath(".//li//text()")).strip()
+                 chapter_id = href.split("/")[-2] if href else ""
+                 chapters.append(
+                     {
+                         "title": title,
+                         "url": href,
+                         "chapterId": chapter_id,
+                     }
+                 )
+             volumes.append(
+                 {
+                     "volume_name": vol_title.strip(),
+                     "chapters": chapters,
+                 }
+             )
+         result["volumes"] = volumes
+
+         return result
+
+     def parse_chapter(
+         self,
+         html_str: list[str],
+         chapter_id: str,
+         **kwargs: Any,
+     ) -> ChapterDict | None:
+         """
+         Parse a single chapter page and extract clean text or simplified HTML.
+
+         :param html_str: Raw HTML of the chapter page.
+         :param chapter_id: Identifier of the chapter being parsed.
+         :return: Cleaned chapter content as plain text or minimal HTML.
+         """
+         if not html_str:
+             return None
+         keywords = [
+             "本章为VIP章节",  # 本章为VIP章节,订阅后可立即阅读
+         ]
+         if any(kw in html_str[0] for kw in keywords):
+             return None
+         tree = etree.HTML(html_str[0])
+
+         content_lines: list[str] = []
+         content_nodes = tree.xpath(self._CHAPTER_CONTENT_NODES_XPATH)
+         for node in content_nodes:
+             tag = node.tag.lower()
+             if tag == "p":
+                 text = "".join(node.xpath(".//text()")).strip()
+                 if text:
+                     content_lines.append(text)
+             elif tag == "img":
+                 src = node.get("src", "").strip()
+                 if src:
+                     # embed image as HTML tag
+                     content_lines.append(f'<img src="{src}" />')
+
+         if not content_lines:
+             raw_text_parts = tree.xpath(self._CHAPTER_TEXT_XPATH)
+             content_lines = [txt.strip() for txt in raw_text_parts if txt.strip()]
+
+         content = "\n\n".join(content_lines).strip()
+         if not content:
+             return None
+
+         title_part = tree.xpath(self._CHAPTER_TITLE_XPATH)
+         title = title_part[0].strip() if title_part else ""
+
+         return {
+             "id": chapter_id,
+             "title": title,
+             "content": content,
+             "extra": {"site": "sfacg"},
+         }
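
parse_chapter above keeps paragraph text and inline illustrations in document order, emitting literal <img src=... /> tags so a later stage (plausibly the new epub_utils/image_loader.py in the file list) can embed the images. A standalone sketch of that mixed-content walk over an invented fixture:

# Sketch of the sfacg text/image extraction; fixture invented for illustration.
from lxml import etree

chapter_html = """
<div class="yuedu Content_Frame">
  <div style="text-indent: 2em;">
    <p>第一段。</p>
    <img src="//img.example.com/1.jpg"/>
    <p>第二段。</p>
  </div>
</div>
"""
tree = etree.HTML(chapter_html)
lines = []
for node in tree.xpath('//div[@class="yuedu Content_Frame"]'
                       '//div[@style="text-indent: 2em;"]/*'):
    if node.tag == "p":
        text = "".join(node.xpath(".//text()")).strip()
        if text:
            lines.append(text)
    elif node.tag == "img":
        src = node.get("src", "").strip()
        if src:
            lines.append(f'<img src="{src}" />')  # kept inline, in order
print("\n\n".join(lines))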
@@ -0,0 +1,10 @@
+ #!/usr/bin/env python3
+ """
+ novel_downloader.core.parsers.yamibo
+ ------------------------------------
+
+ """
+
+ from .main_parser import YamiboParser
+
+ __all__ = ["YamiboParser"]
@@ -0,0 +1,194 @@
+ #!/usr/bin/env python3
+ """
+ novel_downloader.core.parsers.yamibo.main_parser
+ ------------------------------------------------
+
+ """
+
+ from typing import Any
+
+ from lxml import etree
+
+ from novel_downloader.core.parsers.base import BaseParser
+ from novel_downloader.utils.chapter_storage import ChapterDict
+
+
+ class YamiboParser(BaseParser):
+     """ """
+
+     BASE_URL = "https://www.yamibo.com"
+     # Book info XPaths
+     _BOOK_NAME_XPATH = 'string(//h3[contains(@class, "col-md-12")])'
+     _AUTHOR_XPATH = 'string(//h5[contains(@class, "text-warning")])'
+     _COVER_URL_XPATH = '//img[contains(@class, "img-responsive")]/@src'
+     _UPDATE_TIME_XPATH = '//p[contains(text(), "更新时间:")]'
+     _SERIAL_STATUS_XPATH = '//p[contains(text(), "作品状态:")]'
+     _TYPE_XPATH = '//p[contains(text(), "作品分类:")]'
+     _SUMMARY_XPATH = 'string(//div[@id="w0-collapse1"]/div)'
+
+     _VOLUME_NODE_XPATH = (
+         '//div[contains(@class, "panel-info") and contains(@class, "panel-default")]'
+     )
+     _VOLUME_TITLE_XPATH = './/div[contains(@class, "panel-heading")]//a/text()'
+     _CHAPTER_NODE_XPATH = (
+         './/div[contains(@class, "panel-body")]//a[contains(@href, "view-chapter")]'
+     )
+     _CHAPTER_FLAT_XPATH = (
+         '//div[@class="panel-body"]//a[contains(@href, "view-chapter")]'
+     )
+
+     # Chapter field XPaths
+     _CHAPTER_TITLE_XPATH = "string(//section[contains(@class, 'col-md-9')]//h3)"
+     _CHAPTER_TIME_XPATH = (
+         "//div[contains(@class, 'row')]//div[contains(text(), '更新时间')]"
+     )
+     _CHAPTER_WORD_COUNT_XPATH = (
+         "//div[contains(@class, 'row')]//div[contains(text(), '章节字数')]"
+     )
+     _CHAPTER_CONTENT_XPATH = "//div[@id='w0-collapse1']//p//text()"
+
+     def parse_book_info(
+         self,
+         html_str: list[str],
+         **kwargs: Any,
+     ) -> dict[str, Any]:
+         """
+         Parse a book info page and extract metadata and chapter structure.
+
+         :param html_str: Raw HTML of the book info page.
+         :return: Parsed metadata and chapter structure as a dictionary.
+         """
+         if not html_str:
+             return {}
+
+         tree = etree.HTML(html_str[0])
+         result: dict[str, Any] = {}
+
+         result["book_name"] = tree.xpath(self._BOOK_NAME_XPATH).strip()
+         result["author"] = tree.xpath(self._AUTHOR_XPATH).strip()
+
+         cover = tree.xpath(self._COVER_URL_XPATH)
+         result["cover_url"] = f"{self.BASE_URL}{cover[0]}" if cover else ""
+
+         update_node = tree.xpath(self._UPDATE_TIME_XPATH)
+         result["update_time"] = (
+             update_node[0].xpath("string()").replace("更新时间:", "").strip()
+             if update_node
+             else ""
+         )
+
+         serial_node = tree.xpath(self._SERIAL_STATUS_XPATH)
+         result["serial_status"] = (
+             serial_node[0].xpath("string()").replace("作品状态:", "").strip()
+             if serial_node
+             else ""
+         )
+
+         type_node = tree.xpath(self._TYPE_XPATH)
+         result["type"] = (
+             type_node[0].xpath("string()").replace("作品分类:", "").strip()
+             if type_node
+             else ""
+         )
+
+         result["summary"] = tree.xpath(self._SUMMARY_XPATH).strip()
+
+         volumes = []
+         volume_nodes = tree.xpath(self._VOLUME_NODE_XPATH)
+
+         if volume_nodes:
+             for volume_node in volume_nodes:
+                 title_node = volume_node.xpath(self._VOLUME_TITLE_XPATH)
+                 volume_name = title_node[0].strip() if title_node else "未命名卷"
+
+                 chapter_nodes = volume_node.xpath(self._CHAPTER_NODE_XPATH)
+                 chapters = []
+                 for chap in chapter_nodes:
+                     title = chap.xpath("string()").strip()
+                     url = chap.get("href", "")
+                     chapter_id = url.split("id=")[-1] if "id=" in url else ""
+                     chapters.append(
+                         {
+                             "title": title,
+                             "url": url,
+                             "chapterId": chapter_id,
+                         }
+                     )
+
+                 volumes.append(
+                     {
+                         "volume_name": volume_name,
+                         "chapters": chapters,
+                     }
+                 )
+
+         else:
+             # fallback: flat list
+             chapter_nodes = tree.xpath(self._CHAPTER_FLAT_XPATH)
+             chapters = []
+             for chap in chapter_nodes:
+                 title = chap.xpath("string()").strip()
+                 url = chap.get("href", "")
+                 chapter_id = url.split("id=")[-1] if "id=" in url else ""
+                 chapters.append(
+                     {
+                         "title": title,
+                         "url": url,
+                         "chapterId": chapter_id,
+                     }
+                 )
+
+             volumes = [
+                 {
+                     "volume_name": "单卷",
+                     "chapters": chapters,
+                 }
+             ]
+
+         result["volumes"] = volumes
+
+         return result
+
+     def parse_chapter(
+         self,
+         html_str: list[str],
+         chapter_id: str,
+         **kwargs: Any,
+     ) -> ChapterDict | None:
+         """
+         Parse a single chapter page and extract clean text or simplified HTML.
+
+         :param html_str: Raw HTML of the chapter page.
+         :param chapter_id: Identifier of the chapter being parsed.
+         :return: Cleaned chapter content as plain text or minimal HTML.
+         """
+         if not html_str:
+             return None
+         tree = etree.HTML(html_str[0])
+
+         content_lines = tree.xpath(self._CHAPTER_CONTENT_XPATH)
+         content = "\n\n".join(line.strip() for line in content_lines if line.strip())
+         if not content:
+             return None
+
+         title = tree.xpath(self._CHAPTER_TITLE_XPATH).strip()
+
+         update_node = tree.xpath(self._CHAPTER_TIME_XPATH)
+         updated_at = (
+             update_node[0].text.strip().replace("更新时间:", "") if update_node else ""
+         )
+
+         word_node = tree.xpath(self._CHAPTER_WORD_COUNT_XPATH)
+         word = word_node[0].text.strip().replace("章节字数:", "") if word_node else ""
+         word_count = int(word) if word.isdigit() else 0
+
+         return {
+             "id": chapter_id,
+             "title": title,
+             "content": content,
+             "extra": {
+                 "site": "yamibo",
+                 "word_count": word_count,
+                 "updated_at": updated_at,
+             },
+         }
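
Unlike the other parsers here, YamiboParser mixes node-list XPaths with string(...) XPaths; with lxml the latter evaluate to a single str (so .strip() applies directly to the result), while the node-list form returns a Python list. A quick standalone illustration with an invented fixture:

# string(...) XPaths return a str; plain XPaths return a list.
from lxml import etree

tree = etree.HTML(
    '<h3 class="col-md-12"> 书名 </h3><h5 class="text-warning"> 作者 </h5>'
)
print(repr(tree.xpath('string(//h3[contains(@class, "col-md-12")])').strip()))
# -> '书名'
print(repr(tree.xpath('//h5[contains(@class, "text-warning")]/text()')))
# -> [' 作者 ']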
@@ -9,27 +9,57 @@ to perform network interactions, such as logging in, sending requests,
  or interacting with browser/session-based sources.

  Subpackages:
- - common
- - biquge
- - qidian
+ - biquge (笔趣阁)
+ - esjzone (ESJ Zone)
+ - qianbi (铅笔小说)
+ - qidian (起点中文网)
+ - sfacg (SF轻小说)
+ - yamibo (百合会)
+ - common (通用架构)
  """

  from .biquge import (
+     BiqugeAsyncSession,
      BiqugeSession,
  )
  from .common import (
      CommonAsyncSession,
      CommonSession,
  )
+ from .esjzone import (
+     EsjzoneAsyncSession,
+     EsjzoneSession,
+ )
+ from .qianbi import (
+     QianbiAsyncSession,
+     QianbiSession,
+ )
  from .qidian import (
      QidianBrowser,
      QidianSession,
  )
+ from .sfacg import (
+     SfacgAsyncSession,
+     SfacgSession,
+ )
+ from .yamibo import (
+     YamiboAsyncSession,
+     YamiboSession,
+ )

  __all__ = [
+     "BiqugeAsyncSession",
      "BiqugeSession",
      "CommonAsyncSession",
      "CommonSession",
+     "EsjzoneAsyncSession",
+     "EsjzoneSession",
+     "QianbiAsyncSession",
+     "QianbiSession",
      "QidianBrowser",
      "QidianSession",
+     "SfacgAsyncSession",
+     "SfacgSession",
+     "YamiboAsyncSession",
+     "YamiboSession",
  ]
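
Each site now exports a matching sync/async session pair from one namespace; the choice between them presumably happens in core/factory/requester.py (+32 -7 in the file list above). A hedged sketch of such a dispatch, with constructor arguments deliberately omitted since they come from the loaded config:

# Hedged sketch: choosing between the new sync/async pairs by name.
# Import paths follow this diff; construction is left to the caller.
from novel_downloader.core.requesters import QianbiAsyncSession, QianbiSession

def pick_requester(use_async: bool) -> type:
    """Return the requester class for the qianbi site; caller constructs it."""
    return QianbiAsyncSession if use_async else QianbiSession

print(pick_requester(use_async=True).__name__)  # QianbiAsyncSession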