novel-downloader 1.3.3__py3-none-any.whl → 1.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (211) hide show
  1. novel_downloader/__init__.py +1 -1
  2. novel_downloader/cli/clean.py +97 -78
  3. novel_downloader/cli/config.py +177 -0
  4. novel_downloader/cli/download.py +132 -87
  5. novel_downloader/cli/export.py +77 -0
  6. novel_downloader/cli/main.py +21 -28
  7. novel_downloader/config/__init__.py +1 -25
  8. novel_downloader/config/adapter.py +32 -31
  9. novel_downloader/config/loader.py +3 -3
  10. novel_downloader/config/site_rules.py +1 -2
  11. novel_downloader/core/__init__.py +3 -6
  12. novel_downloader/core/downloaders/__init__.py +10 -13
  13. novel_downloader/core/downloaders/base.py +233 -0
  14. novel_downloader/core/downloaders/biquge.py +27 -0
  15. novel_downloader/core/downloaders/common.py +414 -0
  16. novel_downloader/core/downloaders/esjzone.py +27 -0
  17. novel_downloader/core/downloaders/linovelib.py +27 -0
  18. novel_downloader/core/downloaders/qianbi.py +27 -0
  19. novel_downloader/core/downloaders/qidian.py +352 -0
  20. novel_downloader/core/downloaders/sfacg.py +27 -0
  21. novel_downloader/core/downloaders/yamibo.py +27 -0
  22. novel_downloader/core/exporters/__init__.py +37 -0
  23. novel_downloader/core/{savers → exporters}/base.py +73 -39
  24. novel_downloader/core/exporters/biquge.py +25 -0
  25. novel_downloader/core/exporters/common/__init__.py +12 -0
  26. novel_downloader/core/{savers → exporters}/common/epub.py +22 -22
  27. novel_downloader/core/{savers/common/main_saver.py → exporters/common/main_exporter.py} +35 -40
  28. novel_downloader/core/{savers → exporters}/common/txt.py +20 -23
  29. novel_downloader/core/{savers → exporters}/epub_utils/__init__.py +8 -3
  30. novel_downloader/core/{savers → exporters}/epub_utils/css_builder.py +2 -2
  31. novel_downloader/core/{savers → exporters}/epub_utils/image_loader.py +46 -4
  32. novel_downloader/core/{savers → exporters}/epub_utils/initializer.py +6 -4
  33. novel_downloader/core/{savers → exporters}/epub_utils/text_to_html.py +3 -3
  34. novel_downloader/core/{savers → exporters}/epub_utils/volume_intro.py +2 -2
  35. novel_downloader/core/exporters/esjzone.py +25 -0
  36. novel_downloader/core/exporters/linovelib/__init__.py +10 -0
  37. novel_downloader/core/exporters/linovelib/epub.py +449 -0
  38. novel_downloader/core/exporters/linovelib/main_exporter.py +127 -0
  39. novel_downloader/core/exporters/linovelib/txt.py +129 -0
  40. novel_downloader/core/exporters/qianbi.py +25 -0
  41. novel_downloader/core/{savers → exporters}/qidian.py +8 -8
  42. novel_downloader/core/exporters/sfacg.py +25 -0
  43. novel_downloader/core/exporters/yamibo.py +25 -0
  44. novel_downloader/core/factory/__init__.py +5 -17
  45. novel_downloader/core/factory/downloader.py +24 -126
  46. novel_downloader/core/factory/exporter.py +58 -0
  47. novel_downloader/core/factory/fetcher.py +96 -0
  48. novel_downloader/core/factory/parser.py +17 -12
  49. novel_downloader/core/{requesters → fetchers}/__init__.py +22 -15
  50. novel_downloader/core/{requesters → fetchers}/base/__init__.py +2 -4
  51. novel_downloader/core/fetchers/base/browser.py +383 -0
  52. novel_downloader/core/fetchers/base/rate_limiter.py +86 -0
  53. novel_downloader/core/fetchers/base/session.py +419 -0
  54. novel_downloader/core/fetchers/biquge/__init__.py +14 -0
  55. novel_downloader/core/{requesters/biquge/async_session.py → fetchers/biquge/browser.py} +18 -6
  56. novel_downloader/core/{requesters → fetchers}/biquge/session.py +23 -30
  57. novel_downloader/core/fetchers/common/__init__.py +14 -0
  58. novel_downloader/core/fetchers/common/browser.py +79 -0
  59. novel_downloader/core/{requesters/common/async_session.py → fetchers/common/session.py} +8 -25
  60. novel_downloader/core/fetchers/esjzone/__init__.py +14 -0
  61. novel_downloader/core/fetchers/esjzone/browser.py +202 -0
  62. novel_downloader/core/{requesters/esjzone/async_session.py → fetchers/esjzone/session.py} +62 -42
  63. novel_downloader/core/fetchers/linovelib/__init__.py +14 -0
  64. novel_downloader/core/fetchers/linovelib/browser.py +178 -0
  65. novel_downloader/core/fetchers/linovelib/session.py +178 -0
  66. novel_downloader/core/fetchers/qianbi/__init__.py +14 -0
  67. novel_downloader/core/{requesters/qianbi/session.py → fetchers/qianbi/browser.py} +30 -48
  68. novel_downloader/core/{requesters/qianbi/async_session.py → fetchers/qianbi/session.py} +18 -6
  69. novel_downloader/core/fetchers/qidian/__init__.py +14 -0
  70. novel_downloader/core/fetchers/qidian/browser.py +266 -0
  71. novel_downloader/core/fetchers/qidian/session.py +326 -0
  72. novel_downloader/core/fetchers/sfacg/__init__.py +14 -0
  73. novel_downloader/core/fetchers/sfacg/browser.py +189 -0
  74. novel_downloader/core/{requesters/sfacg/async_session.py → fetchers/sfacg/session.py} +43 -73
  75. novel_downloader/core/fetchers/yamibo/__init__.py +14 -0
  76. novel_downloader/core/fetchers/yamibo/browser.py +229 -0
  77. novel_downloader/core/{requesters/yamibo/async_session.py → fetchers/yamibo/session.py} +62 -44
  78. novel_downloader/core/interfaces/__init__.py +8 -12
  79. novel_downloader/core/interfaces/downloader.py +54 -0
  80. novel_downloader/core/interfaces/{saver.py → exporter.py} +12 -12
  81. novel_downloader/core/interfaces/fetcher.py +162 -0
  82. novel_downloader/core/interfaces/parser.py +6 -7
  83. novel_downloader/core/parsers/__init__.py +5 -6
  84. novel_downloader/core/parsers/base.py +9 -13
  85. novel_downloader/core/parsers/biquge/main_parser.py +12 -13
  86. novel_downloader/core/parsers/common/helper.py +3 -3
  87. novel_downloader/core/parsers/common/main_parser.py +39 -34
  88. novel_downloader/core/parsers/esjzone/main_parser.py +20 -14
  89. novel_downloader/core/parsers/linovelib/__init__.py +10 -0
  90. novel_downloader/core/parsers/linovelib/main_parser.py +210 -0
  91. novel_downloader/core/parsers/qianbi/main_parser.py +21 -15
  92. novel_downloader/core/parsers/qidian/__init__.py +2 -11
  93. novel_downloader/core/parsers/qidian/book_info_parser.py +113 -0
  94. novel_downloader/core/parsers/qidian/{browser/chapter_encrypted.py → chapter_encrypted.py} +162 -135
  95. novel_downloader/core/parsers/qidian/chapter_normal.py +150 -0
  96. novel_downloader/core/parsers/qidian/{session/chapter_router.py → chapter_router.py} +15 -15
  97. novel_downloader/core/parsers/qidian/{browser/main_parser.py → main_parser.py} +49 -40
  98. novel_downloader/core/parsers/qidian/utils/__init__.py +27 -0
  99. novel_downloader/core/parsers/qidian/utils/decryptor_fetcher.py +145 -0
  100. novel_downloader/core/parsers/qidian/{shared → utils}/helpers.py +41 -68
  101. novel_downloader/core/parsers/qidian/{session → utils}/node_decryptor.py +64 -50
  102. novel_downloader/core/parsers/sfacg/main_parser.py +12 -12
  103. novel_downloader/core/parsers/yamibo/main_parser.py +10 -10
  104. novel_downloader/locales/en.json +18 -2
  105. novel_downloader/locales/zh.json +18 -2
  106. novel_downloader/models/__init__.py +64 -0
  107. novel_downloader/models/browser.py +21 -0
  108. novel_downloader/models/chapter.py +25 -0
  109. novel_downloader/models/config.py +100 -0
  110. novel_downloader/models/login.py +20 -0
  111. novel_downloader/models/site_rules.py +99 -0
  112. novel_downloader/models/tasks.py +33 -0
  113. novel_downloader/models/types.py +15 -0
  114. novel_downloader/resources/config/settings.toml +31 -25
  115. novel_downloader/resources/json/linovelib_font_map.json +3573 -0
  116. novel_downloader/tui/__init__.py +7 -0
  117. novel_downloader/tui/app.py +32 -0
  118. novel_downloader/tui/main.py +17 -0
  119. novel_downloader/tui/screens/__init__.py +14 -0
  120. novel_downloader/tui/screens/home.py +191 -0
  121. novel_downloader/tui/screens/login.py +74 -0
  122. novel_downloader/tui/styles/home_layout.tcss +79 -0
  123. novel_downloader/tui/widgets/richlog_handler.py +24 -0
  124. novel_downloader/utils/__init__.py +6 -0
  125. novel_downloader/utils/chapter_storage.py +25 -38
  126. novel_downloader/utils/constants.py +11 -5
  127. novel_downloader/utils/cookies.py +66 -0
  128. novel_downloader/utils/crypto_utils.py +1 -74
  129. novel_downloader/utils/fontocr/ocr_v1.py +2 -1
  130. novel_downloader/utils/fontocr/ocr_v2.py +2 -2
  131. novel_downloader/utils/hash_store.py +10 -18
  132. novel_downloader/utils/hash_utils.py +3 -2
  133. novel_downloader/utils/logger.py +2 -3
  134. novel_downloader/utils/network.py +2 -1
  135. novel_downloader/utils/text_utils/chapter_formatting.py +6 -1
  136. novel_downloader/utils/text_utils/font_mapping.py +1 -1
  137. novel_downloader/utils/text_utils/text_cleaning.py +1 -1
  138. novel_downloader/utils/time_utils/datetime_utils.py +3 -3
  139. novel_downloader/utils/time_utils/sleep_utils.py +1 -1
  140. {novel_downloader-1.3.3.dist-info → novel_downloader-1.4.0.dist-info}/METADATA +69 -35
  141. novel_downloader-1.4.0.dist-info/RECORD +170 -0
  142. {novel_downloader-1.3.3.dist-info → novel_downloader-1.4.0.dist-info}/WHEEL +1 -1
  143. {novel_downloader-1.3.3.dist-info → novel_downloader-1.4.0.dist-info}/entry_points.txt +1 -0
  144. novel_downloader/cli/interactive.py +0 -66
  145. novel_downloader/cli/settings.py +0 -177
  146. novel_downloader/config/models.py +0 -187
  147. novel_downloader/core/downloaders/base/__init__.py +0 -14
  148. novel_downloader/core/downloaders/base/base_async.py +0 -153
  149. novel_downloader/core/downloaders/base/base_sync.py +0 -208
  150. novel_downloader/core/downloaders/biquge/__init__.py +0 -14
  151. novel_downloader/core/downloaders/biquge/biquge_async.py +0 -27
  152. novel_downloader/core/downloaders/biquge/biquge_sync.py +0 -27
  153. novel_downloader/core/downloaders/common/__init__.py +0 -14
  154. novel_downloader/core/downloaders/common/common_async.py +0 -210
  155. novel_downloader/core/downloaders/common/common_sync.py +0 -202
  156. novel_downloader/core/downloaders/esjzone/__init__.py +0 -14
  157. novel_downloader/core/downloaders/esjzone/esjzone_async.py +0 -27
  158. novel_downloader/core/downloaders/esjzone/esjzone_sync.py +0 -27
  159. novel_downloader/core/downloaders/qianbi/__init__.py +0 -14
  160. novel_downloader/core/downloaders/qianbi/qianbi_async.py +0 -27
  161. novel_downloader/core/downloaders/qianbi/qianbi_sync.py +0 -27
  162. novel_downloader/core/downloaders/qidian/__init__.py +0 -10
  163. novel_downloader/core/downloaders/qidian/qidian_sync.py +0 -219
  164. novel_downloader/core/downloaders/sfacg/__init__.py +0 -14
  165. novel_downloader/core/downloaders/sfacg/sfacg_async.py +0 -27
  166. novel_downloader/core/downloaders/sfacg/sfacg_sync.py +0 -27
  167. novel_downloader/core/downloaders/yamibo/__init__.py +0 -14
  168. novel_downloader/core/downloaders/yamibo/yamibo_async.py +0 -27
  169. novel_downloader/core/downloaders/yamibo/yamibo_sync.py +0 -27
  170. novel_downloader/core/factory/requester.py +0 -144
  171. novel_downloader/core/factory/saver.py +0 -56
  172. novel_downloader/core/interfaces/async_downloader.py +0 -36
  173. novel_downloader/core/interfaces/async_requester.py +0 -84
  174. novel_downloader/core/interfaces/sync_downloader.py +0 -36
  175. novel_downloader/core/interfaces/sync_requester.py +0 -82
  176. novel_downloader/core/parsers/qidian/browser/__init__.py +0 -12
  177. novel_downloader/core/parsers/qidian/browser/chapter_normal.py +0 -93
  178. novel_downloader/core/parsers/qidian/browser/chapter_router.py +0 -71
  179. novel_downloader/core/parsers/qidian/session/__init__.py +0 -12
  180. novel_downloader/core/parsers/qidian/session/chapter_encrypted.py +0 -443
  181. novel_downloader/core/parsers/qidian/session/chapter_normal.py +0 -115
  182. novel_downloader/core/parsers/qidian/session/main_parser.py +0 -128
  183. novel_downloader/core/parsers/qidian/shared/__init__.py +0 -37
  184. novel_downloader/core/parsers/qidian/shared/book_info_parser.py +0 -150
  185. novel_downloader/core/requesters/base/async_session.py +0 -410
  186. novel_downloader/core/requesters/base/browser.py +0 -337
  187. novel_downloader/core/requesters/base/session.py +0 -378
  188. novel_downloader/core/requesters/biquge/__init__.py +0 -14
  189. novel_downloader/core/requesters/common/__init__.py +0 -17
  190. novel_downloader/core/requesters/common/session.py +0 -113
  191. novel_downloader/core/requesters/esjzone/__init__.py +0 -13
  192. novel_downloader/core/requesters/esjzone/session.py +0 -235
  193. novel_downloader/core/requesters/qianbi/__init__.py +0 -13
  194. novel_downloader/core/requesters/qidian/__init__.py +0 -21
  195. novel_downloader/core/requesters/qidian/broswer.py +0 -307
  196. novel_downloader/core/requesters/qidian/session.py +0 -290
  197. novel_downloader/core/requesters/sfacg/__init__.py +0 -13
  198. novel_downloader/core/requesters/sfacg/session.py +0 -242
  199. novel_downloader/core/requesters/yamibo/__init__.py +0 -13
  200. novel_downloader/core/requesters/yamibo/session.py +0 -237
  201. novel_downloader/core/savers/__init__.py +0 -34
  202. novel_downloader/core/savers/biquge.py +0 -25
  203. novel_downloader/core/savers/common/__init__.py +0 -12
  204. novel_downloader/core/savers/esjzone.py +0 -25
  205. novel_downloader/core/savers/qianbi.py +0 -25
  206. novel_downloader/core/savers/sfacg.py +0 -25
  207. novel_downloader/core/savers/yamibo.py +0 -25
  208. novel_downloader/resources/config/rules.toml +0 -196
  209. novel_downloader-1.3.3.dist-info/RECORD +0 -166
  210. {novel_downloader-1.3.3.dist-info → novel_downloader-1.4.0.dist-info}/licenses/LICENSE +0 -0
  211. {novel_downloader-1.3.3.dist-info → novel_downloader-1.4.0.dist-info}/top_level.txt +0 -0
@@ -1,37 +0,0 @@
1
- #!/usr/bin/env python3
2
- """
3
- novel_downloader.core.parsers.qidian.shared
4
- -------------------------------------------
5
-
6
- Shared parsing utilities for Qidian parser components.
7
-
8
- This subpackage provides common functions used across
9
- different Qidian parsing strategies. It encapsulates logic for:
10
-
11
- - Parsing the SSR-rendered page context and chapter metadata.
12
- - Determining access control and encryption status of chapters.
13
- - Basic HTML preprocessing and fallback parsing behavior.
14
- - Extracting structured book info from the main book page.
15
- """
16
-
17
- from .book_info_parser import parse_book_info
18
- from .helpers import (
19
- can_view_chapter,
20
- extract_chapter_info,
21
- find_ssr_page_context,
22
- html_to_soup,
23
- is_encrypted,
24
- is_vip,
25
- vip_status,
26
- )
27
-
28
- __all__ = [
29
- "parse_book_info",
30
- "html_to_soup",
31
- "is_vip",
32
- "can_view_chapter",
33
- "is_encrypted",
34
- "vip_status",
35
- "find_ssr_page_context",
36
- "extract_chapter_info",
37
- ]
@@ -1,150 +0,0 @@
1
- #!/usr/bin/env python3
2
- """
3
- novel_downloader.core.parsers.qidian.shared.book_info_parser
4
- ------------------------------------------------------------
5
-
6
- This module provides parsing of Qidian book info pages.
7
-
8
- It extracts metadata such as title, author, cover URL, update
9
- time, status, word count, summary, and volume-chapter structure.
10
- """
11
-
12
- import logging
13
- import re
14
- from typing import Any
15
-
16
- from bs4.element import Tag
17
-
18
- from .helpers import html_to_soup
19
-
20
- logger = logging.getLogger(__name__)
21
-
22
-
23
- def _chapter_url_to_id(url: str) -> str:
24
- """
25
- Extract chapterId as the last non-empty segment of the URL.
26
- """
27
- return url.rstrip("/").split("/")[-1]
28
-
29
-
30
- def _get_volume_name(vol_div: Tag) -> str:
31
- """
32
- Extracts the volume title from a <div class="volume"> element
33
- """
34
- h3 = vol_div.select_one("h3")
35
- if not h3:
36
- return ""
37
- for a in h3.find_all("a"):
38
- a.decompose()
39
- text: str = h3.get_text(strip=True)
40
- return text.split(chr(183))[0].strip()
41
-
42
-
43
- def safe_select_text(
44
- soup: Tag,
45
- selector: str,
46
- *,
47
- separator: str = "",
48
- strip: bool = False,
49
- default: str = "",
50
- ) -> str:
51
- """
52
- Safely select the first element matching a CSS selector and return its text.
53
-
54
- :param soup: A BeautifulSoup Tag or sub-tree to query.
55
- :param selector: A CSS selector string.
56
- :param separator: Separator to use between strings when joining.
57
- :param strip: Whether to strip whitespace from the result.
58
- :param default: Value to return if no element is found.
59
- :return: The element's text, or `default` if not found.
60
- """
61
- tag = soup.select_one(selector)
62
- return (
63
- tag.get_text(separator=separator, strip=strip)
64
- if isinstance(tag, Tag)
65
- else default
66
- )
67
-
68
-
69
- def safe_select_attr(
70
- soup: Tag,
71
- selector: str,
72
- attr: str,
73
- *,
74
- default: str = "",
75
- ) -> str:
76
- """
77
- Safely select the first element matching a CSS selector and return one attributes.
78
-
79
- :param soup: A BeautifulSoup Tag or sub-tree to query.
80
- :param selector: A CSS selector string.
81
- :param attr: The attribute name to retrieve from the selected element.
82
- :param default: Value to return if no element or attribute is found.
83
- :return: The attribute's value stripped of whitespace, or `default` if not found.
84
- """
85
- tag = soup.select_one(selector)
86
- if isinstance(tag, Tag) and attr in tag.attrs:
87
- value = tag.attrs[attr]
88
- if isinstance(value, list):
89
- return " ".join(value).strip()
90
- elif isinstance(value, str):
91
- return value.strip()
92
- return default
93
-
94
-
95
- def parse_book_info(html_str: str) -> dict[str, Any]:
96
- """
97
- Extract metadata: title, author, cover_url, update_time, status,
98
- word_count, summary, and volumes with chapters.
99
-
100
- :param html_str: Raw HTML of the book info page.
101
- :return: A dict containing book metadata.
102
- """
103
- info: dict[str, Any] = {}
104
- try:
105
- soup = html_to_soup(html_str)
106
- info["book_name"] = safe_select_text(soup, "em#bookName", strip=True)
107
- info["author"] = safe_select_text(soup, "a.writer", strip=True)
108
- info["cover_url"] = safe_select_attr(soup, "div.book-img img", "src")
109
- info["update_time"] = (
110
- safe_select_text(soup, "span.book-update-time", strip=True)
111
- .replace("更新时间", "")
112
- .strip()
113
- )
114
- info["serial_status"] = safe_select_text(soup, "span.blue", strip=True)
115
-
116
- # Word count via regex fallback
117
- match = re.search(r"<em>([\d.]+)</em>\s*<cite>(.*?)字</cite>", html_str)
118
- info["word_count"] = (
119
- f"{match.group(1)}{match.group(2)}字" if match else "Unknown"
120
- )
121
-
122
- info["summary"] = safe_select_text(
123
- soup, "div.book-intro p", separator="\n", strip=True
124
- )
125
- # volumes
126
- vols = []
127
- for vol_div in soup.select("div.volume-wrap div.volume"):
128
- name = _get_volume_name(vol_div)
129
- chaps = []
130
- for li in vol_div.select("li"):
131
- a = li.select_one("a")
132
- if not isinstance(a, Tag) or "href" not in a.attrs:
133
- continue
134
- href_val = a["href"]
135
- if isinstance(href_val, list):
136
- href = href_val[0].strip()
137
- else:
138
- href = str(href_val).strip()
139
- chaps.append(
140
- {
141
- "title": a.get_text(strip=True),
142
- "url": href,
143
- "chapterId": _chapter_url_to_id(href),
144
- }
145
- )
146
- vols.append({"volume_name": name, "chapters": chaps})
147
- info["volumes"] = vols
148
- except Exception as e:
149
- logger.warning("[Parser] Error parsing book info: %s", e)
150
- return info
@@ -1,410 +0,0 @@
1
- #!/usr/bin/env python3
2
- """
3
- novel_downloader.core.requesters.base.async_session
4
- ---------------------------------------------------
5
-
6
- This module defines the BaseAsyncSession class, which provides asynchronous
7
- HTTP request capabilities using aiohttp. It maintains a persistent
8
- client session and supports retries, headers, timeout configurations,
9
- cookie handling, and defines abstract methods for subclasses.
10
- """
11
-
12
- import abc
13
- import asyncio
14
- import logging
15
- import random
16
- import time
17
- import types
18
- from typing import Any, Literal, Self
19
-
20
- import aiohttp
21
- from aiohttp import ClientResponse, ClientSession, ClientTimeout, TCPConnector
22
-
23
- from novel_downloader.config.models import RequesterConfig
24
- from novel_downloader.core.interfaces import AsyncRequesterProtocol
25
- from novel_downloader.utils.constants import DEFAULT_USER_HEADERS
26
-
27
-
28
- class RateLimiter:
29
- """
30
- Simple async token-bucket rate limiter:
31
- ensures no more than rate_per_sec
32
- requests are started per second, across all coroutines.
33
- """
34
-
35
- def __init__(self, rate_per_sec: float):
36
- self._interval = 1.0 / rate_per_sec
37
- self._lock = asyncio.Lock()
38
- self._last = time.monotonic()
39
-
40
- async def wait(self) -> None:
41
- async with self._lock:
42
- now = time.monotonic()
43
- elapsed = now - self._last
44
- delay = self._interval - elapsed
45
- if delay > 0:
46
- jitter = random.uniform(0, 0.3)
47
- await asyncio.sleep(delay + jitter)
48
- self._last = time.monotonic()
49
-
50
-
51
- class BaseAsyncSession(AsyncRequesterProtocol, abc.ABC):
52
- """
53
- BaseAsyncSession wraps basic HTTP operations using aiohttp.ClientSession,
54
- supporting retry logic, timeout, persistent connections, and cookie management.
55
-
56
- Attributes:
57
- _session (ClientSession): The persistent aiohttp client session.
58
- _timeout (float): Timeout for each request in seconds.
59
- _retry_times (int): Number of retry attempts on failure.
60
- _retry_interval (float): Delay (in seconds) between retries.
61
- _headers (Dict[str, str]): Default HTTP headers to send.
62
- _cookies (Dict[str, str]): Optional cookie jar for the session.
63
- """
64
-
65
- def is_async(self) -> Literal[True]:
66
- return True
67
-
68
- def __init__(
69
- self,
70
- config: RequesterConfig,
71
- cookies: dict[str, str] | None = None,
72
- ) -> None:
73
- """
74
- Initialize the async session with configuration.
75
-
76
- :param config: Configuration object for session behavior
77
- (timeouts, retries, headers, etc.)
78
- :param cookies: Optional initial cookies to set on the session.
79
- """
80
- self._config = config
81
- self._retry_times = config.retry_times
82
- self._retry_interval = config.backoff_factor
83
- self._timeout = config.timeout
84
- self._max_rps = config.max_rps
85
- self._max_connections = config.max_connections
86
-
87
- self._cookies = cookies or {}
88
- self._headers = DEFAULT_USER_HEADERS.copy()
89
- self._session: ClientSession | None = None
90
- self._rate_limiter: RateLimiter | None = None
91
-
92
- self.logger = logging.getLogger(f"{__name__}.{self.__class__.__name__}")
93
-
94
- self._init_session()
95
-
96
- def _init_session(self) -> None:
97
- """
98
- Set up the aiohttp.ClientSession with timeout, connector, headers, and cookies.
99
- """
100
- if self._max_rps is not None:
101
- self._rate_limiter = RateLimiter(self._max_rps)
102
-
103
- timeout = ClientTimeout(total=self._timeout)
104
- connector = TCPConnector(limit_per_host=self._max_connections)
105
- self._session = ClientSession(
106
- timeout=timeout,
107
- connector=connector,
108
- headers=self._headers,
109
- cookies=self._cookies,
110
- )
111
-
112
- async def login(
113
- self,
114
- username: str = "",
115
- password: str = "",
116
- manual_login: bool = False,
117
- **kwargs: Any,
118
- ) -> bool:
119
- """
120
- Attempt to log in asynchronously.
121
- Override in subclasses that require authentication.
122
-
123
- :returns: True if login succeeded, False otherwise.
124
- """
125
- return True
126
-
127
- @abc.abstractmethod
128
- async def get_book_info(
129
- self,
130
- book_id: str,
131
- **kwargs: Any,
132
- ) -> list[str]:
133
- """
134
- Fetch the raw HTML (or JSON) of the book info page asynchronously.
135
-
136
- :param book_id: The book identifier.
137
- :param wait_time: Base number of seconds to wait before returning content.
138
- :return: The page content as a string.
139
- """
140
- ...
141
-
142
- @abc.abstractmethod
143
- async def get_book_chapter(
144
- self,
145
- book_id: str,
146
- chapter_id: str,
147
- **kwargs: Any,
148
- ) -> list[str]:
149
- """
150
- Fetch the raw HTML (or JSON) of a single chapter asynchronously.
151
-
152
- :param book_id: The book identifier.
153
- :param chapter_id: The chapter identifier.
154
- :param wait_time: Base number of seconds to wait before returning content.
155
- :return: The chapter content as a string.
156
- """
157
- ...
158
-
159
- async def get_bookcase(
160
- self,
161
- page: int = 1,
162
- **kwargs: Any,
163
- ) -> list[str]:
164
- """
165
- Optional: Retrieve the HTML content of the authenticated user's bookcase page.
166
- Subclasses that support user login/bookcase should override this.
167
-
168
- :param wait_time: Base number of seconds to wait before returning content.
169
- :return: The HTML of the bookcase page.
170
- """
171
- raise NotImplementedError(
172
- "Bookcase fetching is not supported by this session type. "
173
- "Override get_bookcase() in your subclass to enable it."
174
- )
175
-
176
- async def fetch(self, url: str, **kwargs: Any) -> str:
177
- """
178
- Fetch the content from the given URL asynchronously, with retry support.
179
-
180
- :param url: The target URL to fetch.
181
- :param kwargs: Additional keyword arguments to pass to `session.get`.
182
- :return: The response body as text.
183
- :raises: aiohttp.ClientError on final failure.
184
- """
185
- if self._rate_limiter:
186
- await self._rate_limiter.wait()
187
-
188
- for attempt in range(self._retry_times + 1):
189
- try:
190
- async with self.session.get(url, **kwargs) as resp:
191
- resp.raise_for_status()
192
- text: str = await resp.text()
193
- return text
194
- except aiohttp.ClientError:
195
- if attempt < self._retry_times:
196
- await asyncio.sleep(self._retry_interval)
197
- continue
198
- raise
199
-
200
- raise RuntimeError("Unreachable code reached in fetch()")
201
-
202
- async def get(
203
- self,
204
- url: str,
205
- params: dict[str, Any] | None = None,
206
- **kwargs: Any,
207
- ) -> ClientResponse:
208
- """
209
- Send an HTTP GET request asynchronously.
210
-
211
- :param url: The target URL.
212
- :param params: Query parameters to include in the request.
213
- :param kwargs: Additional args passed to session.get().
214
- :return: aiohttp.ClientResponse object.
215
- :raises RuntimeError: If the session is not initialized.
216
- """
217
- return await self._request("GET", url, params=params, **kwargs)
218
-
219
- async def post(
220
- self,
221
- url: str,
222
- data: dict[str, Any] | bytes | None = None,
223
- json: dict[str, Any] | None = None,
224
- **kwargs: Any,
225
- ) -> ClientResponse:
226
- """
227
- Send an HTTP POST request asynchronously.
228
-
229
- :param url: The target URL.
230
- :param data: Form data to include in the request body.
231
- :param json: JSON body to include in the request.
232
- :param kwargs: Additional args passed to session.post().
233
- :return: aiohttp.ClientResponse object.
234
- :raises RuntimeError: If the session is not initialized.
235
- """
236
- return await self._request("POST", url, data=data, json=json, **kwargs)
237
-
238
- @property
239
- def session(self) -> ClientSession:
240
- """
241
- Return the active aiohttp.ClientSession.
242
-
243
- :raises RuntimeError: If the session is uninitialized.
244
- """
245
- if self._session is None:
246
- raise RuntimeError("Session is not initialized or has been shut down.")
247
- return self._session
248
-
249
- @property
250
- def cookies(self) -> dict[str, str]:
251
- """
252
- Get the current session cookies.
253
-
254
- :return: A dict mapping cookie names to their values.
255
- """
256
- if self._session:
257
- return {c.key: c.value for c in self._session.cookie_jar}
258
- else:
259
- return self._cookies
260
-
261
- @property
262
- def headers(self) -> dict[str, str]:
263
- """
264
- Get a copy of the current session headers for temporary use.
265
-
266
- :return: A dict mapping header names to their values.
267
- """
268
- if self._session:
269
- return dict(self._session.headers)
270
- return self._headers.copy()
271
-
272
- def get_header(self, key: str, default: Any = None) -> Any:
273
- """
274
- Retrieve a specific header value by name.
275
-
276
- :param key: The header name to look up.
277
- :param default: The value to return if the header is not present.
278
- :return: The header value if present, else default.
279
- """
280
- if self._session:
281
- return self._session.headers.get(key, default)
282
- else:
283
- return self._headers.get(key, default)
284
-
285
- def update_header(self, key: str, value: str) -> None:
286
- """
287
- Update or add a single header in the session.
288
-
289
- :param key: The name of the header.
290
- :param value: The value of the header.
291
- """
292
- self._headers[key] = value
293
- if self._session:
294
- self._session.headers[key] = value
295
-
296
- def update_headers(self, headers: dict[str, str]) -> None:
297
- """
298
- Update or add multiple headers in the session.
299
-
300
- :param headers: A dictionary of header key-value pairs.
301
- """
302
- self._headers.update(headers)
303
- if self._session:
304
- self._session.headers.update(headers)
305
-
306
- def update_cookie(self, key: str, value: str) -> None:
307
- """
308
- Update or add a single cookie in the session.
309
-
310
- :param key: The name of the cookie.
311
- :param value: The value of the cookie.
312
- """
313
- self._cookies[key] = value
314
- if self._session:
315
- self._session.cookie_jar.update_cookies({key: value})
316
-
317
- def update_cookies(
318
- self,
319
- cookies: dict[str, str],
320
- ) -> None:
321
- """
322
- Update or add multiple cookies in the session.
323
-
324
- :param cookies: A dictionary of cookie key-value pairs.
325
- """
326
- self._cookies.update(cookies)
327
- if self._session:
328
- self._session.cookie_jar.update_cookies(cookies)
329
-
330
- def clear_cookies(self) -> None:
331
- """
332
- Clear cookies from the session.
333
- """
334
- self._cookies = {}
335
- if self._session:
336
- self._session.cookie_jar.clear()
337
-
338
- async def _request(
339
- self,
340
- method: str,
341
- url: str,
342
- **kwargs: Any,
343
- ) -> ClientResponse:
344
- if self._rate_limiter:
345
- await self._rate_limiter.wait()
346
- return await self.session.request(method, url, **kwargs)
347
-
348
- async def _on_close(self) -> None:
349
- """
350
- Async hook method called before closing.
351
- Override in subclass.
352
- """
353
- pass
354
-
355
- async def close(self) -> None:
356
- """
357
- Shutdown and clean up the session. Closes connection pool.
358
- """
359
- await self._on_close()
360
- if self._session:
361
- await self._session.close()
362
- self._session = None
363
-
364
- def sync_close(self) -> None:
365
- """
366
- Sync wrapper for closing the aiohttp session
367
- when called from sync contexts.
368
- """
369
- if self._session:
370
- try:
371
- loop = asyncio.get_running_loop()
372
- loop.create_task(self.close())
373
- except RuntimeError:
374
- loop = asyncio.new_event_loop()
375
- asyncio.set_event_loop(loop)
376
- loop.run_until_complete(self.close())
377
- loop.close()
378
-
379
- async def __aenter__(self) -> Self:
380
- if self._session is None:
381
- self._init_session()
382
- return self
383
-
384
- async def __aexit__(
385
- self,
386
- exc_type: type[BaseException] | None,
387
- exc_val: BaseException | None,
388
- tb: types.TracebackType | None,
389
- ) -> None:
390
- await self.close()
391
-
392
- def __del__(self) -> None:
393
- self.sync_close()
394
-
395
- def __getstate__(self) -> dict[str, Any]:
396
- """
397
- Prepare object state for serialization: remove unpickleable session.
398
- """
399
- self.sync_close()
400
- state = self.__dict__.copy()
401
- state.pop("_session", None)
402
- state.pop("_rate_limiter", None)
403
- return state
404
-
405
- def __setstate__(self, state: dict[str, Any]) -> None:
406
- """
407
- Restore object state. Session will be lazily reinitialized on next request.
408
- """
409
- self.__dict__.update(state)
410
- self._session = None