novel-downloader 2.0.1__py3-none-any.whl → 2.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (104) hide show
  1. novel_downloader/__init__.py +1 -1
  2. novel_downloader/cli/download.py +11 -8
  3. novel_downloader/cli/export.py +17 -17
  4. novel_downloader/cli/ui.py +28 -1
  5. novel_downloader/config/adapter.py +27 -1
  6. novel_downloader/core/archived/deqixs/fetcher.py +1 -28
  7. novel_downloader/core/downloaders/__init__.py +2 -0
  8. novel_downloader/core/downloaders/base.py +34 -85
  9. novel_downloader/core/downloaders/common.py +147 -171
  10. novel_downloader/core/downloaders/qianbi.py +30 -64
  11. novel_downloader/core/downloaders/qidian.py +157 -184
  12. novel_downloader/core/downloaders/qqbook.py +292 -0
  13. novel_downloader/core/downloaders/registry.py +2 -2
  14. novel_downloader/core/exporters/__init__.py +2 -0
  15. novel_downloader/core/exporters/base.py +37 -59
  16. novel_downloader/core/exporters/common.py +620 -0
  17. novel_downloader/core/exporters/linovelib.py +47 -0
  18. novel_downloader/core/exporters/qidian.py +41 -12
  19. novel_downloader/core/exporters/qqbook.py +28 -0
  20. novel_downloader/core/exporters/registry.py +2 -2
  21. novel_downloader/core/fetchers/__init__.py +4 -2
  22. novel_downloader/core/fetchers/aaatxt.py +2 -22
  23. novel_downloader/core/fetchers/b520.py +3 -23
  24. novel_downloader/core/fetchers/base.py +80 -105
  25. novel_downloader/core/fetchers/biquyuedu.py +2 -22
  26. novel_downloader/core/fetchers/dxmwx.py +10 -22
  27. novel_downloader/core/fetchers/esjzone.py +6 -29
  28. novel_downloader/core/fetchers/guidaye.py +2 -22
  29. novel_downloader/core/fetchers/hetushu.py +9 -29
  30. novel_downloader/core/fetchers/i25zw.py +2 -16
  31. novel_downloader/core/fetchers/ixdzs8.py +2 -16
  32. novel_downloader/core/fetchers/jpxs123.py +2 -16
  33. novel_downloader/core/fetchers/lewenn.py +2 -22
  34. novel_downloader/core/fetchers/linovelib.py +4 -20
  35. novel_downloader/core/fetchers/{eightnovel.py → n8novel.py} +12 -40
  36. novel_downloader/core/fetchers/piaotia.py +2 -16
  37. novel_downloader/core/fetchers/qbtr.py +2 -16
  38. novel_downloader/core/fetchers/qianbi.py +1 -20
  39. novel_downloader/core/fetchers/qidian.py +7 -33
  40. novel_downloader/core/fetchers/qqbook.py +177 -0
  41. novel_downloader/core/fetchers/quanben5.py +9 -29
  42. novel_downloader/core/fetchers/rate_limiter.py +22 -53
  43. novel_downloader/core/fetchers/sfacg.py +3 -16
  44. novel_downloader/core/fetchers/shencou.py +2 -16
  45. novel_downloader/core/fetchers/shuhaige.py +2 -22
  46. novel_downloader/core/fetchers/tongrenquan.py +2 -22
  47. novel_downloader/core/fetchers/ttkan.py +3 -14
  48. novel_downloader/core/fetchers/wanbengo.py +2 -22
  49. novel_downloader/core/fetchers/xiaoshuowu.py +2 -16
  50. novel_downloader/core/fetchers/xiguashuwu.py +4 -20
  51. novel_downloader/core/fetchers/xs63b.py +3 -15
  52. novel_downloader/core/fetchers/xshbook.py +2 -22
  53. novel_downloader/core/fetchers/yamibo.py +4 -28
  54. novel_downloader/core/fetchers/yibige.py +13 -26
  55. novel_downloader/core/interfaces/exporter.py +19 -7
  56. novel_downloader/core/interfaces/fetcher.py +21 -47
  57. novel_downloader/core/parsers/__init__.py +4 -2
  58. novel_downloader/core/parsers/b520.py +2 -2
  59. novel_downloader/core/parsers/base.py +4 -39
  60. novel_downloader/core/parsers/{eightnovel.py → n8novel.py} +5 -5
  61. novel_downloader/core/parsers/{qidian/main_parser.py → qidian.py} +147 -266
  62. novel_downloader/core/parsers/qqbook.py +709 -0
  63. novel_downloader/core/parsers/xiguashuwu.py +3 -4
  64. novel_downloader/core/searchers/__init__.py +2 -2
  65. novel_downloader/core/searchers/b520.py +1 -1
  66. novel_downloader/core/searchers/base.py +2 -2
  67. novel_downloader/core/searchers/{eightnovel.py → n8novel.py} +5 -5
  68. novel_downloader/models/__init__.py +2 -0
  69. novel_downloader/models/book.py +1 -0
  70. novel_downloader/models/config.py +12 -0
  71. novel_downloader/resources/config/settings.toml +23 -5
  72. novel_downloader/resources/js_scripts/expr_to_json.js +14 -0
  73. novel_downloader/resources/js_scripts/qidian_decrypt_node.js +21 -16
  74. novel_downloader/resources/js_scripts/qq_decrypt_node.js +92 -0
  75. novel_downloader/utils/constants.py +6 -0
  76. novel_downloader/utils/crypto_utils/aes_util.py +1 -1
  77. novel_downloader/utils/epub/constants.py +1 -6
  78. novel_downloader/utils/fontocr/core.py +2 -0
  79. novel_downloader/utils/fontocr/loader.py +10 -8
  80. novel_downloader/utils/node_decryptor/__init__.py +13 -0
  81. novel_downloader/utils/node_decryptor/decryptor.py +342 -0
  82. novel_downloader/{core/parsers/qidian/utils → utils/node_decryptor}/decryptor_fetcher.py +5 -6
  83. novel_downloader/web/pages/download.py +1 -1
  84. novel_downloader/web/pages/search.py +1 -1
  85. novel_downloader/web/services/task_manager.py +2 -0
  86. {novel_downloader-2.0.1.dist-info → novel_downloader-2.0.2.dist-info}/METADATA +4 -1
  87. {novel_downloader-2.0.1.dist-info → novel_downloader-2.0.2.dist-info}/RECORD +91 -94
  88. novel_downloader/core/exporters/common/__init__.py +0 -11
  89. novel_downloader/core/exporters/common/epub.py +0 -198
  90. novel_downloader/core/exporters/common/main_exporter.py +0 -64
  91. novel_downloader/core/exporters/common/txt.py +0 -146
  92. novel_downloader/core/exporters/epub_util.py +0 -215
  93. novel_downloader/core/exporters/linovelib/__init__.py +0 -11
  94. novel_downloader/core/exporters/linovelib/epub.py +0 -349
  95. novel_downloader/core/exporters/linovelib/main_exporter.py +0 -66
  96. novel_downloader/core/exporters/linovelib/txt.py +0 -139
  97. novel_downloader/core/exporters/txt_util.py +0 -67
  98. novel_downloader/core/parsers/qidian/__init__.py +0 -10
  99. novel_downloader/core/parsers/qidian/utils/__init__.py +0 -11
  100. novel_downloader/core/parsers/qidian/utils/node_decryptor.py +0 -175
  101. {novel_downloader-2.0.1.dist-info → novel_downloader-2.0.2.dist-info}/WHEEL +0 -0
  102. {novel_downloader-2.0.1.dist-info → novel_downloader-2.0.2.dist-info}/entry_points.txt +0 -0
  103. {novel_downloader-2.0.1.dist-info → novel_downloader-2.0.2.dist-info}/licenses/LICENSE +0 -0
  104. {novel_downloader-2.0.1.dist-info → novel_downloader-2.0.2.dist-info}/top_level.txt +0 -0
@@ -1,7 +1,7 @@
1
1
  #!/usr/bin/env python3
2
2
  """
3
- novel_downloader.core.parsers.eightnovel
4
- ----------------------------------------
3
+ novel_downloader.core.parsers.n8novel
4
+ -------------------------------------
5
5
 
6
6
  """
7
7
 
@@ -21,9 +21,9 @@ from novel_downloader.models import (
21
21
 
22
22
 
23
23
  @register_parser(
24
- site_keys=["eightnovel", "8novel"],
24
+ site_keys=["n8novel", "8novel"],
25
25
  )
26
- class EightnovelParser(BaseParser):
26
+ class N8novelParser(BaseParser):
27
27
  """
28
28
  Parser for 无限轻小说 book pages.
29
29
  """
@@ -177,7 +177,7 @@ class EightnovelParser(BaseParser):
177
177
  "id": chapter_id,
178
178
  "title": title,
179
179
  "content": content,
180
- "extra": {"site": "eightnovel"},
180
+ "extra": {"site": "n8novel"},
181
181
  }
182
182
 
183
183
  @staticmethod
@@ -1,16 +1,14 @@
1
1
  #!/usr/bin/env python3
2
2
  """
3
- novel_downloader.core.parsers.qidian.main_parser
4
- ------------------------------------------------
3
+ novel_downloader.core.parsers.qidian
4
+ ------------------------------------
5
5
 
6
- Main parser class for handling Qidian HTML
7
6
  """
8
7
 
9
8
  from __future__ import annotations
10
9
 
11
10
  import json
12
11
  import logging
13
- import re
14
12
  from contextlib import suppress
15
13
  from html import unescape
16
14
  from pathlib import Path
@@ -34,10 +32,7 @@ from novel_downloader.utils import (
34
32
  from novel_downloader.utils.constants import DATA_DIR
35
33
  from novel_downloader.utils.cookies import get_cookie_value
36
34
  from novel_downloader.utils.fontocr import get_font_ocr
37
-
38
- from .utils import (
39
- get_decryptor,
40
- )
35
+ from novel_downloader.utils.node_decryptor import get_decryptor
41
36
 
42
37
  logger = logging.getLogger(__name__)
43
38
 
@@ -69,25 +64,16 @@ class QidianParser(BaseParser):
69
64
  Parser for 起点中文网 site.
70
65
  """
71
66
 
72
- _RE_P_DELIM = re.compile(r"(?i)<\s*p\s*>")
73
- _RE_ATTR = re.compile(r"attr\(\s*([^)]+?)\s*\)", re.I)
74
- _RE_SCALEX = re.compile(r"scalex\(\s*-?1\s*\)", re.I)
75
-
76
- def __init__(
77
- self,
78
- config: ParserConfig,
79
- fuid: str = "",
80
- ):
67
+ def __init__(self, config: ParserConfig, fuid: str = ""):
81
68
  """
82
69
  Initialize the QidianParser with the given configuration.
83
-
84
- :param config: ParserConfig object controlling:
85
70
  """
86
71
  super().__init__(config)
87
72
 
88
- self._fixed_font_dir: Path = self._base_cache_dir / "fixed_fonts"
89
- self._fixed_font_dir.mkdir(parents=True, exist_ok=True)
90
- self._debug_dir: Path = Path.cwd() / "debug"
73
+ self._rand_path = self._base_cache_dir / "qidian" / "randomFont.ttf"
74
+ self._fixed_font_dir = self._base_cache_dir / "qidian" / "fixed_fonts"
75
+ self._fixed_map_dir = self._base_cache_dir / "qidian" / "fixed_font_map"
76
+ self._debug_dir = Path.cwd() / "debug" / "qidian"
91
77
 
92
78
  state_files = [
93
79
  DATA_DIR / "qidian" / "session_state.cookies",
@@ -99,12 +85,6 @@ class QidianParser(BaseParser):
99
85
  html_list: list[str],
100
86
  **kwargs: Any,
101
87
  ) -> BookInfoDict | None:
102
- """
103
- Parse a book info page and extract metadata and chapter structure.
104
-
105
- :param html_list: Raw HTML of the book info page.
106
- :return: Parsed metadata and chapter structure as a dictionary.
107
- """
108
88
  if not html_list:
109
89
  return None
110
90
 
@@ -172,57 +152,39 @@ class QidianParser(BaseParser):
172
152
  chapter_id: str,
173
153
  **kwargs: Any,
174
154
  ) -> ChapterDict | None:
175
- """
176
- :param html_list: Raw HTML of the chapter page.
177
- :param chapter_id: Identifier of the chapter being parsed.
178
- :return: Cleaned chapter content as plain text.
179
- """
180
155
  if not html_list:
156
+ logger.warning("[Parser] chapter_id=%s :: html_list is empty", chapter_id)
181
157
  return None
182
158
  try:
183
159
  ssr_data = self._find_ssr_page_context(html_list[0])
184
160
  chapter_info = self._extract_chapter_info(ssr_data)
185
- if not chapter_info:
186
- logger.warning(
187
- "[Parser] ssr_chapterInfo not found for chapter '%s'", chapter_id
188
- )
189
- return None
190
-
191
- if not self._can_view_chapter(chapter_info):
192
- logger.warning(
193
- "[Parser] Chapter '%s' is not purchased or inaccessible.",
194
- chapter_id,
195
- )
196
- return None
197
-
198
- if self._is_encrypted(ssr_data):
199
- if not self._decode_font:
200
- return None
201
- return self.parse_encrypted_chapter(chapter_info, chapter_id)
202
-
203
- return self.parse_normal_chapter(chapter_info, chapter_id)
204
-
205
161
  except Exception as e:
206
- logger.warning("[Parser] parse error for chapter '%s': %s", chapter_id, e)
207
- return None
162
+ logger.warning(
163
+ "[Parser] chapter_id=%s :: failed to locate ssr_pageContext block: %s",
164
+ chapter_id,
165
+ e,
166
+ )
167
+ return None
208
168
 
209
- def parse_normal_chapter(
210
- self,
211
- chapter_info: dict[str, Any],
212
- chapter_id: str,
213
- ) -> ChapterDict | None:
214
- """
215
- Extract structured chapter info from a normal Qidian page.
169
+ if not chapter_info:
170
+ logger.warning(
171
+ "[Parser] ssr_chapterInfo not found for chapter '%s'", chapter_id
172
+ )
173
+ return None
174
+
175
+ if not self._can_view_chapter(chapter_info):
176
+ logger.warning(
177
+ "[Parser] Chapter '%s' is not purchased or inaccessible.",
178
+ chapter_id,
179
+ )
180
+ return None
216
181
 
217
- :param chapter_info: Parsed chapter info block from ssr data.
218
- :param chapter_id: Chapter identifier (string).
219
- :return: a dictionary with keys like 'id', 'title', 'content', etc.
220
- """
221
182
  duplicated = self._is_duplicated(chapter_info)
183
+ encrypted = self._is_encrypted(chapter_info)
222
184
 
223
185
  title = chapter_info.get("chapterName", "Untitled")
224
186
  raw_html = chapter_info.get("content", "")
225
- chapter_id = chapter_info.get("chapterId", chapter_id)
187
+ cid = str(chapter_info.get("chapterId") or chapter_id)
226
188
  fkp = chapter_info.get("fkp", "")
227
189
  author_say = chapter_info.get("authorSay", "").strip()
228
190
  update_time = chapter_info.get("updateTime", "")
@@ -234,19 +196,25 @@ class QidianParser(BaseParser):
234
196
 
235
197
  if self._is_vip(chapter_info):
236
198
  decryptor = get_decryptor()
237
- raw_html = decryptor.decrypt(raw_html, chapter_id, fkp, self._fuid)
199
+ raw_html = decryptor.decrypt_qd(raw_html, cid, fkp, self._fuid)
238
200
 
239
- parts = self._RE_P_DELIM.split(raw_html)
240
- paragraphs = [unescape(p).strip() for p in parts if p.strip()]
241
- chapter_text = "\n".join(paragraphs)
201
+ chapter_text = (
202
+ self._parse_font_encrypted(raw_html, chapter_info, cid)
203
+ if encrypted
204
+ else self._parse_normal(raw_html)
205
+ )
242
206
  if not chapter_text:
207
+ logger.warning(
208
+ "[Parser] chapter_id=%s :: content empty after decryption/font-mapping",
209
+ chapter_id,
210
+ )
243
211
  return None
244
212
 
245
213
  if self._use_truncation and duplicated:
246
214
  chapter_text = truncate_half_lines(chapter_text)
247
215
 
248
216
  return {
249
- "id": str(chapter_id),
217
+ "id": cid,
250
218
  "title": title,
251
219
  "content": chapter_text,
252
220
  "extra": {
@@ -258,107 +226,116 @@ class QidianParser(BaseParser):
258
226
  "duplicated": duplicated,
259
227
  "seq": seq,
260
228
  "volume": volume,
261
- "encrypted": False,
229
+ "encrypted": encrypted,
262
230
  },
263
231
  }
264
232
 
265
- def parse_encrypted_chapter(
233
+ def _parse_normal(self, raw_html: str) -> str:
234
+ """
235
+ Extract structured chapter content from a normal Qidian page.
236
+ """
237
+ parts = raw_html.split("<p>")
238
+ paragraphs = [unescape(p).strip() for p in parts if p.strip()]
239
+ chapter_text = "\n".join(paragraphs)
240
+ if not chapter_text:
241
+ return ""
242
+ return chapter_text
243
+
244
+ def _parse_font_encrypted(
266
245
  self,
246
+ raw_html: str,
267
247
  chapter_info: dict[str, Any],
268
- chapter_id: str,
269
- ) -> ChapterDict | None:
248
+ cid: str,
249
+ ) -> str:
270
250
  """
271
- Extract and return the formatted textual content of an encrypted chapter.
272
-
273
251
  Steps:
274
252
  1. Decode and save randomFont bytes; download fixedFont via download().
275
253
  2. Parse CSS rules and save debug JSON.
276
254
  3. Render encrypted paragraphs, then run OCR font-mapping.
277
255
  4. Extracts paragraph texts and formats them.
278
-
279
- :param chapter_info: Parsed chapter info block from ssr data.
280
- :return: Formatted chapter text or empty string if not parsable.
281
256
  """
282
- debug_dir = self._debug_dir / "qidian" / "font_debug" / chapter_id
257
+ if not self._decode_font:
258
+ logger.warning(
259
+ "[Parser] chapter_id=%s :: font decryption skipped "
260
+ "(set `decode_font=True` to enable)",
261
+ cid,
262
+ )
263
+ return ""
264
+
265
+ css_str = chapter_info.get("css")
266
+ random_font_str = chapter_info.get("randomFont")
267
+ rf = json.loads(random_font_str) if isinstance(random_font_str, str) else None
268
+ rf_data = rf.get("data") if rf else None
269
+ fixed_woff2_url = chapter_info.get("fixedFontWoff2")
270
+
271
+ if not css_str:
272
+ logger.warning("[Parser] cid=%s :: css missing or empty", cid)
273
+ return ""
274
+ if not rf_data:
275
+ logger.warning("[Parser] cid=%s :: randomFont.data missing or empty", cid)
276
+ return ""
277
+ if not fixed_woff2_url:
278
+ logger.warning("[Parser] cid=%s :: fixedFontWoff2 missing or empty", cid)
279
+ return ""
280
+
281
+ debug_dir = self._debug_dir / "font_debug" / cid
283
282
  if self._save_font_debug:
284
283
  debug_dir.mkdir(parents=True, exist_ok=True)
285
284
 
286
- duplicated = self._is_duplicated(chapter_info)
287
-
288
- css_str = chapter_info["css"]
289
- randomFont_str = chapter_info["randomFont"]
290
- fixedFontWoff2_url = chapter_info["fixedFontWoff2"]
291
-
292
- title = chapter_info.get("chapterName", "Untitled")
293
- raw_html = chapter_info.get("content", "")
294
- chapter_id = chapter_info.get("chapterId", chapter_id)
295
- fkp = chapter_info.get("fkp", "")
296
- author_say = chapter_info.get("authorSay", "").strip()
297
- update_time = chapter_info.get("updateTime", "")
298
- update_timestamp = chapter_info.get("updateTimestamp", 0)
299
- modify_time = chapter_info.get("modifyTime", 0)
300
- word_count = chapter_info.get("actualWords", 0)
301
- seq = chapter_info.get("seq")
302
- volume = chapter_info.get("extra", {}).get("volumeName", "")
303
-
304
- # extract + save font
305
- rf = json.loads(randomFont_str)
306
- rand_path = self._base_cache_dir / "randomFont.ttf"
307
- rand_path.parent.mkdir(parents=True, exist_ok=True)
308
- rand_path.write_bytes(bytes(rf["data"]))
285
+ try:
286
+ self._rand_path.parent.mkdir(parents=True, exist_ok=True)
287
+ self._rand_path.write_bytes(bytes(rf_data))
288
+ except Exception as e:
289
+ logger.error(
290
+ "[Parser] cid=%s :: failed to write randomFont.ttf",
291
+ cid,
292
+ exc_info=e,
293
+ )
294
+ return ""
309
295
 
310
296
  fixed_path = download(
311
- url=fixedFontWoff2_url,
297
+ url=fixed_woff2_url,
312
298
  target_dir=self._fixed_font_dir,
299
+ on_exist="skip",
313
300
  )
314
301
  if fixed_path is None:
315
302
  logger.warning(
316
- "[Parser] failed to download fixedfont for chapter '%s'", chapter_id
317
- )
318
- return None
319
-
320
- # Extract and render paragraphs from HTML with CSS rules
321
- if self._is_vip(chapter_info):
322
- decryptor = get_decryptor()
323
- raw_html = decryptor.decrypt(
324
- raw_html,
325
- chapter_id,
326
- fkp,
327
- self._fuid,
303
+ "[Parser] failed to download fixedfont for chapter '%s'", cid
328
304
  )
305
+ return ""
329
306
 
330
307
  css_rules = self._parse_css_rules(css_str)
331
308
  paragraphs_str, refl_list = self._render_visible_text(raw_html, css_rules)
332
309
  if self._save_font_debug:
333
- paragraphs_str_path = debug_dir / f"{chapter_id}_debug.txt"
334
- paragraphs_str_path.write_text(paragraphs_str, encoding="utf-8")
310
+ (debug_dir / f"{cid}_debug.txt").write_text(
311
+ paragraphs_str, encoding="utf-8"
312
+ )
335
313
 
336
314
  # Run OCR + fallback mapping
337
315
  char_set = {c for c in paragraphs_str if c not in {" ", "\n", "\u3000"}}
338
316
  refl_set = set(refl_list)
339
317
  char_set = char_set - refl_set
340
318
  if self._save_font_debug:
341
- char_sets_path = debug_dir / "char_set_debug.txt"
342
- temp = f"char_set:\n{char_set}\n\nrefl_set:\n{refl_set}"
343
- char_sets_path.write_text(
344
- temp,
319
+ (debug_dir / "char_set_debug.txt").write_text(
320
+ f"char_set:\n{char_set}\n\nrefl_set:\n{refl_set}",
345
321
  encoding="utf-8",
346
322
  )
347
323
 
348
324
  mapping_result = self._generate_font_map(
349
325
  fixed_font_path=fixed_path,
350
- random_font_path=rand_path,
326
+ random_font_path=self._rand_path,
351
327
  char_set=char_set,
352
328
  refl_set=refl_set,
353
- cache_dir=self._base_cache_dir,
354
- batch_size=self._config.batch_size,
329
+ batch_size=self._batch_size,
355
330
  )
356
331
  if not mapping_result:
357
- return None
332
+ logger.warning(
333
+ "[Parser] font mapping returned empty result for chapter '%s'", cid
334
+ )
335
+ return ""
358
336
 
359
337
  if self._save_font_debug:
360
- mapping_json_path = debug_dir / "font_mapping.json"
361
- mapping_json_path.write_text(
338
+ (debug_dir / "font_mapping.json").write_text(
362
339
  json.dumps(mapping_result, ensure_ascii=False, indent=2),
363
340
  encoding="utf-8",
364
341
  )
@@ -369,28 +346,9 @@ class QidianParser(BaseParser):
369
346
  font_map=mapping_result,
370
347
  )
371
348
 
372
- final_paragraphs_str = "\n".join(
349
+ return "\n".join(
373
350
  line.strip() for line in original_text.splitlines() if line.strip()
374
351
  )
375
- if self._use_truncation and duplicated:
376
- final_paragraphs_str = truncate_half_lines(final_paragraphs_str)
377
-
378
- return {
379
- "id": str(chapter_id),
380
- "title": str(title),
381
- "content": final_paragraphs_str,
382
- "extra": {
383
- "author_say": author_say,
384
- "updated_at": update_time,
385
- "update_timestamp": update_timestamp,
386
- "modify_time": modify_time,
387
- "word_count": word_count,
388
- "duplicated": duplicated,
389
- "seq": seq,
390
- "volume": volume,
391
- "encrypted": True,
392
- },
393
- }
394
352
 
395
353
  @staticmethod
396
354
  def _find_ssr_page_context(html_str: str) -> dict[str, Any]:
@@ -417,17 +375,6 @@ class QidianParser(BaseParser):
417
375
  chapter_info = page_data.get("chapterInfo", {})
418
376
  return chapter_info if isinstance(chapter_info, dict) else {}
419
377
 
420
- @staticmethod
421
- def _is_restricted_page(html_str: str) -> bool:
422
- """
423
- Return True if page content indicates access restriction
424
- (e.g. not subscribed/purchased).
425
-
426
- :param html_str: Raw HTML string.
427
- """
428
- markers = ["这是VIP章节", "需要订阅", "订阅后才能阅读"]
429
- return any(m in html_str for m in markers)
430
-
431
378
  @classmethod
432
379
  def _is_vip(cls, chapter_info: dict[str, Any]) -> bool:
433
380
  """
@@ -458,30 +405,22 @@ class QidianParser(BaseParser):
458
405
  return bool(efw_flag == 1)
459
406
 
460
407
  @classmethod
461
- def _is_encrypted(cls, content: str | dict[str, Any]) -> bool:
408
+ def _is_encrypted(cls, chapter_info: dict[str, Any]) -> bool:
462
409
  """
463
410
  Return True if content is encrypted.
464
411
 
465
412
  Chapter Encryption Status (cES):
466
413
  * 0: 内容是'明文'
467
414
  * 2: 字体加密
468
-
469
- :param content: HTML content, either as a raw string or a BeautifulSoup object.
470
- :return: True if encrypted marker is found, else False.
471
415
  """
472
- ssr_data = (
473
- cls._find_ssr_page_context(content) if isinstance(content, str) else content
474
- )
475
- chapter_info = cls._extract_chapter_info(ssr_data)
476
416
  return int(chapter_info.get("cES", 0)) == 2
477
417
 
478
- @staticmethod
479
418
  def _generate_font_map(
419
+ self,
480
420
  fixed_font_path: Path,
481
421
  random_font_path: Path,
482
422
  char_set: set[str],
483
423
  refl_set: set[str],
484
- cache_dir: Path,
485
424
  batch_size: int = 32,
486
425
  ) -> dict[str, str]:
487
426
  """
@@ -494,17 +433,16 @@ class QidianParser(BaseParser):
494
433
  :param random_font_path: random font file.
495
434
  :param char_set: Characters to match directly.
496
435
  :param refl_set: Characters to match in flipped form.
497
- :param cache_dir: Directory to save/load cached results.
498
436
  :param batch_size: How many chars to OCR per batch.
499
437
 
500
438
  :return: { obf_char: real_char, ... }
501
439
  """
502
- font_ocr = get_font_ocr()
440
+ font_ocr = get_font_ocr(self._fontocr_cfg)
503
441
  if not font_ocr:
504
442
  return {}
505
443
 
506
444
  mapping_result: dict[str, str] = {}
507
- fixed_map_file = cache_dir / "fixed_font_map" / f"{fixed_font_path.stem}.json"
445
+ fixed_map_file = self._fixed_map_dir / f"{fixed_font_path.stem}.json"
508
446
  fixed_map_file.parent.mkdir(parents=True, exist_ok=True)
509
447
 
510
448
  # load existing cache
@@ -587,9 +525,8 @@ class QidianParser(BaseParser):
587
525
 
588
526
  Returns None if can't extract a tag.
589
527
  """
590
- sel = selector.strip()
591
528
  # If it has spaces, take the rightmost simple selector
592
- last = sel.split()[-1]
529
+ last = selector.strip().split()[-1]
593
530
  # Drop ::pseudo
594
531
  last = last.split("::", 1)[0]
595
532
  # If it's like 'span[attr=..]' keep 'span'
@@ -604,50 +541,12 @@ class QidianParser(BaseParser):
604
541
  """
605
542
  Parse 'name:value;...' inside a block. Tolerates quotes and attr().
606
543
  """
607
- decls: list[tuple[str, str]] = []
608
- i = 0
609
- n = len(block)
610
- name: list[str] = []
611
- val: list[str] = []
612
- in_name = True
613
- quote = None # track ' or "
614
- while i < n:
615
- c = block[i]
616
- if quote:
617
- # inside quotes
618
- if c == "\\" and i + 1 < n:
619
- # keep escaped char
620
- (name if in_name else val).append(c)
621
- i += 1
622
- (name if in_name else val).append(block[i])
623
- elif c == quote:
624
- (name if in_name else val).append(c)
625
- quote = None
626
- else:
627
- (name if in_name else val).append(c)
628
- else:
629
- if c in ("'", '"'):
630
- (name if in_name else val).append(c)
631
- quote = c
632
- elif in_name and c == ":":
633
- in_name = False
634
- elif c == ";":
635
- nm = "".join(name).strip().lower()
636
- vl = "".join(val).strip()
637
- if nm:
638
- decls.append((nm, vl))
639
- name.clear()
640
- val.clear()
641
- in_name = True
642
- else:
643
- (name if in_name else val).append(c)
644
- i += 1
645
-
646
- if name or val:
647
- nm = "".join(name).strip().lower()
648
- vl = "".join(val).strip()
649
- if nm:
650
- decls.append((nm, vl))
544
+ parts = [d.strip() for d in block.split(";") if d.strip()]
545
+ decls = []
546
+ for p in parts:
547
+ if ":" in p:
548
+ name, val = p.split(":", 1)
549
+ decls.append((name.strip().lower(), val.strip()))
651
550
  return decls
652
551
 
653
552
  @classmethod
@@ -661,83 +560,65 @@ class QidianParser(BaseParser):
661
560
  rules: Rules = {"orders": [], "sy": {}, "p_rules": {}}
662
561
  order_pairs: list[tuple[str, int]] = []
663
562
 
664
- i = 0
563
+ pos = 0
665
564
  while True:
666
- b1 = css_str.find("{", i)
565
+ b1 = css_str.find("{", pos)
667
566
  if b1 == -1:
668
567
  break
669
- selector = css_str[i:b1].strip().lower()
568
+ selector = css_str[pos:b1].strip().lower()
670
569
  b2 = css_str.find("}", b1 + 1)
671
570
  if b2 == -1:
672
571
  break
673
572
  block = css_str[b1 + 1 : b2]
674
- i = b2 + 1
573
+ pos = b2 + 1
675
574
 
676
575
  decls = cls._parse_decls(block)
677
-
678
576
  new_rule: Rule = {}
679
577
  order_val: int | None = None
680
578
 
681
579
  for name, value in decls:
682
580
  v = value.strip()
683
581
  if name == "font-size" and v == "0":
684
- if "::first-letter" in selector:
685
- new_rule["delete_first"] = True
686
- else:
687
- new_rule["delete_all"] = True
688
- elif name == "transform":
689
- if cls._RE_SCALEX.search(v.replace(" ", "")):
690
- new_rule["transform_flip_x"] = True
582
+ new_rule[
583
+ "delete_first" if "::first-letter" in selector else "delete_all"
584
+ ] = True
585
+ elif name == "transform" and "scalex(-1" in v.replace(" ", "").lower():
586
+ new_rule["transform_flip_x"] = True
691
587
  elif name == "order":
692
- with suppress(ValueError, TypeError):
588
+ with suppress(ValueError):
693
589
  order_val = int(v)
694
590
  elif name == "content":
695
- # normalize: remove outer quotes
696
591
  if "::after" in selector:
697
- m = cls._RE_ATTR.search(v)
698
- if m:
699
- new_rule["append_end_attr"] = m.group(1)
592
+ if v.lower().startswith("attr("):
593
+ new_rule["append_end_attr"] = v[5:-1].strip()
700
594
  else:
701
- s = v.strip().strip("\"'")
702
- new_rule["append_end_char"] = s
595
+ new_rule["append_end_char"] = v.strip().strip("\"'")
703
596
  elif "::before" in selector:
704
- m = cls._RE_ATTR.search(v)
705
- if m:
706
- new_rule["append_start_attr"] = m.group(1)
597
+ if v.lower().startswith("attr("):
598
+ new_rule["append_start_attr"] = v[5:-1].strip()
707
599
  else:
708
- s = v.strip().strip("\"'")
709
- new_rule["append_start_char"] = s
600
+ new_rule["append_start_char"] = v.strip().strip("\"'")
710
601
 
711
- # classification
712
602
  if selector.startswith(".sy-"):
713
603
  key = selector.lstrip(".")
714
- old = rules["sy"].get(key)
715
- rules["sy"][key] = {**old, **new_rule} if old else (new_rule or {})
716
-
604
+ rules["sy"][key] = {**rules["sy"].get(key, {}), **new_rule}
717
605
  elif selector.startswith(".p") and " " in selector:
718
606
  p_cls, right = selector.split(" ", 1)
719
- p_cls = p_cls.lstrip(".")
720
607
  tag = cls._only_tag(right)
721
608
  if tag:
722
- prev = rules["p_rules"].setdefault(p_cls, {}).get(tag)
723
- rules["p_rules"][p_cls][tag] = (
724
- {**prev, **new_rule} if prev else (new_rule or {})
725
- )
609
+ p_cls = p_cls.lstrip(".")
610
+ rules["p_rules"].setdefault(p_cls, {})
611
+ rules["p_rules"][p_cls][tag] = {
612
+ **rules["p_rules"][p_cls].get(tag, {}),
613
+ **new_rule,
614
+ }
726
615
 
727
616
  if order_val is not None:
728
- tag_for_order = cls._only_tag(selector)
729
- if tag_for_order:
730
- order_pairs.append((tag_for_order, order_val))
731
-
732
- # normalize orders
733
- order_pairs.sort(key=lambda t: t[1])
734
- seen = set()
735
- orders: list[str] = []
736
- for tag, _ in order_pairs:
737
- if tag not in seen:
738
- seen.add(tag)
739
- orders.append(tag)
740
- rules["orders"] = orders
617
+ tag = cls._only_tag(selector)
618
+ if tag:
619
+ order_pairs.append((tag, order_val))
620
+
621
+ rules["orders"] = [t for t, _ in sorted(order_pairs, key=lambda x: x[1])]
741
622
  return rules
742
623
 
743
624
  @staticmethod