novel-downloader 1.4.5__py3-none-any.whl → 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (276) hide show
  1. novel_downloader/__init__.py +1 -1
  2. novel_downloader/cli/__init__.py +2 -4
  3. novel_downloader/cli/clean.py +21 -88
  4. novel_downloader/cli/config.py +27 -104
  5. novel_downloader/cli/download.py +78 -66
  6. novel_downloader/cli/export.py +20 -21
  7. novel_downloader/cli/main.py +3 -1
  8. novel_downloader/cli/search.py +120 -0
  9. novel_downloader/cli/ui.py +156 -0
  10. novel_downloader/config/__init__.py +10 -14
  11. novel_downloader/config/adapter.py +195 -99
  12. novel_downloader/config/{loader.py → file_io.py} +53 -27
  13. novel_downloader/core/__init__.py +14 -13
  14. novel_downloader/core/archived/deqixs/fetcher.py +115 -0
  15. novel_downloader/core/archived/deqixs/parser.py +132 -0
  16. novel_downloader/core/archived/deqixs/searcher.py +89 -0
  17. novel_downloader/core/archived/qidian/searcher.py +79 -0
  18. novel_downloader/core/archived/wanbengo/searcher.py +98 -0
  19. novel_downloader/core/archived/xshbook/searcher.py +93 -0
  20. novel_downloader/core/downloaders/__init__.py +8 -30
  21. novel_downloader/core/downloaders/base.py +182 -30
  22. novel_downloader/core/downloaders/common.py +217 -384
  23. novel_downloader/core/downloaders/qianbi.py +332 -4
  24. novel_downloader/core/downloaders/qidian.py +250 -290
  25. novel_downloader/core/downloaders/registry.py +69 -0
  26. novel_downloader/core/downloaders/signals.py +46 -0
  27. novel_downloader/core/exporters/__init__.py +8 -26
  28. novel_downloader/core/exporters/base.py +107 -31
  29. novel_downloader/core/exporters/common/__init__.py +3 -4
  30. novel_downloader/core/exporters/common/epub.py +92 -171
  31. novel_downloader/core/exporters/common/main_exporter.py +14 -67
  32. novel_downloader/core/exporters/common/txt.py +90 -86
  33. novel_downloader/core/exporters/epub_util.py +184 -1327
  34. novel_downloader/core/exporters/linovelib/__init__.py +3 -2
  35. novel_downloader/core/exporters/linovelib/epub.py +165 -222
  36. novel_downloader/core/exporters/linovelib/main_exporter.py +10 -71
  37. novel_downloader/core/exporters/linovelib/txt.py +76 -66
  38. novel_downloader/core/exporters/qidian.py +15 -11
  39. novel_downloader/core/exporters/registry.py +55 -0
  40. novel_downloader/core/exporters/txt_util.py +67 -0
  41. novel_downloader/core/fetchers/__init__.py +57 -56
  42. novel_downloader/core/fetchers/aaatxt.py +83 -0
  43. novel_downloader/core/fetchers/{biquge/session.py → b520.py} +10 -10
  44. novel_downloader/core/fetchers/{base/session.py → base.py} +63 -47
  45. novel_downloader/core/fetchers/biquyuedu.py +83 -0
  46. novel_downloader/core/fetchers/dxmwx.py +110 -0
  47. novel_downloader/core/fetchers/eightnovel.py +139 -0
  48. novel_downloader/core/fetchers/{esjzone/session.py → esjzone.py} +23 -11
  49. novel_downloader/core/fetchers/guidaye.py +85 -0
  50. novel_downloader/core/fetchers/hetushu.py +92 -0
  51. novel_downloader/core/fetchers/{qianbi/browser.py → i25zw.py} +22 -26
  52. novel_downloader/core/fetchers/ixdzs8.py +113 -0
  53. novel_downloader/core/fetchers/jpxs123.py +101 -0
  54. novel_downloader/core/fetchers/{biquge/browser.py → lewenn.py} +15 -15
  55. novel_downloader/core/fetchers/{linovelib/session.py → linovelib.py} +16 -12
  56. novel_downloader/core/fetchers/piaotia.py +105 -0
  57. novel_downloader/core/fetchers/qbtr.py +101 -0
  58. novel_downloader/core/fetchers/{qianbi/session.py → qianbi.py} +9 -9
  59. novel_downloader/core/fetchers/{qidian/session.py → qidian.py} +55 -40
  60. novel_downloader/core/fetchers/quanben5.py +92 -0
  61. novel_downloader/core/fetchers/{base/rate_limiter.py → rate_limiter.py} +2 -2
  62. novel_downloader/core/fetchers/registry.py +60 -0
  63. novel_downloader/core/fetchers/{sfacg/session.py → sfacg.py} +11 -9
  64. novel_downloader/core/fetchers/shencou.py +106 -0
  65. novel_downloader/core/fetchers/{common/browser.py → shuhaige.py} +24 -19
  66. novel_downloader/core/fetchers/tongrenquan.py +84 -0
  67. novel_downloader/core/fetchers/ttkan.py +95 -0
  68. novel_downloader/core/fetchers/{common/session.py → wanbengo.py} +21 -17
  69. novel_downloader/core/fetchers/xiaoshuowu.py +106 -0
  70. novel_downloader/core/fetchers/xiguashuwu.py +177 -0
  71. novel_downloader/core/fetchers/xs63b.py +171 -0
  72. novel_downloader/core/fetchers/xshbook.py +85 -0
  73. novel_downloader/core/fetchers/{yamibo/session.py → yamibo.py} +23 -11
  74. novel_downloader/core/fetchers/yibige.py +114 -0
  75. novel_downloader/core/interfaces/__init__.py +8 -14
  76. novel_downloader/core/interfaces/downloader.py +6 -2
  77. novel_downloader/core/interfaces/exporter.py +7 -7
  78. novel_downloader/core/interfaces/fetcher.py +4 -17
  79. novel_downloader/core/interfaces/parser.py +5 -6
  80. novel_downloader/core/interfaces/searcher.py +26 -0
  81. novel_downloader/core/parsers/__init__.py +58 -22
  82. novel_downloader/core/parsers/aaatxt.py +132 -0
  83. novel_downloader/core/parsers/b520.py +116 -0
  84. novel_downloader/core/parsers/base.py +63 -12
  85. novel_downloader/core/parsers/biquyuedu.py +133 -0
  86. novel_downloader/core/parsers/dxmwx.py +162 -0
  87. novel_downloader/core/parsers/eightnovel.py +224 -0
  88. novel_downloader/core/parsers/{esjzone/main_parser.py → esjzone.py} +67 -67
  89. novel_downloader/core/parsers/guidaye.py +128 -0
  90. novel_downloader/core/parsers/hetushu.py +139 -0
  91. novel_downloader/core/parsers/i25zw.py +137 -0
  92. novel_downloader/core/parsers/ixdzs8.py +186 -0
  93. novel_downloader/core/parsers/jpxs123.py +137 -0
  94. novel_downloader/core/parsers/lewenn.py +142 -0
  95. novel_downloader/core/parsers/{linovelib/main_parser.py → linovelib.py} +54 -65
  96. novel_downloader/core/parsers/piaotia.py +189 -0
  97. novel_downloader/core/parsers/qbtr.py +136 -0
  98. novel_downloader/core/parsers/{qianbi/main_parser.py → qianbi.py} +54 -51
  99. novel_downloader/core/parsers/qidian/__init__.py +2 -2
  100. novel_downloader/core/parsers/qidian/book_info_parser.py +58 -59
  101. novel_downloader/core/parsers/qidian/chapter_encrypted.py +290 -346
  102. novel_downloader/core/parsers/qidian/chapter_normal.py +25 -56
  103. novel_downloader/core/parsers/qidian/main_parser.py +19 -57
  104. novel_downloader/core/parsers/qidian/utils/__init__.py +12 -11
  105. novel_downloader/core/parsers/qidian/utils/decryptor_fetcher.py +6 -7
  106. novel_downloader/core/parsers/qidian/utils/fontmap_recover.py +143 -0
  107. novel_downloader/core/parsers/qidian/utils/helpers.py +0 -4
  108. novel_downloader/core/parsers/qidian/utils/node_decryptor.py +2 -2
  109. novel_downloader/core/parsers/quanben5.py +103 -0
  110. novel_downloader/core/parsers/registry.py +57 -0
  111. novel_downloader/core/parsers/{sfacg/main_parser.py → sfacg.py} +46 -48
  112. novel_downloader/core/parsers/shencou.py +215 -0
  113. novel_downloader/core/parsers/shuhaige.py +111 -0
  114. novel_downloader/core/parsers/tongrenquan.py +116 -0
  115. novel_downloader/core/parsers/ttkan.py +132 -0
  116. novel_downloader/core/parsers/wanbengo.py +191 -0
  117. novel_downloader/core/parsers/xiaoshuowu.py +173 -0
  118. novel_downloader/core/parsers/xiguashuwu.py +435 -0
  119. novel_downloader/core/parsers/xs63b.py +161 -0
  120. novel_downloader/core/parsers/xshbook.py +134 -0
  121. novel_downloader/core/parsers/yamibo.py +155 -0
  122. novel_downloader/core/parsers/yibige.py +166 -0
  123. novel_downloader/core/searchers/__init__.py +51 -0
  124. novel_downloader/core/searchers/aaatxt.py +107 -0
  125. novel_downloader/core/searchers/b520.py +84 -0
  126. novel_downloader/core/searchers/base.py +168 -0
  127. novel_downloader/core/searchers/dxmwx.py +105 -0
  128. novel_downloader/core/searchers/eightnovel.py +84 -0
  129. novel_downloader/core/searchers/esjzone.py +102 -0
  130. novel_downloader/core/searchers/hetushu.py +92 -0
  131. novel_downloader/core/searchers/i25zw.py +93 -0
  132. novel_downloader/core/searchers/ixdzs8.py +107 -0
  133. novel_downloader/core/searchers/jpxs123.py +107 -0
  134. novel_downloader/core/searchers/piaotia.py +100 -0
  135. novel_downloader/core/searchers/qbtr.py +106 -0
  136. novel_downloader/core/searchers/qianbi.py +165 -0
  137. novel_downloader/core/searchers/quanben5.py +144 -0
  138. novel_downloader/core/searchers/registry.py +79 -0
  139. novel_downloader/core/searchers/shuhaige.py +124 -0
  140. novel_downloader/core/searchers/tongrenquan.py +110 -0
  141. novel_downloader/core/searchers/ttkan.py +92 -0
  142. novel_downloader/core/searchers/xiaoshuowu.py +122 -0
  143. novel_downloader/core/searchers/xiguashuwu.py +95 -0
  144. novel_downloader/core/searchers/xs63b.py +104 -0
  145. novel_downloader/locales/en.json +36 -79
  146. novel_downloader/locales/zh.json +37 -80
  147. novel_downloader/models/__init__.py +23 -50
  148. novel_downloader/models/book.py +44 -0
  149. novel_downloader/models/config.py +16 -43
  150. novel_downloader/models/login.py +1 -1
  151. novel_downloader/models/search.py +21 -0
  152. novel_downloader/resources/config/settings.toml +39 -74
  153. novel_downloader/resources/css_styles/intro.css +83 -0
  154. novel_downloader/resources/css_styles/main.css +30 -89
  155. novel_downloader/resources/json/xiguashuwu.json +718 -0
  156. novel_downloader/utils/__init__.py +43 -0
  157. novel_downloader/utils/chapter_storage.py +247 -226
  158. novel_downloader/utils/constants.py +5 -50
  159. novel_downloader/utils/cookies.py +6 -18
  160. novel_downloader/utils/crypto_utils/__init__.py +13 -0
  161. novel_downloader/utils/crypto_utils/aes_util.py +90 -0
  162. novel_downloader/utils/crypto_utils/aes_v1.py +619 -0
  163. novel_downloader/utils/crypto_utils/aes_v2.py +1143 -0
  164. novel_downloader/utils/{crypto_utils.py → crypto_utils/rc4.py} +3 -10
  165. novel_downloader/utils/epub/__init__.py +34 -0
  166. novel_downloader/utils/epub/builder.py +377 -0
  167. novel_downloader/utils/epub/constants.py +118 -0
  168. novel_downloader/utils/epub/documents.py +297 -0
  169. novel_downloader/utils/epub/models.py +120 -0
  170. novel_downloader/utils/epub/utils.py +179 -0
  171. novel_downloader/utils/file_utils/__init__.py +5 -30
  172. novel_downloader/utils/file_utils/io.py +9 -150
  173. novel_downloader/utils/file_utils/normalize.py +2 -2
  174. novel_downloader/utils/file_utils/sanitize.py +2 -7
  175. novel_downloader/utils/fontocr.py +207 -0
  176. novel_downloader/utils/i18n.py +2 -0
  177. novel_downloader/utils/logger.py +10 -16
  178. novel_downloader/utils/network.py +111 -252
  179. novel_downloader/utils/state.py +5 -90
  180. novel_downloader/utils/text_utils/__init__.py +16 -21
  181. novel_downloader/utils/text_utils/diff_display.py +6 -9
  182. novel_downloader/utils/text_utils/numeric_conversion.py +253 -0
  183. novel_downloader/utils/text_utils/text_cleaner.py +179 -0
  184. novel_downloader/utils/text_utils/truncate_utils.py +62 -0
  185. novel_downloader/utils/time_utils/__init__.py +6 -12
  186. novel_downloader/utils/time_utils/datetime_utils.py +23 -33
  187. novel_downloader/utils/time_utils/sleep_utils.py +5 -10
  188. novel_downloader/web/__init__.py +13 -0
  189. novel_downloader/web/components/__init__.py +11 -0
  190. novel_downloader/web/components/navigation.py +35 -0
  191. novel_downloader/web/main.py +66 -0
  192. novel_downloader/web/pages/__init__.py +17 -0
  193. novel_downloader/web/pages/download.py +78 -0
  194. novel_downloader/web/pages/progress.py +147 -0
  195. novel_downloader/web/pages/search.py +329 -0
  196. novel_downloader/web/services/__init__.py +17 -0
  197. novel_downloader/web/services/client_dialog.py +164 -0
  198. novel_downloader/web/services/cred_broker.py +113 -0
  199. novel_downloader/web/services/cred_models.py +35 -0
  200. novel_downloader/web/services/task_manager.py +264 -0
  201. novel_downloader-2.0.0.dist-info/METADATA +171 -0
  202. novel_downloader-2.0.0.dist-info/RECORD +210 -0
  203. {novel_downloader-1.4.5.dist-info → novel_downloader-2.0.0.dist-info}/entry_points.txt +1 -1
  204. novel_downloader/config/site_rules.py +0 -94
  205. novel_downloader/core/downloaders/biquge.py +0 -25
  206. novel_downloader/core/downloaders/esjzone.py +0 -25
  207. novel_downloader/core/downloaders/linovelib.py +0 -25
  208. novel_downloader/core/downloaders/sfacg.py +0 -25
  209. novel_downloader/core/downloaders/yamibo.py +0 -25
  210. novel_downloader/core/exporters/biquge.py +0 -25
  211. novel_downloader/core/exporters/esjzone.py +0 -25
  212. novel_downloader/core/exporters/qianbi.py +0 -25
  213. novel_downloader/core/exporters/sfacg.py +0 -25
  214. novel_downloader/core/exporters/yamibo.py +0 -25
  215. novel_downloader/core/factory/__init__.py +0 -20
  216. novel_downloader/core/factory/downloader.py +0 -73
  217. novel_downloader/core/factory/exporter.py +0 -58
  218. novel_downloader/core/factory/fetcher.py +0 -96
  219. novel_downloader/core/factory/parser.py +0 -86
  220. novel_downloader/core/fetchers/base/__init__.py +0 -14
  221. novel_downloader/core/fetchers/base/browser.py +0 -403
  222. novel_downloader/core/fetchers/biquge/__init__.py +0 -14
  223. novel_downloader/core/fetchers/common/__init__.py +0 -14
  224. novel_downloader/core/fetchers/esjzone/__init__.py +0 -14
  225. novel_downloader/core/fetchers/esjzone/browser.py +0 -204
  226. novel_downloader/core/fetchers/linovelib/__init__.py +0 -14
  227. novel_downloader/core/fetchers/linovelib/browser.py +0 -193
  228. novel_downloader/core/fetchers/qianbi/__init__.py +0 -14
  229. novel_downloader/core/fetchers/qidian/__init__.py +0 -14
  230. novel_downloader/core/fetchers/qidian/browser.py +0 -318
  231. novel_downloader/core/fetchers/sfacg/__init__.py +0 -14
  232. novel_downloader/core/fetchers/sfacg/browser.py +0 -189
  233. novel_downloader/core/fetchers/yamibo/__init__.py +0 -14
  234. novel_downloader/core/fetchers/yamibo/browser.py +0 -229
  235. novel_downloader/core/parsers/biquge/__init__.py +0 -10
  236. novel_downloader/core/parsers/biquge/main_parser.py +0 -134
  237. novel_downloader/core/parsers/common/__init__.py +0 -13
  238. novel_downloader/core/parsers/common/helper.py +0 -323
  239. novel_downloader/core/parsers/common/main_parser.py +0 -106
  240. novel_downloader/core/parsers/esjzone/__init__.py +0 -10
  241. novel_downloader/core/parsers/linovelib/__init__.py +0 -10
  242. novel_downloader/core/parsers/qianbi/__init__.py +0 -10
  243. novel_downloader/core/parsers/sfacg/__init__.py +0 -10
  244. novel_downloader/core/parsers/yamibo/__init__.py +0 -10
  245. novel_downloader/core/parsers/yamibo/main_parser.py +0 -194
  246. novel_downloader/models/browser.py +0 -21
  247. novel_downloader/models/chapter.py +0 -25
  248. novel_downloader/models/site_rules.py +0 -99
  249. novel_downloader/models/tasks.py +0 -33
  250. novel_downloader/models/types.py +0 -15
  251. novel_downloader/resources/css_styles/volume-intro.css +0 -56
  252. novel_downloader/resources/json/replace_word_map.json +0 -4
  253. novel_downloader/resources/text/blacklist.txt +0 -22
  254. novel_downloader/tui/__init__.py +0 -7
  255. novel_downloader/tui/app.py +0 -32
  256. novel_downloader/tui/main.py +0 -17
  257. novel_downloader/tui/screens/__init__.py +0 -14
  258. novel_downloader/tui/screens/home.py +0 -198
  259. novel_downloader/tui/screens/login.py +0 -74
  260. novel_downloader/tui/styles/home_layout.tcss +0 -79
  261. novel_downloader/tui/widgets/richlog_handler.py +0 -24
  262. novel_downloader/utils/cache.py +0 -24
  263. novel_downloader/utils/fontocr/__init__.py +0 -22
  264. novel_downloader/utils/fontocr/model_loader.py +0 -69
  265. novel_downloader/utils/fontocr/ocr_v1.py +0 -303
  266. novel_downloader/utils/fontocr/ocr_v2.py +0 -752
  267. novel_downloader/utils/hash_store.py +0 -279
  268. novel_downloader/utils/hash_utils.py +0 -103
  269. novel_downloader/utils/text_utils/chapter_formatting.py +0 -46
  270. novel_downloader/utils/text_utils/font_mapping.py +0 -28
  271. novel_downloader/utils/text_utils/text_cleaning.py +0 -107
  272. novel_downloader-1.4.5.dist-info/METADATA +0 -196
  273. novel_downloader-1.4.5.dist-info/RECORD +0 -165
  274. {novel_downloader-1.4.5.dist-info → novel_downloader-2.0.0.dist-info}/WHEEL +0 -0
  275. {novel_downloader-1.4.5.dist-info → novel_downloader-2.0.0.dist-info}/licenses/LICENSE +0 -0
  276. {novel_downloader-1.4.5.dist-info → novel_downloader-2.0.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,253 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ novel_downloader.utils.text_utils.numeric_conversion
4
+ ----------------------------------------------------
5
+
6
+ Utility functions to convert between Chinese numeral strings
7
+ and Python integers.
8
+ """
9
+
10
# Digit characters (everyday and financial forms) mapped to their value 0-9.
CHINESE_NUMERALS = {
    "零": 0,
    "〇": 0,
    "一": 1,
    "壹": 1,
    "二": 2,
    "两": 2,
    "贰": 2,
    "貮": 2,
    "三": 3,
    "叁": 3,
    "四": 4,
    "肆": 4,
    "五": 5,
    "伍": 5,
    "六": 6,
    "陆": 6,
    "七": 7,
    "柒": 7,
    "八": 8,
    "捌": 8,
    "九": 9,
    "玖": 9,
}

# Every multiplier character the parser understands, small and large.
CHINESE_UNITS = {
    "十": 10,
    "拾": 10,
    "百": 100,
    "佰": 100,
    "千": 1000,
    "仟": 1000,
    "万": 10_000,
    "萬": 10_000,
    "亿": 100_000_000,
    "億": 100_000_000,
    "兆": 10**12,
    "京": 10**16,
    "垓": 10**20,
}

# Section separators, ordered from the largest magnitude down to the smallest;
# chinese_to_arabic() relies on this order when splitting its input.
LARGE_UNITS = [
    ("垓", 10**20),
    ("京", 10**16),
    ("兆", 10**12),
    ("亿", 10**8),
    ("億", 10**8),
    ("万", 10**4),
    ("萬", 10**4),
]


def chinese_to_arabic(s: str) -> int:
    """
    Convert a Chinese numeral string into its integer value.

    The string is split on the large units (垓, 京, 兆, 亿, 万) from the
    largest to the smallest. The text to the left of each large unit is
    parsed recursively, so compound multipliers such as "二十万亿" are
    multiplied together instead of being added.

    Examples:
    ---
    >>> chinese_to_arabic("一千二百三十四")
    1234
    >>> chinese_to_arabic("负一千二百三十四")
    -1234
    >>> chinese_to_arabic("一万零三")
    10003
    >>> chinese_to_arabic("三亿二千五百")
    300002500
    >>> chinese_to_arabic("二十万亿")
    20000000000000

    :param s: A string of Chinese numerals, e.g. "三千零二十一", "五亿零七万".
              A leading "负" or "-" marks a negative number.
    :return: The integer value represented by the input string.
    :raises ValueError: If `s` is empty or consists of a sign only.
    :raises KeyError: If `s` contains characters not found in the supported
                      numeral or unit mappings.
    """
    if not s:
        raise ValueError("Input string is empty")

    sign = 1
    if s[0] in ("负", "-"):
        sign = -1
        s = s[1:]
        if not s:
            # A bare sign carries no digits; treat it like empty input.
            raise ValueError("Input string is empty")

    def _parse_section(sec: str) -> int:
        """Parse a run of digits and units that needs no further splitting."""
        num = 0
        section_total = 0
        for ch in sec:
            if ch in CHINESE_NUMERALS:
                # Consecutive digits are read positionally ("一二" -> 12).
                num = num * 10 + CHINESE_NUMERALS[ch]
            else:
                unit = CHINESE_UNITS[ch]
                # A unit without a preceding digit counts once ("十" -> 10).
                section_total += (num or 1) * unit
                num = 0
        return section_total + num

    def _parse(text: str) -> int:
        """Split `text` on large units, largest first, and sum the parts."""
        total = 0
        rest = text
        for char, val in LARGE_UNITS:
            if char in rest:
                left, rest = rest.split(char, 1)
                # Recurse so that a smaller large unit inside `left`
                # (e.g. the 万 in "三千五百万亿") scales what precedes it.
                total += _parse(left) * val
        return total + _parse_section(rest)

    return sign * _parse(s)
113
+
114
+
115
def arabic_to_chinese(num: int) -> str:
    """
    Convert an integer to its Chinese numeral representation.

    The number is processed in groups of four decimal digits. Each group is
    rendered with 千/百/十 and followed by its large unit (万, 亿, 兆, 京, 垓).
    A "零" placeholder is inserted wherever digits are skipped, both inside
    a group and between groups.

    Examples:
    ---
    >>> arabic_to_chinese(0)
    '零'
    >>> arabic_to_chinese(1234)
    '一千二百三十四'
    >>> arabic_to_chinese(10003)
    '一万零三'
    >>> arabic_to_chinese(-205)
    '负二百零五'
    >>> arabic_to_chinese(3000002500)
    '三十亿零二千五百'

    :param num: The integer to convert (e.g. 42, -1300).
    :return: The Chinese-numeral string for `num`.
    :raises TypeError: If `num` is not an integer.
    :raises IndexError: If `abs(num)` is 10**24 or larger, i.e. beyond the
                        largest supported unit (垓).
    """
    if not isinstance(num, int):
        raise TypeError("Input must be an integer.")
    if num == 0:
        return "零"

    digits = "零一二三四五六七八九"
    small_units = ["", "十", "百", "千"]
    big_units = ["", "万", "亿", "兆", "京", "垓"]

    negative = num < 0
    num = -num if negative else num

    def _section_to_chinese(sec: int) -> str:
        """
        Convert a value 1..9999 into Chinese using 千/百/十 units,
        without any large unit (万, 亿, ...) or leading '零'.
        """
        s = ""
        unit_pos = 0
        # Starts True so that trailing zeros of the group emit nothing.
        zero_flag = True
        while sec > 0:
            d = sec % 10
            if d == 0:
                # only emit one '零' for consecutive zeros
                if not zero_flag:
                    s = digits[0] + s
                    zero_flag = True
            else:
                s = digits[d] + small_units[unit_pos] + s
                zero_flag = False
            unit_pos += 1
            sec //= 10
        return s

    result = ""
    section_pos = 0

    while num > 0:
        # After divmod, `num` holds only the groups to the left of `section`.
        num, section = divmod(num, 10_000)
        if section != 0:
            sec_str = _section_to_chinese(section)
            if num > 0 and section < 1000:
                # The thousands digit of a non-leading group is missing, so a
                # placeholder is required: 10003 -> "一万零三", not "一万三".
                sec_str = "零" + sec_str
            result = sec_str + big_units[section_pos] + result
        elif result and not result.startswith("零"):
            # An all-zero group between non-zero groups needs one '零'
            # separator, unless the text to its right already starts with one.
            result = "零" + result
        section_pos += 1

    if negative:
        result = "负" + result

    return result
191
+
192
+
193
if __name__ == "__main__":
    # Ad-hoc self-check, executed only when the module is run as a script.
    import random

    RED = "\033[91m"
    GREEN = "\033[92m"
    RESET = "\033[0m"
    random.seed(42)  # fixed seed keeps the random samples reproducible

    # --- 1. Hand-written parsing cases ---------------------------------
    num_list = [
        ("一千二百三十四", 1234),
        ("一万五千", 15000),
        ("一万零三", 10003),
        ("三亿二千五百", 300002500),
    ]
    fail_count = 0
    print("=== chinese_to_arabic() with fixed cases ===")
    for text, expected in num_list:
        actual = chinese_to_arabic(text)
        if actual == expected:
            continue
        print(f"{RED}FAIL:{RESET} “{text}” -> expected {expected}, got {actual}")
        fail_count += 1

    print(
        f"{RED}{fail_count} chinese_to_arabic() tests failed.{RESET}\n"
        if fail_count
        else f"{GREEN}All {len(num_list)} chinese_to_arabic() tests passed!{RESET}\n"
    )

    # --- 2. Exhaustive round-trip over 0..9999 -------------------------
    fail_count = 0
    print("=== Round-trip test for values 0 - 9999 ===")
    for value in range(10_000):
        text = arabic_to_chinese(value)
        back = chinese_to_arabic(text)
        if back != value:
            print(f'{RED}FAIL round-trip:{RESET} {value} -> "{text}" -> {back}')
            fail_count += 1
            break  # stop at the first mismatch

    print(
        f"{RED}{fail_count} round-trip failures in 0 - 9999.{RESET}\n"
        if fail_count
        else f"{GREEN}0 - 9999 round-trip all passed!{RESET}\n"
    )

    # --- 3. Random round-trips, two samples per decade, both signs -----
    fail_count = 0
    exponents = range(5, 22)  # test around 10^5...
    print("=== Random round-trip at larger scales ===")
    for exp in exponents:
        lower, upper = 10**exp, 10 ** (exp + 1)
        for _ in range(2):
            sample = random.randint(lower, upper - 1)
            for val in (sample, -sample):
                text = arabic_to_chinese(val)
                back = chinese_to_arabic(text)
                if back != val:
                    print(f'{RED}FAIL:{RESET} {val} -> "{text}" -> {back}')
                    fail_count += 1

    print(
        f"{RED}{fail_count} random large-scale failures.{RESET}"
        if fail_count
        else f"{GREEN}All random large-scale round-trips passed!{RESET}"
    )
@@ -0,0 +1,179 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ novel_downloader.utils.text_utils.text_cleaner
4
+ ----------------------------------------------
5
+
6
+ Provides utilities to clean novel titles and content
7
+ by removing unwanted patterns, replacing strings.
8
+ """
9
+
10
+ import re
11
+ from re import Match, Pattern
12
+ from typing import Protocol, runtime_checkable
13
+
14
+ from novel_downloader.models import TextCleanerConfig
15
+
16
+
17
@runtime_checkable
class Cleaner(Protocol):
    """
    Structural interface for text cleaners.

    Any object exposing `clean_title`, `clean_content` and `clean` satisfies
    this protocol; because it is runtime-checkable, `isinstance()` can be
    used to test for the presence of those methods.
    """

    def clean_title(self, text: str) -> str:
        """Return the cleaned form of a title string."""
        ...

    def clean_content(self, text: str) -> str:
        """Return the cleaned form of a content string."""
        ...

    def clean(self, text: str, *, as_title: bool = False) -> str:
        """Clean `text`; `as_title` selects between title and content handling."""
        ...
27
+
28
+
29
class NullCleaner(Cleaner):
    """Cleaner that performs no cleaning: every method returns its input as is."""

    def clean(self, text: str, *, as_title: bool = False) -> str:
        """Return `text` unchanged; `as_title` is accepted but has no effect."""
        return text

    def clean_title(self, text: str) -> str:
        """Return the title unchanged."""
        return text

    def clean_content(self, text: str) -> str:
        """Return the content unchanged."""
        return text
38
+
39
+
40
class TextCleaner(Cleaner):
    """
    TextCleaner removes invisible characters, strips unwanted patterns,
    and applies literal replacements in a single pass using a combined regex.

    When no pattern or replacement key is configured, a pattern that can
    never match is compiled instead. For background, reference:

    https://stackoverflow.com/questions/2930182/regex-to-not-match-anything
    """

    _INVISIBLE_PATTERN: Pattern[str] = re.compile(r"[\ufeff\u200B\u200C\u200D\u2060]")

    # Negative lookahead on the empty pattern: fails at every position,
    # with or without re.MULTILINE. (The spelling `$^` does match the empty
    # string, and every empty line once re.MULTILINE is set.)
    _NEVER_MATCH: str = r"(?!)"

    def __init__(self, config: TextCleanerConfig) -> None:
        """
        Initialize TextCleaner with the given configuration.

        :param config: TextCleanerConfig instance containing:

            - remove_invisible: whether to strip BOM/zero-width chars
            - title_remove_patterns: list of regex patterns to delete from titles
            - content_remove_patterns: list of regex patterns to delete from content
            - title_replacements: dict of literal replacements for titles
            - content_replacements: dict of literal replacements for content
        """
        self._remove_invisible = config.remove_invisible

        # Literal-to-literal replacement maps, looked up by matched text.
        self._title_repl_map = config.title_replacements
        self._content_repl_map = config.content_replacements

        # One combined regex per kind of text: every removal pattern OR
        # every escaped replacement key.
        self._title_combined_rx: Pattern[str] = self._build_combined_rx(
            config.title_remove_patterns, self._title_repl_map
        )
        # Content is compiled in multiline mode so that ^ and $ in removal
        # patterns anchor to individual lines.
        self._content_combined_rx: Pattern[str] = self._build_combined_rx(
            config.content_remove_patterns,
            self._content_repl_map,
            flags=re.MULTILINE,
        )

    @classmethod
    def _build_combined_rx(
        cls,
        remove_patterns: list[str],
        repl_map: dict[str, str],
        flags: int = 0,
    ) -> Pattern[str]:
        """
        Compile one alternation from removal patterns and replacement keys.

        :param remove_patterns: Regex patterns whose matches are deleted.
        :param repl_map: Literal replacements; only the keys are used here.
        :param flags: Flags passed through to `re.compile`.
        :return: Compiled alternation, or a never-matching pattern when
                 there is nothing to match.
        """
        # Deduplicate removal patterns (keep order). Empty patterns and empty
        # keys are dropped: as alternatives they would match the empty string
        # at every position.
        parts = [p for p in dict.fromkeys(remove_patterns) if p]
        parts.extend(re.escape(key) for key in repl_map if key)
        # longer first to avoid prefix collisions
        parts.sort(key=len, reverse=True)
        pattern = "|".join(parts) if parts else cls._NEVER_MATCH
        return re.compile(pattern, flags=flags)

    def clean_title(self, text: str) -> str:
        """
        Clean a title string.

        Steps:
          1. Optionally strip BOM & zero-width characters.
          2. Remove unwanted patterns and apply literal replacements in one pass.
          3. Trim leading/trailing whitespace.

        :param text: Raw title text.
        :return: Cleaned title.
        """
        return self._do_clean(text, self._title_combined_rx, self._title_repl_map)

    def clean_content(self, text: str) -> str:
        """
        Clean a content string.

        Steps:
          1. Optionally strip BOM & zero-width characters.
          2. Remove unwanted patterns and apply literal replacements in one pass.
          3. Trim leading/trailing whitespace.

        :param text: Raw content/body text.
        :return: Cleaned content.
        """
        return self._do_clean(text, self._content_combined_rx, self._content_repl_map)

    def clean(self, text: str, *, as_title: bool = False) -> str:
        """
        Unified clean method to process text as either title or content.

        :param text: Raw text to clean.
        :param as_title: If True, use title rules; otherwise content rules.
        :return: Cleaned text.
        """
        return self.clean_title(text) if as_title else self.clean_content(text)

    @classmethod
    def _remove_bom_and_invisible(cls, text: str) -> str:
        """
        Remove BOM and zero-width/invisible characters from the text.

        Matches:
          - U+FEFF (BOM)
          - U+200B ZERO WIDTH SPACE
          - U+200C ZERO WIDTH NON-JOINER
          - U+200D ZERO WIDTH JOINER
          - U+2060 WORD JOINER

        :param text: Input string possibly containing invisible chars.
        :return: String with those characters stripped.
        """
        return cls._INVISIBLE_PATTERN.sub("", text)

    def _do_clean(
        self,
        text: str,
        combined_rx: Pattern[str],
        repl_map: dict[str, str],
    ) -> str:
        """
        Core cleaning logic:
        optional invisible removal, single-pass remove/replace, trimming.

        :param text: Text to clean.
        :param combined_rx: Compiled regex for removal patterns and replacement keys.
        :param repl_map: Mapping from matched token to replacement text.
        :return: Cleaned text.
        """
        # Strip invisible chars if configured
        if self._remove_invisible:
            text = self._remove_bom_and_invisible(text)

        # Single-pass removal & replacement
        def _sub(match: Match[str]) -> str:
            token = match.group(0)
            if not token:
                # A removal pattern matched the empty string: nothing to
                # delete, and it must not pick up an empty-key replacement.
                return ""
            # If token in repl_map -> replacement; else -> delete (empty string)
            return repl_map.get(token, "")

        text = combined_rx.sub(_sub, text)
        return text.strip()
173
+
174
+
175
def get_cleaner(
    enabled: bool,
    config: TextCleanerConfig,
) -> Cleaner:
    """
    Select the cleaner implementation to use.

    :param enabled: Whether cleaning should be performed.
    :param config: Configuration handed to TextCleaner when cleaning is enabled.
    :return: A TextCleaner built from `config` if `enabled` is true,
             otherwise a NullCleaner.
    """
    if not enabled:
        return NullCleaner()
    return TextCleaner(config)
@@ -0,0 +1,62 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ novel_downloader.utils.text_utils.truncate_utils
4
+ ------------------------------------------------
5
+
6
+ Tools for truncating text.
7
+ """
8
+
9
+ __all__ = [
10
+ "content_prefix",
11
+ "truncate_half_lines",
12
+ ]
13
+
14
+ import math
15
+
16
+
17
+ def content_prefix(
18
+ text: str,
19
+ n: int,
20
+ ignore_chars: set[str] | None = None,
21
+ ) -> str:
22
+ """
23
+ Return the prefix of `text` containing the first `n` non-ignored characters.
24
+
25
+ :param text: The full input string.
26
+ :param n: Number of content characters to include.
27
+ :param ignore_chars: Characters to ignore when counting content.
28
+ :return: Truncated string preserving original whitespace and line breaks.
29
+ """
30
+ ignore = ignore_chars or set()
31
+ cnt = 0
32
+
33
+ for i, ch in enumerate(text):
34
+ if ch not in ignore:
35
+ cnt += 1
36
+ if cnt >= n:
37
+ return text[: i + 1]
38
+
39
+ return text
40
+
41
+
42
def truncate_half_lines(text: str) -> str:
    """
    Keep the leading lines of `text` up to and including the line that
    completes the first half (rounded up) of its non-blank lines.

    Blank lines that occur before that cut-off are kept but not counted.
    Lines are split with `str.splitlines()` and rejoined with "\\n".

    :param text: Full input text
    :return: Truncated text with first half of lines
    """
    lines = text.splitlines()
    content_total = sum(1 for ln in lines if ln.strip())
    quota = math.ceil(content_total / 2)

    seen = 0
    for idx, ln in enumerate(lines):
        if not ln.strip():
            continue  # blank lines never count towards the quota
        seen += 1
        if seen >= quota:
            return "\n".join(lines[: idx + 1])

    # Reached only when there is no non-blank line at all.
    return "\n".join(lines)
@@ -4,19 +4,13 @@ novel_downloader.utils.time_utils
4
4
  ---------------------------------
5
5
 
6
6
  Utility functions for time and date-related operations.
7
-
8
- Includes:
9
- - calculate_time_difference:
10
- Computes time delta between two timezone-aware datetime strings.
11
- - sleep_with_random_delay:
12
- Sleeps for a random duration, useful for human-like delays or rate limiting.
13
7
  """
14
8
 
15
- from .datetime_utils import calculate_time_difference
16
- from .sleep_utils import async_sleep_with_random_delay, sleep_with_random_delay
17
-
18
9
  __all__ = [
19
- "calculate_time_difference",
20
- "async_sleep_with_random_delay",
21
- "sleep_with_random_delay",
10
+ "time_diff",
11
+ "async_jitter_sleep",
12
+ "jitter_sleep",
22
13
  ]
14
+
15
+ from .datetime_utils import time_diff
16
+ from .sleep_utils import async_jitter_sleep, jitter_sleep
@@ -4,14 +4,12 @@ novel_downloader.utils.time_utils.datetime_utils
4
4
  ------------------------------------------------
5
5
 
6
6
  Time utility functions for timezone-aware date calculations.
7
-
8
- Includes:
9
- - _parse_utc_offset():
10
- Converts UTC offset string (e.g. 'UTC+8') to a timezone object.
11
- - calculate_time_difference():
12
- Computes timedelta between two datetime strings, with optional timezones.
13
7
  """
14
8
 
9
+ __all__ = [
10
+ "time_diff",
11
+ ]
12
+
15
13
  import logging
16
14
  import re
17
15
  from datetime import UTC, datetime, timedelta, timezone
@@ -49,7 +47,7 @@ def _parse_utc_offset(tz_str: str) -> timezone:
49
47
  Parse a timezone string like 'UTC+8' or 'UTC-5' into a datetime.timezone object.
50
48
 
51
49
  :param tz_str: Timezone in 'UTC±<hours>' format, e.g. 'UTC', 'UTC+8', 'UTC-05'
52
- :return: Corresponding timezone object
50
+ :return: Corresponding timezone object
53
51
  :raises ValueError: if tz_str is not a valid UTC offset format
54
52
  """
55
53
  tz_str_clean = tz_str.upper().strip()
@@ -70,20 +68,20 @@ def _parse_datetime_flexible(dt_str: str) -> datetime:
70
68
  """
71
69
  Parse a date/time string in any of several common formats:
72
70
 
73
- ISO 8601: 'YYYY-MM-DDTHH:MM:SSZ'
74
- ISO w/ offset: 'YYYY-MM-DDTHH:MM:SS+HH:MM'
75
- 'YYYY-MM-DD HH:MM:SS'
76
- 'YYYY-MM-DD' (time defaults to 00:00:00)
77
- 'YYYY/MM/DD HH:MM:SS'
78
- 'YYYY/MM/DD HH:MM'
79
- 'YYYY/MM/DD'
80
- 'MM/DD/YYYY HH:MM[:SS] AM/PM'
81
- 'MM/DD/YYYY'
82
- 'DD.MM.YYYY HH:MM'
83
- 'DD.MM.YYYY'
71
+ * ISO 8601: 'YYYY-MM-DDTHH:MM:SSZ'
72
+ * ISO w/ offset: 'YYYY-MM-DDTHH:MM:SS+HH:MM'
73
+ * 'YYYY-MM-DD HH:MM:SS'
74
+ * 'YYYY-MM-DD' (time defaults to 00:00:00)
75
+ * 'YYYY/MM/DD HH:MM:SS'
76
+ * 'YYYY/MM/DD HH:MM'
77
+ * 'YYYY/MM/DD'
78
+ * 'MM/DD/YYYY HH:MM[:SS] AM/PM'
79
+ * 'MM/DD/YYYY'
80
+ * 'DD.MM.YYYY HH:MM'
81
+ * 'DD.MM.YYYY'
84
82
 
85
83
  :param dt_str: Date/time string to parse.
86
- :return: A naive datetime object.
84
+ :return: A naive datetime object.
87
85
  :raises ValueError: If dt_str does not match the expected formats.
88
86
  """
89
87
  s = dt_str.strip()
@@ -91,13 +89,10 @@ def _parse_datetime_flexible(dt_str: str) -> datetime:
91
89
  if re.fullmatch(pattern, s):
92
90
  return datetime.strptime(s, fmt)
93
91
 
94
- supported = "\n".join(f" {fmt}" for _, fmt in _DATETIME_FORMATS)
95
- raise ValueError(
96
- f"Invalid date/time format: '{dt_str}'\n" f"Supported formats are:\n{supported}"
97
- )
92
+ raise ValueError(f"Invalid date/time format: '{dt_str}'")
98
93
 
99
94
 
100
- def calculate_time_difference(
95
+ def time_diff(
101
96
  from_time_str: str,
102
97
  tz_str: str = "UTC",
103
98
  to_time_str: str | None = None,
@@ -107,10 +102,10 @@ def calculate_time_difference(
107
102
  Calculate the difference between two datetime values.
108
103
 
109
104
  :param from_time_str: Date-time string "YYYY-MM-DD HH:MM:SS" for the start.
110
- :param tz_str: Timezone of from_time_str, e.g. 'UTC+8'. Defaults to 'UTC'.
111
- :param to_time_str: Optional date-time string for the end; if None, uses now().
112
- :param to_tz_str: Timezone of to_time_str. Defaults to 'UTC'.
113
- :return: Tuple (days, hours, minutes, seconds).
105
+ :param tz_str: Timezone of from_time_str, e.g. 'UTC+8'. Defaults to 'UTC'.
106
+ :param to_time_str: Optional date-time string for the end; if None, uses now().
107
+ :param to_tz_str: Timezone of to_time_str. Defaults to 'UTC'.
108
+ :return: Tuple (days, hours, minutes, seconds).
114
109
  """
115
110
  try:
116
111
  # parse start time
@@ -139,8 +134,3 @@ def calculate_time_difference(
139
134
  except Exception as e:
140
135
  logger.warning("[time] Failed to calculate time difference: %s", e)
141
136
  return 999, 23, 59, 59
142
-
143
-
144
- __all__ = [
145
- "calculate_time_difference",
146
- ]
@@ -4,12 +4,10 @@ novel_downloader.utils.time_utils.sleep_utils
4
4
  ---------------------------------------------
5
5
 
6
6
  Utilities for adding randomized delays in scripts and bots.
7
-
8
- Includes:
9
- - sleep_with_random_delay(): Sleep between base and base+spread seconds,
10
- optionally capped with a max_sleep limit.
11
7
  """
12
8
 
9
+ __all__ = ["jitter_sleep", "async_jitter_sleep"]
10
+
13
11
  import asyncio
14
12
  import logging
15
13
  import random
@@ -18,7 +16,7 @@ import time
18
16
  logger = logging.getLogger(__name__)
19
17
 
20
18
 
21
- def sleep_with_random_delay(
19
+ def jitter_sleep(
22
20
  base: float,
23
21
  add_spread: float = 0.0,
24
22
  mul_spread: float = 1.0,
@@ -39,7 +37,7 @@ def sleep_with_random_delay(
39
37
  :param mul_spread: Maximum multiplier factor for base; drawn from [1.0, mul_spread].
40
38
  :param max_sleep: Optional upper limit for the final sleep duration.
41
39
  """
42
- if base < 0 or add_spread < 0 or mul_spread < 0:
40
+ if base < 0 or add_spread < 0 or mul_spread < 1.0:
43
41
  logger.warning(
44
42
  "[sleep] Invalid parameters: base=%s, add_spread=%s, mul_spread=%s",
45
43
  base,
@@ -61,7 +59,7 @@ def sleep_with_random_delay(
61
59
  return
62
60
 
63
61
 
64
- async def async_sleep_with_random_delay(
62
+ async def async_jitter_sleep(
65
63
  base: float,
66
64
  add_spread: float = 0.0,
67
65
  mul_spread: float = 1.0,
@@ -100,6 +98,3 @@ async def async_sleep_with_random_delay(
100
98
 
101
99
  logger.debug("[async time] Sleeping for %.2f seconds", duration)
102
100
  await asyncio.sleep(duration)
103
-
104
-
105
- __all__ = ["sleep_with_random_delay", "async_sleep_with_random_delay"]
@@ -0,0 +1,13 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ novel_downloader.web
4
+ --------------------
5
+
6
+ This module exposes the WEB entry point.
7
+ """
8
+
9
+ __all__ = [
10
+ "web_main",
11
+ ]
12
+
13
+ from .main import web_main
@@ -0,0 +1,11 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ novel_downloader.web.components
4
+ -------------------------------
5
+
6
+ Entry point for reusable web UI components
7
+ """
8
+
9
+ __all__ = ["navbar"]
10
+
11
+ from .navigation import navbar