novel-downloader 1.4.5__py3-none-any.whl → 2.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- novel_downloader/__init__.py +1 -1
- novel_downloader/cli/__init__.py +2 -4
- novel_downloader/cli/clean.py +21 -88
- novel_downloader/cli/config.py +27 -104
- novel_downloader/cli/download.py +78 -66
- novel_downloader/cli/export.py +20 -21
- novel_downloader/cli/main.py +3 -1
- novel_downloader/cli/search.py +120 -0
- novel_downloader/cli/ui.py +156 -0
- novel_downloader/config/__init__.py +10 -14
- novel_downloader/config/adapter.py +195 -99
- novel_downloader/config/{loader.py → file_io.py} +53 -27
- novel_downloader/core/__init__.py +14 -13
- novel_downloader/core/archived/deqixs/fetcher.py +115 -0
- novel_downloader/core/archived/deqixs/parser.py +132 -0
- novel_downloader/core/archived/deqixs/searcher.py +89 -0
- novel_downloader/core/archived/qidian/searcher.py +79 -0
- novel_downloader/core/archived/wanbengo/searcher.py +98 -0
- novel_downloader/core/archived/xshbook/searcher.py +93 -0
- novel_downloader/core/downloaders/__init__.py +8 -30
- novel_downloader/core/downloaders/base.py +182 -30
- novel_downloader/core/downloaders/common.py +217 -384
- novel_downloader/core/downloaders/qianbi.py +332 -4
- novel_downloader/core/downloaders/qidian.py +250 -290
- novel_downloader/core/downloaders/registry.py +69 -0
- novel_downloader/core/downloaders/signals.py +46 -0
- novel_downloader/core/exporters/__init__.py +8 -26
- novel_downloader/core/exporters/base.py +107 -31
- novel_downloader/core/exporters/common/__init__.py +3 -4
- novel_downloader/core/exporters/common/epub.py +92 -171
- novel_downloader/core/exporters/common/main_exporter.py +14 -67
- novel_downloader/core/exporters/common/txt.py +90 -86
- novel_downloader/core/exporters/epub_util.py +184 -1327
- novel_downloader/core/exporters/linovelib/__init__.py +3 -2
- novel_downloader/core/exporters/linovelib/epub.py +165 -222
- novel_downloader/core/exporters/linovelib/main_exporter.py +10 -71
- novel_downloader/core/exporters/linovelib/txt.py +76 -66
- novel_downloader/core/exporters/qidian.py +15 -11
- novel_downloader/core/exporters/registry.py +55 -0
- novel_downloader/core/exporters/txt_util.py +67 -0
- novel_downloader/core/fetchers/__init__.py +57 -56
- novel_downloader/core/fetchers/aaatxt.py +83 -0
- novel_downloader/core/fetchers/{biquge/session.py → b520.py} +10 -10
- novel_downloader/core/fetchers/{base/session.py → base.py} +63 -47
- novel_downloader/core/fetchers/biquyuedu.py +83 -0
- novel_downloader/core/fetchers/dxmwx.py +110 -0
- novel_downloader/core/fetchers/eightnovel.py +139 -0
- novel_downloader/core/fetchers/{esjzone/session.py → esjzone.py} +23 -11
- novel_downloader/core/fetchers/guidaye.py +85 -0
- novel_downloader/core/fetchers/hetushu.py +92 -0
- novel_downloader/core/fetchers/{qianbi/browser.py → i25zw.py} +22 -26
- novel_downloader/core/fetchers/ixdzs8.py +113 -0
- novel_downloader/core/fetchers/jpxs123.py +101 -0
- novel_downloader/core/fetchers/{biquge/browser.py → lewenn.py} +15 -15
- novel_downloader/core/fetchers/{linovelib/session.py → linovelib.py} +16 -12
- novel_downloader/core/fetchers/piaotia.py +105 -0
- novel_downloader/core/fetchers/qbtr.py +101 -0
- novel_downloader/core/fetchers/{qianbi/session.py → qianbi.py} +9 -9
- novel_downloader/core/fetchers/{qidian/session.py → qidian.py} +55 -40
- novel_downloader/core/fetchers/quanben5.py +92 -0
- novel_downloader/core/fetchers/{base/rate_limiter.py → rate_limiter.py} +2 -2
- novel_downloader/core/fetchers/registry.py +60 -0
- novel_downloader/core/fetchers/{sfacg/session.py → sfacg.py} +11 -9
- novel_downloader/core/fetchers/shencou.py +106 -0
- novel_downloader/core/fetchers/{common/browser.py → shuhaige.py} +24 -19
- novel_downloader/core/fetchers/tongrenquan.py +84 -0
- novel_downloader/core/fetchers/ttkan.py +95 -0
- novel_downloader/core/fetchers/{common/session.py → wanbengo.py} +21 -17
- novel_downloader/core/fetchers/xiaoshuowu.py +106 -0
- novel_downloader/core/fetchers/xiguashuwu.py +177 -0
- novel_downloader/core/fetchers/xs63b.py +171 -0
- novel_downloader/core/fetchers/xshbook.py +85 -0
- novel_downloader/core/fetchers/{yamibo/session.py → yamibo.py} +23 -11
- novel_downloader/core/fetchers/yibige.py +114 -0
- novel_downloader/core/interfaces/__init__.py +8 -14
- novel_downloader/core/interfaces/downloader.py +6 -2
- novel_downloader/core/interfaces/exporter.py +7 -7
- novel_downloader/core/interfaces/fetcher.py +4 -17
- novel_downloader/core/interfaces/parser.py +5 -6
- novel_downloader/core/interfaces/searcher.py +26 -0
- novel_downloader/core/parsers/__init__.py +58 -22
- novel_downloader/core/parsers/aaatxt.py +132 -0
- novel_downloader/core/parsers/b520.py +116 -0
- novel_downloader/core/parsers/base.py +63 -12
- novel_downloader/core/parsers/biquyuedu.py +133 -0
- novel_downloader/core/parsers/dxmwx.py +162 -0
- novel_downloader/core/parsers/eightnovel.py +224 -0
- novel_downloader/core/parsers/{esjzone/main_parser.py → esjzone.py} +67 -67
- novel_downloader/core/parsers/guidaye.py +128 -0
- novel_downloader/core/parsers/hetushu.py +139 -0
- novel_downloader/core/parsers/i25zw.py +137 -0
- novel_downloader/core/parsers/ixdzs8.py +186 -0
- novel_downloader/core/parsers/jpxs123.py +137 -0
- novel_downloader/core/parsers/lewenn.py +142 -0
- novel_downloader/core/parsers/{linovelib/main_parser.py → linovelib.py} +54 -65
- novel_downloader/core/parsers/piaotia.py +189 -0
- novel_downloader/core/parsers/qbtr.py +136 -0
- novel_downloader/core/parsers/{qianbi/main_parser.py → qianbi.py} +54 -51
- novel_downloader/core/parsers/qidian/__init__.py +2 -2
- novel_downloader/core/parsers/qidian/book_info_parser.py +58 -59
- novel_downloader/core/parsers/qidian/chapter_encrypted.py +290 -346
- novel_downloader/core/parsers/qidian/chapter_normal.py +25 -56
- novel_downloader/core/parsers/qidian/main_parser.py +19 -57
- novel_downloader/core/parsers/qidian/utils/__init__.py +12 -11
- novel_downloader/core/parsers/qidian/utils/decryptor_fetcher.py +6 -7
- novel_downloader/core/parsers/qidian/utils/fontmap_recover.py +143 -0
- novel_downloader/core/parsers/qidian/utils/helpers.py +0 -4
- novel_downloader/core/parsers/qidian/utils/node_decryptor.py +2 -2
- novel_downloader/core/parsers/quanben5.py +103 -0
- novel_downloader/core/parsers/registry.py +57 -0
- novel_downloader/core/parsers/{sfacg/main_parser.py → sfacg.py} +46 -48
- novel_downloader/core/parsers/shencou.py +215 -0
- novel_downloader/core/parsers/shuhaige.py +111 -0
- novel_downloader/core/parsers/tongrenquan.py +116 -0
- novel_downloader/core/parsers/ttkan.py +132 -0
- novel_downloader/core/parsers/wanbengo.py +191 -0
- novel_downloader/core/parsers/xiaoshuowu.py +173 -0
- novel_downloader/core/parsers/xiguashuwu.py +435 -0
- novel_downloader/core/parsers/xs63b.py +161 -0
- novel_downloader/core/parsers/xshbook.py +134 -0
- novel_downloader/core/parsers/yamibo.py +155 -0
- novel_downloader/core/parsers/yibige.py +166 -0
- novel_downloader/core/searchers/__init__.py +51 -0
- novel_downloader/core/searchers/aaatxt.py +107 -0
- novel_downloader/core/searchers/b520.py +84 -0
- novel_downloader/core/searchers/base.py +168 -0
- novel_downloader/core/searchers/dxmwx.py +105 -0
- novel_downloader/core/searchers/eightnovel.py +84 -0
- novel_downloader/core/searchers/esjzone.py +102 -0
- novel_downloader/core/searchers/hetushu.py +92 -0
- novel_downloader/core/searchers/i25zw.py +93 -0
- novel_downloader/core/searchers/ixdzs8.py +107 -0
- novel_downloader/core/searchers/jpxs123.py +107 -0
- novel_downloader/core/searchers/piaotia.py +100 -0
- novel_downloader/core/searchers/qbtr.py +106 -0
- novel_downloader/core/searchers/qianbi.py +165 -0
- novel_downloader/core/searchers/quanben5.py +144 -0
- novel_downloader/core/searchers/registry.py +79 -0
- novel_downloader/core/searchers/shuhaige.py +124 -0
- novel_downloader/core/searchers/tongrenquan.py +110 -0
- novel_downloader/core/searchers/ttkan.py +92 -0
- novel_downloader/core/searchers/xiaoshuowu.py +122 -0
- novel_downloader/core/searchers/xiguashuwu.py +95 -0
- novel_downloader/core/searchers/xs63b.py +104 -0
- novel_downloader/locales/en.json +36 -79
- novel_downloader/locales/zh.json +37 -80
- novel_downloader/models/__init__.py +23 -50
- novel_downloader/models/book.py +44 -0
- novel_downloader/models/config.py +16 -43
- novel_downloader/models/login.py +1 -1
- novel_downloader/models/search.py +21 -0
- novel_downloader/resources/config/settings.toml +39 -74
- novel_downloader/resources/css_styles/intro.css +83 -0
- novel_downloader/resources/css_styles/main.css +30 -89
- novel_downloader/resources/json/xiguashuwu.json +718 -0
- novel_downloader/utils/__init__.py +43 -0
- novel_downloader/utils/chapter_storage.py +247 -226
- novel_downloader/utils/constants.py +5 -50
- novel_downloader/utils/cookies.py +6 -18
- novel_downloader/utils/crypto_utils/__init__.py +13 -0
- novel_downloader/utils/crypto_utils/aes_util.py +90 -0
- novel_downloader/utils/crypto_utils/aes_v1.py +619 -0
- novel_downloader/utils/crypto_utils/aes_v2.py +1143 -0
- novel_downloader/utils/{crypto_utils.py → crypto_utils/rc4.py} +3 -10
- novel_downloader/utils/epub/__init__.py +34 -0
- novel_downloader/utils/epub/builder.py +377 -0
- novel_downloader/utils/epub/constants.py +118 -0
- novel_downloader/utils/epub/documents.py +297 -0
- novel_downloader/utils/epub/models.py +120 -0
- novel_downloader/utils/epub/utils.py +179 -0
- novel_downloader/utils/file_utils/__init__.py +5 -30
- novel_downloader/utils/file_utils/io.py +9 -150
- novel_downloader/utils/file_utils/normalize.py +2 -2
- novel_downloader/utils/file_utils/sanitize.py +2 -7
- novel_downloader/utils/fontocr.py +207 -0
- novel_downloader/utils/i18n.py +2 -0
- novel_downloader/utils/logger.py +10 -16
- novel_downloader/utils/network.py +111 -252
- novel_downloader/utils/state.py +5 -90
- novel_downloader/utils/text_utils/__init__.py +16 -21
- novel_downloader/utils/text_utils/diff_display.py +6 -9
- novel_downloader/utils/text_utils/numeric_conversion.py +253 -0
- novel_downloader/utils/text_utils/text_cleaner.py +179 -0
- novel_downloader/utils/text_utils/truncate_utils.py +62 -0
- novel_downloader/utils/time_utils/__init__.py +6 -12
- novel_downloader/utils/time_utils/datetime_utils.py +23 -33
- novel_downloader/utils/time_utils/sleep_utils.py +5 -10
- novel_downloader/web/__init__.py +13 -0
- novel_downloader/web/components/__init__.py +11 -0
- novel_downloader/web/components/navigation.py +35 -0
- novel_downloader/web/main.py +66 -0
- novel_downloader/web/pages/__init__.py +17 -0
- novel_downloader/web/pages/download.py +78 -0
- novel_downloader/web/pages/progress.py +147 -0
- novel_downloader/web/pages/search.py +329 -0
- novel_downloader/web/services/__init__.py +17 -0
- novel_downloader/web/services/client_dialog.py +164 -0
- novel_downloader/web/services/cred_broker.py +113 -0
- novel_downloader/web/services/cred_models.py +35 -0
- novel_downloader/web/services/task_manager.py +264 -0
- novel_downloader-2.0.0.dist-info/METADATA +171 -0
- novel_downloader-2.0.0.dist-info/RECORD +210 -0
- {novel_downloader-1.4.5.dist-info → novel_downloader-2.0.0.dist-info}/entry_points.txt +1 -1
- novel_downloader/config/site_rules.py +0 -94
- novel_downloader/core/downloaders/biquge.py +0 -25
- novel_downloader/core/downloaders/esjzone.py +0 -25
- novel_downloader/core/downloaders/linovelib.py +0 -25
- novel_downloader/core/downloaders/sfacg.py +0 -25
- novel_downloader/core/downloaders/yamibo.py +0 -25
- novel_downloader/core/exporters/biquge.py +0 -25
- novel_downloader/core/exporters/esjzone.py +0 -25
- novel_downloader/core/exporters/qianbi.py +0 -25
- novel_downloader/core/exporters/sfacg.py +0 -25
- novel_downloader/core/exporters/yamibo.py +0 -25
- novel_downloader/core/factory/__init__.py +0 -20
- novel_downloader/core/factory/downloader.py +0 -73
- novel_downloader/core/factory/exporter.py +0 -58
- novel_downloader/core/factory/fetcher.py +0 -96
- novel_downloader/core/factory/parser.py +0 -86
- novel_downloader/core/fetchers/base/__init__.py +0 -14
- novel_downloader/core/fetchers/base/browser.py +0 -403
- novel_downloader/core/fetchers/biquge/__init__.py +0 -14
- novel_downloader/core/fetchers/common/__init__.py +0 -14
- novel_downloader/core/fetchers/esjzone/__init__.py +0 -14
- novel_downloader/core/fetchers/esjzone/browser.py +0 -204
- novel_downloader/core/fetchers/linovelib/__init__.py +0 -14
- novel_downloader/core/fetchers/linovelib/browser.py +0 -193
- novel_downloader/core/fetchers/qianbi/__init__.py +0 -14
- novel_downloader/core/fetchers/qidian/__init__.py +0 -14
- novel_downloader/core/fetchers/qidian/browser.py +0 -318
- novel_downloader/core/fetchers/sfacg/__init__.py +0 -14
- novel_downloader/core/fetchers/sfacg/browser.py +0 -189
- novel_downloader/core/fetchers/yamibo/__init__.py +0 -14
- novel_downloader/core/fetchers/yamibo/browser.py +0 -229
- novel_downloader/core/parsers/biquge/__init__.py +0 -10
- novel_downloader/core/parsers/biquge/main_parser.py +0 -134
- novel_downloader/core/parsers/common/__init__.py +0 -13
- novel_downloader/core/parsers/common/helper.py +0 -323
- novel_downloader/core/parsers/common/main_parser.py +0 -106
- novel_downloader/core/parsers/esjzone/__init__.py +0 -10
- novel_downloader/core/parsers/linovelib/__init__.py +0 -10
- novel_downloader/core/parsers/qianbi/__init__.py +0 -10
- novel_downloader/core/parsers/sfacg/__init__.py +0 -10
- novel_downloader/core/parsers/yamibo/__init__.py +0 -10
- novel_downloader/core/parsers/yamibo/main_parser.py +0 -194
- novel_downloader/models/browser.py +0 -21
- novel_downloader/models/chapter.py +0 -25
- novel_downloader/models/site_rules.py +0 -99
- novel_downloader/models/tasks.py +0 -33
- novel_downloader/models/types.py +0 -15
- novel_downloader/resources/css_styles/volume-intro.css +0 -56
- novel_downloader/resources/json/replace_word_map.json +0 -4
- novel_downloader/resources/text/blacklist.txt +0 -22
- novel_downloader/tui/__init__.py +0 -7
- novel_downloader/tui/app.py +0 -32
- novel_downloader/tui/main.py +0 -17
- novel_downloader/tui/screens/__init__.py +0 -14
- novel_downloader/tui/screens/home.py +0 -198
- novel_downloader/tui/screens/login.py +0 -74
- novel_downloader/tui/styles/home_layout.tcss +0 -79
- novel_downloader/tui/widgets/richlog_handler.py +0 -24
- novel_downloader/utils/cache.py +0 -24
- novel_downloader/utils/fontocr/__init__.py +0 -22
- novel_downloader/utils/fontocr/model_loader.py +0 -69
- novel_downloader/utils/fontocr/ocr_v1.py +0 -303
- novel_downloader/utils/fontocr/ocr_v2.py +0 -752
- novel_downloader/utils/hash_store.py +0 -279
- novel_downloader/utils/hash_utils.py +0 -103
- novel_downloader/utils/text_utils/chapter_formatting.py +0 -46
- novel_downloader/utils/text_utils/font_mapping.py +0 -28
- novel_downloader/utils/text_utils/text_cleaning.py +0 -107
- novel_downloader-1.4.5.dist-info/METADATA +0 -196
- novel_downloader-1.4.5.dist-info/RECORD +0 -165
- {novel_downloader-1.4.5.dist-info → novel_downloader-2.0.0.dist-info}/WHEEL +0 -0
- {novel_downloader-1.4.5.dist-info → novel_downloader-2.0.0.dist-info}/licenses/LICENSE +0 -0
- {novel_downloader-1.4.5.dist-info → novel_downloader-2.0.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,253 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
"""
|
3
|
+
novel_downloader.utils.text_utils.numeric_conversion
|
4
|
+
----------------------------------------------------
|
5
|
+
|
6
|
+
Utility functions to convert between Chinese numeral strings
|
7
|
+
and Python integers.
|
8
|
+
"""
|
9
|
+
|
10
|
+
# Digit characters (everyday and financial/"banker's" forms) -> value 0..9.
CHINESE_NUMERALS = {
    "零": 0,
    "〇": 0,
    "一": 1,
    "壹": 1,
    "二": 2,
    "两": 2,
    "贰": 2,
    "貮": 2,
    "三": 3,
    "叁": 3,
    "四": 4,
    "肆": 4,
    "五": 5,
    "伍": 5,
    "六": 6,
    "陆": 6,
    "七": 7,
    "柒": 7,
    "八": 8,
    "捌": 8,
    "九": 9,
    "玖": 9,
}

# Every multiplier character (small and large) -> its value.
CHINESE_UNITS = {
    "十": 10,
    "拾": 10,
    "百": 100,
    "佰": 100,
    "千": 1000,
    "仟": 1000,
    "万": 10_000,
    "萬": 10_000,
    "亿": 100_000_000,
    "億": 100_000_000,
    "兆": 10**12,
    "京": 10**16,
    "垓": 10**20,
}

# Section separators, ordered from the largest unit to the smallest so the
# parser can peel sections off the string from left to right.
LARGE_UNITS = [
    ("垓", 10**20),
    ("京", 10**16),
    ("兆", 10**12),
    ("亿", 10**8),
    ("億", 10**8),
    ("万", 10**4),
    ("萬", 10**4),
]


def chinese_to_arabic(s: str) -> int:
    """
    Convert a Chinese numeral string into its integer value.

    Examples:
    ---
    >>> chinese_to_arabic("一千二百三十四")
    1234
    >>> chinese_to_arabic("负一千二百三十四")
    -1234
    >>> chinese_to_arabic("一万零三")
    10003
    >>> chinese_to_arabic("三亿二千五百")
    300002500

    :param s: A string of Chinese numerals, e.g. "三千零二十一", "五亿零七万".
              A leading "负", "負" or "-" marks a negative number.
    :return: The integer value represented by the input string.
    :raises ValueError: If `s` is empty or consists of a sign only.
    :raises KeyError: If `s` contains characters not found in the supported
                      numeral or unit mappings.
    """
    if not s:
        raise ValueError("Input string is empty")

    sign = 1
    if s[0] in ("负", "負", "-"):
        sign = -1
        s = s[1:]
        # A bare sign carries no number; reject it like an empty string
        # instead of silently returning 0.
        if not s:
            raise ValueError("Input string contains only a sign")

    def _parse_section(sec: str) -> int:
        """Parse one section: digits combined with unit multipliers."""
        num = 0
        section_total = 0
        for ch in sec:
            if ch in CHINESE_NUMERALS:
                # Consecutive digits are read positionally ("二〇二四" -> 2024).
                num = num * 10 + CHINESE_NUMERALS[ch]
            else:
                unit = CHINESE_UNITS[ch]
                # A unit with no preceding digit counts once ("十" -> 10).
                section_total += (num or 1) * unit
                num = 0
        return section_total + num

    total = 0
    rest = s
    for char, val in LARGE_UNITS:
        if char in rest:
            # Everything left of the large unit is its coefficient;
            # the remainder is handled by the smaller units.
            left, rest = rest.split(char, 1)
            total += _parse_section(left) * val

    total += _parse_section(rest)

    return sign * total
|
113
|
+
|
114
|
+
|
115
|
+
def arabic_to_chinese(num: int) -> str:
    """
    Convert an integer to its Chinese numeral representation.

    The number is split into groups of four decimal digits; each group is
    rendered with 千/百/十 and suffixed with its large unit (万, 亿, ...).
    A "零" is inserted wherever one or more zero digits separate two
    non-zero parts, including across group boundaries.

    Examples:
    ---
    >>> arabic_to_chinese(0)
    '零'
    >>> arabic_to_chinese(1234)
    '一千二百三十四'
    >>> arabic_to_chinese(10003)
    '一万零三'
    >>> arabic_to_chinese(-205)
    '负二百零五'
    >>> arabic_to_chinese(3000002500)
    '三十亿零二千五百'

    :param num: The integer to convert (e.g. 42, -1300).
    :return: The Chinese-numeral string for `num`.
    :raises TypeError: If `num` is not an integer.
    :raises IndexError: If ``abs(num) >= 10**24`` (no large unit beyond 垓).
    """
    if not isinstance(num, int):
        raise TypeError("Input must be an integer.")
    if num == 0:
        return "零"

    digits = "零一二三四五六七八九"
    small_units = ["", "十", "百", "千"]
    big_units = ["", "万", "亿", "兆", "京", "垓"]

    negative = num < 0
    num = -num if negative else num

    def _section_to_chinese(sec: int) -> str:
        """
        Convert a value 1..9999 into Chinese using 千/百/十 units,
        without any large unit (万, 亿, ...) or leading '零'.
        """
        s = ""
        unit_pos = 0
        # Starts True so trailing zeros (e.g. the "00" of 1200) emit nothing.
        zero_flag = True
        while sec > 0:
            d = sec % 10
            if d == 0:
                # only emit one '零' for consecutive zeros
                if not zero_flag:
                    s = digits[0] + s
                    zero_flag = True
            else:
                s = digits[d] + small_units[unit_pos] + s
                zero_flag = False
            unit_pos += 1
            sec //= 10
        return s

    result = ""
    section_pos = 0

    # Walk the four-digit groups from least to most significant.
    while num > 0:
        num, section = divmod(num, 10_000)
        if section != 0:
            sec_str = _section_to_chinese(section)
            # A group below 1000 has leading zero digits; when a higher
            # group is still to come they must be spoken as '零'
            # (10003 -> "一万零三", not "一万三").
            if section < 1000 and num > 0:
                sec_str = "零" + sec_str
            result = sec_str + big_units[section_pos] + result
        elif result and not result.startswith("零"):
            # An all-zero group between non-zero groups becomes a single
            # '零' separator (a higher non-zero group always follows here).
            result = "零" + result

        section_pos += 1

    if negative:
        result = "负" + result

    return result
|
191
|
+
|
192
|
+
|
193
|
+
# Ad-hoc self-test, run only when the module is executed as a script.
if __name__ == "__main__":
    import random

    # ANSI escape sequences used to colour the pass/fail output.
    RED = "\033[91m"
    GREEN = "\033[92m"
    RESET = "\033[0m"
    # Fixed seed so the random round-trip section is reproducible.
    random.seed(42)

    # --- 1. Fixed (string, expected integer) cases for chinese_to_arabic ---
    fail_count = 0
    num_list = [
        ("一千二百三十四", 1234),
        ("一万五千", 15000),
        ("一万零三", 10003),
        ("三亿二千五百", 300002500),
    ]
    print("=== chinese_to_arabic() with fixed cases ===")
    for s, expected in num_list:
        actual = chinese_to_arabic(s)
        if actual != expected:
            print(f"{RED}FAIL:{RESET} “{s}” -> expected {expected}, got {actual}")
            fail_count += 1

    if fail_count:
        print(f"{RED}{fail_count} chinese_to_arabic() tests failed.{RESET}\n")
    else:
        print(f"{GREEN}All {len(num_list)} chinese_to_arabic() tests passed!{RESET}\n")

    # --- 2. Exhaustive round-trip int -> str -> int for 0..9999 ---
    fail_count = 0
    print("=== Round-trip test for values 0 - 9999 ===")
    for i in range(10_000):
        s = arabic_to_chinese(i)
        r = chinese_to_arabic(s)
        if r != i:
            print(f'{RED}FAIL round-trip:{RESET} {i} -> "{s}" -> {r}')
            fail_count += 1
            # Stops at the first mismatch, so fail_count is at most 1 here.
            break

    if fail_count:
        print(f"{RED}{fail_count} round-trip failures in 0 - 9999.{RESET}\n")
    else:
        print(f"{GREEN}0 - 9999 round-trip all passed!{RESET}\n")

    # --- 3. Random round-trips, two samples per decimal magnitude ---
    fail_count = 0
    exponents = range(5, 22)  # test around 10^5...
    print("=== Random round-trip at larger scales ===")
    for exp in exponents:
        lower = 10**exp
        upper = 10 ** (exp + 1)
        for _ in range(2):
            i = random.randint(lower, upper - 1)
            # Check both the positive and the negated value.
            for val in (i, -i):
                s = arabic_to_chinese(val)
                r = chinese_to_arabic(s)
                if r != val:
                    print(f'{RED}FAIL:{RESET} {val} -> "{s}" -> {r}')
                    fail_count += 1

    if fail_count:
        print(f"{RED}{fail_count} random large-scale failures.{RESET}")
    else:
        print(f"{GREEN}All random large-scale round-trips passed!{RESET}")
|
@@ -0,0 +1,179 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
"""
|
3
|
+
novel_downloader.utils.text_utils.text_cleaner
|
4
|
+
----------------------------------------------
|
5
|
+
|
6
|
+
Provides utilities to clean novel titles and content
|
7
|
+
by removing unwanted patterns, replacing strings.
|
8
|
+
"""
|
9
|
+
|
10
|
+
import re
|
11
|
+
from re import Match, Pattern
|
12
|
+
from typing import Protocol, runtime_checkable
|
13
|
+
|
14
|
+
from novel_downloader.models import TextCleanerConfig
|
15
|
+
|
16
|
+
|
17
|
+
@runtime_checkable
class Cleaner(Protocol):
    """
    Structural interface for text cleaners.

    Any object providing these three methods satisfies the protocol;
    because it is runtime-checkable, ``isinstance(obj, Cleaner)`` verifies
    that the methods are present.
    """

    def clean(self, text: str, *, as_title: bool = False) -> str:
        """Clean `text`; `as_title` selects title handling over content."""

    def clean_title(self, text: str) -> str:
        """Clean a title string."""

    def clean_content(self, text: str) -> str:
        """Clean a content/body string."""
|
27
|
+
|
28
|
+
|
29
|
+
class NullCleaner(Cleaner):
    """No-op cleaner: every method returns its input unchanged."""

    def clean(self, text: str, *, as_title: bool = False) -> str:
        """Return `text` as-is, regardless of `as_title`."""
        return text

    def clean_title(self, text: str) -> str:
        """Return the title untouched."""
        return text

    def clean_content(self, text: str) -> str:
        """Return the content untouched."""
        return text
|
38
|
+
|
39
|
+
|
40
|
+
class TextCleaner(Cleaner):
    """
    TextCleaner removes invisible characters, strips unwanted patterns,
    and applies literal replacements in a single pass using a combined regex.

    For regex that never matches, reference:

    https://stackoverflow.com/questions/2930182/regex-to-not-match-anything
    """

    _INVISIBLE_PATTERN: Pattern[str] = re.compile(r"[\ufeff\u200B\u200C\u200D\u2060]")

    # An empty negative lookahead fails at every position, in every flag
    # mode. (`$^` is not safe for this: it matches the empty string, and
    # with re.MULTILINE it also matches at blank lines.)
    _NEVER_MATCH = r"(?!)"

    def __init__(self, config: TextCleanerConfig) -> None:
        """
        Initialize TextCleaner with the given configuration.

        :param config: TextCleanerConfig instance containing:

            - remove_invisible: whether to strip BOM/zero-width chars
            - title_remove_patterns: list of regex patterns to delete from titles
            - content_remove_patterns: list of regex patterns to delete from content
            - title_replacements: dict of literal replacements for titles
            - content_replacements: dict of literal replacements for content
        """
        self._remove_invisible = config.remove_invisible

        # Literal-to-literal replacement maps. Empty keys are dropped: an
        # empty literal would match at every position and inject its
        # replacement between all characters.
        self._title_repl_map = {
            k: v for k, v in config.title_replacements.items() if k
        }
        self._content_repl_map = {
            k: v for k, v in config.content_replacements.items() if k
        }

        self._title_combined_rx: Pattern[str] = self._build_combined_rx(
            config.title_remove_patterns, self._title_repl_map
        )
        # Content is matched in multiline mode so ^/$ in the removal
        # patterns anchor on individual lines.
        self._content_combined_rx: Pattern[str] = self._build_combined_rx(
            config.content_remove_patterns, self._content_repl_map, flags=re.MULTILINE
        )

    @classmethod
    def _build_combined_rx(
        cls,
        remove_patterns: list[str],
        repl_map: dict[str, str],
        flags: int = 0,
    ) -> Pattern[str]:
        """
        Compile one alternation of all removal patterns and escaped
        replacement keys.

        :param remove_patterns: Regex patterns whose matches are deleted.
        :param repl_map: Literal keys whose matches are replaced.
        :param flags: Regex flags passed to ``re.compile``.
        :return: Compiled combined pattern (never-matching when empty).
        """
        # Deduplicate removal patterns (keep order); skip empty ones, which
        # could only ever produce no-op empty matches.
        parts = [p for p in dict.fromkeys(remove_patterns) if p]
        parts.extend(re.escape(k) for k in repl_map)
        # longer first to avoid prefix collisions
        parts.sort(key=len, reverse=True)
        return re.compile("|".join(parts) if parts else cls._NEVER_MATCH, flags=flags)

    def clean_title(self, text: str) -> str:
        """
        Clean a title string.

        Steps:
          1. Optionally strip BOM & zero-width characters.
          2. Remove unwanted patterns and apply literal replacements in one pass.
          3. Trim leading/trailing whitespace.

        :param text: Raw title text.
        :return: Cleaned title.
        """
        return self._do_clean(text, self._title_combined_rx, self._title_repl_map)

    def clean_content(self, text: str) -> str:
        """
        Clean a content string.

        Steps:
          1. Optionally strip BOM & zero-width characters.
          2. Remove unwanted patterns and apply literal replacements in one pass.
          3. Trim leading/trailing whitespace.

        :param text: Raw content/body text.
        :return: Cleaned content.
        """
        return self._do_clean(text, self._content_combined_rx, self._content_repl_map)

    def clean(self, text: str, *, as_title: bool = False) -> str:
        """
        Unified clean method to process text as either title or content.

        :param text: Raw text to clean.
        :param as_title: If True, use title rules; otherwise content rules.
        :return: Cleaned text.
        """
        return self.clean_title(text) if as_title else self.clean_content(text)

    @classmethod
    def _remove_bom_and_invisible(cls, text: str) -> str:
        """
        Remove BOM and zero-width/invisible characters from the text.

        Matches:
          - U+FEFF (BOM)
          - U+200B ZERO WIDTH SPACE
          - U+200C ZERO WIDTH NON-JOINER
          - U+200D ZERO WIDTH JOINER
          - U+2060 WORD JOINER

        :param text: Input string possibly containing invisible chars.
        :return: String with those characters stripped.
        """
        return cls._INVISIBLE_PATTERN.sub("", text)

    def _do_clean(
        self,
        text: str,
        combined_rx: Pattern[str],
        repl_map: dict[str, str],
    ) -> str:
        """
        Core cleaning logic:
        optional invisible removal, single-pass remove/replace, trimming.

        :param text: Text to clean.
        :param combined_rx: Compiled regex for removal patterns and replacement keys.
        :param repl_map: Mapping from matched token to replacement text.
        :return: Cleaned text.
        """
        # Strip invisible chars if configured
        if self._remove_invisible:
            text = self._remove_bom_and_invisible(text)

        # Single-pass removal & replacement
        def _sub(match: Match[str]) -> str:
            token = match.group(0)
            # If token in repl_map -> replacement; else -> delete (empty string)
            return repl_map.get(token, "")

        text = combined_rx.sub(_sub, text)
        return text.strip()
|
173
|
+
|
174
|
+
|
175
|
+
def get_cleaner(
    enabled: bool,
    config: TextCleanerConfig,
) -> Cleaner:
    """
    Select the cleaner implementation.

    :param enabled: When True, build a TextCleaner from `config`.
    :param config: Cleaner configuration; unused when `enabled` is False.
    :return: A TextCleaner if enabled, otherwise a pass-through NullCleaner.
    """
    if not enabled:
        return NullCleaner()
    return TextCleaner(config)
|
@@ -0,0 +1,62 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
"""
|
3
|
+
novel_downloader.utils.text_utils.truncate_utils
|
4
|
+
------------------------------------------------
|
5
|
+
|
6
|
+
Tools for truncating text.
|
7
|
+
"""
|
8
|
+
|
9
|
+
__all__ = [
|
10
|
+
"content_prefix",
|
11
|
+
"truncate_half_lines",
|
12
|
+
]
|
13
|
+
|
14
|
+
import math
|
15
|
+
|
16
|
+
|
17
|
+
def content_prefix(
|
18
|
+
text: str,
|
19
|
+
n: int,
|
20
|
+
ignore_chars: set[str] | None = None,
|
21
|
+
) -> str:
|
22
|
+
"""
|
23
|
+
Return the prefix of `text` containing the first `n` non-ignored characters.
|
24
|
+
|
25
|
+
:param text: The full input string.
|
26
|
+
:param n: Number of content characters to include.
|
27
|
+
:param ignore_chars: Characters to ignore when counting content.
|
28
|
+
:return: Truncated string preserving original whitespace and line breaks.
|
29
|
+
"""
|
30
|
+
ignore = ignore_chars or set()
|
31
|
+
cnt = 0
|
32
|
+
|
33
|
+
for i, ch in enumerate(text):
|
34
|
+
if ch not in ignore:
|
35
|
+
cnt += 1
|
36
|
+
if cnt >= n:
|
37
|
+
return text[: i + 1]
|
38
|
+
|
39
|
+
return text
|
40
|
+
|
41
|
+
|
42
|
+
def truncate_half_lines(text: str) -> str:
    """
    Return the leading part of `text` that holds half of its non-blank lines.

    Half is rounded up and only lines with non-whitespace content count
    toward it; blank lines met before the cut-off are kept. The kept lines
    are re-joined with "\\n".

    :param text: Full input text
    :return: Truncated text with first half of lines
    """
    all_lines = text.splitlines()
    target = math.ceil(sum(1 for ln in all_lines if ln.strip()) / 2)

    kept: list[str] = []
    seen = 0
    for ln in all_lines:
        kept.append(ln)
        if not ln.strip():
            # Blank lines are retained but never trigger the cut-off.
            continue
        seen += 1
        if seen >= target:
            break

    return "\n".join(kept)
|
@@ -4,19 +4,13 @@ novel_downloader.utils.time_utils
|
|
4
4
|
---------------------------------
|
5
5
|
|
6
6
|
Utility functions for time and date-related operations.
|
7
|
-
|
8
|
-
Includes:
|
9
|
-
- calculate_time_difference:
|
10
|
-
Computes time delta between two timezone-aware datetime strings.
|
11
|
-
- sleep_with_random_delay:
|
12
|
-
Sleeps for a random duration, useful for human-like delays or rate limiting.
|
13
7
|
"""
|
14
8
|
|
15
|
-
from .datetime_utils import calculate_time_difference
|
16
|
-
from .sleep_utils import async_sleep_with_random_delay, sleep_with_random_delay
|
17
|
-
|
18
9
|
__all__ = [
|
19
|
-
"
|
20
|
-
"
|
21
|
-
"
|
10
|
+
"time_diff",
|
11
|
+
"async_jitter_sleep",
|
12
|
+
"jitter_sleep",
|
22
13
|
]
|
14
|
+
|
15
|
+
from .datetime_utils import time_diff
|
16
|
+
from .sleep_utils import async_jitter_sleep, jitter_sleep
|
@@ -4,14 +4,12 @@ novel_downloader.utils.time_utils.datetime_utils
|
|
4
4
|
------------------------------------------------
|
5
5
|
|
6
6
|
Time utility functions for timezone-aware date calculations.
|
7
|
-
|
8
|
-
Includes:
|
9
|
-
- _parse_utc_offset():
|
10
|
-
Converts UTC offset string (e.g. 'UTC+8') to a timezone object.
|
11
|
-
- calculate_time_difference():
|
12
|
-
Computes timedelta between two datetime strings, with optional timezones.
|
13
7
|
"""
|
14
8
|
|
9
|
+
__all__ = [
|
10
|
+
"time_diff",
|
11
|
+
]
|
12
|
+
|
15
13
|
import logging
|
16
14
|
import re
|
17
15
|
from datetime import UTC, datetime, timedelta, timezone
|
@@ -49,7 +47,7 @@ def _parse_utc_offset(tz_str: str) -> timezone:
|
|
49
47
|
Parse a timezone string like 'UTC+8' or 'UTC-5' into a datetime.timezone object.
|
50
48
|
|
51
49
|
:param tz_str: Timezone in 'UTC±<hours>' format, e.g. 'UTC', 'UTC+8', 'UTC-05'
|
52
|
-
:return:
|
50
|
+
:return: Corresponding timezone object
|
53
51
|
:raises ValueError: if tz_str is not a valid UTC offset format
|
54
52
|
"""
|
55
53
|
tz_str_clean = tz_str.upper().strip()
|
@@ -70,20 +68,20 @@ def _parse_datetime_flexible(dt_str: str) -> datetime:
|
|
70
68
|
"""
|
71
69
|
Parse a date/time string in any of several common formats:
|
72
70
|
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
71
|
+
* ISO 8601: 'YYYY-MM-DDTHH:MM:SSZ'
|
72
|
+
* ISO w/ offset: 'YYYY-MM-DDTHH:MM:SS+HH:MM'
|
73
|
+
* 'YYYY-MM-DD HH:MM:SS'
|
74
|
+
* 'YYYY-MM-DD' (time defaults to 00:00:00)
|
75
|
+
* 'YYYY/MM/DD HH:MM:SS'
|
76
|
+
* 'YYYY/MM/DD HH:MM'
|
77
|
+
* 'YYYY/MM/DD'
|
78
|
+
* 'MM/DD/YYYY HH:MM[:SS] AM/PM'
|
79
|
+
* 'MM/DD/YYYY'
|
80
|
+
* 'DD.MM.YYYY HH:MM'
|
81
|
+
* 'DD.MM.YYYY'
|
84
82
|
|
85
83
|
:param dt_str: Date/time string to parse.
|
86
|
-
:return:
|
84
|
+
:return: A naive datetime object.
|
87
85
|
:raises ValueError: If dt_str does not match the expected formats.
|
88
86
|
"""
|
89
87
|
s = dt_str.strip()
|
@@ -91,13 +89,10 @@ def _parse_datetime_flexible(dt_str: str) -> datetime:
|
|
91
89
|
if re.fullmatch(pattern, s):
|
92
90
|
return datetime.strptime(s, fmt)
|
93
91
|
|
94
|
-
|
95
|
-
raise ValueError(
|
96
|
-
f"Invalid date/time format: '{dt_str}'\n" f"Supported formats are:\n{supported}"
|
97
|
-
)
|
92
|
+
raise ValueError(f"Invalid date/time format: '{dt_str}'")
|
98
93
|
|
99
94
|
|
100
|
-
def
|
95
|
+
def time_diff(
|
101
96
|
from_time_str: str,
|
102
97
|
tz_str: str = "UTC",
|
103
98
|
to_time_str: str | None = None,
|
@@ -107,10 +102,10 @@ def calculate_time_difference(
|
|
107
102
|
Calculate the difference between two datetime values.
|
108
103
|
|
109
104
|
:param from_time_str: Date-time string "YYYY-MM-DD HH:MM:SS" for the start.
|
110
|
-
:param tz_str:
|
111
|
-
:param to_time_str:
|
112
|
-
:param to_tz_str:
|
113
|
-
:return:
|
105
|
+
:param tz_str: Timezone of from_time_str, e.g. 'UTC+8'. Defaults to 'UTC'.
|
106
|
+
:param to_time_str: Optional date-time string for the end; if None, uses now().
|
107
|
+
:param to_tz_str: Timezone of to_time_str. Defaults to 'UTC'.
|
108
|
+
:return: Tuple (days, hours, minutes, seconds).
|
114
109
|
"""
|
115
110
|
try:
|
116
111
|
# parse start time
|
@@ -139,8 +134,3 @@ def calculate_time_difference(
|
|
139
134
|
except Exception as e:
|
140
135
|
logger.warning("[time] Failed to calculate time difference: %s", e)
|
141
136
|
return 999, 23, 59, 59
|
142
|
-
|
143
|
-
|
144
|
-
__all__ = [
|
145
|
-
"calculate_time_difference",
|
146
|
-
]
|
@@ -4,12 +4,10 @@ novel_downloader.utils.time_utils.sleep_utils
|
|
4
4
|
---------------------------------------------
|
5
5
|
|
6
6
|
Utilities for adding randomized delays in scripts and bots.
|
7
|
-
|
8
|
-
Includes:
|
9
|
-
- sleep_with_random_delay(): Sleep between base and base+spread seconds,
|
10
|
-
optionally capped with a max_sleep limit.
|
11
7
|
"""
|
12
8
|
|
9
|
+
__all__ = ["jitter_sleep", "async_jitter_sleep"]
|
10
|
+
|
13
11
|
import asyncio
|
14
12
|
import logging
|
15
13
|
import random
|
@@ -18,7 +16,7 @@ import time
|
|
18
16
|
logger = logging.getLogger(__name__)
|
19
17
|
|
20
18
|
|
21
|
-
def
|
19
|
+
def jitter_sleep(
|
22
20
|
base: float,
|
23
21
|
add_spread: float = 0.0,
|
24
22
|
mul_spread: float = 1.0,
|
@@ -39,7 +37,7 @@ def sleep_with_random_delay(
|
|
39
37
|
:param mul_spread: Maximum multiplier factor for base; drawn from [1.0, mul_spread].
|
40
38
|
:param max_sleep: Optional upper limit for the final sleep duration.
|
41
39
|
"""
|
42
|
-
if base < 0 or add_spread < 0 or mul_spread < 0:
|
40
|
+
if base < 0 or add_spread < 0 or mul_spread < 1.0:
|
43
41
|
logger.warning(
|
44
42
|
"[sleep] Invalid parameters: base=%s, add_spread=%s, mul_spread=%s",
|
45
43
|
base,
|
@@ -61,7 +59,7 @@ def sleep_with_random_delay(
|
|
61
59
|
return
|
62
60
|
|
63
61
|
|
64
|
-
async def
|
62
|
+
async def async_jitter_sleep(
|
65
63
|
base: float,
|
66
64
|
add_spread: float = 0.0,
|
67
65
|
mul_spread: float = 1.0,
|
@@ -100,6 +98,3 @@ async def async_sleep_with_random_delay(
|
|
100
98
|
|
101
99
|
logger.debug("[async time] Sleeping for %.2f seconds", duration)
|
102
100
|
await asyncio.sleep(duration)
|
103
|
-
|
104
|
-
|
105
|
-
__all__ = ["sleep_with_random_delay", "async_sleep_with_random_delay"]
|