novel-downloader 1.4.5__py3-none-any.whl → 2.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- novel_downloader/__init__.py +1 -1
- novel_downloader/cli/__init__.py +2 -4
- novel_downloader/cli/clean.py +21 -88
- novel_downloader/cli/config.py +27 -104
- novel_downloader/cli/download.py +78 -66
- novel_downloader/cli/export.py +20 -21
- novel_downloader/cli/main.py +3 -1
- novel_downloader/cli/search.py +120 -0
- novel_downloader/cli/ui.py +156 -0
- novel_downloader/config/__init__.py +10 -14
- novel_downloader/config/adapter.py +195 -99
- novel_downloader/config/{loader.py → file_io.py} +53 -27
- novel_downloader/core/__init__.py +14 -13
- novel_downloader/core/archived/deqixs/fetcher.py +115 -0
- novel_downloader/core/archived/deqixs/parser.py +132 -0
- novel_downloader/core/archived/deqixs/searcher.py +89 -0
- novel_downloader/core/archived/qidian/searcher.py +79 -0
- novel_downloader/core/archived/wanbengo/searcher.py +98 -0
- novel_downloader/core/archived/xshbook/searcher.py +93 -0
- novel_downloader/core/downloaders/__init__.py +8 -30
- novel_downloader/core/downloaders/base.py +182 -30
- novel_downloader/core/downloaders/common.py +217 -384
- novel_downloader/core/downloaders/qianbi.py +332 -4
- novel_downloader/core/downloaders/qidian.py +250 -290
- novel_downloader/core/downloaders/registry.py +69 -0
- novel_downloader/core/downloaders/signals.py +46 -0
- novel_downloader/core/exporters/__init__.py +8 -26
- novel_downloader/core/exporters/base.py +107 -31
- novel_downloader/core/exporters/common/__init__.py +3 -4
- novel_downloader/core/exporters/common/epub.py +92 -171
- novel_downloader/core/exporters/common/main_exporter.py +14 -67
- novel_downloader/core/exporters/common/txt.py +90 -86
- novel_downloader/core/exporters/epub_util.py +184 -1327
- novel_downloader/core/exporters/linovelib/__init__.py +3 -2
- novel_downloader/core/exporters/linovelib/epub.py +165 -222
- novel_downloader/core/exporters/linovelib/main_exporter.py +10 -71
- novel_downloader/core/exporters/linovelib/txt.py +76 -66
- novel_downloader/core/exporters/qidian.py +15 -11
- novel_downloader/core/exporters/registry.py +55 -0
- novel_downloader/core/exporters/txt_util.py +67 -0
- novel_downloader/core/fetchers/__init__.py +57 -56
- novel_downloader/core/fetchers/aaatxt.py +83 -0
- novel_downloader/core/fetchers/{biquge/session.py → b520.py} +10 -10
- novel_downloader/core/fetchers/{base/session.py → base.py} +63 -47
- novel_downloader/core/fetchers/biquyuedu.py +83 -0
- novel_downloader/core/fetchers/dxmwx.py +110 -0
- novel_downloader/core/fetchers/eightnovel.py +139 -0
- novel_downloader/core/fetchers/{esjzone/session.py → esjzone.py} +23 -11
- novel_downloader/core/fetchers/guidaye.py +85 -0
- novel_downloader/core/fetchers/hetushu.py +92 -0
- novel_downloader/core/fetchers/{qianbi/browser.py → i25zw.py} +22 -26
- novel_downloader/core/fetchers/ixdzs8.py +113 -0
- novel_downloader/core/fetchers/jpxs123.py +101 -0
- novel_downloader/core/fetchers/{biquge/browser.py → lewenn.py} +15 -15
- novel_downloader/core/fetchers/{linovelib/session.py → linovelib.py} +16 -12
- novel_downloader/core/fetchers/piaotia.py +105 -0
- novel_downloader/core/fetchers/qbtr.py +101 -0
- novel_downloader/core/fetchers/{qianbi/session.py → qianbi.py} +9 -9
- novel_downloader/core/fetchers/{qidian/session.py → qidian.py} +55 -40
- novel_downloader/core/fetchers/quanben5.py +92 -0
- novel_downloader/core/fetchers/{base/rate_limiter.py → rate_limiter.py} +2 -2
- novel_downloader/core/fetchers/registry.py +60 -0
- novel_downloader/core/fetchers/{sfacg/session.py → sfacg.py} +11 -9
- novel_downloader/core/fetchers/shencou.py +106 -0
- novel_downloader/core/fetchers/{common/browser.py → shuhaige.py} +24 -19
- novel_downloader/core/fetchers/tongrenquan.py +84 -0
- novel_downloader/core/fetchers/ttkan.py +95 -0
- novel_downloader/core/fetchers/{common/session.py → wanbengo.py} +21 -17
- novel_downloader/core/fetchers/xiaoshuowu.py +106 -0
- novel_downloader/core/fetchers/xiguashuwu.py +177 -0
- novel_downloader/core/fetchers/xs63b.py +171 -0
- novel_downloader/core/fetchers/xshbook.py +85 -0
- novel_downloader/core/fetchers/{yamibo/session.py → yamibo.py} +23 -11
- novel_downloader/core/fetchers/yibige.py +114 -0
- novel_downloader/core/interfaces/__init__.py +8 -14
- novel_downloader/core/interfaces/downloader.py +6 -2
- novel_downloader/core/interfaces/exporter.py +7 -7
- novel_downloader/core/interfaces/fetcher.py +4 -17
- novel_downloader/core/interfaces/parser.py +5 -6
- novel_downloader/core/interfaces/searcher.py +26 -0
- novel_downloader/core/parsers/__init__.py +58 -22
- novel_downloader/core/parsers/aaatxt.py +132 -0
- novel_downloader/core/parsers/b520.py +116 -0
- novel_downloader/core/parsers/base.py +63 -12
- novel_downloader/core/parsers/biquyuedu.py +133 -0
- novel_downloader/core/parsers/dxmwx.py +162 -0
- novel_downloader/core/parsers/eightnovel.py +224 -0
- novel_downloader/core/parsers/{esjzone/main_parser.py → esjzone.py} +67 -67
- novel_downloader/core/parsers/guidaye.py +128 -0
- novel_downloader/core/parsers/hetushu.py +139 -0
- novel_downloader/core/parsers/i25zw.py +137 -0
- novel_downloader/core/parsers/ixdzs8.py +186 -0
- novel_downloader/core/parsers/jpxs123.py +137 -0
- novel_downloader/core/parsers/lewenn.py +142 -0
- novel_downloader/core/parsers/{linovelib/main_parser.py → linovelib.py} +54 -65
- novel_downloader/core/parsers/piaotia.py +189 -0
- novel_downloader/core/parsers/qbtr.py +136 -0
- novel_downloader/core/parsers/{qianbi/main_parser.py → qianbi.py} +54 -51
- novel_downloader/core/parsers/qidian/__init__.py +2 -2
- novel_downloader/core/parsers/qidian/book_info_parser.py +58 -59
- novel_downloader/core/parsers/qidian/chapter_encrypted.py +290 -346
- novel_downloader/core/parsers/qidian/chapter_normal.py +25 -56
- novel_downloader/core/parsers/qidian/main_parser.py +19 -57
- novel_downloader/core/parsers/qidian/utils/__init__.py +12 -11
- novel_downloader/core/parsers/qidian/utils/decryptor_fetcher.py +6 -7
- novel_downloader/core/parsers/qidian/utils/fontmap_recover.py +143 -0
- novel_downloader/core/parsers/qidian/utils/helpers.py +0 -4
- novel_downloader/core/parsers/qidian/utils/node_decryptor.py +2 -2
- novel_downloader/core/parsers/quanben5.py +103 -0
- novel_downloader/core/parsers/registry.py +57 -0
- novel_downloader/core/parsers/{sfacg/main_parser.py → sfacg.py} +46 -48
- novel_downloader/core/parsers/shencou.py +215 -0
- novel_downloader/core/parsers/shuhaige.py +111 -0
- novel_downloader/core/parsers/tongrenquan.py +116 -0
- novel_downloader/core/parsers/ttkan.py +132 -0
- novel_downloader/core/parsers/wanbengo.py +191 -0
- novel_downloader/core/parsers/xiaoshuowu.py +173 -0
- novel_downloader/core/parsers/xiguashuwu.py +435 -0
- novel_downloader/core/parsers/xs63b.py +161 -0
- novel_downloader/core/parsers/xshbook.py +134 -0
- novel_downloader/core/parsers/yamibo.py +155 -0
- novel_downloader/core/parsers/yibige.py +166 -0
- novel_downloader/core/searchers/__init__.py +51 -0
- novel_downloader/core/searchers/aaatxt.py +107 -0
- novel_downloader/core/searchers/b520.py +84 -0
- novel_downloader/core/searchers/base.py +168 -0
- novel_downloader/core/searchers/dxmwx.py +105 -0
- novel_downloader/core/searchers/eightnovel.py +84 -0
- novel_downloader/core/searchers/esjzone.py +102 -0
- novel_downloader/core/searchers/hetushu.py +92 -0
- novel_downloader/core/searchers/i25zw.py +93 -0
- novel_downloader/core/searchers/ixdzs8.py +107 -0
- novel_downloader/core/searchers/jpxs123.py +107 -0
- novel_downloader/core/searchers/piaotia.py +100 -0
- novel_downloader/core/searchers/qbtr.py +106 -0
- novel_downloader/core/searchers/qianbi.py +165 -0
- novel_downloader/core/searchers/quanben5.py +144 -0
- novel_downloader/core/searchers/registry.py +79 -0
- novel_downloader/core/searchers/shuhaige.py +124 -0
- novel_downloader/core/searchers/tongrenquan.py +110 -0
- novel_downloader/core/searchers/ttkan.py +92 -0
- novel_downloader/core/searchers/xiaoshuowu.py +122 -0
- novel_downloader/core/searchers/xiguashuwu.py +95 -0
- novel_downloader/core/searchers/xs63b.py +104 -0
- novel_downloader/locales/en.json +36 -79
- novel_downloader/locales/zh.json +37 -80
- novel_downloader/models/__init__.py +23 -50
- novel_downloader/models/book.py +44 -0
- novel_downloader/models/config.py +16 -43
- novel_downloader/models/login.py +1 -1
- novel_downloader/models/search.py +21 -0
- novel_downloader/resources/config/settings.toml +39 -74
- novel_downloader/resources/css_styles/intro.css +83 -0
- novel_downloader/resources/css_styles/main.css +30 -89
- novel_downloader/resources/json/xiguashuwu.json +718 -0
- novel_downloader/utils/__init__.py +43 -0
- novel_downloader/utils/chapter_storage.py +247 -226
- novel_downloader/utils/constants.py +5 -50
- novel_downloader/utils/cookies.py +6 -18
- novel_downloader/utils/crypto_utils/__init__.py +13 -0
- novel_downloader/utils/crypto_utils/aes_util.py +90 -0
- novel_downloader/utils/crypto_utils/aes_v1.py +619 -0
- novel_downloader/utils/crypto_utils/aes_v2.py +1143 -0
- novel_downloader/utils/{crypto_utils.py → crypto_utils/rc4.py} +3 -10
- novel_downloader/utils/epub/__init__.py +34 -0
- novel_downloader/utils/epub/builder.py +377 -0
- novel_downloader/utils/epub/constants.py +118 -0
- novel_downloader/utils/epub/documents.py +297 -0
- novel_downloader/utils/epub/models.py +120 -0
- novel_downloader/utils/epub/utils.py +179 -0
- novel_downloader/utils/file_utils/__init__.py +5 -30
- novel_downloader/utils/file_utils/io.py +9 -150
- novel_downloader/utils/file_utils/normalize.py +2 -2
- novel_downloader/utils/file_utils/sanitize.py +2 -7
- novel_downloader/utils/fontocr.py +207 -0
- novel_downloader/utils/i18n.py +2 -0
- novel_downloader/utils/logger.py +10 -16
- novel_downloader/utils/network.py +111 -252
- novel_downloader/utils/state.py +5 -90
- novel_downloader/utils/text_utils/__init__.py +16 -21
- novel_downloader/utils/text_utils/diff_display.py +6 -9
- novel_downloader/utils/text_utils/numeric_conversion.py +253 -0
- novel_downloader/utils/text_utils/text_cleaner.py +179 -0
- novel_downloader/utils/text_utils/truncate_utils.py +62 -0
- novel_downloader/utils/time_utils/__init__.py +6 -12
- novel_downloader/utils/time_utils/datetime_utils.py +23 -33
- novel_downloader/utils/time_utils/sleep_utils.py +5 -10
- novel_downloader/web/__init__.py +13 -0
- novel_downloader/web/components/__init__.py +11 -0
- novel_downloader/web/components/navigation.py +35 -0
- novel_downloader/web/main.py +66 -0
- novel_downloader/web/pages/__init__.py +17 -0
- novel_downloader/web/pages/download.py +78 -0
- novel_downloader/web/pages/progress.py +147 -0
- novel_downloader/web/pages/search.py +329 -0
- novel_downloader/web/services/__init__.py +17 -0
- novel_downloader/web/services/client_dialog.py +164 -0
- novel_downloader/web/services/cred_broker.py +113 -0
- novel_downloader/web/services/cred_models.py +35 -0
- novel_downloader/web/services/task_manager.py +264 -0
- novel_downloader-2.0.0.dist-info/METADATA +171 -0
- novel_downloader-2.0.0.dist-info/RECORD +210 -0
- {novel_downloader-1.4.5.dist-info → novel_downloader-2.0.0.dist-info}/entry_points.txt +1 -1
- novel_downloader/config/site_rules.py +0 -94
- novel_downloader/core/downloaders/biquge.py +0 -25
- novel_downloader/core/downloaders/esjzone.py +0 -25
- novel_downloader/core/downloaders/linovelib.py +0 -25
- novel_downloader/core/downloaders/sfacg.py +0 -25
- novel_downloader/core/downloaders/yamibo.py +0 -25
- novel_downloader/core/exporters/biquge.py +0 -25
- novel_downloader/core/exporters/esjzone.py +0 -25
- novel_downloader/core/exporters/qianbi.py +0 -25
- novel_downloader/core/exporters/sfacg.py +0 -25
- novel_downloader/core/exporters/yamibo.py +0 -25
- novel_downloader/core/factory/__init__.py +0 -20
- novel_downloader/core/factory/downloader.py +0 -73
- novel_downloader/core/factory/exporter.py +0 -58
- novel_downloader/core/factory/fetcher.py +0 -96
- novel_downloader/core/factory/parser.py +0 -86
- novel_downloader/core/fetchers/base/__init__.py +0 -14
- novel_downloader/core/fetchers/base/browser.py +0 -403
- novel_downloader/core/fetchers/biquge/__init__.py +0 -14
- novel_downloader/core/fetchers/common/__init__.py +0 -14
- novel_downloader/core/fetchers/esjzone/__init__.py +0 -14
- novel_downloader/core/fetchers/esjzone/browser.py +0 -204
- novel_downloader/core/fetchers/linovelib/__init__.py +0 -14
- novel_downloader/core/fetchers/linovelib/browser.py +0 -193
- novel_downloader/core/fetchers/qianbi/__init__.py +0 -14
- novel_downloader/core/fetchers/qidian/__init__.py +0 -14
- novel_downloader/core/fetchers/qidian/browser.py +0 -318
- novel_downloader/core/fetchers/sfacg/__init__.py +0 -14
- novel_downloader/core/fetchers/sfacg/browser.py +0 -189
- novel_downloader/core/fetchers/yamibo/__init__.py +0 -14
- novel_downloader/core/fetchers/yamibo/browser.py +0 -229
- novel_downloader/core/parsers/biquge/__init__.py +0 -10
- novel_downloader/core/parsers/biquge/main_parser.py +0 -134
- novel_downloader/core/parsers/common/__init__.py +0 -13
- novel_downloader/core/parsers/common/helper.py +0 -323
- novel_downloader/core/parsers/common/main_parser.py +0 -106
- novel_downloader/core/parsers/esjzone/__init__.py +0 -10
- novel_downloader/core/parsers/linovelib/__init__.py +0 -10
- novel_downloader/core/parsers/qianbi/__init__.py +0 -10
- novel_downloader/core/parsers/sfacg/__init__.py +0 -10
- novel_downloader/core/parsers/yamibo/__init__.py +0 -10
- novel_downloader/core/parsers/yamibo/main_parser.py +0 -194
- novel_downloader/models/browser.py +0 -21
- novel_downloader/models/chapter.py +0 -25
- novel_downloader/models/site_rules.py +0 -99
- novel_downloader/models/tasks.py +0 -33
- novel_downloader/models/types.py +0 -15
- novel_downloader/resources/css_styles/volume-intro.css +0 -56
- novel_downloader/resources/json/replace_word_map.json +0 -4
- novel_downloader/resources/text/blacklist.txt +0 -22
- novel_downloader/tui/__init__.py +0 -7
- novel_downloader/tui/app.py +0 -32
- novel_downloader/tui/main.py +0 -17
- novel_downloader/tui/screens/__init__.py +0 -14
- novel_downloader/tui/screens/home.py +0 -198
- novel_downloader/tui/screens/login.py +0 -74
- novel_downloader/tui/styles/home_layout.tcss +0 -79
- novel_downloader/tui/widgets/richlog_handler.py +0 -24
- novel_downloader/utils/cache.py +0 -24
- novel_downloader/utils/fontocr/__init__.py +0 -22
- novel_downloader/utils/fontocr/model_loader.py +0 -69
- novel_downloader/utils/fontocr/ocr_v1.py +0 -303
- novel_downloader/utils/fontocr/ocr_v2.py +0 -752
- novel_downloader/utils/hash_store.py +0 -279
- novel_downloader/utils/hash_utils.py +0 -103
- novel_downloader/utils/text_utils/chapter_formatting.py +0 -46
- novel_downloader/utils/text_utils/font_mapping.py +0 -28
- novel_downloader/utils/text_utils/text_cleaning.py +0 -107
- novel_downloader-1.4.5.dist-info/METADATA +0 -196
- novel_downloader-1.4.5.dist-info/RECORD +0 -165
- {novel_downloader-1.4.5.dist-info → novel_downloader-2.0.0.dist-info}/WHEEL +0 -0
- {novel_downloader-1.4.5.dist-info → novel_downloader-2.0.0.dist-info}/licenses/LICENSE +0 -0
- {novel_downloader-1.4.5.dist-info → novel_downloader-2.0.0.dist-info}/top_level.txt +0 -0
@@ -1,79 +0,0 @@
|
|
1
|
-
#main-layout {
|
2
|
-
grid-rows: 3 auto 1 auto 1fr;
|
3
|
-
grid-columns: 1fr;
|
4
|
-
grid-gutter: 1;
|
5
|
-
padding: 1;
|
6
|
-
height: 100%;
|
7
|
-
}
|
8
|
-
|
9
|
-
#title-bar {
|
10
|
-
height: 3;
|
11
|
-
layout: horizontal;
|
12
|
-
align: left middle;
|
13
|
-
padding: 0 1;
|
14
|
-
background: $boost;
|
15
|
-
}
|
16
|
-
|
17
|
-
#title {
|
18
|
-
width: 1fr;
|
19
|
-
content-align: left middle;
|
20
|
-
}
|
21
|
-
|
22
|
-
#settings,
|
23
|
-
#exit {
|
24
|
-
width: 8;
|
25
|
-
padding: 0 1;
|
26
|
-
}
|
27
|
-
|
28
|
-
#input-row {
|
29
|
-
layout: horizontal;
|
30
|
-
padding: 1 0;
|
31
|
-
overflow-x: auto;
|
32
|
-
}
|
33
|
-
|
34
|
-
#site {
|
35
|
-
width: 20;
|
36
|
-
margin-right: 1;
|
37
|
-
}
|
38
|
-
|
39
|
-
#book_ids {
|
40
|
-
width: 1fr;
|
41
|
-
min-width: 0;
|
42
|
-
margin-right: 1;
|
43
|
-
}
|
44
|
-
|
45
|
-
#download {
|
46
|
-
width: 15;
|
47
|
-
}
|
48
|
-
|
49
|
-
#site,
|
50
|
-
#book_ids,
|
51
|
-
#download {
|
52
|
-
width: 100%;
|
53
|
-
}
|
54
|
-
|
55
|
-
Button#download {
|
56
|
-
border: round $accent;
|
57
|
-
padding: 0 1;
|
58
|
-
}
|
59
|
-
Button#download:hover {
|
60
|
-
background: $accent-lighten-3;
|
61
|
-
color: $text;
|
62
|
-
}
|
63
|
-
|
64
|
-
|
65
|
-
#prog {
|
66
|
-
height: 1;
|
67
|
-
color: $success;
|
68
|
-
}
|
69
|
-
|
70
|
-
#label {
|
71
|
-
content-align: left middle;
|
72
|
-
padding-left: 1;
|
73
|
-
}
|
74
|
-
|
75
|
-
#log {
|
76
|
-
border: round $primary;
|
77
|
-
padding: 1;
|
78
|
-
overflow-y: auto;
|
79
|
-
}
|
@@ -1,24 +0,0 @@
|
|
1
|
-
#!/usr/bin/env python3
|
2
|
-
"""
|
3
|
-
novel_downloader.tui.widgets.richlog_handler
|
4
|
-
--------------------------------------------
|
5
|
-
|
6
|
-
"""
|
7
|
-
|
8
|
-
import logging
|
9
|
-
from logging import LogRecord
|
10
|
-
|
11
|
-
from textual.widgets import RichLog
|
12
|
-
|
13
|
-
|
14
|
-
class RichLogHandler(logging.Handler):
|
15
|
-
def __init__(self, rich_log_widget: RichLog):
|
16
|
-
super().__init__()
|
17
|
-
self.rich_log_widget = rich_log_widget
|
18
|
-
|
19
|
-
def emit(self, record: LogRecord) -> None:
|
20
|
-
msg = self.format(record)
|
21
|
-
try:
|
22
|
-
self.rich_log_widget.write(msg)
|
23
|
-
except Exception:
|
24
|
-
self.handleError(record)
|
novel_downloader/utils/cache.py
DELETED
@@ -1,24 +0,0 @@
|
|
1
|
-
#!/usr/bin/env python3
|
2
|
-
"""
|
3
|
-
novel_downloader.utils.cache
|
4
|
-
----------------------------
|
5
|
-
|
6
|
-
Provides decorators for caching function results,
|
7
|
-
specifically optimized for configuration loading functions.
|
8
|
-
"""
|
9
|
-
|
10
|
-
from collections.abc import Callable
|
11
|
-
from functools import lru_cache, wraps
|
12
|
-
from typing import Any, TypeVar, cast
|
13
|
-
|
14
|
-
T = TypeVar("T", bound=Callable[..., Any])
|
15
|
-
|
16
|
-
|
17
|
-
def cached_load_config(func: T) -> T:
|
18
|
-
"""
|
19
|
-
A decorator to cache the result of a config-loading function.
|
20
|
-
Uses LRU cache with maxsize=1.
|
21
|
-
"""
|
22
|
-
cached = lru_cache(maxsize=1)(func)
|
23
|
-
wrapped = wraps(func)(cached)
|
24
|
-
return cast(T, wrapped)
|
@@ -1,22 +0,0 @@
|
|
1
|
-
#!/usr/bin/env python3
|
2
|
-
"""
|
3
|
-
novel_downloader.utils.fontocr
|
4
|
-
------------------------------
|
5
|
-
|
6
|
-
Utilities for font-based OCR, primarily used to decode custom font obfuscation
|
7
|
-
|
8
|
-
Supports:
|
9
|
-
- Font rendering and perceptual hash matching
|
10
|
-
- PaddleOCR-based character recognition
|
11
|
-
- Frequency-based scoring for ambiguous results
|
12
|
-
- Debugging and font mapping persistence
|
13
|
-
|
14
|
-
Exposes the selected OCR engine version via `FontOCR`.
|
15
|
-
"""
|
16
|
-
|
17
|
-
# from .ocr_v1 import FontOCRV1 as FontOCR
|
18
|
-
from .ocr_v2 import FontOCRV2 as FontOCR
|
19
|
-
|
20
|
-
__version__ = "v2"
|
21
|
-
|
22
|
-
__all__ = ["FontOCR"]
|
@@ -1,69 +0,0 @@
|
|
1
|
-
#!/usr/bin/env python3
|
2
|
-
"""
|
3
|
-
novel_downloader.utils.fontocr.model_loader
|
4
|
-
-------------------------------------------
|
5
|
-
|
6
|
-
Utility functions for managing pre-trained model downloads.
|
7
|
-
|
8
|
-
Currently supports:
|
9
|
-
- Character recognition model for single Chinese character inference
|
10
|
-
"""
|
11
|
-
|
12
|
-
from pathlib import Path
|
13
|
-
|
14
|
-
from huggingface_hub import hf_hub_download
|
15
|
-
from huggingface_hub.errors import LocalEntryNotFoundError
|
16
|
-
|
17
|
-
from novel_downloader.utils.constants import (
|
18
|
-
MODEL_CACHE_DIR,
|
19
|
-
REC_CHAR_MODEL_FILES,
|
20
|
-
REC_CHAR_MODEL_REPO,
|
21
|
-
REC_CHAR_VECTOR_FILES,
|
22
|
-
)
|
23
|
-
|
24
|
-
|
25
|
-
def get_rec_chinese_char_model_dir(version: str = "v1.0") -> Path:
|
26
|
-
"""
|
27
|
-
Ensure model files are downloaded, return the directory path.
|
28
|
-
"""
|
29
|
-
model_dir = MODEL_CACHE_DIR / "rec_chinese_char"
|
30
|
-
|
31
|
-
model_dir.mkdir(parents=True, exist_ok=True)
|
32
|
-
|
33
|
-
for fname in REC_CHAR_MODEL_FILES:
|
34
|
-
try:
|
35
|
-
hf_hub_download(
|
36
|
-
repo_id=REC_CHAR_MODEL_REPO,
|
37
|
-
filename=fname,
|
38
|
-
revision=version,
|
39
|
-
local_dir=model_dir,
|
40
|
-
)
|
41
|
-
except LocalEntryNotFoundError as err:
|
42
|
-
raise RuntimeError(
|
43
|
-
f"[model] Missing model file '{fname}' and no internet connection."
|
44
|
-
) from err
|
45
|
-
return model_dir
|
46
|
-
|
47
|
-
|
48
|
-
def get_rec_char_vector_dir(version: str = "v1.0") -> Path:
|
49
|
-
"""
|
50
|
-
Ensure vector files are downloaded into a 'vector' subfolder under model directory.
|
51
|
-
Return the directory path.
|
52
|
-
"""
|
53
|
-
vector_dir = MODEL_CACHE_DIR / "rec_chinese_char"
|
54
|
-
vector_dir.mkdir(parents=True, exist_ok=True)
|
55
|
-
|
56
|
-
for fname in REC_CHAR_VECTOR_FILES:
|
57
|
-
try:
|
58
|
-
hf_hub_download(
|
59
|
-
repo_id=REC_CHAR_MODEL_REPO,
|
60
|
-
filename=fname,
|
61
|
-
revision=version,
|
62
|
-
local_dir=vector_dir,
|
63
|
-
)
|
64
|
-
except LocalEntryNotFoundError as err:
|
65
|
-
raise RuntimeError(
|
66
|
-
f"[vector] Missing vector file '{fname}' and no internet connection."
|
67
|
-
) from err
|
68
|
-
|
69
|
-
return vector_dir
|
@@ -1,303 +0,0 @@
|
|
1
|
-
#!/usr/bin/env python3
|
2
|
-
"""
|
3
|
-
novel_downloader.utils.fontocr.ocr_v1
|
4
|
-
-------------------------------------
|
5
|
-
|
6
|
-
This class provides utility methods for optical character recognition (OCR)
|
7
|
-
and font mapping, primarily used for decrypting custom font encryption
|
8
|
-
on web pages (e.g., the Qidian website).
|
9
|
-
"""
|
10
|
-
|
11
|
-
import json
|
12
|
-
import logging
|
13
|
-
from pathlib import Path
|
14
|
-
from typing import Any
|
15
|
-
|
16
|
-
import numpy as np
|
17
|
-
import paddle
|
18
|
-
from fontTools.ttLib import TTFont
|
19
|
-
from paddleocr import PaddleOCR
|
20
|
-
from PIL import Image, ImageDraw, ImageFont
|
21
|
-
from PIL.Image import Transpose
|
22
|
-
|
23
|
-
from novel_downloader.utils.constants import (
|
24
|
-
REC_CHAR_MODEL_FILES,
|
25
|
-
REC_IMAGE_SHAPE_MAP,
|
26
|
-
)
|
27
|
-
from novel_downloader.utils.hash_store import img_hash_store
|
28
|
-
|
29
|
-
from .model_loader import get_rec_chinese_char_model_dir
|
30
|
-
|
31
|
-
logger = logging.getLogger(__name__)
|
32
|
-
|
33
|
-
|
34
|
-
class FontOCRV1:
|
35
|
-
"""
|
36
|
-
Version 1 of the FontOCR utility.
|
37
|
-
|
38
|
-
:param use_freq: if True, weight OCR scores by character frequency
|
39
|
-
:param cache_dir: base path to store font-map JSON data
|
40
|
-
:param threshold: minimum confidence threshold [0.0-1.0]
|
41
|
-
:param font_debug: if True, dump per-char debug images under cache_dir
|
42
|
-
"""
|
43
|
-
|
44
|
-
# Default constants
|
45
|
-
CHAR_IMAGE_SIZE = 64
|
46
|
-
CHAR_FONT_SIZE = 52
|
47
|
-
_freq_weight = 0.05
|
48
|
-
|
49
|
-
# shared resources
|
50
|
-
_global_char_freq_db: dict[str, int] = {}
|
51
|
-
_global_ocr: PaddleOCR | None = None
|
52
|
-
|
53
|
-
def __init__(
|
54
|
-
self,
|
55
|
-
cache_dir: str | Path,
|
56
|
-
use_freq: bool = False,
|
57
|
-
ocr_version: str = "v1.0",
|
58
|
-
threshold: float = 0.0,
|
59
|
-
font_debug: bool = False,
|
60
|
-
**kwargs: Any,
|
61
|
-
) -> None:
|
62
|
-
self.use_freq = use_freq
|
63
|
-
self.ocr_version = ocr_version
|
64
|
-
self.threshold = threshold
|
65
|
-
self.font_debug = font_debug
|
66
|
-
self._max_freq = 5
|
67
|
-
|
68
|
-
self._cache_dir = Path(cache_dir)
|
69
|
-
self._cache_dir.mkdir(parents=True, exist_ok=True)
|
70
|
-
self._fixed_map_dir = self._cache_dir / "fixed_font_map"
|
71
|
-
self._fixed_map_dir.mkdir(exist_ok=True)
|
72
|
-
|
73
|
-
if font_debug:
|
74
|
-
self._debug_dir = self._cache_dir / "font_debug" / "badcase"
|
75
|
-
self._debug_dir.mkdir(parents=True, exist_ok=True)
|
76
|
-
|
77
|
-
# load shared NLP/OCR + frequency DB once
|
78
|
-
self._load_ocr_model()
|
79
|
-
if self.use_freq and not FontOCRV1._global_char_freq_db:
|
80
|
-
self._load_char_freq_db()
|
81
|
-
|
82
|
-
def _load_ocr_model(self) -> None:
|
83
|
-
"""
|
84
|
-
Initialize the shared PaddleOCR model if not already loaded.
|
85
|
-
"""
|
86
|
-
if FontOCRV1._global_ocr is not None:
|
87
|
-
return
|
88
|
-
|
89
|
-
gpu_available = paddle.device.is_compiled_with_cuda()
|
90
|
-
self._char_model_dir = get_rec_chinese_char_model_dir(self.ocr_version)
|
91
|
-
|
92
|
-
for fname in REC_CHAR_MODEL_FILES:
|
93
|
-
full_path = self._char_model_dir / fname
|
94
|
-
if not full_path.exists():
|
95
|
-
raise FileNotFoundError(f"[FontOCR] Required file missing: {full_path}")
|
96
|
-
|
97
|
-
char_dict_file = self._char_model_dir / "rec_custom_keys.txt"
|
98
|
-
FontOCRV1._global_ocr = PaddleOCR(
|
99
|
-
use_angle_cls=False,
|
100
|
-
lang="ch",
|
101
|
-
det=False,
|
102
|
-
use_gpu=gpu_available,
|
103
|
-
show_log=self.font_debug,
|
104
|
-
rec_model_dir=str(self._char_model_dir),
|
105
|
-
rec_char_dict_path=str(char_dict_file),
|
106
|
-
rec_image_shape=REC_IMAGE_SHAPE_MAP[self.ocr_version],
|
107
|
-
max_text_length=1,
|
108
|
-
use_space_char=False,
|
109
|
-
)
|
110
|
-
|
111
|
-
def _load_char_freq_db(self) -> bool:
|
112
|
-
"""
|
113
|
-
Loads character frequency data from a JSON file and
|
114
|
-
assigns it to the instance variable.
|
115
|
-
|
116
|
-
:return: True if successfully loaded, False otherwise.
|
117
|
-
"""
|
118
|
-
try:
|
119
|
-
char_freq_map_file = self._char_model_dir / "char_freq.json"
|
120
|
-
with char_freq_map_file.open("r", encoding="utf-8") as f:
|
121
|
-
FontOCRV1._global_char_freq_db = json.load(f)
|
122
|
-
self._max_freq = max(FontOCRV1._global_char_freq_db.values())
|
123
|
-
return True
|
124
|
-
except Exception as e:
|
125
|
-
logger.warning("[FontOCR] Failed to load char freq DB: %s", e)
|
126
|
-
return False
|
127
|
-
|
128
|
-
@staticmethod
|
129
|
-
def _generate_char_image(
|
130
|
-
char: str,
|
131
|
-
render_font: ImageFont.FreeTypeFont,
|
132
|
-
is_reflect: bool = False,
|
133
|
-
) -> Image.Image | None:
|
134
|
-
"""
|
135
|
-
Render a single character into a square image.
|
136
|
-
If is_reflect is True, flip horizontally.
|
137
|
-
"""
|
138
|
-
size = FontOCRV1.CHAR_IMAGE_SIZE
|
139
|
-
img = Image.new("L", (size, size), color=255)
|
140
|
-
draw = ImageDraw.Draw(img)
|
141
|
-
bbox = draw.textbbox((0, 0), char, font=render_font)
|
142
|
-
w, h = bbox[2] - bbox[0], bbox[3] - bbox[1]
|
143
|
-
x = (size - w) // 2 - bbox[0]
|
144
|
-
y = (size - h) // 2 - bbox[1]
|
145
|
-
draw.text((x, y), char, fill=0, font=render_font)
|
146
|
-
if is_reflect:
|
147
|
-
img = img.transpose(Transpose.FLIP_LEFT_RIGHT)
|
148
|
-
|
149
|
-
img_np = np.array(img)
|
150
|
-
if np.unique(img_np).size == 1:
|
151
|
-
return None
|
152
|
-
|
153
|
-
return img
|
154
|
-
|
155
|
-
def ocr_text(
|
156
|
-
self, img: Image.Image, top_k: int = 1
|
157
|
-
) -> str | list[tuple[str, float]]:
|
158
|
-
"""
|
159
|
-
Run PaddleOCR on a single-image, return best match(es).
|
160
|
-
If use_freq, adjust score by frequency bonus.
|
161
|
-
"""
|
162
|
-
if not FontOCRV1._global_ocr:
|
163
|
-
self._load_ocr_model()
|
164
|
-
try:
|
165
|
-
img_np = np.asarray(img)
|
166
|
-
assert FontOCRV1._global_ocr is not None
|
167
|
-
result = FontOCRV1._global_ocr.ocr(
|
168
|
-
img_np, cls=False, det=False
|
169
|
-
) # returns List[List[ (text, score) ]]
|
170
|
-
candidates = result[0] if result else []
|
171
|
-
# attach frequency weight if enabled
|
172
|
-
if self.use_freq and FontOCRV1._global_char_freq_db:
|
173
|
-
adjusted = []
|
174
|
-
for ch, score in candidates:
|
175
|
-
freq = FontOCRV1._global_char_freq_db.get(ch, self._max_freq)
|
176
|
-
bonus = (
|
177
|
-
FontOCRV1._freq_weight
|
178
|
-
* (self._max_freq - freq)
|
179
|
-
/ self._max_freq
|
180
|
-
)
|
181
|
-
adjusted.append((ch, score + bonus))
|
182
|
-
candidates = adjusted
|
183
|
-
# filter by threshold
|
184
|
-
filtered = [c for c in candidates if c[1] >= self.threshold]
|
185
|
-
return filtered[0][0] if top_k == 1 and filtered else filtered[:top_k]
|
186
|
-
except Exception as e:
|
187
|
-
logger.error("[FontOCR] OCR failure: %s", e)
|
188
|
-
return "" if top_k == 1 else []
|
189
|
-
|
190
|
-
def query(self, img: Image.Image, top_k: int = 1) -> str | list[tuple[str, float]]:
|
191
|
-
"""
|
192
|
-
First try hash-based lookup via img_hash_store;
|
193
|
-
if no hit, fall back to ocr_text().
|
194
|
-
"""
|
195
|
-
# quick hash lookup
|
196
|
-
matches = img_hash_store.query(img, k=top_k)
|
197
|
-
if matches:
|
198
|
-
# matches is List[(label, dist)]
|
199
|
-
return matches[0][0] if top_k == 1 else matches
|
200
|
-
|
201
|
-
# fallback to OCR
|
202
|
-
return self.ocr_text(img, top_k=top_k)
|
203
|
-
|
204
|
-
def generate_font_map(
|
205
|
-
self,
|
206
|
-
fixed_font_path: str | Path,
|
207
|
-
random_font_path: str | Path,
|
208
|
-
char_set: set[str],
|
209
|
-
refl_set: set[str],
|
210
|
-
chapter_id: str | None = None,
|
211
|
-
) -> dict[str, str]:
|
212
|
-
"""
|
213
|
-
Generates a mapping from encrypted (randomized) font characters to
|
214
|
-
their real recognized characters by rendering and OCR-based matching.
|
215
|
-
|
216
|
-
:param fixed_font_path: Path to the reference (fixed) font.
|
217
|
-
:param random_font_path: Path to the obfuscated (random) font.
|
218
|
-
:param char_set: Characters to process normally.
|
219
|
-
:param refl_set: Characters to process as horizontally flipped.
|
220
|
-
:param chapter_id: Chapter ID
|
221
|
-
|
222
|
-
:returns mapping_result: { obf_char: real_char, ... }
|
223
|
-
"""
|
224
|
-
mapping_result: dict[str, str] = {}
|
225
|
-
fixed_map_file = self._fixed_map_dir / f"{Path(fixed_font_path).stem}.json"
|
226
|
-
|
227
|
-
# 1) load or init fixed_font_map
|
228
|
-
if fixed_map_file.exists():
|
229
|
-
try:
|
230
|
-
with open(fixed_map_file, encoding="utf-8") as f:
|
231
|
-
fixed_map = json.load(f)
|
232
|
-
except Exception as e:
|
233
|
-
logger.debug("[FontOCR] Failed to load fixed map file: %s", e)
|
234
|
-
fixed_map = {}
|
235
|
-
else:
|
236
|
-
fixed_map = {}
|
237
|
-
|
238
|
-
# prepare font renderers and cmap sets
|
239
|
-
try:
|
240
|
-
fixed_ttf = TTFont(fixed_font_path)
|
241
|
-
fixed_chars = {chr(c) for c in fixed_ttf.getBestCmap()}
|
242
|
-
fixed_font = ImageFont.truetype(str(fixed_font_path), self.CHAR_FONT_SIZE)
|
243
|
-
|
244
|
-
random_ttf = TTFont(random_font_path)
|
245
|
-
random_chars = {chr(c) for c in random_ttf.getBestCmap()}
|
246
|
-
random_font = ImageFont.truetype(str(random_font_path), self.CHAR_FONT_SIZE)
|
247
|
-
except Exception as e:
|
248
|
-
logger.error("[FontOCR] Failed to load TTF fonts: %s", e)
|
249
|
-
return mapping_result
|
250
|
-
|
251
|
-
def _process(chars: set[str], reflect: bool = False) -> None:
|
252
|
-
for ch in chars:
|
253
|
-
try:
|
254
|
-
if ch in fixed_map:
|
255
|
-
mapping_result[ch] = fixed_map[ch]
|
256
|
-
logger.debug(
|
257
|
-
"[FontOCR] Using cached mapping: '%s' -> '%s'",
|
258
|
-
ch,
|
259
|
-
fixed_map[ch],
|
260
|
-
)
|
261
|
-
continue
|
262
|
-
|
263
|
-
if ch in fixed_chars:
|
264
|
-
font_to_use = fixed_font
|
265
|
-
elif ch in random_chars:
|
266
|
-
font_to_use = random_font
|
267
|
-
else:
|
268
|
-
logger.debug("[FontOCR] Skipping unknown char: '%s'", ch)
|
269
|
-
continue
|
270
|
-
|
271
|
-
img = self._generate_char_image(ch, font_to_use, is_reflect=reflect)
|
272
|
-
if img is None:
|
273
|
-
logger.debug("[FontOCR] Skipping unknown char: '%s'", ch)
|
274
|
-
continue
|
275
|
-
|
276
|
-
real = self.query(img, top_k=1)
|
277
|
-
if real:
|
278
|
-
real_char = (
|
279
|
-
str(real[0]) if isinstance(real, (list | tuple)) else real
|
280
|
-
)
|
281
|
-
mapping_result[ch] = real_char
|
282
|
-
if ch in fixed_chars:
|
283
|
-
fixed_map[ch] = real_char
|
284
|
-
logger.debug("[FontOCR] Mapped '%s' -> '%s'", ch, real_char)
|
285
|
-
elif self.font_debug and chapter_id:
|
286
|
-
dbg_path = self._debug_dir / f"{ord(ch):05X}_{chapter_id}.png"
|
287
|
-
img.save(dbg_path)
|
288
|
-
logger.debug("[FontOCR] Saved debug image: %s", dbg_path)
|
289
|
-
except Exception as e:
|
290
|
-
logger.warning("[FontOCR] Failed to process char '%s': %s", ch, e)
|
291
|
-
|
292
|
-
# process normal + reflected chars
|
293
|
-
_process(char_set, reflect=False)
|
294
|
-
_process(refl_set, reflect=True)
|
295
|
-
|
296
|
-
# persist updated fixed_map
|
297
|
-
try:
|
298
|
-
with open(fixed_map_file, "w", encoding="utf-8") as f:
|
299
|
-
json.dump(fixed_map, f, ensure_ascii=False, indent=2)
|
300
|
-
except Exception as e:
|
301
|
-
logger.error("[FontOCR] Failed to save fixed map: %s", e)
|
302
|
-
|
303
|
-
return mapping_result
|