novel-downloader 1.2.0__py3-none-any.whl → 1.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- novel_downloader/__init__.py +1 -1
- novel_downloader/config/adapter.py +12 -9
- novel_downloader/config/models.py +6 -4
- novel_downloader/core/downloaders/base_async_downloader.py +1 -1
- novel_downloader/core/interfaces/async_requester_protocol.py +5 -3
- novel_downloader/core/interfaces/requester_protocol.py +3 -3
- novel_downloader/core/parsers/qidian_parser/browser/main_parser.py +2 -0
- novel_downloader/core/parsers/qidian_parser/session/main_parser.py +2 -0
- novel_downloader/core/requesters/base_async_session.py +7 -5
- novel_downloader/core/requesters/base_browser.py +3 -3
- novel_downloader/core/requesters/base_session.py +5 -5
- novel_downloader/core/requesters/common_requester/common_async_session.py +4 -2
- novel_downloader/core/requesters/common_requester/common_session.py +2 -2
- novel_downloader/core/requesters/qidian_requester/qidian_broswer.py +3 -3
- novel_downloader/core/requesters/qidian_requester/qidian_session.py +3 -3
- novel_downloader/resources/config/settings.yaml +16 -13
- novel_downloader/utils/fontocr/ocr_v2.py +6 -0
- {novel_downloader-1.2.0.dist-info → novel_downloader-1.2.1.dist-info}/METADATA +1 -1
- {novel_downloader-1.2.0.dist-info → novel_downloader-1.2.1.dist-info}/RECORD +23 -23
- {novel_downloader-1.2.0.dist-info → novel_downloader-1.2.1.dist-info}/WHEEL +0 -0
- {novel_downloader-1.2.0.dist-info → novel_downloader-1.2.1.dist-info}/entry_points.txt +0 -0
- {novel_downloader-1.2.0.dist-info → novel_downloader-1.2.1.dist-info}/licenses/LICENSE +0 -0
- {novel_downloader-1.2.0.dist-info → novel_downloader-1.2.1.dist-info}/top_level.txt +0 -0
novel_downloader/__init__.py
CHANGED
@@ -93,18 +93,21 @@ class ConfigAdapter:
|
|
93
93
|
config["sites"][site] 中读取解析器相关配置, 返回 ParserConfig 实例
|
94
94
|
"""
|
95
95
|
gen = self._config.get("general", {})
|
96
|
+
font_ocr = gen.get("font_ocr", {})
|
96
97
|
site_cfg = self._config.get("sites", {}).get(self._site, {})
|
97
98
|
return ParserConfig(
|
98
99
|
cache_dir=gen.get("cache_dir", "./cache"),
|
99
|
-
decode_font=
|
100
|
-
use_freq=
|
101
|
-
use_ocr=
|
102
|
-
use_vec=
|
103
|
-
ocr_version=
|
104
|
-
save_font_debug=
|
105
|
-
batch_size=
|
106
|
-
|
107
|
-
|
100
|
+
decode_font=font_ocr.get("decode_font", False),
|
101
|
+
use_freq=font_ocr.get("use_freq", False),
|
102
|
+
use_ocr=font_ocr.get("use_ocr", True),
|
103
|
+
use_vec=font_ocr.get("use_vec", False),
|
104
|
+
ocr_version=font_ocr.get("ocr_version", "v1.0"),
|
105
|
+
save_font_debug=font_ocr.get("save_font_debug", False),
|
106
|
+
batch_size=font_ocr.get("batch_size", 32),
|
107
|
+
gpu_mem=font_ocr.get("gpu_mem", 500),
|
108
|
+
gpu_id=font_ocr.get("gpu_id", None),
|
109
|
+
ocr_weight=font_ocr.get("ocr_weight", 0.6),
|
110
|
+
vec_weight=font_ocr.get("vec_weight", 0.4),
|
108
111
|
mode=site_cfg.get("mode", "session"),
|
109
112
|
)
|
110
113
|
|
@@ -24,10 +24,10 @@ from typing import Any, Dict, List, Literal, Optional, TypedDict
|
|
24
24
|
# === Requesters ===
|
25
25
|
@dataclass
|
26
26
|
class RequesterConfig:
|
27
|
-
wait_time:
|
27
|
+
wait_time: float = 5.0
|
28
28
|
retry_times: int = 3
|
29
|
-
retry_interval:
|
30
|
-
timeout:
|
29
|
+
retry_interval: float = 5.0
|
30
|
+
timeout: float = 30.0
|
31
31
|
headless: bool = True
|
32
32
|
user_data_folder: str = ""
|
33
33
|
profile_name: str = ""
|
@@ -41,7 +41,7 @@ class RequesterConfig:
|
|
41
41
|
# === Downloaders ===
|
42
42
|
@dataclass
|
43
43
|
class DownloaderConfig:
|
44
|
-
request_interval:
|
44
|
+
request_interval: float = 5.0
|
45
45
|
raw_data_dir: str = "./raw_data"
|
46
46
|
cache_dir: str = "./novel_cache"
|
47
47
|
download_workers: int = 4
|
@@ -63,6 +63,8 @@ class ParserConfig:
|
|
63
63
|
use_vec: bool = False
|
64
64
|
ocr_version: str = "v1.0"
|
65
65
|
batch_size: int = 32
|
66
|
+
gpu_mem: int = 500
|
67
|
+
gpu_id: Optional[int] = None
|
66
68
|
ocr_weight: float = 0.6
|
67
69
|
vec_weight: float = 0.4
|
68
70
|
save_font_debug: bool = False
|
@@ -94,7 +94,7 @@ class BaseAsyncDownloader(AsyncDownloaderProtocol, abc.ABC):
|
|
94
94
|
return self._config.login_required
|
95
95
|
|
96
96
|
@property
|
97
|
-
def request_interval(self) ->
|
97
|
+
def request_interval(self) -> float:
|
98
98
|
return self._config.request_interval
|
99
99
|
|
100
100
|
async def prepare(self) -> None:
|
@@ -28,7 +28,9 @@ class AsyncRequesterProtocol(Protocol):
|
|
28
28
|
"""
|
29
29
|
...
|
30
30
|
|
31
|
-
async def get_book_info(
|
31
|
+
async def get_book_info(
|
32
|
+
self, book_id: str, wait_time: Optional[float] = None
|
33
|
+
) -> str:
|
32
34
|
"""
|
33
35
|
Fetch the raw HTML (or JSON) of the book info page asynchronously.
|
34
36
|
|
@@ -39,7 +41,7 @@ class AsyncRequesterProtocol(Protocol):
|
|
39
41
|
...
|
40
42
|
|
41
43
|
async def get_book_chapter(
|
42
|
-
self, book_id: str, chapter_id: str, wait_time: Optional[
|
44
|
+
self, book_id: str, chapter_id: str, wait_time: Optional[float] = None
|
43
45
|
) -> str:
|
44
46
|
"""
|
45
47
|
Fetch the raw HTML (or JSON) of a single chapter asynchronously.
|
@@ -51,7 +53,7 @@ class AsyncRequesterProtocol(Protocol):
|
|
51
53
|
"""
|
52
54
|
...
|
53
55
|
|
54
|
-
async def get_bookcase(self, wait_time: Optional[
|
56
|
+
async def get_bookcase(self, wait_time: Optional[float] = None) -> str:
|
55
57
|
"""
|
56
58
|
Optional: Retrieve the HTML content of the authenticated
|
57
59
|
user's bookcase page asynchronously.
|
@@ -26,7 +26,7 @@ class RequesterProtocol(Protocol):
|
|
26
26
|
"""
|
27
27
|
...
|
28
28
|
|
29
|
-
def get_book_info(self, book_id: str, wait_time: Optional[
|
29
|
+
def get_book_info(self, book_id: str, wait_time: Optional[float] = None) -> str:
|
30
30
|
"""
|
31
31
|
Fetch the raw HTML (or JSON) of the book info page.
|
32
32
|
|
@@ -37,7 +37,7 @@ class RequesterProtocol(Protocol):
|
|
37
37
|
...
|
38
38
|
|
39
39
|
def get_book_chapter(
|
40
|
-
self, book_id: str, chapter_id: str, wait_time: Optional[
|
40
|
+
self, book_id: str, chapter_id: str, wait_time: Optional[float] = None
|
41
41
|
) -> str:
|
42
42
|
"""
|
43
43
|
Fetch the raw HTML (or JSON) of a single chapter.
|
@@ -55,7 +55,7 @@ class RequesterProtocol(Protocol):
|
|
55
55
|
"""
|
56
56
|
...
|
57
57
|
|
58
|
-
def get_bookcase(self, wait_time: Optional[
|
58
|
+
def get_bookcase(self, wait_time: Optional[float] = None) -> str:
|
59
59
|
"""
|
60
60
|
Optional: Retrieve the HTML content of the authenticated user's bookcase page.
|
61
61
|
|
@@ -60,6 +60,8 @@ class QidianBrowserParser(BaseParser):
|
|
60
60
|
use_ocr=config.use_ocr,
|
61
61
|
use_vec=config.use_vec,
|
62
62
|
batch_size=config.batch_size,
|
63
|
+
gpu_mem=config.gpu_mem,
|
64
|
+
gpu_id=config.gpu_id,
|
63
65
|
ocr_weight=config.ocr_weight,
|
64
66
|
vec_weight=config.vec_weight,
|
65
67
|
font_debug=config.save_font_debug,
|
@@ -63,6 +63,8 @@ class QidianSessionParser(BaseParser):
|
|
63
63
|
use_ocr=config.use_ocr,
|
64
64
|
use_vec=config.use_vec,
|
65
65
|
batch_size=config.batch_size,
|
66
|
+
gpu_mem=config.gpu_mem,
|
67
|
+
gpu_id=config.gpu_id,
|
66
68
|
ocr_weight=config.ocr_weight,
|
67
69
|
vec_weight=config.vec_weight,
|
68
70
|
font_debug=config.save_font_debug,
|
@@ -51,7 +51,7 @@ class BaseAsyncSession(AsyncRequesterProtocol, abc.ABC):
|
|
51
51
|
|
52
52
|
Attributes:
|
53
53
|
_session (ClientSession): The persistent aiohttp client session.
|
54
|
-
_timeout (
|
54
|
+
_timeout (float): Timeout for each request in seconds.
|
55
55
|
_retry_times (int): Number of retry attempts on failure.
|
56
56
|
_retry_interval (float): Delay (in seconds) between retries.
|
57
57
|
_headers (Dict[str, str]): Default HTTP headers to send.
|
@@ -111,7 +111,9 @@ class BaseAsyncSession(AsyncRequesterProtocol, abc.ABC):
|
|
111
111
|
)
|
112
112
|
|
113
113
|
@abc.abstractmethod
|
114
|
-
async def get_book_info(
|
114
|
+
async def get_book_info(
|
115
|
+
self, book_id: str, wait_time: Optional[float] = None
|
116
|
+
) -> str:
|
115
117
|
"""
|
116
118
|
Fetch the raw HTML (or JSON) of the book info page asynchronously.
|
117
119
|
|
@@ -123,7 +125,7 @@ class BaseAsyncSession(AsyncRequesterProtocol, abc.ABC):
|
|
123
125
|
|
124
126
|
@abc.abstractmethod
|
125
127
|
async def get_book_chapter(
|
126
|
-
self, book_id: str, chapter_id: str, wait_time: Optional[
|
128
|
+
self, book_id: str, chapter_id: str, wait_time: Optional[float] = None
|
127
129
|
) -> str:
|
128
130
|
"""
|
129
131
|
Fetch the raw HTML (or JSON) of a single chapter asynchronously.
|
@@ -135,7 +137,7 @@ class BaseAsyncSession(AsyncRequesterProtocol, abc.ABC):
|
|
135
137
|
"""
|
136
138
|
...
|
137
139
|
|
138
|
-
async def get_bookcase(self, wait_time: Optional[
|
140
|
+
async def get_bookcase(self, wait_time: Optional[float] = None) -> str:
|
139
141
|
"""
|
140
142
|
Optional: Retrieve the HTML content of the authenticated user's bookcase page.
|
141
143
|
Subclasses that support user login/bookcase should override this.
|
@@ -238,7 +240,7 @@ class BaseAsyncSession(AsyncRequesterProtocol, abc.ABC):
|
|
238
240
|
return self._session
|
239
241
|
|
240
242
|
@property
|
241
|
-
def timeout(self) ->
|
243
|
+
def timeout(self) -> float:
|
242
244
|
"""Return the default timeout setting."""
|
243
245
|
return self._timeout
|
244
246
|
|
@@ -111,7 +111,7 @@ class BaseBrowser(RequesterProtocol, abc.ABC):
|
|
111
111
|
)
|
112
112
|
|
113
113
|
@abc.abstractmethod
|
114
|
-
def get_book_info(self, book_id: str, wait_time: Optional[
|
114
|
+
def get_book_info(self, book_id: str, wait_time: Optional[float] = None) -> str:
|
115
115
|
"""
|
116
116
|
Fetch the raw HTML (or JSON) of the book info page.
|
117
117
|
|
@@ -123,7 +123,7 @@ class BaseBrowser(RequesterProtocol, abc.ABC):
|
|
123
123
|
|
124
124
|
@abc.abstractmethod
|
125
125
|
def get_book_chapter(
|
126
|
-
self, book_id: str, chapter_id: str, wait_time: Optional[
|
126
|
+
self, book_id: str, chapter_id: str, wait_time: Optional[float] = None
|
127
127
|
) -> str:
|
128
128
|
"""
|
129
129
|
Fetch the raw HTML (or JSON) of a single chapter.
|
@@ -135,7 +135,7 @@ class BaseBrowser(RequesterProtocol, abc.ABC):
|
|
135
135
|
"""
|
136
136
|
...
|
137
137
|
|
138
|
-
def get_bookcase(self, wait_time: Optional[
|
138
|
+
def get_bookcase(self, wait_time: Optional[float] = None) -> str:
|
139
139
|
"""
|
140
140
|
Optional: Retrieve the HTML content of the authenticated user's bookcase page.
|
141
141
|
|
@@ -28,7 +28,7 @@ class BaseSession(RequesterProtocol, abc.ABC):
|
|
28
28
|
|
29
29
|
Attributes:
|
30
30
|
_session (requests.Session): The persistent HTTP session.
|
31
|
-
_timeout (
|
31
|
+
_timeout (float): Timeout for each request in seconds.
|
32
32
|
"""
|
33
33
|
|
34
34
|
def _init_session(
|
@@ -81,7 +81,7 @@ class BaseSession(RequesterProtocol, abc.ABC):
|
|
81
81
|
)
|
82
82
|
|
83
83
|
@abc.abstractmethod
|
84
|
-
def get_book_info(self, book_id: str, wait_time: Optional[
|
84
|
+
def get_book_info(self, book_id: str, wait_time: Optional[float] = None) -> str:
|
85
85
|
"""
|
86
86
|
Fetch the raw HTML (or JSON) of the book info page.
|
87
87
|
|
@@ -93,7 +93,7 @@ class BaseSession(RequesterProtocol, abc.ABC):
|
|
93
93
|
|
94
94
|
@abc.abstractmethod
|
95
95
|
def get_book_chapter(
|
96
|
-
self, book_id: str, chapter_id: str, wait_time: Optional[
|
96
|
+
self, book_id: str, chapter_id: str, wait_time: Optional[float] = None
|
97
97
|
) -> str:
|
98
98
|
"""
|
99
99
|
Fetch the raw HTML (or JSON) of a single chapter.
|
@@ -105,7 +105,7 @@ class BaseSession(RequesterProtocol, abc.ABC):
|
|
105
105
|
"""
|
106
106
|
...
|
107
107
|
|
108
|
-
def get_bookcase(self, wait_time: Optional[
|
108
|
+
def get_bookcase(self, wait_time: Optional[float] = None) -> str:
|
109
109
|
"""
|
110
110
|
Optional: Retrieve the HTML content of the authenticated user's bookcase page.
|
111
111
|
|
@@ -171,7 +171,7 @@ class BaseSession(RequesterProtocol, abc.ABC):
|
|
171
171
|
return self._session
|
172
172
|
|
173
173
|
@property
|
174
|
-
def timeout(self) ->
|
174
|
+
def timeout(self) -> float:
|
175
175
|
"""Return the default timeout setting."""
|
176
176
|
return self._timeout
|
177
177
|
|
@@ -45,7 +45,9 @@ class CommonAsyncSession(BaseAsyncSession):
|
|
45
45
|
self._site = site
|
46
46
|
self._profile = profile
|
47
47
|
|
48
|
-
async def get_book_info(
|
48
|
+
async def get_book_info(
|
49
|
+
self, book_id: str, wait_time: Optional[float] = None
|
50
|
+
) -> str:
|
49
51
|
"""
|
50
52
|
Fetch the raw HTML of the book info page asynchronously.
|
51
53
|
|
@@ -62,7 +64,7 @@ class CommonAsyncSession(BaseAsyncSession):
|
|
62
64
|
return html
|
63
65
|
|
64
66
|
async def get_book_chapter(
|
65
|
-
self, book_id: str, chapter_id: str, wait_time: Optional[
|
67
|
+
self, book_id: str, chapter_id: str, wait_time: Optional[float] = None
|
66
68
|
) -> str:
|
67
69
|
"""
|
68
70
|
Fetch the raw HTML of a single chapter asynchronously.
|
@@ -47,7 +47,7 @@ class CommonSession(BaseSession):
|
|
47
47
|
self._site = site
|
48
48
|
self._profile = profile
|
49
49
|
|
50
|
-
def get_book_info(self, book_id: str, wait_time: Optional[
|
50
|
+
def get_book_info(self, book_id: str, wait_time: Optional[float] = None) -> str:
|
51
51
|
"""
|
52
52
|
Fetch the raw HTML (or JSON) of the book info page.
|
53
53
|
|
@@ -75,7 +75,7 @@ class CommonSession(BaseSession):
|
|
75
75
|
raise RuntimeError("Unexpected error: get_book_info failed without returning")
|
76
76
|
|
77
77
|
def get_book_chapter(
|
78
|
-
self, book_id: str, chapter_id: str, wait_time: Optional[
|
78
|
+
self, book_id: str, chapter_id: str, wait_time: Optional[float] = None
|
79
79
|
) -> str:
|
80
80
|
"""
|
81
81
|
Fetch the raw HTML (or JSON) of a single chapter.
|
@@ -266,7 +266,7 @@ class QidianBrowser(BaseBrowser):
|
|
266
266
|
"""
|
267
267
|
return self.QIDIAN_BOOKCASE_URL
|
268
268
|
|
269
|
-
def get_book_info(self, book_id: str, wait_time: Optional[
|
269
|
+
def get_book_info(self, book_id: str, wait_time: Optional[float] = None) -> str:
|
270
270
|
"""
|
271
271
|
Retrieve the HTML of a Qidian book info page.
|
272
272
|
|
@@ -311,7 +311,7 @@ class QidianBrowser(BaseBrowser):
|
|
311
311
|
time.sleep(pause)
|
312
312
|
|
313
313
|
def get_book_chapter(
|
314
|
-
self, book_id: str, chapter_id: str, wait_time: Optional[
|
314
|
+
self, book_id: str, chapter_id: str, wait_time: Optional[float] = None
|
315
315
|
) -> str:
|
316
316
|
"""
|
317
317
|
Retrieve the HTML content of a specific chapter.
|
@@ -347,7 +347,7 @@ class QidianBrowser(BaseBrowser):
|
|
347
347
|
logger.warning("[fetch] Error fetching chapter from '%s': %s", url, e)
|
348
348
|
return ""
|
349
349
|
|
350
|
-
def get_bookcase(self, wait_time: Optional[
|
350
|
+
def get_bookcase(self, wait_time: Optional[float] = None) -> str:
|
351
351
|
"""
|
352
352
|
Retrieve the HTML content of the logged‑in user's Qidian bookcase page.
|
353
353
|
|
@@ -108,7 +108,7 @@ class QidianSession(BaseSession):
|
|
108
108
|
self.get("https://www.qidian.com")
|
109
109
|
return True
|
110
110
|
|
111
|
-
def get_book_info(self, book_id: str, wait_time: Optional[
|
111
|
+
def get_book_info(self, book_id: str, wait_time: Optional[float] = None) -> str:
|
112
112
|
"""
|
113
113
|
Fetch the raw HTML of the book info page.
|
114
114
|
|
@@ -140,7 +140,7 @@ class QidianSession(BaseSession):
|
|
140
140
|
raise RuntimeError("Unexpected fall-through in get_book_info")
|
141
141
|
|
142
142
|
def get_book_chapter(
|
143
|
-
self, book_id: str, chapter_id: str, wait_time: Optional[
|
143
|
+
self, book_id: str, chapter_id: str, wait_time: Optional[float] = None
|
144
144
|
) -> str:
|
145
145
|
"""
|
146
146
|
Fetch the HTML of a single chapter.
|
@@ -174,7 +174,7 @@ class QidianSession(BaseSession):
|
|
174
174
|
|
175
175
|
raise RuntimeError("Unexpected fall-through in get_book_chapter")
|
176
176
|
|
177
|
-
def get_bookcase(self, wait_time: Optional[
|
177
|
+
def get_bookcase(self, wait_time: Optional[float] = None) -> str:
|
178
178
|
"""
|
179
179
|
Retrieve the user's *bookcase* page.
|
180
180
|
|
@@ -1,9 +1,9 @@
|
|
1
1
|
# 网络请求层设置
|
2
2
|
requests:
|
3
|
-
wait_time: 5
|
3
|
+
wait_time: 5.0 # 每次请求等待时间 (秒)
|
4
4
|
retry_times: 3 # 请求失败重试次数
|
5
|
-
retry_interval: 5
|
6
|
-
timeout: 30
|
5
|
+
retry_interval: 5.0
|
6
|
+
timeout: 30.0 # 页面加载超时时间 (秒)
|
7
7
|
max_rps: null # 最大请求速率 (requests per second), 为 null 则不限制
|
8
8
|
# DrissionPage 专用设置
|
9
9
|
headless: false # 是否以无头模式启动浏览器
|
@@ -15,7 +15,7 @@ requests:
|
|
15
15
|
|
16
16
|
# 全局通用设置
|
17
17
|
general:
|
18
|
-
request_interval: 5
|
18
|
+
request_interval: 5.0 # 同一本书各章节请求间隔 (秒)
|
19
19
|
raw_data_dir: "./raw_data" # 原始章节 HTML/JSON 存放目录
|
20
20
|
output_dir: "./downloads" # 最终输出文件存放目录
|
21
21
|
cache_dir: "./novel_cache" # 本地缓存目录 (字体 / 图片等)
|
@@ -26,6 +26,18 @@ general:
|
|
26
26
|
debug:
|
27
27
|
save_html: false # 是否将抓取到的原始 HTML 保留到磁盘
|
28
28
|
log_level: "INFO" # 日志级别: DEBUG, INFO, WARNING, ERROR
|
29
|
+
font_ocr:
|
30
|
+
decode_font: false # 是否尝试本地解码混淆字体
|
31
|
+
use_freq: false # 是否使用频率分析
|
32
|
+
ocr_version: "v2.0" # "v1.0" / "v2.0"
|
33
|
+
use_ocr: true # 是否使用 OCR 辅助识别文本
|
34
|
+
use_vec: false # 是否使用 Vector 辅助识别文本
|
35
|
+
save_font_debug: false # 是否保存字体解码调试数据
|
36
|
+
batch_size: 32
|
37
|
+
gpu_mem: 500 # GPU 显存限制 (MB)
|
38
|
+
gpu_id: null # 使用哪个 GPU
|
39
|
+
ocr_weight: 0.6
|
40
|
+
vec_weight: 0.4
|
29
41
|
|
30
42
|
# 各站点的特定配置
|
31
43
|
sites:
|
@@ -38,15 +50,6 @@ sites:
|
|
38
50
|
- "0000000000"
|
39
51
|
mode: "browser" # browser / session
|
40
52
|
login_required: true # 是否需要登录才能访问
|
41
|
-
decode_font: false # 是否尝试本地解码混淆字体
|
42
|
-
use_freq: false # 是否使用频率分析
|
43
|
-
ocr_version: "v2.0" # "v1.0" / "v2.0"
|
44
|
-
use_ocr: true # 是否使用 OCR 辅助识别文本
|
45
|
-
use_vec: false # 是否使用 Vector 辅助识别文本
|
46
|
-
save_font_debug: false # 是否保存字体解码调试数据
|
47
|
-
batch_size: 32
|
48
|
-
ocr_weight: 0.6
|
49
|
-
vec_weight: 0.4
|
50
53
|
#
|
51
54
|
sample_site:
|
52
55
|
book_ids:
|
@@ -221,6 +221,8 @@ class FontOCRV2:
|
|
221
221
|
use_ocr: bool = True,
|
222
222
|
use_vec: bool = False,
|
223
223
|
batch_size: int = 32,
|
224
|
+
gpu_mem: int = 500,
|
225
|
+
gpu_id: Optional[int] = None,
|
224
226
|
ocr_weight: float = 0.6,
|
225
227
|
vec_weight: float = 0.4,
|
226
228
|
ocr_version: str = "v1.0",
|
@@ -232,6 +234,8 @@ class FontOCRV2:
|
|
232
234
|
self.use_ocr = use_ocr
|
233
235
|
self.use_vec = use_vec
|
234
236
|
self.batch_size = batch_size
|
237
|
+
self.gpu_mem = gpu_mem
|
238
|
+
self.gpu_id = gpu_id
|
235
239
|
self.ocr_weight = ocr_weight
|
236
240
|
self.vec_weight = vec_weight
|
237
241
|
self.ocr_version = ocr_version
|
@@ -279,6 +283,8 @@ class FontOCRV2:
|
|
279
283
|
rec_batch_num=self.batch_size,
|
280
284
|
use_space_char=False,
|
281
285
|
use_gpu=gpu_available,
|
286
|
+
gpu_mem=self.gpu_mem,
|
287
|
+
gpu_id=self.gpu_id,
|
282
288
|
)
|
283
289
|
|
284
290
|
def _load_char_freq_db(self) -> bool:
|
@@ -1,4 +1,4 @@
|
|
1
|
-
novel_downloader/__init__.py,sha256=
|
1
|
+
novel_downloader/__init__.py,sha256=XOpc1EqvKxLGY9qvIlEY7_tCe_HUpPbsjT1S0DFq-68,242
|
2
2
|
novel_downloader/cli/__init__.py,sha256=ocGwOO4kmkby8VNol92UikMI1RPUJLv9i5xmB7wbpmw,198
|
3
3
|
novel_downloader/cli/clean.py,sha256=9_hOrxKg8nY7q6cyR8iNech0vSREGagPBmdB4k8Te2U,3937
|
4
4
|
novel_downloader/cli/download.py,sha256=l-Ht2duKI78EMR8vTEbdVnwFT9NkWe87l3L1LmmIuZc,4156
|
@@ -6,13 +6,13 @@ novel_downloader/cli/interactive.py,sha256=6vROwPsvupb_TWH1dd_78FDqvtAaiPfyEBvQV
|
|
6
6
|
novel_downloader/cli/main.py,sha256=km1MwHzIVZFcxUlKLRiiMctJlGHWKZNjRKrgAGQjkMs,1183
|
7
7
|
novel_downloader/cli/settings.py,sha256=bV3Hgg502V9goeP3g2xSiF-PMQB9G32qGmjb8ncTENA,6522
|
8
8
|
novel_downloader/config/__init__.py,sha256=tJ2k7nwZbxgqw1kKgJM4g1yu5-2fsx2LXU3VTadrTJ4,1129
|
9
|
-
novel_downloader/config/adapter.py,sha256=
|
9
|
+
novel_downloader/config/adapter.py,sha256=ksirRvp4038Xe3tu93Mwm93Iff0fOKNinJnXA746Xik,5949
|
10
10
|
novel_downloader/config/loader.py,sha256=_rm9rp1lmHYg-A7F_0PQETWjlXbvtyJYaqQD5oI-1O0,5690
|
11
|
-
novel_downloader/config/models.py,sha256=
|
11
|
+
novel_downloader/config/models.py,sha256=WWzwwWQB2BiiXc3KhJxW6N3vr9JIrqEZ0s4reIcH-vA,5087
|
12
12
|
novel_downloader/config/site_rules.py,sha256=WRw12Tfue-ErAPGKq506gRIqKOxWU-u96kay3JDgTNc,3031
|
13
13
|
novel_downloader/core/__init__.py,sha256=D-ACiIqP0rdARZmjBnF6WMKGvvjVtxGRIM7GhOS9kh4,779
|
14
14
|
novel_downloader/core/downloaders/__init__.py,sha256=Qp0q4p7zTy7lReQQF0hDP7ALUQnNflSNNIl4F7iPGz0,601
|
15
|
-
novel_downloader/core/downloaders/base_async_downloader.py,sha256=
|
15
|
+
novel_downloader/core/downloaders/base_async_downloader.py,sha256=8lMSVLU-VtGIdEMGkS0s_rEJpqCgu2WaljBvsEDyPN4,4281
|
16
16
|
novel_downloader/core/downloaders/base_downloader.py,sha256=kFw_yn3QRbWqU9jXJni4IGA8P3AxZf9gfjgfu01TauY,5371
|
17
17
|
novel_downloader/core/downloaders/common_asynb_downloader.py,sha256=u1ODvh_n13CSGWwjkBIMoThTbCeACX5mOhv5ub2Cd0c,7120
|
18
18
|
novel_downloader/core/downloaders/common_downloader.py,sha256=Ru60j-S9I-Nj1P7gNZJjohJ1H8gAuvK1bELPMeZ2TTo,6532
|
@@ -24,10 +24,10 @@ novel_downloader/core/factory/requester_factory.py,sha256=OEK2S-rj8vw4IdDTMTEWcb
|
|
24
24
|
novel_downloader/core/factory/saver_factory.py,sha256=OgZPDOWVIfhxLFiVBKI5jaNOEKmzP9f3YWDOnw63Hfc,1275
|
25
25
|
novel_downloader/core/interfaces/__init__.py,sha256=jeT8BmEEjIazVyX80ZdzQXgTccEj-ktG6Bbjs9uAVUM,843
|
26
26
|
novel_downloader/core/interfaces/async_downloader_protocol.py,sha256=QWjdhNc39hC3bD8Q1lUpBv2GqX3roxVxzKWh6cgwLhk,1002
|
27
|
-
novel_downloader/core/interfaces/async_requester_protocol.py,sha256=
|
27
|
+
novel_downloader/core/interfaces/async_requester_protocol.py,sha256=fvxw3fFO-azxc9RMvVjq8CUv7RViBQG1KUKIafr-qPE,2281
|
28
28
|
novel_downloader/core/interfaces/downloader_protocol.py,sha256=YJdSAE9uBWF7wNLWmlKsYd3J7M4rXOjqDV5m9O7kfio,947
|
29
29
|
novel_downloader/core/interfaces/parser_protocol.py,sha256=A2wIe7shEGdeKsNDFpMuPI8HFrK_H34HOseVAzqcnTo,1280
|
30
|
-
novel_downloader/core/interfaces/requester_protocol.py,sha256=
|
30
|
+
novel_downloader/core/interfaces/requester_protocol.py,sha256=TbzVWKf4w0k5Wk1UmoAr49GVuGRv_Z1abTaXM_d6_KM,1994
|
31
31
|
novel_downloader/core/interfaces/saver_protocol.py,sha256=_3ha4koF-xBrogJwvCOsQM3GEXfLn3_aYZHvmRd1ZR4,1623
|
32
32
|
novel_downloader/core/parsers/__init__.py,sha256=TSarXiYxthyjFgruRFkpMEucqmylrCtR_4Y1C1RVMhk,544
|
33
33
|
novel_downloader/core/parsers/base_parser.py,sha256=TCytPIEmh7mJPiaIfPyf2P16OO5jnB0tTdi1duBv76Q,2924
|
@@ -39,26 +39,26 @@ novel_downloader/core/parsers/qidian_parser/browser/__init__.py,sha256=E8kMkQW_L
|
|
39
39
|
novel_downloader/core/parsers/qidian_parser/browser/chapter_encrypted.py,sha256=65pm3-DrHUH--wzo317yks0fbpcFZm_INB-hVboTqBo,17696
|
40
40
|
novel_downloader/core/parsers/qidian_parser/browser/chapter_normal.py,sha256=SZ9Ncw6yLCRo4gJNEtv4V8N2WfONvEAc8lftJREsBTY,3190
|
41
41
|
novel_downloader/core/parsers/qidian_parser/browser/chapter_router.py,sha256=qjN10SpQCUMjFcCaWnqIZhcLDx5sN5jzDfWIrBSbnyo,2101
|
42
|
-
novel_downloader/core/parsers/qidian_parser/browser/main_parser.py,sha256=
|
42
|
+
novel_downloader/core/parsers/qidian_parser/browser/main_parser.py,sha256=vElnyAzZ5douexl-1n_oUY64pfB6st0wYzcF9DIdVec,3844
|
43
43
|
novel_downloader/core/parsers/qidian_parser/session/__init__.py,sha256=Rs2Sz1vNn1-UdpY0O_reECBN4kgb3JYHQZoZ20P7lHU,358
|
44
44
|
novel_downloader/core/parsers/qidian_parser/session/chapter_encrypted.py,sha256=EgY9qo9v3wx2CZ95zD_Rzcbu0FvfwWtkLuaP3mEcP2c,15999
|
45
45
|
novel_downloader/core/parsers/qidian_parser/session/chapter_normal.py,sha256=ySQ7vUs4dLCkHv_nPSifDnH3xq3wqxa9FWGy_ETX-uw,3875
|
46
46
|
novel_downloader/core/parsers/qidian_parser/session/chapter_router.py,sha256=ob8ULDhNdnJgU3rlA-tLy0w0PqbC20vi8auFqQipJww,1978
|
47
|
-
novel_downloader/core/parsers/qidian_parser/session/main_parser.py,sha256=
|
47
|
+
novel_downloader/core/parsers/qidian_parser/session/main_parser.py,sha256=F_uFibBD6BUIf7JkwurfQ9TEA5zAJqhR5C4Bf9b7hZs,3935
|
48
48
|
novel_downloader/core/parsers/qidian_parser/session/node_decryptor.py,sha256=7ZuneGzL7HX1g8taMzn-2qkJXLDHgrVee-FDkMZtIIw,5755
|
49
49
|
novel_downloader/core/parsers/qidian_parser/shared/__init__.py,sha256=K5HX7pgiRiJuTLdbQDbtm60mO-sXgr6bo5Ft8H1-JLs,978
|
50
50
|
novel_downloader/core/parsers/qidian_parser/shared/book_info_parser.py,sha256=juCV72QKcaAjQZU-j6XiBM1VgdRrXY9w_2NHrflHsv4,3047
|
51
51
|
novel_downloader/core/parsers/qidian_parser/shared/helpers.py,sha256=E8cWVhehaMLNXQAq2whIKl29xAULUzW4MdZvWshDb9Y,4284
|
52
52
|
novel_downloader/core/requesters/__init__.py,sha256=U2jDvt5RxF5P9yL2lwyZ-cRueJBZgRnjil3_5TvAh3Y,798
|
53
|
-
novel_downloader/core/requesters/base_async_session.py,sha256=
|
54
|
-
novel_downloader/core/requesters/base_browser.py,sha256=
|
55
|
-
novel_downloader/core/requesters/base_session.py,sha256=
|
53
|
+
novel_downloader/core/requesters/base_async_session.py,sha256=7hQfdUCFL-JnJiEltPMfWVrX2tjSntnnAazG1ygh6pU,10564
|
54
|
+
novel_downloader/core/requesters/base_browser.py,sha256=oHJA0JecYyODkfTmL2L2qMe4PnR-Y_Md_zblD5wQdP4,6759
|
55
|
+
novel_downloader/core/requesters/base_session.py,sha256=p7TvslYam_8WhIxp7LSBPQ7nWh0vG5KrcjuW24B4y1U,8255
|
56
56
|
novel_downloader/core/requesters/common_requester/__init__.py,sha256=kVKZyrS7PVlUnaV1xGsZdoW2J9XuyQ11A4oMV9Cc64Q,523
|
57
|
-
novel_downloader/core/requesters/common_requester/common_async_session.py,sha256=
|
58
|
-
novel_downloader/core/requesters/common_requester/common_session.py,sha256=
|
57
|
+
novel_downloader/core/requesters/common_requester/common_async_session.py,sha256=Bt_pgiJdxfoSDswMANeiZxaUQ4E5YitqEmcTlL4ImLY,3556
|
58
|
+
novel_downloader/core/requesters/common_requester/common_session.py,sha256=pdTad7LibvHh_4Uh801unyx1qN1SUsSivKMeM6GLyUk,4654
|
59
59
|
novel_downloader/core/requesters/qidian_requester/__init__.py,sha256=s0ldqNvfqUsEnm_biM_bXEGN7gz88Z5IAx1OBvGW1lY,682
|
60
|
-
novel_downloader/core/requesters/qidian_requester/qidian_broswer.py,sha256=
|
61
|
-
novel_downloader/core/requesters/qidian_requester/qidian_session.py,sha256=
|
60
|
+
novel_downloader/core/requesters/qidian_requester/qidian_broswer.py,sha256=kzhLvdX6wIxy-CmEN-7wkrTCEEC4dVPBqGMLt3aj8Uc,14111
|
61
|
+
novel_downloader/core/requesters/qidian_requester/qidian_session.py,sha256=huZflPnQoKglGNxP0pPD-wVoRZC90ER7hrwFo7WbM0M,7670
|
62
62
|
novel_downloader/core/savers/__init__.py,sha256=p9O6p8ZUblrSheDVJoTSuDr6s1mJpQi8mz3QmQ16nHs,391
|
63
63
|
novel_downloader/core/savers/base_saver.py,sha256=VocVl8go80IkzAp9qY4dgZjmLbK8TVkg48Ugl53pxrc,5513
|
64
64
|
novel_downloader/core/savers/qidian_saver.py,sha256=MVAcWdM-IX_qsRW5It2aIkx9QPdRCLcZGcD3ihfm3gU,627
|
@@ -74,7 +74,7 @@ novel_downloader/core/savers/epub_utils/volume_intro.py,sha256=1NhnLKRL_ieoDgXTR
|
|
74
74
|
novel_downloader/locales/en.json,sha256=7xPlFLf6ByH0VMnGTTRC_6gRSW2IdTvQnKa5_FquSsk,5277
|
75
75
|
novel_downloader/locales/zh.json,sha256=TylYUKSUUbG4Fh_DQazUNTY96HriQWyBfKjh1FrI0xM,5163
|
76
76
|
novel_downloader/resources/config/rules.toml,sha256=hrED6h3Z3cjSY5hRPQhp4TFAU5QXnN9xHfVABOJQNrM,4979
|
77
|
-
novel_downloader/resources/config/settings.yaml,sha256=
|
77
|
+
novel_downloader/resources/config/settings.yaml,sha256=NxtYrf2HWJQyGwIuj3f3JKA72Do5WPvSghonPM0Q7SM,3493
|
78
78
|
novel_downloader/resources/css_styles/main.css,sha256=WM6GePwdOGgM86fbbOxQ0_0oerTBDZeQHt8zRVfcJp8,1617
|
79
79
|
novel_downloader/resources/css_styles/volume-intro.css,sha256=6gaUnNKkrb2w8tYJRq1BGD1FwbhT1I5W2GI_Zelo9G4,1156
|
80
80
|
novel_downloader/resources/images/volume_border.png,sha256=2dEVimnTHKOfLMhi7bhkh_5joWNnrqg8duomLSNOZx4,28613
|
@@ -98,7 +98,7 @@ novel_downloader/utils/file_utils/normalize.py,sha256=7lqtVAMQO6z1JoCihLo-cf9PYn
|
|
98
98
|
novel_downloader/utils/file_utils/sanitize.py,sha256=dRgdtJx33G6DTFgEK0f1CFyTGUBKG5jxej2A-f7grQU,2324
|
99
99
|
novel_downloader/utils/fontocr/__init__.py,sha256=j8IrSs7wUPX8shg18wafo15hgQmqwUaqrfExbHJAaMY,567
|
100
100
|
novel_downloader/utils/fontocr/ocr_v1.py,sha256=hWB7cv6qQATpmrM0mVtRPlhBRmYKxJ9xNMiBrmo6__U,11351
|
101
|
-
novel_downloader/utils/fontocr/ocr_v2.py,sha256=
|
101
|
+
novel_downloader/utils/fontocr/ocr_v2.py,sha256=RSmHfrilrRudExAjzkBm-_PknDNT9blp_2Eu2wCw54M,24457
|
102
102
|
novel_downloader/utils/text_utils/__init__.py,sha256=Iq6ZuS93J_M54nv1zPECryuPXJ-a0b3OE2wnyWqo-qs,830
|
103
103
|
novel_downloader/utils/text_utils/chapter_formatting.py,sha256=NeRKvZFspA5fcBrxhgqf4ZP-zq1vr8zNZ3viNOXhwhM,1347
|
104
104
|
novel_downloader/utils/text_utils/diff_display.py,sha256=cLjpeAOtpLOVMHnjgq2yv91fHvH0LvL1lMB80v3bPjQ,2552
|
@@ -107,9 +107,9 @@ novel_downloader/utils/text_utils/text_cleaning.py,sha256=1yuaDeUBHqBRkkWhw43rV1
|
|
107
107
|
novel_downloader/utils/time_utils/__init__.py,sha256=bRpO14eorfH5C5xfqvW7QwSe3fQHhpr34j4O3qY5cGc,580
|
108
108
|
novel_downloader/utils/time_utils/datetime_utils.py,sha256=xYKuI2K6DKwZdfUBZ0j1SNbmHjhYU7hIu46NzlZqr3o,4887
|
109
109
|
novel_downloader/utils/time_utils/sleep_utils.py,sha256=CffWLotrhOZ-uYwC8Nb1cwZrAO2p83JDIrCGZLQuEC0,1384
|
110
|
-
novel_downloader-1.2.
|
111
|
-
novel_downloader-1.2.
|
112
|
-
novel_downloader-1.2.
|
113
|
-
novel_downloader-1.2.
|
114
|
-
novel_downloader-1.2.
|
115
|
-
novel_downloader-1.2.
|
110
|
+
novel_downloader-1.2.1.dist-info/licenses/LICENSE,sha256=XgmnH0mBf-qEiizoVAfJQAKzPB9y3rBa-ni7M0Aqv4A,1066
|
111
|
+
novel_downloader-1.2.1.dist-info/METADATA,sha256=3Wvzvnm4MzObnOT-71EOrmC1UbO1f9yNbfOlFpdFgdw,6291
|
112
|
+
novel_downloader-1.2.1.dist-info/WHEEL,sha256=DnLRTWE75wApRYVsjgc6wsVswC54sMSJhAEd4xhDpBk,91
|
113
|
+
novel_downloader-1.2.1.dist-info/entry_points.txt,sha256=v23QrJrfrAcYpxUYslCVxubOVRRTaTw7vlG_tfMsFP8,65
|
114
|
+
novel_downloader-1.2.1.dist-info/top_level.txt,sha256=hP4jYWM2LTm1jxsW4hqEB8N0dsRvldO2QdhggJT917I,17
|
115
|
+
novel_downloader-1.2.1.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|