novel-downloader 1.4.5__py3-none-any.whl → 2.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- novel_downloader/__init__.py +1 -1
- novel_downloader/cli/__init__.py +2 -4
- novel_downloader/cli/clean.py +21 -88
- novel_downloader/cli/config.py +27 -104
- novel_downloader/cli/download.py +78 -66
- novel_downloader/cli/export.py +20 -21
- novel_downloader/cli/main.py +3 -1
- novel_downloader/cli/search.py +120 -0
- novel_downloader/cli/ui.py +156 -0
- novel_downloader/config/__init__.py +10 -14
- novel_downloader/config/adapter.py +195 -99
- novel_downloader/config/{loader.py → file_io.py} +53 -27
- novel_downloader/core/__init__.py +14 -13
- novel_downloader/core/archived/deqixs/fetcher.py +115 -0
- novel_downloader/core/archived/deqixs/parser.py +132 -0
- novel_downloader/core/archived/deqixs/searcher.py +89 -0
- novel_downloader/core/archived/qidian/searcher.py +79 -0
- novel_downloader/core/archived/wanbengo/searcher.py +98 -0
- novel_downloader/core/archived/xshbook/searcher.py +93 -0
- novel_downloader/core/downloaders/__init__.py +8 -30
- novel_downloader/core/downloaders/base.py +182 -30
- novel_downloader/core/downloaders/common.py +217 -384
- novel_downloader/core/downloaders/qianbi.py +332 -4
- novel_downloader/core/downloaders/qidian.py +250 -290
- novel_downloader/core/downloaders/registry.py +69 -0
- novel_downloader/core/downloaders/signals.py +46 -0
- novel_downloader/core/exporters/__init__.py +8 -26
- novel_downloader/core/exporters/base.py +107 -31
- novel_downloader/core/exporters/common/__init__.py +3 -4
- novel_downloader/core/exporters/common/epub.py +92 -171
- novel_downloader/core/exporters/common/main_exporter.py +14 -67
- novel_downloader/core/exporters/common/txt.py +90 -86
- novel_downloader/core/exporters/epub_util.py +184 -1327
- novel_downloader/core/exporters/linovelib/__init__.py +3 -2
- novel_downloader/core/exporters/linovelib/epub.py +165 -222
- novel_downloader/core/exporters/linovelib/main_exporter.py +10 -71
- novel_downloader/core/exporters/linovelib/txt.py +76 -66
- novel_downloader/core/exporters/qidian.py +15 -11
- novel_downloader/core/exporters/registry.py +55 -0
- novel_downloader/core/exporters/txt_util.py +67 -0
- novel_downloader/core/fetchers/__init__.py +57 -56
- novel_downloader/core/fetchers/aaatxt.py +83 -0
- novel_downloader/core/fetchers/{biquge/session.py → b520.py} +10 -10
- novel_downloader/core/fetchers/{base/session.py → base.py} +63 -47
- novel_downloader/core/fetchers/biquyuedu.py +83 -0
- novel_downloader/core/fetchers/dxmwx.py +110 -0
- novel_downloader/core/fetchers/eightnovel.py +139 -0
- novel_downloader/core/fetchers/{esjzone/session.py → esjzone.py} +23 -11
- novel_downloader/core/fetchers/guidaye.py +85 -0
- novel_downloader/core/fetchers/hetushu.py +92 -0
- novel_downloader/core/fetchers/{qianbi/browser.py → i25zw.py} +22 -26
- novel_downloader/core/fetchers/ixdzs8.py +113 -0
- novel_downloader/core/fetchers/jpxs123.py +101 -0
- novel_downloader/core/fetchers/{biquge/browser.py → lewenn.py} +15 -15
- novel_downloader/core/fetchers/{linovelib/session.py → linovelib.py} +16 -12
- novel_downloader/core/fetchers/piaotia.py +105 -0
- novel_downloader/core/fetchers/qbtr.py +101 -0
- novel_downloader/core/fetchers/{qianbi/session.py → qianbi.py} +9 -9
- novel_downloader/core/fetchers/{qidian/session.py → qidian.py} +55 -40
- novel_downloader/core/fetchers/quanben5.py +92 -0
- novel_downloader/core/fetchers/{base/rate_limiter.py → rate_limiter.py} +2 -2
- novel_downloader/core/fetchers/registry.py +60 -0
- novel_downloader/core/fetchers/{sfacg/session.py → sfacg.py} +11 -9
- novel_downloader/core/fetchers/shencou.py +106 -0
- novel_downloader/core/fetchers/{common/browser.py → shuhaige.py} +24 -19
- novel_downloader/core/fetchers/tongrenquan.py +84 -0
- novel_downloader/core/fetchers/ttkan.py +95 -0
- novel_downloader/core/fetchers/{common/session.py → wanbengo.py} +21 -17
- novel_downloader/core/fetchers/xiaoshuowu.py +106 -0
- novel_downloader/core/fetchers/xiguashuwu.py +177 -0
- novel_downloader/core/fetchers/xs63b.py +171 -0
- novel_downloader/core/fetchers/xshbook.py +85 -0
- novel_downloader/core/fetchers/{yamibo/session.py → yamibo.py} +23 -11
- novel_downloader/core/fetchers/yibige.py +114 -0
- novel_downloader/core/interfaces/__init__.py +8 -14
- novel_downloader/core/interfaces/downloader.py +6 -2
- novel_downloader/core/interfaces/exporter.py +7 -7
- novel_downloader/core/interfaces/fetcher.py +4 -17
- novel_downloader/core/interfaces/parser.py +5 -6
- novel_downloader/core/interfaces/searcher.py +26 -0
- novel_downloader/core/parsers/__init__.py +58 -22
- novel_downloader/core/parsers/aaatxt.py +132 -0
- novel_downloader/core/parsers/b520.py +116 -0
- novel_downloader/core/parsers/base.py +63 -12
- novel_downloader/core/parsers/biquyuedu.py +133 -0
- novel_downloader/core/parsers/dxmwx.py +162 -0
- novel_downloader/core/parsers/eightnovel.py +224 -0
- novel_downloader/core/parsers/{esjzone/main_parser.py → esjzone.py} +67 -67
- novel_downloader/core/parsers/guidaye.py +128 -0
- novel_downloader/core/parsers/hetushu.py +139 -0
- novel_downloader/core/parsers/i25zw.py +137 -0
- novel_downloader/core/parsers/ixdzs8.py +186 -0
- novel_downloader/core/parsers/jpxs123.py +137 -0
- novel_downloader/core/parsers/lewenn.py +142 -0
- novel_downloader/core/parsers/{linovelib/main_parser.py → linovelib.py} +54 -65
- novel_downloader/core/parsers/piaotia.py +189 -0
- novel_downloader/core/parsers/qbtr.py +136 -0
- novel_downloader/core/parsers/{qianbi/main_parser.py → qianbi.py} +54 -51
- novel_downloader/core/parsers/qidian/__init__.py +2 -2
- novel_downloader/core/parsers/qidian/book_info_parser.py +58 -59
- novel_downloader/core/parsers/qidian/chapter_encrypted.py +290 -346
- novel_downloader/core/parsers/qidian/chapter_normal.py +25 -56
- novel_downloader/core/parsers/qidian/main_parser.py +19 -57
- novel_downloader/core/parsers/qidian/utils/__init__.py +12 -11
- novel_downloader/core/parsers/qidian/utils/decryptor_fetcher.py +6 -7
- novel_downloader/core/parsers/qidian/utils/fontmap_recover.py +143 -0
- novel_downloader/core/parsers/qidian/utils/helpers.py +0 -4
- novel_downloader/core/parsers/qidian/utils/node_decryptor.py +2 -2
- novel_downloader/core/parsers/quanben5.py +103 -0
- novel_downloader/core/parsers/registry.py +57 -0
- novel_downloader/core/parsers/{sfacg/main_parser.py → sfacg.py} +46 -48
- novel_downloader/core/parsers/shencou.py +215 -0
- novel_downloader/core/parsers/shuhaige.py +111 -0
- novel_downloader/core/parsers/tongrenquan.py +116 -0
- novel_downloader/core/parsers/ttkan.py +132 -0
- novel_downloader/core/parsers/wanbengo.py +191 -0
- novel_downloader/core/parsers/xiaoshuowu.py +173 -0
- novel_downloader/core/parsers/xiguashuwu.py +435 -0
- novel_downloader/core/parsers/xs63b.py +161 -0
- novel_downloader/core/parsers/xshbook.py +134 -0
- novel_downloader/core/parsers/yamibo.py +155 -0
- novel_downloader/core/parsers/yibige.py +166 -0
- novel_downloader/core/searchers/__init__.py +51 -0
- novel_downloader/core/searchers/aaatxt.py +107 -0
- novel_downloader/core/searchers/b520.py +84 -0
- novel_downloader/core/searchers/base.py +168 -0
- novel_downloader/core/searchers/dxmwx.py +105 -0
- novel_downloader/core/searchers/eightnovel.py +84 -0
- novel_downloader/core/searchers/esjzone.py +102 -0
- novel_downloader/core/searchers/hetushu.py +92 -0
- novel_downloader/core/searchers/i25zw.py +93 -0
- novel_downloader/core/searchers/ixdzs8.py +107 -0
- novel_downloader/core/searchers/jpxs123.py +107 -0
- novel_downloader/core/searchers/piaotia.py +100 -0
- novel_downloader/core/searchers/qbtr.py +106 -0
- novel_downloader/core/searchers/qianbi.py +165 -0
- novel_downloader/core/searchers/quanben5.py +144 -0
- novel_downloader/core/searchers/registry.py +79 -0
- novel_downloader/core/searchers/shuhaige.py +124 -0
- novel_downloader/core/searchers/tongrenquan.py +110 -0
- novel_downloader/core/searchers/ttkan.py +92 -0
- novel_downloader/core/searchers/xiaoshuowu.py +122 -0
- novel_downloader/core/searchers/xiguashuwu.py +95 -0
- novel_downloader/core/searchers/xs63b.py +104 -0
- novel_downloader/locales/en.json +36 -79
- novel_downloader/locales/zh.json +37 -80
- novel_downloader/models/__init__.py +23 -50
- novel_downloader/models/book.py +44 -0
- novel_downloader/models/config.py +16 -43
- novel_downloader/models/login.py +1 -1
- novel_downloader/models/search.py +21 -0
- novel_downloader/resources/config/settings.toml +39 -74
- novel_downloader/resources/css_styles/intro.css +83 -0
- novel_downloader/resources/css_styles/main.css +30 -89
- novel_downloader/resources/json/xiguashuwu.json +718 -0
- novel_downloader/utils/__init__.py +43 -0
- novel_downloader/utils/chapter_storage.py +247 -226
- novel_downloader/utils/constants.py +5 -50
- novel_downloader/utils/cookies.py +6 -18
- novel_downloader/utils/crypto_utils/__init__.py +13 -0
- novel_downloader/utils/crypto_utils/aes_util.py +90 -0
- novel_downloader/utils/crypto_utils/aes_v1.py +619 -0
- novel_downloader/utils/crypto_utils/aes_v2.py +1143 -0
- novel_downloader/utils/{crypto_utils.py → crypto_utils/rc4.py} +3 -10
- novel_downloader/utils/epub/__init__.py +34 -0
- novel_downloader/utils/epub/builder.py +377 -0
- novel_downloader/utils/epub/constants.py +118 -0
- novel_downloader/utils/epub/documents.py +297 -0
- novel_downloader/utils/epub/models.py +120 -0
- novel_downloader/utils/epub/utils.py +179 -0
- novel_downloader/utils/file_utils/__init__.py +5 -30
- novel_downloader/utils/file_utils/io.py +9 -150
- novel_downloader/utils/file_utils/normalize.py +2 -2
- novel_downloader/utils/file_utils/sanitize.py +2 -7
- novel_downloader/utils/fontocr.py +207 -0
- novel_downloader/utils/i18n.py +2 -0
- novel_downloader/utils/logger.py +10 -16
- novel_downloader/utils/network.py +111 -252
- novel_downloader/utils/state.py +5 -90
- novel_downloader/utils/text_utils/__init__.py +16 -21
- novel_downloader/utils/text_utils/diff_display.py +6 -9
- novel_downloader/utils/text_utils/numeric_conversion.py +253 -0
- novel_downloader/utils/text_utils/text_cleaner.py +179 -0
- novel_downloader/utils/text_utils/truncate_utils.py +62 -0
- novel_downloader/utils/time_utils/__init__.py +6 -12
- novel_downloader/utils/time_utils/datetime_utils.py +23 -33
- novel_downloader/utils/time_utils/sleep_utils.py +5 -10
- novel_downloader/web/__init__.py +13 -0
- novel_downloader/web/components/__init__.py +11 -0
- novel_downloader/web/components/navigation.py +35 -0
- novel_downloader/web/main.py +66 -0
- novel_downloader/web/pages/__init__.py +17 -0
- novel_downloader/web/pages/download.py +78 -0
- novel_downloader/web/pages/progress.py +147 -0
- novel_downloader/web/pages/search.py +329 -0
- novel_downloader/web/services/__init__.py +17 -0
- novel_downloader/web/services/client_dialog.py +164 -0
- novel_downloader/web/services/cred_broker.py +113 -0
- novel_downloader/web/services/cred_models.py +35 -0
- novel_downloader/web/services/task_manager.py +264 -0
- novel_downloader-2.0.0.dist-info/METADATA +171 -0
- novel_downloader-2.0.0.dist-info/RECORD +210 -0
- {novel_downloader-1.4.5.dist-info → novel_downloader-2.0.0.dist-info}/entry_points.txt +1 -1
- novel_downloader/config/site_rules.py +0 -94
- novel_downloader/core/downloaders/biquge.py +0 -25
- novel_downloader/core/downloaders/esjzone.py +0 -25
- novel_downloader/core/downloaders/linovelib.py +0 -25
- novel_downloader/core/downloaders/sfacg.py +0 -25
- novel_downloader/core/downloaders/yamibo.py +0 -25
- novel_downloader/core/exporters/biquge.py +0 -25
- novel_downloader/core/exporters/esjzone.py +0 -25
- novel_downloader/core/exporters/qianbi.py +0 -25
- novel_downloader/core/exporters/sfacg.py +0 -25
- novel_downloader/core/exporters/yamibo.py +0 -25
- novel_downloader/core/factory/__init__.py +0 -20
- novel_downloader/core/factory/downloader.py +0 -73
- novel_downloader/core/factory/exporter.py +0 -58
- novel_downloader/core/factory/fetcher.py +0 -96
- novel_downloader/core/factory/parser.py +0 -86
- novel_downloader/core/fetchers/base/__init__.py +0 -14
- novel_downloader/core/fetchers/base/browser.py +0 -403
- novel_downloader/core/fetchers/biquge/__init__.py +0 -14
- novel_downloader/core/fetchers/common/__init__.py +0 -14
- novel_downloader/core/fetchers/esjzone/__init__.py +0 -14
- novel_downloader/core/fetchers/esjzone/browser.py +0 -204
- novel_downloader/core/fetchers/linovelib/__init__.py +0 -14
- novel_downloader/core/fetchers/linovelib/browser.py +0 -193
- novel_downloader/core/fetchers/qianbi/__init__.py +0 -14
- novel_downloader/core/fetchers/qidian/__init__.py +0 -14
- novel_downloader/core/fetchers/qidian/browser.py +0 -318
- novel_downloader/core/fetchers/sfacg/__init__.py +0 -14
- novel_downloader/core/fetchers/sfacg/browser.py +0 -189
- novel_downloader/core/fetchers/yamibo/__init__.py +0 -14
- novel_downloader/core/fetchers/yamibo/browser.py +0 -229
- novel_downloader/core/parsers/biquge/__init__.py +0 -10
- novel_downloader/core/parsers/biquge/main_parser.py +0 -134
- novel_downloader/core/parsers/common/__init__.py +0 -13
- novel_downloader/core/parsers/common/helper.py +0 -323
- novel_downloader/core/parsers/common/main_parser.py +0 -106
- novel_downloader/core/parsers/esjzone/__init__.py +0 -10
- novel_downloader/core/parsers/linovelib/__init__.py +0 -10
- novel_downloader/core/parsers/qianbi/__init__.py +0 -10
- novel_downloader/core/parsers/sfacg/__init__.py +0 -10
- novel_downloader/core/parsers/yamibo/__init__.py +0 -10
- novel_downloader/core/parsers/yamibo/main_parser.py +0 -194
- novel_downloader/models/browser.py +0 -21
- novel_downloader/models/chapter.py +0 -25
- novel_downloader/models/site_rules.py +0 -99
- novel_downloader/models/tasks.py +0 -33
- novel_downloader/models/types.py +0 -15
- novel_downloader/resources/css_styles/volume-intro.css +0 -56
- novel_downloader/resources/json/replace_word_map.json +0 -4
- novel_downloader/resources/text/blacklist.txt +0 -22
- novel_downloader/tui/__init__.py +0 -7
- novel_downloader/tui/app.py +0 -32
- novel_downloader/tui/main.py +0 -17
- novel_downloader/tui/screens/__init__.py +0 -14
- novel_downloader/tui/screens/home.py +0 -198
- novel_downloader/tui/screens/login.py +0 -74
- novel_downloader/tui/styles/home_layout.tcss +0 -79
- novel_downloader/tui/widgets/richlog_handler.py +0 -24
- novel_downloader/utils/cache.py +0 -24
- novel_downloader/utils/fontocr/__init__.py +0 -22
- novel_downloader/utils/fontocr/model_loader.py +0 -69
- novel_downloader/utils/fontocr/ocr_v1.py +0 -303
- novel_downloader/utils/fontocr/ocr_v2.py +0 -752
- novel_downloader/utils/hash_store.py +0 -279
- novel_downloader/utils/hash_utils.py +0 -103
- novel_downloader/utils/text_utils/chapter_formatting.py +0 -46
- novel_downloader/utils/text_utils/font_mapping.py +0 -28
- novel_downloader/utils/text_utils/text_cleaning.py +0 -107
- novel_downloader-1.4.5.dist-info/METADATA +0 -196
- novel_downloader-1.4.5.dist-info/RECORD +0 -165
- {novel_downloader-1.4.5.dist-info → novel_downloader-2.0.0.dist-info}/WHEEL +0 -0
- {novel_downloader-1.4.5.dist-info → novel_downloader-2.0.0.dist-info}/licenses/LICENSE +0 -0
- {novel_downloader-1.4.5.dist-info → novel_downloader-2.0.0.dist-info}/top_level.txt +0 -0
@@ -3,4 +3,47 @@
|
|
3
3
|
novel_downloader.utils
|
4
4
|
----------------------
|
5
5
|
|
6
|
+
A collection of helper functions and classes.
|
6
7
|
"""
|
8
|
+
|
9
|
+
__all__ = [
|
10
|
+
"ChapterStorage",
|
11
|
+
"TextCleaner",
|
12
|
+
"parse_cookies",
|
13
|
+
"get_cookie_value",
|
14
|
+
"rc4_crypt",
|
15
|
+
"sanitize_filename",
|
16
|
+
"write_file",
|
17
|
+
"download",
|
18
|
+
"get_cleaner",
|
19
|
+
"content_prefix",
|
20
|
+
"truncate_half_lines",
|
21
|
+
"diff_inline_display",
|
22
|
+
"time_diff",
|
23
|
+
"async_jitter_sleep",
|
24
|
+
"jitter_sleep",
|
25
|
+
]
|
26
|
+
|
27
|
+
from .chapter_storage import ChapterStorage
|
28
|
+
from .cookies import (
|
29
|
+
get_cookie_value,
|
30
|
+
parse_cookies,
|
31
|
+
)
|
32
|
+
from .crypto_utils import rc4_crypt
|
33
|
+
from .file_utils import (
|
34
|
+
sanitize_filename,
|
35
|
+
write_file,
|
36
|
+
)
|
37
|
+
from .network import download
|
38
|
+
from .text_utils import (
|
39
|
+
TextCleaner,
|
40
|
+
content_prefix,
|
41
|
+
diff_inline_display,
|
42
|
+
get_cleaner,
|
43
|
+
truncate_half_lines,
|
44
|
+
)
|
45
|
+
from .time_utils import (
|
46
|
+
async_jitter_sleep,
|
47
|
+
jitter_sleep,
|
48
|
+
time_diff,
|
49
|
+
)
|
@@ -3,10 +3,11 @@
|
|
3
3
|
novel_downloader.utils.chapter_storage
|
4
4
|
--------------------------------------
|
5
5
|
|
6
|
-
Storage module for managing novel chapters in
|
7
|
-
either JSON file form or an SQLite database.
|
6
|
+
Storage module for managing novel chapters in an SQLite database.
|
8
7
|
"""
|
9
8
|
|
9
|
+
__all__ = ["ChapterStorage"]
|
10
|
+
|
10
11
|
import contextlib
|
11
12
|
import json
|
12
13
|
import sqlite3
|
@@ -14,21 +15,21 @@ import types
|
|
14
15
|
from pathlib import Path
|
15
16
|
from typing import Any, Self, cast
|
16
17
|
|
17
|
-
from novel_downloader.models import
|
18
|
-
ChapterDict,
|
19
|
-
SaveMode,
|
20
|
-
StorageBackend,
|
21
|
-
)
|
22
|
-
|
23
|
-
from .file_utils import save_as_json
|
18
|
+
from novel_downloader.models import ChapterDict
|
24
19
|
|
25
20
|
_CREATE_TABLE_SQL = """
|
26
|
-
CREATE TABLE IF NOT EXISTS
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
21
|
+
CREATE TABLE IF NOT EXISTS chapters (
|
22
|
+
id TEXT NOT NULL,
|
23
|
+
source_id INTEGER NOT NULL,
|
24
|
+
priority INTEGER NOT NULL DEFAULT 1000,
|
25
|
+
title TEXT NOT NULL,
|
26
|
+
content TEXT NOT NULL,
|
27
|
+
extra TEXT,
|
28
|
+
PRIMARY KEY (id, source_id)
|
29
|
+
);
|
30
|
+
|
31
|
+
CREATE INDEX IF NOT EXISTS
|
32
|
+
idx_chapters_id_priority ON chapters(id, priority);
|
32
33
|
"""
|
33
34
|
|
34
35
|
|
@@ -36,276 +37,292 @@ class ChapterStorage:
|
|
36
37
|
"""
|
37
38
|
Manage storage of chapters in an SQLite database.
|
38
39
|
|
39
|
-
|
40
|
-
|
41
|
-
:param backend_type: "json" (default) or "sqlite".
|
40
|
+
Supports storing multiple versions of each chapter from different sources,
|
41
|
+
each with a defined priority for selecting the preferred version.
|
42
42
|
"""
|
43
43
|
|
44
44
|
def __init__(
|
45
45
|
self,
|
46
46
|
raw_base: str | Path,
|
47
|
-
|
48
|
-
backend_type: StorageBackend = "json",
|
49
|
-
*,
|
50
|
-
batch_size: int = 1,
|
47
|
+
priorities: dict[int, int],
|
51
48
|
) -> None:
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
49
|
+
"""
|
50
|
+
Initialize storage for a specific book.
|
51
|
+
|
52
|
+
:param raw_base: Directory path where the SQLite file will be stored.
|
53
|
+
:param priorities: Mapping of source_id to priority value.
|
54
|
+
Lower numbers indicate higher priority.
|
55
|
+
E.g. {0: 10, 1: 100} means source 0 is preferred.
|
56
|
+
"""
|
57
|
+
self._db_path = Path(raw_base) / "chapter_data.sqlite"
|
57
58
|
self._conn: sqlite3.Connection | None = None
|
58
|
-
self.
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
self._json_dir.mkdir(parents=True, exist_ok=True)
|
69
|
-
self._existing_ids = {p.stem for p in self._json_dir.glob("*.json")}
|
70
|
-
|
71
|
-
def _init_sql(self) -> None:
|
72
|
-
"""Prepare SQLite connection and ensure table exists."""
|
73
|
-
self._db_path = self.raw_base / f"{self.namespace}.sqlite"
|
59
|
+
self._priorities = priorities
|
60
|
+
self._existing_ids: set[tuple[str, int]] = set() # (chap_id, source_id)
|
61
|
+
|
62
|
+
def connect(self) -> None:
|
63
|
+
"""
|
64
|
+
Open the SQLite connection, enable foreign keys,
|
65
|
+
create the schema, and cache existing keys.
|
66
|
+
"""
|
67
|
+
if self._conn:
|
68
|
+
return
|
74
69
|
self._conn = sqlite3.connect(self._db_path)
|
75
|
-
|
76
|
-
self._conn.execute(
|
70
|
+
self._conn.row_factory = sqlite3.Row
|
71
|
+
self._conn.execute("PRAGMA foreign_keys = ON;")
|
72
|
+
self._conn.executescript(_CREATE_TABLE_SQL)
|
77
73
|
self._conn.commit()
|
74
|
+
self._load_existing_keys()
|
78
75
|
|
79
|
-
|
80
|
-
self
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
return self._json_dir / f"{chap_id}.json"
|
85
|
-
|
86
|
-
def exists(self, chap_id: str) -> bool:
|
76
|
+
def exists(
|
77
|
+
self,
|
78
|
+
chap_id: str,
|
79
|
+
source_id: int | None = None,
|
80
|
+
) -> bool:
|
87
81
|
"""
|
88
82
|
Check if a chapter exists.
|
89
83
|
|
90
84
|
:param chap_id: Chapter identifier.
|
85
|
+
:param source_id: If provided, check existence for that source.
|
91
86
|
:return: True if found, else False.
|
92
87
|
"""
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
raw = self._json_path(chap_id).read_text(encoding="utf-8")
|
97
|
-
return cast(ChapterDict, json.loads(raw))
|
98
|
-
|
99
|
-
def _load_sql(self, chap_id: str) -> ChapterDict:
|
100
|
-
if self._conn is None:
|
101
|
-
raise RuntimeError("ChapterStorage is closed")
|
102
|
-
cur = self._conn.execute(
|
103
|
-
f'SELECT id, title, content, extra FROM "{self.namespace}" WHERE id = ?',
|
104
|
-
(chap_id,),
|
105
|
-
)
|
106
|
-
row = cur.fetchone()
|
107
|
-
return {
|
108
|
-
"id": row[0],
|
109
|
-
"title": row[1],
|
110
|
-
"content": row[2],
|
111
|
-
"extra": json.loads(row[3]),
|
112
|
-
}
|
88
|
+
if source_id is not None:
|
89
|
+
return (chap_id, source_id) in self._existing_ids
|
90
|
+
return any(key[0] == chap_id for key in self._existing_ids)
|
113
91
|
|
114
|
-
def
|
92
|
+
def upsert_chapter(
|
93
|
+
self,
|
94
|
+
data: ChapterDict,
|
95
|
+
source_id: int,
|
96
|
+
) -> None:
|
115
97
|
"""
|
116
|
-
|
98
|
+
Insert or update a single chapter record.
|
117
99
|
|
118
|
-
:param
|
119
|
-
:
|
100
|
+
:param data: ChapterDict containing id, title, content, extra.
|
101
|
+
:param source_id: Integer index of source.
|
120
102
|
"""
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
103
|
+
priority = self._priorities[source_id]
|
104
|
+
chap_id = data["id"]
|
105
|
+
title = data["title"]
|
106
|
+
content = data["content"]
|
107
|
+
extra_json = json.dumps(data["extra"])
|
108
|
+
|
109
|
+
self.conn.execute(
|
110
|
+
"""
|
111
|
+
INSERT OR REPLACE INTO chapters
|
112
|
+
(id, source_id, priority, title, content, extra)
|
113
|
+
VALUES (?, ?, ?, ?, ?, ?)
|
114
|
+
""",
|
115
|
+
(chap_id, source_id, priority, title, content, extra_json),
|
127
116
|
)
|
117
|
+
self._existing_ids.add((chap_id, source_id))
|
118
|
+
self.conn.commit()
|
128
119
|
|
129
|
-
def
|
130
|
-
path = self._json_path(data["id"])
|
131
|
-
save_as_json(data, path, on_exist=on_exist)
|
132
|
-
self._existing_ids.add(data["id"])
|
133
|
-
|
134
|
-
def _save_sql(self, data: ChapterDict, on_exist: SaveMode) -> None:
|
135
|
-
if self._conn is None:
|
136
|
-
raise RuntimeError("ChapterStorage is closed")
|
137
|
-
sql = (
|
138
|
-
f'INSERT OR REPLACE INTO "{self.namespace}" '
|
139
|
-
"(id, title, content, extra) VALUES (?, ?, ?, ?)"
|
140
|
-
if on_exist == "overwrite"
|
141
|
-
else f'INSERT OR IGNORE INTO "{self.namespace}" '
|
142
|
-
"(id, title, content, extra) VALUES (?, ?, ?, ?)"
|
143
|
-
)
|
144
|
-
self._conn.execute(
|
145
|
-
sql,
|
146
|
-
(
|
147
|
-
data["id"],
|
148
|
-
data["title"],
|
149
|
-
data["content"],
|
150
|
-
json.dumps(data["extra"], ensure_ascii=False),
|
151
|
-
),
|
152
|
-
)
|
153
|
-
self._existing_ids.add(data["id"])
|
154
|
-
if self._batch_size == 1:
|
155
|
-
self._conn.commit()
|
156
|
-
else:
|
157
|
-
self._pending += 1
|
158
|
-
if self._pending >= self._batch_size:
|
159
|
-
self._conn.commit()
|
160
|
-
self._pending = 0
|
161
|
-
|
162
|
-
def _save_many_sql(
|
120
|
+
def upsert_chapters(
|
163
121
|
self,
|
164
|
-
|
165
|
-
|
122
|
+
data: list[ChapterDict],
|
123
|
+
source_id: int,
|
166
124
|
) -> None:
|
167
125
|
"""
|
168
|
-
|
126
|
+
Insert or update multiple chapters in one batch operation.
|
169
127
|
|
170
|
-
:param
|
171
|
-
:param
|
128
|
+
:param data: List of ChapterDicts.
|
129
|
+
:param source_id: Integer index of source.
|
172
130
|
"""
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
131
|
+
priority = self._priorities[source_id]
|
132
|
+
records = []
|
133
|
+
for chapter in data:
|
134
|
+
chap_id = chapter["id"]
|
135
|
+
title = chapter["title"]
|
136
|
+
content = chapter["content"]
|
137
|
+
extra_json = json.dumps(chapter["extra"])
|
138
|
+
records.append((chap_id, source_id, priority, title, content, extra_json))
|
139
|
+
self._existing_ids.add((chap_id, source_id))
|
140
|
+
|
141
|
+
self.conn.executemany(
|
142
|
+
"""
|
143
|
+
INSERT OR REPLACE INTO chapters
|
144
|
+
(id, source_id, priority, title, content, extra)
|
145
|
+
VALUES (?, ?, ?, ?, ?, ?)
|
146
|
+
""",
|
147
|
+
records,
|
184
148
|
)
|
149
|
+
self.conn.commit()
|
185
150
|
|
186
|
-
|
187
|
-
(
|
188
|
-
data["id"],
|
189
|
-
data["title"],
|
190
|
-
data["content"],
|
191
|
-
json.dumps(data["extra"], ensure_ascii=False),
|
192
|
-
)
|
193
|
-
for data in datas
|
194
|
-
]
|
195
|
-
|
196
|
-
with self._conn:
|
197
|
-
self._conn.executemany(sql, params)
|
198
|
-
|
199
|
-
self._existing_ids.update(data["id"] for data in datas)
|
200
|
-
|
201
|
-
def save(
|
151
|
+
def get_chapter(
|
202
152
|
self,
|
203
|
-
|
204
|
-
|
205
|
-
) -> None:
|
153
|
+
chap_id: str,
|
154
|
+
source_id: int,
|
155
|
+
) -> ChapterDict | None:
|
206
156
|
"""
|
207
|
-
|
157
|
+
Retrieve a single chapter by id and source.
|
208
158
|
|
209
|
-
:param
|
210
|
-
:param
|
159
|
+
:param chap_id: Chapter identifier.
|
160
|
+
:param source_id: Integer index of source.
|
161
|
+
:return: A ChapterDict if found, else None.
|
211
162
|
"""
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
163
|
+
cur = self.conn.execute(
|
164
|
+
"""
|
165
|
+
SELECT title, content, extra
|
166
|
+
FROM chapters
|
167
|
+
WHERE id = ? AND source_id = ?
|
168
|
+
LIMIT 1
|
169
|
+
""",
|
170
|
+
(chap_id, source_id),
|
171
|
+
)
|
172
|
+
row = cur.fetchone()
|
173
|
+
if not row:
|
174
|
+
return None
|
175
|
+
|
176
|
+
return ChapterDict(
|
177
|
+
id=chap_id,
|
178
|
+
title=row["title"],
|
179
|
+
content=row["content"],
|
180
|
+
extra=self._load_dict(row["extra"]),
|
181
|
+
)
|
219
182
|
|
220
|
-
def
|
183
|
+
def get_chapters(
|
221
184
|
self,
|
222
|
-
|
223
|
-
|
224
|
-
) -> None:
|
225
|
-
"""
|
226
|
-
Save multiple chapter records in one shot.
|
227
|
-
|
228
|
-
:param datas: List of ChapterDict to store.
|
229
|
-
:param on_exist: What to do if chap_id already exists.
|
185
|
+
chap_ids: list[str],
|
186
|
+
source_id: int,
|
187
|
+
) -> dict[str, ChapterDict | None]:
|
230
188
|
"""
|
231
|
-
|
232
|
-
raise ValueError(f"invalid on_exist mode: {on_exist!r}")
|
233
|
-
|
234
|
-
if self.backend == "json":
|
235
|
-
for data in datas:
|
236
|
-
self._save_json(data, on_exist)
|
237
|
-
else:
|
238
|
-
self._save_many_sql(datas, on_exist)
|
189
|
+
Retrieve multiple chapters by their ids for a given source in one query.
|
239
190
|
|
240
|
-
|
191
|
+
:param chap_ids: List of chapter identifiers.
|
192
|
+
:param source_id: Integer index of source.
|
193
|
+
:return: A dict mapping chap_id to ChapterDict or None.
|
241
194
|
"""
|
242
|
-
|
195
|
+
placeholders = ",".join("?" for _ in chap_ids)
|
196
|
+
query = f"""
|
197
|
+
SELECT id, title, content, extra
|
198
|
+
FROM chapters
|
199
|
+
WHERE id IN ({placeholders}) AND source_id = ?
|
243
200
|
"""
|
244
|
-
|
245
|
-
|
246
|
-
|
247
|
-
|
248
|
-
|
249
|
-
|
250
|
-
|
201
|
+
rows = self.conn.execute(query, (*chap_ids, source_id)).fetchall()
|
202
|
+
|
203
|
+
result: dict[str, ChapterDict | None] = {cid: None for cid in chap_ids}
|
204
|
+
for row in rows:
|
205
|
+
result[row["id"]] = ChapterDict(
|
206
|
+
id=row["id"],
|
207
|
+
title=row["title"],
|
208
|
+
content=row["content"],
|
209
|
+
extra=self._load_dict(row["extra"]),
|
210
|
+
)
|
211
|
+
return result
|
251
212
|
|
252
|
-
def
|
213
|
+
def get_best_chapter(
|
214
|
+
self,
|
215
|
+
chap_id: str,
|
216
|
+
) -> ChapterDict | None:
|
253
217
|
"""
|
254
|
-
|
255
|
-
|
256
|
-
:param chap_id: Chapter identifier.
|
257
|
-
:return: True if deleted, False if not found.
|
218
|
+
Retrieve the chapter with the highest priority (lowest priority number)
|
219
|
+
among all sources for the given chap_id.
|
258
220
|
"""
|
259
|
-
|
260
|
-
|
261
|
-
|
262
|
-
|
263
|
-
|
264
|
-
|
265
|
-
|
266
|
-
|
267
|
-
|
268
|
-
|
221
|
+
cur = self.conn.execute(
|
222
|
+
"""
|
223
|
+
SELECT title, content, extra
|
224
|
+
FROM chapters
|
225
|
+
WHERE id = ?
|
226
|
+
ORDER BY priority ASC
|
227
|
+
LIMIT 1
|
228
|
+
""",
|
229
|
+
(chap_id,),
|
230
|
+
)
|
231
|
+
row = cur.fetchone()
|
232
|
+
if not row:
|
233
|
+
return None
|
234
|
+
|
235
|
+
return ChapterDict(
|
236
|
+
id=chap_id,
|
237
|
+
title=row["title"],
|
238
|
+
content=row["content"],
|
239
|
+
extra=self._load_dict(row["extra"]),
|
269
240
|
)
|
270
|
-
self._conn.commit()
|
271
|
-
return cur.rowcount > 0
|
272
241
|
|
273
|
-
def
|
242
|
+
def get_best_chapters(
|
243
|
+
self,
|
244
|
+
chap_ids: list[str],
|
245
|
+
) -> dict[str, ChapterDict | None]:
|
274
246
|
"""
|
275
|
-
|
247
|
+
Retrieve the best (highest-priority) chapter for each given id
|
248
|
+
in a single query using window functions.
|
276
249
|
"""
|
277
|
-
|
278
|
-
|
279
|
-
|
280
|
-
|
281
|
-
|
282
|
-
|
283
|
-
|
250
|
+
placeholders = ",".join("?" for _ in chap_ids)
|
251
|
+
query = f"""
|
252
|
+
SELECT chap_id, title, content, extra FROM (
|
253
|
+
SELECT id AS chap_id, title, content, extra,
|
254
|
+
ROW_NUMBER() OVER (
|
255
|
+
PARTITION BY id ORDER BY priority ASC
|
256
|
+
) AS rn
|
257
|
+
FROM chapters
|
258
|
+
WHERE id IN ({placeholders})
|
259
|
+
) sub
|
260
|
+
WHERE rn = 1
|
261
|
+
"""
|
262
|
+
rows = self.conn.execute(query, chap_ids).fetchall()
|
263
|
+
|
264
|
+
result: dict[str, ChapterDict | None] = {chap_id: None for chap_id in chap_ids}
|
265
|
+
for row in rows:
|
266
|
+
result[row["chap_id"]] = ChapterDict(
|
267
|
+
id=row["chap_id"],
|
268
|
+
title=row["title"],
|
269
|
+
content=row["content"],
|
270
|
+
extra=self._load_dict(row["extra"]),
|
271
|
+
)
|
272
|
+
return result
|
284
273
|
|
285
|
-
def
|
274
|
+
def count(self) -> int:
|
286
275
|
"""
|
287
|
-
|
276
|
+
Count stored chapter records, one per (chapter id, source id) pair.
|
288
277
|
"""
|
289
|
-
|
290
|
-
self._conn.commit()
|
291
|
-
self._pending = 0
|
278
|
+
return len(self._existing_ids)
|
292
279
|
|
293
280
|
def close(self) -> None:
|
294
281
|
"""
|
295
282
|
Gracefully close any open resources.
|
296
283
|
"""
|
297
|
-
if self.
|
284
|
+
if self._conn is None:
|
298
285
|
return
|
299
286
|
|
300
|
-
with contextlib.suppress(Exception):
|
301
|
-
self.flush()
|
302
|
-
|
303
287
|
with contextlib.suppress(Exception):
|
304
288
|
self._conn.close()
|
305
289
|
|
306
290
|
self._conn = None
|
291
|
+
self._existing_ids = set()
|
292
|
+
|
293
|
+
@property
|
294
|
+
def conn(self) -> sqlite3.Connection:
|
295
|
+
"""
|
296
|
+
Return the active SQLite connection, or raise if not connected.
|
297
|
+
|
298
|
+
:raises RuntimeError: if connect() has not been called.
|
299
|
+
"""
|
300
|
+
if self._conn is None:
|
301
|
+
raise RuntimeError(
|
302
|
+
"Database connection is not established. Call connect() first."
|
303
|
+
)
|
304
|
+
return self._conn
|
305
|
+
|
306
|
+
def _load_existing_keys(self) -> None:
|
307
|
+
"""
|
308
|
+
Cache all existing (chapter_id, source_id) pairs for fast upsert.
|
309
|
+
"""
|
310
|
+
cur = self.conn.execute("SELECT id, source_id FROM chapters")
|
311
|
+
self._existing_ids = {(row["id"], row["source_id"]) for row in cur.fetchall()}
|
312
|
+
|
313
|
+
@staticmethod
|
314
|
+
def _load_dict(data: str) -> dict[str, Any]:
|
315
|
+
try:
|
316
|
+
parsed = json.loads(data)
|
317
|
+
return cast(dict[str, Any], parsed)
|
318
|
+
except Exception:
|
319
|
+
return {}
|
307
320
|
|
308
321
|
def __enter__(self) -> Self:
|
322
|
+
"""
|
323
|
+
Enter context manager, automatically connecting to the database.
|
324
|
+
"""
|
325
|
+
self.connect()
|
309
326
|
return self
|
310
327
|
|
311
328
|
def __exit__(
|
@@ -314,14 +331,18 @@ class ChapterStorage:
|
|
314
331
|
exc_val: BaseException | None,
|
315
332
|
tb: types.TracebackType | None,
|
316
333
|
) -> None:
|
334
|
+
"""
|
335
|
+
Exit context manager, closing the database connection.
|
336
|
+
"""
|
317
337
|
self.close()
|
318
338
|
|
319
339
|
def __del__(self) -> None:
|
340
|
+
"""
|
341
|
+
Ensure the database connection is closed upon object deletion.
|
342
|
+
"""
|
320
343
|
self.close()
|
321
344
|
|
322
345
|
def __repr__(self) -> str:
|
323
346
|
return (
|
324
|
-
f"<ChapterStorage
|
325
|
-
f"backend='{self.backend}' "
|
326
|
-
f"path='{self.raw_base}'>"
|
347
|
+
f"<ChapterStorage priorities='{self._priorities}' path='{self._db_path}'>"
|
327
348
|
)
|