novel-downloader 1.4.5__py3-none-any.whl → 1.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (165)
  1. novel_downloader/__init__.py +1 -1
  2. novel_downloader/cli/__init__.py +2 -2
  3. novel_downloader/cli/config.py +1 -83
  4. novel_downloader/cli/download.py +4 -5
  5. novel_downloader/cli/export.py +4 -1
  6. novel_downloader/cli/main.py +2 -0
  7. novel_downloader/cli/search.py +123 -0
  8. novel_downloader/config/__init__.py +3 -10
  9. novel_downloader/config/adapter.py +190 -54
  10. novel_downloader/config/loader.py +2 -3
  11. novel_downloader/core/__init__.py +13 -13
  12. novel_downloader/core/downloaders/__init__.py +10 -11
  13. novel_downloader/core/downloaders/base.py +152 -26
  14. novel_downloader/core/downloaders/biquge.py +5 -1
  15. novel_downloader/core/downloaders/common.py +157 -378
  16. novel_downloader/core/downloaders/esjzone.py +5 -1
  17. novel_downloader/core/downloaders/linovelib.py +5 -1
  18. novel_downloader/core/downloaders/qianbi.py +291 -4
  19. novel_downloader/core/downloaders/qidian.py +199 -285
  20. novel_downloader/core/downloaders/registry.py +67 -0
  21. novel_downloader/core/downloaders/sfacg.py +5 -1
  22. novel_downloader/core/downloaders/yamibo.py +5 -1
  23. novel_downloader/core/exporters/__init__.py +10 -11
  24. novel_downloader/core/exporters/base.py +87 -7
  25. novel_downloader/core/exporters/biquge.py +5 -8
  26. novel_downloader/core/exporters/common/__init__.py +2 -2
  27. novel_downloader/core/exporters/common/epub.py +82 -166
  28. novel_downloader/core/exporters/common/main_exporter.py +0 -60
  29. novel_downloader/core/exporters/common/txt.py +82 -83
  30. novel_downloader/core/exporters/epub_util.py +157 -1330
  31. novel_downloader/core/exporters/esjzone.py +5 -8
  32. novel_downloader/core/exporters/linovelib/__init__.py +2 -2
  33. novel_downloader/core/exporters/linovelib/epub.py +157 -212
  34. novel_downloader/core/exporters/linovelib/main_exporter.py +2 -59
  35. novel_downloader/core/exporters/linovelib/txt.py +67 -63
  36. novel_downloader/core/exporters/qianbi.py +5 -8
  37. novel_downloader/core/exporters/qidian.py +14 -4
  38. novel_downloader/core/exporters/registry.py +53 -0
  39. novel_downloader/core/exporters/sfacg.py +5 -8
  40. novel_downloader/core/exporters/txt_util.py +67 -0
  41. novel_downloader/core/exporters/yamibo.py +5 -8
  42. novel_downloader/core/fetchers/__init__.py +19 -24
  43. novel_downloader/core/fetchers/base/__init__.py +3 -3
  44. novel_downloader/core/fetchers/base/browser.py +23 -4
  45. novel_downloader/core/fetchers/base/session.py +30 -5
  46. novel_downloader/core/fetchers/biquge/__init__.py +3 -3
  47. novel_downloader/core/fetchers/biquge/browser.py +5 -0
  48. novel_downloader/core/fetchers/biquge/session.py +6 -1
  49. novel_downloader/core/fetchers/esjzone/__init__.py +3 -3
  50. novel_downloader/core/fetchers/esjzone/browser.py +5 -0
  51. novel_downloader/core/fetchers/esjzone/session.py +6 -1
  52. novel_downloader/core/fetchers/linovelib/__init__.py +3 -3
  53. novel_downloader/core/fetchers/linovelib/browser.py +6 -1
  54. novel_downloader/core/fetchers/linovelib/session.py +6 -1
  55. novel_downloader/core/fetchers/qianbi/__init__.py +3 -3
  56. novel_downloader/core/fetchers/qianbi/browser.py +5 -0
  57. novel_downloader/core/fetchers/qianbi/session.py +5 -0
  58. novel_downloader/core/fetchers/qidian/__init__.py +3 -3
  59. novel_downloader/core/fetchers/qidian/browser.py +12 -4
  60. novel_downloader/core/fetchers/qidian/session.py +11 -3
  61. novel_downloader/core/fetchers/registry.py +71 -0
  62. novel_downloader/core/fetchers/sfacg/__init__.py +3 -3
  63. novel_downloader/core/fetchers/sfacg/browser.py +5 -0
  64. novel_downloader/core/fetchers/sfacg/session.py +5 -0
  65. novel_downloader/core/fetchers/yamibo/__init__.py +3 -3
  66. novel_downloader/core/fetchers/yamibo/browser.py +5 -0
  67. novel_downloader/core/fetchers/yamibo/session.py +6 -1
  68. novel_downloader/core/interfaces/__init__.py +7 -5
  69. novel_downloader/core/interfaces/searcher.py +18 -0
  70. novel_downloader/core/parsers/__init__.py +10 -11
  71. novel_downloader/core/parsers/{biquge/main_parser.py → biquge.py} +7 -2
  72. novel_downloader/core/parsers/{esjzone/main_parser.py → esjzone.py} +7 -2
  73. novel_downloader/core/parsers/{linovelib/main_parser.py → linovelib.py} +7 -2
  74. novel_downloader/core/parsers/{qianbi/main_parser.py → qianbi.py} +7 -2
  75. novel_downloader/core/parsers/qidian/__init__.py +2 -2
  76. novel_downloader/core/parsers/qidian/chapter_encrypted.py +23 -21
  77. novel_downloader/core/parsers/qidian/chapter_normal.py +1 -1
  78. novel_downloader/core/parsers/qidian/main_parser.py +10 -21
  79. novel_downloader/core/parsers/qidian/utils/__init__.py +11 -11
  80. novel_downloader/core/parsers/qidian/utils/decryptor_fetcher.py +5 -6
  81. novel_downloader/core/parsers/qidian/utils/node_decryptor.py +2 -2
  82. novel_downloader/core/parsers/registry.py +68 -0
  83. novel_downloader/core/parsers/{sfacg/main_parser.py → sfacg.py} +7 -2
  84. novel_downloader/core/parsers/{yamibo/main_parser.py → yamibo.py} +7 -2
  85. novel_downloader/core/searchers/__init__.py +20 -0
  86. novel_downloader/core/searchers/base.py +92 -0
  87. novel_downloader/core/searchers/biquge.py +83 -0
  88. novel_downloader/core/searchers/esjzone.py +84 -0
  89. novel_downloader/core/searchers/qianbi.py +131 -0
  90. novel_downloader/core/searchers/qidian.py +87 -0
  91. novel_downloader/core/searchers/registry.py +63 -0
  92. novel_downloader/locales/en.json +12 -4
  93. novel_downloader/locales/zh.json +12 -4
  94. novel_downloader/models/__init__.py +4 -30
  95. novel_downloader/models/config.py +12 -6
  96. novel_downloader/models/search.py +16 -0
  97. novel_downloader/models/types.py +0 -2
  98. novel_downloader/resources/config/settings.toml +31 -4
  99. novel_downloader/resources/css_styles/intro.css +83 -0
  100. novel_downloader/resources/css_styles/main.css +30 -89
  101. novel_downloader/utils/__init__.py +52 -0
  102. novel_downloader/utils/chapter_storage.py +244 -224
  103. novel_downloader/utils/constants.py +1 -21
  104. novel_downloader/utils/epub/__init__.py +34 -0
  105. novel_downloader/utils/epub/builder.py +377 -0
  106. novel_downloader/utils/epub/constants.py +77 -0
  107. novel_downloader/utils/epub/documents.py +403 -0
  108. novel_downloader/utils/epub/models.py +134 -0
  109. novel_downloader/utils/epub/utils.py +212 -0
  110. novel_downloader/utils/file_utils/__init__.py +10 -14
  111. novel_downloader/utils/file_utils/io.py +20 -51
  112. novel_downloader/utils/file_utils/normalize.py +2 -2
  113. novel_downloader/utils/file_utils/sanitize.py +2 -3
  114. novel_downloader/utils/fontocr/__init__.py +5 -5
  115. novel_downloader/utils/{hash_store.py → fontocr/hash_store.py} +4 -3
  116. novel_downloader/utils/{hash_utils.py → fontocr/hash_utils.py} +2 -2
  117. novel_downloader/utils/fontocr/ocr_v1.py +13 -1
  118. novel_downloader/utils/fontocr/ocr_v2.py +13 -1
  119. novel_downloader/utils/fontocr/ocr_v3.py +744 -0
  120. novel_downloader/utils/i18n.py +2 -0
  121. novel_downloader/utils/logger.py +2 -0
  122. novel_downloader/utils/network.py +110 -251
  123. novel_downloader/utils/state.py +1 -0
  124. novel_downloader/utils/text_utils/__init__.py +18 -17
  125. novel_downloader/utils/text_utils/diff_display.py +4 -5
  126. novel_downloader/utils/text_utils/numeric_conversion.py +253 -0
  127. novel_downloader/utils/text_utils/text_cleaner.py +179 -0
  128. novel_downloader/utils/text_utils/truncate_utils.py +62 -0
  129. novel_downloader/utils/time_utils/__init__.py +3 -3
  130. novel_downloader/utils/time_utils/datetime_utils.py +4 -5
  131. novel_downloader/utils/time_utils/sleep_utils.py +2 -3
  132. {novel_downloader-1.4.5.dist-info → novel_downloader-1.5.0.dist-info}/METADATA +2 -2
  133. novel_downloader-1.5.0.dist-info/RECORD +164 -0
  134. novel_downloader/config/site_rules.py +0 -94
  135. novel_downloader/core/factory/__init__.py +0 -20
  136. novel_downloader/core/factory/downloader.py +0 -73
  137. novel_downloader/core/factory/exporter.py +0 -58
  138. novel_downloader/core/factory/fetcher.py +0 -96
  139. novel_downloader/core/factory/parser.py +0 -86
  140. novel_downloader/core/fetchers/common/__init__.py +0 -14
  141. novel_downloader/core/fetchers/common/browser.py +0 -79
  142. novel_downloader/core/fetchers/common/session.py +0 -79
  143. novel_downloader/core/parsers/biquge/__init__.py +0 -10
  144. novel_downloader/core/parsers/common/__init__.py +0 -13
  145. novel_downloader/core/parsers/common/helper.py +0 -323
  146. novel_downloader/core/parsers/common/main_parser.py +0 -106
  147. novel_downloader/core/parsers/esjzone/__init__.py +0 -10
  148. novel_downloader/core/parsers/linovelib/__init__.py +0 -10
  149. novel_downloader/core/parsers/qianbi/__init__.py +0 -10
  150. novel_downloader/core/parsers/sfacg/__init__.py +0 -10
  151. novel_downloader/core/parsers/yamibo/__init__.py +0 -10
  152. novel_downloader/models/browser.py +0 -21
  153. novel_downloader/models/site_rules.py +0 -99
  154. novel_downloader/models/tasks.py +0 -33
  155. novel_downloader/resources/css_styles/volume-intro.css +0 -56
  156. novel_downloader/resources/json/replace_word_map.json +0 -4
  157. novel_downloader/resources/text/blacklist.txt +0 -22
  158. novel_downloader/utils/text_utils/chapter_formatting.py +0 -46
  159. novel_downloader/utils/text_utils/font_mapping.py +0 -28
  160. novel_downloader/utils/text_utils/text_cleaning.py +0 -107
  161. novel_downloader-1.4.5.dist-info/RECORD +0 -165
  162. {novel_downloader-1.4.5.dist-info → novel_downloader-1.5.0.dist-info}/WHEEL +0 -0
  163. {novel_downloader-1.4.5.dist-info → novel_downloader-1.5.0.dist-info}/entry_points.txt +0 -0
  164. {novel_downloader-1.4.5.dist-info → novel_downloader-1.5.0.dist-info}/licenses/LICENSE +0 -0
  165. {novel_downloader-1.4.5.dist-info → novel_downloader-1.5.0.dist-info}/top_level.txt +0 -0
@@ -14,21 +14,21 @@ import types
14
14
  from pathlib import Path
15
15
  from typing import Any, Self, cast
16
16
 
17
- from novel_downloader.models import (
18
- ChapterDict,
19
- SaveMode,
20
- StorageBackend,
21
- )
22
-
23
- from .file_utils import save_as_json
17
+ from novel_downloader.models import ChapterDict
24
18
 
25
19
  _CREATE_TABLE_SQL = """
26
- CREATE TABLE IF NOT EXISTS "{table}" (
27
- id TEXT PRIMARY KEY,
28
- title TEXT NOT NULL,
29
- content TEXT NOT NULL,
30
- extra TEXT NOT NULL
31
- )
20
+ CREATE TABLE IF NOT EXISTS chapters (
21
+ id TEXT NOT NULL,
22
+ source_id INTEGER NOT NULL,
23
+ priority INTEGER NOT NULL DEFAULT 1000,
24
+ title TEXT NOT NULL,
25
+ content TEXT NOT NULL,
26
+ extra TEXT,
27
+ PRIMARY KEY (id, source_id)
28
+ );
29
+
30
+ CREATE INDEX IF NOT EXISTS
31
+ idx_chapters_id_priority ON chapters(id, priority);
32
32
  """
33
33
 
34
34
 
@@ -36,276 +36,292 @@ class ChapterStorage:
36
36
  """
37
37
  Manage storage of chapters in JSON files or an SQLite database.
38
38
 
39
- :param raw_base: Base directory or file path for storage.
40
- :param namespace: Novel identifier (subfolder name or DB/table basename).
41
- :param backend_type: "json" (default) or "sqlite".
39
+ Supports storing multiple versions of each chapter from different sources,
40
+ each with a defined priority for selecting the preferred version.
42
41
  """
43
42
 
44
43
  def __init__(
45
44
  self,
46
45
  raw_base: str | Path,
47
- namespace: str,
48
- backend_type: StorageBackend = "json",
49
- *,
50
- batch_size: int = 1,
46
+ priorities: dict[int, int],
51
47
  ) -> None:
52
- self.raw_base = Path(raw_base)
53
- self.namespace = namespace
54
- self.backend = backend_type
55
- self._batch_size = batch_size
56
- self._pending = 0
48
+ """
49
+ Initialize storage for a specific book.
50
+
51
+ :param raw_base: Directory path where the SQLite file will be stored.
52
+ :param priorities: Mapping of source_id to priority value.
53
+ Lower numbers indicate higher priority.
54
+ E.X. {0: 10, 1: 100} means source 0 is preferred.
55
+ """
56
+ self._db_path = Path(raw_base) / "chapter_data.sqlite"
57
57
  self._conn: sqlite3.Connection | None = None
58
- self._existing_ids: set[str] = set()
59
-
60
- if self.backend == "json":
61
- self._init_json()
62
- else:
63
- self._init_sql()
64
-
65
- def _init_json(self) -> None:
66
- """Prepare directory for JSON files."""
67
- self._json_dir = self.raw_base / self.namespace
68
- self._json_dir.mkdir(parents=True, exist_ok=True)
69
- self._existing_ids = {p.stem for p in self._json_dir.glob("*.json")}
70
-
71
- def _init_sql(self) -> None:
72
- """Prepare SQLite connection and ensure table exists."""
73
- self._db_path = self.raw_base / f"{self.namespace}.sqlite"
58
+ self._priorities = priorities
59
+ self._existing_ids: set[tuple[str, int]] = set() # (chap_id, source_id)
60
+
61
+ def connect(self) -> None:
62
+ """
63
+ Open the SQLite connection, enable foreign keys,
64
+ create schema, register initial sources, and cache existing keys.
65
+ """
66
+ if self._conn:
67
+ return
74
68
  self._conn = sqlite3.connect(self._db_path)
75
- stmt = _CREATE_TABLE_SQL.format(table=self.namespace)
76
- self._conn.execute(stmt)
69
+ self._conn.row_factory = sqlite3.Row
70
+ self._conn.execute("PRAGMA foreign_keys = ON;")
71
+ self._conn.executescript(_CREATE_TABLE_SQL)
77
72
  self._conn.commit()
73
+ self._load_existing_keys()
78
74
 
79
- cur = self._conn.execute(f'SELECT id FROM "{self.namespace}"')
80
- self._existing_ids = {row[0] for row in cur.fetchall()}
81
-
82
- def _json_path(self, chap_id: str) -> Path:
83
- """Return Path for JSON file of given chapter ID."""
84
- return self._json_dir / f"{chap_id}.json"
85
-
86
- def exists(self, chap_id: str) -> bool:
75
+ def exists(
76
+ self,
77
+ chap_id: str,
78
+ source_id: int | None = None,
79
+ ) -> bool:
87
80
  """
88
81
  Check if a chapter exists.
89
82
 
90
83
  :param chap_id: Chapter identifier.
84
+ :param source_id: If provided, check existence for that source.
91
85
  :return: True if found, else False.
92
86
  """
93
- return chap_id in self._existing_ids
87
+ if source_id is not None:
88
+ return (chap_id, source_id) in self._existing_ids
89
+ return any(key[0] == chap_id for key in self._existing_ids)
94
90
 
95
- def _load_json(self, chap_id: str) -> ChapterDict:
96
- raw = self._json_path(chap_id).read_text(encoding="utf-8")
97
- return cast(ChapterDict, json.loads(raw))
98
-
99
- def _load_sql(self, chap_id: str) -> ChapterDict:
100
- if self._conn is None:
101
- raise RuntimeError("ChapterStorage is closed")
102
- cur = self._conn.execute(
103
- f'SELECT id, title, content, extra FROM "{self.namespace}" WHERE id = ?',
104
- (chap_id,),
105
- )
106
- row = cur.fetchone()
107
- return {
108
- "id": row[0],
109
- "title": row[1],
110
- "content": row[2],
111
- "extra": json.loads(row[3]),
112
- }
113
-
114
- def get(self, chap_id: str) -> ChapterDict | dict[str, Any]:
91
+ def upsert_chapter(
92
+ self,
93
+ data: ChapterDict,
94
+ source_id: int,
95
+ ) -> None:
115
96
  """
116
- Retrieve chapter by ID.
97
+ Insert or update a single chapter record.
117
98
 
118
- :param chap_id: Chapter identifier.
119
- :return: ChapterDict if exists, else empty dict.
99
+ :param data: ChapterDict containing id, title, content, extra.
100
+ :param source_id: Integer index of source.
120
101
  """
121
- if not self.exists(chap_id):
122
- return {}
123
- return (
124
- self._load_json(chap_id)
125
- if self.backend == "json"
126
- else self._load_sql(chap_id)
102
+ priority = self._priorities[source_id]
103
+ chap_id = data["id"]
104
+ title = data["title"]
105
+ content = data["content"]
106
+ extra_json = json.dumps(data["extra"])
107
+
108
+ self.conn.execute(
109
+ """
110
+ INSERT OR REPLACE INTO chapters
111
+ (id, source_id, priority, title, content, extra)
112
+ VALUES (?, ?, ?, ?, ?, ?)
113
+ """,
114
+ (chap_id, source_id, priority, title, content, extra_json),
127
115
  )
116
+ self._existing_ids.add((chap_id, source_id))
117
+ self.conn.commit()
128
118
 
129
- def _save_json(self, data: ChapterDict, on_exist: SaveMode) -> None:
130
- path = self._json_path(data["id"])
131
- save_as_json(data, path, on_exist=on_exist)
132
- self._existing_ids.add(data["id"])
133
-
134
- def _save_sql(self, data: ChapterDict, on_exist: SaveMode) -> None:
135
- if self._conn is None:
136
- raise RuntimeError("ChapterStorage is closed")
137
- sql = (
138
- f'INSERT OR REPLACE INTO "{self.namespace}" '
139
- "(id, title, content, extra) VALUES (?, ?, ?, ?)"
140
- if on_exist == "overwrite"
141
- else f'INSERT OR IGNORE INTO "{self.namespace}" '
142
- "(id, title, content, extra) VALUES (?, ?, ?, ?)"
143
- )
144
- self._conn.execute(
145
- sql,
146
- (
147
- data["id"],
148
- data["title"],
149
- data["content"],
150
- json.dumps(data["extra"], ensure_ascii=False),
151
- ),
152
- )
153
- self._existing_ids.add(data["id"])
154
- if self._batch_size == 1:
155
- self._conn.commit()
156
- else:
157
- self._pending += 1
158
- if self._pending >= self._batch_size:
159
- self._conn.commit()
160
- self._pending = 0
161
-
162
- def _save_many_sql(
119
+ def upsert_chapters(
163
120
  self,
164
- datas: list[ChapterDict],
165
- on_exist: SaveMode = "overwrite",
121
+ data: list[ChapterDict],
122
+ source_id: int,
166
123
  ) -> None:
167
124
  """
168
- Bulk-insert into SQLite using executemany + one commit.
125
+ Insert or update multiple chapters in one batch operation.
169
126
 
170
- :param datas: List of ChapterDict to store.
171
- :param on_exist: "overwrite" to REPLACE, "skip" to IGNORE on conflicts.
127
+ :param data: List of ChapterDicts.
128
+ :param source_id: Integer index of source.
172
129
  """
173
- if on_exist not in ("overwrite", "skip"):
174
- raise ValueError(f"invalid on_exist mode: {on_exist!r}")
175
- if self._conn is None:
176
- raise RuntimeError("ChapterStorage is closed")
177
-
178
- sql = (
179
- f'INSERT OR REPLACE INTO "{self.namespace}" '
180
- "(id, title, content, extra) VALUES (?, ?, ?, ?)"
181
- if on_exist == "overwrite"
182
- else f'INSERT OR IGNORE INTO "{self.namespace}" '
183
- "(id, title, content, extra) VALUES (?, ?, ?, ?)"
130
+ priority = self._priorities[source_id]
131
+ records = []
132
+ for chapter in data:
133
+ chap_id = chapter["id"]
134
+ title = chapter["title"]
135
+ content = chapter["content"]
136
+ extra_json = json.dumps(chapter["extra"])
137
+ records.append((chap_id, source_id, priority, title, content, extra_json))
138
+ self._existing_ids.add((chap_id, source_id))
139
+
140
+ self.conn.executemany(
141
+ """
142
+ INSERT OR REPLACE INTO chapters
143
+ (id, source_id, priority, title, content, extra)
144
+ VALUES (?, ?, ?, ?, ?, ?)
145
+ """,
146
+ records,
184
147
  )
148
+ self.conn.commit()
185
149
 
186
- params = [
187
- (
188
- data["id"],
189
- data["title"],
190
- data["content"],
191
- json.dumps(data["extra"], ensure_ascii=False),
192
- )
193
- for data in datas
194
- ]
195
-
196
- with self._conn:
197
- self._conn.executemany(sql, params)
198
-
199
- self._existing_ids.update(data["id"] for data in datas)
200
-
201
- def save(
150
+ def get_chapter(
202
151
  self,
203
- data: ChapterDict,
204
- on_exist: SaveMode = "overwrite",
205
- ) -> None:
152
+ chap_id: str,
153
+ source_id: int,
154
+ ) -> ChapterDict | None:
206
155
  """
207
- Save a chapter record.
156
+ Retrieve a single chapter by id and source.
208
157
 
209
- :param data: ChapterDict to store.
210
- :param on_exist: What to do if chap_id already exists
158
+ :param chap_id: Chapter identifier.
159
+ :param source_id: Integer index of source.
160
+ :return: A ChapterDict if found, else None.
211
161
  """
212
- if on_exist not in ("overwrite", "skip"):
213
- raise ValueError(f"invalid on_exist mode: {on_exist!r}")
214
-
215
- if self.backend == "json":
216
- self._save_json(data, on_exist)
217
- else:
218
- self._save_sql(data, on_exist)
162
+ cur = self.conn.execute(
163
+ """
164
+ SELECT title, content, extra
165
+ FROM chapters
166
+ WHERE id = ? AND source_id = ?
167
+ LIMIT 1
168
+ """,
169
+ (chap_id, source_id),
170
+ )
171
+ row = cur.fetchone()
172
+ if not row:
173
+ return None
174
+
175
+ return ChapterDict(
176
+ id=chap_id,
177
+ title=row["title"],
178
+ content=row["content"],
179
+ extra=self._load_dict(row["extra"]),
180
+ )
219
181
 
220
- def save_many(
182
+ def get_chapters(
221
183
  self,
222
- datas: list[ChapterDict],
223
- on_exist: SaveMode = "overwrite",
224
- ) -> None:
184
+ chap_ids: list[str],
185
+ source_id: int,
186
+ ) -> dict[str, ChapterDict | None]:
225
187
  """
226
- Save multiple chapter records in one shot.
188
+ Retrieve multiple chapters by their ids for a given source in one query.
227
189
 
228
- :param datas: List of ChapterDict to store.
229
- :param on_exist: What to do if chap_id already exists.
190
+ :param chap_ids: List of chapter identifiers.
191
+ :param source_id: Integer index of source.
192
+ :return: A dict mapping chap_id to ChapterDict or None.
230
193
  """
231
- if on_exist not in ("overwrite", "skip"):
232
- raise ValueError(f"invalid on_exist mode: {on_exist!r}")
233
-
234
- if self.backend == "json":
235
- for data in datas:
236
- self._save_json(data, on_exist)
237
- else:
238
- self._save_many_sql(datas, on_exist)
239
-
240
- def list_ids(self) -> list[str]:
194
+ placeholders = ",".join("?" for _ in chap_ids)
195
+ query = f"""
196
+ SELECT id, title, content, extra
197
+ FROM chapters
198
+ WHERE id IN ({placeholders}) AND source_id = ?
241
199
  """
242
- List all stored chapter IDs.
243
- """
244
- if self.backend == "json":
245
- return [p.stem for p in self._json_dir.glob("*.json") if p.is_file()]
246
-
247
- if self._conn is None:
248
- raise RuntimeError("ChapterStorage is closed")
249
- cur = self._conn.execute(f'SELECT id FROM "{self.namespace}"')
250
- return [row[0] for row in cur.fetchall()]
200
+ rows = self.conn.execute(query, (*chap_ids, source_id)).fetchall()
201
+
202
+ result: dict[str, ChapterDict | None] = {cid: None for cid in chap_ids}
203
+ for row in rows:
204
+ result[row["id"]] = ChapterDict(
205
+ id=row["id"],
206
+ title=row["title"],
207
+ content=row["content"],
208
+ extra=self._load_dict(row["extra"]),
209
+ )
210
+ return result
251
211
 
252
- def delete(self, chap_id: str) -> bool:
212
+ def get_best_chapter(
213
+ self,
214
+ chap_id: str,
215
+ ) -> ChapterDict | None:
253
216
  """
254
- Delete a chapter by ID.
255
-
256
- :param chap_id: Chapter identifier.
257
- :return: True if deleted, False if not found.
217
+ Retrieve the chapter with the highest priority (lowest priority number)
218
+ among all sources for the given chap_id.
258
219
  """
259
- if not self.exists(chap_id):
260
- return False
261
- if self.backend == "json":
262
- self._json_path(chap_id).unlink()
263
- return True
264
-
265
- if self._conn is None:
266
- raise RuntimeError("ChapterStorage is closed")
267
- cur = self._conn.execute(
268
- f'DELETE FROM "{self.namespace}" WHERE id = ?', (chap_id,)
220
+ cur = self.conn.execute(
221
+ """
222
+ SELECT title, content, extra
223
+ FROM chapters
224
+ WHERE id = ?
225
+ ORDER BY priority ASC
226
+ LIMIT 1
227
+ """,
228
+ (chap_id,),
229
+ )
230
+ row = cur.fetchone()
231
+ if not row:
232
+ return None
233
+
234
+ return ChapterDict(
235
+ id=chap_id,
236
+ title=row["title"],
237
+ content=row["content"],
238
+ extra=self._load_dict(row["extra"]),
269
239
  )
270
- self._conn.commit()
271
- return cur.rowcount > 0
272
240
 
273
- def count(self) -> int:
241
+ def get_best_chapters(
242
+ self,
243
+ chap_ids: list[str],
244
+ ) -> dict[str, ChapterDict | None]:
274
245
  """
275
- Count total chapters stored.
246
+ Retrieve the best (highest-priority) chapter for each given id
247
+ in a single query using window functions.
276
248
  """
277
- if self.backend == "json":
278
- return len(self.list_ids())
279
-
280
- if self._conn is None:
281
- raise RuntimeError("ChapterStorage is closed")
282
- cur = self._conn.execute(f'SELECT COUNT(1) FROM "{self.namespace}"')
283
- return int(cur.fetchone()[0])
249
+ placeholders = ",".join("?" for _ in chap_ids)
250
+ query = f"""
251
+ SELECT chap_id, title, content, extra FROM (
252
+ SELECT id AS chap_id, title, content, extra,
253
+ ROW_NUMBER() OVER (
254
+ PARTITION BY id ORDER BY priority ASC
255
+ ) AS rn
256
+ FROM chapters
257
+ WHERE id IN ({placeholders})
258
+ ) sub
259
+ WHERE rn = 1
260
+ """
261
+ rows = self.conn.execute(query, chap_ids).fetchall()
262
+
263
+ result: dict[str, ChapterDict | None] = {chap_id: None for chap_id in chap_ids}
264
+ for row in rows:
265
+ result[row["chap_id"]] = ChapterDict(
266
+ id=row["chap_id"],
267
+ title=row["title"],
268
+ content=row["content"],
269
+ extra=self._load_dict(row["extra"]),
270
+ )
271
+ return result
284
272
 
285
- def flush(self) -> None:
273
+ def count(self) -> int:
286
274
  """
287
- Write out any leftover rows (< batch_size) at the end.
275
+ Count total chapters stored.
288
276
  """
289
- if self._conn is not None and self._pending > 0:
290
- self._conn.commit()
291
- self._pending = 0
277
+ return len(self._existing_ids)
292
278
 
293
279
  def close(self) -> None:
294
280
  """
295
281
  Gracefully close any open resources.
296
282
  """
297
- if self.backend != "sqlite" or self._conn is None:
283
+ if self._conn is None:
298
284
  return
299
285
 
300
- with contextlib.suppress(Exception):
301
- self.flush()
302
-
303
286
  with contextlib.suppress(Exception):
304
287
  self._conn.close()
305
288
 
306
289
  self._conn = None
290
+ self._existing_ids = set()
291
+
292
+ @property
293
+ def conn(self) -> sqlite3.Connection:
294
+ """
295
+ Return the active SQLite connection, or raise if not connected.
296
+
297
+ :raises RuntimeError: if connect() has not been called.
298
+ """
299
+ if self._conn is None:
300
+ raise RuntimeError(
301
+ "Database connection is not established. Call connect() first."
302
+ )
303
+ return self._conn
304
+
305
+ def _load_existing_keys(self) -> None:
306
+ """
307
+ Cache all existing (chapter_id, source_id) pairs for fast upsert.
308
+ """
309
+ cur = self.conn.execute("SELECT id, source_id FROM chapters")
310
+ self._existing_ids = {(row["id"], row["source_id"]) for row in cur.fetchall()}
311
+
312
+ @staticmethod
313
+ def _load_dict(data: str) -> dict[str, Any]:
314
+ try:
315
+ parsed = json.loads(data)
316
+ return cast(dict[str, Any], parsed)
317
+ except Exception:
318
+ return {}
307
319
 
308
320
  def __enter__(self) -> Self:
321
+ """
322
+ Enter context manager, automatically connecting to the database.
323
+ """
324
+ self.connect()
309
325
  return self
310
326
 
311
327
  def __exit__(
@@ -314,14 +330,18 @@ class ChapterStorage:
314
330
  exc_val: BaseException | None,
315
331
  tb: types.TracebackType | None,
316
332
  ) -> None:
333
+ """
334
+ Exit context manager, closing the database connection.
335
+ """
317
336
  self.close()
318
337
 
319
338
  def __del__(self) -> None:
339
+ """
340
+ Ensure the database connection is closed upon object deletion.
341
+ """
320
342
  self.close()
321
343
 
322
344
  def __repr__(self) -> str:
323
345
  return (
324
- f"<ChapterStorage ns='{self.namespace}' "
325
- f"backend='{self.backend}' "
326
- f"path='{self.raw_base}'>"
346
+ f"<ChapterStorage priorities='{self._priorities}' path='{self._db_path}'>"
327
347
  )
@@ -19,16 +19,6 @@ APP_NAME = "NovelDownloader" # Display name
19
19
  APP_DIR_NAME = "novel_downloader" # Directory name for platformdirs
20
20
  LOGGER_NAME = PACKAGE_NAME # Root logger name
21
21
 
22
- SUPPORTED_SITES = {
23
- "biquge",
24
- "esjzone",
25
- "linovelib",
26
- "qianbi",
27
- "qidian",
28
- "sfacg",
29
- "yamibo",
30
- }
31
-
32
22
  # -----------------------------------------------------------------------------
33
23
  # Base directories
34
24
  # -----------------------------------------------------------------------------
@@ -49,9 +39,7 @@ MODEL_CACHE_DIR = BASE_CONFIG_DIR / "models"
49
39
  # Default file paths
50
40
  # -----------------------------------------------------------------------------
51
41
  STATE_FILE = DATA_DIR / "state.json"
52
- HASH_STORE_FILE = DATA_DIR / "image_hashes.json"
53
42
  SETTING_FILE = CONFIG_DIR / "settings.json"
54
- SITE_RULES_FILE = CONFIG_DIR / "site_rules.json"
55
43
  DEFAULT_USER_DATA_DIR = DATA_DIR / "browser_data"
56
44
 
57
45
 
@@ -91,9 +79,7 @@ DEFAULT_SETTINGS_PATHS = [
91
79
 
92
80
  # CSS Styles
93
81
  CSS_MAIN_PATH = files("novel_downloader.resources.css_styles").joinpath("main.css")
94
- CSS_VOLUME_INTRO_PATH = files("novel_downloader.resources.css_styles").joinpath(
95
- "volume-intro.css"
96
- )
82
+ CSS_INTRO_PATH = files("novel_downloader.resources.css_styles").joinpath("intro.css")
97
83
 
98
84
  # Images
99
85
  VOLUME_BORDER_IMAGE_PATH = files("novel_downloader.resources.images").joinpath(
@@ -101,9 +87,6 @@ VOLUME_BORDER_IMAGE_PATH = files("novel_downloader.resources.images").joinpath(
101
87
  )
102
88
 
103
89
  # JSON
104
- REPLACE_WORD_MAP_PATH = files("novel_downloader.resources.json").joinpath(
105
- "replace_word_map.json"
106
- )
107
90
  LINOVELIB_FONT_MAP_PATH = files("novel_downloader.resources.json").joinpath(
108
91
  "linovelib_font_map.json"
109
92
  )
@@ -113,9 +96,6 @@ QD_DECRYPT_SCRIPT_PATH = files("novel_downloader.resources.js_scripts").joinpath
113
96
  "qidian_decrypt_node.js"
114
97
  )
115
98
 
116
- # Text Files
117
- BLACKLIST_PATH = files("novel_downloader.resources.text").joinpath("blacklist.txt")
118
-
119
99
  # ---------------------------------------------------------------------
120
100
  # Pretrained model registry (e.g. used in font recovery or OCR)
121
101
  # ---------------------------------------------------------------------
@@ -0,0 +1,34 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ novel_downloader.utils.epub
4
+ ---------------------------
5
+
6
+ Top-level package for EPUB export utilities.
7
+
8
+ Key components:
9
+
10
+ - EpubBuilder : orchestrates metadata, manifest, spine, navigation, and resources
11
+ - Chapter, Volume : represent and render content sections and volume intros
12
+
13
+ Usage example:
14
+
15
+ ```python
16
+ builder = EpubBuilder(title="My Novel", author="Author Name", uid="uuid-1234")
17
+ builder.add_chapter(Chapter(id="ch1", title="Chapter 1", content="<p>xxx</p>"))
18
+ builder.export("output/my_novel.epub")
19
+ ```
20
+ """
21
+
22
+ __all__ = [
23
+ "EpubBuilder",
24
+ "Chapter",
25
+ "Volume",
26
+ "StyleSheet",
27
+ ]
28
+
29
+ from .builder import EpubBuilder
30
+ from .models import (
31
+ Chapter,
32
+ StyleSheet,
33
+ Volume,
34
+ )