article-backup 0.3.13__tar.gz → 0.3.14__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31)
  1. {article_backup-0.3.13 → article_backup-0.3.14}/PKG-INFO +1 -1
  2. {article_backup-0.3.13 → article_backup-0.3.14}/article_backup.egg-info/PKG-INFO +1 -1
  3. {article_backup-0.3.13 → article_backup-0.3.14}/backup.py +4 -0
  4. {article_backup-0.3.13 → article_backup-0.3.14}/pyproject.toml +1 -1
  5. {article_backup-0.3.13 → article_backup-0.3.14}/src/downloader.py +53 -31
  6. article_backup-0.3.14/tests/test_asset_dedup.py +352 -0
  7. {article_backup-0.3.13 → article_backup-0.3.14}/tests/test_config_hardening.py +3 -0
  8. article_backup-0.3.13/tests/test_asset_dedup.py +0 -180
  9. {article_backup-0.3.13 → article_backup-0.3.14}/LICENSE +0 -0
  10. {article_backup-0.3.13 → article_backup-0.3.14}/README.md +0 -0
  11. {article_backup-0.3.13 → article_backup-0.3.14}/article_backup.egg-info/SOURCES.txt +0 -0
  12. {article_backup-0.3.13 → article_backup-0.3.14}/article_backup.egg-info/dependency_links.txt +0 -0
  13. {article_backup-0.3.13 → article_backup-0.3.14}/article_backup.egg-info/entry_points.txt +0 -0
  14. {article_backup-0.3.13 → article_backup-0.3.14}/article_backup.egg-info/requires.txt +0 -0
  15. {article_backup-0.3.13 → article_backup-0.3.14}/article_backup.egg-info/top_level.txt +0 -0
  16. {article_backup-0.3.13 → article_backup-0.3.14}/setup.cfg +0 -0
  17. {article_backup-0.3.13 → article_backup-0.3.14}/src/__init__.py +0 -0
  18. {article_backup-0.3.13 → article_backup-0.3.14}/src/boosty.py +0 -0
  19. {article_backup-0.3.13 → article_backup-0.3.14}/src/config.py +0 -0
  20. {article_backup-0.3.13 → article_backup-0.3.14}/src/database.py +0 -0
  21. {article_backup-0.3.13 → article_backup-0.3.14}/src/sponsr.py +0 -0
  22. {article_backup-0.3.13 → article_backup-0.3.14}/src/utils.py +0 -0
  23. {article_backup-0.3.13 → article_backup-0.3.14}/tests/test_boosty_empty_link.py +0 -0
  24. {article_backup-0.3.13 → article_backup-0.3.14}/tests/test_boosty_normalize.py +0 -0
  25. {article_backup-0.3.13 → article_backup-0.3.14}/tests/test_incremental_sync.py +0 -0
  26. {article_backup-0.3.13 → article_backup-0.3.14}/tests/test_slug_safety.py +0 -0
  27. {article_backup-0.3.13 → article_backup-0.3.14}/tests/test_sponsr_formatting_fix.py +0 -0
  28. {article_backup-0.3.13 → article_backup-0.3.14}/tests/test_sponsr_normalize.py +0 -0
  29. {article_backup-0.3.13 → article_backup-0.3.14}/tests/test_sponsr_tags.py +0 -0
  30. {article_backup-0.3.13 → article_backup-0.3.14}/tests/test_sync_policy.py +0 -0
  31. {article_backup-0.3.13 → article_backup-0.3.14}/tests/test_video_embed.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: article-backup
3
- Version: 0.3.13
3
+ Version: 0.3.14
4
4
  Summary: Локальный бэкап статей с Sponsr.ru и Boosty.to в Markdown с Hugo-интеграцией
5
5
  Author-email: Eugene Chaykin <eugene@chayk.in>
6
6
  License: Apache-2.0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: article-backup
3
- Version: 0.3.13
3
+ Version: 0.3.14
4
4
  Summary: Локальный бэкап статей с Sponsr.ru и Boosty.to в Markdown с Hugo-интеграцией
5
5
  Author-email: Eugene Chaykin <eugene@chayk.in>
6
6
  License: Apache-2.0
@@ -27,9 +27,13 @@ def generate_hugo_config(config: Config):
27
27
 
28
28
  content = f'''baseURL = {toml_str(config.hugo.base_url)}
29
29
  locale = {toml_str(config.hugo.language_code)}
30
+ defaultContentLanguage = {toml_str(config.hugo.language_code)}
30
31
  title = {toml_str(config.hugo.title)}
31
32
  relativeURLs = true
32
33
 
34
+ [languages.{config.hugo.language_code}]
35
+ locale = {toml_str(config.hugo.language_code)}
36
+
33
37
  [params]
34
38
  default_theme = {toml_str(config.hugo.default_theme)}
35
39
 
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "article-backup"
3
- version = "0.3.13"
3
+ version = "0.3.14"
4
4
  description = "Локальный бэкап статей с Sponsr.ru и Boosty.to в Markdown с Hugo-интеграцией"
5
5
  readme = "README.md"
6
6
  license = {text = "Apache-2.0"}
@@ -33,6 +33,7 @@ def retry_request(
33
33
  base_delay: float = 1.0,
34
34
  max_delay: float = 30.0,
35
35
  backoff_factor: float = 2.0,
36
+ delays: list[float] | None = None,
36
37
  ):
37
38
  """
38
39
  Выполняет функцию с retry и exponential backoff.
@@ -43,6 +44,7 @@ def retry_request(
43
44
  base_delay: Начальная задержка в секундах
44
45
  max_delay: Максимальная задержка в секундах
45
46
  backoff_factor: Множитель для увеличения задержки
47
+ delays: Явная последовательность задержек между попытками
46
48
  """
47
49
  last_exception = None
48
50
  delay = base_delay
@@ -58,8 +60,11 @@ def retry_request(
58
60
  raise
59
61
 
60
62
  if attempt < max_retries - 1:
61
- time.sleep(delay)
62
- delay = min(delay * backoff_factor, max_delay)
63
+ if delays:
64
+ time.sleep(delays[min(attempt, len(delays) - 1)])
65
+ else:
66
+ time.sleep(delay)
67
+ delay = min(delay * backoff_factor, max_delay)
63
68
 
64
69
  if last_exception:
65
70
  raise last_exception
@@ -326,43 +331,60 @@ class BaseDownloader(ABC):
326
331
  if ext and not force and not should_download_asset(url, None, self.source.asset_types):
327
332
  return url, None
328
333
 
329
- def do_request():
330
- resp = self.session.get(request_url, stream=True, timeout=self.TIMEOUT)
331
- resp.raise_for_status()
332
- return resp
334
+ filename: str | None = None
335
+ filepath: Path | None = None
333
336
 
334
- response = retry_request(do_request, max_retries=3)
335
- try:
336
- content_type = response.headers.get('Content-Type', '')
337
+ def download_to_file():
338
+ nonlocal filename, filepath
339
+ resp = self.session.get(request_url, stream=True, timeout=self.TIMEOUT)
340
+ try:
341
+ resp.raise_for_status()
342
+ content_type = resp.headers.get('Content-Type', '')
337
343
 
338
- # Полная проверка после получения Content-Type
339
- if not force and not should_download_asset(url, content_type, self.source.asset_types):
340
- return url, None
344
+ # Полная проверка после получения Content-Type
345
+ if not force and not should_download_asset(url, content_type, self.source.asset_types):
346
+ return None
341
347
 
342
- filename_base = self._make_asset_filename(url, content_type, asset.get('alt'))
348
+ if filename is None or filepath is None:
349
+ filename_base = self._make_asset_filename(url, content_type, asset.get('alt'))
343
350
 
344
- with used_lock:
345
- filename = filename_base
346
- filepath = assets_dir / filename
347
- if filename in used_filenames or filepath.exists():
348
- filename = self._deduplicate_filename(filename, url)
349
- filepath = assets_dir / filename
351
+ with used_lock:
352
+ filename = filename_base
353
+ filepath = assets_dir / filename
354
+ if filename in used_filenames or filepath.exists():
355
+ filename = self._deduplicate_filename(filename, url)
356
+ filepath = assets_dir / filename
350
357
 
351
- # На всякий случай добиваемся уникальности в рамках сессии
352
- while filename in used_filenames or filepath.exists():
353
- filename = self._deduplicate_filename(filename, url + filename)
354
- filepath = assets_dir / filename
358
+ # На всякий случай добиваемся уникальности в рамках сессии
359
+ while filename in used_filenames or filepath.exists():
360
+ filename = self._deduplicate_filename(filename, url + filename)
361
+ filepath = assets_dir / filename
355
362
 
356
- used_filenames.add(filename)
363
+ used_filenames.add(filename)
357
364
 
358
- if not filepath.exists():
359
365
  with open(filepath, 'wb') as f:
360
- for chunk in response.iter_content(chunk_size=8192):
361
- f.write(chunk)
362
- finally:
363
- close = getattr(response, 'close', None)
364
- if callable(close):
365
- close()
366
+ for chunk in resp.iter_content(chunk_size=8192):
367
+ if chunk:
368
+ f.write(chunk)
369
+ return filename
370
+ except Exception as e:
371
+ if filepath and filepath.exists():
372
+ filepath.unlink()
373
+ if isinstance(e, OSError) and not isinstance(e, requests.RequestException):
374
+ raise requests.RequestException(str(e)) from e
375
+ raise
376
+ finally:
377
+ close = getattr(resp, 'close', None)
378
+ if callable(close):
379
+ close()
380
+
381
+ filename = retry_request(
382
+ download_to_file,
383
+ max_retries=10,
384
+ delays=[3, 5, 7, 10, 15, 15, 15, 15, 15],
385
+ )
386
+ if not filename:
387
+ return url, None
366
388
 
367
389
  return url, filename
368
390
  except requests.RequestException as e:
@@ -0,0 +1,352 @@
1
+ import tempfile
2
+ import unittest
3
+ from pathlib import Path
4
+ from typing import cast
5
+ from unittest.mock import patch
6
+
7
+ import requests
8
+
9
+ from src.config import Auth, Config, Source
10
+ from src.database import Database
11
+ from src.downloader import BaseDownloader
12
+
13
+
14
+ class _FakeResponse:
15
+ def __init__(self, content_type: str, body: bytes):
16
+ self.headers = {"Content-Type": content_type}
17
+ self._body = body
18
+
19
+ def raise_for_status(self):
20
+ return None
21
+
22
+ def iter_content(self, chunk_size: int = 8192):
23
+ # Yield at least one chunk to trigger file write.
24
+ yield self._body
25
+
26
+ def close(self):
27
+ return None
28
+
29
+
30
+ class _FailingStreamResponse(_FakeResponse):
31
+ def iter_content(self, chunk_size: int = 8192):
32
+ yield self._body
33
+ raise requests.exceptions.ChunkedEncodingError("stream interrupted")
34
+
35
+
36
+ class _HttpErrorResponse(_FakeResponse):
37
+ def __init__(self, status_code: int):
38
+ super().__init__("text/plain", b"")
39
+ self.status_code = status_code
40
+
41
+ def raise_for_status(self):
42
+ response = requests.Response()
43
+ response.status_code = self.status_code
44
+ raise requests.HTTPError(f"{self.status_code} error", response=response)
45
+
46
+
47
+ class _DummyDB:
48
+ pass
49
+
50
+
51
+ class _DummyDownloader(BaseDownloader):
52
+ PLATFORM = "dummy"
53
+ MAX_WORKERS = 2
54
+
55
+ def _setup_session(self):
56
+ # Tests patch session.get directly.
57
+ return None
58
+
59
+ def fetch_posts_list(
60
+ self,
61
+ existing_ids: set[str] | None = None,
62
+ incremental: bool = False,
63
+ safety_chunks: int = 1
64
+ ):
65
+ raise NotImplementedError
66
+
67
+ def fetch_post(self, post_id: str):
68
+ raise NotImplementedError
69
+
70
+ def _parse_post(self, raw_data: dict):
71
+ raise NotImplementedError
72
+
73
+ def _to_markdown(self, post, asset_map):
74
+ raise NotImplementedError
75
+
76
+
77
+ class _FailingWriteFile:
78
+ def __init__(self, wrapped):
79
+ self._wrapped = wrapped
80
+
81
+ def __enter__(self):
82
+ self._wrapped.__enter__()
83
+ return self
84
+
85
+ def __exit__(self, exc_type, exc_val, exc_tb):
86
+ return self._wrapped.__exit__(exc_type, exc_val, exc_tb)
87
+
88
+ def write(self, data: bytes):
89
+ self._wrapped.write(b"partial")
90
+ raise OSError("temporary disk write failure")
91
+
92
+
93
+ class AssetDedupTests(unittest.TestCase):
94
+ def test_download_assets_deduplicates_colliding_names(self):
95
+ with tempfile.TemporaryDirectory() as tmp:
96
+ tmp_path = Path(tmp)
97
+ assets_dir = tmp_path / "assets"
98
+ assets_dir.mkdir(parents=True, exist_ok=True)
99
+
100
+ config = Config(output_dir=tmp_path, auth=Auth())
101
+ source = Source(platform="sponsr", author="author", download_assets=True)
102
+ dl = _DummyDownloader(config, source, cast(Database, _DummyDB()))
103
+
104
+ def fake_get(url: str, stream: bool = True, timeout=None):
105
+ # URLs intentionally do not contain extensions.
106
+ return _FakeResponse("image/jpeg", body=(url + "\n").encode("ascii"))
107
+
108
+ dl.session.get = fake_get # type: ignore[method-assign]
109
+
110
+ assets = [
111
+ {"url": "https://example.test/media/1", "alt": "same name"},
112
+ {"url": "https://example.test/media/2", "alt": "same name"},
113
+ ]
114
+
115
+ asset_map = dl._download_assets(assets, assets_dir)
116
+
117
+ self.assertEqual(set(asset_map.keys()), {a["url"] for a in assets})
118
+
119
+ filenames = list(asset_map.values())
120
+ self.assertEqual(len(filenames), 2)
121
+ self.assertNotEqual(filenames[0], filenames[1])
122
+
123
+ for fn in filenames:
124
+ self.assertTrue((assets_dir / fn).exists(), msg=f"missing file: {fn}")
125
+
126
+ def test_download_assets_deduplicates_when_file_exists(self):
127
+ with tempfile.TemporaryDirectory() as tmp:
128
+ tmp_path = Path(tmp)
129
+ assets_dir = tmp_path / "assets"
130
+ assets_dir.mkdir(parents=True, exist_ok=True)
131
+
132
+ config = Config(output_dir=tmp_path, auth=Auth())
133
+ source = Source(platform="sponsr", author="author", download_assets=True)
134
+ dl = _DummyDownloader(config, source, cast(Database, _DummyDB()))
135
+
136
+ # Pre-create a file with the expected base name.
137
+ base = dl._make_asset_filename(
138
+ "https://example.test/media/1",
139
+ "image/jpeg",
140
+ "same name",
141
+ )
142
+ (assets_dir / base).write_bytes(b"existing")
143
+
144
+ def fake_get(url: str, stream: bool = True, timeout=None):
145
+ return _FakeResponse("image/jpeg", body=b"downloaded")
146
+
147
+ dl.session.get = fake_get # type: ignore[method-assign]
148
+
149
+ assets = [{"url": "https://example.test/media/1", "alt": "same name"}]
150
+ asset_map = dl._download_assets(assets, assets_dir)
151
+
152
+ self.assertIn("https://example.test/media/1", asset_map)
153
+ self.assertNotEqual(asset_map["https://example.test/media/1"], base)
154
+ self.assertTrue((assets_dir / asset_map["https://example.test/media/1"]).exists())
155
+
156
+ def test_download_assets_keeps_unique_names_under_parallelism(self):
157
+ with tempfile.TemporaryDirectory() as tmp:
158
+ tmp_path = Path(tmp)
159
+ assets_dir = tmp_path / "assets"
160
+ assets_dir.mkdir(parents=True, exist_ok=True)
161
+
162
+ config = Config(output_dir=tmp_path, auth=Auth())
163
+ source = Source(platform="sponsr", author="author", download_assets=True)
164
+ dl = _DummyDownloader(config, source, cast(Database, _DummyDB()))
165
+ dl.MAX_WORKERS = 5
166
+
167
+ def fake_get(url: str, stream: bool = True, timeout=None):
168
+ return _FakeResponse("image/jpeg", body=(url + "\n").encode("ascii"))
169
+
170
+ dl.session.get = fake_get # type: ignore[method-assign]
171
+
172
+ assets = [
173
+ {"url": f"https://example.test/media/{i}", "alt": "same name"}
174
+ for i in range(20)
175
+ ]
176
+
177
+ asset_map = dl._download_assets(assets, assets_dir)
178
+
179
+ self.assertEqual(len(asset_map), 20)
180
+ filenames = list(asset_map.values())
181
+ self.assertEqual(len(set(filenames)), 20)
182
+ for fn in filenames:
183
+ self.assertTrue((assets_dir / fn).exists(), msg=f"missing file: {fn}")
184
+
185
+ def test_download_assets_uses_download_url_but_maps_original_url(self):
186
+ with tempfile.TemporaryDirectory() as tmp:
187
+ tmp_path = Path(tmp)
188
+ assets_dir = tmp_path / "assets"
189
+ assets_dir.mkdir(parents=True, exist_ok=True)
190
+
191
+ config = Config(output_dir=tmp_path, auth=Auth())
192
+ source = Source(platform="boosty", author="author", download_assets=True)
193
+ dl = _DummyDownloader(config, source, cast(Database, _DummyDB()))
194
+
195
+ requested_urls = []
196
+
197
+ def fake_get(url: str, stream: bool = True, timeout=None):
198
+ requested_urls.append(url)
199
+ return _FakeResponse("audio/mpeg", body=b"audio")
200
+
201
+ dl.session.get = fake_get # type: ignore[method-assign]
202
+
203
+ asset_map = dl._download_assets(
204
+ [
205
+ {
206
+ "url": "https://cdn.boosty.to/audio/audio-id",
207
+ "download_url": "https://cdn.boosty.to/audio/audio-id?sign=abc",
208
+ "alt": "audio.mp3",
209
+ }
210
+ ],
211
+ assets_dir,
212
+ )
213
+
214
+ self.assertEqual(requested_urls, ["https://cdn.boosty.to/audio/audio-id?sign=abc"])
215
+ self.assertIn("https://cdn.boosty.to/audio/audio-id", asset_map)
216
+
217
+ def test_download_assets_retries_network_errors_ten_times(self):
218
+ with tempfile.TemporaryDirectory() as tmp:
219
+ tmp_path = Path(tmp)
220
+ assets_dir = tmp_path / "assets"
221
+ assets_dir.mkdir(parents=True, exist_ok=True)
222
+
223
+ config = Config(output_dir=tmp_path, auth=Auth())
224
+ source = Source(platform="boosty", author="author", download_assets=True)
225
+ dl = _DummyDownloader(config, source, cast(Database, _DummyDB()))
226
+
227
+ attempts = 0
228
+
229
+ def fake_get(url: str, stream: bool = True, timeout=None):
230
+ nonlocal attempts
231
+ attempts += 1
232
+ if attempts < 10:
233
+ raise requests.ConnectionError("temporary cdn failure")
234
+ return _FakeResponse("audio/mpeg", body=b"audio")
235
+
236
+ dl.session.get = fake_get # type: ignore[method-assign]
237
+
238
+ with patch("src.downloader.time.sleep") as sleep_mock:
239
+ asset_map = dl._download_assets(
240
+ [{"url": "https://cdn.boosty.to/audio/audio-id", "alt": "audio.mp3"}],
241
+ assets_dir,
242
+ )
243
+
244
+ self.assertEqual(attempts, 10)
245
+ self.assertEqual(
246
+ [call.args[0] for call in sleep_mock.call_args_list],
247
+ [3, 5, 7, 10, 15, 15, 15, 15, 15],
248
+ )
249
+ self.assertIn("https://cdn.boosty.to/audio/audio-id", asset_map)
250
+
251
+ def test_download_assets_retries_stream_errors_and_removes_partial_file(self):
252
+ with tempfile.TemporaryDirectory() as tmp:
253
+ tmp_path = Path(tmp)
254
+ assets_dir = tmp_path / "assets"
255
+ assets_dir.mkdir(parents=True, exist_ok=True)
256
+
257
+ config = Config(output_dir=tmp_path, auth=Auth())
258
+ source = Source(platform="boosty", author="author", download_assets=True)
259
+ dl = _DummyDownloader(config, source, cast(Database, _DummyDB()))
260
+
261
+ attempts = 0
262
+
263
+ def fake_get(url: str, stream: bool = True, timeout=None):
264
+ nonlocal attempts
265
+ attempts += 1
266
+ if attempts == 1:
267
+ return _FailingStreamResponse("audio/mpeg", body=b"partial")
268
+ return _FakeResponse("audio/mpeg", body=b"complete")
269
+
270
+ dl.session.get = fake_get # type: ignore[method-assign]
271
+
272
+ with patch("src.downloader.time.sleep"):
273
+ asset_map = dl._download_assets(
274
+ [{"url": "https://cdn.boosty.to/audio/audio-id", "alt": "audio.mp3"}],
275
+ assets_dir,
276
+ )
277
+
278
+ self.assertEqual(attempts, 2)
279
+ filename = asset_map["https://cdn.boosty.to/audio/audio-id"]
280
+ self.assertEqual((assets_dir / filename).read_bytes(), b"complete")
281
+ self.assertFalse(any(path.read_bytes() == b"partial" for path in assets_dir.iterdir()))
282
+
283
+ def test_download_assets_does_not_retry_permanent_404(self):
284
+ with tempfile.TemporaryDirectory() as tmp:
285
+ tmp_path = Path(tmp)
286
+ assets_dir = tmp_path / "assets"
287
+ assets_dir.mkdir(parents=True, exist_ok=True)
288
+
289
+ config = Config(output_dir=tmp_path, auth=Auth())
290
+ source = Source(platform="boosty", author="author", download_assets=True)
291
+ dl = _DummyDownloader(config, source, cast(Database, _DummyDB()))
292
+
293
+ attempts = 0
294
+
295
+ def fake_get(url: str, stream: bool = True, timeout=None):
296
+ nonlocal attempts
297
+ attempts += 1
298
+ return _HttpErrorResponse(404)
299
+
300
+ dl.session.get = fake_get # type: ignore[method-assign]
301
+
302
+ with patch("src.downloader.time.sleep"):
303
+ asset_map = dl._download_assets(
304
+ [{"url": "https://cdn.boosty.to/audio/missing-id", "alt": "missing.mp3"}],
305
+ assets_dir,
306
+ )
307
+
308
+ self.assertEqual(attempts, 1)
309
+ self.assertEqual(asset_map, {})
310
+
311
+ def test_download_assets_retries_write_errors_and_removes_partial_file(self):
312
+ with tempfile.TemporaryDirectory() as tmp:
313
+ tmp_path = Path(tmp)
314
+ assets_dir = tmp_path / "assets"
315
+ assets_dir.mkdir(parents=True, exist_ok=True)
316
+
317
+ config = Config(output_dir=tmp_path, auth=Auth())
318
+ source = Source(platform="boosty", author="author", download_assets=True)
319
+ dl = _DummyDownloader(config, source, cast(Database, _DummyDB()))
320
+
321
+ def fake_get(url: str, stream: bool = True, timeout=None):
322
+ return _FakeResponse("audio/mpeg", body=b"complete")
323
+
324
+ dl.session.get = fake_get # type: ignore[method-assign]
325
+
326
+ real_open = open
327
+ open_attempts = 0
328
+
329
+ def flaky_open(path, mode="r", *args, **kwargs):
330
+ nonlocal open_attempts
331
+ if "wb" in mode:
332
+ open_attempts += 1
333
+ wrapped = real_open(path, mode, *args, **kwargs)
334
+ if open_attempts == 1:
335
+ return _FailingWriteFile(wrapped)
336
+ return wrapped
337
+ return real_open(path, mode, *args, **kwargs)
338
+
339
+ with patch("src.downloader.time.sleep"), patch("builtins.open", flaky_open):
340
+ asset_map = dl._download_assets(
341
+ [{"url": "https://cdn.boosty.to/audio/audio-id", "alt": "audio.mp3"}],
342
+ assets_dir,
343
+ )
344
+
345
+ self.assertEqual(open_attempts, 2)
346
+ filename = asset_map["https://cdn.boosty.to/audio/audio-id"]
347
+ self.assertEqual((assets_dir / filename).read_bytes(), b"complete")
348
+ self.assertFalse(any(path.read_bytes() == b"partial" for path in assets_dir.iterdir()))
349
+
350
+
351
+ if __name__ == "__main__":
352
+ unittest.main()
@@ -61,6 +61,9 @@ class ConfigHardeningTests(unittest.TestCase):
61
61
  self.assertIn('title = "Bob\'s \\"backup\\""', toml)
62
62
  self.assertIn('baseURL = "https://example.com/a\\"b"', toml)
63
63
  self.assertIn('locale = "ru"', toml)
64
+ self.assertIn('defaultContentLanguage = "ru"', toml)
65
+ self.assertIn('[languages.ru]', toml)
66
+ self.assertIn(' locale = "ru"', toml)
64
67
  self.assertNotIn('languageCode', toml)
65
68
  self.assertIn('default_theme = "light\\"mode"', toml)
66
69
  finally:
@@ -1,180 +0,0 @@
1
- import tempfile
2
- import unittest
3
- from pathlib import Path
4
- from typing import cast
5
-
6
- from src.config import Auth, Config, Source
7
- from src.database import Database
8
- from src.downloader import BaseDownloader
9
-
10
-
11
- class _FakeResponse:
12
- def __init__(self, content_type: str, body: bytes):
13
- self.headers = {"Content-Type": content_type}
14
- self._body = body
15
-
16
- def raise_for_status(self):
17
- return None
18
-
19
- def iter_content(self, chunk_size: int = 8192):
20
- # Yield at least one chunk to trigger file write.
21
- yield self._body
22
-
23
-
24
- class _DummyDB:
25
- pass
26
-
27
-
28
- class _DummyDownloader(BaseDownloader):
29
- PLATFORM = "dummy"
30
- MAX_WORKERS = 2
31
-
32
- def _setup_session(self):
33
- # Tests patch session.get directly.
34
- return None
35
-
36
- def fetch_posts_list(
37
- self,
38
- existing_ids: set[str] | None = None,
39
- incremental: bool = False,
40
- safety_chunks: int = 1
41
- ):
42
- raise NotImplementedError
43
-
44
- def fetch_post(self, post_id: str):
45
- raise NotImplementedError
46
-
47
- def _parse_post(self, raw_data: dict):
48
- raise NotImplementedError
49
-
50
- def _to_markdown(self, post, asset_map):
51
- raise NotImplementedError
52
-
53
-
54
- class AssetDedupTests(unittest.TestCase):
55
- def test_download_assets_deduplicates_colliding_names(self):
56
- with tempfile.TemporaryDirectory() as tmp:
57
- tmp_path = Path(tmp)
58
- assets_dir = tmp_path / "assets"
59
- assets_dir.mkdir(parents=True, exist_ok=True)
60
-
61
- config = Config(output_dir=tmp_path, auth=Auth())
62
- source = Source(platform="sponsr", author="author", download_assets=True)
63
- dl = _DummyDownloader(config, source, cast(Database, _DummyDB()))
64
-
65
- def fake_get(url: str, stream: bool = True, timeout=None):
66
- # URLs intentionally do not contain extensions.
67
- return _FakeResponse("image/jpeg", body=(url + "\n").encode("ascii"))
68
-
69
- dl.session.get = fake_get # type: ignore[method-assign]
70
-
71
- assets = [
72
- {"url": "https://example.test/media/1", "alt": "same name"},
73
- {"url": "https://example.test/media/2", "alt": "same name"},
74
- ]
75
-
76
- asset_map = dl._download_assets(assets, assets_dir)
77
-
78
- self.assertEqual(set(asset_map.keys()), {a["url"] for a in assets})
79
-
80
- filenames = list(asset_map.values())
81
- self.assertEqual(len(filenames), 2)
82
- self.assertNotEqual(filenames[0], filenames[1])
83
-
84
- for fn in filenames:
85
- self.assertTrue((assets_dir / fn).exists(), msg=f"missing file: {fn}")
86
-
87
- def test_download_assets_deduplicates_when_file_exists(self):
88
- with tempfile.TemporaryDirectory() as tmp:
89
- tmp_path = Path(tmp)
90
- assets_dir = tmp_path / "assets"
91
- assets_dir.mkdir(parents=True, exist_ok=True)
92
-
93
- config = Config(output_dir=tmp_path, auth=Auth())
94
- source = Source(platform="sponsr", author="author", download_assets=True)
95
- dl = _DummyDownloader(config, source, cast(Database, _DummyDB()))
96
-
97
- # Pre-create a file with the expected base name.
98
- base = dl._make_asset_filename(
99
- "https://example.test/media/1",
100
- "image/jpeg",
101
- "same name",
102
- )
103
- (assets_dir / base).write_bytes(b"existing")
104
-
105
- def fake_get(url: str, stream: bool = True, timeout=None):
106
- return _FakeResponse("image/jpeg", body=b"downloaded")
107
-
108
- dl.session.get = fake_get # type: ignore[method-assign]
109
-
110
- assets = [{"url": "https://example.test/media/1", "alt": "same name"}]
111
- asset_map = dl._download_assets(assets, assets_dir)
112
-
113
- self.assertIn("https://example.test/media/1", asset_map)
114
- self.assertNotEqual(asset_map["https://example.test/media/1"], base)
115
- self.assertTrue((assets_dir / asset_map["https://example.test/media/1"]).exists())
116
-
117
- def test_download_assets_keeps_unique_names_under_parallelism(self):
118
- with tempfile.TemporaryDirectory() as tmp:
119
- tmp_path = Path(tmp)
120
- assets_dir = tmp_path / "assets"
121
- assets_dir.mkdir(parents=True, exist_ok=True)
122
-
123
- config = Config(output_dir=tmp_path, auth=Auth())
124
- source = Source(platform="sponsr", author="author", download_assets=True)
125
- dl = _DummyDownloader(config, source, cast(Database, _DummyDB()))
126
- dl.MAX_WORKERS = 5
127
-
128
- def fake_get(url: str, stream: bool = True, timeout=None):
129
- return _FakeResponse("image/jpeg", body=(url + "\n").encode("ascii"))
130
-
131
- dl.session.get = fake_get # type: ignore[method-assign]
132
-
133
- assets = [
134
- {"url": f"https://example.test/media/{i}", "alt": "same name"}
135
- for i in range(20)
136
- ]
137
-
138
- asset_map = dl._download_assets(assets, assets_dir)
139
-
140
- self.assertEqual(len(asset_map), 20)
141
- filenames = list(asset_map.values())
142
- self.assertEqual(len(set(filenames)), 20)
143
- for fn in filenames:
144
- self.assertTrue((assets_dir / fn).exists(), msg=f"missing file: {fn}")
145
-
146
- def test_download_assets_uses_download_url_but_maps_original_url(self):
147
- with tempfile.TemporaryDirectory() as tmp:
148
- tmp_path = Path(tmp)
149
- assets_dir = tmp_path / "assets"
150
- assets_dir.mkdir(parents=True, exist_ok=True)
151
-
152
- config = Config(output_dir=tmp_path, auth=Auth())
153
- source = Source(platform="boosty", author="author", download_assets=True)
154
- dl = _DummyDownloader(config, source, cast(Database, _DummyDB()))
155
-
156
- requested_urls = []
157
-
158
- def fake_get(url: str, stream: bool = True, timeout=None):
159
- requested_urls.append(url)
160
- return _FakeResponse("audio/mpeg", body=b"audio")
161
-
162
- dl.session.get = fake_get # type: ignore[method-assign]
163
-
164
- asset_map = dl._download_assets(
165
- [
166
- {
167
- "url": "https://cdn.boosty.to/audio/audio-id",
168
- "download_url": "https://cdn.boosty.to/audio/audio-id?sign=abc",
169
- "alt": "audio.mp3",
170
- }
171
- ],
172
- assets_dir,
173
- )
174
-
175
- self.assertEqual(requested_urls, ["https://cdn.boosty.to/audio/audio-id?sign=abc"])
176
- self.assertIn("https://cdn.boosty.to/audio/audio-id", asset_map)
177
-
178
-
179
- if __name__ == "__main__":
180
- unittest.main()
File without changes