PyPI - article-backup - Versions diffs - 0.3.12__tar.gz → 0.3.14__tar.gz - Mend

article-backup 0.3.12 (tar.gz) → 0.3.14 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (31) hide show

{article_backup-0.3.12 → article_backup-0.3.14}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: article-backup
-Version: 0.3.12
+Version: 0.3.14
 Summary: Локальный бэкап статей с Sponsr.ru и Boosty.to в Markdown с Hugo-интеграцией
 Author-email: Eugene Chaykin <eugene@chayk.in>
 License: Apache-2.0

{article_backup-0.3.12 → article_backup-0.3.14}/article_backup.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: article-backup
-Version: 0.3.12
+Version: 0.3.14
 Summary: Локальный бэкап статей с Sponsr.ru и Boosty.to в Markdown с Hugo-интеграцией
 Author-email: Eugene Chaykin <eugene@chayk.in>
 License: Apache-2.0

{article_backup-0.3.12 → article_backup-0.3.14}/backup.py RENAMED Viewed

@@ -27,9 +27,13 @@ def generate_hugo_config(config: Config):
     content = f'''baseURL = {toml_str(config.hugo.base_url)}
 locale = {toml_str(config.hugo.language_code)}
+defaultContentLanguage = {toml_str(config.hugo.language_code)}
 title = {toml_str(config.hugo.title)}
 relativeURLs = true
+[languages.{config.hugo.language_code}]
+  locale = {toml_str(config.hugo.language_code)}
 [params]
   default_theme = {toml_str(config.hugo.default_theme)}

{article_backup-0.3.12 → article_backup-0.3.14}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [project]
 name = "article-backup"
-version = "0.3.12"
+version = "0.3.14"
 description = "Локальный бэкап статей с Sponsr.ru и Boosty.to в Markdown с Hugo-интеграцией"
 readme = "README.md"
 license = {text = "Apache-2.0"}

{article_backup-0.3.12 → article_backup-0.3.14}/src/boosty.py RENAMED Viewed

@@ -3,6 +3,7 @@
 import json
 from datetime import datetime, timezone
+from urllib.parse import parse_qsl, urlencode, urlparse
 import requests
@@ -158,7 +159,7 @@ class BoostyDownloader(BaseDownloader):
         content_blocks = raw_data.get("data", [])
         # Извлекаем assets
-        assets = self._extract_assets(content_blocks)
+        assets = self._extract_assets(content_blocks, raw_data.get("signedQuery", ""))
         return Post(
             post_id=post_id,
@@ -170,7 +171,7 @@ class BoostyDownloader(BaseDownloader):
             assets=assets,
         )
-    def _extract_assets(self, blocks: list[dict]) -> list[dict]:
+    def _extract_assets(self, blocks: list[dict], signed_query: str = "") -> list[dict]:
         """Извлекает URL медиафайлов из блоков контента."""
         assets = []
@@ -190,6 +191,16 @@ class BoostyDownloader(BaseDownloader):
                 if url:
                     assets.append({
                         "url": url,
+                        "download_url": self._sign_media_url(url, signed_query),
+                        "alt": block.get("title", block.get("id", "")),
+                    })
+            elif block_type == "file":
+                url = block.get("url", "")
+                if url:
+                    assets.append({
+                        "url": url,
+                        "download_url": self._sign_media_url(url, signed_query),
                         "alt": block.get("title", block.get("id", "")),
                     })
@@ -244,7 +255,7 @@ class BoostyDownloader(BaseDownloader):
                 continue
             # Block-level элементы разрывают параграф
-            if block_type in ("image", "audio_file", "ok_video"):
+            if block_type in ("image", "audio_file", "file", "ok_video"):
                 if current_paragraph:
                     lines.append("".join(current_paragraph))
                     current_paragraph = []
@@ -293,6 +304,15 @@ class BoostyDownloader(BaseDownloader):
             elif url:
                 return f"\n🎵 **{title}**: [слушать]({url})\n"
+        elif block_type == "file":
+            url = block.get("url", "")
+            title = block.get("title") or block.get("id") or "file"
+            local = asset_map.get(url)
+            if local:
+                return f"\n📎 [{title}](assets/{local})\n"
+            elif url:
+                return f"\n📎 [{title}]({url})\n"
         elif block_type == "ok_video":
             # Определяем ссылку на видео (приоритет: локальный файл > ok.ru/video > videoembed)
             video_url = self._extract_ok_video_player_url(block)
@@ -322,6 +342,20 @@ class BoostyDownloader(BaseDownloader):
         return ""
+    def _sign_media_url(self, url: str, signed_query: str) -> str:
+        """Добавляет signedQuery Boosty к URL медиа, не перезаписывая существующие параметры."""
+        if not url or not signed_query:
+            return url
+        parsed = urlparse(url)
+        params = dict(parse_qsl(parsed.query, keep_blank_values=True))
+        query = signed_query[1:] if signed_query.startswith("?") else signed_query
+        for key, value in parse_qsl(query, keep_blank_values=True):
+            if key not in params:
+                params[key] = value
+        return parsed._replace(query=urlencode(params)).geturl()
     def _extract_ok_video_player_url(self, block: dict) -> str:
         """Выбирает лучший прямой URL видео из ok_video блока."""
         player_urls = block.get("playerUrls")

{article_backup-0.3.12 → article_backup-0.3.14}/src/downloader.py RENAMED Viewed

@@ -33,6 +33,7 @@ def retry_request(
     base_delay: float = 1.0,
     max_delay: float = 30.0,
     backoff_factor: float = 2.0,
+    delays: list[float] | None = None,
 ):
     """
     Выполняет функцию с retry и exponential backoff.
@@ -43,6 +44,7 @@ def retry_request(
         base_delay: Начальная задержка в секундах
         max_delay: Максимальная задержка в секундах
         backoff_factor: Множитель для увеличения задержки
+        delays: Явная последовательность задержек между попытками
     """
     last_exception = None
     delay = base_delay
@@ -58,8 +60,11 @@ def retry_request(
                     raise
             if attempt < max_retries - 1:
-                time.sleep(delay)
-                delay = min(delay * backoff_factor, max_delay)
+                if delays:
+                    time.sleep(delays[min(attempt, len(delays) - 1)])
+                else:
+                    time.sleep(delay)
+                    delay = min(delay * backoff_factor, max_delay)
     if last_exception:
         raise last_exception
@@ -318,6 +323,7 @@ class BaseDownloader(ABC):
         def download_one(asset: dict) -> tuple[str, str | None]:
             url = asset["url"]
+            request_url = asset.get("download_url", url)
             force = asset.get("force", False)
             try:
                 # Предварительная проверка (если расширение есть)
@@ -325,43 +331,60 @@ class BaseDownloader(ABC):
                 if ext and not force and not should_download_asset(url, None, self.source.asset_types):
                     return url, None
-                def do_request():
-                    resp = self.session.get(url, stream=True, timeout=self.TIMEOUT)
-                    resp.raise_for_status()
-                    return resp
+                filename: str | None = None
+                filepath: Path | None = None
-                response = retry_request(do_request, max_retries=3)
-                try:
-                    content_type = response.headers.get('Content-Type', '')
+                def download_to_file():
+                    nonlocal filename, filepath
+                    resp = self.session.get(request_url, stream=True, timeout=self.TIMEOUT)
+                    try:
+                        resp.raise_for_status()
+                        content_type = resp.headers.get('Content-Type', '')
-                    # Полная проверка после получения Content-Type
-                    if not force and not should_download_asset(url, content_type, self.source.asset_types):
-                        return url, None
+                        # Полная проверка после получения Content-Type
+                        if not force and not should_download_asset(url, content_type, self.source.asset_types):
+                            return None
-                    filename_base = self._make_asset_filename(url, content_type, asset.get('alt'))
+                        if filename is None or filepath is None:
+                            filename_base = self._make_asset_filename(url, content_type, asset.get('alt'))
-                    with used_lock:
-                        filename = filename_base
-                        filepath = assets_dir / filename
-                        if filename in used_filenames or filepath.exists():
-                            filename = self._deduplicate_filename(filename, url)
-                            filepath = assets_dir / filename
+                            with used_lock:
+                                filename = filename_base
+                                filepath = assets_dir / filename
+                                if filename in used_filenames or filepath.exists():
+                                    filename = self._deduplicate_filename(filename, url)
+                                    filepath = assets_dir / filename
-                        # На всякий случай добиваемся уникальности в рамках сессии
-                        while filename in used_filenames or filepath.exists():
-                            filename = self._deduplicate_filename(filename, url + filename)
-                            filepath = assets_dir / filename
+                                # На всякий случай добиваемся уникальности в рамках сессии
+                                while filename in used_filenames or filepath.exists():
+                                    filename = self._deduplicate_filename(filename, url + filename)
+                                    filepath = assets_dir / filename
-                        used_filenames.add(filename)
+                                used_filenames.add(filename)
-                    if not filepath.exists():
                         with open(filepath, 'wb') as f:
-                            for chunk in response.iter_content(chunk_size=8192):
-                                f.write(chunk)
-                finally:
-                    close = getattr(response, 'close', None)
-                    if callable(close):
-                        close()
+                            for chunk in resp.iter_content(chunk_size=8192):
+                                if chunk:
+                                    f.write(chunk)
+                        return filename
+                    except Exception as e:
+                        if filepath and filepath.exists():
+                            filepath.unlink()
+                        if isinstance(e, OSError) and not isinstance(e, requests.RequestException):
+                            raise requests.RequestException(str(e)) from e
+                        raise
+                    finally:
+                        close = getattr(resp, 'close', None)
+                        if callable(close):
+                            close()
+                filename = retry_request(
+                    download_to_file,
+                    max_retries=10,
+                    delays=[3, 5, 7, 10, 15, 15, 15, 15, 15],
+                )
+                if not filename:
+                    return url, None
                 return url, filename
             except requests.RequestException as e:

article_backup-0.3.14/tests/test_asset_dedup.py ADDED Viewed

@@ -0,0 +1,352 @@
+import tempfile
+import unittest
+from pathlib import Path
+from typing import cast
+from unittest.mock import patch
+import requests
+from src.config import Auth, Config, Source
+from src.database import Database
+from src.downloader import BaseDownloader
+class _FakeResponse:
+    def __init__(self, content_type: str, body: bytes):
+        self.headers = {"Content-Type": content_type}
+        self._body = body
+    def raise_for_status(self):
+        return None
+    def iter_content(self, chunk_size: int = 8192):
+        # Yield at least one chunk to trigger file write.
+        yield self._body
+    def close(self):
+        return None
+class _FailingStreamResponse(_FakeResponse):
+    def iter_content(self, chunk_size: int = 8192):
+        yield self._body
+        raise requests.exceptions.ChunkedEncodingError("stream interrupted")
+class _HttpErrorResponse(_FakeResponse):
+    def __init__(self, status_code: int):
+        super().__init__("text/plain", b"")
+        self.status_code = status_code
+    def raise_for_status(self):
+        response = requests.Response()
+        response.status_code = self.status_code
+        raise requests.HTTPError(f"{self.status_code} error", response=response)
+class _DummyDB:
+    pass
+class _DummyDownloader(BaseDownloader):
+    PLATFORM = "dummy"
+    MAX_WORKERS = 2
+    def _setup_session(self):
+        # Tests patch session.get directly.
+        return None
+    def fetch_posts_list(
+        self,
+        existing_ids: set[str] | None = None,
+        incremental: bool = False,
+        safety_chunks: int = 1
+    ):
+        raise NotImplementedError
+    def fetch_post(self, post_id: str):
+        raise NotImplementedError
+    def _parse_post(self, raw_data: dict):
+        raise NotImplementedError
+    def _to_markdown(self, post, asset_map):
+        raise NotImplementedError
+class _FailingWriteFile:
+    def __init__(self, wrapped):
+        self._wrapped = wrapped
+    def __enter__(self):
+        self._wrapped.__enter__()
+        return self
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        return self._wrapped.__exit__(exc_type, exc_val, exc_tb)
+    def write(self, data: bytes):
+        self._wrapped.write(b"partial")
+        raise OSError("temporary disk write failure")
+class AssetDedupTests(unittest.TestCase):
+    def test_download_assets_deduplicates_colliding_names(self):
+        with tempfile.TemporaryDirectory() as tmp:
+            tmp_path = Path(tmp)
+            assets_dir = tmp_path / "assets"
+            assets_dir.mkdir(parents=True, exist_ok=True)
+            config = Config(output_dir=tmp_path, auth=Auth())
+            source = Source(platform="sponsr", author="author", download_assets=True)
+            dl = _DummyDownloader(config, source, cast(Database, _DummyDB()))
+            def fake_get(url: str, stream: bool = True, timeout=None):
+                # URLs intentionally do not contain extensions.
+                return _FakeResponse("image/jpeg", body=(url + "\n").encode("ascii"))
+            dl.session.get = fake_get  # type: ignore[method-assign]
+            assets = [
+                {"url": "https://example.test/media/1", "alt": "same name"},
+                {"url": "https://example.test/media/2", "alt": "same name"},
+            ]
+            asset_map = dl._download_assets(assets, assets_dir)
+            self.assertEqual(set(asset_map.keys()), {a["url"] for a in assets})
+            filenames = list(asset_map.values())
+            self.assertEqual(len(filenames), 2)
+            self.assertNotEqual(filenames[0], filenames[1])
+            for fn in filenames:
+                self.assertTrue((assets_dir / fn).exists(), msg=f"missing file: {fn}")
+    def test_download_assets_deduplicates_when_file_exists(self):
+        with tempfile.TemporaryDirectory() as tmp:
+            tmp_path = Path(tmp)
+            assets_dir = tmp_path / "assets"
+            assets_dir.mkdir(parents=True, exist_ok=True)
+            config = Config(output_dir=tmp_path, auth=Auth())
+            source = Source(platform="sponsr", author="author", download_assets=True)
+            dl = _DummyDownloader(config, source, cast(Database, _DummyDB()))
+            # Pre-create a file with the expected base name.
+            base = dl._make_asset_filename(
+                "https://example.test/media/1",
+                "image/jpeg",
+                "same name",
+            )
+            (assets_dir / base).write_bytes(b"existing")
+            def fake_get(url: str, stream: bool = True, timeout=None):
+                return _FakeResponse("image/jpeg", body=b"downloaded")
+            dl.session.get = fake_get  # type: ignore[method-assign]
+            assets = [{"url": "https://example.test/media/1", "alt": "same name"}]
+            asset_map = dl._download_assets(assets, assets_dir)
+            self.assertIn("https://example.test/media/1", asset_map)
+            self.assertNotEqual(asset_map["https://example.test/media/1"], base)
+            self.assertTrue((assets_dir / asset_map["https://example.test/media/1"]).exists())
+    def test_download_assets_keeps_unique_names_under_parallelism(self):
+        with tempfile.TemporaryDirectory() as tmp:
+            tmp_path = Path(tmp)
+            assets_dir = tmp_path / "assets"
+            assets_dir.mkdir(parents=True, exist_ok=True)
+            config = Config(output_dir=tmp_path, auth=Auth())
+            source = Source(platform="sponsr", author="author", download_assets=True)
+            dl = _DummyDownloader(config, source, cast(Database, _DummyDB()))
+            dl.MAX_WORKERS = 5
+            def fake_get(url: str, stream: bool = True, timeout=None):
+                return _FakeResponse("image/jpeg", body=(url + "\n").encode("ascii"))
+            dl.session.get = fake_get  # type: ignore[method-assign]
+            assets = [
+                {"url": f"https://example.test/media/{i}", "alt": "same name"}
+                for i in range(20)
+            ]
+            asset_map = dl._download_assets(assets, assets_dir)
+            self.assertEqual(len(asset_map), 20)
+            filenames = list(asset_map.values())
+            self.assertEqual(len(set(filenames)), 20)
+            for fn in filenames:
+                self.assertTrue((assets_dir / fn).exists(), msg=f"missing file: {fn}")
+    def test_download_assets_uses_download_url_but_maps_original_url(self):
+        with tempfile.TemporaryDirectory() as tmp:
+            tmp_path = Path(tmp)
+            assets_dir = tmp_path / "assets"
+            assets_dir.mkdir(parents=True, exist_ok=True)
+            config = Config(output_dir=tmp_path, auth=Auth())
+            source = Source(platform="boosty", author="author", download_assets=True)
+            dl = _DummyDownloader(config, source, cast(Database, _DummyDB()))
+            requested_urls = []
+            def fake_get(url: str, stream: bool = True, timeout=None):
+                requested_urls.append(url)
+                return _FakeResponse("audio/mpeg", body=b"audio")
+            dl.session.get = fake_get  # type: ignore[method-assign]
+            asset_map = dl._download_assets(
+                [
+                    {
+                        "url": "https://cdn.boosty.to/audio/audio-id",
+                        "download_url": "https://cdn.boosty.to/audio/audio-id?sign=abc",
+                        "alt": "audio.mp3",
+                    }
+                ],
+                assets_dir,
+            )
+            self.assertEqual(requested_urls, ["https://cdn.boosty.to/audio/audio-id?sign=abc"])
+            self.assertIn("https://cdn.boosty.to/audio/audio-id", asset_map)
+    def test_download_assets_retries_network_errors_ten_times(self):
+        with tempfile.TemporaryDirectory() as tmp:
+            tmp_path = Path(tmp)
+            assets_dir = tmp_path / "assets"
+            assets_dir.mkdir(parents=True, exist_ok=True)
+            config = Config(output_dir=tmp_path, auth=Auth())
+            source = Source(platform="boosty", author="author", download_assets=True)
+            dl = _DummyDownloader(config, source, cast(Database, _DummyDB()))
+            attempts = 0
+            def fake_get(url: str, stream: bool = True, timeout=None):
+                nonlocal attempts
+                attempts += 1
+                if attempts < 10:
+                    raise requests.ConnectionError("temporary cdn failure")
+                return _FakeResponse("audio/mpeg", body=b"audio")
+            dl.session.get = fake_get  # type: ignore[method-assign]
+            with patch("src.downloader.time.sleep") as sleep_mock:
+                asset_map = dl._download_assets(
+                    [{"url": "https://cdn.boosty.to/audio/audio-id", "alt": "audio.mp3"}],
+                    assets_dir,
+                )
+            self.assertEqual(attempts, 10)
+            self.assertEqual(
+                [call.args[0] for call in sleep_mock.call_args_list],
+                [3, 5, 7, 10, 15, 15, 15, 15, 15],
+            )
+            self.assertIn("https://cdn.boosty.to/audio/audio-id", asset_map)
+    def test_download_assets_retries_stream_errors_and_removes_partial_file(self):
+        with tempfile.TemporaryDirectory() as tmp:
+            tmp_path = Path(tmp)
+            assets_dir = tmp_path / "assets"
+            assets_dir.mkdir(parents=True, exist_ok=True)
+            config = Config(output_dir=tmp_path, auth=Auth())
+            source = Source(platform="boosty", author="author", download_assets=True)
+            dl = _DummyDownloader(config, source, cast(Database, _DummyDB()))
+            attempts = 0
+            def fake_get(url: str, stream: bool = True, timeout=None):
+                nonlocal attempts
+                attempts += 1
+                if attempts == 1:
+                    return _FailingStreamResponse("audio/mpeg", body=b"partial")
+                return _FakeResponse("audio/mpeg", body=b"complete")
+            dl.session.get = fake_get  # type: ignore[method-assign]
+            with patch("src.downloader.time.sleep"):
+                asset_map = dl._download_assets(
+                    [{"url": "https://cdn.boosty.to/audio/audio-id", "alt": "audio.mp3"}],
+                    assets_dir,
+                )
+            self.assertEqual(attempts, 2)
+            filename = asset_map["https://cdn.boosty.to/audio/audio-id"]
+            self.assertEqual((assets_dir / filename).read_bytes(), b"complete")
+            self.assertFalse(any(path.read_bytes() == b"partial" for path in assets_dir.iterdir()))
+    def test_download_assets_does_not_retry_permanent_404(self):
+        with tempfile.TemporaryDirectory() as tmp:
+            tmp_path = Path(tmp)
+            assets_dir = tmp_path / "assets"
+            assets_dir.mkdir(parents=True, exist_ok=True)
+            config = Config(output_dir=tmp_path, auth=Auth())
+            source = Source(platform="boosty", author="author", download_assets=True)
+            dl = _DummyDownloader(config, source, cast(Database, _DummyDB()))
+            attempts = 0
+            def fake_get(url: str, stream: bool = True, timeout=None):
+                nonlocal attempts
+                attempts += 1
+                return _HttpErrorResponse(404)
+            dl.session.get = fake_get  # type: ignore[method-assign]
+            with patch("src.downloader.time.sleep"):
+                asset_map = dl._download_assets(
+                    [{"url": "https://cdn.boosty.to/audio/missing-id", "alt": "missing.mp3"}],
+                    assets_dir,
+                )
+            self.assertEqual(attempts, 1)
+            self.assertEqual(asset_map, {})
+    def test_download_assets_retries_write_errors_and_removes_partial_file(self):
+        with tempfile.TemporaryDirectory() as tmp:
+            tmp_path = Path(tmp)
+            assets_dir = tmp_path / "assets"
+            assets_dir.mkdir(parents=True, exist_ok=True)
+            config = Config(output_dir=tmp_path, auth=Auth())
+            source = Source(platform="boosty", author="author", download_assets=True)
+            dl = _DummyDownloader(config, source, cast(Database, _DummyDB()))
+            def fake_get(url: str, stream: bool = True, timeout=None):
+                return _FakeResponse("audio/mpeg", body=b"complete")
+            dl.session.get = fake_get  # type: ignore[method-assign]
+            real_open = open
+            open_attempts = 0
+            def flaky_open(path, mode="r", *args, **kwargs):
+                nonlocal open_attempts
+                if "wb" in mode:
+                    open_attempts += 1
+                    wrapped = real_open(path, mode, *args, **kwargs)
+                    if open_attempts == 1:
+                        return _FailingWriteFile(wrapped)
+                    return wrapped
+                return real_open(path, mode, *args, **kwargs)
+            with patch("src.downloader.time.sleep"), patch("builtins.open", flaky_open):
+                asset_map = dl._download_assets(
+                    [{"url": "https://cdn.boosty.to/audio/audio-id", "alt": "audio.mp3"}],
+                    assets_dir,
+                )
+            self.assertEqual(open_attempts, 2)
+            filename = asset_map["https://cdn.boosty.to/audio/audio-id"]
+            self.assertEqual((assets_dir / filename).read_bytes(), b"complete")
+            self.assertFalse(any(path.read_bytes() == b"partial" for path in assets_dir.iterdir()))
+if __name__ == "__main__":
+    unittest.main()

{article_backup-0.3.12 → article_backup-0.3.14}/tests/test_boosty_normalize.py RENAMED Viewed

@@ -143,5 +143,75 @@ class BoostyParagraphTests(unittest.TestCase):
         self.assertIn(')\n\nТекст после', md)
+class BoostySignedMediaTests(unittest.TestCase):
+    def setUp(self):
+        self.config = Config(output_dir=Path('/tmp/test'), auth=Auth())
+        self.source = Source(platform='boosty', author='test_author')
+        self.db = MagicMock(spec=Database)
+        with patch('src.boosty.load_cookie', return_value='fake'), \
+             patch('src.boosty.load_auth_header', return_value='Bearer fake'):
+            self.downloader = BoostyDownloader(self.config, self.source, self.db)
+    def test_parse_post_signs_audio_asset_with_signed_query(self):
+        raw = {
+            'id': 'post-id',
+            'title': 'Post',
+            'createdAt': 1735689600,
+            'signedQuery': '?sign=abc&expires=123',
+            'data': [
+                {
+                    'type': 'audio_file',
+                    'url': 'https://cdn.boosty.to/audio/audio-id',
+                    'title': 'Audio title.mp3',
+                }
+            ],
+        }
+        post = self.downloader._parse_post(raw)
+        self.assertEqual(post.assets[0]['url'], 'https://cdn.boosty.to/audio/audio-id')
+        self.assertEqual(
+            post.assets[0]['download_url'],
+            'https://cdn.boosty.to/audio/audio-id?sign=abc&expires=123',
+        )
+    def test_parse_post_signs_file_asset_with_signed_query(self):
+        raw = {
+            'id': 'post-id',
+            'title': 'Post',
+            'createdAt': 1735689600,
+            'signedQuery': 'sign=abc&expires=123',
+            'data': [
+                {
+                    'type': 'file',
+                    'url': 'https://cdn.boosty.to/file/file-id?name=doc.pdf',
+                    'title': 'doc.pdf',
+                }
+            ],
+        }
+        post = self.downloader._parse_post(raw)
+        self.assertEqual(post.assets[0]['url'], 'https://cdn.boosty.to/file/file-id?name=doc.pdf')
+        self.assertEqual(
+            post.assets[0]['download_url'],
+            'https://cdn.boosty.to/file/file-id?name=doc.pdf&sign=abc&expires=123',
+        )
+    def test_file_block_uses_local_asset_when_downloaded(self):
+        block = {
+            'type': 'file',
+            'url': 'https://cdn.boosty.to/file/file-id',
+            'title': 'doc.pdf',
+        }
+        md = self.downloader._block_to_markdown(
+            block,
+            {'https://cdn.boosty.to/file/file-id': 'doc.pdf'},
+        )
+        self.assertIn('[doc.pdf](assets/doc.pdf)', md)
 if __name__ == '__main__':
     unittest.main()

{article_backup-0.3.12 → article_backup-0.3.14}/tests/test_config_hardening.py RENAMED Viewed

@@ -61,6 +61,9 @@ class ConfigHardeningTests(unittest.TestCase):
                 self.assertIn('title = "Bob\'s \\"backup\\""', toml)
                 self.assertIn('baseURL = "https://example.com/a\\"b"', toml)
                 self.assertIn('locale = "ru"', toml)
+                self.assertIn('defaultContentLanguage = "ru"', toml)
+                self.assertIn('[languages.ru]', toml)
+                self.assertIn('  locale = "ru"', toml)
                 self.assertNotIn('languageCode', toml)
                 self.assertIn('default_theme = "light\\"mode"', toml)
             finally:

article_backup-0.3.12/tests/test_asset_dedup.py DELETED Viewed

@@ -1,148 +0,0 @@
-import tempfile
-import unittest
-from pathlib import Path
-from typing import cast
-from src.config import Auth, Config, Source
-from src.database import Database
-from src.downloader import BaseDownloader
-class _FakeResponse:
-    def __init__(self, content_type: str, body: bytes):
-        self.headers = {"Content-Type": content_type}
-        self._body = body
-    def raise_for_status(self):
-        return None
-    def iter_content(self, chunk_size: int = 8192):
-        # Yield at least one chunk to trigger file write.
-        yield self._body
-class _DummyDB:
-    pass
-class _DummyDownloader(BaseDownloader):
-    PLATFORM = "dummy"
-    MAX_WORKERS = 2
-    def _setup_session(self):
-        # Tests patch session.get directly.
-        return None
-    def fetch_posts_list(
-        self,
-        existing_ids: set[str] | None = None,
-        incremental: bool = False,
-        safety_chunks: int = 1
-    ):
-        raise NotImplementedError
-    def fetch_post(self, post_id: str):
-        raise NotImplementedError
-    def _parse_post(self, raw_data: dict):
-        raise NotImplementedError
-    def _to_markdown(self, post, asset_map):
-        raise NotImplementedError
-class AssetDedupTests(unittest.TestCase):
-    def test_download_assets_deduplicates_colliding_names(self):
-        with tempfile.TemporaryDirectory() as tmp:
-            tmp_path = Path(tmp)
-            assets_dir = tmp_path / "assets"
-            assets_dir.mkdir(parents=True, exist_ok=True)
-            config = Config(output_dir=tmp_path, auth=Auth())
-            source = Source(platform="sponsr", author="author", download_assets=True)
-            dl = _DummyDownloader(config, source, cast(Database, _DummyDB()))
-            def fake_get(url: str, stream: bool = True, timeout=None):
-                # URLs intentionally do not contain extensions.
-                return _FakeResponse("image/jpeg", body=(url + "\n").encode("ascii"))
-            dl.session.get = fake_get  # type: ignore[method-assign]
-            assets = [
-                {"url": "https://example.test/media/1", "alt": "same name"},
-                {"url": "https://example.test/media/2", "alt": "same name"},
-            ]
-            asset_map = dl._download_assets(assets, assets_dir)
-            self.assertEqual(set(asset_map.keys()), {a["url"] for a in assets})
-            filenames = list(asset_map.values())
-            self.assertEqual(len(filenames), 2)
-            self.assertNotEqual(filenames[0], filenames[1])
-            for fn in filenames:
-                self.assertTrue((assets_dir / fn).exists(), msg=f"missing file: {fn}")
-    def test_download_assets_deduplicates_when_file_exists(self):
-        with tempfile.TemporaryDirectory() as tmp:
-            tmp_path = Path(tmp)
-            assets_dir = tmp_path / "assets"
-            assets_dir.mkdir(parents=True, exist_ok=True)
-            config = Config(output_dir=tmp_path, auth=Auth())
-            source = Source(platform="sponsr", author="author", download_assets=True)
-            dl = _DummyDownloader(config, source, cast(Database, _DummyDB()))
-            # Pre-create a file with the expected base name.
-            base = dl._make_asset_filename(
-                "https://example.test/media/1",
-                "image/jpeg",
-                "same name",
-            )
-            (assets_dir / base).write_bytes(b"existing")
-            def fake_get(url: str, stream: bool = True, timeout=None):
-                return _FakeResponse("image/jpeg", body=b"downloaded")
-            dl.session.get = fake_get  # type: ignore[method-assign]
-            assets = [{"url": "https://example.test/media/1", "alt": "same name"}]
-            asset_map = dl._download_assets(assets, assets_dir)
-            self.assertIn("https://example.test/media/1", asset_map)
-            self.assertNotEqual(asset_map["https://example.test/media/1"], base)
-            self.assertTrue((assets_dir / asset_map["https://example.test/media/1"]).exists())
-    def test_download_assets_keeps_unique_names_under_parallelism(self):
-        with tempfile.TemporaryDirectory() as tmp:
-            tmp_path = Path(tmp)
-            assets_dir = tmp_path / "assets"
-            assets_dir.mkdir(parents=True, exist_ok=True)
-            config = Config(output_dir=tmp_path, auth=Auth())
-            source = Source(platform="sponsr", author="author", download_assets=True)
-            dl = _DummyDownloader(config, source, cast(Database, _DummyDB()))
-            dl.MAX_WORKERS = 5
-            def fake_get(url: str, stream: bool = True, timeout=None):
-                return _FakeResponse("image/jpeg", body=(url + "\n").encode("ascii"))
-            dl.session.get = fake_get  # type: ignore[method-assign]
-            assets = [
-                {"url": f"https://example.test/media/{i}", "alt": "same name"}
-                for i in range(20)
-            ]
-            asset_map = dl._download_assets(assets, assets_dir)
-            self.assertEqual(len(asset_map), 20)
-            filenames = list(asset_map.values())
-            self.assertEqual(len(set(filenames)), 20)
-            for fn in filenames:
-                self.assertTrue((assets_dir / fn).exists(), msg=f"missing file: {fn}")
-if __name__ == "__main__":
-    unittest.main()