article-backup 0.3.11__tar.gz → 0.3.13__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {article_backup-0.3.11 → article_backup-0.3.13}/PKG-INFO +8 -1
- {article_backup-0.3.11 → article_backup-0.3.13}/README.md +7 -0
- {article_backup-0.3.11 → article_backup-0.3.13}/article_backup.egg-info/PKG-INFO +8 -1
- {article_backup-0.3.11 → article_backup-0.3.13}/article_backup.egg-info/SOURCES.txt +1 -0
- {article_backup-0.3.11 → article_backup-0.3.13}/backup.py +33 -5
- {article_backup-0.3.11 → article_backup-0.3.13}/pyproject.toml +1 -1
- {article_backup-0.3.11 → article_backup-0.3.13}/src/boosty.py +48 -3
- {article_backup-0.3.11 → article_backup-0.3.13}/src/config.py +18 -1
- {article_backup-0.3.11 → article_backup-0.3.13}/src/downloader.py +6 -1
- {article_backup-0.3.11 → article_backup-0.3.13}/src/sponsr.py +14 -0
- {article_backup-0.3.11 → article_backup-0.3.13}/tests/test_asset_dedup.py +32 -0
- {article_backup-0.3.11 → article_backup-0.3.13}/tests/test_boosty_normalize.py +70 -0
- {article_backup-0.3.11 → article_backup-0.3.13}/tests/test_config_hardening.py +20 -0
- {article_backup-0.3.11 → article_backup-0.3.13}/tests/test_sponsr_normalize.py +1 -0
- article_backup-0.3.13/tests/test_sync_policy.py +129 -0
- {article_backup-0.3.11 → article_backup-0.3.13}/LICENSE +0 -0
- {article_backup-0.3.11 → article_backup-0.3.13}/article_backup.egg-info/dependency_links.txt +0 -0
- {article_backup-0.3.11 → article_backup-0.3.13}/article_backup.egg-info/entry_points.txt +0 -0
- {article_backup-0.3.11 → article_backup-0.3.13}/article_backup.egg-info/requires.txt +0 -0
- {article_backup-0.3.11 → article_backup-0.3.13}/article_backup.egg-info/top_level.txt +0 -0
- {article_backup-0.3.11 → article_backup-0.3.13}/setup.cfg +0 -0
- {article_backup-0.3.11 → article_backup-0.3.13}/src/__init__.py +0 -0
- {article_backup-0.3.11 → article_backup-0.3.13}/src/database.py +0 -0
- {article_backup-0.3.11 → article_backup-0.3.13}/src/utils.py +0 -0
- {article_backup-0.3.11 → article_backup-0.3.13}/tests/test_boosty_empty_link.py +0 -0
- {article_backup-0.3.11 → article_backup-0.3.13}/tests/test_incremental_sync.py +0 -0
- {article_backup-0.3.11 → article_backup-0.3.13}/tests/test_slug_safety.py +0 -0
- {article_backup-0.3.11 → article_backup-0.3.13}/tests/test_sponsr_formatting_fix.py +0 -0
- {article_backup-0.3.11 → article_backup-0.3.13}/tests/test_sponsr_tags.py +0 -0
- {article_backup-0.3.11 → article_backup-0.3.13}/tests/test_video_embed.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: article-backup
|
|
3
|
-
Version: 0.3.11
|
|
3
|
+
Version: 0.3.13
|
|
4
4
|
Summary: Локальный бэкап статей с Sponsr.ru и Boosty.to в Markdown с Hugo-интеграцией
|
|
5
5
|
Author-email: Eugene Chaykin <eugene@chayk.in>
|
|
6
6
|
License: Apache-2.0
|
|
@@ -91,6 +91,11 @@ hugo:
|
|
|
91
91
|
title: "Бэкап статей"
|
|
92
92
|
language_code: "ru"
|
|
93
93
|
|
|
94
|
+
sync:
|
|
95
|
+
# stop: остановиться, если авторизация любого источника не прошла
|
|
96
|
+
# continue: пропустить проблемные источники и собрать сайт из доступных данных
|
|
97
|
+
on_error: stop
|
|
98
|
+
|
|
94
99
|
auth:
|
|
95
100
|
sponsr_cookie_file: ./sponsr_cookie.txt
|
|
96
101
|
boosty_cookie_file: ./boosty_cookie.txt
|
|
@@ -142,6 +147,8 @@ article-backup
|
|
|
142
147
|
python backup.py
|
|
143
148
|
```
|
|
144
149
|
|
|
150
|
+
Перед скачиванием выполняется проверка авторизации для всех источников. По умолчанию `sync.on_error: stop`: если один токен протух, скачивание не начинается и команда завершается с ошибкой. Если указать `sync.on_error: continue`, источники с ошибками авторизации будут пропущены, остальные источники синхронизируются, а Docker-запуск продолжит сборку Hugo-сайта.
|
|
151
|
+
|
|
145
152
|
### Скачать один пост по URL
|
|
146
153
|
|
|
147
154
|
```bash
|
|
@@ -60,6 +60,11 @@ hugo:
|
|
|
60
60
|
title: "Бэкап статей"
|
|
61
61
|
language_code: "ru"
|
|
62
62
|
|
|
63
|
+
sync:
|
|
64
|
+
# stop: остановиться, если авторизация любого источника не прошла
|
|
65
|
+
# continue: пропустить проблемные источники и собрать сайт из доступных данных
|
|
66
|
+
on_error: stop
|
|
67
|
+
|
|
63
68
|
auth:
|
|
64
69
|
sponsr_cookie_file: ./sponsr_cookie.txt
|
|
65
70
|
boosty_cookie_file: ./boosty_cookie.txt
|
|
@@ -111,6 +116,8 @@ article-backup
|
|
|
111
116
|
python backup.py
|
|
112
117
|
```
|
|
113
118
|
|
|
119
|
+
Перед скачиванием выполняется проверка авторизации для всех источников. По умолчанию `sync.on_error: stop`: если один токен протух, скачивание не начинается и команда завершается с ошибкой. Если указать `sync.on_error: continue`, источники с ошибками авторизации будут пропущены, остальные источники синхронизируются, а Docker-запуск продолжит сборку Hugo-сайта.
|
|
120
|
+
|
|
114
121
|
### Скачать один пост по URL
|
|
115
122
|
|
|
116
123
|
```bash
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: article-backup
|
|
3
|
-
Version: 0.3.11
|
|
3
|
+
Version: 0.3.13
|
|
4
4
|
Summary: Локальный бэкап статей с Sponsr.ru и Boosty.to в Markdown с Hugo-интеграцией
|
|
5
5
|
Author-email: Eugene Chaykin <eugene@chayk.in>
|
|
6
6
|
License: Apache-2.0
|
|
@@ -91,6 +91,11 @@ hugo:
|
|
|
91
91
|
title: "Бэкап статей"
|
|
92
92
|
language_code: "ru"
|
|
93
93
|
|
|
94
|
+
sync:
|
|
95
|
+
# stop: остановиться, если авторизация любого источника не прошла
|
|
96
|
+
# continue: пропустить проблемные источники и собрать сайт из доступных данных
|
|
97
|
+
on_error: stop
|
|
98
|
+
|
|
94
99
|
auth:
|
|
95
100
|
sponsr_cookie_file: ./sponsr_cookie.txt
|
|
96
101
|
boosty_cookie_file: ./boosty_cookie.txt
|
|
@@ -142,6 +147,8 @@ article-backup
|
|
|
142
147
|
python backup.py
|
|
143
148
|
```
|
|
144
149
|
|
|
150
|
+
Перед скачиванием выполняется проверка авторизации для всех источников. По умолчанию `sync.on_error: stop`: если один токен протух, скачивание не начинается и команда завершается с ошибкой. Если указать `sync.on_error: continue`, источники с ошибками авторизации будут пропущены, остальные источники синхронизируются, а Docker-запуск продолжит сборку Hugo-сайта.
|
|
151
|
+
|
|
145
152
|
### Скачать один пост по URL
|
|
146
153
|
|
|
147
154
|
```bash
|
|
@@ -26,7 +26,7 @@ def generate_hugo_config(config: Config):
|
|
|
26
26
|
return json.dumps(value, ensure_ascii=False)
|
|
27
27
|
|
|
28
28
|
content = f'''baseURL = {toml_str(config.hugo.base_url)}
|
|
29
|
-
|
|
29
|
+
locale = {toml_str(config.hugo.language_code)}
|
|
30
30
|
title = {toml_str(config.hugo.title)}
|
|
31
31
|
relativeURLs = true
|
|
32
32
|
|
|
@@ -91,16 +91,35 @@ def get_downloader(platform: str, config: Config, source: Source, db: Database):
|
|
|
91
91
|
raise ValueError(f"Неизвестная платформа: {platform}")
|
|
92
92
|
|
|
93
93
|
|
|
94
|
-
def sync_all(config: Config, db: Database):
|
|
95
|
-
"""
|
|
94
|
+
def preflight_sources(config: Config, db: Database):
|
|
95
|
+
"""Проверяет доступность источников до начала синхронизации."""
|
|
96
|
+
ready_sources: list[Source] = []
|
|
96
97
|
errors: list[tuple[Source, Exception]] = []
|
|
98
|
+
|
|
97
99
|
for source in config.sources:
|
|
100
|
+
try:
|
|
101
|
+
downloader = get_downloader(source.platform, config, source, db)
|
|
102
|
+
downloader.check_auth()
|
|
103
|
+
ready_sources.append(source)
|
|
104
|
+
except Exception as e:
|
|
105
|
+
print(f"[{source.platform}] Ошибка проверки авторизации {source.author}: {e}")
|
|
106
|
+
errors.append((source, e))
|
|
107
|
+
|
|
108
|
+
return ready_sources, errors
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
def sync_all(config: Config, db: Database, sources: list[Source] | None = None):
|
|
112
|
+
"""Синхронизирует всех авторов из конфига."""
|
|
113
|
+
errors: list[tuple[Source, Exception]] = []
|
|
114
|
+
for source in sources if sources is not None else config.sources:
|
|
98
115
|
try:
|
|
99
116
|
downloader = get_downloader(source.platform, config, source, db)
|
|
100
117
|
downloader.sync()
|
|
101
118
|
except Exception as e:
|
|
102
119
|
print(f"[{source.platform}] Ошибка при синхронизации {source.author}: {e}")
|
|
103
120
|
errors.append((source, e))
|
|
121
|
+
if config.sync.on_error == 'stop':
|
|
122
|
+
break
|
|
104
123
|
return errors
|
|
105
124
|
|
|
106
125
|
|
|
@@ -181,7 +200,15 @@ def main():
|
|
|
181
200
|
if not config.sources:
|
|
182
201
|
print("Нет источников в конфиге. Добавьте секцию 'sources'.")
|
|
183
202
|
sys.exit(1)
|
|
184
|
-
|
|
203
|
+
ready_sources, preflight_errors = preflight_sources(config, db)
|
|
204
|
+
if preflight_errors:
|
|
205
|
+
sync_errors.extend(preflight_errors)
|
|
206
|
+
if config.sync.on_error == 'stop':
|
|
207
|
+
print("\nОстановлено из-за ошибок проверки авторизации.")
|
|
208
|
+
else:
|
|
209
|
+
print("\nИсточники с ошибками проверки авторизации будут пропущены.")
|
|
210
|
+
if not preflight_errors or config.sync.on_error == 'continue':
|
|
211
|
+
sync_errors.extend(sync_all(config, db, ready_sources))
|
|
185
212
|
|
|
186
213
|
ensure_site_content_link(config)
|
|
187
214
|
generate_hugo_config(config)
|
|
@@ -190,7 +217,8 @@ def main():
|
|
|
190
217
|
print(f"\nЗавершено с ошибками: {len(sync_errors)}")
|
|
191
218
|
for source, error in sync_errors:
|
|
192
219
|
print(f" - [{source.platform}] {source.author}: {error}")
|
|
193
|
-
|
|
220
|
+
if config.sync.on_error == 'stop':
|
|
221
|
+
sys.exit(1)
|
|
194
222
|
|
|
195
223
|
print("\nГотово!")
|
|
196
224
|
|
|
@@ -3,6 +3,7 @@
|
|
|
3
3
|
|
|
4
4
|
import json
|
|
5
5
|
from datetime import datetime, timezone
|
|
6
|
+
from urllib.parse import parse_qsl, urlencode, urlparse
|
|
6
7
|
|
|
7
8
|
import requests
|
|
8
9
|
|
|
@@ -43,6 +44,17 @@ class BoostyDownloader(BaseDownloader):
|
|
|
43
44
|
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
|
|
44
45
|
})
|
|
45
46
|
|
|
47
|
+
def check_auth(self):
|
|
48
|
+
"""Проверяет доступ к ленте автора минимальным API-запросом."""
|
|
49
|
+
url = f"{self.API_BASE}/blog/{self.source.author}/post/?limit=1"
|
|
50
|
+
|
|
51
|
+
def do_request():
|
|
52
|
+
resp = self.session.get(url, timeout=self.TIMEOUT)
|
|
53
|
+
resp.raise_for_status()
|
|
54
|
+
return resp
|
|
55
|
+
|
|
56
|
+
retry_request(do_request, max_retries=3)
|
|
57
|
+
|
|
46
58
|
def fetch_posts_list(
|
|
47
59
|
self,
|
|
48
60
|
existing_ids: set[str] | None = None,
|
|
@@ -147,7 +159,7 @@ class BoostyDownloader(BaseDownloader):
|
|
|
147
159
|
content_blocks = raw_data.get("data", [])
|
|
148
160
|
|
|
149
161
|
# Извлекаем assets
|
|
150
|
-
assets = self._extract_assets(content_blocks)
|
|
162
|
+
assets = self._extract_assets(content_blocks, raw_data.get("signedQuery", ""))
|
|
151
163
|
|
|
152
164
|
return Post(
|
|
153
165
|
post_id=post_id,
|
|
@@ -159,7 +171,7 @@ class BoostyDownloader(BaseDownloader):
|
|
|
159
171
|
assets=assets,
|
|
160
172
|
)
|
|
161
173
|
|
|
162
|
-
def _extract_assets(self, blocks: list[dict]) -> list[dict]:
|
|
174
|
+
def _extract_assets(self, blocks: list[dict], signed_query: str = "") -> list[dict]:
|
|
163
175
|
"""Извлекает URL медиафайлов из блоков контента."""
|
|
164
176
|
assets = []
|
|
165
177
|
|
|
@@ -179,6 +191,16 @@ class BoostyDownloader(BaseDownloader):
|
|
|
179
191
|
if url:
|
|
180
192
|
assets.append({
|
|
181
193
|
"url": url,
|
|
194
|
+
"download_url": self._sign_media_url(url, signed_query),
|
|
195
|
+
"alt": block.get("title", block.get("id", "")),
|
|
196
|
+
})
|
|
197
|
+
|
|
198
|
+
elif block_type == "file":
|
|
199
|
+
url = block.get("url", "")
|
|
200
|
+
if url:
|
|
201
|
+
assets.append({
|
|
202
|
+
"url": url,
|
|
203
|
+
"download_url": self._sign_media_url(url, signed_query),
|
|
182
204
|
"alt": block.get("title", block.get("id", "")),
|
|
183
205
|
})
|
|
184
206
|
|
|
@@ -233,7 +255,7 @@ class BoostyDownloader(BaseDownloader):
|
|
|
233
255
|
continue
|
|
234
256
|
|
|
235
257
|
# Block-level элементы разрывают параграф
|
|
236
|
-
if block_type in ("image", "audio_file", "ok_video"):
|
|
258
|
+
if block_type in ("image", "audio_file", "file", "ok_video"):
|
|
237
259
|
if current_paragraph:
|
|
238
260
|
lines.append("".join(current_paragraph))
|
|
239
261
|
current_paragraph = []
|
|
@@ -282,6 +304,15 @@ class BoostyDownloader(BaseDownloader):
|
|
|
282
304
|
elif url:
|
|
283
305
|
return f"\n🎵 **{title}**: [слушать]({url})\n"
|
|
284
306
|
|
|
307
|
+
elif block_type == "file":
|
|
308
|
+
url = block.get("url", "")
|
|
309
|
+
title = block.get("title") or block.get("id") or "file"
|
|
310
|
+
local = asset_map.get(url)
|
|
311
|
+
if local:
|
|
312
|
+
return f"\n📎 [{title}](assets/{local})\n"
|
|
313
|
+
elif url:
|
|
314
|
+
return f"\n📎 [{title}]({url})\n"
|
|
315
|
+
|
|
285
316
|
elif block_type == "ok_video":
|
|
286
317
|
# Определяем ссылку на видео (приоритет: локальный файл > ok.ru/video > videoembed)
|
|
287
318
|
video_url = self._extract_ok_video_player_url(block)
|
|
@@ -311,6 +342,20 @@ class BoostyDownloader(BaseDownloader):
|
|
|
311
342
|
|
|
312
343
|
return ""
|
|
313
344
|
|
|
345
|
+
def _sign_media_url(self, url: str, signed_query: str) -> str:
|
|
346
|
+
"""Добавляет signedQuery Boosty к URL медиа, не перезаписывая существующие параметры."""
|
|
347
|
+
if not url or not signed_query:
|
|
348
|
+
return url
|
|
349
|
+
|
|
350
|
+
parsed = urlparse(url)
|
|
351
|
+
params = dict(parse_qsl(parsed.query, keep_blank_values=True))
|
|
352
|
+
query = signed_query[1:] if signed_query.startswith("?") else signed_query
|
|
353
|
+
for key, value in parse_qsl(query, keep_blank_values=True):
|
|
354
|
+
if key not in params:
|
|
355
|
+
params[key] = value
|
|
356
|
+
|
|
357
|
+
return parsed._replace(query=urlencode(params)).geturl()
|
|
358
|
+
|
|
314
359
|
def _extract_ok_video_player_url(self, block: dict) -> str:
|
|
315
360
|
"""Выбирает лучший прямой URL видео из ok_video блока."""
|
|
316
361
|
player_urls = block.get("playerUrls")
|
|
@@ -34,12 +34,18 @@ class HugoConfig:
|
|
|
34
34
|
default_theme: str = "light"
|
|
35
35
|
|
|
36
36
|
|
|
37
|
+
@dataclass
|
|
38
|
+
class SyncConfig:
|
|
39
|
+
on_error: Literal['stop', 'continue'] = "stop"
|
|
40
|
+
|
|
41
|
+
|
|
37
42
|
@dataclass
|
|
38
43
|
class Config:
|
|
39
44
|
output_dir: Path
|
|
40
45
|
auth: Auth
|
|
41
46
|
sources: list[Source] = field(default_factory=list)
|
|
42
47
|
hugo: HugoConfig = field(default_factory=HugoConfig)
|
|
48
|
+
sync: SyncConfig = field(default_factory=SyncConfig)
|
|
43
49
|
|
|
44
50
|
|
|
45
51
|
def load_config(config_path: Path) -> Config:
|
|
@@ -105,7 +111,18 @@ def load_config(config_path: Path) -> Config:
|
|
|
105
111
|
default_theme=hugo_data.get('default_theme', HugoConfig.default_theme),
|
|
106
112
|
)
|
|
107
113
|
|
|
108
|
-
|
|
114
|
+
# sync
|
|
115
|
+
sync_data = data.get('sync', {})
|
|
116
|
+
if sync_data is None:
|
|
117
|
+
sync_data = {}
|
|
118
|
+
if not isinstance(sync_data, dict):
|
|
119
|
+
raise ValueError("Секция 'sync' должна быть объектом")
|
|
120
|
+
sync_on_error = sync_data.get('on_error', SyncConfig.on_error)
|
|
121
|
+
if sync_on_error not in ('stop', 'continue'):
|
|
122
|
+
raise ValueError("sync.on_error должен быть 'stop' или 'continue'")
|
|
123
|
+
sync = SyncConfig(on_error=sync_on_error)
|
|
124
|
+
|
|
125
|
+
return Config(output_dir=output_dir, auth=auth, sources=sources, hugo=hugo, sync=sync)
|
|
109
126
|
|
|
110
127
|
|
|
111
128
|
def _to_path(value: str | None) -> Path | None:
|
|
@@ -98,6 +98,10 @@ class BaseDownloader(ABC):
|
|
|
98
98
|
"""Настройка сессии (cookies, headers)."""
|
|
99
99
|
pass
|
|
100
100
|
|
|
101
|
+
def check_auth(self):
|
|
102
|
+
"""Проверяет, что авторизация позволяет читать источник."""
|
|
103
|
+
raise NotImplementedError(f"{self.PLATFORM} не реализует проверку авторизации")
|
|
104
|
+
|
|
101
105
|
@abstractmethod
|
|
102
106
|
def fetch_posts_list(
|
|
103
107
|
self,
|
|
@@ -314,6 +318,7 @@ class BaseDownloader(ABC):
|
|
|
314
318
|
|
|
315
319
|
def download_one(asset: dict) -> tuple[str, str | None]:
|
|
316
320
|
url = asset["url"]
|
|
321
|
+
request_url = asset.get("download_url", url)
|
|
317
322
|
force = asset.get("force", False)
|
|
318
323
|
try:
|
|
319
324
|
# Предварительная проверка (если расширение есть)
|
|
@@ -322,7 +327,7 @@ class BaseDownloader(ABC):
|
|
|
322
327
|
return url, None
|
|
323
328
|
|
|
324
329
|
def do_request():
|
|
325
|
-
resp = self.session.get(url, stream=True, timeout=self.TIMEOUT)
|
|
330
|
+
resp = self.session.get(request_url, stream=True, timeout=self.TIMEOUT)
|
|
326
331
|
resp.raise_for_status()
|
|
327
332
|
return resp
|
|
328
333
|
|
|
@@ -44,6 +44,18 @@ class SponsorDownloader(BaseDownloader):
|
|
|
44
44
|
'X-Requested-With': 'XMLHttpRequest',
|
|
45
45
|
})
|
|
46
46
|
|
|
47
|
+
def check_auth(self):
|
|
48
|
+
"""Проверяет доступ к проекту минимальным API-запросом."""
|
|
49
|
+
project_id = self._get_project_id()
|
|
50
|
+
api_url = f"https://sponsr.ru/project/{project_id}/more-posts/?offset=0"
|
|
51
|
+
|
|
52
|
+
def do_request():
|
|
53
|
+
resp = self.session.get(api_url, timeout=self.TIMEOUT)
|
|
54
|
+
resp.raise_for_status()
|
|
55
|
+
return resp
|
|
56
|
+
|
|
57
|
+
retry_request(do_request, max_retries=3)
|
|
58
|
+
|
|
47
59
|
def _get_project_id(self) -> str:
|
|
48
60
|
"""Получает project_id со страницы проекта."""
|
|
49
61
|
if self._project_id:
|
|
@@ -640,6 +652,8 @@ class SponsorDownloader(BaseDownloader):
|
|
|
640
652
|
markdown = markdown.replace('@@@LBR@@@', r'\[')
|
|
641
653
|
markdown = markdown.replace('@@@RBR@@@', r'\]')
|
|
642
654
|
# Заменяем маркеры пробелов, вставленные в DOM
|
|
655
|
+
markdown = re.sub(r'[ \t]*@@@SP@@@[ \t]*', '@@@SP@@@', markdown)
|
|
656
|
+
markdown = re.sub(r'(?:@@@SP@@@)+', '@@@SP@@@', markdown)
|
|
643
657
|
markdown = markdown.replace('@@@SP@@@', ' ')
|
|
644
658
|
|
|
645
659
|
# Удаляем bidi-маркеры, которые ломают пробелы рядом с текстом
|
|
@@ -143,6 +143,38 @@ class AssetDedupTests(unittest.TestCase):
|
|
|
143
143
|
for fn in filenames:
|
|
144
144
|
self.assertTrue((assets_dir / fn).exists(), msg=f"missing file: {fn}")
|
|
145
145
|
|
|
146
|
+
def test_download_assets_uses_download_url_but_maps_original_url(self):
|
|
147
|
+
with tempfile.TemporaryDirectory() as tmp:
|
|
148
|
+
tmp_path = Path(tmp)
|
|
149
|
+
assets_dir = tmp_path / "assets"
|
|
150
|
+
assets_dir.mkdir(parents=True, exist_ok=True)
|
|
151
|
+
|
|
152
|
+
config = Config(output_dir=tmp_path, auth=Auth())
|
|
153
|
+
source = Source(platform="boosty", author="author", download_assets=True)
|
|
154
|
+
dl = _DummyDownloader(config, source, cast(Database, _DummyDB()))
|
|
155
|
+
|
|
156
|
+
requested_urls = []
|
|
157
|
+
|
|
158
|
+
def fake_get(url: str, stream: bool = True, timeout=None):
|
|
159
|
+
requested_urls.append(url)
|
|
160
|
+
return _FakeResponse("audio/mpeg", body=b"audio")
|
|
161
|
+
|
|
162
|
+
dl.session.get = fake_get # type: ignore[method-assign]
|
|
163
|
+
|
|
164
|
+
asset_map = dl._download_assets(
|
|
165
|
+
[
|
|
166
|
+
{
|
|
167
|
+
"url": "https://cdn.boosty.to/audio/audio-id",
|
|
168
|
+
"download_url": "https://cdn.boosty.to/audio/audio-id?sign=abc",
|
|
169
|
+
"alt": "audio.mp3",
|
|
170
|
+
}
|
|
171
|
+
],
|
|
172
|
+
assets_dir,
|
|
173
|
+
)
|
|
174
|
+
|
|
175
|
+
self.assertEqual(requested_urls, ["https://cdn.boosty.to/audio/audio-id?sign=abc"])
|
|
176
|
+
self.assertIn("https://cdn.boosty.to/audio/audio-id", asset_map)
|
|
177
|
+
|
|
146
178
|
|
|
147
179
|
if __name__ == "__main__":
|
|
148
180
|
unittest.main()
|
|
@@ -143,5 +143,75 @@ class BoostyParagraphTests(unittest.TestCase):
|
|
|
143
143
|
self.assertIn(')\n\nТекст после', md)
|
|
144
144
|
|
|
145
145
|
|
|
146
|
+
class BoostySignedMediaTests(unittest.TestCase):
|
|
147
|
+
def setUp(self):
|
|
148
|
+
self.config = Config(output_dir=Path('/tmp/test'), auth=Auth())
|
|
149
|
+
self.source = Source(platform='boosty', author='test_author')
|
|
150
|
+
self.db = MagicMock(spec=Database)
|
|
151
|
+
with patch('src.boosty.load_cookie', return_value='fake'), \
|
|
152
|
+
patch('src.boosty.load_auth_header', return_value='Bearer fake'):
|
|
153
|
+
self.downloader = BoostyDownloader(self.config, self.source, self.db)
|
|
154
|
+
|
|
155
|
+
def test_parse_post_signs_audio_asset_with_signed_query(self):
|
|
156
|
+
raw = {
|
|
157
|
+
'id': 'post-id',
|
|
158
|
+
'title': 'Post',
|
|
159
|
+
'createdAt': 1735689600,
|
|
160
|
+
'signedQuery': '?sign=abc&expires=123',
|
|
161
|
+
'data': [
|
|
162
|
+
{
|
|
163
|
+
'type': 'audio_file',
|
|
164
|
+
'url': 'https://cdn.boosty.to/audio/audio-id',
|
|
165
|
+
'title': 'Audio title.mp3',
|
|
166
|
+
}
|
|
167
|
+
],
|
|
168
|
+
}
|
|
169
|
+
|
|
170
|
+
post = self.downloader._parse_post(raw)
|
|
171
|
+
|
|
172
|
+
self.assertEqual(post.assets[0]['url'], 'https://cdn.boosty.to/audio/audio-id')
|
|
173
|
+
self.assertEqual(
|
|
174
|
+
post.assets[0]['download_url'],
|
|
175
|
+
'https://cdn.boosty.to/audio/audio-id?sign=abc&expires=123',
|
|
176
|
+
)
|
|
177
|
+
|
|
178
|
+
def test_parse_post_signs_file_asset_with_signed_query(self):
|
|
179
|
+
raw = {
|
|
180
|
+
'id': 'post-id',
|
|
181
|
+
'title': 'Post',
|
|
182
|
+
'createdAt': 1735689600,
|
|
183
|
+
'signedQuery': 'sign=abc&expires=123',
|
|
184
|
+
'data': [
|
|
185
|
+
{
|
|
186
|
+
'type': 'file',
|
|
187
|
+
'url': 'https://cdn.boosty.to/file/file-id?name=doc.pdf',
|
|
188
|
+
'title': 'doc.pdf',
|
|
189
|
+
}
|
|
190
|
+
],
|
|
191
|
+
}
|
|
192
|
+
|
|
193
|
+
post = self.downloader._parse_post(raw)
|
|
194
|
+
|
|
195
|
+
self.assertEqual(post.assets[0]['url'], 'https://cdn.boosty.to/file/file-id?name=doc.pdf')
|
|
196
|
+
self.assertEqual(
|
|
197
|
+
post.assets[0]['download_url'],
|
|
198
|
+
'https://cdn.boosty.to/file/file-id?name=doc.pdf&sign=abc&expires=123',
|
|
199
|
+
)
|
|
200
|
+
|
|
201
|
+
def test_file_block_uses_local_asset_when_downloaded(self):
|
|
202
|
+
block = {
|
|
203
|
+
'type': 'file',
|
|
204
|
+
'url': 'https://cdn.boosty.to/file/file-id',
|
|
205
|
+
'title': 'doc.pdf',
|
|
206
|
+
}
|
|
207
|
+
|
|
208
|
+
md = self.downloader._block_to_markdown(
|
|
209
|
+
block,
|
|
210
|
+
{'https://cdn.boosty.to/file/file-id': 'doc.pdf'},
|
|
211
|
+
)
|
|
212
|
+
|
|
213
|
+
self.assertIn('[doc.pdf](assets/doc.pdf)', md)
|
|
214
|
+
|
|
215
|
+
|
|
146
216
|
if __name__ == '__main__':
|
|
147
217
|
unittest.main()
|
|
@@ -17,6 +17,24 @@ class ConfigHardeningTests(unittest.TestCase):
|
|
|
17
17
|
|
|
18
18
|
self.assertEqual(cfg.output_dir, Path("./backup"))
|
|
19
19
|
self.assertEqual(cfg.sources, [])
|
|
20
|
+
self.assertEqual(cfg.sync.on_error, "stop")
|
|
21
|
+
|
|
22
|
+
def test_load_config_accepts_sync_continue_policy(self):
|
|
23
|
+
with tempfile.TemporaryDirectory() as tmp:
|
|
24
|
+
cfg_path = Path(tmp) / "config.yaml"
|
|
25
|
+
cfg_path.write_text("sync:\n on_error: continue\n", encoding="utf-8")
|
|
26
|
+
|
|
27
|
+
cfg = load_config(cfg_path)
|
|
28
|
+
|
|
29
|
+
self.assertEqual(cfg.sync.on_error, "continue")
|
|
30
|
+
|
|
31
|
+
def test_load_config_rejects_unknown_sync_policy(self):
|
|
32
|
+
with tempfile.TemporaryDirectory() as tmp:
|
|
33
|
+
cfg_path = Path(tmp) / "config.yaml"
|
|
34
|
+
cfg_path.write_text("sync:\n on_error: ignore\n", encoding="utf-8")
|
|
35
|
+
|
|
36
|
+
with self.assertRaisesRegex(ValueError, "sync.on_error"):
|
|
37
|
+
load_config(cfg_path)
|
|
20
38
|
|
|
21
39
|
def test_generate_hugo_config_escapes_quotes(self):
|
|
22
40
|
with tempfile.TemporaryDirectory() as tmp:
|
|
@@ -42,6 +60,8 @@ class ConfigHardeningTests(unittest.TestCase):
|
|
|
42
60
|
|
|
43
61
|
self.assertIn('title = "Bob\'s \\"backup\\""', toml)
|
|
44
62
|
self.assertIn('baseURL = "https://example.com/a\\"b"', toml)
|
|
63
|
+
self.assertIn('locale = "ru"', toml)
|
|
64
|
+
self.assertNotIn('languageCode', toml)
|
|
45
65
|
self.assertIn('default_theme = "light\\"mode"', toml)
|
|
46
66
|
finally:
|
|
47
67
|
os.chdir(old_cwd)
|
|
@@ -119,6 +119,7 @@ class SponsorNormalizeTests(unittest.TestCase):
|
|
|
119
119
|
# Ожидаем пробелы вокруг **жирное**
|
|
120
120
|
self.assertIn('слово **жирное** слово', result)
|
|
121
121
|
self.assertNotIn('слово**жирное**слово', result)
|
|
122
|
+
self.assertNotIn('**жирное**  слово', result)
|
|
122
123
|
|
|
123
124
|
def test_real_world_case_from_issue(self):
|
|
124
125
|
"""Тест реального случая из issue."""
|
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
import unittest
|
|
2
|
+
import sys
|
|
3
|
+
import tempfile
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
import backup
|
|
7
|
+
from src.config import Auth, Config, Source, SyncConfig
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class DummyDownloader:
|
|
11
|
+
checks: list[str] = []
|
|
12
|
+
synced: list[str] = []
|
|
13
|
+
check_failures: dict[str, Exception] = {}
|
|
14
|
+
sync_failures: dict[str, Exception] = {}
|
|
15
|
+
|
|
16
|
+
def __init__(self, config, source, db):
|
|
17
|
+
self.source = source
|
|
18
|
+
|
|
19
|
+
def check_auth(self):
|
|
20
|
+
DummyDownloader.checks.append(self.source.author)
|
|
21
|
+
error = DummyDownloader.check_failures.get(self.source.author)
|
|
22
|
+
if error:
|
|
23
|
+
raise error
|
|
24
|
+
|
|
25
|
+
def sync(self):
|
|
26
|
+
DummyDownloader.synced.append(self.source.author)
|
|
27
|
+
error = DummyDownloader.sync_failures.get(self.source.author)
|
|
28
|
+
if error:
|
|
29
|
+
raise error
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class SyncPolicyTests(unittest.TestCase):
|
|
33
|
+
def setUp(self):
|
|
34
|
+
self.old_get_downloader = backup.get_downloader
|
|
35
|
+
backup.get_downloader = lambda platform, config, source, db: DummyDownloader(config, source, db)
|
|
36
|
+
DummyDownloader.checks = []
|
|
37
|
+
DummyDownloader.synced = []
|
|
38
|
+
DummyDownloader.check_failures = {}
|
|
39
|
+
DummyDownloader.sync_failures = {}
|
|
40
|
+
|
|
41
|
+
def tearDown(self):
|
|
42
|
+
backup.get_downloader = self.old_get_downloader
|
|
43
|
+
|
|
44
|
+
def make_config(self, on_error):
|
|
45
|
+
return Config(
|
|
46
|
+
output_dir=Path("/tmp/test"),
|
|
47
|
+
auth=Auth(),
|
|
48
|
+
sources=[
|
|
49
|
+
Source(platform="sponsr", author="good"),
|
|
50
|
+
Source(platform="boosty", author="bad"),
|
|
51
|
+
],
|
|
52
|
+
sync=SyncConfig(on_error=on_error),
|
|
53
|
+
)
|
|
54
|
+
|
|
55
|
+
def test_preflight_continue_filters_failed_sources(self):
|
|
56
|
+
config = self.make_config("continue")
|
|
57
|
+
DummyDownloader.check_failures = {"bad": RuntimeError("401 Unauthorized")}
|
|
58
|
+
|
|
59
|
+
ready_sources, errors = backup.preflight_sources(config, object())
|
|
60
|
+
|
|
61
|
+
self.assertEqual([source.author for source in ready_sources], ["good"])
|
|
62
|
+
self.assertEqual([source.author for source, _ in errors], ["bad"])
|
|
63
|
+
self.assertEqual(DummyDownloader.checks, ["good", "bad"])
|
|
64
|
+
|
|
65
|
+
def test_sync_all_continue_keeps_syncing_after_source_error(self):
|
|
66
|
+
config = self.make_config("continue")
|
|
67
|
+
DummyDownloader.sync_failures = {"good": RuntimeError("boom")}
|
|
68
|
+
|
|
69
|
+
errors = backup.sync_all(config, object())
|
|
70
|
+
|
|
71
|
+
self.assertEqual([source.author for source, _ in errors], ["good"])
|
|
72
|
+
self.assertEqual(DummyDownloader.synced, ["good", "bad"])
|
|
73
|
+
|
|
74
|
+
def test_sync_all_stop_stops_after_first_source_error(self):
|
|
75
|
+
config = self.make_config("stop")
|
|
76
|
+
DummyDownloader.sync_failures = {"good": RuntimeError("boom")}
|
|
77
|
+
|
|
78
|
+
errors = backup.sync_all(config, object())
|
|
79
|
+
|
|
80
|
+
self.assertEqual([source.author for source, _ in errors], ["good"])
|
|
81
|
+
self.assertEqual(DummyDownloader.synced, ["good"])
|
|
82
|
+
|
|
83
|
+
def test_main_continue_preflight_errors_do_not_exit_with_failure(self):
|
|
84
|
+
config = self.make_config("continue")
|
|
85
|
+
DummyDownloader.check_failures = {"bad": RuntimeError("401 Unauthorized")}
|
|
86
|
+
|
|
87
|
+
class DummyDatabase:
|
|
88
|
+
def __init__(self, path):
|
|
89
|
+
self.path = path
|
|
90
|
+
|
|
91
|
+
def __enter__(self):
|
|
92
|
+
return self
|
|
93
|
+
|
|
94
|
+
def __exit__(self, exc_type, exc, tb):
|
|
95
|
+
return False
|
|
96
|
+
|
|
97
|
+
old_argv = sys.argv
|
|
98
|
+
old_load_config = backup.load_config
|
|
99
|
+
old_database = backup.Database
|
|
100
|
+
old_ensure_link = backup.ensure_site_content_link
|
|
101
|
+
old_generate_hugo_config = backup.generate_hugo_config
|
|
102
|
+
|
|
103
|
+
with tempfile.TemporaryDirectory() as tmp:
|
|
104
|
+
cfg_path = Path(tmp) / "config.yaml"
|
|
105
|
+
cfg_path.write_text("", encoding="utf-8")
|
|
106
|
+
config.output_dir = Path(tmp) / "backup"
|
|
107
|
+
|
|
108
|
+
try:
|
|
109
|
+
sys.argv = ["backup.py", "--config", str(cfg_path)]
|
|
110
|
+
backup.load_config = lambda path: config
|
|
111
|
+
backup.Database = DummyDatabase
|
|
112
|
+
backup.ensure_site_content_link = lambda cfg: None
|
|
113
|
+
backup.generate_hugo_config = lambda cfg: None
|
|
114
|
+
|
|
115
|
+
backup.main()
|
|
116
|
+
except SystemExit as e:
|
|
117
|
+
self.fail(f"main() exited with {e.code} for continue policy")
|
|
118
|
+
finally:
|
|
119
|
+
sys.argv = old_argv
|
|
120
|
+
backup.load_config = old_load_config
|
|
121
|
+
backup.Database = old_database
|
|
122
|
+
backup.ensure_site_content_link = old_ensure_link
|
|
123
|
+
backup.generate_hugo_config = old_generate_hugo_config
|
|
124
|
+
|
|
125
|
+
self.assertEqual(DummyDownloader.synced, ["good"])
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
if __name__ == "__main__":
|
|
129
|
+
unittest.main()
|
|
File without changes
|
{article_backup-0.3.11 → article_backup-0.3.13}/article_backup.egg-info/dependency_links.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|