article-backup 0.3.10__tar.gz → 0.3.12__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {article_backup-0.3.10 → article_backup-0.3.12}/PKG-INFO +8 -1
- {article_backup-0.3.10 → article_backup-0.3.12}/README.md +7 -0
- {article_backup-0.3.10 → article_backup-0.3.12}/article_backup.egg-info/PKG-INFO +8 -1
- {article_backup-0.3.10 → article_backup-0.3.12}/article_backup.egg-info/SOURCES.txt +1 -0
- {article_backup-0.3.10 → article_backup-0.3.12}/backup.py +33 -5
- {article_backup-0.3.10 → article_backup-0.3.12}/pyproject.toml +1 -1
- {article_backup-0.3.10 → article_backup-0.3.12}/src/boosty.py +39 -15
- {article_backup-0.3.10 → article_backup-0.3.12}/src/config.py +18 -1
- {article_backup-0.3.10 → article_backup-0.3.12}/src/downloader.py +7 -2
- {article_backup-0.3.10 → article_backup-0.3.12}/src/sponsr.py +14 -0
- {article_backup-0.3.10 → article_backup-0.3.12}/tests/test_config_hardening.py +20 -0
- {article_backup-0.3.10 → article_backup-0.3.12}/tests/test_sponsr_normalize.py +1 -0
- article_backup-0.3.12/tests/test_sync_policy.py +129 -0
- {article_backup-0.3.10 → article_backup-0.3.12}/tests/test_video_embed.py +137 -18
- {article_backup-0.3.10 → article_backup-0.3.12}/LICENSE +0 -0
- {article_backup-0.3.10 → article_backup-0.3.12}/article_backup.egg-info/dependency_links.txt +0 -0
- {article_backup-0.3.10 → article_backup-0.3.12}/article_backup.egg-info/entry_points.txt +0 -0
- {article_backup-0.3.10 → article_backup-0.3.12}/article_backup.egg-info/requires.txt +0 -0
- {article_backup-0.3.10 → article_backup-0.3.12}/article_backup.egg-info/top_level.txt +0 -0
- {article_backup-0.3.10 → article_backup-0.3.12}/setup.cfg +0 -0
- {article_backup-0.3.10 → article_backup-0.3.12}/src/__init__.py +0 -0
- {article_backup-0.3.10 → article_backup-0.3.12}/src/database.py +0 -0
- {article_backup-0.3.10 → article_backup-0.3.12}/src/utils.py +0 -0
- {article_backup-0.3.10 → article_backup-0.3.12}/tests/test_asset_dedup.py +0 -0
- {article_backup-0.3.10 → article_backup-0.3.12}/tests/test_boosty_empty_link.py +0 -0
- {article_backup-0.3.10 → article_backup-0.3.12}/tests/test_boosty_normalize.py +0 -0
- {article_backup-0.3.10 → article_backup-0.3.12}/tests/test_incremental_sync.py +0 -0
- {article_backup-0.3.10 → article_backup-0.3.12}/tests/test_slug_safety.py +0 -0
- {article_backup-0.3.10 → article_backup-0.3.12}/tests/test_sponsr_formatting_fix.py +0 -0
- {article_backup-0.3.10 → article_backup-0.3.12}/tests/test_sponsr_tags.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: article-backup
|
|
3
|
-
Version: 0.3.10
|
|
3
|
+
Version: 0.3.12
|
|
4
4
|
Summary: Локальный бэкап статей с Sponsr.ru и Boosty.to в Markdown с Hugo-интеграцией
|
|
5
5
|
Author-email: Eugene Chaykin <eugene@chayk.in>
|
|
6
6
|
License: Apache-2.0
|
|
@@ -91,6 +91,11 @@ hugo:
|
|
|
91
91
|
title: "Бэкап статей"
|
|
92
92
|
language_code: "ru"
|
|
93
93
|
|
|
94
|
+
sync:
|
|
95
|
+
# stop: остановиться, если авторизация любого источника не прошла
|
|
96
|
+
# continue: пропустить проблемные источники и собрать сайт из доступных данных
|
|
97
|
+
on_error: stop
|
|
98
|
+
|
|
94
99
|
auth:
|
|
95
100
|
sponsr_cookie_file: ./sponsr_cookie.txt
|
|
96
101
|
boosty_cookie_file: ./boosty_cookie.txt
|
|
@@ -142,6 +147,8 @@ article-backup
|
|
|
142
147
|
python backup.py
|
|
143
148
|
```
|
|
144
149
|
|
|
150
|
+
Перед скачиванием выполняется проверка авторизации для всех источников. По умолчанию `sync.on_error: stop`: если один токен протух, скачивание не начинается и команда завершается с ошибкой. Если указать `sync.on_error: continue`, источники с ошибками авторизации будут пропущены, остальные источники синхронизируются, а Docker-запуск продолжит сборку Hugo-сайта.
|
|
151
|
+
|
|
145
152
|
### Скачать один пост по URL
|
|
146
153
|
|
|
147
154
|
```bash
|
|
@@ -60,6 +60,11 @@ hugo:
|
|
|
60
60
|
title: "Бэкап статей"
|
|
61
61
|
language_code: "ru"
|
|
62
62
|
|
|
63
|
+
sync:
|
|
64
|
+
# stop: остановиться, если авторизация любого источника не прошла
|
|
65
|
+
# continue: пропустить проблемные источники и собрать сайт из доступных данных
|
|
66
|
+
on_error: stop
|
|
67
|
+
|
|
63
68
|
auth:
|
|
64
69
|
sponsr_cookie_file: ./sponsr_cookie.txt
|
|
65
70
|
boosty_cookie_file: ./boosty_cookie.txt
|
|
@@ -111,6 +116,8 @@ article-backup
|
|
|
111
116
|
python backup.py
|
|
112
117
|
```
|
|
113
118
|
|
|
119
|
+
Перед скачиванием выполняется проверка авторизации для всех источников. По умолчанию `sync.on_error: stop`: если один токен протух, скачивание не начинается и команда завершается с ошибкой. Если указать `sync.on_error: continue`, источники с ошибками авторизации будут пропущены, остальные источники синхронизируются, а Docker-запуск продолжит сборку Hugo-сайта.
|
|
120
|
+
|
|
114
121
|
### Скачать один пост по URL
|
|
115
122
|
|
|
116
123
|
```bash
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: article-backup
|
|
3
|
-
Version: 0.3.10
|
|
3
|
+
Version: 0.3.12
|
|
4
4
|
Summary: Локальный бэкап статей с Sponsr.ru и Boosty.to в Markdown с Hugo-интеграцией
|
|
5
5
|
Author-email: Eugene Chaykin <eugene@chayk.in>
|
|
6
6
|
License: Apache-2.0
|
|
@@ -91,6 +91,11 @@ hugo:
|
|
|
91
91
|
title: "Бэкап статей"
|
|
92
92
|
language_code: "ru"
|
|
93
93
|
|
|
94
|
+
sync:
|
|
95
|
+
# stop: остановиться, если авторизация любого источника не прошла
|
|
96
|
+
# continue: пропустить проблемные источники и собрать сайт из доступных данных
|
|
97
|
+
on_error: stop
|
|
98
|
+
|
|
94
99
|
auth:
|
|
95
100
|
sponsr_cookie_file: ./sponsr_cookie.txt
|
|
96
101
|
boosty_cookie_file: ./boosty_cookie.txt
|
|
@@ -142,6 +147,8 @@ article-backup
|
|
|
142
147
|
python backup.py
|
|
143
148
|
```
|
|
144
149
|
|
|
150
|
+
Перед скачиванием выполняется проверка авторизации для всех источников. По умолчанию `sync.on_error: stop`: если один токен протух, скачивание не начинается и команда завершается с ошибкой. Если указать `sync.on_error: continue`, источники с ошибками авторизации будут пропущены, остальные источники синхронизируются, а Docker-запуск продолжит сборку Hugo-сайта.
|
|
151
|
+
|
|
145
152
|
### Скачать один пост по URL
|
|
146
153
|
|
|
147
154
|
```bash
|
|
@@ -26,7 +26,7 @@ def generate_hugo_config(config: Config):
|
|
|
26
26
|
return json.dumps(value, ensure_ascii=False)
|
|
27
27
|
|
|
28
28
|
content = f'''baseURL = {toml_str(config.hugo.base_url)}
|
|
29
|
-
|
|
29
|
+
locale = {toml_str(config.hugo.language_code)}
|
|
30
30
|
title = {toml_str(config.hugo.title)}
|
|
31
31
|
relativeURLs = true
|
|
32
32
|
|
|
@@ -91,16 +91,35 @@ def get_downloader(platform: str, config: Config, source: Source, db: Database):
|
|
|
91
91
|
raise ValueError(f"Неизвестная платформа: {platform}")
|
|
92
92
|
|
|
93
93
|
|
|
94
|
-
def
|
|
95
|
-
"""
|
|
94
|
+
def preflight_sources(config: Config, db: Database):
|
|
95
|
+
"""Проверяет доступность источников до начала синхронизации."""
|
|
96
|
+
ready_sources: list[Source] = []
|
|
96
97
|
errors: list[tuple[Source, Exception]] = []
|
|
98
|
+
|
|
97
99
|
for source in config.sources:
|
|
100
|
+
try:
|
|
101
|
+
downloader = get_downloader(source.platform, config, source, db)
|
|
102
|
+
downloader.check_auth()
|
|
103
|
+
ready_sources.append(source)
|
|
104
|
+
except Exception as e:
|
|
105
|
+
print(f"[{source.platform}] Ошибка проверки авторизации {source.author}: {e}")
|
|
106
|
+
errors.append((source, e))
|
|
107
|
+
|
|
108
|
+
return ready_sources, errors
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
def sync_all(config: Config, db: Database, sources: list[Source] | None = None):
|
|
112
|
+
"""Синхронизирует всех авторов из конфига."""
|
|
113
|
+
errors: list[tuple[Source, Exception]] = []
|
|
114
|
+
for source in sources if sources is not None else config.sources:
|
|
98
115
|
try:
|
|
99
116
|
downloader = get_downloader(source.platform, config, source, db)
|
|
100
117
|
downloader.sync()
|
|
101
118
|
except Exception as e:
|
|
102
119
|
print(f"[{source.platform}] Ошибка при синхронизации {source.author}: {e}")
|
|
103
120
|
errors.append((source, e))
|
|
121
|
+
if config.sync.on_error == 'stop':
|
|
122
|
+
break
|
|
104
123
|
return errors
|
|
105
124
|
|
|
106
125
|
|
|
@@ -181,7 +200,15 @@ def main():
|
|
|
181
200
|
if not config.sources:
|
|
182
201
|
print("Нет источников в конфиге. Добавьте секцию 'sources'.")
|
|
183
202
|
sys.exit(1)
|
|
184
|
-
|
|
203
|
+
ready_sources, preflight_errors = preflight_sources(config, db)
|
|
204
|
+
if preflight_errors:
|
|
205
|
+
sync_errors.extend(preflight_errors)
|
|
206
|
+
if config.sync.on_error == 'stop':
|
|
207
|
+
print("\nОстановлено из-за ошибок проверки авторизации.")
|
|
208
|
+
else:
|
|
209
|
+
print("\nИсточники с ошибками проверки авторизации будут пропущены.")
|
|
210
|
+
if not preflight_errors or config.sync.on_error == 'continue':
|
|
211
|
+
sync_errors.extend(sync_all(config, db, ready_sources))
|
|
185
212
|
|
|
186
213
|
ensure_site_content_link(config)
|
|
187
214
|
generate_hugo_config(config)
|
|
@@ -190,7 +217,8 @@ def main():
|
|
|
190
217
|
print(f"\nЗавершено с ошибками: {len(sync_errors)}")
|
|
191
218
|
for source, error in sync_errors:
|
|
192
219
|
print(f" - [{source.platform}] {source.author}: {error}")
|
|
193
|
-
|
|
220
|
+
if config.sync.on_error == 'stop':
|
|
221
|
+
sys.exit(1)
|
|
194
222
|
|
|
195
223
|
print("\nГотово!")
|
|
196
224
|
|
|
@@ -43,6 +43,17 @@ class BoostyDownloader(BaseDownloader):
|
|
|
43
43
|
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
|
|
44
44
|
})
|
|
45
45
|
|
|
46
|
+
def check_auth(self):
|
|
47
|
+
"""Проверяет доступ к ленте автора минимальным API-запросом."""
|
|
48
|
+
url = f"{self.API_BASE}/blog/{self.source.author}/post/?limit=1"
|
|
49
|
+
|
|
50
|
+
def do_request():
|
|
51
|
+
resp = self.session.get(url, timeout=self.TIMEOUT)
|
|
52
|
+
resp.raise_for_status()
|
|
53
|
+
return resp
|
|
54
|
+
|
|
55
|
+
retry_request(do_request, max_retries=3)
|
|
56
|
+
|
|
46
57
|
def fetch_posts_list(
|
|
47
58
|
self,
|
|
48
59
|
existing_ids: set[str] | None = None,
|
|
@@ -183,19 +194,21 @@ class BoostyDownloader(BaseDownloader):
|
|
|
183
194
|
})
|
|
184
195
|
|
|
185
196
|
elif block_type == "ok_video":
|
|
197
|
+
# Превью скачивается всегда (force=True обходит фильтр asset_types)
|
|
198
|
+
preview = block.get("previewUrl") or block.get("preview") or ""
|
|
199
|
+
if preview:
|
|
200
|
+
assets.append({
|
|
201
|
+
"url": preview,
|
|
202
|
+
"alt": f"video-preview-{block.get('id', '')}",
|
|
203
|
+
"force": True,
|
|
204
|
+
})
|
|
205
|
+
# Видео скачивается через обычный механизм (фильтруется по asset_types)
|
|
186
206
|
video_url = self._extract_ok_video_player_url(block)
|
|
187
207
|
if video_url:
|
|
188
208
|
assets.append({
|
|
189
209
|
"url": video_url,
|
|
190
210
|
"alt": block.get("title") or f"video-{block.get('id', '')}",
|
|
191
211
|
})
|
|
192
|
-
else:
|
|
193
|
-
preview = block.get("previewUrl") or block.get("preview") or ""
|
|
194
|
-
if preview:
|
|
195
|
-
assets.append({
|
|
196
|
-
"url": preview,
|
|
197
|
-
"alt": f"video-preview-{block.get('id', '')}",
|
|
198
|
-
})
|
|
199
212
|
|
|
200
213
|
return assets
|
|
201
214
|
|
|
@@ -281,16 +294,27 @@ class BoostyDownloader(BaseDownloader):
|
|
|
281
294
|
return f"\n🎵 **{title}**: [слушать]({url})\n"
|
|
282
295
|
|
|
283
296
|
elif block_type == "ok_video":
|
|
297
|
+
# Определяем ссылку на видео (приоритет: локальный файл > ok.ru/video > videoembed)
|
|
284
298
|
video_url = self._extract_ok_video_player_url(block)
|
|
299
|
+
video_link = ""
|
|
285
300
|
if video_url:
|
|
286
|
-
|
|
287
|
-
if
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
301
|
+
local_video = asset_map.get(video_url)
|
|
302
|
+
if local_video:
|
|
303
|
+
video_link = f"assets/{local_video}"
|
|
304
|
+
else:
|
|
305
|
+
video_link = video_url
|
|
306
|
+
if not video_link:
|
|
307
|
+
video_link = self._extract_ok_video_fallback_url(block)
|
|
308
|
+
if not video_link:
|
|
309
|
+
return ""
|
|
310
|
+
|
|
311
|
+
# Определяем превью-картинку
|
|
312
|
+
preview_url = block.get("previewUrl") or block.get("preview") or ""
|
|
313
|
+
local_preview = asset_map.get(preview_url) if preview_url else None
|
|
314
|
+
|
|
315
|
+
if local_preview:
|
|
316
|
+
return f"\n[]({video_link})\n"
|
|
317
|
+
return f"\n[\U0001f4f9 Видео]({video_link})\n"
|
|
294
318
|
|
|
295
319
|
elif block_type and block_type not in self._warned_unknown_block_types:
|
|
296
320
|
print(f" [boosty] Пропущен неподдерживаемый тип блока: {block_type}")
|
|
@@ -34,12 +34,18 @@ class HugoConfig:
|
|
|
34
34
|
default_theme: str = "light"
|
|
35
35
|
|
|
36
36
|
|
|
37
|
+
@dataclass
|
|
38
|
+
class SyncConfig:
|
|
39
|
+
on_error: Literal['stop', 'continue'] = "stop"
|
|
40
|
+
|
|
41
|
+
|
|
37
42
|
@dataclass
|
|
38
43
|
class Config:
|
|
39
44
|
output_dir: Path
|
|
40
45
|
auth: Auth
|
|
41
46
|
sources: list[Source] = field(default_factory=list)
|
|
42
47
|
hugo: HugoConfig = field(default_factory=HugoConfig)
|
|
48
|
+
sync: SyncConfig = field(default_factory=SyncConfig)
|
|
43
49
|
|
|
44
50
|
|
|
45
51
|
def load_config(config_path: Path) -> Config:
|
|
@@ -105,7 +111,18 @@ def load_config(config_path: Path) -> Config:
|
|
|
105
111
|
default_theme=hugo_data.get('default_theme', HugoConfig.default_theme),
|
|
106
112
|
)
|
|
107
113
|
|
|
108
|
-
|
|
114
|
+
# sync
|
|
115
|
+
sync_data = data.get('sync', {})
|
|
116
|
+
if sync_data is None:
|
|
117
|
+
sync_data = {}
|
|
118
|
+
if not isinstance(sync_data, dict):
|
|
119
|
+
raise ValueError("Секция 'sync' должна быть объектом")
|
|
120
|
+
sync_on_error = sync_data.get('on_error', SyncConfig.on_error)
|
|
121
|
+
if sync_on_error not in ('stop', 'continue'):
|
|
122
|
+
raise ValueError("sync.on_error должен быть 'stop' или 'continue'")
|
|
123
|
+
sync = SyncConfig(on_error=sync_on_error)
|
|
124
|
+
|
|
125
|
+
return Config(output_dir=output_dir, auth=auth, sources=sources, hugo=hugo, sync=sync)
|
|
109
126
|
|
|
110
127
|
|
|
111
128
|
def _to_path(value: str | None) -> Path | None:
|
|
@@ -98,6 +98,10 @@ class BaseDownloader(ABC):
|
|
|
98
98
|
"""Настройка сессии (cookies, headers)."""
|
|
99
99
|
pass
|
|
100
100
|
|
|
101
|
+
def check_auth(self):
|
|
102
|
+
"""Проверяет, что авторизация позволяет читать источник."""
|
|
103
|
+
raise NotImplementedError(f"{self.PLATFORM} не реализует проверку авторизации")
|
|
104
|
+
|
|
101
105
|
@abstractmethod
|
|
102
106
|
def fetch_posts_list(
|
|
103
107
|
self,
|
|
@@ -314,10 +318,11 @@ class BaseDownloader(ABC):
|
|
|
314
318
|
|
|
315
319
|
def download_one(asset: dict) -> tuple[str, str | None]:
|
|
316
320
|
url = asset["url"]
|
|
321
|
+
force = asset.get("force", False)
|
|
317
322
|
try:
|
|
318
323
|
# Предварительная проверка (если расширение есть)
|
|
319
324
|
ext = Path(urlparse(url).path).suffix.lower()
|
|
320
|
-
if ext and not should_download_asset(url, None, self.source.asset_types):
|
|
325
|
+
if ext and not force and not should_download_asset(url, None, self.source.asset_types):
|
|
321
326
|
return url, None
|
|
322
327
|
|
|
323
328
|
def do_request():
|
|
@@ -330,7 +335,7 @@ class BaseDownloader(ABC):
|
|
|
330
335
|
content_type = response.headers.get('Content-Type', '')
|
|
331
336
|
|
|
332
337
|
# Полная проверка после получения Content-Type
|
|
333
|
-
if not should_download_asset(url, content_type, self.source.asset_types):
|
|
338
|
+
if not force and not should_download_asset(url, content_type, self.source.asset_types):
|
|
334
339
|
return url, None
|
|
335
340
|
|
|
336
341
|
filename_base = self._make_asset_filename(url, content_type, asset.get('alt'))
|
|
@@ -44,6 +44,18 @@ class SponsorDownloader(BaseDownloader):
|
|
|
44
44
|
'X-Requested-With': 'XMLHttpRequest',
|
|
45
45
|
})
|
|
46
46
|
|
|
47
|
+
def check_auth(self):
|
|
48
|
+
"""Проверяет доступ к проекту минимальным API-запросом."""
|
|
49
|
+
project_id = self._get_project_id()
|
|
50
|
+
api_url = f"https://sponsr.ru/project/{project_id}/more-posts/?offset=0"
|
|
51
|
+
|
|
52
|
+
def do_request():
|
|
53
|
+
resp = self.session.get(api_url, timeout=self.TIMEOUT)
|
|
54
|
+
resp.raise_for_status()
|
|
55
|
+
return resp
|
|
56
|
+
|
|
57
|
+
retry_request(do_request, max_retries=3)
|
|
58
|
+
|
|
47
59
|
def _get_project_id(self) -> str:
|
|
48
60
|
"""Получает project_id со страницы проекта."""
|
|
49
61
|
if self._project_id:
|
|
@@ -640,6 +652,8 @@ class SponsorDownloader(BaseDownloader):
|
|
|
640
652
|
markdown = markdown.replace('@@@LBR@@@', r'\[')
|
|
641
653
|
markdown = markdown.replace('@@@RBR@@@', r'\]')
|
|
642
654
|
# Заменяем маркеры пробелов, вставленные в DOM
|
|
655
|
+
markdown = re.sub(r'[ \t]*@@@SP@@@[ \t]*', '@@@SP@@@', markdown)
|
|
656
|
+
markdown = re.sub(r'(?:@@@SP@@@)+', '@@@SP@@@', markdown)
|
|
643
657
|
markdown = markdown.replace('@@@SP@@@', ' ')
|
|
644
658
|
|
|
645
659
|
# Удаляем bidi-маркеры, которые ломают пробелы рядом с текстом
|
|
@@ -17,6 +17,24 @@ class ConfigHardeningTests(unittest.TestCase):
|
|
|
17
17
|
|
|
18
18
|
self.assertEqual(cfg.output_dir, Path("./backup"))
|
|
19
19
|
self.assertEqual(cfg.sources, [])
|
|
20
|
+
self.assertEqual(cfg.sync.on_error, "stop")
|
|
21
|
+
|
|
22
|
+
def test_load_config_accepts_sync_continue_policy(self):
|
|
23
|
+
with tempfile.TemporaryDirectory() as tmp:
|
|
24
|
+
cfg_path = Path(tmp) / "config.yaml"
|
|
25
|
+
cfg_path.write_text("sync:\n on_error: continue\n", encoding="utf-8")
|
|
26
|
+
|
|
27
|
+
cfg = load_config(cfg_path)
|
|
28
|
+
|
|
29
|
+
self.assertEqual(cfg.sync.on_error, "continue")
|
|
30
|
+
|
|
31
|
+
def test_load_config_rejects_unknown_sync_policy(self):
|
|
32
|
+
with tempfile.TemporaryDirectory() as tmp:
|
|
33
|
+
cfg_path = Path(tmp) / "config.yaml"
|
|
34
|
+
cfg_path.write_text("sync:\n on_error: ignore\n", encoding="utf-8")
|
|
35
|
+
|
|
36
|
+
with self.assertRaisesRegex(ValueError, "sync.on_error"):
|
|
37
|
+
load_config(cfg_path)
|
|
20
38
|
|
|
21
39
|
def test_generate_hugo_config_escapes_quotes(self):
|
|
22
40
|
with tempfile.TemporaryDirectory() as tmp:
|
|
@@ -42,6 +60,8 @@ class ConfigHardeningTests(unittest.TestCase):
|
|
|
42
60
|
|
|
43
61
|
self.assertIn('title = "Bob\'s \\"backup\\""', toml)
|
|
44
62
|
self.assertIn('baseURL = "https://example.com/a\\"b"', toml)
|
|
63
|
+
self.assertIn('locale = "ru"', toml)
|
|
64
|
+
self.assertNotIn('languageCode', toml)
|
|
45
65
|
self.assertIn('default_theme = "light\\"mode"', toml)
|
|
46
66
|
finally:
|
|
47
67
|
os.chdir(old_cwd)
|
|
@@ -119,6 +119,7 @@ class SponsorNormalizeTests(unittest.TestCase):
|
|
|
119
119
|
# Ожидаем пробелы вокруг **жирное**
|
|
120
120
|
self.assertIn('слово **жирное** слово', result)
|
|
121
121
|
self.assertNotIn('слово**жирное**слово', result)
|
|
122
|
+
self.assertNotIn('**жирное** слово', result)
|
|
122
123
|
|
|
123
124
|
def test_real_world_case_from_issue(self):
|
|
124
125
|
"""Тест реального случая из issue."""
|
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
import unittest
|
|
2
|
+
import sys
|
|
3
|
+
import tempfile
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
import backup
|
|
7
|
+
from src.config import Auth, Config, Source, SyncConfig
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class DummyDownloader:
|
|
11
|
+
checks: list[str] = []
|
|
12
|
+
synced: list[str] = []
|
|
13
|
+
check_failures: dict[str, Exception] = {}
|
|
14
|
+
sync_failures: dict[str, Exception] = {}
|
|
15
|
+
|
|
16
|
+
def __init__(self, config, source, db):
|
|
17
|
+
self.source = source
|
|
18
|
+
|
|
19
|
+
def check_auth(self):
|
|
20
|
+
DummyDownloader.checks.append(self.source.author)
|
|
21
|
+
error = DummyDownloader.check_failures.get(self.source.author)
|
|
22
|
+
if error:
|
|
23
|
+
raise error
|
|
24
|
+
|
|
25
|
+
def sync(self):
|
|
26
|
+
DummyDownloader.synced.append(self.source.author)
|
|
27
|
+
error = DummyDownloader.sync_failures.get(self.source.author)
|
|
28
|
+
if error:
|
|
29
|
+
raise error
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class SyncPolicyTests(unittest.TestCase):
|
|
33
|
+
def setUp(self):
|
|
34
|
+
self.old_get_downloader = backup.get_downloader
|
|
35
|
+
backup.get_downloader = lambda platform, config, source, db: DummyDownloader(config, source, db)
|
|
36
|
+
DummyDownloader.checks = []
|
|
37
|
+
DummyDownloader.synced = []
|
|
38
|
+
DummyDownloader.check_failures = {}
|
|
39
|
+
DummyDownloader.sync_failures = {}
|
|
40
|
+
|
|
41
|
+
def tearDown(self):
|
|
42
|
+
backup.get_downloader = self.old_get_downloader
|
|
43
|
+
|
|
44
|
+
def make_config(self, on_error):
|
|
45
|
+
return Config(
|
|
46
|
+
output_dir=Path("/tmp/test"),
|
|
47
|
+
auth=Auth(),
|
|
48
|
+
sources=[
|
|
49
|
+
Source(platform="sponsr", author="good"),
|
|
50
|
+
Source(platform="boosty", author="bad"),
|
|
51
|
+
],
|
|
52
|
+
sync=SyncConfig(on_error=on_error),
|
|
53
|
+
)
|
|
54
|
+
|
|
55
|
+
def test_preflight_continue_filters_failed_sources(self):
|
|
56
|
+
config = self.make_config("continue")
|
|
57
|
+
DummyDownloader.check_failures = {"bad": RuntimeError("401 Unauthorized")}
|
|
58
|
+
|
|
59
|
+
ready_sources, errors = backup.preflight_sources(config, object())
|
|
60
|
+
|
|
61
|
+
self.assertEqual([source.author for source in ready_sources], ["good"])
|
|
62
|
+
self.assertEqual([source.author for source, _ in errors], ["bad"])
|
|
63
|
+
self.assertEqual(DummyDownloader.checks, ["good", "bad"])
|
|
64
|
+
|
|
65
|
+
def test_sync_all_continue_keeps_syncing_after_source_error(self):
|
|
66
|
+
config = self.make_config("continue")
|
|
67
|
+
DummyDownloader.sync_failures = {"good": RuntimeError("boom")}
|
|
68
|
+
|
|
69
|
+
errors = backup.sync_all(config, object())
|
|
70
|
+
|
|
71
|
+
self.assertEqual([source.author for source, _ in errors], ["good"])
|
|
72
|
+
self.assertEqual(DummyDownloader.synced, ["good", "bad"])
|
|
73
|
+
|
|
74
|
+
def test_sync_all_stop_stops_after_first_source_error(self):
|
|
75
|
+
config = self.make_config("stop")
|
|
76
|
+
DummyDownloader.sync_failures = {"good": RuntimeError("boom")}
|
|
77
|
+
|
|
78
|
+
errors = backup.sync_all(config, object())
|
|
79
|
+
|
|
80
|
+
self.assertEqual([source.author for source, _ in errors], ["good"])
|
|
81
|
+
self.assertEqual(DummyDownloader.synced, ["good"])
|
|
82
|
+
|
|
83
|
+
def test_main_continue_preflight_errors_do_not_exit_with_failure(self):
|
|
84
|
+
config = self.make_config("continue")
|
|
85
|
+
DummyDownloader.check_failures = {"bad": RuntimeError("401 Unauthorized")}
|
|
86
|
+
|
|
87
|
+
class DummyDatabase:
|
|
88
|
+
def __init__(self, path):
|
|
89
|
+
self.path = path
|
|
90
|
+
|
|
91
|
+
def __enter__(self):
|
|
92
|
+
return self
|
|
93
|
+
|
|
94
|
+
def __exit__(self, exc_type, exc, tb):
|
|
95
|
+
return False
|
|
96
|
+
|
|
97
|
+
old_argv = sys.argv
|
|
98
|
+
old_load_config = backup.load_config
|
|
99
|
+
old_database = backup.Database
|
|
100
|
+
old_ensure_link = backup.ensure_site_content_link
|
|
101
|
+
old_generate_hugo_config = backup.generate_hugo_config
|
|
102
|
+
|
|
103
|
+
with tempfile.TemporaryDirectory() as tmp:
|
|
104
|
+
cfg_path = Path(tmp) / "config.yaml"
|
|
105
|
+
cfg_path.write_text("", encoding="utf-8")
|
|
106
|
+
config.output_dir = Path(tmp) / "backup"
|
|
107
|
+
|
|
108
|
+
try:
|
|
109
|
+
sys.argv = ["backup.py", "--config", str(cfg_path)]
|
|
110
|
+
backup.load_config = lambda path: config
|
|
111
|
+
backup.Database = DummyDatabase
|
|
112
|
+
backup.ensure_site_content_link = lambda cfg: None
|
|
113
|
+
backup.generate_hugo_config = lambda cfg: None
|
|
114
|
+
|
|
115
|
+
backup.main()
|
|
116
|
+
except SystemExit as e:
|
|
117
|
+
self.fail(f"main() exited with {e.code} for continue policy")
|
|
118
|
+
finally:
|
|
119
|
+
sys.argv = old_argv
|
|
120
|
+
backup.load_config = old_load_config
|
|
121
|
+
backup.Database = old_database
|
|
122
|
+
backup.ensure_site_content_link = old_ensure_link
|
|
123
|
+
backup.generate_hugo_config = old_generate_hugo_config
|
|
124
|
+
|
|
125
|
+
self.assertEqual(DummyDownloader.synced, ["good"])
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
if __name__ == "__main__":
|
|
129
|
+
unittest.main()
|
|
@@ -139,8 +139,8 @@ class BoostyVideoEmbedTests(unittest.TestCase):
|
|
|
139
139
|
patch('src.boosty.load_auth_header', return_value='Bearer fake_token'):
|
|
140
140
|
self.downloader = BoostyDownloader(self.config, self.source, self.db)
|
|
141
141
|
|
|
142
|
-
def
|
|
143
|
-
"""ok_video
|
|
142
|
+
def test_ok_video_uses_player_url_no_preview(self):
|
|
143
|
+
"""ok_video без превью → простая текстовая ссылка на playerUrl."""
|
|
144
144
|
blocks = [
|
|
145
145
|
{
|
|
146
146
|
"type": "ok_video",
|
|
@@ -164,13 +164,15 @@ class BoostyVideoEmbedTests(unittest.TestCase):
|
|
|
164
164
|
# Не должно быть старого формата
|
|
165
165
|
self.assertNotIn('📹 Видео:', result)
|
|
166
166
|
|
|
167
|
-
def
|
|
168
|
-
"""ok_video
|
|
167
|
+
def test_ok_video_clickable_preview_with_local_video(self):
|
|
168
|
+
"""ok_video: превью скачано + видео скачано → кликабельная картинка на локальный файл."""
|
|
169
|
+
preview_url = "https://iv.okcdn.ru/videoPreview?id=1"
|
|
169
170
|
video_url = "https://vd.example/high?id=1"
|
|
170
171
|
blocks = [
|
|
171
172
|
{
|
|
172
173
|
"type": "ok_video",
|
|
173
174
|
"id": "abc",
|
|
175
|
+
"preview": preview_url,
|
|
174
176
|
"playerUrls": [{"type": "high", "url": video_url}],
|
|
175
177
|
}
|
|
176
178
|
]
|
|
@@ -181,14 +183,43 @@ class BoostyVideoEmbedTests(unittest.TestCase):
|
|
|
181
183
|
tags=[], assets=[]
|
|
182
184
|
)
|
|
183
185
|
|
|
184
|
-
|
|
186
|
+
asset_map = {preview_url: "video-preview-abc.jpg", video_url: "video-1.mp4"}
|
|
187
|
+
result = self.downloader._to_markdown(post, asset_map)
|
|
185
188
|
|
|
186
|
-
self.assertIn('[📹 Видео](assets/video-1.mp4)', result)
|
|
189
|
+
self.assertIn('[](assets/video-1.mp4)', result)
|
|
187
190
|
|
|
188
|
-
def
|
|
189
|
-
"""
|
|
191
|
+
def test_ok_video_clickable_preview_with_fallback_url(self):
|
|
192
|
+
"""ok_video: превью скачано, видео нет → кликабельная картинка на ok.ru/video."""
|
|
193
|
+
preview_url = "https://iv.okcdn.ru/videoPreview?id=1"
|
|
190
194
|
blocks = [
|
|
191
|
-
{
|
|
195
|
+
{
|
|
196
|
+
"type": "ok_video",
|
|
197
|
+
"id": "uuid-1",
|
|
198
|
+
"vid": "11386338749172",
|
|
199
|
+
"preview": preview_url,
|
|
200
|
+
}
|
|
201
|
+
]
|
|
202
|
+
post = Post(
|
|
203
|
+
post_id='1', title='Test',
|
|
204
|
+
content_html=json.dumps(blocks),
|
|
205
|
+
post_date='2025-01-01', source_url='https://test.com',
|
|
206
|
+
tags=[], assets=[]
|
|
207
|
+
)
|
|
208
|
+
|
|
209
|
+
asset_map = {preview_url: "video-preview-uuid-1.jpg"}
|
|
210
|
+
result = self.downloader._to_markdown(post, asset_map)
|
|
211
|
+
|
|
212
|
+
self.assertIn('[](https://ok.ru/video/11386338749172)', result)
|
|
213
|
+
|
|
214
|
+
def test_ok_video_preview_not_downloaded_falls_back_to_text_link(self):
|
|
215
|
+
"""ok_video: превью есть в блоке но не скачано → обычная текстовая ссылка."""
|
|
216
|
+
blocks = [
|
|
217
|
+
{
|
|
218
|
+
"type": "ok_video",
|
|
219
|
+
"id": "uuid-1",
|
|
220
|
+
"vid": "11386338749172",
|
|
221
|
+
"preview": "https://iv.okcdn.ru/videoPreview?id=1",
|
|
222
|
+
}
|
|
192
223
|
]
|
|
193
224
|
post = Post(
|
|
194
225
|
post_id='1', title='Test',
|
|
@@ -200,6 +231,7 @@ class BoostyVideoEmbedTests(unittest.TestCase):
|
|
|
200
231
|
result = self.downloader._to_markdown(post, {})
|
|
201
232
|
|
|
202
233
|
self.assertIn('[📹 Видео](https://ok.ru/video/11386338749172)', result)
|
|
234
|
+
self.assertNotIn('![', result)
|
|
203
235
|
|
|
204
236
|
def test_ok_video_falls_back_to_embed_id(self):
|
|
205
237
|
"""Legacy fallback: если есть только id, оставляем videoembed/{id}."""
|
|
@@ -219,13 +251,16 @@ class BoostyVideoEmbedTests(unittest.TestCase):
|
|
|
219
251
|
|
|
220
252
|
def test_ok_video_with_surrounding_text(self):
|
|
221
253
|
"""ok_video между текстовыми блоками."""
|
|
254
|
+
preview_url = "https://iv.okcdn.ru/preview?id=2"
|
|
255
|
+
video_url = "https://vd.example/medium?id=2"
|
|
222
256
|
blocks = [
|
|
223
257
|
{"type": "text", "content": json.dumps(["Посмотрите видео:"])},
|
|
224
258
|
{"type": "text", "modificator": "BLOCK_END"},
|
|
225
259
|
{
|
|
226
260
|
"type": "ok_video",
|
|
227
261
|
"id": "999888777",
|
|
228
|
-
"
|
|
262
|
+
"preview": preview_url,
|
|
263
|
+
"playerUrls": [{"type": "medium", "url": video_url}],
|
|
229
264
|
},
|
|
230
265
|
{"type": "text", "content": json.dumps(["Вот такие дела."])},
|
|
231
266
|
{"type": "text", "modificator": "BLOCK_END"},
|
|
@@ -237,14 +272,15 @@ class BoostyVideoEmbedTests(unittest.TestCase):
|
|
|
237
272
|
tags=[], assets=[]
|
|
238
273
|
)
|
|
239
274
|
|
|
240
|
-
|
|
275
|
+
asset_map = {preview_url: "preview.jpg", video_url: "video.mp4"}
|
|
276
|
+
result = self.downloader._to_markdown(post, asset_map)
|
|
241
277
|
|
|
242
278
|
self.assertIn('Посмотрите видео:', result)
|
|
243
|
-
self.assertIn('[📹 Видео](
|
|
279
|
+
self.assertIn('[](assets/video.mp4)', result)
|
|
244
280
|
self.assertIn('Вот такие дела.', result)
|
|
245
281
|
|
|
246
|
-
def
|
|
247
|
-
"""_extract_assets для ok_video
|
|
282
|
+
def test_extract_assets_ok_video_with_player_urls_extracts_both(self):
|
|
283
|
+
"""_extract_assets для ok_video с playerUrls: и превью, и видео."""
|
|
248
284
|
blocks = [
|
|
249
285
|
{
|
|
250
286
|
"type": "ok_video",
|
|
@@ -260,11 +296,16 @@ class BoostyVideoEmbedTests(unittest.TestCase):
|
|
|
260
296
|
|
|
261
297
|
assets = self.downloader._extract_assets(blocks)
|
|
262
298
|
|
|
263
|
-
self.assertEqual(len(assets),
|
|
264
|
-
|
|
299
|
+
self.assertEqual(len(assets), 2)
|
|
300
|
+
# Первый — превью (с force=True)
|
|
301
|
+
self.assertEqual(assets[0]["url"], "https://iv.okcdn.ru/videoPreview?id=1")
|
|
302
|
+
self.assertIn("video-preview-", assets[0]["alt"])
|
|
303
|
+
self.assertTrue(assets[0].get("force"))
|
|
304
|
+
# Второй — видео
|
|
305
|
+
self.assertEqual(assets[1]["url"], "https://vd.example/high?id=1")
|
|
265
306
|
|
|
266
|
-
def
|
|
267
|
-
"""_extract_assets: если playerUrls пусты, берём preview."""
|
|
307
|
+
def test_extract_assets_ok_video_without_player_urls_extracts_preview(self):
|
|
308
|
+
"""_extract_assets: если playerUrls пусты, берём только preview (с force)."""
|
|
268
309
|
blocks = [
|
|
269
310
|
{
|
|
270
311
|
"type": "ok_video",
|
|
@@ -278,6 +319,25 @@ class BoostyVideoEmbedTests(unittest.TestCase):
|
|
|
278
319
|
self.assertEqual(len(assets), 1)
|
|
279
320
|
self.assertEqual(assets[0]["url"], "https://iv.okcdn.ru/videoPreview?id=1")
|
|
280
321
|
self.assertIn("video-preview-", assets[0]["alt"])
|
|
322
|
+
self.assertTrue(assets[0].get("force"))
|
|
323
|
+
|
|
324
|
+
def test_extract_assets_ok_video_no_preview_only_video(self):
|
|
325
|
+
"""_extract_assets: если нет preview, только видео."""
|
|
326
|
+
blocks = [
|
|
327
|
+
{
|
|
328
|
+
"type": "ok_video",
|
|
329
|
+
"id": "video-id",
|
|
330
|
+
"playerUrls": [
|
|
331
|
+
{"type": "high", "url": "https://vd.example/high?id=1"},
|
|
332
|
+
],
|
|
333
|
+
}
|
|
334
|
+
]
|
|
335
|
+
|
|
336
|
+
assets = self.downloader._extract_assets(blocks)
|
|
337
|
+
|
|
338
|
+
self.assertEqual(len(assets), 1)
|
|
339
|
+
self.assertEqual(assets[0]["url"], "https://vd.example/high?id=1")
|
|
340
|
+
self.assertFalse(assets[0].get("force", False))
|
|
281
341
|
|
|
282
342
|
def test_ok_video_player_url_all_empty(self):
|
|
283
343
|
"""playerUrls с пустыми url → fallback на vid/id."""
|
|
@@ -367,5 +427,64 @@ class BoostyVideoEmbedTests(unittest.TestCase):
|
|
|
367
427
|
self.assertNotIn('📹', result)
|
|
368
428
|
|
|
369
429
|
|
|
430
|
+
class DownloadAssetsForceTests(unittest.TestCase):
|
|
431
|
+
"""Тесты force-флага при скачивании assets."""
|
|
432
|
+
|
|
433
|
+
def setUp(self):
|
|
434
|
+
self.config = Config(output_dir=Path('/tmp/test'), auth=Auth())
|
|
435
|
+
# asset_types без image — обычные картинки фильтруются
|
|
436
|
+
self.source = Source(platform='boosty', author='test_author',
|
|
437
|
+
asset_types=['video'])
|
|
438
|
+
self.db = MagicMock(spec=Database)
|
|
439
|
+
|
|
440
|
+
with patch('src.boosty.load_cookie', return_value='fake_cookie'), \
|
|
441
|
+
patch('src.boosty.load_auth_header', return_value='Bearer fake_token'):
|
|
442
|
+
self.downloader = BoostyDownloader(self.config, self.source, self.db)
|
|
443
|
+
|
|
444
|
+
@patch('src.downloader.retry_request')
|
|
445
|
+
def test_force_asset_bypasses_type_filter(self, mock_retry):
|
|
446
|
+
"""Asset с force=True скачивается даже если тип не в asset_types."""
|
|
447
|
+
import tempfile
|
|
448
|
+
import os
|
|
449
|
+
|
|
450
|
+
with tempfile.TemporaryDirectory() as tmpdir:
|
|
451
|
+
assets_dir = Path(tmpdir)
|
|
452
|
+
|
|
453
|
+
# Мокаем ответ для картинки-превью
|
|
454
|
+
mock_response = MagicMock()
|
|
455
|
+
mock_response.headers = {'Content-Type': 'image/jpeg'}
|
|
456
|
+
mock_response.iter_content.return_value = [b'fake image data']
|
|
457
|
+
mock_response.close = MagicMock()
|
|
458
|
+
mock_retry.return_value = mock_response
|
|
459
|
+
|
|
460
|
+
assets = [
|
|
461
|
+
{"url": "https://iv.okcdn.ru/preview.jpg", "alt": "video-preview-1", "force": True},
|
|
462
|
+
]
|
|
463
|
+
|
|
464
|
+
result = self.downloader._download_assets(assets, assets_dir)
|
|
465
|
+
|
|
466
|
+
# Должна быть скачана, несмотря на то что image не в asset_types
|
|
467
|
+
self.assertEqual(len(result), 1)
|
|
468
|
+
self.assertIn("https://iv.okcdn.ru/preview.jpg", result)
|
|
469
|
+
|
|
470
|
+
@patch('src.downloader.retry_request')
|
|
471
|
+
def test_non_force_asset_filtered_by_type(self, mock_retry):
|
|
472
|
+
"""Обычный asset фильтруется по asset_types."""
|
|
473
|
+
import tempfile
|
|
474
|
+
|
|
475
|
+
with tempfile.TemporaryDirectory() as tmpdir:
|
|
476
|
+
assets_dir = Path(tmpdir)
|
|
477
|
+
|
|
478
|
+
assets = [
|
|
479
|
+
{"url": "https://example.com/photo.jpg", "alt": "photo"},
|
|
480
|
+
]
|
|
481
|
+
|
|
482
|
+
result = self.downloader._download_assets(assets, assets_dir)
|
|
483
|
+
|
|
484
|
+
# Не должна быть скачана — image не в asset_types
|
|
485
|
+
self.assertEqual(len(result), 0)
|
|
486
|
+
mock_retry.assert_not_called()
|
|
487
|
+
|
|
488
|
+
|
|
370
489
|
if __name__ == '__main__':
|
|
371
490
|
unittest.main()
|
|
File without changes
|
{article_backup-0.3.10 → article_backup-0.3.12}/article_backup.egg-info/dependency_links.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|