article-backup 0.3.12__tar.gz → 0.3.13__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {article_backup-0.3.12 → article_backup-0.3.13}/PKG-INFO +1 -1
- {article_backup-0.3.12 → article_backup-0.3.13}/article_backup.egg-info/PKG-INFO +1 -1
- {article_backup-0.3.12 → article_backup-0.3.13}/pyproject.toml +1 -1
- {article_backup-0.3.12 → article_backup-0.3.13}/src/boosty.py +37 -3
- {article_backup-0.3.12 → article_backup-0.3.13}/src/downloader.py +2 -1
- {article_backup-0.3.12 → article_backup-0.3.13}/tests/test_asset_dedup.py +32 -0
- {article_backup-0.3.12 → article_backup-0.3.13}/tests/test_boosty_normalize.py +70 -0
- {article_backup-0.3.12 → article_backup-0.3.13}/LICENSE +0 -0
- {article_backup-0.3.12 → article_backup-0.3.13}/README.md +0 -0
- {article_backup-0.3.12 → article_backup-0.3.13}/article_backup.egg-info/SOURCES.txt +0 -0
- {article_backup-0.3.12 → article_backup-0.3.13}/article_backup.egg-info/dependency_links.txt +0 -0
- {article_backup-0.3.12 → article_backup-0.3.13}/article_backup.egg-info/entry_points.txt +0 -0
- {article_backup-0.3.12 → article_backup-0.3.13}/article_backup.egg-info/requires.txt +0 -0
- {article_backup-0.3.12 → article_backup-0.3.13}/article_backup.egg-info/top_level.txt +0 -0
- {article_backup-0.3.12 → article_backup-0.3.13}/backup.py +0 -0
- {article_backup-0.3.12 → article_backup-0.3.13}/setup.cfg +0 -0
- {article_backup-0.3.12 → article_backup-0.3.13}/src/__init__.py +0 -0
- {article_backup-0.3.12 → article_backup-0.3.13}/src/config.py +0 -0
- {article_backup-0.3.12 → article_backup-0.3.13}/src/database.py +0 -0
- {article_backup-0.3.12 → article_backup-0.3.13}/src/sponsr.py +0 -0
- {article_backup-0.3.12 → article_backup-0.3.13}/src/utils.py +0 -0
- {article_backup-0.3.12 → article_backup-0.3.13}/tests/test_boosty_empty_link.py +0 -0
- {article_backup-0.3.12 → article_backup-0.3.13}/tests/test_config_hardening.py +0 -0
- {article_backup-0.3.12 → article_backup-0.3.13}/tests/test_incremental_sync.py +0 -0
- {article_backup-0.3.12 → article_backup-0.3.13}/tests/test_slug_safety.py +0 -0
- {article_backup-0.3.12 → article_backup-0.3.13}/tests/test_sponsr_formatting_fix.py +0 -0
- {article_backup-0.3.12 → article_backup-0.3.13}/tests/test_sponsr_normalize.py +0 -0
- {article_backup-0.3.12 → article_backup-0.3.13}/tests/test_sponsr_tags.py +0 -0
- {article_backup-0.3.12 → article_backup-0.3.13}/tests/test_sync_policy.py +0 -0
- {article_backup-0.3.12 → article_backup-0.3.13}/tests/test_video_embed.py +0 -0
|
@@ -3,6 +3,7 @@
|
|
|
3
3
|
|
|
4
4
|
import json
|
|
5
5
|
from datetime import datetime, timezone
|
|
6
|
+
from urllib.parse import parse_qsl, urlencode, urlparse
|
|
6
7
|
|
|
7
8
|
import requests
|
|
8
9
|
|
|
@@ -158,7 +159,7 @@ class BoostyDownloader(BaseDownloader):
|
|
|
158
159
|
content_blocks = raw_data.get("data", [])
|
|
159
160
|
|
|
160
161
|
# Извлекаем assets
|
|
161
|
-
assets = self._extract_assets(content_blocks)
|
|
162
|
+
assets = self._extract_assets(content_blocks, raw_data.get("signedQuery", ""))
|
|
162
163
|
|
|
163
164
|
return Post(
|
|
164
165
|
post_id=post_id,
|
|
@@ -170,7 +171,7 @@ class BoostyDownloader(BaseDownloader):
|
|
|
170
171
|
assets=assets,
|
|
171
172
|
)
|
|
172
173
|
|
|
173
|
-
def _extract_assets(self, blocks: list[dict]) -> list[dict]:
|
|
174
|
+
def _extract_assets(self, blocks: list[dict], signed_query: str = "") -> list[dict]:
|
|
174
175
|
"""Извлекает URL медиафайлов из блоков контента."""
|
|
175
176
|
assets = []
|
|
176
177
|
|
|
@@ -190,6 +191,16 @@ class BoostyDownloader(BaseDownloader):
|
|
|
190
191
|
if url:
|
|
191
192
|
assets.append({
|
|
192
193
|
"url": url,
|
|
194
|
+
"download_url": self._sign_media_url(url, signed_query),
|
|
195
|
+
"alt": block.get("title", block.get("id", "")),
|
|
196
|
+
})
|
|
197
|
+
|
|
198
|
+
elif block_type == "file":
|
|
199
|
+
url = block.get("url", "")
|
|
200
|
+
if url:
|
|
201
|
+
assets.append({
|
|
202
|
+
"url": url,
|
|
203
|
+
"download_url": self._sign_media_url(url, signed_query),
|
|
193
204
|
"alt": block.get("title", block.get("id", "")),
|
|
194
205
|
})
|
|
195
206
|
|
|
@@ -244,7 +255,7 @@ class BoostyDownloader(BaseDownloader):
|
|
|
244
255
|
continue
|
|
245
256
|
|
|
246
257
|
# Block-level элементы разрывают параграф
|
|
247
|
-
if block_type in ("image", "audio_file", "ok_video"):
|
|
258
|
+
if block_type in ("image", "audio_file", "file", "ok_video"):
|
|
248
259
|
if current_paragraph:
|
|
249
260
|
lines.append("".join(current_paragraph))
|
|
250
261
|
current_paragraph = []
|
|
@@ -293,6 +304,15 @@ class BoostyDownloader(BaseDownloader):
|
|
|
293
304
|
elif url:
|
|
294
305
|
return f"\n🎵 **{title}**: [слушать]({url})\n"
|
|
295
306
|
|
|
307
|
+
elif block_type == "file":
|
|
308
|
+
url = block.get("url", "")
|
|
309
|
+
title = block.get("title") or block.get("id") or "file"
|
|
310
|
+
local = asset_map.get(url)
|
|
311
|
+
if local:
|
|
312
|
+
return f"\n📎 [{title}](assets/{local})\n"
|
|
313
|
+
elif url:
|
|
314
|
+
return f"\n📎 [{title}]({url})\n"
|
|
315
|
+
|
|
296
316
|
elif block_type == "ok_video":
|
|
297
317
|
# Определяем ссылку на видео (приоритет: локальный файл > ok.ru/video > videoembed)
|
|
298
318
|
video_url = self._extract_ok_video_player_url(block)
|
|
@@ -322,6 +342,20 @@ class BoostyDownloader(BaseDownloader):
|
|
|
322
342
|
|
|
323
343
|
return ""
|
|
324
344
|
|
|
345
|
+
def _sign_media_url(self, url: str, signed_query: str) -> str:
|
|
346
|
+
"""Добавляет signedQuery Boosty к URL медиа, не перезаписывая существующие параметры."""
|
|
347
|
+
if not url or not signed_query:
|
|
348
|
+
return url
|
|
349
|
+
|
|
350
|
+
parsed = urlparse(url)
|
|
351
|
+
params = dict(parse_qsl(parsed.query, keep_blank_values=True))
|
|
352
|
+
query = signed_query[1:] if signed_query.startswith("?") else signed_query
|
|
353
|
+
for key, value in parse_qsl(query, keep_blank_values=True):
|
|
354
|
+
if key not in params:
|
|
355
|
+
params[key] = value
|
|
356
|
+
|
|
357
|
+
return parsed._replace(query=urlencode(params)).geturl()
|
|
358
|
+
|
|
325
359
|
def _extract_ok_video_player_url(self, block: dict) -> str:
|
|
326
360
|
"""Выбирает лучший прямой URL видео из ok_video блока."""
|
|
327
361
|
player_urls = block.get("playerUrls")
|
|
@@ -318,6 +318,7 @@ class BaseDownloader(ABC):
|
|
|
318
318
|
|
|
319
319
|
def download_one(asset: dict) -> tuple[str, str | None]:
|
|
320
320
|
url = asset["url"]
|
|
321
|
+
request_url = asset.get("download_url", url)
|
|
321
322
|
force = asset.get("force", False)
|
|
322
323
|
try:
|
|
323
324
|
# Предварительная проверка (если расширение есть)
|
|
@@ -326,7 +327,7 @@ class BaseDownloader(ABC):
|
|
|
326
327
|
return url, None
|
|
327
328
|
|
|
328
329
|
def do_request():
|
|
329
|
-
resp = self.session.get(
|
|
330
|
+
resp = self.session.get(request_url, stream=True, timeout=self.TIMEOUT)
|
|
330
331
|
resp.raise_for_status()
|
|
331
332
|
return resp
|
|
332
333
|
|
|
@@ -143,6 +143,38 @@ class AssetDedupTests(unittest.TestCase):
|
|
|
143
143
|
for fn in filenames:
|
|
144
144
|
self.assertTrue((assets_dir / fn).exists(), msg=f"missing file: {fn}")
|
|
145
145
|
|
|
146
|
+
def test_download_assets_uses_download_url_but_maps_original_url(self):
    """The signed ``download_url`` is fetched, but the map is keyed by ``url``."""
    with tempfile.TemporaryDirectory() as workdir:
        root = Path(workdir)
        target_dir = root / "assets"
        target_dir.mkdir(parents=True, exist_ok=True)

        downloader = _DummyDownloader(
            Config(output_dir=root, auth=Auth()),
            Source(platform="boosty", author="author", download_assets=True),
            cast(Database, _DummyDB()),
        )

        # Every URL handed to the HTTP session is recorded here.
        seen_requests = []

        def fake_get(url: str, stream: bool = True, timeout=None):
            seen_requests.append(url)
            return _FakeResponse("audio/mpeg", body=b"audio")

        downloader.session.get = fake_get  # type: ignore[method-assign]

        audio_asset = {
            "url": "https://cdn.boosty.to/audio/audio-id",
            "download_url": "https://cdn.boosty.to/audio/audio-id?sign=abc",
            "alt": "audio.mp3",
        }
        result = downloader._download_assets([audio_asset], target_dir)

        self.assertEqual(seen_requests, ["https://cdn.boosty.to/audio/audio-id?sign=abc"])
        self.assertIn("https://cdn.boosty.to/audio/audio-id", result)
|
|
177
|
+
|
|
146
178
|
|
|
147
179
|
if __name__ == "__main__":
|
|
148
180
|
unittest.main()
|
|
@@ -143,5 +143,75 @@ class BoostyParagraphTests(unittest.TestCase):
|
|
|
143
143
|
self.assertIn(')\n\nТекст после', md)
|
|
144
144
|
|
|
145
145
|
|
|
146
|
+
class BoostySignedMediaTests(unittest.TestCase):
    """Checks signing of Boosty media assets and rendering of ``file`` blocks."""

    def setUp(self):
        """Build a downloader with the credential loaders patched out."""
        self.config = Config(output_dir=Path('/tmp/test'), auth=Auth())
        self.source = Source(platform='boosty', author='test_author')
        self.db = MagicMock(spec=Database)
        with patch('src.boosty.load_cookie', return_value='fake'):
            with patch('src.boosty.load_auth_header', return_value='Bearer fake'):
                self.downloader = BoostyDownloader(self.config, self.source, self.db)

    def _first_asset(self, signed_query, block):
        """Parse a one-block post and return its first extracted asset."""
        payload = {
            'id': 'post-id',
            'title': 'Post',
            'createdAt': 1735689600,
            'signedQuery': signed_query,
            'data': [block],
        }
        return self.downloader._parse_post(payload).assets[0]

    def test_parse_post_signs_audio_asset_with_signed_query(self):
        """A signedQuery with a leading ``?`` is appended to a bare audio URL."""
        asset = self._first_asset(
            '?sign=abc&expires=123',
            {
                'type': 'audio_file',
                'url': 'https://cdn.boosty.to/audio/audio-id',
                'title': 'Audio title.mp3',
            },
        )

        self.assertEqual(asset['url'], 'https://cdn.boosty.to/audio/audio-id')
        self.assertEqual(
            asset['download_url'],
            'https://cdn.boosty.to/audio/audio-id?sign=abc&expires=123',
        )

    def test_parse_post_signs_file_asset_with_signed_query(self):
        """Signed parameters are added after a file URL's existing query."""
        asset = self._first_asset(
            'sign=abc&expires=123',
            {
                'type': 'file',
                'url': 'https://cdn.boosty.to/file/file-id?name=doc.pdf',
                'title': 'doc.pdf',
            },
        )

        self.assertEqual(asset['url'], 'https://cdn.boosty.to/file/file-id?name=doc.pdf')
        self.assertEqual(
            asset['download_url'],
            'https://cdn.boosty.to/file/file-id?name=doc.pdf&sign=abc&expires=123',
        )

    def test_file_block_uses_local_asset_when_downloaded(self):
        """A downloaded file block links to its local copy under ``assets/``."""
        remote_url = 'https://cdn.boosty.to/file/file-id'
        file_block = {
            'type': 'file',
            'url': remote_url,
            'title': 'doc.pdf',
        }

        rendered = self.downloader._block_to_markdown(file_block, {remote_url: 'doc.pdf'})

        self.assertIn('[doc.pdf](assets/doc.pdf)', rendered)
|
|
214
|
+
|
|
215
|
+
|
|
146
216
|
if __name__ == '__main__':
|
|
147
217
|
unittest.main()
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{article_backup-0.3.12 → article_backup-0.3.13}/article_backup.egg-info/dependency_links.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|