article-backup 0.3.12__tar.gz → 0.3.13__tar.gz

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in the public registry.
Files changed (30 listed; only entries 1–7 have content changes)
  1. {article_backup-0.3.12 → article_backup-0.3.13}/PKG-INFO +1 -1
  2. {article_backup-0.3.12 → article_backup-0.3.13}/article_backup.egg-info/PKG-INFO +1 -1
  3. {article_backup-0.3.12 → article_backup-0.3.13}/pyproject.toml +1 -1
  4. {article_backup-0.3.12 → article_backup-0.3.13}/src/boosty.py +37 -3
  5. {article_backup-0.3.12 → article_backup-0.3.13}/src/downloader.py +2 -1
  6. {article_backup-0.3.12 → article_backup-0.3.13}/tests/test_asset_dedup.py +32 -0
  7. {article_backup-0.3.12 → article_backup-0.3.13}/tests/test_boosty_normalize.py +70 -0
  8. {article_backup-0.3.12 → article_backup-0.3.13}/LICENSE +0 -0
  9. {article_backup-0.3.12 → article_backup-0.3.13}/README.md +0 -0
  10. {article_backup-0.3.12 → article_backup-0.3.13}/article_backup.egg-info/SOURCES.txt +0 -0
  11. {article_backup-0.3.12 → article_backup-0.3.13}/article_backup.egg-info/dependency_links.txt +0 -0
  12. {article_backup-0.3.12 → article_backup-0.3.13}/article_backup.egg-info/entry_points.txt +0 -0
  13. {article_backup-0.3.12 → article_backup-0.3.13}/article_backup.egg-info/requires.txt +0 -0
  14. {article_backup-0.3.12 → article_backup-0.3.13}/article_backup.egg-info/top_level.txt +0 -0
  15. {article_backup-0.3.12 → article_backup-0.3.13}/backup.py +0 -0
  16. {article_backup-0.3.12 → article_backup-0.3.13}/setup.cfg +0 -0
  17. {article_backup-0.3.12 → article_backup-0.3.13}/src/__init__.py +0 -0
  18. {article_backup-0.3.12 → article_backup-0.3.13}/src/config.py +0 -0
  19. {article_backup-0.3.12 → article_backup-0.3.13}/src/database.py +0 -0
  20. {article_backup-0.3.12 → article_backup-0.3.13}/src/sponsr.py +0 -0
  21. {article_backup-0.3.12 → article_backup-0.3.13}/src/utils.py +0 -0
  22. {article_backup-0.3.12 → article_backup-0.3.13}/tests/test_boosty_empty_link.py +0 -0
  23. {article_backup-0.3.12 → article_backup-0.3.13}/tests/test_config_hardening.py +0 -0
  24. {article_backup-0.3.12 → article_backup-0.3.13}/tests/test_incremental_sync.py +0 -0
  25. {article_backup-0.3.12 → article_backup-0.3.13}/tests/test_slug_safety.py +0 -0
  26. {article_backup-0.3.12 → article_backup-0.3.13}/tests/test_sponsr_formatting_fix.py +0 -0
  27. {article_backup-0.3.12 → article_backup-0.3.13}/tests/test_sponsr_normalize.py +0 -0
  28. {article_backup-0.3.12 → article_backup-0.3.13}/tests/test_sponsr_tags.py +0 -0
  29. {article_backup-0.3.12 → article_backup-0.3.13}/tests/test_sync_policy.py +0 -0
  30. {article_backup-0.3.12 → article_backup-0.3.13}/tests/test_video_embed.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: article-backup
3
- Version: 0.3.12
3
+ Version: 0.3.13
4
4
  Summary: Локальный бэкап статей с Sponsr.ru и Boosty.to в Markdown с Hugo-интеграцией
5
5
  Author-email: Eugene Chaykin <eugene@chayk.in>
6
6
  License: Apache-2.0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: article-backup
3
- Version: 0.3.12
3
+ Version: 0.3.13
4
4
  Summary: Локальный бэкап статей с Sponsr.ru и Boosty.to в Markdown с Hugo-интеграцией
5
5
  Author-email: Eugene Chaykin <eugene@chayk.in>
6
6
  License: Apache-2.0
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "article-backup"
3
- version = "0.3.12"
3
+ version = "0.3.13"
4
4
  description = "Локальный бэкап статей с Sponsr.ru и Boosty.to в Markdown с Hugo-интеграцией"
5
5
  readme = "README.md"
6
6
  license = {text = "Apache-2.0"}
@@ -3,6 +3,7 @@
3
3
 
4
4
  import json
5
5
  from datetime import datetime, timezone
6
+ from urllib.parse import parse_qsl, urlencode, urlparse
6
7
 
7
8
  import requests
8
9
 
@@ -158,7 +159,7 @@ class BoostyDownloader(BaseDownloader):
158
159
  content_blocks = raw_data.get("data", [])
159
160
 
160
161
  # Извлекаем assets
161
- assets = self._extract_assets(content_blocks)
162
+ assets = self._extract_assets(content_blocks, raw_data.get("signedQuery", ""))
162
163
 
163
164
  return Post(
164
165
  post_id=post_id,
@@ -170,7 +171,7 @@ class BoostyDownloader(BaseDownloader):
170
171
  assets=assets,
171
172
  )
172
173
 
173
- def _extract_assets(self, blocks: list[dict]) -> list[dict]:
174
+ def _extract_assets(self, blocks: list[dict], signed_query: str = "") -> list[dict]:
174
175
  """Извлекает URL медиафайлов из блоков контента."""
175
176
  assets = []
176
177
 
@@ -190,6 +191,16 @@ class BoostyDownloader(BaseDownloader):
190
191
  if url:
191
192
  assets.append({
192
193
  "url": url,
194
+ "download_url": self._sign_media_url(url, signed_query),
195
+ "alt": block.get("title", block.get("id", "")),
196
+ })
197
+
198
+ elif block_type == "file":
199
+ url = block.get("url", "")
200
+ if url:
201
+ assets.append({
202
+ "url": url,
203
+ "download_url": self._sign_media_url(url, signed_query),
193
204
  "alt": block.get("title", block.get("id", "")),
194
205
  })
195
206
 
@@ -244,7 +255,7 @@ class BoostyDownloader(BaseDownloader):
244
255
  continue
245
256
 
246
257
  # Block-level элементы разрывают параграф
247
- if block_type in ("image", "audio_file", "ok_video"):
258
+ if block_type in ("image", "audio_file", "file", "ok_video"):
248
259
  if current_paragraph:
249
260
  lines.append("".join(current_paragraph))
250
261
  current_paragraph = []
@@ -293,6 +304,15 @@ class BoostyDownloader(BaseDownloader):
293
304
  elif url:
294
305
  return f"\n🎵 **{title}**: [слушать]({url})\n"
295
306
 
307
+ elif block_type == "file":
308
+ url = block.get("url", "")
309
+ title = block.get("title") or block.get("id") or "file"
310
+ local = asset_map.get(url)
311
+ if local:
312
+ return f"\n📎 [{title}](assets/{local})\n"
313
+ elif url:
314
+ return f"\n📎 [{title}]({url})\n"
315
+
296
316
  elif block_type == "ok_video":
297
317
  # Определяем ссылку на видео (приоритет: локальный файл > ok.ru/video > videoembed)
298
318
  video_url = self._extract_ok_video_player_url(block)
@@ -322,6 +342,20 @@ class BoostyDownloader(BaseDownloader):
322
342
 
323
343
  return ""
324
344
 
345
+ def _sign_media_url(self, url: str, signed_query: str) -> str:
346
+ """Добавляет signedQuery Boosty к URL медиа, не перезаписывая существующие параметры."""
347
+ if not url or not signed_query:
348
+ return url
349
+
350
+ parsed = urlparse(url)
351
+ params = dict(parse_qsl(parsed.query, keep_blank_values=True))
352
+ query = signed_query[1:] if signed_query.startswith("?") else signed_query
353
+ for key, value in parse_qsl(query, keep_blank_values=True):
354
+ if key not in params:
355
+ params[key] = value
356
+
357
+ return parsed._replace(query=urlencode(params)).geturl()
358
+
325
359
  def _extract_ok_video_player_url(self, block: dict) -> str:
326
360
  """Выбирает лучший прямой URL видео из ok_video блока."""
327
361
  player_urls = block.get("playerUrls")
@@ -318,6 +318,7 @@ class BaseDownloader(ABC):
318
318
 
319
319
  def download_one(asset: dict) -> tuple[str, str | None]:
320
320
  url = asset["url"]
321
+ request_url = asset.get("download_url", url)
321
322
  force = asset.get("force", False)
322
323
  try:
323
324
  # Предварительная проверка (если расширение есть)
@@ -326,7 +327,7 @@ class BaseDownloader(ABC):
326
327
  return url, None
327
328
 
328
329
  def do_request():
329
- resp = self.session.get(url, stream=True, timeout=self.TIMEOUT)
330
+ resp = self.session.get(request_url, stream=True, timeout=self.TIMEOUT)
330
331
  resp.raise_for_status()
331
332
  return resp
332
333
 
@@ -143,6 +143,38 @@ class AssetDedupTests(unittest.TestCase):
143
143
  for fn in filenames:
144
144
  self.assertTrue((assets_dir / fn).exists(), msg=f"missing file: {fn}")
145
145
 
146
+ def test_download_assets_uses_download_url_but_maps_original_url(self):
147
+ with tempfile.TemporaryDirectory() as tmp:
148
+ tmp_path = Path(tmp)
149
+ assets_dir = tmp_path / "assets"
150
+ assets_dir.mkdir(parents=True, exist_ok=True)
151
+
152
+ config = Config(output_dir=tmp_path, auth=Auth())
153
+ source = Source(platform="boosty", author="author", download_assets=True)
154
+ dl = _DummyDownloader(config, source, cast(Database, _DummyDB()))
155
+
156
+ requested_urls = []
157
+
158
+ def fake_get(url: str, stream: bool = True, timeout=None):
159
+ requested_urls.append(url)
160
+ return _FakeResponse("audio/mpeg", body=b"audio")
161
+
162
+ dl.session.get = fake_get # type: ignore[method-assign]
163
+
164
+ asset_map = dl._download_assets(
165
+ [
166
+ {
167
+ "url": "https://cdn.boosty.to/audio/audio-id",
168
+ "download_url": "https://cdn.boosty.to/audio/audio-id?sign=abc",
169
+ "alt": "audio.mp3",
170
+ }
171
+ ],
172
+ assets_dir,
173
+ )
174
+
175
+ self.assertEqual(requested_urls, ["https://cdn.boosty.to/audio/audio-id?sign=abc"])
176
+ self.assertIn("https://cdn.boosty.to/audio/audio-id", asset_map)
177
+
146
178
 
147
179
  if __name__ == "__main__":
148
180
  unittest.main()
@@ -143,5 +143,75 @@ class BoostyParagraphTests(unittest.TestCase):
143
143
  self.assertIn(')\n\nТекст после', md)
144
144
 
145
145
 
146
+ class BoostySignedMediaTests(unittest.TestCase):
147
+ def setUp(self):
148
+ self.config = Config(output_dir=Path('/tmp/test'), auth=Auth())
149
+ self.source = Source(platform='boosty', author='test_author')
150
+ self.db = MagicMock(spec=Database)
151
+ with patch('src.boosty.load_cookie', return_value='fake'), \
152
+ patch('src.boosty.load_auth_header', return_value='Bearer fake'):
153
+ self.downloader = BoostyDownloader(self.config, self.source, self.db)
154
+
155
+ def test_parse_post_signs_audio_asset_with_signed_query(self):
156
+ raw = {
157
+ 'id': 'post-id',
158
+ 'title': 'Post',
159
+ 'createdAt': 1735689600,
160
+ 'signedQuery': '?sign=abc&expires=123',
161
+ 'data': [
162
+ {
163
+ 'type': 'audio_file',
164
+ 'url': 'https://cdn.boosty.to/audio/audio-id',
165
+ 'title': 'Audio title.mp3',
166
+ }
167
+ ],
168
+ }
169
+
170
+ post = self.downloader._parse_post(raw)
171
+
172
+ self.assertEqual(post.assets[0]['url'], 'https://cdn.boosty.to/audio/audio-id')
173
+ self.assertEqual(
174
+ post.assets[0]['download_url'],
175
+ 'https://cdn.boosty.to/audio/audio-id?sign=abc&expires=123',
176
+ )
177
+
178
+ def test_parse_post_signs_file_asset_with_signed_query(self):
179
+ raw = {
180
+ 'id': 'post-id',
181
+ 'title': 'Post',
182
+ 'createdAt': 1735689600,
183
+ 'signedQuery': 'sign=abc&expires=123',
184
+ 'data': [
185
+ {
186
+ 'type': 'file',
187
+ 'url': 'https://cdn.boosty.to/file/file-id?name=doc.pdf',
188
+ 'title': 'doc.pdf',
189
+ }
190
+ ],
191
+ }
192
+
193
+ post = self.downloader._parse_post(raw)
194
+
195
+ self.assertEqual(post.assets[0]['url'], 'https://cdn.boosty.to/file/file-id?name=doc.pdf')
196
+ self.assertEqual(
197
+ post.assets[0]['download_url'],
198
+ 'https://cdn.boosty.to/file/file-id?name=doc.pdf&sign=abc&expires=123',
199
+ )
200
+
201
+ def test_file_block_uses_local_asset_when_downloaded(self):
202
+ block = {
203
+ 'type': 'file',
204
+ 'url': 'https://cdn.boosty.to/file/file-id',
205
+ 'title': 'doc.pdf',
206
+ }
207
+
208
+ md = self.downloader._block_to_markdown(
209
+ block,
210
+ {'https://cdn.boosty.to/file/file-id': 'doc.pdf'},
211
+ )
212
+
213
+ self.assertIn('[doc.pdf](assets/doc.pdf)', md)
214
+
215
+
146
216
  if __name__ == '__main__':
147
217
  unittest.main()
Remaining files (entries 8–30 in the list above): no content changes.