StreamingCommunity 3.3.8__py3-none-any.whl → 3.3.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of StreamingCommunity might be problematic. Click here for more details.

Files changed (26)
  1. StreamingCommunity/Api/Player/supervideo.py +1 -1
  2. StreamingCommunity/Api/Site/crunchyroll/film.py +13 -3
  3. StreamingCommunity/Api/Site/crunchyroll/series.py +6 -6
  4. StreamingCommunity/Api/Site/crunchyroll/site.py +13 -8
  5. StreamingCommunity/Api/Site/crunchyroll/util/ScrapeSerie.py +16 -41
  6. StreamingCommunity/Api/Site/crunchyroll/util/get_license.py +107 -101
  7. StreamingCommunity/Api/Site/mediasetinfinity/util/get_license.py +1 -1
  8. StreamingCommunity/Api/Site/raiplay/series.py +1 -10
  9. StreamingCommunity/Api/Site/raiplay/site.py +5 -13
  10. StreamingCommunity/Api/Site/raiplay/util/ScrapeSerie.py +12 -12
  11. StreamingCommunity/Lib/Downloader/DASH/cdm_helpher.py +8 -3
  12. StreamingCommunity/Lib/Downloader/DASH/decrypt.py +1 -0
  13. StreamingCommunity/Lib/Downloader/DASH/downloader.py +9 -2
  14. StreamingCommunity/Lib/Downloader/DASH/parser.py +456 -98
  15. StreamingCommunity/Lib/Downloader/DASH/segments.py +109 -64
  16. StreamingCommunity/Lib/Downloader/HLS/segments.py +261 -355
  17. StreamingCommunity/Lib/Downloader/MP4/downloader.py +1 -1
  18. StreamingCommunity/Lib/FFmpeg/command.py +3 -3
  19. StreamingCommunity/Lib/M3U8/estimator.py +0 -1
  20. StreamingCommunity/Upload/version.py +1 -1
  21. {streamingcommunity-3.3.8.dist-info → streamingcommunity-3.3.9.dist-info}/METADATA +1 -1
  22. {streamingcommunity-3.3.8.dist-info → streamingcommunity-3.3.9.dist-info}/RECORD +26 -26
  23. {streamingcommunity-3.3.8.dist-info → streamingcommunity-3.3.9.dist-info}/WHEEL +0 -0
  24. {streamingcommunity-3.3.8.dist-info → streamingcommunity-3.3.9.dist-info}/entry_points.txt +0 -0
  25. {streamingcommunity-3.3.8.dist-info → streamingcommunity-3.3.9.dist-info}/licenses/LICENSE +0 -0
  26. {streamingcommunity-3.3.8.dist-info → streamingcommunity-3.3.9.dist-info}/top_level.txt +0 -0
@@ -5,7 +5,7 @@ import logging
5
5
 
6
6
 
7
7
  # External libraries
8
- import httpx
8
+ from curl_cffi import requests
9
9
  from rich.console import Console
10
10
  from pywidevine.cdm import Cdm
11
11
  from pywidevine.device import Device
@@ -39,8 +39,13 @@ def get_widevine_keys(pssh, license_url, cdm_device_path, headers=None, payload=
39
39
  req_headers = headers or {}
40
40
  req_headers['Content-Type'] = 'application/octet-stream'
41
41
 
42
- # Send license request
43
- response = httpx.post(license_url, data=challenge, headers=req_headers, content=payload)
42
+ # Send license request using curl_cffi
43
+ try:
44
+ # response = httpx.post(license_url, data=challenge, headers=req_headers, content=payload)
45
+ response = requests.post(license_url, data=challenge, headers=req_headers, json=payload, impersonate="chrome124")
46
+ except Exception as e:
47
+ console.print(f"[bold red]Request error:[/bold red] {e}")
48
+ return None
44
49
 
45
50
  if response.status_code != 200:
46
51
  console.print(f"[bold red]License error:[/bold red] {response.status_code}, {response.text}")
@@ -16,6 +16,7 @@ from StreamingCommunity.Util.os import get_mp4decrypt_path
16
16
  console = Console()
17
17
 
18
18
 
19
+ # NOTE: It would be better to do this for each file during the download, but for now it is left as is.
19
20
  def decrypt_with_mp4decrypt(encrypted_path, kid, key, output_path=None, cleanup=True):
20
21
  """
21
22
  Decrypt an mp4/m4s file using mp4decrypt.
@@ -13,7 +13,7 @@ from rich.table import Table
13
13
 
14
14
  # Internal utilities
15
15
  from StreamingCommunity.Util.config_json import config_manager
16
- from StreamingCommunity.Util.os import internet_manager
16
+ from StreamingCommunity.Util.os import os_manager, internet_manager
17
17
  from StreamingCommunity.Util.http_client import create_client
18
18
  from StreamingCommunity.Util.headers import get_userAgent
19
19
 
@@ -59,10 +59,17 @@ class DASH_Downloader:
59
59
  self.license_url = license_url
60
60
  self.mpd_url = mpd_url
61
61
  self.mpd_sub_list = mpd_sub_list or []
62
- self.out_path = os.path.splitext(os.path.abspath(str(output_path)))[0]
62
+ self.out_path = os.path.splitext(os.path.abspath(os_manager.get_sanitize_path(output_path)))[0]
63
63
  self.original_output_path = output_path
64
64
  self.file_already_exists = os.path.exists(self.original_output_path)
65
65
  self.parser = None
66
+
67
+ # Added defaults to avoid AttributeError when no subtitles/audio/video are present
68
+ # Non la soluzione migliore ma evita crash in assenza di audio/video/subs
69
+ self.selected_subs = []
70
+ self.selected_video = None
71
+ self.selected_audio = None
72
+
66
73
  self._setup_temp_dirs()
67
74
 
68
75
  self.error = None
@@ -1,11 +1,13 @@
1
1
  # 25.07.25
2
2
 
3
+ import re
3
4
  from urllib.parse import urljoin
4
5
  import xml.etree.ElementTree as ET
6
+ from typing import List, Dict, Optional, Tuple, Any
5
7
 
6
8
 
7
9
  # External library
8
- import httpx
10
+ from curl_cffi import requests
9
11
  from rich.console import Console
10
12
 
11
13
 
@@ -16,9 +18,384 @@ from StreamingCommunity.Util.config_json import config_manager
16
18
  # Variable
17
19
  console = Console()
18
20
  max_timeout = config_manager.get_int('REQUESTS', 'timeout')
21
+ max_retry = config_manager.get_int('REQUESTS', 'max_retry')
22
+
23
+
24
+
25
class CodecQuality:
    """Rank codec strings so competing representations can be compared.

    A higher rank means "preferred". Missing or unrecognised codecs rank 0.
    Table keys are matched against the start of the codec string on a token
    boundary (exact match, or key followed by '.'), most specific key first.
    """

    VIDEO_CODEC_RANK = {
        'av01': 5,   # AV1
        'vp9': 4,    # VP9
        'vp09': 4,   # VP9
        'hev1': 3,   # HEVC/H.265
        'hvc1': 3,   # HEVC/H.265
        'avc1': 2,   # H.264
        'avc3': 2,   # H.264
        'mp4v': 1,   # MPEG-4
    }

    AUDIO_CODEC_RANK = {
        'opus': 5,         # Opus
        'mp4a.40.2': 4,    # AAC-LC
        'mp4a.40.5': 3,    # AAC-HE
        'mp4a': 2,         # Generic AAC
        'ac-3': 2,         # Dolby Digital
        'ec-3': 3,         # Dolby Digital Plus
    }

    @staticmethod
    def _rank(codec: Optional[str], table: Dict[str, int]) -> int:
        """Return the rank of ``codec`` in ``table``, or 0 when nothing matches."""
        if not codec:
            return 0

        name = codec.strip().lower()

        # Longest key first so 'mp4a.40.2' wins over the generic 'mp4a'
        # regardless of dict order. The boundary check stops a bare prefix
        # test from matching 'mp4a.40.29' against the 'mp4a.40.2' entry.
        for key in sorted(table, key=len, reverse=True):
            if name == key or name.startswith(key + '.'):
                return table[key]
        return 0

    @staticmethod
    def get_video_codec_rank(codec: Optional[str]) -> int:
        """Get ranking for a video codec string (0 if unknown or empty)."""
        return CodecQuality._rank(codec, CodecQuality.VIDEO_CODEC_RANK)

    @staticmethod
    def get_audio_codec_rank(codec: Optional[str]) -> int:
        """Get ranking for an audio codec string (0 if unknown or empty)."""
        return CodecQuality._rank(codec, CodecQuality.AUDIO_CODEC_RANK)
68
+
69
+
70
class URLBuilder:
    """Expand SegmentTemplate strings into absolute segment URLs."""

    @staticmethod
    def build_url(base: str, template: str, rep_id: Optional[str] = None, number: Optional[int] = None, time: Optional[int] = None, bandwidth: Optional[int] = None) -> Optional[str]:
        """Build an absolute URL from ``template``, preserving query and fragment.

        Args:
            base: URL the expanded template is resolved against.
            template: Template that may contain ``$RepresentationID$``,
                ``$Bandwidth$``, ``$Number$`` and ``$Time$`` placeholders.
                The last three may carry a width tag such as ``$Number%05d$``.
            rep_id: Value for ``$RepresentationID$``; left untouched when None.
            number: Value for ``$Number$``; 0 is substituted when None.
            time: Value for ``$Time$``; left untouched when None.
            bandwidth: Value for ``$Bandwidth$``; left untouched when None.

        Returns:
            The absolute URL, or None when ``template`` is empty.
        """
        if not template:
            return None

        # Substitute RepresentationID and Bandwidth first
        if rep_id is not None:
            template = template.replace('$RepresentationID$', str(rep_id))
        if bandwidth is not None:
            template = URLBuilder._replace_identifier(template, 'Bandwidth', bandwidth)

        # $Number$ is always expanded (with 0 when no number was supplied)
        template = URLBuilder._replace_number(template, number)

        if time is not None:
            template = URLBuilder._replace_identifier(template, 'Time', time)

        return URLBuilder._finalize_url(base, template)

    @staticmethod
    def _replace_identifier(template: str, name: str, value: int) -> str:
        """Replace ``$<name>$`` and ``$<name>%0<width>d$`` with ``value``."""
        pattern = r'\$' + re.escape(name) + r'(?:%0(\d+)d)?\$'

        def _expand(match):
            width = match.group(1)
            text = str(value)
            # A width tag requests zero padding to at least that many digits.
            return text.zfill(int(width)) if width else text

        return re.sub(pattern, _expand, template)

    @staticmethod
    def _replace_number(template: str, number: Optional[int]) -> str:
        """Handle the $Number$ placeholder, including its optional width tag."""
        return URLBuilder._replace_identifier(template, 'Number', 0 if number is None else number)

    @staticmethod
    def _finalize_url(base: str, template: str) -> str:
        """Resolve ``template`` against ``base``, re-attaching query and fragment verbatim."""
        # Only the path part goes through urljoin; the query string and the
        # fragment of the template are appended afterwards exactly as written.
        path_and_query, hash_sep, fragment = template.partition('#')
        path_part, query_sep, query_part = path_and_query.partition('?')
        return urljoin(base, path_part) + query_sep + query_part + hash_sep + fragment
132
+
133
+
134
class SegmentTimelineParser:
    """Parser for SegmentTimeline elements"""

    def __init__(self, namespace: Dict[str, str]):
        # Prefix map used for the 'mpd:' lookups below.
        self.ns = namespace

    def parse(self, seg_timeline_element, start_number: int = 1) -> Tuple[List[int], List[int]]:
        """
        Parse SegmentTimeline and return (number_list, time_list).

        Args:
            seg_timeline_element: The SegmentTimeline element, or None.
            start_number: Number assigned to the first segment. Defaults to 1.

        Returns:
            Two lists of equal length: segment numbers and segment start
            times. Both are empty when ``seg_timeline_element`` is None.
        """
        number_list: List[int] = []
        time_list: List[int] = []

        if seg_timeline_element is None:
            return number_list, time_list

        current_time = None
        next_number = start_number

        for s_element in seg_timeline_element.findall('mpd:S', self.ns):
            raw_duration = s_element.get('d')
            if raw_duration is None:
                # Entries without a duration are skipped.
                continue

            duration = int(raw_duration)
            repeat = int(s_element.get('r', 0))

            # An explicit 't' resets the running clock; otherwise the clock
            # continues from the previous entry (or starts at 0).
            raw_time = s_element.get('t')
            if raw_time is not None:
                current_time = int(raw_time)
            elif current_time is None:
                current_time = 0

            # Each S entry yields r + 1 segments.
            # NOTE: a negative 'r' therefore yields no segments for that entry.
            for _ in range(repeat + 1):
                number_list.append(next_number)
                time_list.append(current_time)
                next_number += 1
                current_time += duration

        return number_list, time_list
175
+
176
+
177
class RepresentationParser:
    """Parser that turns the Representation elements of an AdaptationSet into dicts."""

    # Number of segment URLs generated when the template has no
    # SegmentTimeline; the real count is not derived from the manifest here.
    DEFAULT_SEGMENT_COUNT = 100

    def __init__(self, mpd_url: str, namespace: Dict[str, str]):
        self.mpd_url = mpd_url
        self.ns = namespace
        self.timeline_parser = SegmentTimelineParser(namespace)

    def parse_adaptation_set(self, adapt_set, base_url: str) -> List[Dict[str, Any]]:
        """
        Parse all representations in an adaptation set.

        Representations without any SegmentTemplate (own or inherited) are
        skipped.
        """
        mime_type = adapt_set.get('mimeType', '')
        lang = adapt_set.get('lang', '')

        # SegmentTemplate at AdaptationSet level is the fallback for every
        # Representation that does not define its own.
        adapt_seg_template = adapt_set.find('mpd:SegmentTemplate', self.ns)

        representations = []
        for rep_element in adapt_set.findall('mpd:Representation', self.ns):
            representation = self._parse_representation(rep_element, adapt_set, adapt_seg_template, base_url, mime_type, lang)
            if representation:
                representations.append(representation)

        return representations

    def _parse_representation(self, rep_element, adapt_set, adapt_seg_template, base_url: str, mime_type: str, lang: str) -> Optional[Dict[str, Any]]:
        """Parse a single representation; returns None when no SegmentTemplate applies."""
        rep_id = rep_element.get('id')
        bandwidth = rep_element.get('bandwidth')

        # These attributes may be declared on the AdaptationSet instead of
        # on each Representation, so fall back to the parent when missing.
        codecs = self._inherited_attr(rep_element, adapt_set, 'codecs')
        width = self._inherited_attr(rep_element, adapt_set, 'width')
        height = self._inherited_attr(rep_element, adapt_set, 'height')
        audio_sampling_rate = self._inherited_attr(rep_element, adapt_set, 'audioSamplingRate')

        # Representation-level SegmentTemplate overrides the AdaptationSet one
        rep_seg_template = rep_element.find('mpd:SegmentTemplate', self.ns)
        seg_tmpl = rep_seg_template if rep_seg_template is not None else adapt_seg_template
        if seg_tmpl is None:
            return None

        rep_base_url = self._resolve_base_url(rep_element, adapt_set, base_url)
        init_url, media_urls = self._build_segment_urls(seg_tmpl, rep_id, bandwidth, rep_base_url)

        # mimeType may be set on the Representation rather than the AdaptationSet
        effective_mime = mime_type or rep_element.get('mimeType', '')
        content_type = self._detect_content_type(effective_mime, width, height, audio_sampling_rate, codecs)
        clean_lang = self._clean_language(lang, content_type, rep_id, bandwidth)

        return {
            'id': rep_id,
            'type': content_type,
            'codec': codecs,
            'bandwidth': int(bandwidth) if bandwidth else 0,
            'width': int(width) if width else 0,
            'height': int(height) if height else 0,
            'audio_sampling_rate': int(audio_sampling_rate) if audio_sampling_rate else 0,
            'language': clean_lang,
            'init_url': init_url,
            'segment_urls': media_urls
        }

    @staticmethod
    def _inherited_attr(rep_element, adapt_set, name: str) -> Optional[str]:
        """Read ``name`` from the Representation, falling back to the AdaptationSet."""
        value = rep_element.get(name)
        if value is None and adapt_set is not None:
            value = adapt_set.get(name)
        return value

    @staticmethod
    def _detect_content_type(mime_type: Optional[str], width, height, audio_sampling_rate, codecs: Optional[str]) -> str:
        """Return the major content type ('video', 'audio', ...) or 'unknown'."""
        if mime_type:
            return mime_type.split('/')[0]
        if width or height:
            return 'video'
        if audio_sampling_rate or (codecs and 'mp4a' in codecs.lower()):
            return 'audio'
        return 'unknown'

    @staticmethod
    def _clean_language(lang: Optional[str], content_type: str, rep_id: Optional[str], bandwidth: Optional[str]) -> Optional[str]:
        """Normalise the language tag.

        Empty, "undefined" and "none" are treated as missing. Audio tracks
        with a missing language get a generated name (``aud_<id>`` or
        ``aud_<bandwidth>``); every other type gets None.
        """
        if lang and lang.lower() not in ['undefined', 'none', '']:
            return lang
        if content_type == 'audio':
            if rep_id:
                return f"aud_{rep_id}"
            return f"aud_{bandwidth or '0'}"
        return None

    @staticmethod
    def _join_base(base: str, element) -> str:
        """Apply the text of a BaseURL element (if any) on top of ``base``."""
        if element is None or not element.text:
            return base

        base_text = element.text.strip()

        # Handle BaseURL that might already be absolute
        if base_text.startswith('http'):
            return base_text
        return urljoin(base, base_text)

    def _resolve_base_url(self, rep_element, adapt_set, initial_base: str) -> str:
        """Resolve base URL by applying AdaptationSet then Representation BaseURLs to ``initial_base``."""
        base = initial_base

        # Outer level first, so an inner relative BaseURL resolves against it.
        for element in (adapt_set, rep_element):
            if element is not None:
                base = self._join_base(base, element.find('mpd:BaseURL', self.ns))

        return base

    @staticmethod
    def _segment_numbers(start_number: int, timeline_length: int) -> List[int]:
        """Return consecutive segment numbers beginning at ``start_number``.

        With a timeline, one number per timeline entry is produced; without
        one, DEFAULT_SEGMENT_COUNT numbers are generated as a fallback.
        """
        count = timeline_length if timeline_length else RepresentationParser.DEFAULT_SEGMENT_COUNT
        return list(range(start_number, start_number + count))

    def _build_segment_urls(self, seg_tmpl, rep_id: str, bandwidth: str, base_url: str) -> Tuple[Optional[str], List[str]]:
        """Build initialization and media segment URLs"""
        init = seg_tmpl.get('initialization')
        media = seg_tmpl.get('media')
        start_number = int(seg_tmpl.get('startNumber', 1))

        # Build init URL
        init_url = URLBuilder.build_url(
            base_url, init,
            rep_id=rep_id,
            bandwidth=int(bandwidth) if bandwidth else None
        ) if init else None

        # Parse segment timeline; only the times are taken from it, because
        # the numbering has to follow the template's startNumber.
        seg_timeline = seg_tmpl.find('mpd:SegmentTimeline', self.ns)
        _, time_list = self.timeline_parser.parse(seg_timeline)
        number_list = self._segment_numbers(start_number, len(time_list))

        # Build media URLs
        media_urls = self._build_media_urls(media, base_url, rep_id, bandwidth, number_list, time_list)

        return init_url, media_urls

    def _build_media_urls(self, media_template: str, base_url: str, rep_id: str, bandwidth: str, number_list: List[int], time_list: List[int]) -> List[str]:
        """Build list of media segment URLs"""
        if not media_template:
            return []

        bandwidth_int = int(bandwidth) if bandwidth else None

        # Time-addressed templates take precedence over number-addressed ones
        if '$Time$' in media_template and time_list:
            return [
                URLBuilder.build_url(base_url, media_template, rep_id=rep_id, time=t, bandwidth=bandwidth_int)
                for t in time_list
            ]

        if '$Number' in media_template and number_list:
            return [
                URLBuilder.build_url(base_url, media_template, rep_id=rep_id, number=n, bandwidth=bandwidth_int)
                for n in number_list
            ]

        # Template without a per-segment placeholder: a single URL
        return [URLBuilder.build_url(base_url, media_template, rep_id=rep_id, bandwidth=bandwidth_int)]
19
340
 
20
341
 
21
342
  class MPDParser:
343
@staticmethod
def _deduplicate_videos(representations: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """
    Collapse video representations that share the same (width, height).

    For each resolution the representation with the higher codec rank is
    kept; on equal rank the one with the strictly higher bandwidth wins.
    The first occurrence of a resolution fixes its position in the result.
    """
    best_by_resolution = {}

    for candidate in representations:
        resolution = (candidate['width'], candidate['height'])

        if resolution not in best_by_resolution:
            best_by_resolution[resolution] = candidate
            continue

        current = best_by_resolution[resolution]

        # Compare (codec rank, bandwidth) pairs: rank decides first,
        # bandwidth only breaks ties.
        current_score = (CodecQuality.get_video_codec_rank(current['codec']), current['bandwidth'])
        candidate_score = (CodecQuality.get_video_codec_rank(candidate['codec']), candidate['bandwidth'])

        if candidate_score > current_score:
            best_by_resolution[resolution] = candidate

    return list(best_by_resolution.values())
369
+
370
@staticmethod
def _deduplicate_audios(representations: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """
    Collapse audio representations that share (language, audio_sampling_rate).

    Within each group the representation with the higher codec rank is
    kept; on equal rank the one with the strictly higher bandwidth wins.
    The first occurrence of a group fixes its position in the result.
    """
    best_by_track = {}

    for candidate in representations:
        # Language and sampling rate together identify an audio track
        track = (candidate['language'], candidate['audio_sampling_rate'])

        if track not in best_by_track:
            best_by_track[track] = candidate
            continue

        current = best_by_track[track]

        # Compare (codec rank, bandwidth) pairs: rank decides first,
        # bandwidth only breaks ties.
        current_score = (CodecQuality.get_audio_codec_rank(current['codec']), current['bandwidth'])
        candidate_score = (CodecQuality.get_audio_codec_rank(candidate['codec']), candidate['bandwidth'])

        if candidate_score > current_score:
            best_by_track[track] = candidate

    return list(best_by_track.values())
398
+
22
399
  @staticmethod
23
400
  def get_best(representations):
24
401
  """
@@ -54,28 +431,51 @@ class MPDParser:
54
431
  return [r for r in representations if r['type'] == type_filter]
55
432
  return representations
56
433
 
57
- def __init__(self, mpd_url):
434
+ def __init__(self, mpd_url: str):
58
435
  self.mpd_url = mpd_url
59
436
  self.pssh = None
60
437
  self.representations = []
61
- self.base_url = mpd_url.rsplit('/', 1)[0] + '/'
438
+ self.ns = {}
439
+ self.root = None
440
+
441
def parse(self, custom_headers: Dict[str, str]) -> None:
    """Download the manifest, then run every extraction step in order.

    After fetching, the steps are: namespace detection, PSSH lookup,
    representation parsing and finally de-duplication.
    """
    self._fetch_and_parse_mpd(custom_headers)

    # The remaining stages take no arguments and work on the parsed root.
    pipeline = (
        self._extract_namespace,
        self._extract_pssh,
        self._parse_representations,
        self._deduplicate_representations,
    )
    for stage in pipeline:
        stage()
448
+
449
+ def _fetch_and_parse_mpd(self, custom_headers: Dict[str, str]) -> None:
450
+ """Fetch MPD content and parse XML"""
451
+ for attempt in range(max_retry + 1):
452
+ try:
453
+ response = requests.get(
454
+ self.mpd_url, headers=custom_headers, timeout=max_timeout,
455
+ allow_redirects=True, impersonate="chrome124"
456
+ )
457
+
458
+ response.raise_for_status()
459
+ self.root = ET.fromstring(response.content)
460
+ break
62
461
 
63
- def parse(self, custom_headers):
64
- response = httpx.get(self.mpd_url, headers=custom_headers, timeout=max_timeout, follow_redirects=True)
65
- response.raise_for_status()
462
+ except Exception as e:
463
+ if attempt == max_retry:
464
+ raise e
66
465
 
67
- root = ET.fromstring(response.content)
466
+ console.print(f"[bold yellow]Retrying manifest request ... ({attempt + 1}/{max_retry})[/bold yellow]")
68
467
 
69
- # Properly handle default namespace
70
- ns = {}
71
- if root.tag.startswith('{'):
72
- uri = root.tag[1:].split('}')[0]
73
- ns['mpd'] = uri
74
- ns['cenc'] = 'urn:mpeg:cenc:2013'
468
+ def _extract_namespace(self) -> None:
469
+ """Extract and register namespaces from the root element"""
470
+ if self.root.tag.startswith('{'):
471
+ uri = self.root.tag[1:].split('}')[0]
472
+ self.ns['mpd'] = uri
473
+ self.ns['cenc'] = 'urn:mpeg:cenc:2013'
75
474
 
76
- # Extract PSSH dynamically: take the first <cenc:pssh> found
77
- for protection in root.findall('.//mpd:ContentProtection', ns):
78
- pssh_element = protection.find('cenc:pssh', ns)
475
+ def _extract_pssh(self) -> None:
476
+ """Extract PSSH from ContentProtection elements"""
477
+ for protection in self.root.findall('.//mpd:ContentProtection', self.ns):
478
+ pssh_element = protection.find('cenc:pssh', self.ns)
79
479
  if pssh_element is not None and pssh_element.text:
80
480
  self.pssh = pssh_element.text
81
481
  break
@@ -83,83 +483,42 @@ class MPDParser:
83
483
  if not self.pssh:
84
484
  console.print("[bold red]PSSH not found in MPD![/bold red]")
85
485
 
86
- # Extract representations
87
- for adapt_set in root.findall('.//mpd:AdaptationSet', ns):
88
- mime_type = adapt_set.get('mimeType', '')
89
- lang = adapt_set.get('lang', '')
90
-
91
- # Find SegmentTemplate at AdaptationSet level (DASH spec allows this)
92
- seg_template = adapt_set.find('mpd:SegmentTemplate', ns)
93
-
94
- for rep in adapt_set.findall('mpd:Representation', ns):
95
- rep_id = rep.get('id')
96
- bandwidth = rep.get('bandwidth')
97
- codecs = rep.get('codecs')
98
- width = rep.get('width')
99
- height = rep.get('height')
100
-
101
- # Try to find SegmentTemplate at Representation level (overrides AdaptationSet)
102
- rep_seg_template = rep.find('mpd:SegmentTemplate', ns)
103
- seg_tmpl = rep_seg_template if rep_seg_template is not None else seg_template
104
- if seg_tmpl is None:
105
- continue
106
-
107
- init = seg_tmpl.get('initialization')
108
- media = seg_tmpl.get('media')
109
- start_number = int(seg_tmpl.get('startNumber', 1))
110
-
111
- # Use BaseURL from Representation if present, else fallback to self.base_url
112
- base_url_elem = rep.find('mpd:BaseURL', ns)
113
- base_url = base_url_elem.text if base_url_elem is not None else self.base_url
114
-
115
- # Replace $RepresentationID$ in init/media if present
116
- if init and '$RepresentationID$' in init:
117
- init = init.replace('$RepresentationID$', rep_id)
118
- if media and '$RepresentationID$' in media:
119
- media = media.replace('$RepresentationID$', rep_id)
120
-
121
- init_url = urljoin(base_url, init) if init else None
122
-
123
- # Calculate segments from timeline
124
- segments = []
125
- seg_timeline = seg_tmpl.find('mpd:SegmentTimeline', ns)
126
- if seg_timeline is not None:
127
- segment_number = start_number
128
- for s in seg_timeline.findall('mpd:S', ns):
129
- repeat = int(s.get('r', 0))
130
-
131
- # Always append at least one segment
132
- segments.append(segment_number)
133
- segment_number += 1
134
- for _ in range(repeat):
135
- segments.append(segment_number)
136
- segment_number += 1
137
-
138
- if not segments:
139
- segments = list(range(start_number, start_number + 100))
140
-
141
- # Replace $Number$ and $RepresentationID$ in media URL
142
- media_urls = []
143
- for n in segments:
144
- url = media
145
- if '$Number$' in url:
146
- url = url.replace('$Number$', str(n))
147
- if '$RepresentationID$' in url:
148
- url = url.replace('$RepresentationID$', rep_id)
149
- media_urls.append(urljoin(base_url, url))
150
-
151
- self.representations.append({
152
- 'id': rep_id,
153
- 'type': mime_type.split('/')[0] if mime_type else (rep.get('mimeType', '').split('/')[0] if rep.get('mimeType') else 'unknown'),
154
- 'codec': codecs,
155
- 'bandwidth': int(bandwidth) if bandwidth else 0,
156
- 'width': int(width) if width else 0,
157
- 'height': int(height) if height else 0,
158
- 'language': lang,
159
- 'init_url': init_url,
160
- 'segment_urls': media_urls
161
- })
486
def _parse_representations(self) -> None:
    """Collect the representations of every AdaptationSet found under the root."""
    initial_base = self._get_initial_base_url()
    parser = RepresentationParser(self.mpd_url, self.ns)

    # './/' searches at any depth below the root element
    for adaptation_set in self.root.findall('.//mpd:AdaptationSet', self.ns):
        self.representations.extend(parser.parse_adaptation_set(adaptation_set, initial_base))
162
494
 
495
+ def _deduplicate_representations(self) -> None:
496
+ """Remove duplicate video and audio representations"""
497
+ videos = [r for r in self.representations if r['type'] == 'video']
498
+ audios = [r for r in self.representations if r['type'] == 'audio']
499
+ others = [r for r in self.representations if r['type'] not in ['video', 'audio']]
500
+
501
+ deduplicated_videos = self._deduplicate_videos(videos)
502
+ deduplicated_audios = self._deduplicate_audios(audios)
503
+ self.representations = deduplicated_videos + deduplicated_audios + others
504
+
505
+ def _get_initial_base_url(self) -> str:
506
+ """Get the initial base URL from MPD-level BaseURL"""
507
+ base_url = self.mpd_url.rsplit('/', 1)[0] + '/'
508
+
509
+ # MPD-level BaseURL
510
+ mpd_base = self.root.find('mpd:BaseURL', self.ns)
511
+ if mpd_base is not None and mpd_base.text:
512
+ base_text = mpd_base.text.strip()
513
+
514
+ # Handle BaseURL that might already be absolute
515
+ if base_text.startswith('http'):
516
+ base_url = base_text
517
+ else:
518
+ base_url = urljoin(base_url, base_text)
519
+
520
+ return base_url
521
+
163
522
  def get_resolutions(self):
164
523
  """Return list of video representations with their resolutions."""
165
524
  return [
@@ -222,19 +581,18 @@ class MPDParser:
222
581
  Returns: (selected_audio, list_available_audio_langs, filter_custom_audio, downloadable_audio)
223
582
  """
224
583
  audio_reps = self.get_audios()
225
- list_available_audio_langs = [
226
- rep['language'] or "None" for rep in audio_reps
227
- ]
584
+
585
+ # Include all languages (including generated ones like aud_XXX)
586
+ list_available_audio_langs = [rep['language'] for rep in audio_reps]
228
587
 
229
588
  selected_audio = None
230
589
  filter_custom_audio = "First"
231
590
 
232
591
  if preferred_audio_langs:
233
-
234
592
  # Search for the first available language in order of preference
235
593
  for lang in preferred_audio_langs:
236
594
  for rep in audio_reps:
237
- if (rep['language'] or "None").lower() == lang.lower():
595
+ if rep['language'] and rep['language'].lower() == lang.lower():
238
596
  selected_audio = rep
239
597
  filter_custom_audio = lang
240
598
  break
@@ -245,5 +603,5 @@ class MPDParser:
245
603
  else:
246
604
  selected_audio = self.get_best_audio()
247
605
 
248
- downloadable_audio = selected_audio['language'] or "None" if selected_audio else "N/A"
606
+ downloadable_audio = selected_audio['language'] if selected_audio else "N/A"
249
607
  return selected_audio, list_available_audio_langs, filter_custom_audio, downloadable_audio