StreamingCommunity 3.3.8__py3-none-any.whl → 3.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of StreamingCommunity might be problematic. Click here for more details.
- StreamingCommunity/Api/Player/hdplayer.py +0 -5
- StreamingCommunity/Api/Player/mediapolisvod.py +4 -13
- StreamingCommunity/Api/Player/supervideo.py +3 -8
- StreamingCommunity/Api/Player/sweetpixel.py +1 -9
- StreamingCommunity/Api/Player/vixcloud.py +5 -16
- StreamingCommunity/Api/Site/altadefinizione/film.py +4 -15
- StreamingCommunity/Api/Site/altadefinizione/site.py +2 -7
- StreamingCommunity/Api/Site/altadefinizione/util/ScrapeSerie.py +2 -7
- StreamingCommunity/Api/Site/animeunity/site.py +9 -24
- StreamingCommunity/Api/Site/animeunity/util/ScrapeSerie.py +11 -27
- StreamingCommunity/Api/Site/animeworld/film.py +4 -2
- StreamingCommunity/Api/Site/animeworld/site.py +3 -11
- StreamingCommunity/Api/Site/animeworld/util/ScrapeSerie.py +1 -4
- StreamingCommunity/Api/Site/crunchyroll/film.py +17 -8
- StreamingCommunity/Api/Site/crunchyroll/series.py +8 -9
- StreamingCommunity/Api/Site/crunchyroll/site.py +14 -16
- StreamingCommunity/Api/Site/crunchyroll/util/ScrapeSerie.py +18 -65
- StreamingCommunity/Api/Site/crunchyroll/util/get_license.py +97 -106
- StreamingCommunity/Api/Site/guardaserie/site.py +4 -12
- StreamingCommunity/Api/Site/guardaserie/util/ScrapeSerie.py +3 -10
- StreamingCommunity/Api/Site/mediasetinfinity/film.py +11 -12
- StreamingCommunity/Api/Site/mediasetinfinity/series.py +1 -2
- StreamingCommunity/Api/Site/mediasetinfinity/site.py +3 -11
- StreamingCommunity/Api/Site/mediasetinfinity/util/ScrapeSerie.py +39 -50
- StreamingCommunity/Api/Site/mediasetinfinity/util/fix_mpd.py +3 -3
- StreamingCommunity/Api/Site/mediasetinfinity/util/get_license.py +8 -26
- StreamingCommunity/Api/Site/raiplay/film.py +6 -7
- StreamingCommunity/Api/Site/raiplay/series.py +1 -12
- StreamingCommunity/Api/Site/raiplay/site.py +8 -24
- StreamingCommunity/Api/Site/raiplay/util/ScrapeSerie.py +15 -22
- StreamingCommunity/Api/Site/raiplay/util/get_license.py +3 -12
- StreamingCommunity/Api/Site/streamingcommunity/film.py +5 -16
- StreamingCommunity/Api/Site/streamingcommunity/site.py +3 -22
- StreamingCommunity/Api/Site/streamingcommunity/util/ScrapeSerie.py +11 -26
- StreamingCommunity/Api/Site/streamingwatch/__init__.py +1 -0
- StreamingCommunity/Api/Site/streamingwatch/film.py +4 -2
- StreamingCommunity/Api/Site/streamingwatch/series.py +1 -1
- StreamingCommunity/Api/Site/streamingwatch/site.py +4 -18
- StreamingCommunity/Api/Site/streamingwatch/util/ScrapeSerie.py +0 -3
- StreamingCommunity/Api/Template/config_loader.py +0 -7
- StreamingCommunity/Lib/Downloader/DASH/cdm_helpher.py +8 -3
- StreamingCommunity/Lib/Downloader/DASH/decrypt.py +55 -1
- StreamingCommunity/Lib/Downloader/DASH/downloader.py +139 -55
- StreamingCommunity/Lib/Downloader/DASH/parser.py +458 -101
- StreamingCommunity/Lib/Downloader/DASH/segments.py +131 -74
- StreamingCommunity/Lib/Downloader/HLS/downloader.py +31 -50
- StreamingCommunity/Lib/Downloader/HLS/segments.py +266 -365
- StreamingCommunity/Lib/Downloader/MP4/downloader.py +1 -1
- StreamingCommunity/Lib/FFmpeg/capture.py +37 -5
- StreamingCommunity/Lib/FFmpeg/command.py +35 -93
- StreamingCommunity/Lib/M3U8/estimator.py +0 -1
- StreamingCommunity/Lib/TMBD/tmdb.py +2 -4
- StreamingCommunity/TelegramHelp/config.json +0 -1
- StreamingCommunity/Upload/version.py +1 -1
- StreamingCommunity/Util/config_json.py +28 -21
- StreamingCommunity/Util/http_client.py +28 -0
- StreamingCommunity/Util/os.py +16 -6
- {streamingcommunity-3.3.8.dist-info → streamingcommunity-3.4.0.dist-info}/METADATA +1 -3
- streamingcommunity-3.4.0.dist-info/RECORD +111 -0
- streamingcommunity-3.3.8.dist-info/RECORD +0 -111
- {streamingcommunity-3.3.8.dist-info → streamingcommunity-3.4.0.dist-info}/WHEEL +0 -0
- {streamingcommunity-3.3.8.dist-info → streamingcommunity-3.4.0.dist-info}/entry_points.txt +0 -0
- {streamingcommunity-3.3.8.dist-info → streamingcommunity-3.4.0.dist-info}/licenses/LICENSE +0 -0
- {streamingcommunity-3.3.8.dist-info → streamingcommunity-3.4.0.dist-info}/top_level.txt +0 -0
|
@@ -1,11 +1,14 @@
|
|
|
1
1
|
# 25.07.25
|
|
2
2
|
|
|
3
|
+
import re
|
|
4
|
+
import logging
|
|
3
5
|
from urllib.parse import urljoin
|
|
4
6
|
import xml.etree.ElementTree as ET
|
|
7
|
+
from typing import List, Dict, Optional, Tuple, Any
|
|
5
8
|
|
|
6
9
|
|
|
7
10
|
# External library
|
|
8
|
-
import
|
|
11
|
+
from curl_cffi import requests
|
|
9
12
|
from rich.console import Console
|
|
10
13
|
|
|
11
14
|
|
|
@@ -16,9 +19,384 @@ from StreamingCommunity.Util.config_json import config_manager
|
|
|
16
19
|
# Variable
|
|
17
20
|
console = Console()
|
|
18
21
|
max_timeout = config_manager.get_int('REQUESTS', 'timeout')
|
|
22
|
+
max_retry = config_manager.get_int('REQUESTS', 'max_retry')
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class CodecQuality:
    """Rank codec identifiers so competing representations can be compared.

    Ranks are small integers where a higher value is preferred. A rank of
    ``0`` means the codec string is missing or not listed in the tables.
    """

    # Keys are lower-case prefixes of the DASH ``codecs`` attribute value.
    VIDEO_CODEC_RANK = {
        'av01': 5,  # AV1
        'vp9': 4,   # VP9
        'vp09': 4,  # VP9
        'hev1': 3,  # HEVC/H.265
        'hvc1': 3,  # HEVC/H.265
        'avc1': 2,  # H.264
        'avc3': 2,  # H.264
        'mp4v': 1,  # MPEG-4
    }

    AUDIO_CODEC_RANK = {
        'opus': 5,       # Opus
        'mp4a.40.2': 4,  # AAC-LC
        'mp4a.40.5': 3,  # AAC-HE
        'mp4a': 2,       # Generic AAC
        'ac-3': 2,       # Dolby Digital
        'ec-3': 3,       # Dolby Digital Plus
    }

    @staticmethod
    def _rank(codec: Optional[str], table: dict) -> int:
        """Look up ``codec`` in ``table`` and return its rank (0 if unknown).

        A key matches only when it is followed by the end of the string or by
        a non-alphanumeric character, so ``mp4a.40.2`` does not swallow
        ``mp4a.40.29``. Keys are tried longest first, which makes the result
        independent of the table's insertion order.
        """
        if not codec:
            return 0

        codec_lower = codec.strip().lower()
        for key in sorted(table, key=len, reverse=True):
            if not codec_lower.startswith(key):
                continue
            remainder = codec_lower[len(key):]
            if not remainder or not remainder[0].isalnum():
                return table[key]
        return 0

    @staticmethod
    def get_video_codec_rank(codec: Optional[str]) -> int:
        """Return the rank of a video codec string, or 0 if unknown/missing."""
        return CodecQuality._rank(codec, CodecQuality.VIDEO_CODEC_RANK)

    @staticmethod
    def get_audio_codec_rank(codec: Optional[str]) -> int:
        """Return the rank of an audio codec string, or 0 if unknown/missing."""
        return CodecQuality._rank(codec, CodecQuality.AUDIO_CODEC_RANK)
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
class URLBuilder:
    """Expand DASH SegmentTemplate placeholders and resolve the result against a base URL."""

    @staticmethod
    def build_url(base: str, template: str, rep_id: Optional[str] = None, number: Optional[int] = None, time: Optional[int] = None, bandwidth: Optional[int] = None) -> Optional[str]:
        """Build an absolute URL from a segment template.

        Args:
            base: URL the expanded template is resolved against.
            template: Template that may contain ``$RepresentationID$``,
                ``$Bandwidth$``, ``$Number$`` and ``$Time$``. The last three
                may carry a ``%0<width>d`` tag, e.g. ``$Number%05d$``.
            rep_id: Replacement for ``$RepresentationID$`` (left as-is if None).
            number: Replacement for ``$Number$``; ``0`` is used when None.
            time: Replacement for ``$Time$`` (left as-is if None).
            bandwidth: Replacement for ``$Bandwidth$`` (left as-is if None).

        Returns:
            The absolute URL, or None when ``template`` is empty.
        """
        if not template:
            return None

        if rep_id is not None:
            template = template.replace('$RepresentationID$', rep_id)
        if bandwidth is not None:
            template = URLBuilder._replace_identifier(template, 'Bandwidth', bandwidth)

        # $Number$ is always expanded; a missing number becomes 0.
        template = URLBuilder._replace_number(template, number)

        if time is not None:
            template = URLBuilder._replace_identifier(template, 'Time', time)

        return URLBuilder._finalize_url(base, template)

    @staticmethod
    def _replace_identifier(template: str, name: str, value: int) -> str:
        """Replace ``$<name>$`` and ``$<name>%0<width>d$`` with ``value``.

        With a width tag the value is left-padded with zeros to that width.
        """
        def _expand(match):
            width = match.group(1)
            text = str(value)
            return text.zfill(int(width)) if width else text

        return re.sub(r'\$' + re.escape(name) + r'(?:%0(\d+)d)?\$', _expand, template)

    @staticmethod
    def _replace_number(template: str, number: Optional[int]) -> str:
        """Expand the ``$Number$`` placeholder, using 0 when ``number`` is None."""
        return URLBuilder._replace_identifier(template, 'Number', number if number is not None else 0)

    @staticmethod
    def _finalize_url(base: str, template: str) -> str:
        """Resolve ``template`` against ``base``, keeping its query and fragment verbatim.

        Only the path part goes through ``urljoin``; the template's query
        string and fragment are appended unchanged afterwards.
        """
        path_and_query, hash_sep, fragment = template.partition('#')
        path_part, query_sep, query_part = path_and_query.partition('?')
        return urljoin(base, path_part) + query_sep + query_part + hash_sep + fragment
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
class SegmentTimelineParser:
    """Parser for SegmentTimeline elements."""

    def __init__(self, namespace: Dict[str, str]):
        # Prefix -> namespace URI map used for the 'mpd:' lookups below.
        self.ns = namespace

    def parse(self, seg_timeline_element, start_number: int = 1) -> Tuple[List[int], List[int]]:
        """Expand a SegmentTimeline into parallel segment-number and start-time lists.

        Args:
            seg_timeline_element: The ``SegmentTimeline`` element, or None.
            start_number: Number given to the first segment (defaults to 1).

        Returns:
            ``(number_list, time_list)``; both are empty when the element is
            None. ``S`` entries without a ``d`` attribute are ignored.
        """
        number_list: List[int] = []
        time_list: List[int] = []

        if seg_timeline_element is None:
            return number_list, time_list

        s_elements = [
            s for s in seg_timeline_element.findall('mpd:S', self.ns)
            if s.get('d') is not None
        ]

        next_number = start_number
        current_time = 0  # used when the first S carries no 't'

        for index, s_element in enumerate(s_elements):
            duration = int(s_element.get('d'))
            repeat = int(s_element.get('r', 0))

            # An explicit 't' resets the running clock.
            explicit_start = s_element.get('t')
            if explicit_start is not None:
                current_time = int(explicit_start)

            if repeat < 0:
                repeat = self._open_ended_repeat(s_elements, index, current_time, duration)

            # Each S describes (r + 1) consecutive segments of length d.
            for _ in range(repeat + 1):
                number_list.append(next_number)
                time_list.append(current_time)
                next_number += 1
                current_time += duration

        return number_list, time_list

    @staticmethod
    def _open_ended_repeat(s_elements, index: int, current_time: int, duration: int) -> int:
        """Resolve a negative ``r`` (repeat until the next ``S@t``) to a repeat count.

        When the next entry has no ``t`` the end of the run cannot be derived
        from the timeline alone, so a single segment is emitted (repeat 0).
        """
        if duration > 0 and index + 1 < len(s_elements):
            next_start = s_elements[index + 1].get('t')
            if next_start is not None:
                span = int(next_start) - current_time
                if span > 0:
                    # ceil(span / duration) segments in total, minus the first one
                    return -(-span // duration) - 1
        return 0
|
|
176
|
+
|
|
177
|
+
|
|
178
|
+
class RepresentationParser:
    """Parser for individual representations."""

    def __init__(self, mpd_url: str, namespace: Dict[str, str]):
        self.mpd_url = mpd_url
        self.ns = namespace
        self.timeline_parser = SegmentTimelineParser(namespace)

    def parse_adaptation_set(self, adapt_set, base_url: str) -> List[Dict[str, Any]]:
        """Parse every Representation of an AdaptationSet.

        Representations without any SegmentTemplate are dropped.
        """
        mime_type = adapt_set.get('mimeType', '')
        lang = adapt_set.get('lang', '')

        # A SegmentTemplate on the AdaptationSet is the fallback for its representations.
        adapt_seg_template = adapt_set.find('mpd:SegmentTemplate', self.ns)

        parsed = (
            self._parse_representation(rep_element, adapt_set, adapt_seg_template, base_url, mime_type, lang)
            for rep_element in adapt_set.findall('mpd:Representation', self.ns)
        )
        return [representation for representation in parsed if representation]

    @staticmethod
    def _parse_sampling_rate(value: Optional[str]) -> int:
        """Convert ``audioSamplingRate`` to an int (0 when absent).

        The attribute may be one integer or a whitespace-separated min/max
        pair; for a pair the larger value is returned.
        """
        tokens = value.split() if value else []
        return max(int(token) for token in tokens) if tokens else 0

    def _parse_representation(self, rep_element, adapt_set, adapt_seg_template, base_url: str, mime_type: str, lang: str) -> Optional[Dict[str, Any]]:
        """Parse one Representation into a plain dict, or None if it has no SegmentTemplate."""

        def _common_attr(name):
            # Common attributes may be set on either level; the Representation wins.
            value = rep_element.get(name)
            if value is None and adapt_set is not None:
                value = adapt_set.get(name)
            return value

        rep_id = rep_element.get('id')
        bandwidth = rep_element.get('bandwidth')
        codecs = _common_attr('codecs')
        width = _common_attr('width')
        height = _common_attr('height')
        audio_sampling_rate = _common_attr('audioSamplingRate')
        mime_type = rep_element.get('mimeType') or mime_type

        # A Representation-level SegmentTemplate overrides the AdaptationSet one.
        rep_seg_template = rep_element.find('mpd:SegmentTemplate', self.ns)
        seg_tmpl = rep_seg_template if rep_seg_template is not None else adapt_seg_template
        if seg_tmpl is None:
            return None

        rep_base_url = self._resolve_base_url(rep_element, adapt_set, base_url)
        init_url, media_urls = self._build_segment_urls(seg_tmpl, rep_id, bandwidth, rep_base_url)

        # Content type: mimeType, then AdaptationSet@contentType, then heuristics.
        declared_type = adapt_set.get('contentType') if adapt_set is not None else None
        content_type = 'unknown'
        if mime_type:
            content_type = mime_type.split('/')[0]
        elif declared_type:
            content_type = declared_type
        elif width or height:
            content_type = 'video'
        elif audio_sampling_rate or (codecs and 'mp4a' in codecs.lower()):
            content_type = 'audio'

        # Placeholder languages become None; audio tracks with no usable
        # language get a generated label so they stay distinguishable.
        clean_lang = None
        if lang and lang.lower() not in ['undefined', 'none', '']:
            clean_lang = lang
        elif content_type == 'audio':
            if rep_id:
                clean_lang = f"aud_{rep_id}"
            else:
                clean_lang = f"aud_{bandwidth or '0'}"

        return {
            'id': rep_id,
            'type': content_type,
            'codec': codecs,
            'bandwidth': int(bandwidth) if bandwidth else 0,
            'width': int(width) if width else 0,
            'height': int(height) if height else 0,
            'audio_sampling_rate': self._parse_sampling_rate(audio_sampling_rate),
            'language': clean_lang,
            'init_url': init_url,
            'segment_urls': media_urls
        }

    def _resolve_base_url(self, rep_element, adapt_set, initial_base: str) -> str:
        """Apply the AdaptationSet and then the Representation BaseURL on top of ``initial_base``."""
        base = initial_base

        for element in (adapt_set, rep_element):
            if element is None:
                continue
            base_node = element.find('mpd:BaseURL', self.ns)
            if base_node is None or not base_node.text:
                continue

            base_text = base_node.text.strip()
            # An absolute BaseURL replaces what came before; a relative one is joined.
            base = base_text if base_text.startswith('http') else urljoin(base, base_text)

        return base

    def _build_segment_urls(self, seg_tmpl, rep_id: str, bandwidth: str, base_url: str) -> Tuple[Optional[str], List[str]]:
        """Build the initialization URL and the list of media segment URLs."""
        init = seg_tmpl.get('initialization')
        media = seg_tmpl.get('media')
        start_number = int(seg_tmpl.get('startNumber', 1))

        init_url = URLBuilder.build_url(
            base_url, init,
            rep_id=rep_id,
            bandwidth=int(bandwidth) if bandwidth else None
        ) if init else None

        seg_timeline = seg_tmpl.find('mpd:SegmentTimeline', self.ns)
        number_list, time_list = self.timeline_parser.parse(seg_timeline)

        if number_list:
            # Number the timeline entries from startNumber, whatever the parser used.
            number_list = list(range(start_number, start_number + len(number_list)))
        else:
            # NOTE(review): without a timeline the real segment count is not
            # known here, so a fixed window of 100 numbers is used.
            number_list = list(range(start_number, start_number + 100))

        media_urls = self._build_media_urls(media, base_url, rep_id, bandwidth, number_list, time_list)

        return init_url, media_urls

    def _build_media_urls(self, media_template: str, base_url: str, rep_id: str, bandwidth: str, number_list: List[int], time_list: List[int]) -> List[str]:
        """Expand ``media_template`` once per segment.

        Time-addressed templates are driven by ``time_list`` and
        number-addressed ones by ``number_list``. A template with neither
        placeholder yields a single URL.
        """
        if not media_template:
            return []

        bandwidth_int = int(bandwidth) if bandwidth else None

        if '$Time' in media_template and time_list:
            # Pass the segment number too, for templates that use both placeholders.
            if len(number_list) == len(time_list):
                numbers = number_list
            else:
                numbers = [None] * len(time_list)
            return [
                URLBuilder.build_url(
                    base_url, media_template,
                    rep_id=rep_id, number=n, time=t, bandwidth=bandwidth_int
                )
                for n, t in zip(numbers, time_list)
            ]

        if '$Number' in media_template and number_list:
            return [
                URLBuilder.build_url(
                    base_url, media_template,
                    rep_id=rep_id, number=n, bandwidth=bandwidth_int
                )
                for n in number_list
            ]

        return [URLBuilder.build_url(
            base_url, media_template,
            rep_id=rep_id, bandwidth=bandwidth_int
        )]
|
|
19
341
|
|
|
20
342
|
|
|
21
343
|
class MPDParser:
|
|
344
|
+
@staticmethod
def _deduplicate_videos(representations: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """
    Collapse video representations that share a (width, height) pair.

    For each resolution the entry with the higher codec rank is kept; on
    equal rank the one with the strictly higher bandwidth wins, otherwise the
    first one seen stays.
    """
    best_by_resolution = {}

    for candidate in representations:
        resolution = (candidate['width'], candidate['height'])
        current = best_by_resolution.setdefault(resolution, candidate)
        if current is candidate:
            continue

        # Codec rank first, bandwidth as tie-breaker.
        candidate_score = (CodecQuality.get_video_codec_rank(candidate['codec']), candidate['bandwidth'])
        current_score = (CodecQuality.get_video_codec_rank(current['codec']), current['bandwidth'])
        if candidate_score > current_score:
            best_by_resolution[resolution] = candidate

    return list(best_by_resolution.values())
|
|
370
|
+
|
|
371
|
+
@staticmethod
def _deduplicate_audios(representations: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """
    Collapse audio representations that share (language, sampling rate).

    Within a group the entry with the higher codec rank is kept; on equal
    rank the one with the strictly higher bandwidth wins, otherwise the first
    one seen stays.
    """
    best_by_track = {}

    for candidate in representations:
        # Language plus sampling rate identifies a distinct audio track.
        track = (candidate['language'], candidate['audio_sampling_rate'])
        current = best_by_track.setdefault(track, candidate)
        if current is candidate:
            continue

        # Codec rank first, bandwidth as tie-breaker.
        candidate_score = (CodecQuality.get_audio_codec_rank(candidate['codec']), candidate['bandwidth'])
        current_score = (CodecQuality.get_audio_codec_rank(current['codec']), current['bandwidth'])
        if candidate_score > current_score:
            best_by_track[track] = candidate

    return list(best_by_track.values())
|
|
399
|
+
|
|
22
400
|
@staticmethod
|
|
23
401
|
def get_best(representations):
|
|
24
402
|
"""
|
|
@@ -54,112 +432,92 @@ class MPDParser:
|
|
|
54
432
|
return [r for r in representations if r['type'] == type_filter]
|
|
55
433
|
return representations
|
|
56
434
|
|
|
57
|
-
def __init__(self, mpd_url):
|
|
435
|
+
def __init__(self, mpd_url: str):
|
|
58
436
|
self.mpd_url = mpd_url
|
|
59
437
|
self.pssh = None
|
|
60
438
|
self.representations = []
|
|
61
|
-
self.
|
|
439
|
+
self.ns = {}
|
|
440
|
+
self.root = None
|
|
441
|
+
|
|
442
|
+
def parse(self, custom_headers: Dict[str, str]) -> None:
|
|
443
|
+
"""Parse the MPD file and extract all representations"""
|
|
444
|
+
self._fetch_and_parse_mpd(custom_headers)
|
|
445
|
+
self._extract_namespace()
|
|
446
|
+
self._extract_pssh()
|
|
447
|
+
self._parse_representations()
|
|
448
|
+
self._deduplicate_representations()
|
|
449
|
+
|
|
450
|
+
def _fetch_and_parse_mpd(self, custom_headers: Dict[str, str]) -> None:
    """Download the manifest and store the parsed XML root in ``self.root``.

    Up to ``max_retry + 1`` attempts are made. Any exception from the
    request, the status check or the XML parse triggers a retry; the
    exception from the last attempt is re-raised.
    """
    last_attempt = max_retry

    for attempt in range(max_retry + 1):
        try:
            response = requests.get(
                self.mpd_url,
                headers=custom_headers,
                timeout=max_timeout,
                allow_redirects=True,
                impersonate="chrome124",
            )
            response.raise_for_status()
            logging.info(f"Successfully fetched MPD: {response.content}")
            self.root = ET.fromstring(response.content)

        except Exception as e:
            # Out of attempts: surface the failure to the caller.
            if attempt == last_attempt:
                raise e

            console.print(f"[bold yellow]Retrying manifest request ... ({attempt + 1}/{max_retry})[/bold yellow]")

        else:
            break
|
|
68
469
|
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
if root.tag.startswith('{'):
|
|
72
|
-
uri = root.tag[1:].split('}')[0]
|
|
73
|
-
ns['mpd'] = uri
|
|
74
|
-
ns['cenc'] = 'urn:mpeg:cenc:2013'
|
|
470
|
+
def _extract_namespace(self) -> None:
|
|
471
|
+
"""Extract and register namespaces from the root element"""
|
|
472
|
+
if self.root.tag.startswith('{'):
|
|
473
|
+
uri = self.root.tag[1:].split('}')[0]
|
|
474
|
+
self.ns['mpd'] = uri
|
|
475
|
+
self.ns['cenc'] = 'urn:mpeg:cenc:2013'
|
|
75
476
|
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
477
|
+
def _extract_pssh(self) -> None:
|
|
478
|
+
"""Extract PSSH from ContentProtection elements"""
|
|
479
|
+
for protection in self.root.findall('.//mpd:ContentProtection', self.ns):
|
|
480
|
+
pssh_element = protection.find('cenc:pssh', self.ns)
|
|
79
481
|
if pssh_element is not None and pssh_element.text:
|
|
80
482
|
self.pssh = pssh_element.text
|
|
81
483
|
break
|
|
82
484
|
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
# Find SegmentTemplate at AdaptationSet level (DASH spec allows this)
|
|
92
|
-
seg_template = adapt_set.find('mpd:SegmentTemplate', ns)
|
|
93
|
-
|
|
94
|
-
for rep in adapt_set.findall('mpd:Representation', ns):
|
|
95
|
-
rep_id = rep.get('id')
|
|
96
|
-
bandwidth = rep.get('bandwidth')
|
|
97
|
-
codecs = rep.get('codecs')
|
|
98
|
-
width = rep.get('width')
|
|
99
|
-
height = rep.get('height')
|
|
100
|
-
|
|
101
|
-
# Try to find SegmentTemplate at Representation level (overrides AdaptationSet)
|
|
102
|
-
rep_seg_template = rep.find('mpd:SegmentTemplate', ns)
|
|
103
|
-
seg_tmpl = rep_seg_template if rep_seg_template is not None else seg_template
|
|
104
|
-
if seg_tmpl is None:
|
|
105
|
-
continue
|
|
106
|
-
|
|
107
|
-
init = seg_tmpl.get('initialization')
|
|
108
|
-
media = seg_tmpl.get('media')
|
|
109
|
-
start_number = int(seg_tmpl.get('startNumber', 1))
|
|
110
|
-
|
|
111
|
-
# Use BaseURL from Representation if present, else fallback to self.base_url
|
|
112
|
-
base_url_elem = rep.find('mpd:BaseURL', ns)
|
|
113
|
-
base_url = base_url_elem.text if base_url_elem is not None else self.base_url
|
|
114
|
-
|
|
115
|
-
# Replace $RepresentationID$ in init/media if present
|
|
116
|
-
if init and '$RepresentationID$' in init:
|
|
117
|
-
init = init.replace('$RepresentationID$', rep_id)
|
|
118
|
-
if media and '$RepresentationID$' in media:
|
|
119
|
-
media = media.replace('$RepresentationID$', rep_id)
|
|
120
|
-
|
|
121
|
-
init_url = urljoin(base_url, init) if init else None
|
|
122
|
-
|
|
123
|
-
# Calculate segments from timeline
|
|
124
|
-
segments = []
|
|
125
|
-
seg_timeline = seg_tmpl.find('mpd:SegmentTimeline', ns)
|
|
126
|
-
if seg_timeline is not None:
|
|
127
|
-
segment_number = start_number
|
|
128
|
-
for s in seg_timeline.findall('mpd:S', ns):
|
|
129
|
-
repeat = int(s.get('r', 0))
|
|
130
|
-
|
|
131
|
-
# Always append at least one segment
|
|
132
|
-
segments.append(segment_number)
|
|
133
|
-
segment_number += 1
|
|
134
|
-
for _ in range(repeat):
|
|
135
|
-
segments.append(segment_number)
|
|
136
|
-
segment_number += 1
|
|
137
|
-
|
|
138
|
-
if not segments:
|
|
139
|
-
segments = list(range(start_number, start_number + 100))
|
|
140
|
-
|
|
141
|
-
# Replace $Number$ and $RepresentationID$ in media URL
|
|
142
|
-
media_urls = []
|
|
143
|
-
for n in segments:
|
|
144
|
-
url = media
|
|
145
|
-
if '$Number$' in url:
|
|
146
|
-
url = url.replace('$Number$', str(n))
|
|
147
|
-
if '$RepresentationID$' in url:
|
|
148
|
-
url = url.replace('$RepresentationID$', rep_id)
|
|
149
|
-
media_urls.append(urljoin(base_url, url))
|
|
150
|
-
|
|
151
|
-
self.representations.append({
|
|
152
|
-
'id': rep_id,
|
|
153
|
-
'type': mime_type.split('/')[0] if mime_type else (rep.get('mimeType', '').split('/')[0] if rep.get('mimeType') else 'unknown'),
|
|
154
|
-
'codec': codecs,
|
|
155
|
-
'bandwidth': int(bandwidth) if bandwidth else 0,
|
|
156
|
-
'width': int(width) if width else 0,
|
|
157
|
-
'height': int(height) if height else 0,
|
|
158
|
-
'language': lang,
|
|
159
|
-
'init_url': init_url,
|
|
160
|
-
'segment_urls': media_urls
|
|
161
|
-
})
|
|
485
|
+
def _parse_representations(self) -> None:
    """Append the representations of every AdaptationSet to ``self.representations``."""
    initial_base = self._get_initial_base_url()
    rep_parser = RepresentationParser(self.mpd_url, self.ns)

    # './/' finds AdaptationSets at any depth below the root.
    for adaptation in self.root.findall('.//mpd:AdaptationSet', self.ns):
        parsed = rep_parser.parse_adaptation_set(adaptation, initial_base)
        self.representations.extend(parsed)
|
|
162
493
|
|
|
494
|
+
def _deduplicate_representations(self) -> None:
|
|
495
|
+
"""Remove duplicate video and audio representations"""
|
|
496
|
+
videos = [r for r in self.representations if r['type'] == 'video']
|
|
497
|
+
audios = [r for r in self.representations if r['type'] == 'audio']
|
|
498
|
+
others = [r for r in self.representations if r['type'] not in ['video', 'audio']]
|
|
499
|
+
|
|
500
|
+
deduplicated_videos = self._deduplicate_videos(videos)
|
|
501
|
+
deduplicated_audios = self._deduplicate_audios(audios)
|
|
502
|
+
self.representations = deduplicated_videos + deduplicated_audios + others
|
|
503
|
+
|
|
504
|
+
def _get_initial_base_url(self) -> str:
|
|
505
|
+
"""Get the initial base URL from MPD-level BaseURL"""
|
|
506
|
+
base_url = self.mpd_url.rsplit('/', 1)[0] + '/'
|
|
507
|
+
|
|
508
|
+
# MPD-level BaseURL
|
|
509
|
+
mpd_base = self.root.find('mpd:BaseURL', self.ns)
|
|
510
|
+
if mpd_base is not None and mpd_base.text:
|
|
511
|
+
base_text = mpd_base.text.strip()
|
|
512
|
+
|
|
513
|
+
# Handle BaseURL that might already be absolute
|
|
514
|
+
if base_text.startswith('http'):
|
|
515
|
+
base_url = base_text
|
|
516
|
+
else:
|
|
517
|
+
base_url = urljoin(base_url, base_text)
|
|
518
|
+
|
|
519
|
+
return base_url
|
|
520
|
+
|
|
163
521
|
def get_resolutions(self):
|
|
164
522
|
"""Return list of video representations with their resolutions."""
|
|
165
523
|
return [
|
|
@@ -222,19 +580,18 @@ class MPDParser:
|
|
|
222
580
|
Returns: (selected_audio, list_available_audio_langs, filter_custom_audio, downloadable_audio)
|
|
223
581
|
"""
|
|
224
582
|
audio_reps = self.get_audios()
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
]
|
|
583
|
+
|
|
584
|
+
# Include all languages (including generated ones like aud_XXX)
|
|
585
|
+
list_available_audio_langs = [rep['language'] for rep in audio_reps]
|
|
228
586
|
|
|
229
587
|
selected_audio = None
|
|
230
588
|
filter_custom_audio = "First"
|
|
231
589
|
|
|
232
590
|
if preferred_audio_langs:
|
|
233
|
-
|
|
234
591
|
# Search for the first available language in order of preference
|
|
235
592
|
for lang in preferred_audio_langs:
|
|
236
593
|
for rep in audio_reps:
|
|
237
|
-
if
|
|
594
|
+
if rep['language'] and rep['language'].lower() == lang.lower():
|
|
238
595
|
selected_audio = rep
|
|
239
596
|
filter_custom_audio = lang
|
|
240
597
|
break
|
|
@@ -245,5 +602,5 @@ class MPDParser:
|
|
|
245
602
|
else:
|
|
246
603
|
selected_audio = self.get_best_audio()
|
|
247
604
|
|
|
248
|
-
downloadable_audio = selected_audio['language']
|
|
605
|
+
downloadable_audio = selected_audio['language'] if selected_audio else "N/A"
|
|
249
606
|
return selected_audio, list_available_audio_langs, filter_custom_audio, downloadable_audio
|