matrice-streaming 0.1.14__py3-none-any.whl → 0.1.65__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- matrice_streaming/__init__.py +44 -32
- matrice_streaming/streaming_gateway/camera_streamer/__init__.py +68 -1
- matrice_streaming/streaming_gateway/camera_streamer/async_camera_worker.py +1388 -0
- matrice_streaming/streaming_gateway/camera_streamer/async_ffmpeg_worker.py +966 -0
- matrice_streaming/streaming_gateway/camera_streamer/camera_streamer.py +188 -24
- matrice_streaming/streaming_gateway/camera_streamer/device_detection.py +507 -0
- matrice_streaming/streaming_gateway/camera_streamer/encoding_pool_manager.py +136 -0
- matrice_streaming/streaming_gateway/camera_streamer/ffmpeg_camera_streamer.py +1048 -0
- matrice_streaming/streaming_gateway/camera_streamer/ffmpeg_config.py +192 -0
- matrice_streaming/streaming_gateway/camera_streamer/ffmpeg_worker_manager.py +470 -0
- matrice_streaming/streaming_gateway/camera_streamer/gstreamer_camera_streamer.py +1368 -0
- matrice_streaming/streaming_gateway/camera_streamer/gstreamer_worker.py +1063 -0
- matrice_streaming/streaming_gateway/camera_streamer/gstreamer_worker_manager.py +546 -0
- matrice_streaming/streaming_gateway/camera_streamer/message_builder.py +60 -15
- matrice_streaming/streaming_gateway/camera_streamer/nvdec.py +1330 -0
- matrice_streaming/streaming_gateway/camera_streamer/nvdec_worker_manager.py +412 -0
- matrice_streaming/streaming_gateway/camera_streamer/platform_pipelines.py +680 -0
- matrice_streaming/streaming_gateway/camera_streamer/stream_statistics.py +111 -4
- matrice_streaming/streaming_gateway/camera_streamer/video_capture_manager.py +223 -27
- matrice_streaming/streaming_gateway/camera_streamer/worker_manager.py +694 -0
- matrice_streaming/streaming_gateway/debug/__init__.py +27 -2
- matrice_streaming/streaming_gateway/debug/benchmark.py +727 -0
- matrice_streaming/streaming_gateway/debug/debug_gstreamer_gateway.py +599 -0
- matrice_streaming/streaming_gateway/debug/debug_streaming_gateway.py +245 -95
- matrice_streaming/streaming_gateway/debug/debug_utils.py +29 -0
- matrice_streaming/streaming_gateway/debug/test_videoplayback.py +318 -0
- matrice_streaming/streaming_gateway/dynamic_camera_manager.py +656 -39
- matrice_streaming/streaming_gateway/metrics_reporter.py +676 -139
- matrice_streaming/streaming_gateway/streaming_action.py +71 -20
- matrice_streaming/streaming_gateway/streaming_gateway.py +1026 -78
- matrice_streaming/streaming_gateway/streaming_gateway_utils.py +175 -20
- matrice_streaming/streaming_gateway/streaming_status_listener.py +89 -0
- {matrice_streaming-0.1.14.dist-info → matrice_streaming-0.1.65.dist-info}/METADATA +1 -1
- matrice_streaming-0.1.65.dist-info/RECORD +56 -0
- matrice_streaming-0.1.14.dist-info/RECORD +0 -38
- {matrice_streaming-0.1.14.dist-info → matrice_streaming-0.1.65.dist-info}/WHEEL +0 -0
- {matrice_streaming-0.1.14.dist-info → matrice_streaming-0.1.65.dist-info}/licenses/LICENSE.txt +0 -0
- {matrice_streaming-0.1.14.dist-info → matrice_streaming-0.1.65.dist-info}/top_level.txt +0 -0
matrice_streaming/streaming_gateway/camera_streamer/stream_statistics.py +111 -4

```diff
@@ -8,6 +8,7 @@ class StreamStatistics:
     """Manages streaming statistics and timing data."""
 
     STATS_LOG_INTERVAL = 50
+    MAX_HISTORY_SIZE = 1000  # Maximum entries per stream to prevent memory growth
 
     def __init__(self):
         """Initialize statistics tracker."""
@@ -25,11 +26,13 @@ class StreamStatistics:
         self.last_frame_sizes: Dict[str, int] = {}
 
         # History storage for accurate statistics (accumulated between reporting intervals)
+        # These are bounded to MAX_HISTORY_SIZE entries per stream
         self.read_times_history: Dict[str, List[float]] = {}
         self.write_times_history: Dict[str, List[float]] = {}
         self.process_times_history: Dict[str, List[float]] = {}
         self.frame_sizes_history: Dict[str, List[int]] = {}
         self.frame_timestamps_history: Dict[str, List[float]] = {}
+        self.encoding_times_history: Dict[str, List[float]] = {}  # NEW: encoding time tracking
 
         # Per-stream input order tracking
         self.input_order: Dict[str, int] = {}
@@ -58,7 +61,8 @@ class StreamStatistics:
         read_time: float,
         write_time: float,
         process_time: float,
-        frame_size: Optional[int] = None
+        frame_size: Optional[int] = None,
+        encoding_time: float = 0.0
     ):
         """Update timing statistics for a stream.
 
@@ -68,6 +72,7 @@ class StreamStatistics:
             write_time: Time spent writing/sending frame
             process_time: Total processing time
             frame_size: Size of encoded frame in bytes (ACG frame size)
+            encoding_time: Time spent encoding frame (NEW)
         """
         key = self._normalize_key(stream_key)
         timestamp = time.time()
@@ -79,21 +84,34 @@ class StreamStatistics:
         if frame_size is not None:
             self.last_frame_sizes[key] = frame_size
 
-        # Append to history for accurate statistics
+        # Append to history for accurate statistics (bounded to prevent memory growth)
         if key not in self.read_times_history:
            self.read_times_history[key] = []
            self.write_times_history[key] = []
            self.process_times_history[key] = []
            self.frame_sizes_history[key] = []
            self.frame_timestamps_history[key] = []
+           self.encoding_times_history[key] = []
 
         self.read_times_history[key].append(read_time)
         self.write_times_history[key].append(write_time)
         self.process_times_history[key].append(process_time)
         self.frame_timestamps_history[key].append(timestamp)
+        self.encoding_times_history[key].append(encoding_time)
 
         if frame_size is not None:
             self.frame_sizes_history[key].append(frame_size)
+
+        # Enforce size limits to prevent unbounded growth
+        # Keep only the last MAX_HISTORY_SIZE entries
+        if len(self.read_times_history[key]) > self.MAX_HISTORY_SIZE:
+            self.read_times_history[key] = self.read_times_history[key][-self.MAX_HISTORY_SIZE:]
+            self.write_times_history[key] = self.write_times_history[key][-self.MAX_HISTORY_SIZE:]
+            self.process_times_history[key] = self.process_times_history[key][-self.MAX_HISTORY_SIZE:]
+            self.frame_timestamps_history[key] = self.frame_timestamps_history[key][-self.MAX_HISTORY_SIZE:]
+            self.encoding_times_history[key] = self.encoding_times_history[key][-self.MAX_HISTORY_SIZE:]
+        if len(self.frame_sizes_history[key]) > self.MAX_HISTORY_SIZE:
+            self.frame_sizes_history[key] = self.frame_sizes_history[key][-self.MAX_HISTORY_SIZE:]
 
     def get_timing(self, stream_key: str) -> Tuple[float, float, float]:
         """Get timing data for a stream.
```
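The trimming logic above caps each history list by re-slicing it once it exceeds `MAX_HISTORY_SIZE`. For reference, a `collections.deque` with `maxlen` gives the same bounded window without the periodic copy; this is a minimal standalone sketch, not the package's implementation:

```python
from collections import deque

MAX_HISTORY_SIZE = 1000  # same bound the diff introduces

# A deque with maxlen evicts the oldest entry automatically on append,
# so no explicit slicing pass is needed when the cap is reached.
read_times = deque(maxlen=MAX_HISTORY_SIZE)

for i in range(1500):
    read_times.append(0.001 * i)

assert len(read_times) == MAX_HISTORY_SIZE
assert read_times[0] == 0.001 * 500  # the oldest 500 samples were evicted
```

The list-and-slice approach chosen in the diff keeps the downstream `min()`/`max()`/`sum()` aggregation code unchanged, at the cost of an O(n) copy each time the cap is hit.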
```diff
@@ -142,7 +160,7 @@ class StreamStatistics:
         write_time: float
     ):
         """Log periodic statistics.
-
+
         Args:
             stream_key: Stream identifier
             read_time: Time spent reading frame
@@ -157,7 +175,82 @@ class StreamStatistics:
             f"Timing: read={read_time*1000:.1f}ms, encode={encoding_time*1000:.1f}ms, "
             f"write={write_time*1000:.1f}ms"
         )
-
+
+    def log_detailed_stats(self, stream_key: str) -> None:
+        """Log comprehensive metrics for a stream.
+
+        Args:
+            stream_key: Stream identifier
+        """
+        stats = self.get_timing_statistics(stream_key)
+        if not stats:
+            return
+
+        # Calculate additional metrics
+        total_frames = self.frames_sent + self.frames_skipped + self.frames_diff_sent
+        skip_rate = (self.frames_skipped / total_frames * 100) if total_frames > 0 else 0
+
+        # FPS metrics
+        fps_stats = stats.get("fps", {})
+        fps_current = fps_stats.get("avg", 0)
+        fps_min = fps_stats.get("min", 0)
+        fps_max = fps_stats.get("max", 0)
+
+        # Latency breakdown (ms)
+        read_ms = stats.get("read_time_ms", {}).get("avg", 0)
+        encoding_ms = stats.get("encoding_time_ms", {}).get("avg", 0)
+        write_ms = stats.get("write_time_ms", {}).get("avg", 0)
+        process_ms = stats.get("process_time_ms", {}).get("avg", 0)
+
+        # Frame size stats (KB)
+        frame_size_stats = stats.get("frame_size_bytes", {})
+        frame_size_avg_kb = frame_size_stats.get("avg", 0) / 1024
+        frame_size_min_kb = frame_size_stats.get("min", 0) / 1024
+        frame_size_max_kb = frame_size_stats.get("max", 0) / 1024
+
+        # Throughput (KB/s)
+        throughput_kbps = (frame_size_avg_kb * fps_current) if fps_current > 0 else 0
+
+        self.logger.info(
+            f"Stream Metrics [{stream_key}]: "
+            f"FPS={fps_current:.1f} (min={fps_min:.1f}, max={fps_max:.1f}) | "
+            f"Latency: read={read_ms:.1f}ms, encode={encoding_ms:.1f}ms, write={write_ms:.1f}ms, total={process_ms:.1f}ms | "
+            f"Frames: sent={self.frames_sent}, skipped={self.frames_skipped} ({skip_rate:.1f}%) | "
+            f"Frame size: {frame_size_avg_kb:.1f}KB (min={frame_size_min_kb:.1f}, max={frame_size_max_kb:.1f}) | "
+            f"Throughput: {throughput_kbps:.1f} KB/s"
+        )
+
+    def log_aggregated_stats(self) -> None:
+        """Log aggregated metrics across all streams."""
+        total_frames_sent = self.frames_sent
+        total_frames_skipped = self.frames_skipped
+        total_frames_diff = self.frames_diff_sent
+        total_frames = total_frames_sent + total_frames_skipped + total_frames_diff
+
+        if total_frames == 0:
+            return
+
+        skip_rate = (total_frames_skipped / total_frames * 100)
+        diff_rate = (total_frames_diff / total_frames * 100)
+
+        # Aggregate FPS across all streams
+        all_fps = []
+        for stream_key in self.last_read_times.keys():
+            stats = self.get_timing_statistics(stream_key)
+            if stats and "fps" in stats:
+                fps_avg = stats["fps"].get("avg", 0)
+                if fps_avg > 0:
+                    all_fps.append(fps_avg)
+
+        avg_fps = sum(all_fps) / len(all_fps) if all_fps else 0
+
+        self.logger.info(
+            f"Gateway Aggregate Metrics: "
+            f"Total frames: {total_frames} (sent={total_frames_sent}, skipped={total_frames_skipped}, diff={total_frames_diff}) | "
+            f"Skip rate: {skip_rate:.1f}%, Diff rate: {diff_rate:.1f}% | "
+            f"Avg FPS across {len(all_fps)} streams: {avg_fps:.1f}"
+        )
+
     def get_transmission_stats(self, video_codec: str, active_streams: int) -> Dict[str, Any]:
         """Get comprehensive transmission statistics.
 
```
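Both new logging methods reduce raw frame counters to rates before emitting a single info line. The arithmetic is easy to check in isolation; a small standalone sketch of the skip-rate and throughput formulas used above, with made-up values:

```python
# Made-up counters; in the class these come from frames_sent,
# frames_skipped, and frames_diff_sent.
frames_sent, frames_skipped, frames_diff_sent = 900, 80, 20
total_frames = frames_sent + frames_skipped + frames_diff_sent

skip_rate = (frames_skipped / total_frames * 100) if total_frames > 0 else 0
print(f"skip rate: {skip_rate:.1f}%")  # 8.0%

# Throughput is average encoded frame size (KB) times average FPS.
frame_size_avg_kb = 42.0
fps_current = 25.0
throughput_kbps = (frame_size_avg_kb * fps_current) if fps_current > 0 else 0
print(f"throughput: {throughput_kbps:.1f} KB/s")  # 1050.0 KB/s
```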
```diff
@@ -231,6 +324,7 @@ class StreamStatistics:
         result = {
             "read_time_ms": {"min": 0, "max": 0, "avg": 0, "count": 0},
             "write_time_ms": {"min": 0, "max": 0, "avg": 0, "count": 0},
+            "encoding_time_ms": {"min": 0, "max": 0, "avg": 0, "count": 0},  # NEW
             "process_time_ms": {"min": 0, "max": 0, "avg": 0, "count": 0},
             "frame_size_bytes": {"min": 0, "max": 0, "avg": 0, "count": 0},
             "fps": {"min": 0, "max": 0, "avg": 0},
@@ -266,6 +360,16 @@ class StreamStatistics:
                 "count": len(process_times),
             }
 
+        # Calculate encoding time statistics
+        if key in self.encoding_times_history and self.encoding_times_history[key]:
+            encoding_times = self.encoding_times_history[key]
+            result["encoding_time_ms"] = {
+                "min": min(encoding_times) * 1000,
+                "max": max(encoding_times) * 1000,
+                "avg": (sum(encoding_times) / len(encoding_times)) * 1000,
+                "count": len(encoding_times),
+            }
+
         # Calculate frame size statistics
         if key in self.frame_sizes_history and self.frame_sizes_history[key]:
             frame_sizes = self.frame_sizes_history[key]
@@ -318,6 +422,7 @@ class StreamStatistics:
             self.process_times_history.clear()
             self.frame_sizes_history.clear()
             self.frame_timestamps_history.clear()
+            self.encoding_times_history.clear()
             self.logger.debug("Cleared timing history for all streams")
         else:
             # Clear specific stream
@@ -332,6 +437,8 @@ class StreamStatistics:
                 self.frame_sizes_history[key].clear()
             if key in self.frame_timestamps_history:
                 self.frame_timestamps_history[key].clear()
+            if key in self.encoding_times_history:
+                self.encoding_times_history[key].clear()
             self.logger.debug(f"Cleared timing history for stream: {stream_key}")
 
     def reset(self):
```
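With the `encoding_time_ms` bucket added, the dict returned by `get_timing_statistics` carries a min/max/avg/count group for each timing channel plus `fps`. A hedged sketch of reading it from calling code; the `tracker` variable and stream key are illustrative, not from the package:

```python
# Assumes a StreamStatistics instance that has received update_timing() calls.
stats = tracker.get_timing_statistics("camera_01")  # hypothetical stream key

enc = stats["encoding_time_ms"]
if enc["count"] > 0:
    print(
        f"encode: avg={enc['avg']:.1f}ms "
        f"(min={enc['min']:.1f}, max={enc['max']:.1f}, n={enc['count']})"
    )
else:
    # Streams with no samples keep the zeroed defaults from the result dict.
    print("no encoding samples recorded yet")
```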
matrice_streaming/streaming_gateway/camera_streamer/video_capture_manager.py +223 -27

```diff
@@ -5,7 +5,9 @@ import cv2
 import requests
 import os
 import tempfile
+import hashlib
 from pathlib import Path
+from urllib.parse import urlparse, urlunparse
 from typing import Union, Optional, Tuple, Dict, Any
 
 
@@ -14,18 +16,28 @@ class VideoSourceConfig:
     MAX_CAPTURE_RETRIES = 3
     CAPTURE_RETRY_DELAY = 2.0
     MAX_CONSECUTIVE_FAILURES = 10
-    DOWNLOAD_TIMEOUT = 30
+    DOWNLOAD_TIMEOUT = 30  # Base timeout in seconds
+    DOWNLOAD_TIMEOUT_PER_100MB = 30  # Additional seconds per 100MB
+    MAX_DOWNLOAD_TIMEOUT = 600  # 10 minutes max
     DOWNLOAD_CHUNK_SIZE = 8192
-    DEFAULT_BUFFER_SIZE = 1
+    DEFAULT_BUFFER_SIZE = 5  # Increased from 1 to 5 for better throughput
     DEFAULT_FPS = 30
 
 
 class VideoCaptureManager:
-    """Manages video capture from various sources with retry logic and caching."""
+    """Manages video capture from various sources with retry logic and caching.
+
+    Features URL deduplication: if multiple cameras use the same video URL
+    (ignoring query parameters like AWS signed URL tokens), the video is only
+    downloaded once and the local path is shared between cameras.
+    """
 
     def __init__(self):
         """Initialize video capture manager."""
+        # Maps full URL -> local file path (for backwards compatibility)
         self.downloaded_files: Dict[str, str] = {}
+        # Maps normalized URL (without query params) -> local file path (for deduplication)
+        self._normalized_url_to_path: Dict[str, str] = {}
         self.temp_dir = Path(tempfile.gettempdir()) / "matrice_streaming_cache"
         self.temp_dir.mkdir(exist_ok=True)
         self.logger = logging.getLogger(__name__)
```
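The three new constants define a size-scaled download timeout: 30 s base, plus 30 s per full 100 MB, capped at 600 s. A standalone sketch of that formula, mirroring the expression used later in `_download_video_file`:

```python
DOWNLOAD_TIMEOUT = 30            # base timeout in seconds
DOWNLOAD_TIMEOUT_PER_100MB = 30  # additional seconds per 100MB
MAX_DOWNLOAD_TIMEOUT = 600       # hard cap

def download_timeout(size_bytes: int) -> int:
    """Timeout scaled by file size, as in the diff's dynamic-timeout logic."""
    file_size_mb = size_bytes / (1024 * 1024)
    return min(
        DOWNLOAD_TIMEOUT + int(file_size_mb // 100) * DOWNLOAD_TIMEOUT_PER_100MB,
        MAX_DOWNLOAD_TIMEOUT,
    )

print(download_timeout(50 * 1024 * 1024))   # 30  (under 100MB: base only)
print(download_timeout(250 * 1024 * 1024))  # 90  (two full 100MB increments)
print(download_timeout(5 * 1024 ** 3))      # 600 (capped at 10 minutes)
```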
```diff
@@ -43,7 +55,7 @@ class VideoCaptureManager:
         if isinstance(source, str) and self._is_downloadable_url(source):
             local_path = self._download_video_file(source, stream_key)
             if local_path:
-                self.logger.
+                self.logger.debug(f"Using downloaded file: {local_path}")
                 return local_path
             else:
                 self.logger.warning(f"Failed to download {source}, will try to use URL directly")
@@ -82,11 +94,28 @@ class VideoCaptureManager:
                 return cap, source_type
 
             except Exception as e:
-
+                # Gather detailed source info for error logging
+                source_info = ""
+                if isinstance(source, str):
+                    if os.path.exists(source):
+                        file_size = os.path.getsize(source)
+                        source_info = f" | File exists: {file_size/(1024*1024):.1f}MB"
+                    elif source.startswith("rtsp://") or source.startswith("http://") or source.startswith("https://"):
+                        source_info = f" | Network source"
+                    else:
+                        source_info = " | File does not exist"
+
+                self.logger.error(
+                    f"Attempt {attempt + 1}/{VideoSourceConfig.MAX_CAPTURE_RETRIES} failed to open "
+                    f"{source_type} source: {type(e).__name__}: {e}{source_info}"
+                )
                 if attempt < VideoSourceConfig.MAX_CAPTURE_RETRIES - 1:
                     time.sleep(VideoSourceConfig.CAPTURE_RETRY_DELAY)
                 else:
-                    raise RuntimeError(
+                    raise RuntimeError(
+                        f"Failed to open source after {VideoSourceConfig.MAX_CAPTURE_RETRIES} attempts: "
+                        f"{type(e).__name__}: {e}{source_info}"
+                    )
 
     def get_video_properties(self, cap: cv2.VideoCapture) -> Dict[str, Any]:
         """Extract video properties from capture.
@@ -123,14 +152,20 @@ class VideoCaptureManager:
 
     def cleanup(self):
         """Clean up downloaded temporary files."""
-        for filepath in self.downloaded_files.values():
+        # Collect unique file paths (since multiple URLs may point to the same file)
+        unique_files = set(self.downloaded_files.values())
+        unique_files.update(self._normalized_url_to_path.values())
+
+        for filepath in unique_files:
             try:
                 if os.path.exists(filepath):
                     os.remove(filepath)
                     self.logger.debug(f"Removed temp file: {filepath}")
             except Exception as e:
                 self.logger.warning(f"Failed to remove temp file {filepath}: {e}")
+
         self.downloaded_files.clear()
+        self._normalized_url_to_path.clear()
 
         # Private methods
 
```
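Because two cache maps can now point at the same file, `cleanup()` collapses them into a set before deleting, so each file is removed at most once. A minimal illustration of why the set matters; the paths are made up:

```python
downloaded_files = {
    "https://cdn.example.com/v.mp4?sig=aaa": "/tmp/video_ab12.mp4",
    "https://cdn.example.com/v.mp4?sig=bbb": "/tmp/video_ab12.mp4",
}
normalized_url_to_path = {
    "https://cdn.example.com/v.mp4": "/tmp/video_ab12.mp4",
}

unique_files = set(downloaded_files.values())
unique_files.update(normalized_url_to_path.values())

# Three cache entries, but only one os.remove() call is attempted.
assert unique_files == {"/tmp/video_ab12.mp4"}
```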
```diff
@@ -138,36 +173,197 @@ class VideoCaptureManager:
         """Check if source is a downloadable URL (not RTSP)."""
         return (source.startswith('http://') or source.startswith('https://')) and not source.startswith('rtsp')
 
+    def _normalize_url(self, url: str) -> str:
+        """Normalize URL by stripping query parameters.
+
+        This allows URLs that point to the same file but have different
+        query parameters (e.g., AWS signed URLs with different tokens)
+        to be recognized as the same resource.
+
+        Args:
+            url: Full URL with potential query parameters
+
+        Returns:
+            Normalized URL without query parameters
+        """
+        parsed = urlparse(url)
+        # Rebuild URL without query string and fragment
+        normalized = urlunparse((
+            parsed.scheme,
+            parsed.netloc,
+            parsed.path,
+            '',  # params
+            '',  # query
+            ''   # fragment
+        ))
+        return normalized
+
+    def _get_url_hash(self, normalized_url: str) -> str:
+        """Generate a short hash for the normalized URL.
+
+        This is used for creating unique but consistent file names.
+
+        Args:
+            normalized_url: URL without query parameters
+
+        Returns:
+            Short hash string
+        """
+        return hashlib.md5(normalized_url.encode()).hexdigest()[:12]
+
     def _download_video_file(self, url: str, stream_key: str) -> Optional[str]:
-        """Download video file from URL and cache it locally.
+        """Download video file from URL and cache it locally.
+
+        Uses URL deduplication: if the same video (by normalized URL without
+        query parameters) has already been downloaded, returns the existing
+        local path instead of downloading again.
+
+        Features dynamic timeout calculation based on file size and progress
+        tracking for large files.
+
+        Args:
+            url: Video file URL (may include query parameters like AWS signatures)
+            stream_key: Stream identifier
+
+        Returns:
+            Local file path or None if download failed
+        """
+        # Initialize tracking variables for error reporting
+        content_length = 0
+        file_size_mb = 0.0
+        bytes_downloaded = 0
+        timeout = VideoSourceConfig.DOWNLOAD_TIMEOUT
+        expected_path = None
+
         try:
-            #
+            # Normalize URL to check for duplicate downloads
+            # (same file but different query params, e.g., different AWS signatures)
+            normalized_url = self._normalize_url(url)
+
+            # Generate a consistent filename using URL hash
+            file_ext = Path(url.split('?')[0]).suffix or '.mp4'
+            url_hash = self._get_url_hash(normalized_url)
+            expected_path = self.temp_dir / f"video_{url_hash}{file_ext}"
+            expected_path_str = str(expected_path)
+
+            # Quick check: if file already exists on disk, use it
+            if os.path.exists(expected_path):
+                existing_size = os.path.getsize(expected_path)
+                self.logger.debug(
+                    f"Reusing existing video file for {stream_key}: {expected_path} "
+                    f"({existing_size / (1024*1024):.1f}MB, already downloaded)"
+                )
+                # Update caches
+                self.downloaded_files[url] = expected_path_str
+                self._normalized_url_to_path[normalized_url] = expected_path_str
+                return expected_path_str
+
+            # Check memory cache for exact URL
             if url in self.downloaded_files:
                 local_path = self.downloaded_files[url]
                 if os.path.exists(local_path):
-                    self.logger.
+                    self.logger.debug(f"Using cached video file (exact URL match): {local_path}")
                     return local_path
-
-            #
-
-
+
+            # Check memory cache for normalized URL
+            if normalized_url in self._normalized_url_to_path:
+                local_path = self._normalized_url_to_path[normalized_url]
+                if os.path.exists(local_path):
+                    self.logger.debug(
+                        f"Reusing previously downloaded file for {stream_key}: {local_path} "
+                        f"(same base URL, different query params)"
+                    )
+                    self.downloaded_files[url] = local_path
+                    return local_path
+
+            # HEAD request to get file size for dynamic timeout calculation
+            try:
+                head_response = requests.head(url, timeout=10, allow_redirects=True)
+                content_length = int(head_response.headers.get('Content-Length', 0))
+                file_size_mb = content_length / (1024 * 1024)
+            except Exception as head_err:
+                self.logger.debug(f"HEAD request failed for {stream_key}: {head_err}")
+                content_length = 0
+                file_size_mb = 0
+
+            # Calculate dynamic timeout based on file size
+            if content_length > 0:
+                # Base timeout + additional time per 100MB
+                timeout = min(
+                    VideoSourceConfig.DOWNLOAD_TIMEOUT +
+                    int(file_size_mb // 100) * VideoSourceConfig.DOWNLOAD_TIMEOUT_PER_100MB,
+                    VideoSourceConfig.MAX_DOWNLOAD_TIMEOUT
+                )
+                self.logger.info(
+                    f"Downloading video file for {stream_key}: {file_size_mb:.1f}MB "
+                    f"(timeout: {timeout}s)"
+                )
+            else:
+                timeout = VideoSourceConfig.DOWNLOAD_TIMEOUT
+                self.logger.info(f"Downloading video file for {stream_key} (size unknown, timeout: {timeout}s)")
+
+            # Download the file with progress tracking
+            response = requests.get(url, stream=True, timeout=timeout)
             response.raise_for_status()
-
-            #
-
-
-
-
+
+            # Update content_length from response if HEAD failed
+            if content_length == 0:
+                content_length = int(response.headers.get('Content-Length', 0))
+                file_size_mb = content_length / (1024 * 1024) if content_length > 0 else 0
+
+            last_progress_log = 0
+
+            with open(expected_path, 'wb') as f:
                 for chunk in response.iter_content(chunk_size=VideoSourceConfig.DOWNLOAD_CHUNK_SIZE):
                     f.write(chunk)
-
-
-
-
-
-
+                    bytes_downloaded += len(chunk)
+
+                    # Log progress every 50MB for large files (>50MB)
+                    if content_length > 50_000_000:
+                        mb_downloaded = bytes_downloaded // (1024 * 1024)
+                        if mb_downloaded - last_progress_log >= 50:
+                            progress = (bytes_downloaded / content_length * 100) if content_length else 0
+                            self.logger.info(
+                                f"Download progress for {stream_key}: "
+                                f"{mb_downloaded}MB / {file_size_mb:.0f}MB ({progress:.1f}%)"
+                            )
+                            last_progress_log = mb_downloaded
+
+            # Cache for both exact URL and normalized URL
+            self.downloaded_files[url] = expected_path_str
+            self._normalized_url_to_path[normalized_url] = expected_path_str
+
+            self.logger.info(
+                f"Downloaded video file for {stream_key}: {expected_path} "
+                f"({bytes_downloaded / (1024*1024):.1f}MB)"
+            )
+            return expected_path_str
+
+        except requests.Timeout as e:
+            self.logger.error(
+                f"Download timeout for {stream_key}: {e} | "
+                f"File size: {file_size_mb:.1f}MB, Downloaded: {bytes_downloaded/(1024*1024):.1f}MB, "
+                f"Timeout: {timeout}s"
+            )
+            return None
+        except requests.HTTPError as e:
+            self.logger.error(
+                f"HTTP error downloading {stream_key}: {e.response.status_code} - {e.response.reason} | "
+                f"URL: {url[:100]}..."
+            )
+            return None
+        except IOError as e:
+            self.logger.error(
+                f"Disk I/O error downloading {stream_key}: {e} | "
+                f"Downloaded: {bytes_downloaded/(1024*1024):.1f}MB, Path: {expected_path}"
+            )
+            return None
         except Exception as e:
-
+            size_info = f"{file_size_mb:.1f}MB" if content_length > 0 else "unknown"
+            self.logger.error(
+                f"Failed to download video file for {stream_key}: {type(e).__name__}: {e} | "
+                f"File size: {size_info}, Downloaded: {bytes_downloaded/(1024*1024):.1f}MB"
+            )
             return None
 
     def _detect_source_type(self, source: Union[str, int]) -> str:
```
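The dedup scheme hinges on `_normalize_url` mapping signed variants of the same object to one key, and `_get_url_hash` mapping that key to one cache filename. A self-contained check of that behavior; the S3-style URLs are illustrative:

```python
import hashlib
from urllib.parse import urlparse, urlunparse

def normalize_url(url: str) -> str:
    # Strip params, query string, and fragment, as _normalize_url does.
    p = urlparse(url)
    return urlunparse((p.scheme, p.netloc, p.path, "", "", ""))

a = "https://bucket.s3.amazonaws.com/cam/video.mp4?X-Amz-Signature=aaa"
b = "https://bucket.s3.amazonaws.com/cam/video.mp4?X-Amz-Signature=bbb"

assert normalize_url(a) == normalize_url(b)

# Both signed URLs therefore hash to the same 12-character cache name.
url_hash = hashlib.md5(normalize_url(a).encode()).hexdigest()[:12]
print(f"video_{url_hash}.mp4")  # one shared file in matrice_streaming_cache
```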