nedo-vision-worker 1.1.2-py3-none-any.whl → 1.2.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. nedo_vision_worker/__init__.py +1 -1
  2. nedo_vision_worker/cli.py +197 -168
  3. nedo_vision_worker/database/DatabaseManager.py +3 -3
  4. nedo_vision_worker/doctor.py +1066 -386
  5. nedo_vision_worker/models/ai_model.py +35 -2
  6. nedo_vision_worker/protos/AIModelService_pb2.py +12 -10
  7. nedo_vision_worker/protos/AIModelService_pb2_grpc.py +1 -1
  8. nedo_vision_worker/protos/DatasetSourceService_pb2.py +2 -2
  9. nedo_vision_worker/protos/DatasetSourceService_pb2_grpc.py +1 -1
  10. nedo_vision_worker/protos/HumanDetectionService_pb2.py +2 -2
  11. nedo_vision_worker/protos/HumanDetectionService_pb2_grpc.py +1 -1
  12. nedo_vision_worker/protos/PPEDetectionService_pb2.py +2 -2
  13. nedo_vision_worker/protos/PPEDetectionService_pb2_grpc.py +1 -1
  14. nedo_vision_worker/protos/VisionWorkerService_pb2.py +2 -2
  15. nedo_vision_worker/protos/VisionWorkerService_pb2_grpc.py +1 -1
  16. nedo_vision_worker/protos/WorkerSourcePipelineService_pb2.py +2 -2
  17. nedo_vision_worker/protos/WorkerSourcePipelineService_pb2_grpc.py +1 -1
  18. nedo_vision_worker/protos/WorkerSourceService_pb2.py +2 -2
  19. nedo_vision_worker/protos/WorkerSourceService_pb2_grpc.py +1 -1
  20. nedo_vision_worker/services/AIModelClient.py +184 -160
  21. nedo_vision_worker/services/DirectDeviceToRTMPStreamer.py +534 -0
  22. nedo_vision_worker/services/GrpcClientBase.py +142 -108
  23. nedo_vision_worker/services/PPEDetectionClient.py +0 -7
  24. nedo_vision_worker/services/RestrictedAreaClient.py +0 -5
  25. nedo_vision_worker/services/SharedDirectDeviceClient.py +278 -0
  26. nedo_vision_worker/services/SharedVideoStreamServer.py +315 -0
  27. nedo_vision_worker/services/SystemWideDeviceCoordinator.py +236 -0
  28. nedo_vision_worker/services/VideoSharingDaemon.py +832 -0
  29. nedo_vision_worker/services/VideoStreamClient.py +30 -13
  30. nedo_vision_worker/services/WorkerSourceClient.py +1 -1
  31. nedo_vision_worker/services/WorkerSourcePipelineClient.py +28 -6
  32. nedo_vision_worker/services/WorkerSourceUpdater.py +30 -3
  33. nedo_vision_worker/util/VideoProbeUtil.py +222 -15
  34. nedo_vision_worker/worker/DataSyncWorker.py +1 -0
  35. nedo_vision_worker/worker/PipelineImageWorker.py +1 -1
  36. nedo_vision_worker/worker/VideoStreamWorker.py +27 -3
  37. nedo_vision_worker/worker/WorkerManager.py +2 -29
  38. nedo_vision_worker/worker_service.py +24 -11
  39. {nedo_vision_worker-1.1.2.dist-info → nedo_vision_worker-1.2.0.dist-info}/METADATA +1 -3
  40. {nedo_vision_worker-1.1.2.dist-info → nedo_vision_worker-1.2.0.dist-info}/RECORD +43 -38
  41. {nedo_vision_worker-1.1.2.dist-info → nedo_vision_worker-1.2.0.dist-info}/WHEEL +0 -0
  42. {nedo_vision_worker-1.1.2.dist-info → nedo_vision_worker-1.2.0.dist-info}/entry_points.txt +0 -0
  43. {nedo_vision_worker-1.1.2.dist-info → nedo_vision_worker-1.2.0.dist-info}/top_level.txt +0 -0
@@ -1,453 +1,1133 @@
1
- #!/usr/bin/env python3
2
- """
3
- Nedo Vision Worker Service Doctor
4
-
5
- This module provides diagnostic capabilities to check system requirements
6
- and dependencies for the Nedo Vision Worker Service.
7
- """
8
-
9
1
  import subprocess
10
2
  import sys
11
3
  import platform
12
4
  import shutil
13
5
  import os
6
+ import time
7
+ import socket
14
8
  from pathlib import Path
9
+ from typing import List, Optional, Dict, Any
10
+ from dataclasses import dataclass
11
+ from enum import Enum
12
+
13
+
14
+ class HealthStatus(Enum):
15
+ """Health check status levels."""
16
+ EXCELLENT = ("🎉", "Excellent")
17
+ GOOD = ("✅", "Good")
18
+ WARNING = ("⚠️ ", "Warning")
19
+ CRITICAL = ("❌", "Critical")
20
+ INFO = ("ℹ️ ", "Info")
15
21
 
16
22
 
17
- def check_python_version():
18
- """Check if Python version meets requirements."""
19
- print("🐍 Checking Python version...")
23
+ @dataclass
24
+ class HealthCheck:
25
+ """Individual health check result."""
26
+ name: str
27
+ status: HealthStatus
28
+ message: str
29
+ details: Optional[List[str]] = None
30
+ recommendations: Optional[List[str]] = None
31
+ is_blocking: bool = False # Prevents service from running
32
+ performance_impact: str = "None" # None, Low, Medium, High
33
+
34
+
35
+ class WorkerServiceDoctor:
36
+ """Comprehensive health checker for Nedo Vision Worker Service."""
20
37
 
21
- version = sys.version_info
22
- min_version = (3, 8)
38
+ def __init__(self):
39
+ self.results: List[HealthCheck] = []
40
+ self.start_time = time.time()
41
+ self.system_info = self._gather_system_info()
23
42
 
24
- if version >= min_version:
25
- print(f" Python {version.major}.{version.minor}.{version.micro} (meets requirement >= {min_version[0]}.{min_version[1]})")
26
- return True
27
- else:
28
- print(f" ❌ Python {version.major}.{version.minor}.{version.micro} (requires >= {min_version[0]}.{min_version[1]})")
43
+ def _gather_system_info(self) -> Dict[str, Any]:
44
+ """Gather comprehensive system information."""
45
+ return {
46
+ 'os': platform.system(),
47
+ 'release': platform.release(),
48
+ 'machine': platform.machine(),
49
+ 'processor': platform.processor(),
50
+ 'python_version': platform.python_version(),
51
+ 'python_executable': sys.executable,
52
+ 'is_arm': platform.machine() in ["aarch64", "armv7l", "arm64"],
53
+ 'is_jetson': self._detect_jetson(),
54
+ 'is_container': self._detect_container(),
55
+ }
56
+
57
+ def _detect_jetson(self) -> bool:
58
+ """Detect if running on NVIDIA Jetson device."""
59
+ jetson_indicators = [
60
+ Path("/sys/firmware/devicetree/base/model"),
61
+ Path("/proc/device-tree/model"),
62
+ Path("/etc/nv_tegra_release")
63
+ ]
64
+
65
+ for indicator in jetson_indicators:
66
+ try:
67
+ if indicator.exists():
68
+ content = indicator.read_text().lower()
69
+ if any(keyword in content for keyword in ["jetson", "tegra", "nvidia"]):
70
+ return True
71
+ except (OSError, UnicodeDecodeError):
72
+ continue
29
73
  return False
30
-
31
-
32
- def check_ffmpeg():
33
- """Check if FFmpeg is installed and accessible."""
34
- print("🎬 Checking FFmpeg...")
35
74
 
36
- try:
37
- # Check if ffmpeg is in PATH
75
+ def _detect_container(self) -> Dict[str, bool]:
76
+ """Detect containerized environment."""
77
+ return {
78
+ 'docker': Path("/.dockerenv").exists(),
79
+ 'kubernetes': bool(os.environ.get("KUBERNETES_SERVICE_HOST")),
80
+ 'any': Path("/.dockerenv").exists() or bool(os.environ.get("KUBERNETES_SERVICE_HOST"))
81
+ }
82
+
83
+ def _add_result(self, result: HealthCheck) -> None:
84
+ """Add a health check result."""
85
+ self.results.append(result)
86
+
87
+ def check_python_environment(self) -> None:
88
+ """Comprehensive Python environment validation."""
89
+ version = sys.version_info
90
+ min_version = (3, 8)
91
+ recommended_version = (3, 9)
92
+
93
+ details = [
94
+ f"Python {version.major}.{version.minor}.{version.micro}",
95
+ f"Executable: {sys.executable}",
96
+ f"Platform: {platform.python_implementation()}"
97
+ ]
98
+
99
+ # Check virtual environment
100
+ if hasattr(sys, 'base_prefix') and sys.base_prefix != sys.prefix:
101
+ details.append("Virtual environment: Active ✓")
102
+ venv_status = HealthStatus.GOOD
103
+ else:
104
+ details.append("Virtual environment: Not detected")
105
+ venv_status = HealthStatus.WARNING
106
+
107
+ recommendations = []
108
+
109
+ if version < min_version:
110
+ status = HealthStatus.CRITICAL
111
+ message = f"Python {version.major}.{version.minor} - Too old"
112
+ recommendations.extend([
113
+ f"Upgrade to Python >= {min_version[0]}.{min_version[1]}",
114
+ "Use pyenv, conda, or system package manager",
115
+ "Consider using the latest stable Python version"
116
+ ])
117
+ is_blocking = True
118
+ performance_impact = "High"
119
+ elif version < recommended_version:
120
+ status = HealthStatus.WARNING
121
+ message = f"Python {version.major}.{version.minor} - Works but outdated"
122
+ recommendations.append(f"Consider upgrading to Python >= {recommended_version[0]}.{recommended_version[1]} for better performance")
123
+ is_blocking = False
124
+ performance_impact = "Low"
125
+ elif version >= (3, 11):
126
+ status = HealthStatus.EXCELLENT
127
+ message = f"Python {version.major}.{version.minor} - Excellent (Latest features)"
128
+ performance_impact = "None"
129
+ is_blocking = False
130
+ else:
131
+ status = HealthStatus.GOOD
132
+ message = f"Python {version.major}.{version.minor} - Good"
133
+ performance_impact = "None"
134
+ is_blocking = False
135
+
136
+ if venv_status == HealthStatus.WARNING:
137
+ recommendations.append("Use virtual environment to avoid dependency conflicts")
138
+
139
+ self._add_result(HealthCheck(
140
+ name="Python Environment",
141
+ status=status,
142
+ message=message,
143
+ details=details,
144
+ recommendations=recommendations if recommendations else None,
145
+ is_blocking=is_blocking,
146
+ performance_impact=performance_impact
147
+ ))
148
+
149
+ def check_ffmpeg_installation(self) -> None:
150
+ """Comprehensive FFmpeg installation and capability check."""
151
+ details = []
152
+ recommendations = []
153
+
154
+ # Check if FFmpeg is available
38
155
  ffmpeg_path = shutil.which("ffmpeg")
39
156
  if not ffmpeg_path:
40
- print(" ❌ FFmpeg not found in PATH")
41
- return False
42
-
43
- # Check ffmpeg version
44
- result = subprocess.run(
45
- ["ffmpeg", "-version"],
46
- capture_output=True,
47
- text=True,
48
- timeout=10
49
- )
50
-
51
- if result.returncode == 0:
52
- # Extract version from output
53
- version_line = result.stdout.split('\n')[0]
54
- print(f" ✅ {version_line}")
55
- print(f" 📍 Location: {ffmpeg_path}")
56
- return True
157
+ status = HealthStatus.CRITICAL
158
+ message = "FFmpeg not found"
159
+ details.append("FFmpeg executable not found in PATH")
160
+ recommendations.extend([
161
+ "Install FFmpeg using your system package manager",
162
+ self._get_ffmpeg_install_cmd(),
163
+ "Ensure FFmpeg is added to system PATH"
164
+ ])
165
+ is_blocking = True
166
+ performance_impact = "High"
57
167
  else:
58
- print(" ❌ FFmpeg found but failed to get version")
59
- return False
60
-
61
- except subprocess.TimeoutExpired:
62
- print(" ❌ FFmpeg check timed out")
63
- return False
64
- except Exception as e:
65
- print(f" ❌ Error checking FFmpeg: {e}")
66
- return False
67
-
68
-
69
- def check_opencv():
70
- """Check if OpenCV is properly installed."""
71
- print("👁️ Checking OpenCV...")
168
+ try:
169
+ # Get FFmpeg version and build info
170
+ result = subprocess.run(
171
+ ["ffmpeg", "-version"],
172
+ capture_output=True,
173
+ text=True,
174
+ timeout=10
175
+ )
176
+
177
+ if result.returncode == 0:
178
+ version_lines = result.stdout.split('\n')
179
+ version_info = version_lines[0] if version_lines else "Unknown version"
180
+
181
+ details.extend([
182
+ f"Location: {ffmpeg_path}",
183
+ f"Version: {version_info}",
184
+ ])
185
+
186
+ # Check for hardware acceleration support
187
+ build_info = result.stdout
188
+ hw_accelerations = []
189
+
190
+ hw_checks = {
191
+ 'nvidia': 'NVIDIA GPU acceleration',
192
+ 'cuda': 'CUDA acceleration',
193
+ 'nvenc': 'NVIDIA encoding',
194
+ 'vaapi': 'VA-API acceleration',
195
+ 'libx264': 'H.264 encoding',
196
+ 'libx265': 'H.265/HEVC encoding'
197
+ }
198
+
199
+ for hw_key, hw_desc in hw_checks.items():
200
+ if hw_key in build_info.lower():
201
+ hw_accelerations.append(hw_desc)
202
+
203
+ if hw_accelerations:
204
+ details.append(f"Hardware support: {', '.join(hw_accelerations)}")
205
+ status = HealthStatus.EXCELLENT
206
+ message = "FFmpeg with hardware acceleration"
207
+ performance_impact = "None"
208
+ else:
209
+ details.append("Hardware support: Software-only")
210
+ status = HealthStatus.GOOD
211
+ message = "FFmpeg installed (software-only)"
212
+ performance_impact = "Medium"
213
+ recommendations.append("Consider FFmpeg build with hardware acceleration for better performance")
214
+
215
+ is_blocking = False
216
+
217
+ else:
218
+ status = HealthStatus.WARNING
219
+ message = "FFmpeg found but version check failed"
220
+ details.append(f"Location: {ffmpeg_path}")
221
+ details.append("Version check returned error")
222
+ is_blocking = False
223
+ performance_impact = "Unknown"
224
+ recommendations.append("Verify FFmpeg installation integrity")
225
+
226
+ except subprocess.TimeoutExpired:
227
+ status = HealthStatus.WARNING
228
+ message = "FFmpeg found but unresponsive"
229
+ details.append(f"Location: {ffmpeg_path}")
230
+ details.append("Version check timed out")
231
+ is_blocking = False
232
+ performance_impact = "Unknown"
233
+ recommendations.append("Check FFmpeg installation - may be corrupted")
234
+ except Exception as e:
235
+ status = HealthStatus.WARNING
236
+ message = "FFmpeg check failed"
237
+ details.append(f"Location: {ffmpeg_path}")
238
+ details.append(f"Error: {str(e)}")
239
+ is_blocking = False
240
+ performance_impact = "Unknown"
241
+
242
+ self._add_result(HealthCheck(
243
+ name="FFmpeg Media Processing",
244
+ status=status,
245
+ message=message,
246
+ details=details,
247
+ recommendations=recommendations if recommendations else None,
248
+ is_blocking=is_blocking,
249
+ performance_impact=performance_impact
250
+ ))
72
251
 
73
- try:
74
- import cv2
75
- version = cv2.__version__
76
- build_info = cv2.getBuildInformation()
77
-
78
- print(f" ✅ OpenCV {version} installed")
79
-
80
- # Check OpenCV build configuration
81
- if "CUDA" in build_info:
82
- print(" 🚀 OpenCV built with CUDA support")
83
- if "OpenMP" in build_info:
84
- print(" ⚡ OpenCV built with OpenMP support")
85
-
86
- # Check for platform-specific optimizations
87
- machine = platform.machine()
88
- if machine in ["aarch64", "armv7l", "arm64"]:
89
- if "NEON" in build_info:
90
- print(" 🎯 OpenCV built with ARM NEON optimizations")
252
+ def _get_ffmpeg_install_cmd(self) -> str:
253
+ """Get platform-specific FFmpeg installation command."""
254
+ system = self.system_info['os']
255
+ if system == "Windows":
256
+ return "Windows: choco install ffmpeg OR winget install FFmpeg"
257
+ elif system == "Darwin":
258
+ return "macOS: brew install ffmpeg"
259
+ else: # Linux
260
+ if self.system_info['is_jetson']:
261
+ return "Jetson: sudo apt install ffmpeg (usually pre-installed)"
91
262
  else:
92
- print(" ⚠️ OpenCV may not have ARM optimizations")
263
+ return "Linux: sudo apt install ffmpeg (Ubuntu/Debian) OR sudo yum install ffmpeg (CentOS/RHEL)"
264
+
265
+ def check_opencv_installation(self) -> None:
266
+ """Comprehensive OpenCV installation and optimization check."""
267
+ details = []
268
+ recommendations = []
93
269
 
94
- # Test basic functionality
95
- import numpy as np
96
- test_img = np.zeros((100, 100, 3), dtype=np.uint8)
270
+ try:
271
+ import cv2
272
+ version = cv2.__version__
273
+ build_info = cv2.getBuildInformation()
274
+
275
+ details.append(f"OpenCV version: {version}")
276
+
277
+ # Check build optimizations
278
+ optimizations = []
279
+ performance_flags = {
280
+ 'CUDA': 'NVIDIA GPU acceleration',
281
+ 'OpenMP': 'Multi-threading optimization',
282
+ 'TBB': 'Intel Threading Building Blocks',
283
+ 'EIGEN': 'Eigen library optimization',
284
+ 'LAPACK': 'Linear algebra optimization'
285
+ }
286
+
287
+ # ARM-specific optimizations
288
+ if self.system_info['is_arm']:
289
+ performance_flags.update({
290
+ 'NEON': 'ARM NEON SIMD optimization',
291
+ 'VFPV3': 'ARM floating-point optimization'
292
+ })
293
+
294
+ for flag, description in performance_flags.items():
295
+ if flag in build_info:
296
+ optimizations.append(description)
297
+
298
+ if optimizations:
299
+ details.append(f"Optimizations: {', '.join(optimizations)}")
300
+ else:
301
+ details.append("Optimizations: Basic build (no advanced optimizations)")
302
+ recommendations.append("Consider installing optimized OpenCV build for better performance")
303
+
304
+ # Test basic functionality
305
+ try:
306
+ import numpy as np
307
+ test_img = np.zeros((100, 100, 3), dtype=np.uint8)
308
+
309
+ # Test image encoding/decoding
310
+ success, encoded = cv2.imencode('.jpg', test_img)
311
+ if success:
312
+ decoded = cv2.imdecode(encoded, cv2.IMREAD_COLOR)
313
+ if decoded is not None:
314
+ details.append("Functionality: Encoding/decoding ✓")
315
+
316
+ # Additional functional tests
317
+ if self._test_opencv_advanced_features():
318
+ details.append("Advanced features: Available ✓")
319
+ status = HealthStatus.EXCELLENT if optimizations else HealthStatus.GOOD
320
+ message = "OpenCV fully functional" + (" with optimizations" if optimizations else "")
321
+ else:
322
+ details.append("Advanced features: Limited")
323
+ status = HealthStatus.WARNING
324
+ message = "OpenCV basic functionality only"
325
+ recommendations.append("Some OpenCV features may be unavailable")
326
+ else:
327
+ raise Exception("Image decoding failed")
328
+ else:
329
+ raise Exception("Image encoding failed")
330
+
331
+ performance_impact = "None" if optimizations else "Low"
332
+ is_blocking = False
333
+
334
+ except Exception as e:
335
+ status = HealthStatus.WARNING
336
+ message = "OpenCV installed but functionality limited"
337
+ details.append(f"Functionality test failed: {str(e)}")
338
+ performance_impact = "Medium"
339
+ is_blocking = False
340
+ recommendations.append("Reinstall OpenCV or check dependencies")
97
341
 
98
- # Test encoding/decoding
99
- _, encoded = cv2.imencode('.jpg', test_img)
100
- decoded = cv2.imdecode(encoded, cv2.IMREAD_COLOR)
342
+ except ImportError:
343
+ status = HealthStatus.CRITICAL
344
+ message = "OpenCV not installed"
345
+ details.append("cv2 module not found")
346
+ recommendations.extend([
347
+ "Install OpenCV: pip install opencv-python",
348
+ "For servers: pip install opencv-python-headless",
349
+ "For ARM devices: Consider building from source for optimizations"
350
+ ])
351
+ if self.system_info['is_jetson']:
352
+ recommendations.append("Jetson: Use 'sudo apt install python3-opencv' for GPU-optimized version")
353
+
354
+ performance_impact = "High"
355
+ is_blocking = True
101
356
 
102
- if decoded is not None:
103
- print(" ✅ OpenCV basic functionality working")
104
- return True
105
- else:
106
- print(" ❌ OpenCV encoding/decoding test failed")
107
- return False
357
+ except Exception as e:
358
+ status = HealthStatus.WARNING
359
+ message = "OpenCV check failed"
360
+ details.append(f"Import error: {str(e)}")
361
+ performance_impact = "Unknown"
362
+ is_blocking = True
363
+ recommendations.append("Check OpenCV installation and dependencies")
364
+
365
+ self._add_result(HealthCheck(
366
+ name="OpenCV Computer Vision",
367
+ status=status,
368
+ message=message,
369
+ details=details,
370
+ recommendations=recommendations if recommendations else None,
371
+ is_blocking=is_blocking,
372
+ performance_impact=performance_impact
373
+ ))
374
+
375
+ def _test_opencv_advanced_features(self) -> bool:
376
+ """Test advanced OpenCV features."""
377
+ try:
378
+ import cv2
379
+ import numpy as np
108
380
 
109
- except ImportError:
110
- print(" ❌ OpenCV not installed")
111
- return False
112
- except Exception as e:
113
- print(f" ❌ OpenCV test failed: {e}")
114
- return False
115
-
116
-
117
- def check_grpc():
118
- """Check if gRPC is properly installed."""
119
- print("🌐 Checking gRPC...")
381
+ # Test advanced operations
382
+ img = np.random.randint(0, 255, (100, 100, 3), dtype=np.uint8)
383
+
384
+ # Test filter operations
385
+ blurred = cv2.GaussianBlur(img, (5, 5), 0)
386
+
387
+ # Test feature detection (if available)
388
+ try:
389
+ gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
390
+ orb = cv2.ORB_create()
391
+ keypoints = orb.detect(gray, None)
392
+ return True
393
+ except:
394
+ return True # Basic operations work
395
+
396
+ except Exception:
397
+ return False
120
398
 
121
- try:
122
- import grpc
123
- print(f" ✅ gRPC installed")
399
+ def check_gpu_capabilities(self) -> None:
400
+ """Comprehensive GPU detection and capability assessment."""
401
+ details = []
402
+ recommendations = []
403
+ gpu_found = False
124
404
 
125
- # Test basic gRPC functionality
126
- from grpc import StatusCode
127
- print(" ✅ gRPC basic imports working")
128
- return True
405
+ # Check NVIDIA GPUs via pynvml
406
+ nvidia_info = self._check_nvidia_gpu()
407
+ if nvidia_info:
408
+ details.extend(nvidia_info['details'])
409
+ gpu_found = True
410
+ if nvidia_info['status'] == 'excellent':
411
+ status = HealthStatus.EXCELLENT
412
+ message = "High-performance NVIDIA GPU detected"
413
+ else:
414
+ status = HealthStatus.GOOD
415
+ message = "NVIDIA GPU available"
129
416
 
130
- except ImportError:
131
- print(" ❌ gRPC not installed")
132
- return False
133
- except Exception as e:
134
- print(f" ❌ gRPC test failed: {e}")
135
- return False
136
-
137
-
138
- def check_pynvml():
139
- """Check if pynvml (NVIDIA management) is available."""
140
- print("🎮 Checking NVIDIA GPU support...")
141
-
142
- try:
143
- import pynvml
144
- pynvml.nvmlInit()
417
+ # Check for integrated/other GPUs
418
+ integrated_info = self._check_integrated_gpu()
419
+ if integrated_info and not gpu_found:
420
+ details.extend(integrated_info)
421
+ gpu_found = True
422
+ status = HealthStatus.INFO
423
+ message = "Integrated GPU detected"
424
+ recommendations.append("Integrated GPU has limited ML performance compared to dedicated GPUs")
425
+
426
+ # ARM-specific GPU checks
427
+ if self.system_info['is_arm'] and not gpu_found:
428
+ arm_gpu_info = self._check_arm_gpu()
429
+ if arm_gpu_info:
430
+ details.extend(arm_gpu_info)
431
+ gpu_found = True
432
+ status = HealthStatus.INFO
433
+ message = "ARM GPU detected"
434
+
435
+ if not gpu_found:
436
+ status = HealthStatus.WARNING
437
+ message = "No GPU detected - CPU processing only"
438
+ details.append("No GPU acceleration available")
439
+ recommendations.extend([
440
+ "GPU acceleration will significantly improve performance",
441
+ "Consider cloud instances with GPU (AWS, GCP, Azure)",
442
+ "For local development: NVIDIA RTX series recommended"
443
+ ])
444
+ performance_impact = "High"
445
+ else:
446
+ performance_impact = "None" if status == HealthStatus.EXCELLENT else "Low"
145
447
 
146
- device_count = pynvml.nvmlDeviceGetCount()
147
- if device_count > 0:
448
+ # Add general GPU recommendations
449
+ if gpu_found and not any("NVIDIA" in detail for detail in details):
450
+ recommendations.append("For optimal ML performance, NVIDIA GPUs with CUDA support are recommended")
451
+
452
+ self._add_result(HealthCheck(
453
+ name="GPU Acceleration",
454
+ status=status,
455
+ message=message,
456
+ details=details,
457
+ recommendations=recommendations if recommendations else None,
458
+ is_blocking=False,
459
+ performance_impact=performance_impact
460
+ ))
461
+
462
+ def _check_nvidia_gpu(self) -> Optional[Dict]:
463
+ """Check NVIDIA GPU via pynvml."""
464
+ try:
465
+ import pynvml
466
+ pynvml.nvmlInit()
467
+
468
+ device_count = pynvml.nvmlDeviceGetCount()
469
+ if device_count == 0:
470
+ return None
471
+
472
+ details = []
473
+ gpu_status = 'good'
474
+
148
475
  for i in range(device_count):
149
476
  handle = pynvml.nvmlDeviceGetHandleByIndex(i)
150
477
  name = pynvml.nvmlDeviceGetName(handle)
151
- # Handle both string and bytes return types for compatibility
478
+
479
+ # Handle both string and bytes return types
152
480
  if isinstance(name, bytes):
153
481
  name = name.decode('utf-8')
154
482
 
155
- # Get additional GPU information
483
+ gpu_info = [f"GPU {i}: {name}"]
484
+
156
485
  try:
486
+ # Memory information
157
487
  memory_info = pynvml.nvmlDeviceGetMemoryInfo(handle)
158
488
  memory_total_gb = memory_info.total / (1024**3)
159
- print(f" ✅ GPU {i}: {name}")
160
- print(f" 💾 Memory: {memory_total_gb:.1f} GB")
489
+ memory_used_gb = memory_info.used / (1024**3)
490
+ memory_free_gb = memory_info.free / (1024**3)
491
+
492
+ gpu_info.append(f" Memory: {memory_total_gb:.1f}GB total, {memory_free_gb:.1f}GB free")
161
493
 
162
- # Check for Jetson-specific GPUs
163
- if "tegra" in name.lower() or "jetson" in name.lower():
164
- print(" 🚀 Jetson GPU detected")
494
+ # Assess GPU capability
495
+ if memory_total_gb >= 24:
496
+ gpu_info.append(" Capability: Excellent for large models")
497
+ gpu_status = 'excellent'
498
+ elif memory_total_gb >= 8:
499
+ gpu_info.append(" Capability: Good for most models")
500
+ elif memory_total_gb >= 4:
501
+ gpu_info.append(" Capability: Suitable for smaller models")
502
+ else:
503
+ gpu_info.append(" Capability: Limited VRAM - small models only")
165
504
 
166
- # Check compute capability for deep learning
505
+ # Compute capability
167
506
  try:
168
507
  major, minor = pynvml.nvmlDeviceGetCudaComputeCapability(handle)
169
- print(f" 🔢 Compute Capability: {major}.{minor}")
170
- if major >= 6: # Pascal architecture or newer
171
- print(" ✅ GPU supports modern deep learning frameworks")
508
+ compute_cap = f"{major}.{minor}"
509
+ gpu_info.append(f" Compute Capability: {compute_cap}")
510
+
511
+ if major >= 7: # Volta, Turing, Ampere, or newer
512
+ gpu_info.append(" Architecture: Modern (excellent ML support)")
513
+ elif major >= 6: # Pascal
514
+ gpu_info.append(" Architecture: Good ML support")
172
515
  else:
173
- print(" ⚠️ GPU may have limited deep learning support")
516
+ gpu_info.append(" Architecture: Limited ML performance")
174
517
  except:
175
- pass
518
+ gpu_info.append(" Compute Capability: Unknown")
519
+
520
+ # Temperature and power
521
+ try:
522
+ temp = pynvml.nvmlDeviceGetTemperature(handle, pynvml.NVML_TEMPERATURE_GPU)
523
+ gpu_info.append(f" Temperature: {temp}°C")
176
524
 
525
+ if temp > 80:
526
+ gpu_info.append(" Status: High temperature - check cooling")
527
+ elif temp > 70:
528
+ gpu_info.append(" Status: Warm but normal")
529
+ else:
530
+ gpu_info.append(" Status: Good temperature")
531
+ except:
532
+ pass
533
+
177
534
  except Exception as e:
178
- print(f" GPU {i}: {name} (limited info: {e})")
535
+ gpu_info.append(f" Info: Limited details ({str(e)})")
536
+
537
+ details.extend(gpu_info)
179
538
 
180
- return True
181
- else:
182
- print(" ⚠️ pynvml installed but no NVIDIA GPUs detected")
183
- # Check if we're on ARM and might have integrated GPU
184
- if platform.machine() in ["aarch64", "armv7l", "arm64"]:
185
- print(" 💡 ARM device may use integrated GPU (Mali, Adreno, etc.)")
186
- return True # Not an error, just no NVIDIA GPU
187
-
188
- except ImportError:
189
- print(" ❌ pynvml not installed")
190
- print(" 💡 Install with: pip install pynvml")
191
- return False
192
- except Exception as e:
193
- print(f" ⚠️ GPU check failed: {e}")
194
- # More helpful error messages for common issues
195
- if "driver" in str(e).lower():
196
- print(" 💡 NVIDIA drivers may not be installed")
197
- elif "nvml" in str(e).lower():
198
- print(" 💡 NVIDIA Management Library not available")
199
- return True # Not critical for service operation
200
-
201
-
202
- def check_storage_permissions():
203
- """Check if we can create storage directories."""
204
- print("💾 Checking storage permissions...")
205
-
206
- try:
207
- # Test default storage path
208
- test_path = Path("data") / "test_permissions"
209
- test_path.mkdir(parents=True, exist_ok=True)
539
+ return {
540
+ 'details': details,
541
+ 'status': gpu_status,
542
+ 'count': device_count
543
+ }
210
544
 
211
- # Test file creation
212
- test_file = test_path / "test.txt"
213
- test_file.write_text("test")
545
+ except ImportError:
546
+ return None
547
+ except Exception:
548
+ return None
549
+
550
+ def _check_integrated_gpu(self) -> Optional[List[str]]:
551
+ """Check for integrated GPU on Linux."""
552
+ if self.system_info['os'] != 'Linux':
553
+ return None
214
554
 
215
- # Test file reading
216
- content = test_file.read_text()
555
+ try:
556
+ result = subprocess.run(
557
+ ["lspci", "-nn"],
558
+ capture_output=True,
559
+ text=True,
560
+ timeout=5
561
+ )
562
+
563
+ if result.returncode == 0:
564
+ gpu_lines = []
565
+ for line in result.stdout.split('\n'):
566
+ if any(keyword in line.lower() for keyword in ['vga', 'display', '3d']):
567
+ if any(vendor in line.lower() for vendor in ['intel', 'amd', 'ati']):
568
+ gpu_lines.append(f"Integrated GPU: {line.split(':')[-1].strip()}")
569
+
570
+ return gpu_lines if gpu_lines else None
217
571
 
218
- # Cleanup
219
- test_file.unlink()
220
- test_path.rmdir()
572
+ except (subprocess.TimeoutExpired, FileNotFoundError):
573
+ pass
221
574
 
222
- if content == "test":
223
- print(" ✅ Storage read/write permissions OK")
224
- return True
225
- else:
226
- print(" ❌ Storage read/write test failed")
227
- return False
228
-
229
- except Exception as e:
230
- print(f" ❌ Storage permission check failed: {e}")
231
- return False
232
-
233
-
234
- def check_network_connectivity():
235
- """Check basic network connectivity."""
236
- print("🌐 Checking network connectivity...")
575
+ return None
237
576
 
238
- try:
239
- import socket
577
+ def _check_arm_gpu(self) -> Optional[List[str]]:
578
+ """Check for ARM-specific GPU information."""
579
+ if not self.system_info['is_arm']:
580
+ return None
240
581
 
241
- # Test DNS resolution
242
- socket.gethostbyname("be.vision.sindika.co.id")
243
- print(" ✅ DNS resolution working (be.vision.sindika.co.id)")
582
+ gpu_info = []
244
583
 
245
- # Test basic socket connectivity
246
- sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
247
- sock.settimeout(5)
248
- result = sock.connect_ex(("be.vision.sindika.co.id", 50051))
249
- sock.close()
584
+ # Check for Mali GPU
585
+ mali_paths = [
586
+ "/sys/class/misc/mali0",
587
+ "/dev/mali0",
588
+ "/sys/kernel/debug/mali"
589
+ ]
250
590
 
251
- if result == 0:
252
- print(" ✅ Network connectivity to be.vision.sindika.co.id:50051 OK")
253
- return True
254
- else:
255
- print(" ⚠️ Cannot connect to be.vision.sindika.co.id:50051 (may be normal if service is down)")
256
- return True # Not critical for initial setup
591
+ for path in mali_paths:
592
+ if Path(path).exists():
593
+ gpu_info.append("ARM Mali GPU detected")
594
+ break
595
+
596
+ # Check for Adreno GPU (Qualcomm)
597
+ if Path("/sys/class/kgsl").exists():
598
+ gpu_info.append("Qualcomm Adreno GPU detected")
599
+
600
+ # Jetson-specific GPU info
601
+ if self.system_info['is_jetson']:
602
+ gpu_info.append("NVIDIA Tegra integrated GPU")
257
603
 
258
- except Exception as e:
259
- print(f" ⚠️ Network check failed: {e}")
260
- return True # Not critical for service installation
261
-
262
-
263
- def check_platform_compatibility():
264
- """Check platform-specific compatibility."""
265
- print("🔧 Checking platform compatibility...")
266
-
267
- system = platform.system()
268
- machine = platform.machine()
269
-
270
- # Check for known compatible platforms
271
- compatible_platforms = {
272
- "Linux": ["x86_64", "aarch64", "armv7l", "arm64"],
273
- "Windows": ["AMD64", "x86_64"],
274
- "Darwin": ["x86_64", "arm64"] # macOS Intel and Apple Silicon
275
- }
276
-
277
- if system in compatible_platforms:
278
- if machine in compatible_platforms[system]:
279
- print(f" ✅ Platform {system}/{machine} is supported")
280
-
281
- # Special handling for ARM devices
282
- if machine in ["aarch64", "armv7l", "arm64"]:
283
- print(" 📱 ARM-based device detected")
284
- if "tegra" in platform.platform().lower():
285
- print(" 🚀 NVIDIA Jetson device detected")
286
- elif "raspberry" in platform.platform().lower():
287
- print(" 🍓 Raspberry Pi device detected")
288
-
289
- return True
290
- else:
291
- print(f" ⚠️ Architecture {machine} may have limited support on {system}")
292
- return True # Still allow execution
293
- else:
294
- print(f" ⚠️ Platform {system} may have limited support")
295
- return True # Still allow execution
296
-
297
-
298
- def print_system_info():
299
- """Print basic system information."""
300
- print("💻 System Information:")
301
- print(f" 🖥️ OS: {platform.system()} {platform.release()}")
302
- print(f" 🏗️ Architecture: {platform.machine()}")
303
- print(f" 🐍 Python: {platform.python_version()}")
304
- print(f" 📍 Python executable: {sys.executable}")
305
-
306
- # Additional platform details
307
- try:
308
- import os
309
- if hasattr(os, 'uname'):
310
- uname = os.uname()
311
- print(f" 🔧 Kernel: {uname.sysname} {uname.release}")
312
- except:
313
- pass
314
-
315
- # Check for containerized environment
316
- if Path("/.dockerenv").exists():
317
- print(" 🐳 Running in Docker container")
318
- elif os.environ.get("KUBERNETES_SERVICE_HOST"):
319
- print(" ☸️ Running in Kubernetes pod")
320
-
321
-
322
- def print_installation_help():
323
- """Print installation help for missing dependencies."""
324
- print("\n📋 Installation Help:")
325
- print("=" * 50)
326
-
327
- system = platform.system()
328
- machine = platform.machine()
329
-
330
- print("\n🎬 FFmpeg Installation:")
331
- if system == "Windows":
332
- print(" • Using Chocolatey: choco install ffmpeg")
333
- print(" • Using winget: winget install FFmpeg")
334
- print(" • Manual: Download from https://ffmpeg.org/download.html")
335
- elif system == "Darwin": # macOS
336
- print(" • Using Homebrew: brew install ffmpeg")
337
- print(" • Using MacPorts: sudo port install ffmpeg")
338
- else: # Linux
339
- print(" • Ubuntu/Debian: sudo apt update && sudo apt install ffmpeg")
340
- print(" • CentOS/RHEL: sudo yum install ffmpeg")
341
- print(" • Fedora: sudo dnf install ffmpeg")
342
- print(" • Alpine: apk add ffmpeg")
343
-
344
- # ARM-specific guidance
345
- if machine in ["aarch64", "armv7l", "arm64"]:
346
- print(" • For ARM devices:")
347
- print(" - Jetson: Usually pre-installed with JetPack")
348
- print(" - Raspberry Pi: sudo apt install ffmpeg")
349
- print(" - Build from source for optimal performance")
350
-
351
- print("\n🐍 Python Dependencies:")
352
- print(" • Install all: pip install -r requirements.txt")
353
- print(" • Or install individually:")
354
- print(" - pip install opencv-python")
355
- print(" - pip install grpcio")
356
- print(" - pip install pynvml")
357
-
358
- # Platform-specific OpenCV guidance
359
- if machine in ["aarch64", "armv7l", "arm64"]:
360
- print("\n🔧 ARM-Specific Notes:")
361
- print(" • OpenCV: Consider opencv-python-headless for servers")
362
- print(" • Jetson: Use opencv built with JetPack for GPU acceleration")
363
- print(" • Build from source for optimal ARM performance")
364
- print(" • For Jetson: Install with 'sudo apt install python3-opencv'")
365
-
366
- print("\n🎮 GPU Support:")
367
- if system == "Linux" and machine in ["aarch64", "armv7l"]:
368
- print(" • Jetson devices:")
369
- print(" - Install JetPack SDK from NVIDIA")
370
- print(" - Verify with: sudo /usr/bin/tegrastats")
371
- print(" - Check CUDA: nvcc --version")
372
- else:
373
- print(" • NVIDIA GPU (Optional):")
374
- print(" - Install NVIDIA drivers from https://www.nvidia.com/drivers/")
375
- print(" - Install CUDA toolkit if needed")
376
- print(" - pynvml should work automatically if drivers are installed")
377
-
378
- print("\n☁️ Cloud/Container Deployment:")
379
- print(" • Docker: Use nvidia/cuda base images for GPU support")
380
- print(" • Cloud: Ensure GPU instances have proper drivers")
381
- print(" • Kubernetes: Use nvidia.com/gpu resource limits")
382
- print(" • AWS: Use Deep Learning AMI or ECS GPU instances")
383
- print(" • GCP: Use AI Platform or GPU-enabled Compute instances")
384
-
385
- print("\n📦 Package Installation Tips:")
386
- print(" • Use virtual environments: python -m venv venv")
387
- print(" • Update pip: pip install --upgrade pip")
388
- print(" • For ARM: pip install --extra-index-url https://www.piwheels.org/simple/")
389
- print(" • Build tools: sudo apt install build-essential python3-dev")
390
-
391
-
392
- def main():
393
- """Run all diagnostic checks."""
394
- print("🏥 Nedo Vision Worker Service Doctor")
395
- print("=" * 50)
396
-
397
- # Print system info
398
- print_system_info()
399
- print()
604
+ # Try to get detailed Jetson info
605
+ try:
606
+ result = subprocess.run(
607
+ ["tegrastats", "--interval", "100", "--logfile", "/dev/stdout"],
608
+ capture_output=True,
609
+ text=True,
610
+ timeout=2
611
+ )
612
+ if result.returncode == 0 and "GPU" in result.stdout:
613
+ gpu_info.append("Jetson GPU active and monitored")
614
+ except:
615
+ pass
616
+
617
+ return gpu_info if gpu_info else None
400
618
 
401
- # Run all checks
402
- checks = [
403
- ("Platform Compatibility", check_platform_compatibility),
404
- ("Python Version", check_python_version),
405
- ("FFmpeg", check_ffmpeg),
406
- ("OpenCV", check_opencv),
407
- ("gRPC", check_grpc),
408
- ("NVIDIA GPU Support", check_pynvml),
409
- ("Storage Permissions", check_storage_permissions),
410
- ("Network Connectivity", check_network_connectivity),
411
- ]
619
+ def check_storage_and_permissions(self) -> None:
620
+ """Comprehensive storage and permission validation."""
621
+ details = []
622
+ recommendations = []
623
+
624
+ try:
625
+ # Test storage permissions
626
+ test_dirs = ["data", "models", "logs", "temp"]
627
+ successful_dirs = []
628
+ failed_dirs = []
629
+
630
+ for dir_name in test_dirs:
631
+ test_path = Path(dir_name) / "health_check"
632
+ try:
633
+ test_path.mkdir(parents=True, exist_ok=True)
634
+
635
+ # Test file operations
636
+ test_file = test_path / "test.json"
637
+ test_content = '{"test": true, "timestamp": "' + str(time.time()) + '"}'
638
+ test_file.write_text(test_content)
639
+
640
+ # Read back
641
+ read_content = test_file.read_text()
642
+
643
+ # Cleanup
644
+ test_file.unlink()
645
+ test_path.rmdir()
646
+
647
+ if read_content == test_content:
648
+ successful_dirs.append(dir_name)
649
+ else:
650
+ failed_dirs.append(f"{dir_name} (read/write mismatch)")
651
+
652
+ except Exception as e:
653
+ failed_dirs.append(f"{dir_name} ({str(e)})")
654
+
655
+ # Check disk space
656
+ disk_info = self._check_disk_space()
657
+ details.extend(disk_info['details'])
658
+
659
+ # Determine overall status
660
+ if failed_dirs:
661
+ status = HealthStatus.CRITICAL
662
+ message = f"Storage permission issues in {len(failed_dirs)} directories"
663
+ details.append(f"Failed directories: {', '.join(failed_dirs)}")
664
+ recommendations.extend([
665
+ "Check directory permissions and ownership",
666
+ "Ensure write access to working directory",
667
+ "Consider running with appropriate user privileges"
668
+ ])
669
+ is_blocking = True
670
+ performance_impact = "High"
671
+ elif disk_info['status'] == 'critical':
672
+ status = HealthStatus.CRITICAL
673
+ message = "Critical disk space shortage"
674
+ is_blocking = True
675
+ performance_impact = "High"
676
+ elif disk_info['status'] == 'warning':
677
+ status = HealthStatus.WARNING
678
+ message = "Storage OK with warnings"
679
+ recommendations.extend(disk_info.get('recommendations', []))
680
+ is_blocking = False
681
+ performance_impact = "Low"
682
+ else:
683
+ status = HealthStatus.GOOD
684
+ message = "Storage permissions and space OK"
685
+ details.append(f"Tested directories: {', '.join(successful_dirs)}")
686
+ is_blocking = False
687
+ performance_impact = "None"
688
+
689
+ except Exception as e:
690
+ status = HealthStatus.CRITICAL
691
+ message = "Storage check failed"
692
+ details.append(f"Unexpected error: {str(e)}")
693
+ recommendations.append("Investigate storage system issues")
694
+ is_blocking = True
695
+ performance_impact = "High"
696
+
697
+ self._add_result(HealthCheck(
698
+ name="Storage & Permissions",
699
+ status=status,
700
+ message=message,
701
+ details=details,
702
+ recommendations=recommendations if recommendations else None,
703
+ is_blocking=is_blocking,
704
+ performance_impact=performance_impact
705
+ ))
412
706
 
413
- results = []
414
- print("🔍 Running Diagnostic Checks:")
415
- print("-" * 30)
707
+ def _check_disk_space(self) -> Dict:
708
+ """Check available disk space with ML-specific thresholds."""
709
+ try:
710
+ current_dir = Path.cwd()
711
+ disk_usage = shutil.disk_usage(current_dir)
712
+
713
+ total_gb = disk_usage.total / (1024**3)
714
+ free_gb = disk_usage.free / (1024**3)
715
+ used_gb = (disk_usage.total - disk_usage.free) / (1024**3)
716
+ free_percent = (free_gb / total_gb) * 100
717
+
718
+ details = [
719
+ f"Total space: {total_gb:.1f}GB",
720
+ f"Used space: {used_gb:.1f}GB",
721
+ f"Free space: {free_gb:.1f}GB ({free_percent:.1f}%)"
722
+ ]
723
+
724
+ recommendations = []
725
+
726
+ if free_gb < 1:
727
+ status = 'critical'
728
+ details.append("Status: Critical - Immediate action required")
729
+ recommendations.extend([
730
+ "Free up disk space immediately",
731
+ "Service may fail to start or crash during operation",
732
+ "Consider moving to system with more storage"
733
+ ])
734
+ elif free_gb < 5:
735
+ status = 'warning'
736
+ details.append("Status: Low - Monitor closely")
737
+ recommendations.extend([
738
+ "Free up disk space soon",
739
+ "Model downloads and processing may fail",
740
+ "ML models typically require 2-20GB+ storage"
741
+ ])
742
+ elif free_gb < 20:
743
+ status = 'warning'
744
+ details.append("Status: Adequate for small models")
745
+ recommendations.append("Large models may require additional space")
746
+ else:
747
+ status = 'good'
748
+ details.append("Status: Good - Sufficient for most operations")
749
+
750
+ return {
751
+ 'status': status,
752
+ 'details': details,
753
+ 'recommendations': recommendations
754
+ }
755
+
756
+ except Exception as e:
757
+ return {
758
+ 'status': 'unknown',
759
+ 'details': [f"Disk space check failed: {str(e)}"],
760
+ 'recommendations': ["Manually verify available disk space"]
761
+ }
416
762
 
417
- for name, check_func in checks:
763
+ def check_system_resources(self) -> None:
764
+ """Check system resources and performance characteristics."""
765
+ details = []
766
+ recommendations = []
767
+
418
768
  try:
419
- result = check_func()
420
- results.append((name, result))
769
+ # Try to get detailed system info
770
+ try:
771
+ import psutil
772
+
773
+ # CPU information
774
+ cpu_count_physical = psutil.cpu_count(logical=False)
775
+ cpu_count_logical = psutil.cpu_count(logical=True)
776
+ cpu_freq = psutil.cpu_freq()
777
+
778
+ details.append(f"CPU cores: {cpu_count_physical} physical, {cpu_count_logical} logical")
779
+
780
+ if cpu_freq:
781
+ details.append(f"CPU frequency: {cpu_freq.current:.0f} MHz (max: {cpu_freq.max:.0f} MHz)")
782
+
783
+ # Memory information
784
+ memory = psutil.virtual_memory()
785
+ memory_gb = memory.total / (1024**3)
786
+ memory_available_gb = memory.available / (1024**3)
787
+ memory_percent = memory.percent
788
+
789
+ details.append(f"RAM: {memory_gb:.1f}GB total, {memory_available_gb:.1f}GB available ({100-memory_percent:.1f}% free)")
790
+
791
+ # Memory recommendations
792
+ memory_status = "good"
793
+ if memory_gb < 4:
794
+ memory_status = "critical"
795
+ recommendations.extend([
796
+ "Insufficient RAM for ML workloads",
797
+ "Consider upgrading to at least 8GB RAM",
798
+ "ML models may fail to load or crash"
799
+ ])
800
+ elif memory_gb < 8:
801
+ memory_status = "warning"
802
+ recommendations.append("8GB+ RAM recommended for better performance")
803
+ elif memory_gb >= 32:
804
+ details.append("RAM: Excellent for large model processing")
805
+
806
+ # Swap information
807
+ try:
808
+ swap = psutil.swap_memory()
809
+ if swap.total > 0:
810
+ swap_gb = swap.total / (1024**3)
811
+ details.append(f"Swap: {swap_gb:.1f}GB")
812
+ else:
813
+ details.append("Swap: Not configured")
814
+ if memory_gb < 16:
815
+ recommendations.append("Consider configuring swap space for memory-intensive operations")
816
+ except:
817
+ pass
818
+
819
+ # CPU usage check (brief sample)
820
+ cpu_percent = psutil.cpu_percent(interval=1)
821
+ details.append(f"CPU usage: {cpu_percent:.1f}%")
822
+
823
+ if cpu_percent > 80:
824
+ recommendations.append("High CPU usage detected - check for background processes")
825
+
826
+ except ImportError:
827
+ # Fallback without psutil
828
+ import multiprocessing
829
+ cpu_count = multiprocessing.cpu_count()
830
+ details.append(f"CPU cores: {cpu_count} (detected)")
831
+ details.append("Install 'psutil' for detailed system monitoring")
832
+ recommendations.append("pip install psutil for enhanced system monitoring")
833
+ memory_status = "unknown"
834
+
835
+ # Platform-specific optimizations
836
+ if self.system_info['is_arm']:
837
+ details.append("Platform: ARM architecture")
838
+ if self.system_info['is_jetson']:
839
+ details.append("Optimization: Jetson-specific optimizations available")
840
+ recommendations.append("Use Jetson-optimized libraries when available")
841
+ else:
842
+ recommendations.append("Consider ARM-optimized ML libraries")
843
+
844
+ # Container considerations
845
+ container_info = self._detect_container()
846
+ if container_info['any']:
847
+ container_types = []
848
+ if container_info['docker']:
849
+ container_types.append("Docker")
850
+ if container_info['kubernetes']:
851
+ container_types.append("Kubernetes")
852
+
853
+ details.append(f"Environment: Containerized ({', '.join(container_types)})")
854
+ recommendations.extend([
855
+ "Ensure container has adequate resource limits",
856
+ "Consider GPU passthrough for ML acceleration"
857
+ ])
858
+
859
+ # Determine status
860
+ if memory_status == "critical":
861
+ status = HealthStatus.CRITICAL
862
+ message = "Insufficient system resources"
863
+ is_blocking = True
864
+ performance_impact = "High"
865
+ elif memory_status == "warning":
866
+ status = HealthStatus.WARNING
867
+ message = "Limited system resources"
868
+ is_blocking = False
869
+ performance_impact = "Medium"
870
+ else:
871
+ status = HealthStatus.GOOD
872
+ message = "System resources adequate"
873
+ is_blocking = False
874
+ performance_impact = "None"
875
+
421
876
  except Exception as e:
422
- print(f" ❌ {name} check failed with exception: {e}")
423
- results.append((name, False))
424
- print()
877
+ status = HealthStatus.WARNING
878
+ message = "System resource check failed"
879
+ details.append(f"Error: {str(e)}")
880
+ recommendations.append("Manual system resource verification recommended")
881
+ is_blocking = False
882
+ performance_impact = "Unknown"
883
+
884
+ self._add_result(HealthCheck(
885
+ name="System Resources",
886
+ status=status,
887
+ message=message,
888
+ details=details,
889
+ recommendations=recommendations if recommendations else None,
890
+ is_blocking=is_blocking,
891
+ performance_impact=performance_impact
892
+ ))
425
893
 
426
- # Summary
427
- print("📊 Summary:")
428
- print("-" * 20)
894
+ def run_comprehensive_health_check(self) -> List[HealthCheck]:
895
+ """Execute all health checks with progress indication."""
896
+ print("🏥 Nedo Vision Worker Service - Comprehensive Health Check")
897
+ print("=" * 65)
898
+
899
+ # Display system summary
900
+ self._print_system_summary()
901
+ print()
902
+
903
+ # Define all health checks
904
+ health_checks = [
905
+ ("Python Environment", self.check_python_environment),
906
+ ("System Resources", self.check_system_resources),
907
+ ("Storage & Permissions", self.check_storage_and_permissions),
908
+ ("FFmpeg Media Processing", self.check_ffmpeg_installation),
909
+ ("OpenCV Computer Vision", self.check_opencv_installation),
910
+ ("GPU Acceleration", self.check_gpu_capabilities),
911
+ ]
912
+
913
+ print(f"🔍 Running {len(health_checks)} comprehensive health checks...")
914
+ print("-" * 50)
915
+
916
+ # Execute checks with progress indication
917
+ for i, (check_name, check_function) in enumerate(health_checks, 1):
918
+ print(f"[{i:2d}/{len(health_checks)}] {check_name:<30} ", end="", flush=True)
919
+
920
+ try:
921
+ start_time = time.time()
922
+ check_function()
923
+ duration = time.time() - start_time
924
+ print(f"✓ ({duration:.1f}s)")
925
+ except Exception as e:
926
+ print(f"✗ Error: {str(e)}")
927
+ # Add error result
928
+ self._add_result(HealthCheck(
929
+ name=check_name,
930
+ status=HealthStatus.CRITICAL,
931
+ message=f"Health check failed: {str(e)}",
932
+ is_blocking=True,
933
+ performance_impact="High"
934
+ ))
935
+
936
+ time.sleep(0.1) # Brief pause for better UX
937
+
938
+ return self.results
429
939
 
430
- passed = 0
431
- failed = 0
940
+ def _print_system_summary(self) -> None:
941
+ """Print concise system information summary."""
942
+ info = self.system_info
943
+
944
+ print("💻 System Summary:")
945
+ print(f" OS: {info['os']} {info['release']}")
946
+ print(f" Architecture: {info['machine']}")
947
+ print(f" Python: {info['python_version']}")
948
+
949
+ if info['is_jetson']:
950
+ print(" Platform: NVIDIA Jetson Device")
951
+ elif info['is_arm']:
952
+ print(" Platform: ARM-based Device")
953
+
954
+ container_info = self._detect_container()
955
+ if container_info['any']:
956
+ environments = []
957
+ if container_info['docker']:
958
+ environments.append("Docker")
959
+ if container_info['kubernetes']:
960
+ environments.append("Kubernetes")
961
+ print(f" Environment: {', '.join(environments)}")
432
962
 
433
- for name, result in results:
434
- if result:
435
- print(f" ✅ {name}")
436
- passed += 1
963
+ def generate_detailed_report(self) -> bool:
964
+ """Generate comprehensive health report with recommendations."""
965
+ elapsed_time = time.time() - self.start_time
966
+
967
+ print(f"\n{'='*65}")
968
+ print("📊 COMPREHENSIVE HEALTH REPORT")
969
+ print(f"{'='*65}")
970
+ print(f"⏱️ Scan completed in {elapsed_time:.1f} seconds")
971
+ print(f"🔍 {len(self.results)} health checks performed")
972
+
973
+ # Categorize results
974
+ excellent = [r for r in self.results if r.status == HealthStatus.EXCELLENT]
975
+ good = [r for r in self.results if r.status == HealthStatus.GOOD]
976
+ warnings = [r for r in self.results if r.status in [HealthStatus.WARNING, HealthStatus.INFO]]
977
+ critical = [r for r in self.results if r.status == HealthStatus.CRITICAL]
978
+ blocking = [r for r in self.results if r.is_blocking]
979
+
980
+ # Print detailed results
981
+ print(f"\n{'─'*65}")
982
+ print("📋 DETAILED RESULTS")
983
+ print(f"{'─'*65}")
984
+
985
+ for result in self.results:
986
+ icon, status_text = result.status.value
987
+ print(f"\n{icon} {result.name}")
988
+ print(f" Status: {result.message}")
989
+
990
+ if result.details:
991
+ print(" Details:")
992
+ for detail in result.details:
993
+ print(f" • {detail}")
994
+
995
+ if result.performance_impact != "None":
996
+ print(f" Performance Impact: {result.performance_impact}")
997
+
998
+ if result.recommendations:
999
+ print(" 💡 Recommendations:")
1000
+ for rec in result.recommendations:
1001
+ print(f" ▶ {rec}")
1002
+
1003
+ # Generate executive summary
1004
+ print(f"\n{'='*65}")
1005
+ print("📈 EXECUTIVE SUMMARY")
1006
+ print(f"{'='*65}")
1007
+
1008
+ print(f"🎉 Excellent: {len(excellent)}")
1009
+ print(f"✅ Good: {len(good)}")
1010
+ print(f"⚠️ Warnings: {len(warnings)}")
1011
+ print(f"❌ Critical: {len(critical)}")
1012
+
1013
+ # Service readiness assessment
1014
+ print(f"\n{'─'*40}")
1015
+ if blocking:
1016
+ print("🚫 SERVICE STATUS: NOT READY")
1017
+ print(f" {len(blocking)} blocking issues must be resolved")
1018
+ print("\n🔧 REQUIRED ACTIONS:")
1019
+ for issue in blocking:
1020
+ print(f" ❌ Fix: {issue.name} - {issue.message}")
1021
+ if issue.recommendations:
1022
+ for rec in issue.recommendations[:2]: # Show top 2 recommendations
1023
+ print(f" ▶ {rec}")
1024
+ elif critical:
1025
+ print("⚠️ SERVICE STATUS: DEGRADED")
1026
+ print(" Service may work but with significant limitations")
1027
+ print(f" {len(critical)} critical issues detected")
1028
+ elif warnings:
1029
+ print("✅ SERVICE STATUS: READY WITH RECOMMENDATIONS")
1030
+ print(" Service should work well with minor optimizations available")
437
1031
  else:
438
- print(f" {name}")
439
- failed += 1
1032
+ print("🎉 SERVICE STATUS: OPTIMAL")
1033
+ print(" All systems green - maximum performance expected")
1034
+
1035
+ # Performance optimization summary
1036
+ high_impact = [r for r in self.results if r.performance_impact == "High"]
1037
+ medium_impact = [r for r in self.results if r.performance_impact == "Medium"]
1038
+
1039
+ if high_impact or medium_impact:
1040
+ print(f"\n⚡ PERFORMANCE OPTIMIZATION OPPORTUNITIES:")
1041
+ if high_impact:
1042
+ print(f" High Impact ({len(high_impact)}): ", end="")
1043
+ print(", ".join([r.name for r in high_impact]))
1044
+ if medium_impact:
1045
+ print(f" Medium Impact ({len(medium_impact)}): ", end="")
1046
+ print(", ".join([r.name for r in medium_impact]))
1047
+
1048
+ # Installation help
1049
+ if blocking or critical:
1050
+ print(f"\n{'='*65}")
1051
+ print("📚 INSTALLATION GUIDE")
1052
+ print(f"{'='*65}")
1053
+ self._print_installation_guide()
1054
+
1055
+ print(f"{'='*65}")
1056
+
1057
+ return len(blocking) == 0
440
1058
 
441
- print(f"\n🎯 Results: {passed} passed, {failed} failed")
1059
+ def _print_installation_guide(self) -> None:
1060
+ """Print comprehensive installation guide."""
1061
+ system = self.system_info['os']
1062
+ is_arm = self.system_info['is_arm']
1063
+ is_jetson = self.system_info['is_jetson']
1064
+
1065
+ print("\n🎬 FFmpeg Installation:")
1066
+ if system == "Windows":
1067
+ print(" • Chocolatey: choco install ffmpeg")
1068
+ print(" • Winget: winget install FFmpeg")
1069
+ print(" • Manual: https://ffmpeg.org/download.html")
1070
+ elif system == "Darwin":
1071
+ print(" • Homebrew: brew install ffmpeg")
1072
+ print(" • MacPorts: sudo port install ffmpeg")
1073
+ else: # Linux
1074
+ if is_jetson:
1075
+ print(" • Jetson: Usually pre-installed with JetPack")
1076
+ print(" • If missing: sudo apt install ffmpeg")
1077
+ else:
1078
+ print(" • Ubuntu/Debian: sudo apt update && sudo apt install ffmpeg")
1079
+ print(" • CentOS/RHEL: sudo yum install ffmpeg")
1080
+ print(" • Fedora: sudo dnf install ffmpeg")
1081
+
1082
+ print("\n🐍 Python Dependencies:")
1083
+ print(" • Core packages: pip install opencv-python grpcio protobuf")
1084
+ print(" • GPU support: pip install pynvml")
1085
+ print(" • System monitoring: pip install psutil")
1086
+
1087
+ if is_arm:
1088
+ print(" • ARM optimizations:")
1089
+ if is_jetson:
1090
+ print(" - OpenCV: sudo apt install python3-opencv (GPU-optimized)")
1091
+ print(" - JetPack SDK includes optimized libraries")
1092
+ else:
1093
+ print(" - Consider: opencv-python-headless for servers")
1094
+ print(" - ARM wheels: --extra-index-url https://www.piwheels.org/simple/")
1095
+
1096
+ print("\n🎮 GPU Setup:")
1097
+ if is_jetson:
1098
+ print(" • Jetson devices:")
1099
+ print(" - Install JetPack SDK")
1100
+ print(" - Verify: sudo /usr/bin/tegrastats")
1101
+ print(" - Check CUDA: nvcc --version")
1102
+ else:
1103
+ print(" • NVIDIA GPUs:")
1104
+ print(" - Install drivers: https://www.nvidia.com/drivers/")
1105
+ print(" - CUDA toolkit for development")
1106
+ print(" - Verify: nvidia-smi")
1107
+
1108
+ print("\n💾 Storage Requirements:")
1109
+ print(" • Minimum 5GB free space")
1110
+ print(" • 20GB+ recommended for model storage")
1111
+ print(" • Write permissions in working directory")
1112
+
1113
+
1114
+ def main() -> int:
1115
+ """Main entry point for worker service health check."""
1116
+ try:
1117
+ doctor = WorkerServiceDoctor()
1118
+ results = doctor.run_comprehensive_health_check()
1119
+ is_ready = doctor.generate_detailed_report()
1120
+ return 0 if is_ready else 1
442
1121
 
443
- if failed > 0:
444
- print("\n⚠️ Some checks failed. See installation help below:")
445
- print_installation_help()
1122
+ except KeyboardInterrupt:
1123
+ print("\n\n🛑 Health check interrupted by user.")
1124
+ return 130
1125
+ except Exception as e:
1126
+ print(f"\n\n💥 Unexpected error during health check: {e}")
1127
+ import traceback
1128
+ traceback.print_exc()
446
1129
  return 1
447
- else:
448
- print("\n🎉 All checks passed! Your system is ready for Nedo Vision Worker Service.")
449
- return 0
450
1130
 
451
1131
 
452
1132
  if __name__ == "__main__":
453
- sys.exit(main())
1133
+ sys.exit(main())
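
The hunk above is the rewrite of nedo_vision_worker/doctor.py listed in the file table (+1066 -386): the 1.1.2 module's standalone check functions, which printed results and returned booleans, are replaced in 1.2.0 by a WorkerServiceDoctor class that returns structured HealthCheck records and a boolean readiness verdict. As a hedged illustration only (not part of the package), the new API shown in the diff could be driven programmatically roughly as follows; the import path nedo_vision_worker.doctor is assumed from the file listing, and all class, method, and field names are taken from the diff.

import sys

from nedo_vision_worker.doctor import HealthStatus, WorkerServiceDoctor

# Run every check and print the detailed report, as main() does in the diff.
doctor = WorkerServiceDoctor()
results = doctor.run_comprehensive_health_check()   # List[HealthCheck]
is_ready = doctor.generate_detailed_report()        # True when no check is_blocking

# Consume the structured results directly instead of parsing printed output.
blocking = [r for r in results if r.is_blocking]
critical = [r for r in results if r.status is HealthStatus.CRITICAL]
print(f"blocking: {len(blocking)}, critical: {len(critical)}")

sys.exit(0 if is_ready else 1)   # mirrors the exit codes of main() in 1.2.0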