dgenerate-ultralytics-headless 8.3.137__py3-none-any.whl → 8.3.224__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dgenerate_ultralytics_headless-8.3.137.dist-info → dgenerate_ultralytics_headless-8.3.224.dist-info}/METADATA +41 -34
- dgenerate_ultralytics_headless-8.3.224.dist-info/RECORD +285 -0
- {dgenerate_ultralytics_headless-8.3.137.dist-info → dgenerate_ultralytics_headless-8.3.224.dist-info}/WHEEL +1 -1
- tests/__init__.py +7 -6
- tests/conftest.py +15 -39
- tests/test_cli.py +17 -17
- tests/test_cuda.py +17 -8
- tests/test_engine.py +36 -10
- tests/test_exports.py +98 -37
- tests/test_integrations.py +12 -15
- tests/test_python.py +126 -82
- tests/test_solutions.py +319 -135
- ultralytics/__init__.py +27 -9
- ultralytics/cfg/__init__.py +83 -87
- ultralytics/cfg/datasets/Argoverse.yaml +4 -4
- ultralytics/cfg/datasets/DOTAv1.5.yaml +2 -2
- ultralytics/cfg/datasets/DOTAv1.yaml +2 -2
- ultralytics/cfg/datasets/GlobalWheat2020.yaml +2 -2
- ultralytics/cfg/datasets/HomeObjects-3K.yaml +4 -5
- ultralytics/cfg/datasets/ImageNet.yaml +3 -3
- ultralytics/cfg/datasets/Objects365.yaml +24 -20
- ultralytics/cfg/datasets/SKU-110K.yaml +9 -9
- ultralytics/cfg/datasets/VOC.yaml +10 -13
- ultralytics/cfg/datasets/VisDrone.yaml +43 -33
- ultralytics/cfg/datasets/african-wildlife.yaml +5 -5
- ultralytics/cfg/datasets/brain-tumor.yaml +4 -5
- ultralytics/cfg/datasets/carparts-seg.yaml +5 -5
- ultralytics/cfg/datasets/coco-pose.yaml +26 -4
- ultralytics/cfg/datasets/coco.yaml +4 -4
- ultralytics/cfg/datasets/coco128-seg.yaml +2 -2
- ultralytics/cfg/datasets/coco128.yaml +2 -2
- ultralytics/cfg/datasets/coco8-grayscale.yaml +103 -0
- ultralytics/cfg/datasets/coco8-multispectral.yaml +2 -2
- ultralytics/cfg/datasets/coco8-pose.yaml +23 -2
- ultralytics/cfg/datasets/coco8-seg.yaml +2 -2
- ultralytics/cfg/datasets/coco8.yaml +2 -2
- ultralytics/cfg/datasets/construction-ppe.yaml +32 -0
- ultralytics/cfg/datasets/crack-seg.yaml +5 -5
- ultralytics/cfg/datasets/dog-pose.yaml +32 -4
- ultralytics/cfg/datasets/dota8-multispectral.yaml +2 -2
- ultralytics/cfg/datasets/dota8.yaml +2 -2
- ultralytics/cfg/datasets/hand-keypoints.yaml +29 -4
- ultralytics/cfg/datasets/lvis.yaml +9 -9
- ultralytics/cfg/datasets/medical-pills.yaml +4 -5
- ultralytics/cfg/datasets/open-images-v7.yaml +7 -10
- ultralytics/cfg/datasets/package-seg.yaml +5 -5
- ultralytics/cfg/datasets/signature.yaml +4 -4
- ultralytics/cfg/datasets/tiger-pose.yaml +20 -4
- ultralytics/cfg/datasets/xView.yaml +5 -5
- ultralytics/cfg/default.yaml +96 -93
- ultralytics/cfg/trackers/botsort.yaml +16 -17
- ultralytics/cfg/trackers/bytetrack.yaml +9 -11
- ultralytics/data/__init__.py +4 -4
- ultralytics/data/annotator.py +12 -12
- ultralytics/data/augment.py +531 -564
- ultralytics/data/base.py +76 -81
- ultralytics/data/build.py +206 -42
- ultralytics/data/converter.py +179 -78
- ultralytics/data/dataset.py +121 -121
- ultralytics/data/loaders.py +114 -91
- ultralytics/data/split.py +28 -15
- ultralytics/data/split_dota.py +67 -48
- ultralytics/data/utils.py +110 -89
- ultralytics/engine/exporter.py +422 -460
- ultralytics/engine/model.py +224 -252
- ultralytics/engine/predictor.py +94 -89
- ultralytics/engine/results.py +345 -595
- ultralytics/engine/trainer.py +231 -134
- ultralytics/engine/tuner.py +279 -73
- ultralytics/engine/validator.py +53 -46
- ultralytics/hub/__init__.py +26 -28
- ultralytics/hub/auth.py +30 -16
- ultralytics/hub/google/__init__.py +34 -36
- ultralytics/hub/session.py +53 -77
- ultralytics/hub/utils.py +23 -109
- ultralytics/models/__init__.py +1 -1
- ultralytics/models/fastsam/__init__.py +1 -1
- ultralytics/models/fastsam/model.py +36 -18
- ultralytics/models/fastsam/predict.py +33 -44
- ultralytics/models/fastsam/utils.py +4 -5
- ultralytics/models/fastsam/val.py +12 -14
- ultralytics/models/nas/__init__.py +1 -1
- ultralytics/models/nas/model.py +16 -20
- ultralytics/models/nas/predict.py +12 -14
- ultralytics/models/nas/val.py +4 -5
- ultralytics/models/rtdetr/__init__.py +1 -1
- ultralytics/models/rtdetr/model.py +9 -9
- ultralytics/models/rtdetr/predict.py +22 -17
- ultralytics/models/rtdetr/train.py +20 -16
- ultralytics/models/rtdetr/val.py +79 -59
- ultralytics/models/sam/__init__.py +8 -2
- ultralytics/models/sam/amg.py +53 -38
- ultralytics/models/sam/build.py +29 -31
- ultralytics/models/sam/model.py +33 -38
- ultralytics/models/sam/modules/blocks.py +159 -182
- ultralytics/models/sam/modules/decoders.py +38 -47
- ultralytics/models/sam/modules/encoders.py +114 -133
- ultralytics/models/sam/modules/memory_attention.py +38 -31
- ultralytics/models/sam/modules/sam.py +114 -93
- ultralytics/models/sam/modules/tiny_encoder.py +268 -291
- ultralytics/models/sam/modules/transformer.py +59 -66
- ultralytics/models/sam/modules/utils.py +55 -72
- ultralytics/models/sam/predict.py +745 -341
- ultralytics/models/utils/loss.py +118 -107
- ultralytics/models/utils/ops.py +118 -71
- ultralytics/models/yolo/__init__.py +1 -1
- ultralytics/models/yolo/classify/predict.py +28 -26
- ultralytics/models/yolo/classify/train.py +50 -81
- ultralytics/models/yolo/classify/val.py +68 -61
- ultralytics/models/yolo/detect/predict.py +12 -15
- ultralytics/models/yolo/detect/train.py +56 -46
- ultralytics/models/yolo/detect/val.py +279 -223
- ultralytics/models/yolo/model.py +167 -86
- ultralytics/models/yolo/obb/predict.py +7 -11
- ultralytics/models/yolo/obb/train.py +23 -25
- ultralytics/models/yolo/obb/val.py +107 -99
- ultralytics/models/yolo/pose/__init__.py +1 -1
- ultralytics/models/yolo/pose/predict.py +12 -14
- ultralytics/models/yolo/pose/train.py +31 -69
- ultralytics/models/yolo/pose/val.py +119 -254
- ultralytics/models/yolo/segment/predict.py +21 -25
- ultralytics/models/yolo/segment/train.py +12 -66
- ultralytics/models/yolo/segment/val.py +126 -305
- ultralytics/models/yolo/world/train.py +53 -45
- ultralytics/models/yolo/world/train_world.py +51 -32
- ultralytics/models/yolo/yoloe/__init__.py +7 -7
- ultralytics/models/yolo/yoloe/predict.py +30 -37
- ultralytics/models/yolo/yoloe/train.py +89 -71
- ultralytics/models/yolo/yoloe/train_seg.py +15 -17
- ultralytics/models/yolo/yoloe/val.py +56 -41
- ultralytics/nn/__init__.py +9 -11
- ultralytics/nn/autobackend.py +179 -107
- ultralytics/nn/modules/__init__.py +67 -67
- ultralytics/nn/modules/activation.py +8 -7
- ultralytics/nn/modules/block.py +302 -323
- ultralytics/nn/modules/conv.py +61 -104
- ultralytics/nn/modules/head.py +488 -186
- ultralytics/nn/modules/transformer.py +183 -123
- ultralytics/nn/modules/utils.py +15 -20
- ultralytics/nn/tasks.py +327 -203
- ultralytics/nn/text_model.py +81 -65
- ultralytics/py.typed +1 -0
- ultralytics/solutions/__init__.py +12 -12
- ultralytics/solutions/ai_gym.py +19 -27
- ultralytics/solutions/analytics.py +36 -26
- ultralytics/solutions/config.py +29 -28
- ultralytics/solutions/distance_calculation.py +23 -24
- ultralytics/solutions/heatmap.py +17 -19
- ultralytics/solutions/instance_segmentation.py +21 -19
- ultralytics/solutions/object_blurrer.py +16 -17
- ultralytics/solutions/object_counter.py +48 -53
- ultralytics/solutions/object_cropper.py +22 -16
- ultralytics/solutions/parking_management.py +61 -58
- ultralytics/solutions/queue_management.py +19 -19
- ultralytics/solutions/region_counter.py +63 -50
- ultralytics/solutions/security_alarm.py +22 -25
- ultralytics/solutions/similarity_search.py +107 -60
- ultralytics/solutions/solutions.py +343 -262
- ultralytics/solutions/speed_estimation.py +35 -31
- ultralytics/solutions/streamlit_inference.py +104 -40
- ultralytics/solutions/templates/similarity-search.html +31 -24
- ultralytics/solutions/trackzone.py +24 -24
- ultralytics/solutions/vision_eye.py +11 -12
- ultralytics/trackers/__init__.py +1 -1
- ultralytics/trackers/basetrack.py +18 -27
- ultralytics/trackers/bot_sort.py +48 -39
- ultralytics/trackers/byte_tracker.py +94 -94
- ultralytics/trackers/track.py +7 -16
- ultralytics/trackers/utils/gmc.py +37 -69
- ultralytics/trackers/utils/kalman_filter.py +68 -76
- ultralytics/trackers/utils/matching.py +13 -17
- ultralytics/utils/__init__.py +251 -275
- ultralytics/utils/autobatch.py +19 -7
- ultralytics/utils/autodevice.py +68 -38
- ultralytics/utils/benchmarks.py +169 -130
- ultralytics/utils/callbacks/base.py +12 -13
- ultralytics/utils/callbacks/clearml.py +14 -15
- ultralytics/utils/callbacks/comet.py +139 -66
- ultralytics/utils/callbacks/dvc.py +19 -27
- ultralytics/utils/callbacks/hub.py +8 -6
- ultralytics/utils/callbacks/mlflow.py +6 -10
- ultralytics/utils/callbacks/neptune.py +11 -19
- ultralytics/utils/callbacks/platform.py +73 -0
- ultralytics/utils/callbacks/raytune.py +3 -4
- ultralytics/utils/callbacks/tensorboard.py +9 -12
- ultralytics/utils/callbacks/wb.py +33 -30
- ultralytics/utils/checks.py +163 -114
- ultralytics/utils/cpu.py +89 -0
- ultralytics/utils/dist.py +24 -20
- ultralytics/utils/downloads.py +176 -146
- ultralytics/utils/errors.py +11 -13
- ultralytics/utils/events.py +113 -0
- ultralytics/utils/export/__init__.py +7 -0
- ultralytics/utils/{export.py → export/engine.py} +81 -63
- ultralytics/utils/export/imx.py +294 -0
- ultralytics/utils/export/tensorflow.py +217 -0
- ultralytics/utils/files.py +33 -36
- ultralytics/utils/git.py +137 -0
- ultralytics/utils/instance.py +105 -120
- ultralytics/utils/logger.py +404 -0
- ultralytics/utils/loss.py +99 -61
- ultralytics/utils/metrics.py +649 -478
- ultralytics/utils/nms.py +337 -0
- ultralytics/utils/ops.py +263 -451
- ultralytics/utils/patches.py +70 -31
- ultralytics/utils/plotting.py +253 -223
- ultralytics/utils/tal.py +48 -61
- ultralytics/utils/torch_utils.py +244 -251
- ultralytics/utils/tqdm.py +438 -0
- ultralytics/utils/triton.py +22 -23
- ultralytics/utils/tuner.py +11 -10
- dgenerate_ultralytics_headless-8.3.137.dist-info/RECORD +0 -272
- {dgenerate_ultralytics_headless-8.3.137.dist-info → dgenerate_ultralytics_headless-8.3.224.dist-info}/entry_points.txt +0 -0
- {dgenerate_ultralytics_headless-8.3.137.dist-info → dgenerate_ultralytics_headless-8.3.224.dist-info}/licenses/LICENSE +0 -0
- {dgenerate_ultralytics_headless-8.3.137.dist-info → dgenerate_ultralytics_headless-8.3.224.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,404 @@
|
|
|
1
|
+
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
import queue
|
|
5
|
+
import shutil
|
|
6
|
+
import sys
|
|
7
|
+
import threading
|
|
8
|
+
import time
|
|
9
|
+
from datetime import datetime
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
|
|
12
|
+
from ultralytics.utils import MACOS, RANK
|
|
13
|
+
from ultralytics.utils.checks import check_requirements
|
|
14
|
+
|
|
15
|
+
# Default log file used for training output. A stale copy left over from a previous run is
# deleted at import time, but only by the main process (RANK -1 or 0).
DEFAULT_LOG_PATH = Path("train.log")
if RANK in {-1, 0}:
    if DEFAULT_LOG_PATH.exists():
        DEFAULT_LOG_PATH.unlink(missing_ok=True)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class ConsoleLogger:
    """Console output capture with API/file streaming and deduplication.

    Captures stdout/stderr output and streams it to either an API endpoint or local file, with intelligent deduplication
    to reduce noise from repetitive console output.

    Attributes:
        destination (str | Path): Target destination for streaming (URL or Path object).
        is_api (bool): Whether destination is an API endpoint (True) or local file (False).
        original_stdout: Reference to original sys.stdout for restoration.
        original_stderr: Reference to original sys.stderr for restoration.
        log_queue (queue.Queue): Thread-safe queue for buffering log messages.
        active (bool): Whether console capture is currently active.
        worker_thread (threading.Thread): Background thread for processing log queue.
        last_line (str): Last processed line for deduplication.
        last_time (float): Timestamp of last processed line.
        last_progress_line (str): Last progress bar line for progress deduplication.
        last_was_progress (bool): Whether the last line was a progress bar.

    Examples:
        Basic file logging:
        >>> logger = ConsoleLogger("training.log")
        >>> logger.start_capture()
        >>> print("This will be logged")
        >>> logger.stop_capture()

        API streaming:
        >>> logger = ConsoleLogger("https://api.example.com/logs")
        >>> logger.start_capture()
        >>> # All output streams to API
        >>> logger.stop_capture()
    """

    def __init__(self, destination):
        """Initialize with API endpoint or local file path.

        Args:
            destination (str | Path): API endpoint URL (http/https) or local file path for streaming output.
        """
        self.destination = destination
        self.is_api = isinstance(destination, str) and destination.startswith(("http://", "https://"))
        if not self.is_api:
            self.destination = Path(destination)

        # Console capture
        self.original_stdout = sys.stdout
        self.original_stderr = sys.stderr
        self.log_queue = queue.Queue(maxsize=1000)
        self.active = False
        self.worker_thread = None
        self._log_handler = None  # handler attached to the "ultralytics" logger while capturing

        # State tracking
        self.last_line = ""
        self.last_time = 0.0
        self.last_progress_line = ""  # Track last progress line for deduplication
        self.last_was_progress = False  # Track if last line was a progress bar

    def start_capture(self):
        """Start capturing console output and redirect stdout/stderr to custom capture objects.

        Does nothing if capture is already active. Also attaches a handler to the "ultralytics" logger and starts the
        background worker thread that streams queued lines to the destination.
        """
        if self.active:
            return

        self.active = True
        sys.stdout = self._ConsoleCapture(self.original_stdout, self._queue_log)
        sys.stderr = self._ConsoleCapture(self.original_stderr, self._queue_log)

        # Hook Ultralytics logger; keep a reference so stop_capture() can detach exactly this handler
        try:
            self._log_handler = self._LogHandler(self._queue_log)
            logging.getLogger("ultralytics").addHandler(self._log_handler)
        except Exception:
            self._log_handler = None  # best-effort: stdout/stderr capture still works without the hook

        self.worker_thread = threading.Thread(target=self._stream_worker, daemon=True)
        self.worker_thread.start()

    def stop_capture(self):
        """Stop capturing console output and restore original stdout/stderr.

        Detaches the logging handler added by start_capture(), signals the worker to finish and waits briefly (up to
        5 seconds) for it to flush lines that were already queued. Does nothing if capture is not active.
        """
        if not self.active:
            return

        self.active = False
        sys.stdout = self.original_stdout
        sys.stderr = self.original_stderr

        # Remove the handler, otherwise every start/stop cycle leaves another handler on the logger
        if self._log_handler is not None:
            logging.getLogger("ultralytics").removeHandler(self._log_handler)
            self._log_handler = None

        # Non-blocking sentinel: a blocking put() could hang forever on a full queue
        self._safe_put(None)

        worker = self.worker_thread
        if worker is not None and worker is not threading.current_thread():
            worker.join(timeout=5)  # bounded wait; the daemon thread keeps draining if the destination is slow

    def _queue_log(self, text):
        """Queue console text with deduplication and timestamp processing.

        Args:
            text (str): Raw text written to stdout/stderr or emitted by the logging handler.
        """
        if not self.active:
            return

        current_time = time.time()

        # Treat CRLF as a plain newline so the carriage-return handling below does not discard the line
        text = text.replace("\r\n", "\n")

        # Handle carriage returns (in-place terminal updates): keep only the text after the last one
        if "\r" in text:
            text = text.split("\r")[-1]

        lines = text.split("\n")
        if lines and lines[-1] == "":
            lines.pop()

        for line in lines:
            line = line.rstrip()

            # Skip partial progress bars: any line that still contains the thin bar segment
            if "─" in line:
                continue

            # Deduplicate completed progress bars only if they match the previous progress line
            if " ━━" in line:
                progress_core = line.split(" ━━")[0].strip()
                if progress_core == self.last_progress_line and self.last_was_progress:
                    continue
                self.last_progress_line = progress_core
                self.last_was_progress = True
            else:
                # Skip empty line after progress bar
                if not line and self.last_was_progress:
                    self.last_was_progress = False
                    continue
                self.last_was_progress = False

            # General deduplication of identical lines arriving within 100 ms
            if line == self.last_line and current_time - self.last_time < 0.1:
                continue

            self.last_line = line
            self.last_time = current_time

            # Add timestamp if needed
            if not line.startswith("[20"):
                timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
                line = f"[{timestamp}] {line}"

            # Queue with overflow protection; a line that cannot be queued is dropped
            self._safe_put(f"{line}\n")

    def _safe_put(self, item):
        """Safely put item in queue with overflow handling.

        When the queue is full the oldest entry is discarded to make room for the new one.

        Args:
            item (str | None): Log line to enqueue, or None as the worker shutdown sentinel.

        Returns:
            (bool): True if the item was queued, False if it had to be dropped.
        """
        try:
            self.log_queue.put_nowait(item)
            return True
        except queue.Full:
            try:
                self.log_queue.get_nowait()  # Drop oldest
                self.log_queue.put_nowait(item)
                return True
            except (queue.Empty, queue.Full):
                # Another thread emptied or refilled the queue between the two calls
                return False

    def _stream_worker(self):
        """Background worker for streaming logs to destination.

        Runs until the None sentinel is received. If the sentinel could not be queued, the worker exits once capture
        is inactive and the queue has been drained, so lines queued before stop_capture() are still written.
        """
        while True:
            try:
                log_text = self.log_queue.get(timeout=1)
            except queue.Empty:
                if not self.active:
                    break
                continue
            if log_text is None:
                break
            self._write_log(log_text)

    def _write_log(self, text):
        """Write log to API endpoint or local file destination.

        Errors are reported on the original stderr and never raised, so logging failures cannot break the caller.

        Args:
            text (str): Newline-terminated log line.
        """
        try:
            if self.is_api:
                import requests  # scoped as slow import

                payload = {"timestamp": datetime.now().isoformat(), "message": text.strip()}
                requests.post(str(self.destination), json=payload, timeout=5)
            else:
                self.destination.parent.mkdir(parents=True, exist_ok=True)
                with self.destination.open("a", encoding="utf-8") as f:
                    f.write(text)
        except Exception as e:
            print(f"Platform logging error: {e}", file=self.original_stderr)

    class _ConsoleCapture:
        """Lightweight stdout/stderr capture that tees writes to the original stream and a callback."""

        __slots__ = ("callback", "original")

        def __init__(self, original, callback):
            """Store the wrapped stream and the callable that receives each written chunk."""
            self.original = original
            self.callback = callback

        def write(self, text):
            """Write text to the original stream, forward it to the callback and return the stream's result."""
            written = self.original.write(text)
            self.callback(text)
            return written

        def flush(self):
            """Flush the original stream."""
            self.original.flush()

        def __getattr__(self, name):
            """Delegate any other attribute (isatty, encoding, fileno, ...) to the original stream."""
            # object.__getattribute__ avoids infinite recursion if the 'original' slot is not set yet
            return getattr(object.__getattribute__(self, "original"), name)

    class _LogHandler(logging.Handler):
        """Lightweight logging handler that forwards formatted records to a callback."""

        __slots__ = ("callback",)

        def __init__(self, callback):
            """Store the callable that receives each formatted record."""
            super().__init__()
            self.callback = callback

        def emit(self, record):
            """Format the record and pass it, newline-terminated, to the callback."""
            try:
                self.callback(self.format(record) + "\n")
            except Exception:
                self.handleError(record)  # standard logging convention: never raise from emit()
|
|
225
|
+
|
|
226
|
+
|
|
227
|
+
class SystemLogger:
    """Log dynamic system metrics for training monitoring.

    Captures real-time system metrics including CPU, RAM, disk I/O, network I/O, and NVIDIA GPU statistics for training
    performance monitoring and analysis.

    Attributes:
        pynvml: NVIDIA pynvml module instance if successfully imported, None otherwise.
        nvidia_initialized (bool): Whether NVIDIA GPU monitoring is available and initialized.
        net_start: Initial network I/O counters for calculating cumulative usage.
        disk_start: Initial disk I/O counters for calculating cumulative usage.

    Examples:
        Basic usage:
        >>> logger = SystemLogger()
        >>> metrics = logger.get_metrics()
        >>> print(f"CPU: {metrics['cpu']}%, RAM: {metrics['ram']}%")
        >>> if metrics["gpus"]:
        ...     gpu0 = metrics["gpus"]["0"]
        ...     print(f"GPU0: {gpu0['usage']}% usage, {gpu0['temp']}°C")

        Training loop integration:
        >>> system_logger = SystemLogger()
        >>> for epoch in range(epochs):
        ...     # Training code here
        ...     metrics = system_logger.get_metrics()
        ...     # Log to database/file
    """

    def __init__(self):
        """Initialize the system logger and record baseline I/O counters."""
        import psutil  # scoped as slow import

        self.pynvml = None
        self.nvidia_initialized = self._init_nvidia()
        self.net_start = psutil.net_io_counters()
        self.disk_start = psutil.disk_io_counters()
        # Prime the CPU counter: psutil documents that the first non-blocking call returns a meaningless 0.0
        psutil.cpu_percent()

    def _init_nvidia(self):
        """Initialize NVIDIA GPU monitoring with pynvml.

        Returns:
            (bool): True if pynvml was imported and initialized, False otherwise (always False on macOS).
        """
        if MACOS:  # explicit check instead of assert, which is stripped under `python -O`
            return False
        try:
            check_requirements("nvidia-ml-py>=12.0.0")
            self.pynvml = __import__("pynvml")
            self.pynvml.nvmlInit()
            return True
        except Exception:
            return False

    @staticmethod
    def _delta_mb(current, start, field):
        """Return the growth of a counter field between two psutil snapshots in MB, or 0.0 if either is missing."""
        if current is None or start is None:
            return 0.0
        return round((getattr(current, field) - getattr(start, field)) / (1 << 20), 3)

    def get_metrics(self):
        """Get current system metrics.

        Collects comprehensive system metrics including CPU usage, RAM usage, disk I/O statistics, network I/O
        statistics, and GPU metrics (if available). Example output:

        ```python
        metrics = {
            "cpu": 45.2,
            "ram": 78.9,
            "disk": {"read_mb": 156.7, "write_mb": 89.3, "used_gb": 256.8},
            "network": {"recv_mb": 157.2, "sent_mb": 89.1},
            "gpus": {
                "0": {"usage": 95.6, "memory": 85.4, "temp": 72, "power": 285},
                "1": {"usage": 94.1, "memory": 82.7, "temp": 70, "power": 278},
            },
        }
        ```

        - cpu (float): CPU usage percentage (0-100%)
        - ram (float): RAM usage percentage (0-100%)
        - disk (dict):
            - read_mb (float): Cumulative disk read in MB since initialization
            - write_mb (float): Cumulative disk write in MB since initialization
            - used_gb (float): Total disk space used in GB
        - network (dict):
            - recv_mb (float): Cumulative network received in MB since initialization
            - sent_mb (float): Cumulative network sent in MB since initialization
        - gpus (dict): GPU metrics keyed by device index as a string (e.g., "0", "1") containing:
            - usage (int): GPU utilization percentage (0-100%)
            - memory (float): CUDA memory usage percentage (0-100%)
            - temp (int): GPU temperature in degrees Celsius
            - power (int): GPU power consumption in watts

        I/O deltas are reported as 0.0 when psutil cannot provide counters (it returns None on systems without disks
        or network interfaces).

        Returns:
            metrics (dict): System metrics containing 'cpu', 'ram', 'disk', 'network', 'gpus' with usage data.
        """
        import psutil  # scoped as slow import

        net = psutil.net_io_counters()
        disk = psutil.disk_io_counters()
        memory = psutil.virtual_memory()
        disk_usage = shutil.disk_usage("/")

        metrics = {
            "cpu": round(psutil.cpu_percent(), 3),
            "ram": round(memory.percent, 3),
            "disk": {
                "read_mb": self._delta_mb(disk, self.disk_start, "read_bytes"),
                "write_mb": self._delta_mb(disk, self.disk_start, "write_bytes"),
                "used_gb": round(disk_usage.used / (1 << 30), 3),
            },
            "network": {
                "recv_mb": self._delta_mb(net, self.net_start, "bytes_recv"),
                "sent_mb": self._delta_mb(net, self.net_start, "bytes_sent"),
            },
            "gpus": {},
        }

        # Add GPU metrics (NVIDIA only)
        if self.nvidia_initialized:
            metrics["gpus"].update(self._get_nvidia_metrics())

        return metrics

    def _get_nvidia_metrics(self):
        """Get NVIDIA GPU metrics including utilization, memory, temperature, and power.

        Each device is queried independently, so a device whose query fails is omitted without discarding the
        metrics of the other devices.

        Returns:
            (dict): Mapping of device index (as a string) to a dict with 'usage', 'memory', 'temp' and 'power'.
        """
        gpus = {}
        if not self.nvidia_initialized or not self.pynvml:
            return gpus

        nvml = self.pynvml
        try:
            device_count = nvml.nvmlDeviceGetCount()
        except Exception:
            return gpus

        for i in range(device_count):
            try:
                handle = nvml.nvmlDeviceGetHandleByIndex(i)
                util = nvml.nvmlDeviceGetUtilizationRates(handle)
                memory = nvml.nvmlDeviceGetMemoryInfo(handle)
                temp = nvml.nvmlDeviceGetTemperature(handle, nvml.NVML_TEMPERATURE_GPU)
                power = nvml.nvmlDeviceGetPowerUsage(handle) // 1000  # divide by 1000 for the reported 'power' value

                gpus[str(i)] = {
                    "usage": round(util.gpu, 3),
                    "memory": round((memory.used / memory.total) * 100, 3),
                    "temp": temp,
                    "power": power,
                }
            except Exception:
                continue  # best-effort: skip only the device that failed
        return gpus
|
|
364
|
+
|
|
365
|
+
|
|
366
|
+
if __name__ == "__main__":
    # Minimal terminal dashboard: refresh the system metrics once per second until interrupted.
    print("SystemLogger Real-time Metrics Monitor")
    print("Press Ctrl+C to stop\n")

    logger = SystemLogger()

    try:
        while True:
            snapshot = logger.get_metrics()
            disk_stats = snapshot["disk"]
            net_stats = snapshot["network"]
            gpu_stats = snapshot["gpus"]

            # ANSI escape sequence to clear the screen (works on most terminals)
            print("\033[H\033[J", end="")

            # Host metrics
            print(f"CPU: {snapshot['cpu']:5.1f}%")
            print(f"RAM: {snapshot['ram']:5.1f}%")
            print(f"Disk Read: {disk_stats['read_mb']:8.1f} MB")
            print(f"Disk Write: {disk_stats['write_mb']:7.1f} MB")
            print(f"Disk Used: {disk_stats['used_gb']:8.1f} GB")
            print(f"Net Recv: {net_stats['recv_mb']:9.1f} MB")
            print(f"Net Sent: {net_stats['sent_mb']:9.1f} MB")

            # Per-GPU metrics, or a notice when none were reported
            if not gpu_stats:
                print("\nGPU: No NVIDIA GPUs detected")
            else:
                print("\nGPU Metrics:")
                for gpu_id, gpu_data in gpu_stats.items():
                    usage_part = f" GPU {gpu_id}: {gpu_data['usage']:3}% | "
                    memory_part = f"Mem: {gpu_data['memory']:5.1f}% | "
                    temp_part = f"Temp: {gpu_data['temp']:2}°C | "
                    power_part = f"Power: {gpu_data['power']:3}W"
                    print(usage_part + memory_part + temp_part + power_part)

            time.sleep(1)

    except KeyboardInterrupt:
        print("\n\nStopped monitoring.")
|