mcpbr 0.4.16__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,487 @@
1
+ """Resource limit configuration for Docker containers and evaluation runs.
2
+
3
+ Provides dataclasses and utilities for configuring CPU, memory, disk, network,
4
+ and timeout limits for Docker containers used in benchmark evaluations. Includes
5
+ monitoring capabilities to check container resource usage and detect violations.
6
+
7
+ Key capabilities:
8
+ - Configure CPU, memory, disk, and PID limits for Docker containers
9
+ - Set per-task and per-evaluation timeouts
10
+ - Convert limits to Docker container creation kwargs
11
+ - Monitor container resource usage via docker stats
12
+ - Detect and report resource limit violations
13
+ - Parse resource limits from YAML config dictionaries
14
+ """
15
+
16
+ import json
17
+ import logging
18
+ import subprocess
19
+ from dataclasses import dataclass
20
+
21
# Module-scoped logger; its name follows this module's import path.
logger = logging.getLogger(__name__)

# Docker network modes that limit validation treats as standard.
VALID_NETWORK_MODES = ("bridge", "host", "none")
25
+
26
+
27
+ @dataclass
28
+ class ResourceLimits:
29
+ """Resource limits for a Docker container or evaluation run.
30
+
31
+ Attributes:
32
+ cpu_count: Number of CPUs to allocate (e.g., 2.0 for 2 CPUs). None means no limit.
33
+ memory_mb: Memory limit in megabytes (e.g., 4096 for 4GB). None means no limit.
34
+ memory_swap_mb: Total memory + swap limit in megabytes. None means no limit.
35
+ Must be >= memory_mb if both are set. Set to same value as memory_mb to
36
+ disable swap.
37
+ disk_mb: Disk space limit in megabytes. None means no limit. Note: Docker does
38
+ not natively enforce disk limits per container without storage drivers;
39
+ this is tracked for monitoring/reporting purposes.
40
+ network_mode: Docker network mode (bridge, host, none). None uses Docker default.
41
+ pids_limit: Maximum number of processes in the container. None means no limit.
42
+ task_timeout_seconds: Timeout in seconds for a single task. Defaults to 600 (10 min).
43
+ total_timeout_seconds: Timeout in seconds for the entire evaluation run.
44
+ None means no total timeout.
45
+ """
46
+
47
+ cpu_count: float | None = None
48
+ memory_mb: int | None = None
49
+ memory_swap_mb: int | None = None
50
+ disk_mb: int | None = None
51
+ network_mode: str | None = None
52
+ pids_limit: int | None = None
53
+ task_timeout_seconds: int = 600
54
+ total_timeout_seconds: int | None = None
55
+
56
+
57
@dataclass
class ResourceUsage:
    """Point-in-time snapshot of what a container is consuming.

    All fields default to zero.

    Attributes:
        cpu_percent: CPU usage as a percentage; values above 100 span several
            CPUs (150.0 means 1.5 CPUs).
        memory_mb: Memory currently in use, in megabytes.
        disk_mb: Disk space currently in use, in megabytes.
        pids: Number of processes currently running.
    """

    cpu_percent: float = 0.0
    memory_mb: float = 0.0
    disk_mb: float = 0.0
    pids: int = 0
72
+
73
+
74
# Baseline limits for benchmark evaluation containers.
DEFAULT_LIMITS = ResourceLimits(
    # 2 CPUs and 4GB of RAM, with memory + swap capped at 8GB.
    cpu_count=2.0,
    memory_mb=4096,
    memory_swap_mb=8192,
    # 10GB of disk, bridge networking, at most 256 processes.
    disk_mb=10240,
    network_mode="bridge",
    pids_limit=256,
    # 10 minutes per task; no cap on the whole run.
    task_timeout_seconds=600,
    total_timeout_seconds=None,
)
85
+
86
+
87
class ContainerResourceConfig:
    """Converts ResourceLimits into Docker container creation kwargs.

    Provides a bridge between the ResourceLimits dataclass and the keyword
    arguments expected by docker-py's ``containers.run()`` or
    ``containers.create()`` methods, plus a sanity check of the configured
    values.
    """

    def __init__(self, limits: "ResourceLimits") -> None:
        """Initialize with resource limits.

        Args:
            limits: ResourceLimits to convert to Docker configuration.
        """
        self.limits = limits

    @staticmethod
    def from_limits(limits: "ResourceLimits") -> dict:
        """Convert ResourceLimits to Docker container creation kwargs.

        Translates each supported resource limit field into the corresponding
        Docker API parameter. Fields set to None are omitted from the output.
        ``disk_mb`` and the timeout fields have no counterpart here and are
        never emitted.

        Args:
            limits: ResourceLimits instance to convert.

        Returns:
            Dictionary of keyword arguments suitable for passing to
            ``docker.containers.run()`` or ``docker.containers.create()``.
        """
        config: dict = {}

        if limits.cpu_count is not None:
            # nano_cpus is an integer CPU quota in units of 1e-9 CPUs.
            # round() rather than int(): the float product can land a hair
            # below the intended integer (4.1 * 1e9 is 4099999999.9999995),
            # and truncating would shave one unit off the quota.
            config["nano_cpus"] = round(limits.cpu_count * 1e9)

        if limits.memory_mb is not None:
            # mem_limit accepts bytes or a string with a unit suffix like "4g"
            config["mem_limit"] = f"{limits.memory_mb}m"

        if limits.memory_swap_mb is not None:
            # memswap_limit is total memory + swap
            config["memswap_limit"] = f"{limits.memory_swap_mb}m"

        if limits.network_mode is not None:
            config["network_mode"] = limits.network_mode

        if limits.pids_limit is not None:
            config["pids_limit"] = limits.pids_limit

        return config

    def validate(self) -> list[str]:
        """Validate the resource limits and return any warnings.

        Checks for common misconfigurations such as swap being less than
        memory, invalid network modes, excessively low limits, and
        non-positive values. Nothing is raised and nothing is modified; a
        non-positive value produces both a "very low" and a "must be positive"
        message. Fields that are None are skipped.

        Returns:
            List of warning messages. Empty list means no issues found.
        """
        limits = self.limits
        warnings: list[str] = []

        # Swap is the combined memory + swap ceiling, so it cannot be smaller
        # than the memory ceiling.
        memory = limits.memory_mb
        swap = limits.memory_swap_mb
        if memory is not None and swap is not None and swap < memory:
            warnings.append(
                f"memory_swap_mb ({swap}) is less than "
                f"memory_mb ({memory}). Swap limit must be >= memory limit."
            )

        mode = limits.network_mode
        if mode is not None and mode not in VALID_NETWORK_MODES:
            warnings.append(
                f"network_mode '{mode}' is not a standard Docker network mode. "
                f"Valid modes: {', '.join(VALID_NETWORK_MODES)}"
            )

        # (field, smallest sensible value, advice); order fixes the order of
        # the emitted warnings.
        low_thresholds = (
            ("memory_mb", 256, "Most benchmark tasks require at least 256MB."),
            ("cpu_count", 0.5, "Most benchmark tasks require at least 0.5 CPUs."),
            (
                "task_timeout_seconds",
                30,
                "Most benchmark tasks require at least 30 seconds.",
            ),
            ("pids_limit", 16, "Most containers need at least 16 PIDs to function."),
        )
        for field_name, minimum, advice in low_thresholds:
            value = getattr(limits, field_name)
            if value is not None and value < minimum:
                warnings.append(f"{field_name} ({value}) is very low. {advice}")

        # Every numeric limit must be strictly positive when it is set.
        must_be_positive = (
            "cpu_count",
            "memory_mb",
            "disk_mb",
            "pids_limit",
            "task_timeout_seconds",
            "total_timeout_seconds",
        )
        for field_name in must_be_positive:
            value = getattr(limits, field_name)
            if value is not None and value <= 0:
                warnings.append(f"{field_name} ({value}) must be positive.")

        return warnings
222
+
223
+
224
class ResourceMonitor:
    """Monitors Docker container resource usage and checks against limits.

    Uses ``docker stats`` to query current container resource consumption
    and compares it against configured limits to detect violations.
    """

    def __init__(self, limits: "ResourceLimits") -> None:
        """Initialize the resource monitor.

        Args:
            limits: ResourceLimits to check usage against.
        """
        self.limits = limits

    @staticmethod
    def _parse_cpu_percent(raw: str) -> float:
        """Convert a CPU column such as "150.25%" to 150.25.

        An empty value or the "--" placeholder (already special-cased for the
        PID column) counts as 0.0 instead of raising.

        Raises:
            ValueError: If the remaining text is not a number.
        """
        text = raw.strip().rstrip("%").strip()
        if not text or text == "--":
            return 0.0
        return float(text)

    @staticmethod
    def _parse_pid_count(raw: str) -> int:
        """Convert a PID column such as "12" to 12; "" and "--" count as 0.

        Raises:
            ValueError: If the remaining text is not an integer.
        """
        text = raw.strip()
        if not text or text == "--":
            return 0
        return int(text)

    def check_container_resources(self, container_id: str) -> "ResourceUsage":
        """Get current resource usage for a Docker container.

        Queries ``docker stats`` for a single snapshot of the container's
        CPU, memory, and PID usage. Disk usage is not reported by that command
        and is always returned as 0.0.

        Args:
            container_id: Docker container ID or name.

        Returns:
            ResourceUsage with current consumption metrics.

        Raises:
            RuntimeError: If ``docker stats`` times out, exits non-zero,
                prints nothing, or prints output that cannot be parsed.
                Errors raised while launching the process itself (for
                example a missing ``docker`` executable) are not wrapped.
        """
        command = [
            "docker",
            "stats",
            container_id,
            "--no-stream",
            "--format",
            '{"cpu":"{{.CPUPerc}}","mem":"{{.MemUsage}}","pids":"{{.PIDs}}"}',
        ]

        try:
            result = subprocess.run(
                command,
                capture_output=True,
                text=True,
                timeout=10,
            )
        except subprocess.TimeoutExpired as e:
            logger.warning("docker stats timed out for container %s", container_id)
            raise RuntimeError(
                f"docker stats timed out for container {container_id}"
            ) from e

        if result.returncode != 0:
            raise RuntimeError(
                f"docker stats failed for container {container_id}: {result.stderr}"
            )

        stats_str = result.stdout.strip()
        if not stats_str:
            raise RuntimeError(f"Empty output from docker stats for container {container_id}")

        try:
            stats = json.loads(stats_str)
            # e.g. "150.25%" -> 150.25
            cpu_percent = self._parse_cpu_percent(stats.get("cpu", "0%"))
            # e.g. "512MiB / 4GiB" -> 512.0
            memory_mb = _parse_memory_usage(stats.get("mem", "0B / 0B"))
            pids = self._parse_pid_count(stats.get("pids", "0"))
        except ValueError as e:
            # json.JSONDecodeError is a ValueError subclass, so malformed JSON
            # and malformed numeric fields are reported the same way.
            logger.warning("Failed to parse docker stats output: %s", e)
            raise RuntimeError(f"Failed to parse docker stats output: {e}") from e

        return ResourceUsage(
            cpu_percent=cpu_percent,
            memory_mb=memory_mb,
            disk_mb=0.0,  # Docker stats does not report disk usage
            pids=pids,
        )

    def is_within_limits(self, usage: "ResourceUsage") -> bool:
        """Check whether resource usage is within configured limits.

        Args:
            usage: Current resource usage to check.

        Returns:
            True if all usage metrics are within limits, False otherwise.
        """
        return not self.get_violations(usage)

    def get_violations(self, usage: "ResourceUsage") -> list[str]:
        """Get list of resource limit violations.

        Compares each usage metric against the corresponding limit and
        returns human-readable descriptions of any violations found. Limits
        set to None are not checked; usage equal to a limit is not a
        violation.

        Args:
            usage: Current resource usage to check.

        Returns:
            List of violation description strings, ordered CPU, memory, disk,
            PIDs. Empty if all within limits.
        """
        limits = self.limits
        violations: list[str] = []

        # CPU: express the cpu_count limit as a percentage (2.0 CPUs = 200%).
        if limits.cpu_count is not None:
            max_cpu_percent = limits.cpu_count * 100
            if usage.cpu_percent > max_cpu_percent:
                violations.append(
                    f"CPU usage ({usage.cpu_percent:.1f}%) exceeds limit "
                    f"({max_cpu_percent:.1f}% = {limits.cpu_count} CPUs)"
                )

        if limits.memory_mb is not None and usage.memory_mb > limits.memory_mb:
            violations.append(
                f"Memory usage ({usage.memory_mb:.1f}MB) exceeds limit "
                f"({limits.memory_mb}MB)"
            )

        if limits.disk_mb is not None and usage.disk_mb > limits.disk_mb:
            violations.append(
                f"Disk usage ({usage.disk_mb:.1f}MB) exceeds limit ({limits.disk_mb}MB)"
            )

        if limits.pids_limit is not None and usage.pids > limits.pids_limit:
            violations.append(
                f"PID count ({usage.pids}) exceeds limit ({limits.pids_limit})"
            )

        return violations
363
+
364
+
365
+ def _parse_memory_usage(mem_str: str) -> float:
366
+ """Parse Docker memory usage string to megabytes.
367
+
368
+ Docker stats reports memory usage in formats like:
369
+ - "512MiB / 4GiB"
370
+ - "1.5GiB / 8GiB"
371
+ - "256KiB / 4GiB"
372
+
373
+ This function extracts the current usage (before the "/") and
374
+ converts it to megabytes.
375
+
376
+ Args:
377
+ mem_str: Memory usage string from docker stats.
378
+
379
+ Returns:
380
+ Current memory usage in megabytes.
381
+ """
382
+ # Extract the usage part (before " / ")
383
+ parts = mem_str.split("/")
384
+ if not parts:
385
+ return 0.0
386
+
387
+ usage_str = parts[0].strip()
388
+
389
+ # Parse the value and unit
390
+ value = 0.0
391
+ unit = ""
392
+
393
+ # Find where the number ends and the unit begins
394
+ i = 0
395
+ while i < len(usage_str) and (usage_str[i].isdigit() or usage_str[i] == "."):
396
+ i += 1
397
+
398
+ try:
399
+ value = float(usage_str[:i]) if i > 0 else 0.0
400
+ except ValueError:
401
+ return 0.0
402
+
403
+ unit = usage_str[i:].strip().lower()
404
+
405
+ # Convert to MB
406
+ if unit in ("gib", "gb"):
407
+ return value * 1024
408
+ elif unit in ("mib", "mb"):
409
+ return value
410
+ elif unit in ("kib", "kb"):
411
+ return value / 1024
412
+ elif unit in ("b",):
413
+ return value / (1024 * 1024)
414
+ else:
415
+ # Default: assume MB
416
+ return value
417
+
418
+
419
def parse_resource_limits(config: dict) -> ResourceLimits:
    """Parse resource limits from a YAML configuration dictionary.

    Accepts a dictionary (typically loaded from a YAML config file) and
    creates a ResourceLimits instance. Unknown keys are ignored with a
    warning. Missing keys use the dataclass defaults, and so do keys whose
    value is None (an explicit YAML ``null``). A ``None`` or empty ``config``
    (an empty ``resource_limits:`` section) yields all defaults.

    Expected YAML structure::

        resource_limits:
          cpu_count: 2.0
          memory_mb: 4096
          memory_swap_mb: 8192
          disk_mb: 10240
          network_mode: bridge
          pids_limit: 256
          task_timeout_seconds: 600
          total_timeout_seconds: 3600

    Args:
        config: Dictionary of resource limit configuration values.

    Returns:
        ResourceLimits instance with values from the config dict.

    Raises:
        ValueError: If a value cannot be converted to the field's type
            (e.g. ``memory_mb: "lots"``).
        TypeError: If a value has a type that ``int``/``float`` reject
            (e.g. a list).
    """
    # Field name -> conversion applied to the raw config value.
    converters = {
        "cpu_count": float,
        "memory_mb": int,
        "memory_swap_mb": int,
        "disk_mb": int,
        "network_mode": str,
        "pids_limit": int,
        "task_timeout_seconds": int,
        "total_timeout_seconds": int,
    }

    if not config:
        return ResourceLimits()

    # Warn about unknown keys; key=str keeps the sort from failing should a
    # non-string key slip in.
    for key in sorted(config.keys() - converters.keys(), key=str):
        logger.warning("Unknown resource limit key '%s' will be ignored.", key)

    kwargs: dict = {}
    for field_name, convert in converters.items():
        raw = config.get(field_name)
        if raw is None:
            # Absent or explicitly null: fall back to the dataclass default
            # instead of failing in int()/float() or storing the string "None".
            continue
        kwargs[field_name] = convert(raw)

    return ResourceLimits(**kwargs)