mcpbr 0.4.16__py3-none-any.whl → 0.5.0__py3-none-any.whl

mcpbr/docker_cache.py ADDED
@@ -0,0 +1,539 @@
+ """Docker image caching strategy for optimized evaluation startup.
+
+ This module provides an LRU-based cache management system for Docker images
+ used in mcpbr benchmark evaluations. It tracks locally cached images, enforces
+ size and count limits via LRU eviction, and recommends cache warming based on
+ benchmark history.
+ """
+
+ import json
+ import logging
+ from dataclasses import dataclass, field
+ from datetime import datetime, timezone
+ from pathlib import Path
+ from typing import Any
+
+ import docker
+
+ logger = logging.getLogger(__name__)
+
+ # Prefixes used to identify mcpbr-related Docker images
+ MCPBR_IMAGE_PREFIX = "mcpbr"
+ SWEBENCH_IMAGE_PREFIX = "ghcr.io/epoch-research/swe-bench"
+
+ # Default metadata file name
+ CACHE_METADATA_FILE = "docker_cache_metadata.json"
+
+
+ @dataclass
+ class CacheEntry:
+     """Metadata for a single cached Docker image.
+
+     Attributes:
+         image_tag: Full Docker image tag (e.g., 'ghcr.io/epoch-research/swe-bench.eval.x86_64.astropy__astropy-12907').
+         size_mb: Image size in megabytes.
+         last_used: Timestamp of the last time this image was used.
+         use_count: Number of times this image has been used.
+         layers: List of layer digest strings for deduplication awareness.
+         created: Timestamp when this image was first cached.
+     """
+
+     image_tag: str
+     size_mb: float
+     last_used: datetime
+     use_count: int
+     layers: list[str]
+     created: datetime
+
+     def to_dict(self) -> dict[str, Any]:
+         """Convert to dictionary for JSON serialization.
+
+         Returns:
+             Dictionary representation of the cache entry.
+         """
+         return {
+             "image_tag": self.image_tag,
+             "size_mb": self.size_mb,
+             "last_used": self.last_used.isoformat(),
+             "use_count": self.use_count,
+             "layers": self.layers,
+             "created": self.created.isoformat(),
+         }
+
+     @classmethod
+     def from_dict(cls, data: dict[str, Any]) -> "CacheEntry":
+         """Create a CacheEntry from a dictionary.
+
+         Args:
+             data: Dictionary with cache entry fields.
+
+         Returns:
+             CacheEntry instance.
+         """
+         return cls(
+             image_tag=data["image_tag"],
+             size_mb=data["size_mb"],
+             last_used=datetime.fromisoformat(data["last_used"]),
+             use_count=data["use_count"],
+             layers=data.get("layers", []),
+             created=datetime.fromisoformat(data["created"]),
+         )
+
+
+ @dataclass
+ class CacheConfig:
+     """Configuration for Docker image cache management.
+
+     Attributes:
+         max_size_gb: Maximum total disk usage for cached images in gigabytes.
+         max_images: Maximum number of cached images to retain.
+         eviction_strategy: Strategy for evicting images when limits are reached.
+             Currently only 'lru' (least recently used) is supported.
+         cache_dir: Directory for storing cache metadata files.
+     """
+
+     max_size_gb: float = 50.0
+     max_images: int = 100
+     eviction_strategy: str = "lru"
+     cache_dir: Path = field(default_factory=lambda: Path.home() / ".cache" / "mcpbr" / "docker")
+
+     def __post_init__(self) -> None:
+         """Validate configuration values after initialization."""
+         if self.max_size_gb <= 0:
+             raise ValueError(f"max_size_gb must be positive, got {self.max_size_gb}")
+         if self.max_images <= 0:
+             raise ValueError(f"max_images must be positive, got {self.max_images}")
+         if self.eviction_strategy not in ("lru",):
+             raise ValueError(
+                 f"Unsupported eviction strategy: {self.eviction_strategy!r}. "
+                 "Supported strategies: 'lru'"
+             )
+
+
+ @dataclass
+ class CacheStats:
+     """Statistics about the Docker image cache.
+
+     Attributes:
+         total_images: Number of images currently tracked in the cache.
+         total_size_gb: Total size of cached images in gigabytes.
+         cache_hit_rate: Ratio of cache hits to total lookups (0.0 to 1.0).
+         most_used: List of image tags sorted by descending use count (top 5).
+         least_used: List of image tags sorted by ascending use count (bottom 5).
+         potential_savings_gb: Estimated savings from layer deduplication in gigabytes.
+     """
+
+     total_images: int
+     total_size_gb: float
+     cache_hit_rate: float
+     most_used: list[str]
+     least_used: list[str]
+     potential_savings_gb: float
+
+
+ def _is_mcpbr_image(tag: str) -> bool:
+     """Check if a Docker image tag is mcpbr-related.
+
+     Args:
+         tag: Docker image tag string.
+
+     Returns:
+         True if the image is related to mcpbr benchmarks.
+     """
+     tag_lower = tag.lower()
+     return (
+         tag_lower.startswith(MCPBR_IMAGE_PREFIX)
+         or SWEBENCH_IMAGE_PREFIX in tag_lower
+         or "swe-bench" in tag_lower
+     )
+
+
+ class ImageCache:
+     """Manages Docker image caching with LRU eviction for mcpbr evaluations.
+
+     Tracks which Docker images are cached locally, records usage metadata,
+     enforces cache size and count limits via LRU eviction, and provides
+     statistics and cache warming recommendations.
+
+     The cache stores metadata in a JSON file on disk and interacts with
+     the Docker daemon to inspect and remove images.
+     """
+
+     def __init__(self, config: CacheConfig | None = None) -> None:
+         """Initialize the Docker image cache manager.
+
+         Args:
+             config: Cache configuration. Uses defaults if not provided.
+         """
+         self._config = config or CacheConfig()
+         self._entries: dict[str, CacheEntry] = {}
+         self._hits: int = 0
+         self._misses: int = 0
+         self._benchmark_history: dict[str, list[str]] = {}
+         self._client: Any = None
+
+         # Ensure the metadata directory exists
+         self._config.cache_dir.mkdir(parents=True, exist_ok=True)
+
+         # Load existing metadata from disk
+         self._load_metadata()
+
+     @property
+     def _docker_client(self) -> Any:
+         """Lazily initialize and return the Docker client.
+
+         Returns:
+             Docker client instance, or None if Docker is unavailable.
+         """
+         if self._client is None:
+             try:
+                 self._client = docker.from_env()
+             except Exception as e:
+                 logger.warning(f"Docker is not available: {e}")
+                 # Leave the client unset so the next access retries the connection.
+                 self._client = None
+         return self._client
+
+     def _metadata_path(self) -> Path:
+         """Return the path to the cache metadata file.
+
+         Returns:
+             Path to the JSON metadata file.
+         """
+         return self._config.cache_dir / CACHE_METADATA_FILE
+
+     def _load_metadata(self) -> None:
+         """Load cache metadata from disk."""
+         metadata_path = self._metadata_path()
+         if not metadata_path.exists():
+             return
+
+         try:
+             with open(metadata_path) as f:
+                 data = json.load(f)
+
+             for entry_data in data.get("entries", []):
+                 try:
+                     entry = CacheEntry.from_dict(entry_data)
+                     self._entries[entry.image_tag] = entry
+                 except (KeyError, ValueError) as e:
+                     logger.debug(f"Skipping corrupted cache entry: {e}")
+
+             self._hits = data.get("hits", 0)
+             self._misses = data.get("misses", 0)
+             self._benchmark_history = data.get("benchmark_history", {})
+
+         # OSError covers a metadata file that exists but cannot be read.
+         except (OSError, json.JSONDecodeError, KeyError, ValueError) as e:
+             logger.warning(f"Failed to load cache metadata, starting fresh: {e}")
+             self._entries = {}
+             self._hits = 0
+             self._misses = 0
+             self._benchmark_history = {}
+
+     def _save_metadata(self) -> None:
+         """Persist cache metadata to disk."""
+         data = {
+             "entries": [entry.to_dict() for entry in self._entries.values()],
+             "hits": self._hits,
+             "misses": self._misses,
+             "benchmark_history": self._benchmark_history,
+         }
+
+         metadata_path = self._metadata_path()
+         try:
+             with open(metadata_path, "w") as f:
+                 json.dump(data, f, indent=2)
+         except OSError as e:
+             logger.warning(f"Failed to save cache metadata: {e}")
+
+     def scan(self) -> list[CacheEntry]:
+         """Scan local Docker images and update cache entries for mcpbr-related images.
+
+         Queries the Docker daemon for locally available images, filters for
+         mcpbr-related ones, and updates the internal metadata. New images
+         are added with initial metadata; existing entries retain their usage
+         counters.
+
+         Returns:
+             List of CacheEntry objects for all mcpbr-related images found locally.
+         """
+         client = self._docker_client
+         if client is None:
+             logger.warning("Cannot scan images: Docker is not available")
+             return list(self._entries.values())
+
+         try:
+             images = client.images.list()
+         except Exception as e:
+             logger.warning(f"Failed to list Docker images: {e}")
+             return list(self._entries.values())
+
+         now = datetime.now(timezone.utc)
+         found_tags: set[str] = set()
+
+         for image in images:
+             tags = image.tags or []
+             for tag in tags:
+                 if not _is_mcpbr_image(tag):
+                     continue
+
+                 found_tags.add(tag)
+
+                 size_mb = image.attrs.get("Size", 0) / (1024 * 1024)
+                 if tag in self._entries:
+                     # Update the size from Docker (it may have changed)
+                     self._entries[tag].size_mb = size_mb
+                 else:
+                     # New image discovered
+                     layers = []
+                     root_fs = image.attrs.get("RootFS", {})
+                     if root_fs.get("Type") == "layers":
+                         layers = root_fs.get("Layers", [])
+
+                     self._entries[tag] = CacheEntry(
+                         image_tag=tag,
+                         size_mb=size_mb,
+                         last_used=now,
+                         use_count=0,
+                         layers=layers,
+                         created=now,
+                     )
+
+         # Remove entries for images that no longer exist locally
+         stale_tags = set(self._entries.keys()) - found_tags
+         for stale_tag in stale_tags:
+             del self._entries[stale_tag]
+
+         self._save_metadata()
+         return list(self._entries.values())
+
+     def get_cached(self, image_tag: str) -> CacheEntry | None:
+         """Look up a cached image by tag.
+
+         Records a cache hit or miss for statistics tracking.
+
+         Args:
+             image_tag: Docker image tag to look up.
+
+         Returns:
+             CacheEntry if the image is tracked in the cache, None otherwise.
+         """
+         entry = self._entries.get(image_tag)
+         if entry is not None:
+             self._hits += 1
+         else:
+             self._misses += 1
+         self._save_metadata()
+         return entry
+
+     def record_use(self, image_tag: str) -> None:
+         """Record usage of a cached image, updating last_used and use_count.
+
+         If the image is not currently tracked, this is a no-op.
+
+         Args:
+             image_tag: Docker image tag that was used.
+         """
+         entry = self._entries.get(image_tag)
+         if entry is None:
+             logger.debug(f"Image {image_tag!r} is not tracked in cache, skipping record_use")
+             return
+
+         entry.last_used = datetime.now(timezone.utc)
+         entry.use_count += 1
+         self._save_metadata()
+
+     def record_benchmark_use(self, benchmark_name: str, image_tag: str) -> None:
+         """Record that a benchmark used a specific image for warmup recommendations.
+
+         Args:
+             benchmark_name: Name of the benchmark (e.g., 'swe-bench-lite').
+             image_tag: Docker image tag used by the benchmark.
+         """
+         if benchmark_name not in self._benchmark_history:
+             self._benchmark_history[benchmark_name] = []
+
+         history = self._benchmark_history[benchmark_name]
+         if image_tag not in history:
+             history.append(image_tag)
+
+         self._save_metadata()
+
+     def evict_lru(self, target_size_gb: float | None = None) -> list[str]:
+         """Evict least recently used images to meet cache size and count limits.
+
+         Removes images from both the cache metadata and the local Docker daemon.
+         Images are evicted in order of least recent usage until both the target
+         size and the image count limit are satisfied.
+
+         Args:
+             target_size_gb: Target cache size in gigabytes. If None, uses the
+                 configured max_size_gb.
+
+         Returns:
+             List of image tags that were evicted.
+         """
+         target_gb = target_size_gb if target_size_gb is not None else self._config.max_size_gb
+         evicted: list[str] = []
+
+         # Sort entries by last_used ascending (oldest first = evict first)
+         sorted_entries = sorted(self._entries.values(), key=lambda e: e.last_used)
+
+         current_size_gb = sum(e.size_mb for e in self._entries.values()) / 1024.0
+         current_count = len(self._entries)
+
+         for entry in sorted_entries:
+             # Stop once both limits are satisfied
+             size_ok = current_size_gb <= target_gb
+             count_ok = current_count <= self._config.max_images
+             if size_ok and count_ok:
+                 break
+
+             # Evict the image. The metadata entry is dropped even if daemon
+             # removal fails; a later scan() re-adds any image still present.
+             tag = entry.image_tag
+             self._remove_docker_image(tag)
+             del self._entries[tag]
+             evicted.append(tag)
+
+             current_size_gb -= entry.size_mb / 1024.0
+             current_count -= 1
+
+         if evicted:
+             self._save_metadata()
+             logger.info(f"Evicted {len(evicted)} image(s) from cache: {evicted}")
+
+         return evicted
+
+     def _remove_docker_image(self, image_tag: str) -> bool:
+         """Remove a Docker image from the local daemon.
+
+         Args:
+             image_tag: Docker image tag to remove.
+
+         Returns:
+             True if the image was successfully removed, False otherwise.
+         """
+         client = self._docker_client
+         if client is None:
+             return False
+
+         try:
+             client.images.remove(image_tag, force=True)
+             return True
+         except Exception as e:
+             logger.warning(f"Failed to remove Docker image {image_tag!r}: {e}")
+             return False
+
+     def get_stats(self) -> CacheStats:
+         """Compute and return current cache statistics.
+
+         Returns:
+             CacheStats with totals, hit rate, usage rankings, and savings estimate.
+         """
+         entries = list(self._entries.values())
+         total_images = len(entries)
+         total_size_gb = sum(e.size_mb for e in entries) / 1024.0
+
+         total_lookups = self._hits + self._misses
+         cache_hit_rate = self._hits / total_lookups if total_lookups > 0 else 0.0
+
+         # Most used (top 5, descending by use_count)
+         by_use_desc = sorted(entries, key=lambda e: e.use_count, reverse=True)
+         most_used = [e.image_tag for e in by_use_desc[:5]]
+
+         # Least used (bottom 5, ascending by use_count)
+         by_use_asc = sorted(entries, key=lambda e: e.use_count)
+         least_used = [e.image_tag for e in by_use_asc[:5]]
+
+         # Estimate potential savings from shared layers
+         potential_savings_gb = self._estimate_layer_savings()
+
+         return CacheStats(
+             total_images=total_images,
+             total_size_gb=round(total_size_gb, 3),
+             cache_hit_rate=round(cache_hit_rate, 4),
+             most_used=most_used,
+             least_used=least_used,
+             potential_savings_gb=round(potential_savings_gb, 3),
+         )
+
+     def _estimate_layer_savings(self) -> float:
+         """Estimate disk savings from Docker layer deduplication.
+
+         Calculates the total size of all images minus the deduplicated size
+         based on unique layers, assuming layers are roughly uniform in size.
+
+         Returns:
+             Estimated savings in gigabytes.
+         """
+         entries = list(self._entries.values())
+         if not entries:
+             return 0.0
+
+         # Count total layer references vs unique layers
+         all_layers: list[str] = []
+         unique_layers: set[str] = set()
+
+         for entry in entries:
+             all_layers.extend(entry.layers)
+             unique_layers.update(entry.layers)
+
+         total_layer_count = len(all_layers)
+         unique_layer_count = len(unique_layers)
+
+         if total_layer_count == 0 or unique_layer_count == 0:
+             return 0.0
+
+         # Estimate: shared layers reduce total size proportionally
+         # (savings = total size * (1 - unique / total))
+         total_size_gb = sum(e.size_mb for e in entries) / 1024.0
+         dedup_ratio = 1.0 - (unique_layer_count / total_layer_count)
+         return total_size_gb * dedup_ratio
+
+     def recommend_warmup(self, benchmark_name: str) -> list[str]:
+         """Recommend images to pre-pull based on benchmark history.
+
+         Analyzes past benchmark runs to determine which images are commonly
+         needed and not currently cached locally.
+
+         Args:
+             benchmark_name: Name of the benchmark to prepare for
+                 (e.g., 'swe-bench-lite').
+
+         Returns:
+             List of image tags that should be pre-pulled for optimal performance.
+         """
+         history = self._benchmark_history.get(benchmark_name, [])
+         if not history:
+             return []
+
+         # Recommend images that were used before but are not currently cached
+         cached_tags = set(self._entries.keys())
+         recommendations = [tag for tag in history if tag not in cached_tags]
+
+         return recommendations
+
+     def cleanup_dangling(self) -> int:
+         """Remove dangling (untagged) Docker images to reclaim disk space.
+
+         Returns:
+             Number of dangling images removed.
+         """
+         client = self._docker_client
+         if client is None:
+             logger.warning("Cannot clean up dangling images: Docker is not available")
+             return 0
+
+         try:
+             result = client.images.prune(filters={"dangling": True})
+             deleted = result.get("ImagesDeleted") or []
+             count = len(deleted)
+             space_reclaimed = result.get("SpaceReclaimed", 0)
+             if count > 0:
+                 logger.info(
+                     f"Removed {count} dangling image(s), "
+                     f"reclaimed {space_reclaimed / (1024 * 1024):.1f} MB"
+                 )
+             return count
+         except Exception as e:
+             logger.warning(f"Failed to prune dangling images: {e}")
+             return 0
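
For reviewers who want to exercise the new module, here is a minimal usage sketch of the ImageCache lifecycle (scan, lookup, eviction, stats, warmup). It assumes mcpbr 0.5.0 is installed with a reachable Docker daemon and that the file above is importable as mcpbr.docker_cache; the image tag is the example from the CacheEntry docstring and 'swe-bench-lite' is the placeholder benchmark name from the docstrings, not output from a real run.

from mcpbr.docker_cache import CacheConfig, ImageCache

# Cap the cache at 20 GB / 50 images instead of the 50 GB / 100 defaults.
config = CacheConfig(max_size_gb=20.0, max_images=50)
cache = ImageCache(config)

# Sync metadata with the images the local Docker daemon actually has.
cache.scan()

# Example tag from the CacheEntry docstring; real tags come from the harness.
tag = "ghcr.io/epoch-research/swe-bench.eval.x86_64.astropy__astropy-12907"
if cache.get_cached(tag) is not None:
    cache.record_use(tag)
    cache.record_benchmark_use("swe-bench-lite", tag)

# Enforce the configured limits, then inspect the results.
evicted = cache.evict_lru()
stats = cache.get_stats()
print(f"{stats.total_images} images, {stats.total_size_gb} GB, "
      f"hit rate {stats.cache_hit_rate:.0%}, evicted {len(evicted)}")

# Images worth pre-pulling before the next swe-bench-lite run.
for image in cache.recommend_warmup("swe-bench-lite"):
    print("pre-pull:", image)

As a sanity check on the savings heuristic in _estimate_layer_savings: ten images totalling 40 GB whose 200 layer references collapse to 120 unique layers yield an estimate of 40 * (1 - 120/200) = 16 GB.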