ml-dash 0.5.8__py3-none-any.whl → 0.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
ml_dash/experiment.py CHANGED
@@ -67,6 +67,83 @@ class RunManager:
         """Mark experiment as cancelled (status: CANCELLED)."""
         self._experiment._close(status="CANCELLED")

+    @property
+    def folder(self) -> Optional[str]:
+        """
+        Get the current folder for this experiment.
+
+        Returns:
+            Current folder path or None
+
+        Example:
+            current_folder = exp.run.folder
+        """
+        return self._experiment.folder
+
+    @folder.setter
+    def folder(self, value: Optional[str]) -> None:
+        """
+        Set the folder for this experiment before initialization.
+
+        This can ONLY be set before the experiment is started (initialized).
+        Once the experiment is opened, the folder cannot be changed.
+
+        Supports template variables:
+        - {RUN.name} - Experiment name
+        - {RUN.project} - Project name
+
+        Args:
+            value: Folder path with optional template variables
+                (e.g., "experiments/{RUN.name}" or None)
+
+        Raises:
+            RuntimeError: If experiment is already initialized/open
+
+        Examples:
+            from ml_dash import dxp
+
+            # Static folder
+            dxp.run.folder = "experiments/vision/resnet"
+
+            # Template with experiment name
+            dxp.run.folder = "/iclr_2024/{RUN.name}"
+
+            # Template with multiple variables
+            dxp.run.folder = "{RUN.project}/experiments/{RUN.name}"
+
+            # Now start the experiment
+            with dxp.run:
+                dxp.params.set(lr=0.001)
+        """
+        if self._experiment._is_open:
+            raise RuntimeError(
+                "Cannot change folder after experiment is initialized. "
+                "Set folder before calling start() or entering 'with' block."
+            )
+
+        # Process template variables if present
+        if value and '{RUN.' in value:
+            # Generate unique run ID (timestamp-based)
+            from datetime import datetime
+            run_timestamp = datetime.utcnow().strftime("%Y%m%d_%H%M%S")
+
+            # Simple string replacement for template variables
+            # Supports: {RUN.name}, {RUN.project}, {RUN.id}, {RUN.timestamp}
+            replacements = {
+                '{RUN.name}': f"{self._experiment.name}_{run_timestamp}",  # Unique name with timestamp
+                '{RUN.project}': self._experiment.project,
+                '{RUN.id}': run_timestamp,  # Just the timestamp
+                '{RUN.timestamp}': run_timestamp,  # Alias for id
+            }
+
+            # Replace all template variables
+            for template, replacement in replacements.items():
+                if template in value:
+                    value = value.replace(template, replacement)
+
+        # Update the folder on the experiment
+        self._experiment.folder = value
+
     def __enter__(self) -> "Experiment":
         """Context manager entry - starts the experiment."""
         return self.start()
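
Taken together, the new property and setter let a run's destination folder be set once, with template expansion, before the run is opened. A minimal usage sketch based on the docstring above, assuming the dxp singleton exported by ml_dash; the exact expanded path depends on the UTC timestamp substituted into {RUN.name}:

    from ml_dash import dxp

    # Must happen before the run starts; changing it afterwards raises RuntimeError.
    dxp.run.folder = "/iclr_2024/{RUN.name}"   # expands to e.g. /iclr_2024/my-experiment_20240101_120000

    with dxp.run:                 # opening the run freezes the folder
        dxp.params.set(lr=0.001)
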
@@ -105,7 +182,7 @@ class Experiment:
         experiment = Experiment(
             name="my-experiment",
             project="my-project",
-            remote="http://localhost:3000",
+            remote="https://api.dash.ml",
             api_key="your-jwt-token"
         )

@@ -139,7 +216,6 @@ class Experiment:
         # Mode configuration
         remote: Optional[str] = None,
         api_key: Optional[str] = None,
-        user_name: Optional[str] = None,
         local_path: Optional[str] = None,
         # Internal parameters
         _write_protected: bool = False,
@@ -155,9 +231,8 @@ class Experiment:
             bindrs: Optional list of bindrs
             folder: Optional folder path (e.g., "/experiments/baseline")
             metadata: Optional metadata dict
-            remote: Remote API URL (e.g., "http://localhost:3000")
-            api_key: JWT token for authentication (if not provided, will be generated from user_name)
-            user_name: Username for authentication (generates API key if api_key not provided)
+            remote: Remote API URL (e.g., "https://api.dash.ml")
+            api_key: JWT token for authentication (auto-loaded from storage if not provided)
            local_path: Local storage root path (for local mode)
            _write_protected: Internal parameter - if True, experiment becomes immutable after creation
        """
@@ -170,10 +245,6 @@ class Experiment:
         self._write_protected = _write_protected
         self.metadata = metadata

-        # Generate API key from username if not provided
-        if remote and not api_key and user_name:
-            api_key = self._generate_api_key_from_username(user_name)
-
         # Determine operation mode
         if remote and local_path:
             self.mode = OperationMode.HYBRID
@@ -183,7 +254,7 @@ class Experiment:
             self.mode = OperationMode.LOCAL
         else:
             raise ValueError(
-                "Must specify either 'remote' (with api_key/user_name) or 'local_path'"
+                "Must specify either 'remote' (with api_key) or 'local_path'"
             )

         # Initialize backend
@@ -192,10 +263,10 @@ class Experiment:
         self._experiment_id: Optional[str] = None
         self._experiment_data: Optional[Dict[str, Any]] = None
         self._is_open = False
+        self._metrics_manager: Optional['MetricsManager'] = None  # Cached metrics manager

         if self.mode in (OperationMode.REMOTE, OperationMode.HYBRID):
-            if not api_key:
-                raise ValueError("Either api_key or user_name is required for remote mode")
+            # api_key can be None - RemoteClient will auto-load from storage
             self._client = RemoteClient(base_url=remote, api_key=api_key)

         if self.mode in (OperationMode.LOCAL, OperationMode.HYBRID):
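
With user_name gone, mode selection now depends only on which of remote and local_path are given, and api_key may be omitted in remote mode. A hedged sketch of the three configurations; the import path and the ./ml-dash-data directory are illustrative assumptions:

    from ml_dash.experiment import Experiment

    # REMOTE: api_key is optional; RemoteClient falls back to stored credentials
    remote_exp = Experiment(name="demo", project="my-project", remote="https://api.dash.ml")

    # LOCAL: filesystem-backed storage
    local_exp = Experiment(name="demo", project="my-project", local_path="./ml-dash-data")

    # HYBRID: both backends at once
    hybrid_exp = Experiment(
        name="demo",
        project="my-project",
        remote="https://api.dash.ml",
        api_key="your-jwt-token",
        local_path="./ml-dash-data",
    )
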
@@ -203,43 +274,6 @@ class Experiment:
                 raise ValueError("local_path is required for local mode")
             self._storage = LocalStorage(root_path=Path(local_path))

-    @staticmethod
-    def _generate_api_key_from_username(user_name: str) -> str:
-        """
-        Generate a deterministic API key (JWT) from username.
-
-        This is a temporary solution until proper user authentication is implemented.
-        Generates a unique user ID from the username and creates a JWT token.
-
-        Args:
-            user_name: Username to generate API key from
-
-        Returns:
-            JWT token string
-        """
-        import hashlib
-        import time
-        import jwt
-
-        # Generate deterministic user ID from username (first 10 digits of SHA256 hash)
-        user_id = str(int(hashlib.sha256(user_name.encode()).hexdigest()[:16], 16))[:10]
-
-        # JWT payload
-        payload = {
-            "userId": user_id,
-            "userName": user_name,
-            "iat": int(time.time()),
-            "exp": int(time.time()) + (30 * 24 * 60 * 60)  # 30 days expiration
-        }
-
-        # Secret key for signing (should match server's JWT_SECRET)
-        secret = "your-secret-key-change-this-in-production"
-
-        # Generate JWT
-        token = jwt.encode(payload, secret, algorithm="HS256")
-
-        return token
-
     def _open(self) -> "Experiment":
         """
         Internal method to open the experiment (create or update on server/filesystem).
@@ -350,7 +384,12 @@ class Experiment:
             RuntimeError: If experiment is not open
         """
         if not self._is_open:
-            raise RuntimeError("Experiment not open. Use experiment.run.start() or context manager.")
+            raise RuntimeError(
+                "Experiment not started. Use 'with experiment.run:' or call experiment.run.start() first.\n"
+                "Example:\n"
+                "  with dxp.run:\n"
+                "      dxp.params.set(lr=0.001)"
+            )

         return ParametersBuilder(self)

@@ -395,7 +434,12 @@ class Experiment:
             ValueError: If log level is invalid
         """
         if not self._is_open:
-            raise RuntimeError("Experiment not open. Use experiment.open() or context manager.")
+            raise RuntimeError(
+                "Experiment not started. Use 'with experiment.run:' or call experiment.run.start() first.\n"
+                "Example:\n"
+                "  with dxp.run:\n"
+                "      dxp.log().info('Training started')"
+            )

         # Fluent mode: return LogBuilder
         if message is None:
@@ -424,7 +468,7 @@ class Experiment:
     ) -> None:
         """
         Internal method to write a log entry immediately.
-        No buffering - writes directly to storage/remote.
+        No buffering - writes directly to storage/remote AND stdout/stderr.

         Args:
             message: Log message
@@ -441,6 +485,9 @@ class Experiment:
         if metadata:
             log_entry["metadata"] = metadata

+        # Mirror to stdout/stderr before writing to storage
+        self._print_log(message, level, metadata)
+
         # Write immediately (no buffering)
         if self._client:
             # Remote mode: send to API (wrapped in array for batch API)
@@ -454,12 +501,50 @@ class Experiment:
             self._storage.write_log(
                 project=self.project,
                 experiment=self.name,
+                folder=self.folder,
                 message=log_entry["message"],
                 level=log_entry["level"],
                 metadata=log_entry.get("metadata"),
                 timestamp=log_entry["timestamp"]
             )

+    def _print_log(
+        self,
+        message: str,
+        level: str,
+        metadata: Optional[Dict[str, Any]]
+    ) -> None:
+        """
+        Print log to stdout or stderr based on level.
+
+        ERROR and FATAL go to stderr, all others go to stdout.
+
+        Args:
+            message: Log message
+            level: Log level
+            metadata: Optional metadata dict
+        """
+        import sys
+
+        # Format the log message
+        level_upper = level.upper()
+
+        # Build metadata string if present
+        metadata_str = ""
+        if metadata:
+            # Format metadata as key=value pairs
+            pairs = [f"{k}={v}" for k, v in metadata.items()]
+            metadata_str = f" [{', '.join(pairs)}]"
+
+        # Format: [LEVEL] message [key=value, ...]
+        formatted_message = f"[{level_upper}] {message}{metadata_str}"
+
+        # Route to stdout or stderr based on level
+        if level in ("error", "fatal"):
+            print(formatted_message, file=sys.stderr)
+        else:
+            print(formatted_message, file=sys.stdout)
+
     def files(self, **kwargs) -> FileBuilder:
         """
         Get a FileBuilder for fluent file operations.
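
Since _write_log now routes every entry through _print_log, log calls are mirrored to the console in addition to being persisted, and entries carrying metadata get a trailing [key=value, ...] suffix. A rough sketch of the visible effect, assuming LogBuilder exposes level methods such as .info() and .error() (only .info() appears in the error-message example above):

    with dxp.run:
        dxp.log().info("Training started")
        # stdout: [INFO] Training started
        dxp.log().error("Loss diverged")
        # stderr: [ERROR] Loss diverged
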
@@ -485,7 +570,12 @@ class Experiment:
             experiment.files(file_id="123").delete()
         """
         if not self._is_open:
-            raise RuntimeError("Experiment not open. Use experiment.open() or context manager.")
+            raise RuntimeError(
+                "Experiment not started. Use 'with experiment.run:' or call experiment.run.start() first.\n"
+                "Example:\n"
+                "  with dxp.run:\n"
+                "      dxp.files().save()"
+            )

         return FileBuilder(self, **kwargs)

@@ -540,6 +630,7 @@ class Experiment:
             result = self._storage.write_file(
                 project=self.project,
                 experiment=self.name,
+                folder=self.folder,
                 file_path=file_path,
                 prefix=prefix,
                 filename=filename,
@@ -716,6 +807,7 @@ class Experiment:
             self._storage.write_parameters(
                 project=self.project,
                 experiment=self.name,
+                folder=self.folder,
                 data=flattened_params
             )

@@ -787,7 +879,10 @@ class Experiment:
                 "Use 'with Experiment(...).run as experiment:' or call experiment.run.start() first."
             )

-        return MetricsManager(self)
+        # Cache the MetricsManager instance to preserve MetricBuilder cache across calls
+        if self._metrics_manager is None:
+            self._metrics_manager = MetricsManager(self)
+        return self._metrics_manager

     def _append_to_metric(
         self,
@@ -828,6 +923,7 @@ class Experiment:
             result = self._storage.append_to_metric(
                 project=self.project,
                 experiment=self.name,
+                folder=self.folder,
                 metric_name=name,
                 data=data,
                 description=description,
@@ -999,7 +1095,7 @@ def ml_dash_experiment(
         @ml_dash_experiment(
             name="my-experiment",
             project="my-project",
-            remote="http://localhost:3000",
+            remote="https://api.dash.ml",
             api_key="your-token"
         )
         def train_model():
ml_dash/files.py CHANGED
@@ -607,6 +607,103 @@ class FileBuilder:
         except Exception:
             pass

+    def duplicate(self, source: Union[str, Dict[str, Any]], to: str) -> Dict[str, Any]:
+        """
+        Duplicate an existing file to a new path within the same experiment.
+
+        Useful for checkpoint rotation patterns where you save versioned checkpoints
+        and maintain a "latest" or "best" pointer.
+
+        Args:
+            source: Source file - either file ID (str) or metadata dict with 'id' key
+            to: Target path like "models/latest.pt" or "/checkpoints/best.pt"
+
+        Returns:
+            File metadata dict for the duplicated file with id, path, filename, checksum, etc.
+
+        Raises:
+            RuntimeError: If experiment is not open or write-protected
+            ValueError: If source file not found or target path invalid
+
+        Examples:
+            # Using file ID
+            dxp.files().duplicate("file-id-123", to="models/latest.pt")
+
+            # Using metadata dict from save_torch
+            snapshot = dxp.files(prefix="/models").save_torch(model, f"model_{epoch:05d}.pt")
+            dxp.files().duplicate(snapshot, to="models/latest.pt")
+
+            # Checkpoint rotation pattern
+            snap = dxp.files(prefix="/checkpoints").save_torch(model, f"model_{epoch:05d}.pt")
+            dxp.files().duplicate(snap, to="checkpoints/best.pt")
+        """
+        import tempfile
+        import os
+
+        if not self._experiment._is_open:
+            raise RuntimeError("Experiment not open. Use experiment.run.start() or context manager.")
+
+        if self._experiment._write_protected:
+            raise RuntimeError("Experiment is write-protected and cannot be modified.")
+
+        # Extract source file ID
+        if isinstance(source, str):
+            source_id = source
+        elif isinstance(source, dict) and 'id' in source:
+            source_id = source['id']
+        else:
+            raise ValueError("source must be a file ID (str) or metadata dict with 'id' key")
+
+        if not source_id:
+            raise ValueError("Invalid source: file ID is empty")
+
+        # Parse target path into prefix and filename
+        to = to.lstrip('/')
+        if '/' in to:
+            target_prefix, target_filename = to.rsplit('/', 1)
+            target_prefix = '/' + target_prefix
+        else:
+            target_prefix = '/'
+            target_filename = to
+
+        if not target_filename:
+            raise ValueError(f"Invalid target path '{to}': must include filename")
+
+        # Download source file to temp location
+        temp_dir = tempfile.mkdtemp()
+        temp_path = os.path.join(temp_dir, target_filename)
+
+        try:
+            # Download the source file
+            downloaded_path = self._experiment._download_file(
+                file_id=source_id,
+                dest_path=temp_path
+            )
+
+            # Save to new location using existing save() method
+            original_file_path = self._file_path
+            original_prefix = self._prefix
+
+            self._file_path = downloaded_path
+            self._prefix = target_prefix
+
+            # Upload and get result
+            result = self.save()
+
+            # Restore original values
+            self._file_path = original_file_path
+            self._prefix = original_prefix
+
+            return result
+        finally:
+            # Clean up temp file and directory
+            try:
+                if os.path.exists(temp_path):
+                    os.unlink(temp_path)
+                os.rmdir(temp_dir)
+            except Exception:
+                pass
+

 def compute_sha256(file_path: str) -> str:
     """
ml_dash/metric.py CHANGED
@@ -6,11 +6,165 @@ validation losses, system measurements, etc.
 """

 from typing import Dict, Any, List, Optional, TYPE_CHECKING
+from collections import defaultdict
+import statistics

 if TYPE_CHECKING:
     from .experiment import Experiment


+class SummaryCache:
+    """
+    Buffer for collecting metric values and computing statistics periodically.
+
+    Inspired by ml-logger's SummaryCache design:
+    - Lazy computation: Store raw values, compute stats on demand
+    - Hierarchical naming: Stats get suffixes (loss.mean, loss.std)
+    - Robust handling: Converts None → NaN, filters before stats
+    """
+
+    def __init__(self, metric_builder: 'MetricBuilder'):
+        """
+        Initialize SummaryCache.
+
+        Args:
+            metric_builder: Parent MetricBuilder instance
+        """
+        self._metric_builder = metric_builder
+        self._buffer: Dict[str, List[float]] = defaultdict(list)
+        self._metadata: Dict[str, Any] = {}  # For set() metadata
+
+    def store(self, **kwargs) -> None:
+        """
+        Store values in buffer without immediate logging (deferred computation).
+
+        Args:
+            **kwargs: Metric values to buffer (e.g., loss=0.5, accuracy=0.9)
+
+        Example:
+            cache.store(loss=0.5, accuracy=0.9)
+            cache.store(loss=0.48)  # Accumulates
+        """
+        for key, value in kwargs.items():
+            # Handle None values gracefully
+            if value is None:
+                value = float('nan')
+            try:
+                self._buffer[key].append(float(value))
+            except (TypeError, ValueError):
+                # Skip non-numeric values silently
+                continue
+
+    def set(self, **kwargs) -> None:
+        """
+        Set metadata values without aggregation (replaces previous values).
+
+        Used for contextual metadata like learning rate, epoch number, etc.
+        These values are included in the final data point when summarize() is called.
+
+        Args:
+            **kwargs: Metadata to set (e.g., lr=0.001, epoch=5)
+
+        Example:
+            cache.set(lr=0.001, epoch=5)
+            cache.set(lr=0.0005)  # Replaces lr, keeps epoch
+        """
+        self._metadata.update(kwargs)
+
+    def _compute_stats(self) -> Dict[str, float]:
+        """
+        Compute statistics from buffered values (idempotent, read-only).
+
+        Returns:
+            Dict with hierarchical metric names (key.mean, key.std, etc.)
+
+        Note: This is idempotent - can be called multiple times without side effects.
+        """
+        stats_data = {}
+
+        for key, values in self._buffer.items():
+            if not values:
+                continue
+
+            # Filter out NaN values (ml-logger pattern)
+            clean_values = [v for v in values if not (isinstance(v, float) and v != v)]
+
+            if not clean_values:
+                continue
+
+            # Compute statistics with hierarchical naming
+            stats_data[f"{key}.mean"] = statistics.mean(clean_values)
+            stats_data[f"{key}.min"] = min(clean_values)
+            stats_data[f"{key}.max"] = max(clean_values)
+            stats_data[f"{key}.count"] = len(clean_values)
+
+            # Std dev requires at least 2 values
+            if len(clean_values) >= 2:
+                stats_data[f"{key}.std"] = statistics.stdev(clean_values)
+            else:
+                stats_data[f"{key}.std"] = 0.0
+
+        return stats_data
+
+    def summarize(self, clear: bool = True) -> None:
+        """
+        Compute statistics from buffered values and log them (non-idempotent).
+
+        Args:
+            clear: If True (default), clear buffer after computing statistics.
+                This creates a "rolling window" behavior matching ml-logger's "tiled" mode.
+
+        Example:
+            # After storing 10 loss values and setting lr=0.001:
+            cache.store(loss=0.5)
+            cache.set(lr=0.001, epoch=5)
+            cache.summarize()
+            # Logs: {lr: 0.001, epoch: 5, loss.mean: 0.5, loss.std: 0.0, ...}
+
+        Note: This is non-idempotent - calling it multiple times has side effects.
+        """
+        if not self._buffer and not self._metadata:
+            return
+
+        # Compute statistics (delegated to idempotent method)
+        stats_data = self._compute_stats()
+
+        # Merge metadata with statistics
+        output_data = {**self._metadata, **stats_data}
+
+        if not output_data:
+            return
+
+        # Append combined data as a single metric data point
+        self._metric_builder.append(**output_data)
+
+        # Clear buffer if requested (default behavior for "tiled" mode)
+        if clear:
+            self._buffer.clear()
+            self._metadata.clear()  # Also clear metadata
+
+    def peek(self, *keys: str, limit: int = 5) -> Dict[str, List[float]]:
+        """
+        Non-destructive inspection of buffered values (idempotent, read-only).
+
+        Args:
+            *keys: Optional specific keys to peek at. If empty, shows all.
+            limit: Number of most recent values to show (default 5)
+
+        Returns:
+            Dict of buffered values (truncated to last `limit` items)
+
+        Example:
+            cache.peek('loss', limit=3)  # {'loss': [0.5, 0.48, 0.52]}
+        """
+        keys_to_show = keys if keys else self._buffer.keys()
+        return {
+            k: self._buffer[k][-limit:] if limit else self._buffer[k]
+            for k in keys_to_show
+            if k in self._buffer and self._buffer[k]
+        }
+
+
 class MetricsManager:
     """
     Manager for metric operations that supports both named and unnamed usage.
@@ -39,11 +193,12 @@ class MetricsManager:
             experiment: Parent Experiment instance
         """
         self._experiment = experiment
+        self._metric_builders: Dict[str, 'MetricBuilder'] = {}  # Cache for MetricBuilder instances

     def __call__(self, name: str, description: Optional[str] = None,
                  tags: Optional[List[str]] = None, metadata: Optional[Dict[str, Any]] = None) -> 'MetricBuilder':
         """
-        Get a MetricBuilder for a specific metric name.
+        Get a MetricBuilder for a specific metric name (cached for reuse).

         Args:
             name: Metric name (unique within experiment)
@@ -52,12 +207,20 @@ class MetricsManager:
             metadata: Optional structured metadata

         Returns:
-            MetricBuilder instance for the named metric
+            MetricBuilder instance for the named metric (same instance on repeated calls)

         Examples:
             experiment.metrics("loss").append(value=0.5, step=1)
+
+        Note:
+            MetricBuilder instances are cached by name, so repeated calls with the
+            same name return the same instance. This ensures summary_cache works
+            correctly when called multiple times within a loop.
         """
-        return MetricBuilder(self._experiment, name, description, tags, metadata)
+        # Cache key includes name only (description/tags/metadata are set once on first call)
+        if name not in self._metric_builders:
+            self._metric_builders[name] = MetricBuilder(self._experiment, name, description, tags, metadata)
+        return self._metric_builders[name]

     def append(self, name: Optional[str] = None, data: Optional[Dict[str, Any]] = None, **kwargs) -> Dict[str, Any]:
         """
@@ -157,6 +320,7 @@ class MetricBuilder:
         self._description = description
         self._tags = tags
         self._metadata = metadata
+        self._summary_cache = None  # Lazy initialization

     def append(self, **kwargs) -> 'MetricBuilder':
         """
@@ -290,3 +454,28 @@ class MetricBuilder:
             print(f"{metric['name']}: {metric['totalDataPoints']} points")
         """
         return self._experiment._list_metrics()
+
+    @property
+    def summary_cache(self) -> SummaryCache:
+        """
+        Get summary cache for this metric (lazy initialization).
+
+        The summary cache allows buffering values and computing statistics
+        periodically, which is much more efficient than logging every value.
+
+        Returns:
+            SummaryCache instance for this metric
+
+        Example:
+            metric = experiment.metrics("train")
+            # Store values every batch
+            metric.summary_cache.store(loss=0.5)
+            metric.summary_cache.store(loss=0.48)
+            # Set metadata
+            metric.summary_cache.set(lr=0.001, epoch=1)
+            # Compute stats and log periodically
+            metric.summary_cache.summarize()
+        """
+        if self._summary_cache is None:
+            self._summary_cache = SummaryCache(self)
+        return self._summary_cache
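
Putting store(), set(), and summarize() together, a training loop can buffer per-batch values and emit one aggregated data point per epoch. A minimal sketch assuming the dxp singleton with an open run; the data loader and loss computation are placeholders:

    with dxp.run:
        train = dxp.metrics("train")
        for epoch in range(10):
            train.summary_cache.set(epoch=epoch, lr=0.001)
            for batch in data_loader:                # placeholder iterable
                loss = training_step(batch)          # placeholder step returning a float
                train.summary_cache.store(loss=loss)
            # one point per epoch: epoch, lr, loss.mean, loss.std, loss.min, loss.max, loss.count
            train.summary_cache.summarize()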