ml-dash 0.6.4__py3-none-any.whl → 0.6.6__py3-none-any.whl

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
ml_dash/experiment.py CHANGED
@@ -11,13 +11,13 @@ import functools
11
11
  from datetime import datetime
12
12
  from enum import Enum
13
13
  from pathlib import Path
14
- from typing import Any, Callable, Dict, List, Optional, Union, Unpack
14
+ from typing import Any, Callable, Dict, List, Optional, Union
15
15
 
16
16
  from .client import RemoteClient
17
17
  from .files import BindrsBuilder, FilesAccessor
18
18
  from .log import LogBuilder, LogLevel
19
19
  from .params import ParametersBuilder
20
- from .run import RUN
20
+ from .run import RUN, requires_open
21
21
  from .storage import LocalStorage
22
22
 
23
23
 
@@ -60,140 +60,6 @@ class OperationMode(Enum):
60
60
  HYBRID = "hybrid" # Future: sync local to remote
61
61
 
62
62
 
63
- class RunManager:
64
- """
65
- Lifecycle manager for experiments.
66
-
67
- Supports three usage patterns:
68
- 1. Method calls: experiment.run.start(), experiment.run.complete()
69
- 2. Context manager: with Experiment(...).run as exp:
70
- 3. Decorator: @exp.run or @Experiment(...).run
71
- """
72
-
73
- def __init__(self, experiment: "Experiment"):
74
- """
75
- Initialize RunManager.
76
-
77
- Args:
78
- experiment: Parent Experiment instance
79
- """
80
- self._experiment = experiment
81
-
82
- def start(self) -> "Experiment":
83
- """
84
- Start the experiment (sets status to RUNNING).
85
-
86
- Returns:
87
- The experiment instance for chaining
88
- """
89
- return self._experiment._open()
90
-
91
- def complete(self) -> None:
92
- """Mark experiment as completed (status: COMPLETED)."""
93
- self._experiment._close(status="COMPLETED")
94
-
95
- def fail(self) -> None:
96
- """Mark experiment as failed (status: FAILED)."""
97
- self._experiment._close(status="FAILED")
98
-
99
- def cancel(self) -> None:
100
- """Mark experiment as cancelled (status: CANCELLED)."""
101
- self._experiment._close(status="CANCELLED")
102
-
103
- @property
104
- def prefix(self) -> Optional[str]:
105
- """
106
- Get the current folder prefix for this experiment.
107
-
108
- Returns:
109
- Current folder prefix path or None
110
-
111
- Example:
112
- current_prefix = exp.run.prefix
113
- """
114
- return self._experiment._folder_path
115
-
116
- @prefix.setter
117
- def prefix(self, value: Optional[str]) -> None:
118
- """
119
- Set the folder prefix for this experiment before initialization.
120
-
121
- This can ONLY be set before the experiment is started (initialized).
122
- Once the experiment is opened, the prefix cannot be changed.
123
-
124
- Supports template variables:
125
- - {EXP.name} - Experiment name
126
- - {EXP.id} - Experiment ID
127
-
128
- Args:
129
- value: Folder prefix path with optional template variables
130
- (e.g., "ge/myproject/{EXP.name}" or None)
131
-
132
- Raises:
133
- RuntimeError: If experiment is already initialized/open
134
-
135
- Examples:
136
- from ml_dash import dxp
137
-
138
- # Static folder
139
- dxp.run.prefix = "ge/myproject/experiments/resnet"
140
-
141
- # Template with experiment name
142
- dxp.run.prefix = "ge/iclr_2024/{EXP.name}"
143
-
144
- # Now start the experiment
145
- with dxp.run:
146
- dxp.params.set(lr=0.001)
147
- """
148
- if self._experiment._is_open:
149
- raise RuntimeError(
150
- "Cannot change prefix after experiment is initialized. "
151
- "Set prefix before calling start() or entering 'with' block."
152
- )
153
-
154
- if value:
155
- # Sync EXP with this experiment's values
156
- RUN.name = self._experiment.name
157
- RUN.description = self._experiment.description
158
- # Generate id/timestamp if not already set
159
- if RUN.id is None:
160
- RUN._init_run()
161
- # Format with EXP - use helper to expand properties correctly
162
- value = _expand_exp_template(value)
163
-
164
- # Update the folder on the experiment
165
- self._experiment._folder_path = value
166
-
167
- def __enter__(self) -> "Experiment":
168
- """Context manager entry - starts the experiment."""
169
- return self.start()
170
-
171
- def __exit__(self, exc_type, exc_val, exc_tb):
172
- """Context manager exit - completes or fails the experiment."""
173
- if exc_type is not None:
174
- self.fail()
175
- else:
176
- self.complete()
177
- return False
178
-
179
- def __call__(self, func: Callable) -> Callable:
180
- """
181
- Decorator support for wrapping functions with experiment lifecycle.
182
-
183
- Usage:
184
- @exp.run
185
- def train(exp):
186
- exp.log("Training...")
187
- """
188
-
189
- @functools.wraps(func)
190
- def wrapper(*args, **kwargs):
191
- with self as exp:
192
- return func(exp, *args, **kwargs)
193
-
194
- return wrapper
195
-
196
-
197
63
  class Experiment:
198
64
  """
199
65
  ML-Dash experiment for tracking experiments.
@@ -231,6 +97,28 @@ class Experiment:
231
97
  ...
232
98
  """
233
99
 
100
+ run: RUN
101
+ """
102
+ Get the RUN object used for lifecycle operations.
103
+
104
+ Usage:
105
+ # Method calls
106
+ experiment.run.start()
107
+ experiment.run.complete()
108
+
109
+ # Context manager
110
+ with Experiment(...).run as exp:
111
+ exp.log("Training...")
112
+
113
+ # Decorator
114
+ @experiment.run
115
+ def train(exp):
116
+ exp.log("Training...")
117
+
118
+ Returns:
119
+ RUN instance
120
+ """
121
+
234
122
  def __init__(
235
123
  self,
236
124
  prefix: Optional[str] = None,
@@ -251,7 +139,7 @@ class Experiment:
251
139
  # Internal parameters
252
140
  _write_protected: bool = False,
253
141
  # The rest of the params go directly to populate the RUN object.
254
- **run_params: Unpack[RUN],
142
+ **run_params,
255
143
  ):
256
144
  """
257
145
  Initialize an ML-Dash experiment.
@@ -278,7 +166,6 @@ class Experiment:
278
166
  - dash_url + dash_root: Hybrid mode (local + remote)
279
167
  - dash_url + dash_root=None: Remote-only mode
280
168
  """
281
- import os
282
169
  import warnings
283
170
 
284
171
  # Handle backward compatibility
@@ -286,7 +173,7 @@ class Experiment:
286
173
  warnings.warn(
287
174
  "Parameter 'remote' is deprecated. Use 'dash_url' instead.",
288
175
  DeprecationWarning,
289
- stacklevel=2
176
+ stacklevel=2,
290
177
  )
291
178
  if dash_url is None:
292
179
  dash_url = remote
@@ -295,28 +182,15 @@ class Experiment:
295
182
  warnings.warn(
296
183
  "Parameter 'local_path' is deprecated. Use 'dash_root' instead.",
297
184
  DeprecationWarning,
298
- stacklevel=2
185
+ stacklevel=2,
299
186
  )
300
187
  if dash_root == ".dash": # Only override if dash_root is default
301
188
  dash_root = local_path
302
189
 
303
- # Resolve prefix from environment variable if not provided
304
- self._folder_path = prefix or os.getenv("DASH_PREFIX")
305
-
306
- if not self._folder_path:
307
- raise ValueError("prefix (or DASH_PREFIX env var) must be provided")
308
-
309
- # Parse prefix: {owner}/{project}/path.../[name]
310
- parts = self._folder_path.strip("/").split("/")
311
- if len(parts) < 2:
312
- raise ValueError(
313
- f"prefix must have at least owner/project: got '{self._folder_path}'"
314
- )
190
+ if prefix:
191
+ run_params["prefix"] = prefix
315
192
 
316
- self.owner = parts[0]
317
- self.project = parts[1]
318
- # Name is the last segment (may be a seed/id, not always a meaningful name)
319
- self.name = parts[-1] if len(parts) > 2 else parts[1]
193
+ self.run = RUN(_experiment=self, **run_params)
320
194
 
321
195
  self.readme = readme
322
196
  self.tags = tags
@@ -324,11 +198,6 @@ class Experiment:
324
198
  self._write_protected = _write_protected
325
199
  self.metadata = metadata
326
200
 
327
- # Initialize RUN with experiment values
328
- RUN.name = self.name
329
- if readme:
330
- RUN.readme = readme
331
-
332
201
  # Determine operation mode
333
202
  # dash_root defaults to ".dash", dash_url defaults to None
334
203
  if dash_url and dash_root:
@@ -339,21 +208,19 @@ class Experiment:
339
208
  self.mode = OperationMode.LOCAL
340
209
 
341
210
  # Initialize backend
342
- self._client: Optional[RemoteClient] = None
343
- self._storage: Optional[LocalStorage] = None
344
211
  self._experiment_id: Optional[str] = None
345
212
  self._experiment_data: Optional[Dict[str, Any]] = None
346
213
  self._is_open = False
347
214
  self._metrics_manager: Optional["MetricsManager"] = None # Cached metrics manager
348
215
 
349
216
  if self.mode in (OperationMode.REMOTE, OperationMode.HYBRID):
350
- # RemoteClient will auto-load token from ~/.dash/token.enc
217
+ # RemoteClient will autoload token from ~/.dash/token.enc
351
218
  # Use RUN.api_url if dash_url=True (boolean), otherwise use the provided URL
352
219
  api_url = RUN.api_url if dash_url is True else dash_url
353
- self._client = RemoteClient(base_url=api_url, namespace=self.owner)
220
+ self.run._client = RemoteClient(base_url=api_url, namespace=self.run.owner)
354
221
 
355
222
  if self.mode in (OperationMode.LOCAL, OperationMode.HYBRID):
356
- self._storage = LocalStorage(root_path=Path(dash_root))
223
+ self.run._storage = LocalStorage(root_path=Path(dash_root))
357
224
 
358
225
  def _open(self) -> "Experiment":
359
226
  """
@@ -365,16 +232,16 @@ class Experiment:
365
232
  if self._is_open:
366
233
  return self
367
234
 
368
- if self._client:
235
+ if self.run._client:
369
236
  # Remote mode: create/update experiment via API
370
237
  try:
371
- response = self._client.create_or_update_experiment(
372
- project=self.project,
373
- name=self.name,
238
+ response = self.run._client.create_or_update_experiment(
239
+ project=self.run.project,
240
+ name=self.run.name,
374
241
  description=self.readme,
375
242
  tags=self.tags,
376
243
  bindrs=self._bindrs_list,
377
- prefix=self._folder_path,
244
+ prefix=self.run._folder_path,
378
245
  write_protected=self._write_protected,
379
246
  metadata=self.metadata,
380
247
  )
@@ -387,13 +254,13 @@ class Experiment:
387
254
 
388
255
  console = Console()
389
256
  console.print(
390
- f"[dim]✓ Experiment started: [bold]{self.name}[/bold] (project: {self.project})[/dim]\n"
257
+ f"[dim]✓ Experiment started: [bold]{self.run.name}[/bold] (project: {self.run.project})[/dim]\n"
391
258
  f"[dim]View your data, statistics, and plots online at:[/dim] "
392
259
  f"[link=https://dash.ml]https://dash.ml[/link]"
393
260
  )
394
261
  except ImportError:
395
262
  # Fallback if rich is not available
396
- print(f"✓ Experiment started: {self.name} (project: {self.project})")
263
+ print(f"✓ Experiment started: {self.run.name} (project: {self.run.project})")
397
264
  print("View your data at: https://dash.ml")
398
265
 
399
266
  except Exception as e:
@@ -446,12 +313,11 @@ class Experiment:
446
313
  # Re-raise other exceptions
447
314
  raise
448
315
 
449
- if self._storage:
316
+ if self.run._storage:
450
317
  # Local mode: create experiment directory structure
451
- self._storage.create_experiment(
452
- owner=self.owner,
453
- project=self.project,
454
- prefix=self._folder_path,
318
+ self.run._storage.create_experiment(
319
+ project=self.run.project,
320
+ prefix=self.run._folder_path,
455
321
  description=self.readme,
456
322
  tags=self.tags,
457
323
  bindrs=self._bindrs_list,
@@ -468,17 +334,19 @@ class Experiment:
468
334
  Args:
469
335
  status: Status to set - "COMPLETED" (default), "FAILED", or "CANCELLED"
470
336
  """
471
- if not self._is_open:
472
- return
473
-
474
- # Flush any pending writes
475
- if self._storage:
476
- self._storage.flush()
337
+ # if not self._is_open:
338
+ # return
339
+ #
340
+ # note-ge: do NOT flush because the upload will be async. We will NEVER reuse
341
+ # experiment objects.
342
+ # # Flush any pending writes
343
+ # if self.run._storage:
344
+ # self.run._storage.flush()
477
345
 
478
346
  # Update experiment status in remote mode
479
- if self._client and self._experiment_id:
347
+ if self.run._client and self._experiment_id:
480
348
  try:
481
- self._client.update_experiment_status(
349
+ self.run._client.update_experiment_status(
482
350
  experiment_id=self._experiment_id, status=status
483
351
  )
484
352
 
@@ -499,14 +367,14 @@ class Experiment:
499
367
  console = Console()
500
368
  console.print(
501
369
  f"[{status_color}]{status_emoji} Experiment {status.lower()}: "
502
- f"[bold]{self.name}[/bold] (project: {self.project})[/{status_color}]\n"
370
+ f"[bold]{self.run.name}[/bold] (project: {self.run.project})[/{status_color}]\n"
503
371
  f"[dim]View results, statistics, and plots online at:[/dim] "
504
372
  f"[link=https://dash.ml]https://dash.ml[/link]"
505
373
  )
506
374
  except ImportError:
507
375
  # Fallback if rich is not available
508
376
  print(
509
- f"{status_emoji} Experiment {status.lower()}: {self.name} (project: {self.project})"
377
+ f"{status_emoji} Experiment {status.lower()}: {self.run.name} (project: {self.run.project})"
510
378
  )
511
379
  print("View results at: https://dash.ml")
512
380
 
@@ -516,35 +384,8 @@ class Experiment:
516
384
 
517
385
  self._is_open = False
518
386
 
519
- # Reset RUN for next experiment
520
- # TODO: RUN._reset() - method doesn't exist
521
- # RUN._reset()
522
-
523
- @property
524
- def run(self) -> RunManager:
525
- """
526
- Get the RunManager for lifecycle operations.
527
-
528
- Usage:
529
- # Method calls
530
- experiment.run.start()
531
- experiment.run.complete()
532
-
533
- # Context manager
534
- with Experiment(...).run as exp:
535
- exp.log("Training...")
536
-
537
- # Decorator
538
- @experiment.run
539
- def train(exp):
540
- exp.log("Training...")
541
-
542
- Returns:
543
- RunManager instance
544
- """
545
- return RunManager(self)
546
-
547
387
  @property
388
+ @requires_open
548
389
  def params(self) -> ParametersBuilder:
549
390
  """
550
391
  Get a ParametersBuilder for parameter operations.
@@ -562,17 +403,10 @@ class Experiment:
562
403
  Raises:
563
404
  RuntimeError: If experiment is not open
564
405
  """
565
- if not self._is_open:
566
- raise RuntimeError(
567
- "Experiment not started. Use 'with experiment.run:' or call experiment.run.start() first.\n"
568
- "Example:\n"
569
- " with dxp.run:\n"
570
- " dxp.params.set(lr=0.001)"
571
- )
572
-
573
406
  return ParametersBuilder(self)
574
407
 
575
408
  @property
409
+ @requires_open
576
410
  def logs(self) -> LogBuilder:
577
411
  """
578
412
  Get a LogBuilder for fluent-style logging.
@@ -592,16 +426,9 @@ class Experiment:
592
426
  exp.logs.warn("GPU memory low", memory_available="1GB")
593
427
  exp.logs.debug("Debug info", step=100)
594
428
  """
595
- if not self._is_open:
596
- raise RuntimeError(
597
- "Experiment not started. Use 'with experiment.run:' or call experiment.run.start() first.\n"
598
- "Example:\n"
599
- " with dxp.run:\n"
600
- " dxp.logs.info('Training started')"
601
- )
602
-
603
429
  return LogBuilder(self, metadata=None)
604
430
 
431
+ @requires_open
605
432
  def log(
606
433
  self,
607
434
  message: Optional[str] = None,
@@ -638,22 +465,16 @@ class Experiment:
638
465
  RuntimeError: If experiment is not open
639
466
  ValueError: If log level is invalid
640
467
  """
641
- if not self._is_open:
642
- raise RuntimeError(
643
- "Experiment not started. Use 'with experiment.run:' or call experiment.run.start() first.\n"
644
- "Example:\n"
645
- " with dxp.run:\n"
646
- " dxp.logs.info('Training started')"
647
- )
648
468
 
649
469
  # Fluent mode: return LogBuilder (deprecated)
650
470
  if message is None:
651
471
  import warnings
472
+
652
473
  warnings.warn(
653
474
  "Using exp.log() without a message is deprecated. "
654
475
  "Use exp.logs.info('message') instead.",
655
476
  DeprecationWarning,
656
- stacklevel=2
477
+ stacklevel=2,
657
478
  )
658
479
  combined_metadata = {**(metadata or {}), **extra_metadata}
659
480
  return LogBuilder(self, combined_metadata if combined_metadata else None)
@@ -701,30 +522,31 @@ class Experiment:
701
522
  self._print_log(message, level, metadata)
702
523
 
703
524
  # Write immediately (no buffering)
704
- if self._client:
525
+ if self.run._client:
705
526
  # Remote mode: send to API (wrapped in array for batch API)
706
527
  try:
707
- self._client.create_log_entries(
528
+ self.run._client.create_log_entries(
708
529
  experiment_id=self._experiment_id,
709
530
  logs=[log_entry], # Single log in array
710
531
  )
711
532
  except Exception as e:
712
533
  # Log warning but don't crash training
713
534
  import warnings
535
+
714
536
  warnings.warn(
715
537
  f"Failed to write log to remote server: {e}. Training will continue.",
716
538
  RuntimeWarning,
717
- stacklevel=4
539
+ stacklevel=4,
718
540
  )
719
541
  # Fall through to local storage if available
720
542
 
721
- if self._storage:
543
+ if self.run._storage:
722
544
  # Local mode: write to file immediately
723
545
  try:
724
- self._storage.write_log(
725
- owner=self.owner,
726
- project=self.project,
727
- prefix=self._folder_path,
546
+ self.run._storage.write_log(
547
+ owner=self.run.owner,
548
+ project=self.run.project,
549
+ prefix=self.run._folder_path,
728
550
  message=log_entry["message"],
729
551
  level=log_entry["level"],
730
552
  metadata=log_entry.get("metadata"),
@@ -732,10 +554,9 @@ class Experiment:
732
554
  )
733
555
  except Exception as e:
734
556
  import warnings
557
+
735
558
  warnings.warn(
736
- f"Failed to write log to local storage: {e}",
737
- RuntimeWarning,
738
- stacklevel=4
559
+ f"Failed to write log to local storage: {e}", RuntimeWarning, stacklevel=4
739
560
  )
740
561
 
741
562
  def _print_log(
@@ -773,6 +594,7 @@ class Experiment:
773
594
  print(formatted_message, file=sys.stdout)
774
595
 
775
596
  @property
597
+ @requires_open
776
598
  def files(self) -> FilesAccessor:
777
599
  """
778
600
  Get a FilesAccessor for fluent file operations.
@@ -813,16 +635,9 @@ class Experiment:
813
635
  dxp.files.save_json(dict(hey="yo"), to="config.json")
814
636
  dxp.files.save_blob(b"xxx", to="data.bin")
815
637
  """
816
- if not self._is_open:
817
- raise RuntimeError(
818
- "Experiment not started. Use 'with experiment.run:' or call experiment.run.start() first.\n"
819
- "Example:\n"
820
- " with dxp.run:\n"
821
- " dxp.files('path').upload()"
822
- )
823
-
824
638
  return FilesAccessor(self)
825
639
 
640
+ @requires_open
826
641
  def bindrs(self, bindr_name: str) -> BindrsBuilder:
827
642
  """
828
643
  Get a BindrsBuilder for working with file collections (bindrs).
@@ -845,14 +660,6 @@ class Experiment:
845
660
  Note:
846
661
  This is a placeholder for future bindr functionality.
847
662
  """
848
- if not self._is_open:
849
- raise RuntimeError(
850
- "Experiment not started. Use 'with experiment.run:' or call experiment.run.start() first.\n"
851
- "Example:\n"
852
- " with dxp.run:\n"
853
- " files = dxp.bindrs('my-bindr').list()"
854
- )
855
-
856
663
  return BindrsBuilder(self, bindr_name)
857
664
 
858
665
  def _upload_file(
@@ -886,9 +693,9 @@ class Experiment:
886
693
  """
887
694
  result = None
888
695
 
889
- if self._client:
696
+ if self.run._client:
890
697
  # Remote mode: upload to API
891
- result = self._client.upload_file(
698
+ result = self.run._client.upload_file(
892
699
  experiment_id=self._experiment_id,
893
700
  file_path=file_path,
894
701
  prefix=prefix,
@@ -901,12 +708,12 @@ class Experiment:
901
708
  size_bytes=size_bytes,
902
709
  )
903
710
 
904
- if self._storage:
711
+ if self.run._storage:
905
712
  # Local mode: copy to local storage
906
- result = self._storage.write_file(
907
- owner=self.owner,
908
- project=self.project,
909
- prefix=self._folder_path,
713
+ result = self.run._storage.write_file(
714
+ owner=self.run.owner,
715
+ project=self.run.project,
716
+ prefix=self.run._folder_path,
910
717
  file_path=file_path,
911
718
  path=prefix,
912
719
  filename=filename,
@@ -935,18 +742,18 @@ class Experiment:
935
742
  """
936
743
  files = []
937
744
 
938
- if self._client:
745
+ if self.run._client:
939
746
  # Remote mode: fetch from API
940
- files = self._client.list_files(
747
+ files = self.run._client.list_files(
941
748
  experiment_id=self._experiment_id, prefix=prefix, tags=tags
942
749
  )
943
750
 
944
- if self._storage:
751
+ if self.run._storage:
945
752
  # Local mode: read from metadata file
946
- files = self._storage.list_files(
947
- owner=self.owner,
948
- project=self.project,
949
- prefix=self._folder_path,
753
+ files = self.run._storage.list_files(
754
+ owner=self.run.owner,
755
+ project=self.run.project,
756
+ prefix=self.run._folder_path,
950
757
  path_prefix=prefix,
951
758
  tags=tags,
952
759
  )
@@ -964,18 +771,18 @@ class Experiment:
964
771
  Returns:
965
772
  Path to downloaded file
966
773
  """
967
- if self._client:
774
+ if self.run._client:
968
775
  # Remote mode: download from API
969
- return self._client.download_file(
776
+ return self.run._client.download_file(
970
777
  experiment_id=self._experiment_id, file_id=file_id, dest_path=dest_path
971
778
  )
972
779
 
973
- if self._storage:
780
+ if self.run._storage:
974
781
  # Local mode: copy from local storage
975
- return self._storage.read_file(
976
- owner=self.owner,
977
- project=self.project,
978
- prefix=self._folder_path,
782
+ return self.run._storage.read_file(
783
+ owner=self.run.owner,
784
+ project=self.run.project,
785
+ prefix=self.run._folder_path,
979
786
  file_id=file_id,
980
787
  dest_path=dest_path,
981
788
  )
@@ -994,18 +801,18 @@ class Experiment:
994
801
  """
995
802
  result = None
996
803
 
997
- if self._client:
804
+ if self.run._client:
998
805
  # Remote mode: delete via API
999
- result = self._client.delete_file(
806
+ result = self.run._client.delete_file(
1000
807
  experiment_id=self._experiment_id, file_id=file_id
1001
808
  )
1002
809
 
1003
- if self._storage:
810
+ if self.run._storage:
1004
811
  # Local mode: soft delete in metadata
1005
- result = self._storage.delete_file(
1006
- owner=self.owner,
1007
- project=self.project,
1008
- prefix=self._folder_path,
812
+ result = self.run._storage.delete_file(
813
+ owner=self.run.owner,
814
+ project=self.run.project,
815
+ prefix=self.run._folder_path,
1009
816
  file_id=file_id,
1010
817
  )
1011
818
 
@@ -1032,9 +839,9 @@ class Experiment:
1032
839
  """
1033
840
  result = None
1034
841
 
1035
- if self._client:
842
+ if self.run._client:
1036
843
  # Remote mode: update via API
1037
- result = self._client.update_file(
844
+ result = self.run._client.update_file(
1038
845
  experiment_id=self._experiment_id,
1039
846
  file_id=file_id,
1040
847
  description=description,
@@ -1042,12 +849,12 @@ class Experiment:
1042
849
  metadata=metadata,
1043
850
  )
1044
851
 
1045
- if self._storage:
852
+ if self.run._storage:
1046
853
  # Local mode: update in metadata file
1047
- result = self._storage.update_file_metadata(
1048
- owner=self.owner,
1049
- project=self.project,
1050
- prefix=self._folder_path,
854
+ result = self.run._storage.update_file_metadata(
855
+ owner=self.run.owner,
856
+ project=self.run.project,
857
+ prefix=self.run._folder_path,
1051
858
  file_id=file_id,
1052
859
  description=description,
1053
860
  tags=tags,
@@ -1063,18 +870,18 @@ class Experiment:
1063
870
  Args:
1064
871
  flattened_params: Already-flattened parameter dict with dot notation
1065
872
  """
1066
- if self._client:
873
+ if self.run._client:
1067
874
  # Remote mode: send to API
1068
- self._client.set_parameters(
875
+ self.run._client.set_parameters(
1069
876
  experiment_id=self._experiment_id, data=flattened_params
1070
877
  )
1071
878
 
1072
- if self._storage:
879
+ if self.run._storage:
1073
880
  # Local mode: write to file
1074
- self._storage.write_parameters(
1075
- owner=self.owner,
1076
- project=self.project,
1077
- prefix=self._folder_path,
881
+ self.run._storage.write_parameters(
882
+ owner=self.run.owner,
883
+ project=self.run.project,
884
+ prefix=self.run._folder_path,
1078
885
  data=flattened_params,
1079
886
  )
1080
887
 
@@ -1087,23 +894,24 @@ class Experiment:
1087
894
  """
1088
895
  params = None
1089
896
 
1090
- if self._client:
897
+ if self.run._client:
1091
898
  # Remote mode: fetch from API
1092
899
  try:
1093
- params = self._client.get_parameters(experiment_id=self._experiment_id)
900
+ params = self.run._client.get_parameters(experiment_id=self._experiment_id)
1094
901
  except Exception:
1095
902
  # Parameters don't exist yet
1096
903
  params = None
1097
904
 
1098
- if self._storage:
905
+ if self.run._storage:
1099
906
  # Local mode: read from file
1100
- params = self._storage.read_parameters(
1101
- owner=self.owner, project=self.project, prefix=self._folder_path
907
+ params = self.run._storage.read_parameters(
908
+ owner=self.run.owner, project=self.run.project, prefix=self.run._folder_path
1102
909
  )
1103
910
 
1104
911
  return params
1105
912
 
1106
913
  @property
914
+ @requires_open
1107
915
  def metrics(self) -> "MetricsManager":
1108
916
  """
1109
917
  Get a MetricsManager for metric operations.
@@ -1139,12 +947,6 @@ class Experiment:
1139
947
  """
1140
948
  from .metric import MetricsManager
1141
949
 
1142
- if not self._is_open:
1143
- raise RuntimeError(
1144
- "Cannot use metrics on closed experiment. "
1145
- "Use 'with Experiment(...).run as experiment:' or call experiment.run.start() first."
1146
- )
1147
-
1148
950
  # Cache the MetricsManager instance to preserve MetricBuilder cache across calls
1149
951
  if self._metrics_manager is None:
1150
952
  self._metrics_manager = MetricsManager(self)
@@ -1173,10 +975,10 @@ class Experiment:
1173
975
  """
1174
976
  result = None
1175
977
 
1176
- if self._client:
978
+ if self.run._client:
1177
979
  # Remote mode: append via API
1178
980
  try:
1179
- result = self._client.append_to_metric(
981
+ result = self.run._client.append_to_metric(
1180
982
  experiment_id=self._experiment_id,
1181
983
  metric_name=name,
1182
984
  data=data,
@@ -1187,22 +989,23 @@ class Experiment:
1187
989
  except Exception as e:
1188
990
  # Log warning but don't crash training
1189
991
  import warnings
992
+
1190
993
  metric_display = f"'{name}'" if name else "unnamed metric"
1191
994
  warnings.warn(
1192
995
  f"Failed to log {metric_display} to remote server: {e}. "
1193
996
  f"Training will continue.",
1194
997
  RuntimeWarning,
1195
- stacklevel=3
998
+ stacklevel=3,
1196
999
  )
1197
1000
  # Fall through to local storage if available
1198
1001
 
1199
- if self._storage:
1002
+ if self.run._storage:
1200
1003
  # Local mode: append to local storage
1201
1004
  try:
1202
- result = self._storage.append_to_metric(
1203
- owner=self.owner,
1204
- project=self.project,
1205
- prefix=self._folder_path,
1005
+ result = self.run._storage.append_to_metric(
1006
+ owner=self.run.owner,
1007
+ project=self.run.project,
1008
+ prefix=self.run._folder_path,
1206
1009
  metric_name=name,
1207
1010
  data=data,
1208
1011
  description=description,
@@ -1211,11 +1014,12 @@ class Experiment:
1211
1014
  )
1212
1015
  except Exception as e:
1213
1016
  import warnings
1017
+
1214
1018
  metric_display = f"'{name}'" if name else "unnamed metric"
1215
1019
  warnings.warn(
1216
1020
  f"Failed to log {metric_display} to local storage: {e}",
1217
1021
  RuntimeWarning,
1218
- stacklevel=3
1022
+ stacklevel=3,
1219
1023
  )
1220
1024
 
1221
1025
  return result
@@ -1243,10 +1047,10 @@ class Experiment:
1243
1047
  """
1244
1048
  result = None
1245
1049
 
1246
- if self._client:
1050
+ if self.run._client:
1247
1051
  # Remote mode: append batch via API
1248
1052
  try:
1249
- result = self._client.append_batch_to_metric(
1053
+ result = self.run._client.append_batch_to_metric(
1250
1054
  experiment_id=self._experiment_id,
1251
1055
  metric_name=name,
1252
1056
  data_points=data_points,
@@ -1257,22 +1061,23 @@ class Experiment:
1257
1061
  except Exception as e:
1258
1062
  # Log warning but don't crash training
1259
1063
  import warnings
1064
+
1260
1065
  metric_display = f"'{name}'" if name else "unnamed metric"
1261
1066
  warnings.warn(
1262
1067
  f"Failed to log batch to {metric_display} on remote server: {e}. "
1263
1068
  f"Training will continue.",
1264
1069
  RuntimeWarning,
1265
- stacklevel=3
1070
+ stacklevel=3,
1266
1071
  )
1267
1072
  # Fall through to local storage if available
1268
1073
 
1269
- if self._storage:
1074
+ if self.run._storage:
1270
1075
  # Local mode: append batch to local storage
1271
1076
  try:
1272
- result = self._storage.append_batch_to_metric(
1273
- owner=self.owner,
1274
- project=self.project,
1275
- prefix=self._folder_path,
1077
+ result = self.run._storage.append_batch_to_metric(
1078
+ owner=self.run.owner,
1079
+ project=self.run.project,
1080
+ prefix=self.run._folder_path,
1276
1081
  metric_name=name,
1277
1082
  data_points=data_points,
1278
1083
  description=description,
@@ -1281,11 +1086,12 @@ class Experiment:
1281
1086
  )
1282
1087
  except Exception as e:
1283
1088
  import warnings
1089
+
1284
1090
  metric_display = f"'{name}'" if name else "unnamed metric"
1285
1091
  warnings.warn(
1286
1092
  f"Failed to log batch to {metric_display} in local storage: {e}",
1287
1093
  RuntimeWarning,
1288
- stacklevel=3
1094
+ stacklevel=3,
1289
1095
  )
1290
1096
 
1291
1097
  return result
@@ -1306,21 +1112,21 @@ class Experiment:
1306
1112
  """
1307
1113
  result = None
1308
1114
 
1309
- if self._client:
1115
+ if self.run._client:
1310
1116
  # Remote mode: read via API
1311
- result = self._client.read_metric_data(
1117
+ result = self.run._client.read_metric_data(
1312
1118
  experiment_id=self._experiment_id,
1313
1119
  metric_name=name,
1314
1120
  start_index=start_index,
1315
1121
  limit=limit,
1316
1122
  )
1317
1123
 
1318
- if self._storage:
1124
+ if self.run._storage:
1319
1125
  # Local mode: read from local storage
1320
- result = self._storage.read_metric_data(
1321
- owner=self.owner,
1322
- project=self.project,
1323
- prefix=self._folder_path,
1126
+ result = self.run._storage.read_metric_data(
1127
+ owner=self.run.owner,
1128
+ project=self.run.project,
1129
+ prefix=self.run._folder_path,
1324
1130
  metric_name=name,
1325
1131
  start_index=start_index,
1326
1132
  limit=limit,
@@ -1340,18 +1146,18 @@ class Experiment:
1340
1146
  """
1341
1147
  result = None
1342
1148
 
1343
- if self._client:
1149
+ if self.run._client:
1344
1150
  # Remote mode: get stats via API
1345
- result = self._client.get_metric_stats(
1151
+ result = self.run._client.get_metric_stats(
1346
1152
  experiment_id=self._experiment_id, metric_name=name
1347
1153
  )
1348
1154
 
1349
- if self._storage:
1155
+ if self.run._storage:
1350
1156
  # Local mode: get stats from local storage
1351
- result = self._storage.get_metric_stats(
1352
- owner=self.owner,
1353
- project=self.project,
1354
- prefix=self._folder_path,
1157
+ result = self.run._storage.get_metric_stats(
1158
+ owner=self.run.owner,
1159
+ project=self.run.project,
1160
+ prefix=self.run._folder_path,
1355
1161
  metric_name=name,
1356
1162
  )
1357
1163
 
@@ -1366,18 +1172,86 @@ class Experiment:
1366
1172
  """
1367
1173
  result = None
1368
1174
 
1369
- if self._client:
1175
+ if self.run._client:
1370
1176
  # Remote mode: list via API
1371
- result = self._client.list_metrics(experiment_id=self._experiment_id)
1177
+ result = self.run._client.list_metrics(experiment_id=self._experiment_id)
1372
1178
 
1373
- if self._storage:
1179
+ if self.run._storage:
1374
1180
  # Local mode: list from local storage
1375
- result = self._storage.list_metrics(
1376
- owner=self.owner, project=self.project, prefix=self._folder_path
1181
+ result = self.run._storage.list_metrics(
1182
+ owner=self.run.owner, project=self.run.project, prefix=self.run._folder_path
1377
1183
  )
1378
1184
 
1379
1185
  return result or []
1380
1186
 
1187
+ @property
1188
+ def owner(self) -> Optional[str]:
1189
+ """Get the owner (first segment of prefix)."""
1190
+ return self.run.owner
1191
+
1192
+ @owner.setter
1193
+ def owner(self, value: str) -> None:
1194
+ """Set the owner."""
1195
+ self.run.owner = value
1196
+
1197
+ @property
1198
+ def project(self) -> Optional[str]:
1199
+ """Get the project (second segment of prefix or RUN.project)."""
1200
+ return self.run.project
1201
+
1202
+ @project.setter
1203
+ def project(self, value: str) -> None:
1204
+ """Set the project."""
1205
+ self.run.project = value
1206
+
1207
+ @property
1208
+ def name(self) -> Optional[str]:
1209
+ """Get the experiment name (last segment of prefix)."""
1210
+ return self.run.name
1211
+
1212
+ @name.setter
1213
+ def name(self, value: str) -> None:
1214
+ """Set the name."""
1215
+ self.run.name = value
1216
+
1217
+ @property
1218
+ def _folder_path(self) -> Optional[str]:
1219
+ """Get the full folder path (same as prefix)."""
1220
+ return self.run._folder_path
1221
+
1222
+ @_folder_path.setter
1223
+ def _folder_path(self, value: str) -> None:
1224
+ """Set the full folder path and re-parse into components."""
1225
+ self.run._folder_path = value
1226
+ self.run.prefix = value
1227
+ # Re-parse prefix into components
1228
+ if value:
1229
+ parts = value.strip("/").split("/")
1230
+ if len(parts) >= 2:
1231
+ self.run.owner = parts[0]
1232
+ self.run.project = parts[1]
1233
+ self.run.name = parts[-1] if len(parts) > 2 else parts[1]
1234
+
1235
+ @property
1236
+ def _client(self):
1237
+ """Get the remote client."""
1238
+ return self.run._client
1239
+
1240
+ @_client.setter
1241
+ def _client(self, value) -> None:
1242
+ """Set the remote client."""
1243
+ self.run._client = value
1244
+
1245
+ @property
1246
+ def _storage(self):
1247
+ """Get the local storage."""
1248
+ return self.run._storage
1249
+
1250
+ @_storage.setter
1251
+ def _storage(self, value) -> None:
1252
+ """Set the local storage."""
1253
+ self.run._storage = value
1254
+
1381
1255
  @property
1382
1256
  def id(self) -> Optional[str]:
1383
1257
  """Get the experiment ID (only available after open in remote mode)."""