ml-dash 0.6.2rc1__py3-none-any.whl → 0.6.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -9,18 +9,18 @@ IMPORTANT: Before using rdxp, you must authenticate with the ML-Dash server:
9
9
  python -m ml_dash.cli login
10
10
 
11
11
  Usage:
12
- from ml_dash import rdxp
12
+ from ml_dash.remote_auto_start import rdxp
13
13
 
14
14
  # Use with statement (recommended)
15
15
  with rdxp.run:
16
- rdxp.log().info("Hello from rdxp!")
16
+ rdxp.log("Hello from rdxp!", level="info")
17
17
  rdxp.params.set(lr=0.001)
18
- rdxp.metrics("loss").append(step=0, value=0.5)
18
+ rdxp.metrics("train").log(loss=0.5, step=0)
19
19
  # Automatically completes on exit from with block
20
20
 
21
21
  # Or start/complete manually
22
22
  rdxp.run.start()
23
- rdxp.log().info("Training...")
23
+ rdxp.log("Training...", level="info")
24
24
  rdxp.run.complete()
25
25
 
26
26
  Configuration:
@@ -30,25 +30,28 @@ Configuration:
30
30
  """
31
31
 
32
32
  import atexit
33
- from .experiment import Experiment
34
33
 
35
34
  # Create pre-configured singleton experiment for remote mode
36
35
  # Uses remote API server - token auto-loaded from storage
37
- rdxp = Experiment(
38
- name="rdxp",
39
- project="scratch",
40
- remote="https://api.dash.ml"
41
- )
36
+ # Prefix format: {owner}/{project}/path...
37
+ import getpass
38
+
39
+ from .experiment import Experiment
40
+
41
+ _owner = getpass.getuser()
42
+ rdxp = Experiment(prefix=f"{_owner}/scratch/rdxp", dash_url="https://api.dash.ml")
43
+
42
44
 
43
45
  # Register cleanup handler to complete experiment on Python exit (if still open)
44
46
  def _cleanup():
45
- """Complete the rdxp experiment on exit if still open."""
46
- if rdxp._is_open:
47
- try:
48
- rdxp.run.complete()
49
- except Exception:
50
- # Silently ignore errors during cleanup
51
- pass
47
+ """Complete the rdxp experiment on exit if still open."""
48
+ if rdxp._is_open:
49
+ try:
50
+ rdxp.run.complete()
51
+ except Exception:
52
+ # Silently ignore errors during cleanup
53
+ pass
54
+
52
55
 
53
56
  atexit.register(_cleanup)
54
57
 
ml_dash/run.py CHANGED
@@ -1,85 +1,231 @@
1
1
  """
2
- RUN - Global run configuration object for ML-Dash.
2
+ RUN - Global experiment configuration object for ML-Dash.
3
3
 
4
4
  This module provides a global RUN object that serves as the single source
5
- of truth for run/experiment metadata. Uses params-proto for configuration.
5
+ of truth for experiment metadata. Uses params-proto for configuration.
6
6
 
7
7
  Usage:
8
8
  from ml_dash import RUN
9
9
 
10
- # Configure the run
11
- RUN.name = "my-experiment"
12
- RUN.project = "my-project"
10
+ # Configure via environment variable
11
+ # export ML_DASH_PREFIX="ge/myproject/experiments/exp1"
12
+
13
+ # Or set directly
14
+ RUN.prefix = "ge/myproject/experiments/exp1"
13
15
 
14
16
  # Use in templates
15
- folder = "/experiments/{RUN.name}".format(RUN=RUN)
17
+ prefix = "{RUN.prefix}/{RUN.name}.{RUN.id}".format(RUN=RUN)
16
18
 
17
- # With dxp singleton (RUN is auto-populated)
18
- from ml_dash import dxp
19
- with dxp.run:
20
- # RUN.name, RUN.project, RUN.id, RUN.timestamp are set
21
- dxp.log().info(f"Running {RUN.name}")
19
+ # With Experiment (RUN is auto-populated)
20
+ from ml_dash import Experiment
21
+ with Experiment(prefix=RUN.prefix).run as exp:
22
+ exp.logs.info(f"Running {RUN.name}")
22
23
  """
23
24
 
25
+ import os
26
+ import sys
24
27
  from datetime import datetime
25
- from params_proto import proto
28
+ from pathlib import Path
29
+ from typing import Union
30
+
31
+ from params_proto import EnvVar, proto
32
+
33
+ PROJECT_ROOT_FILES = ("pyproject.toml", "requirements.txt", "setup.py", "setup.cfg")
34
+
35
+
36
+ def find_project_root(
37
+ start: Union[str, Path] = None,
38
+ verbose: bool = False,
39
+ ) -> str:
40
+ """Find the nearest project root by looking for common project files.
41
+
42
+ Walks up the directory tree from `start` until it finds a directory
43
+ containing pyproject.toml, requirements.txt, setup.py, or setup.cfg.
44
+
45
+ Args:
46
+ start: Starting directory or file path. Defaults to cwd.
47
+ verbose: If True, print search progress.
48
+
49
+ Returns:
50
+ String path to the project root directory, or cwd if not found.
51
+ """
52
+ if start is None:
53
+ start = Path.cwd()
54
+ else:
55
+ start = Path(start)
56
+
57
+ if start.is_file():
58
+ start = start.parent
59
+
60
+ if verbose:
61
+ print(f"Searching for project root from: {start}")
62
+
63
+ for parent in [start, *start.parents]:
64
+ if verbose:
65
+ print(f" Checking: {parent}")
66
+ for filename in PROJECT_ROOT_FILES:
67
+ if (parent / filename).exists():
68
+ if verbose:
69
+ print(f" Found: {parent / filename}")
70
+ return str(parent)
71
+
72
+ if verbose:
73
+ print(f" No project root found, using cwd: {Path.cwd()}")
74
+ return str(Path.cwd())
26
75
 
27
76
 
28
77
  @proto.prefix
29
78
  class RUN:
79
+ """
80
+ Global Experiment Run Configuration.
81
+
82
+ This class is the single source of truth for experiment metadata.
83
+ Configure it before starting an experiment, or through the Experiment
84
+ constructor.
85
+
86
+ Default prefix template:
87
+ {project}/{now:%Y/%m-%d}/{path_stem}/{job_name}
88
+
89
+ Example:
90
+ # Set prefix via environment variable
91
+ # export ML_DASH_PREFIX="ge/myproject/exp1"
92
+
93
+ # Or configure directly
94
+ from ml_dash.run import RUN
95
+
96
+ RUN.project = "my-project"
97
+ RUN.prefix = "{username}/{project}/{now:%Y-%m-%d}/{entry}"
98
+
99
+ Auto-detection:
100
+ project_root is auto-detected by searching for pyproject.toml,
101
+ requirements.txt, setup.py, or setup.cfg in parent directories.
102
+ """
103
+
104
+ user: str = EnvVar @ "ML_DASH_USER" @ "USER"
105
+
106
+ api_url: str = EnvVar @ "ML_DASH_API_URL" | "https://api.dash.ml"
107
+ """Remote API server URL"""
108
+
109
+ ### Experiment and project information
110
+ project = "{user}/scratch" # default project name
111
+
112
+ prefix: str = (
113
+ EnvVar @ "ML_DASH_PREFIX" | "{project}/{now:%Y/%m-%d}/{path_stem}/{job_name}"
114
+ )
115
+ """Full experiment path: {owner}/{project}/path.../[name]"""
116
+
117
+ readme = None
118
+
119
+ id: int = None
120
+ """Unique experiment ID (snowflake, auto-generated at run start)"""
121
+
122
+ now = datetime.now()
123
+ """Timestamp at import time. Does not change during the session."""
124
+
125
+ timestamp: str = None
126
+ """Timestamp created at instantiation"""
127
+
128
+ ### file properties
129
+ project_root: str = None
130
+ """Root directory for experiment hierarchy (for auto-detection)"""
131
+
132
+ entry: Union[Path, str] = None
133
+ """Entry point file/directory path"""
134
+
135
+ path_stem: str = None
136
+
137
+ job_counter: int = 1 # Defaults to 1. __post_init__ increments it by 1 whenever it is an int or float.
138
+
139
+ job_name: str = "{now:%H.%M.%S}/{job_counter:03d}"
140
+
141
+ """
142
+ Default to '{now:%H.%M.%S}'. use '{now:%H.%M.%S}/{job_counter:03d}'
143
+
144
+ for multiple launches. You can do so by setting:
145
+
146
+ ```python
147
+ RUN.job_name += "/{job_counter}"
148
+
149
+ for params in sweep:
150
+ thunk = instr(main)
151
+ jaynes.run(thunk)
152
+ jaynes.listen()
153
+ ```
154
+ """
155
+
156
+ debug = "pydevd" in sys.modules
157
+ "Set to True automatically when the pydevd debugger module is loaded (e.g. under PyCharm)."
158
+
159
+ def __post_init__(self):
30
160
  """
31
- Global run configuration.
161
+ Initialize RUN with auto-detected prefix from entry path.
162
+
163
+ Args:
164
+ entry: Path to entry file/directory (e.g., __file__ or directory
165
+ containing sweep.jsonl). If not provided, uses caller's
166
+ __file__ automatically.
167
+
168
+ Computes prefix as relative path from project_root to entry's directory.
169
+
170
+ Example:
171
+ # experiments/__init__.py
172
+ from ml_dash import RUN
32
173
 
33
- This class is the single source of truth for run metadata.
34
- Configure it before starting an experiment, or let dxp auto-configure.
174
+ RUN.project_root = "/path/to/my-project/experiments"
175
+
176
+ # experiments/vision/resnet/train.py
177
+ from ml_dash import RUN
178
+
179
+ RUN.__post_init__(entry=__file__)
180
+ # Result: RUN.prefix = "vision/resnet", RUN.name = "resnet"
35
181
  """
36
- # Core identifiers
37
- name: str = "untitled" # Run/experiment name
38
- project: str = "scratch" # Project name
39
-
40
- # Auto-generated identifiers (populated at run.start())
41
- id: str = None # Unique run ID (auto-generated)
42
- timestamp: str = None # Run timestamp (same as id)
43
-
44
- # Optional configuration
45
- folder: str = None # Folder path with optional templates
46
- description: str = None # Run description
47
-
48
- @classmethod
49
- def _generate_id(cls) -> str:
50
- """Generate a unique run ID based on current timestamp."""
51
- return datetime.utcnow().strftime("%Y%m%d_%H%M%S")
52
-
53
- @classmethod
54
- def _init_run(cls) -> None:
55
- """Initialize run ID and timestamp if not already set."""
56
- if cls.id is None:
57
- cls.id = cls._generate_id()
58
- cls.timestamp = cls.id
59
-
60
- @classmethod
61
- def _format(cls, template: str) -> str:
62
- """
63
- Format a template string with RUN values.
64
-
65
- Args:
66
- template: String with {RUN.attr} placeholders
67
-
68
- Returns:
69
- Formatted string with placeholders replaced
70
-
71
- Example:
72
- RUN._format("/experiments/{RUN.name}_{RUN.id}")
73
- # -> "/experiments/my-exp_20241219_143022"
74
- """
75
- return template.format(RUN=cls)
76
-
77
- @classmethod
78
- def _reset(cls) -> None:
79
- """Reset RUN to defaults (for testing or new runs)."""
80
- cls.name = "untitled"
81
- cls.project = "scratch"
82
- cls.id = None
83
- cls.timestamp = None
84
- cls.folder = None
85
- cls.description = None
182
+
183
+ # Use provided entry or try to auto-detect from caller
184
+ if self.entry is None:
185
+ import inspect
186
+
187
+ # Walk up the stack to find the actual caller (skip params_proto frames)
188
+ frame = inspect.currentframe().f_back
189
+ while frame:
190
+ file_path = frame.f_globals.get("__file__", "")
191
+ if "params_proto" not in file_path and "ml_dash/run.py" not in file_path:
192
+ break
193
+ frame = frame.f_back
194
+
195
+ self.entry = frame.f_globals.get("__file__") if frame else None
196
+
197
+ if not self.path_stem:
198
+
199
+ def stem(path):
200
+ return os.path.splitext(str(path))[0]
201
+
202
+ def truncate(path, depth):
203
+ return "/".join(str(path).split("/")[depth:])
204
+
205
+ self.project_root = str(self.project_root or find_project_root(self.entry))
206
+ script_root_depth = self.project_root.split("/").__len__()
207
+
208
+ script_truncated = truncate(os.path.abspath(self.entry), depth=script_root_depth)
209
+
210
+ self.path_stem = stem(script_truncated)
211
+
212
+ if isinstance(RUN.job_counter, int) or isinstance(RUN.job_counter, float):
213
+ RUN.job_counter += 1
214
+
215
+ while "{" in self.prefix:
216
+ data = vars(self)
217
+ for k, v in data.items():
218
+ if isinstance(v, str):
219
+ setattr(self, k, v.format(**data))
220
+
221
+ # for k, v in data.items():
222
+ # print(f"> {k:>30}: {v}")
223
+
224
+
225
+ if __name__ == "__main__":
226
+ RUN.description = ""
227
+ RUN.entry = __file__
228
+ RUN.prefix = "you you"
229
+
230
+ run = RUN()
231
+ print(vars(run))
ml_dash/snowflake.py ADDED
@@ -0,0 +1,173 @@
1
+ """
2
+ Snowflake ID generator for ML-Dash.
3
+
4
+ Snowflake IDs are 64-bit unique identifiers with the following structure:
5
+ - 1 bit: unused (always 0)
6
+ - 41 bits: timestamp in milliseconds since custom epoch
7
+ - 10 bits: worker/machine ID (0-1023)
8
+ - 12 bits: sequence number (0-4095)
9
+
10
+ This provides:
11
+ - Unique IDs across distributed systems
12
+ - Time-sortable (newer IDs are larger)
13
+ - ~69 years of IDs from custom epoch
14
+ - Up to 4096 IDs per millisecond per worker
15
+ """
16
+
17
+ import time
18
+ import threading
19
+ import os
20
+
21
+
22
+ class SnowflakeIDGenerator:
23
+ """
24
+ Thread-safe Snowflake ID generator.
25
+
26
+ Based on Twitter's Snowflake algorithm.
27
+ """
28
+
29
+ # Custom epoch: 2024-01-01 00:00:00 UTC (in milliseconds)
30
+ EPOCH = 1704067200000
31
+
32
+ # Bit lengths
33
+ TIMESTAMP_BITS = 41
34
+ WORKER_BITS = 10
35
+ SEQUENCE_BITS = 12
36
+
37
+ # Max values
38
+ MAX_WORKER_ID = (1 << WORKER_BITS) - 1 # 1023
39
+ MAX_SEQUENCE = (1 << SEQUENCE_BITS) - 1 # 4095
40
+
41
+ # Bit shifts
42
+ TIMESTAMP_SHIFT = WORKER_BITS + SEQUENCE_BITS # 22
43
+ WORKER_SHIFT = SEQUENCE_BITS # 12
44
+
45
+ def __init__(self, worker_id: int = None):
46
+ """
47
+ Initialize Snowflake ID generator.
48
+
49
+ Args:
50
+ worker_id: Worker/machine ID (0-1023). If None, derived from process ID.
51
+ """
52
+ if worker_id is None:
53
+ # Derive from process ID
54
+ worker_id = os.getpid() & self.MAX_WORKER_ID
55
+
56
+ if not 0 <= worker_id <= self.MAX_WORKER_ID:
57
+ raise ValueError(f"worker_id must be between 0 and {self.MAX_WORKER_ID}")
58
+
59
+ self.worker_id = worker_id
60
+ self.sequence = 0
61
+ self.last_timestamp = -1
62
+ self.lock = threading.Lock()
63
+
64
+ def _current_millis(self) -> int:
65
+ """Get current timestamp in milliseconds since custom epoch."""
66
+ return int(time.time() * 1000) - self.EPOCH
67
+
68
+ def _wait_next_millis(self, last_timestamp: int) -> int:
69
+ """Wait until next millisecond."""
70
+ timestamp = self._current_millis()
71
+ while timestamp <= last_timestamp:
72
+ timestamp = self._current_millis()
73
+ return timestamp
74
+
75
+ def generate(self) -> int:
76
+ """
77
+ Generate a new Snowflake ID.
78
+
79
+ Returns:
80
+ A unique 64-bit integer ID
81
+
82
+ Raises:
83
+ RuntimeError: If clock moves backwards
84
+ """
85
+ with self.lock:
86
+ timestamp = self._current_millis()
87
+
88
+ # Check for clock moving backwards
89
+ if timestamp < self.last_timestamp:
90
+ raise RuntimeError(
91
+ f"Clock moved backwards. Refusing to generate ID. "
92
+ f"Last: {self.last_timestamp}, Current: {timestamp}"
93
+ )
94
+
95
+ if timestamp == self.last_timestamp:
96
+ # Same millisecond - increment sequence
97
+ self.sequence = (self.sequence + 1) & self.MAX_SEQUENCE
98
+ if self.sequence == 0:
99
+ # Sequence overflow - wait for next millisecond
100
+ timestamp = self._wait_next_millis(self.last_timestamp)
101
+ else:
102
+ # New millisecond - reset sequence
103
+ self.sequence = 0
104
+
105
+ self.last_timestamp = timestamp
106
+
107
+ # Construct the ID
108
+ snowflake_id = (
109
+ (timestamp << self.TIMESTAMP_SHIFT) |
110
+ (self.worker_id << self.WORKER_SHIFT) |
111
+ self.sequence
112
+ )
113
+
114
+ return snowflake_id
115
+
116
+ def parse(self, snowflake_id: int) -> dict:
117
+ """
118
+ Parse a Snowflake ID into its components.
119
+
120
+ Args:
121
+ snowflake_id: The Snowflake ID to parse
122
+
123
+ Returns:
124
+ Dictionary with timestamp and timestamp_ms (both Unix epoch milliseconds), worker_id, and sequence
125
+ """
126
+ timestamp = (snowflake_id >> self.TIMESTAMP_SHIFT) + self.EPOCH
127
+ worker_id = (snowflake_id >> self.WORKER_SHIFT) & self.MAX_WORKER_ID
128
+ sequence = snowflake_id & self.MAX_SEQUENCE
129
+
130
+ return {
131
+ "timestamp": timestamp,
132
+ "timestamp_ms": timestamp,
133
+ "worker_id": worker_id,
134
+ "sequence": sequence,
135
+ }
136
+
137
+
138
+ # Global singleton instance
139
+ _generator = None
140
+ _generator_lock = threading.Lock()
141
+
142
+
143
+ def get_generator() -> SnowflakeIDGenerator:
144
+ """Get or create the global Snowflake ID generator instance."""
145
+ global _generator
146
+ if _generator is None:
147
+ with _generator_lock:
148
+ if _generator is None:
149
+ _generator = SnowflakeIDGenerator()
150
+ return _generator
151
+
152
+
153
+ def generate_id() -> int:
154
+ """
155
+ Generate a new Snowflake ID using the global generator.
156
+
157
+ Returns:
158
+ A unique 64-bit integer ID
159
+ """
160
+ return get_generator().generate()
161
+
162
+
163
+ def parse_id(snowflake_id: int) -> dict:
164
+ """
165
+ Parse a Snowflake ID into its components.
166
+
167
+ Args:
168
+ snowflake_id: The Snowflake ID to parse
169
+
170
+ Returns:
171
+ Dictionary with timestamp and timestamp_ms (both Unix epoch milliseconds), worker_id, and sequence
172
+ """
173
+ return get_generator().parse(snowflake_id)