ml-dash 0.4.0__py3-none-any.whl → 0.5.1__py3-none-any.whl

This diff compares the contents of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the versions as they appear in their public registry.
@@ -1,90 +0,0 @@
- """Local file system storage backend for ML-Logger."""
-
- from pathlib import Path
- from typing import Optional, List
- import os
-
- from .base import StorageBackend
-
-
- class LocalBackend(StorageBackend):
-     """Local file system storage backend.
-
-     Stores all data in the local file system.
-
-     Args:
-         root_dir: Root directory for storage (default: ".ml-logger")
-     """
-
-     def __init__(self, root_dir: str = ".ml-logger"):
-         """Initialize local backend.
-
-         Args:
-             root_dir: Root directory for storage
-         """
-         self.root_dir = Path(root_dir).resolve()
-         self.root_dir.mkdir(parents=True, exist_ok=True)
-
-     def _resolve_path(self, path: str) -> Path:
-         """Resolve a path relative to root_dir.
-
-         Args:
-             path: Relative path
-
-         Returns:
-             Absolute Path object
-         """
-         return self.root_dir / path
-
-     def exists(self, path: str) -> bool:
-         """Check if a file or directory exists."""
-         return self._resolve_path(path).exists()
-
-     def write_bytes(self, path: str, data: bytes) -> None:
-         """Write binary data to a file."""
-         full_path = self._resolve_path(path)
-         full_path.parent.mkdir(parents=True, exist_ok=True)
-         full_path.write_bytes(data)
-
-     def read_bytes(self, path: str) -> bytes:
-         """Read binary data from a file."""
-         return self._resolve_path(path).read_bytes()
-
-     def write_text(self, path: str, text: str) -> None:
-         """Write text to a file."""
-         full_path = self._resolve_path(path)
-         full_path.parent.mkdir(parents=True, exist_ok=True)
-         full_path.write_text(text, encoding="utf-8")
-
-     def read_text(self, path: str) -> str:
-         """Read text from a file."""
-         return self._resolve_path(path).read_text(encoding="utf-8")
-
-     def append_text(self, path: str, text: str) -> None:
-         """Append text to a file."""
-         full_path = self._resolve_path(path)
-         full_path.parent.mkdir(parents=True, exist_ok=True)
-         with open(full_path, "a", encoding="utf-8") as f:
-             f.write(text)
-
-     def list_dir(self, path: str = "") -> List[str]:
-         """List contents of a directory."""
-         full_path = self._resolve_path(path) if path else self.root_dir
-         if not full_path.exists():
-             return []
-         return [item.name for item in full_path.iterdir()]
-
-     def get_url(self, path: str) -> Optional[str]:
-         """Get a file:// URL for accessing a file."""
-         full_path = self._resolve_path(path)
-         return f"file://{full_path}" if full_path.exists() else None
-
-     def makedirs(self, path: str, exist_ok: bool = True) -> None:
-         """Create directories recursively."""
-         self._resolve_path(path).mkdir(parents=True, exist_ok=exist_ok)
-
-     def delete(self, path: str) -> None:
-         """Delete a file."""
-         full_path = self._resolve_path(path)
-         if full_path.exists():
-             full_path.unlink()
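
The removed LocalBackend was a thin wrapper over pathlib that rooted every operation under root_dir. For reference, a minimal usage sketch of the removed 0.4.0 API (the import path is an assumption; this diff does not show file names):

    # Sketch only: the module path below is assumed, not shown in the diff.
    from ml_dash.backends.local import LocalBackend

    backend = LocalBackend(root_dir=".ml-logger")   # resolves and creates the root
    backend.write_text("exp-1/notes.txt", "baseline run")
    backend.append_text("exp-1/notes.txt", "\nlr=3e-4")

    print(backend.read_text("exp-1/notes.txt"))     # 'baseline run\nlr=3e-4'
    print(backend.list_dir("exp-1"))                # ['notes.txt']
    print(backend.get_url("exp-1/notes.txt"))       # 'file:///<abs root>/exp-1/notes.txt'
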
@@ -1,13 +0,0 @@
- """Logger components for managing different data types."""
-
- from .parameters import ParameterManager
- from .metrics import MetricsLogger
- from .files import FileManager
- from .logs import LogManager
-
- __all__ = [
-     "ParameterManager",
-     "MetricsLogger",
-     "FileManager",
-     "LogManager",
- ]
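
The components re-exported above were each constructed from a storage backend plus an experiment prefix. A hypothetical wiring sketch of the removed API (import paths are assumptions; ParameterManager's source is not part of this diff):

    # All module paths below are assumed, not shown in the diff.
    from ml_dash.backends.local import LocalBackend
    from ml_dash.components import MetricsLogger, FileManager, LogManager

    backend = LocalBackend()
    prefix = "runs/exp-1"

    metrics = MetricsLogger(backend, prefix)   # writes runs/exp-1/metrics.jsonl
    files = FileManager(backend, prefix)       # writes under runs/exp-1/files/
    logs = LogManager(backend, prefix)         # writes runs/exp-1/logs.jsonl
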
@@ -1,246 +0,0 @@
- """File management component for ML-Logger."""
-
- import json
- import pickle
- from typing import Any, Optional
- from pathlib import Path
-
- from ..backends.base import StorageBackend
-
-
- class FileManager:
-     """Manages file storage and retrieval.
-
-     Files are stored in the files/ subdirectory.
-
-     Args:
-         backend: Storage backend
-         prefix: Experiment prefix path
-         namespace: Optional namespace for files (e.g., "checkpoints")
-     """
-
-     def __init__(
-         self,
-         backend: StorageBackend,
-         prefix: str,
-         namespace: str = ""
-     ):
-         """Initialize file manager.
-
-         Args:
-             backend: Storage backend
-             prefix: Experiment prefix path
-             namespace: Optional namespace subdirectory
-         """
-         self.backend = backend
-         self.prefix = prefix
-         self.namespace = namespace
-
-     def _get_file_path(self, filename: str) -> str:
-         """Get full file path with namespace.
-
-         Args:
-             filename: File name
-
-         Returns:
-             Full path including prefix, files/, and namespace
-         """
-         parts = [self.prefix, "files"]
-         if self.namespace:
-             parts.append(self.namespace)
-         parts.append(filename)
-         return "/".join(parts)
-
-     def save(self, data: Any, filename: str) -> None:
-         """Save data to a file (auto-detects format).
-
-         Supports: JSON (.json), pickle (.pkl, .pickle), PyTorch (.pt, .pth),
-         NumPy (.npy, .npz), and raw bytes.
-
-         Args:
-             data: Data to save
-             filename: File name
-         """
-         file_path = self._get_file_path(filename)
-         suffix = Path(filename).suffix.lower()
-
-         if suffix == ".json":
-             # Save as JSON
-             json_str = json.dumps(data, indent=2)
-             self.backend.write_text(file_path, json_str)
-
-         elif suffix in [".pkl", ".pickle"]:
-             # Save as pickle
-             pickled = pickle.dumps(data)
-             self.backend.write_bytes(file_path, pickled)
-
-         elif suffix in [".pt", ".pth"]:
-             # Save PyTorch tensor/model
-             try:
-                 import torch
-                 import io
-                 buffer = io.BytesIO()
-                 torch.save(data, buffer)
-                 self.backend.write_bytes(file_path, buffer.getvalue())
-             except ImportError:
-                 raise ImportError("PyTorch is required to save .pt/.pth files")
-
-         elif suffix in [".npy", ".npz"]:
-             # Save NumPy array
-             try:
-                 import numpy as np
-                 import io
-                 buffer = io.BytesIO()
-                 if suffix == ".npy":
-                     np.save(buffer, data)
-                 else:
-                     np.savez(buffer, data)
-                 self.backend.write_bytes(file_path, buffer.getvalue())
-             except ImportError:
-                 raise ImportError("NumPy is required to save .npy/.npz files")
-
-         else:
-             # Save as raw bytes
-             if isinstance(data, bytes):
-                 self.backend.write_bytes(file_path, data)
-             elif isinstance(data, str):
-                 self.backend.write_text(file_path, data)
-             else:
-                 # Fallback to pickle
-                 pickled = pickle.dumps(data)
-                 self.backend.write_bytes(file_path, pickled)
-
-     def save_pkl(self, data: Any, filename: str) -> None:
-         """Save data as pickle file.
-
-         Args:
-             data: Data to save
-             filename: File name (will add .pkl if missing)
-         """
-         if not filename.endswith((".pkl", ".pickle")):
-             filename = f"{filename}.pkl"
-         self.save(data, filename)
-
-     def load(self, filename: str) -> Any:
-         """Load data from a file (auto-detects format).
-
-         Args:
-             filename: File name
-
-         Returns:
-             Loaded data
-         """
-         file_path = self._get_file_path(filename)
-         suffix = Path(filename).suffix.lower()
-
-         if suffix == ".json":
-             # Load JSON
-             json_str = self.backend.read_text(file_path)
-             return json.loads(json_str)
-
-         elif suffix in [".pkl", ".pickle"]:
-             # Load pickle
-             pickled = self.backend.read_bytes(file_path)
-             return pickle.loads(pickled)
-
-         elif suffix in [".pt", ".pth"]:
-             # Load PyTorch
-             try:
-                 import torch
-                 import io
-                 data = self.backend.read_bytes(file_path)
-                 buffer = io.BytesIO(data)
-                 return torch.load(buffer)
-             except ImportError:
-                 raise ImportError("PyTorch is required to load .pt/.pth files")
-
-         elif suffix in [".npy", ".npz"]:
-             # Load NumPy
-             try:
-                 import numpy as np
-                 import io
-                 data = self.backend.read_bytes(file_path)
-                 buffer = io.BytesIO(data)
-                 if suffix == ".npy":
-                     return np.load(buffer)
-                 else:
-                     return np.load(buffer, allow_pickle=True)
-             except ImportError:
-                 raise ImportError("NumPy is required to load .npy/.npz files")
-
-         else:
-             # For unknown extensions, try different strategies
-             data = self.backend.read_bytes(file_path)
-
-             # If it looks like a binary extension, return bytes directly
-             if suffix in [".bin", ".dat", ".raw"]:
-                 return data
-
-             # Try to unpickle first (handles custom extensions from save())
-             try:
-                 return pickle.loads(data)
-             except (pickle.UnpicklingError, EOFError, AttributeError):
-                 pass
-
-             # Try to decode as text
-             try:
-                 return data.decode('utf-8')
-             except UnicodeDecodeError:
-                 # Return raw bytes as fallback
-                 return data
-
-     def load_torch(self, filename: str) -> Any:
-         """Load PyTorch checkpoint.
-
-         Args:
-             filename: File name
-
-         Returns:
-             Loaded PyTorch data
-         """
-         if not filename.endswith((".pt", ".pth")):
-             filename = f"{filename}.pt"
-         return self.load(filename)
-
-     def __call__(self, namespace: str) -> "FileManager":
-         """Create a namespaced file manager.
-
-         Args:
-             namespace: Namespace subdirectory (e.g., "checkpoints")
-
-         Returns:
-             New FileManager with the namespace
-         """
-         new_namespace = f"{self.namespace}/{namespace}" if self.namespace else namespace
-         return FileManager(
-             backend=self.backend,
-             prefix=self.prefix,
-             namespace=new_namespace
-         )
-
-     def exists(self, filename: str) -> bool:
-         """Check if a file exists.
-
-         Args:
-             filename: File name
-
-         Returns:
-             True if file exists
-         """
-         file_path = self._get_file_path(filename)
-         return self.backend.exists(file_path)
-
-     def list(self) -> list:
-         """List files in the current namespace.
-
-         Returns:
-             List of file names
-         """
-         dir_path = f"{self.prefix}/files"
-         if self.namespace:
-             dir_path = f"{dir_path}/{self.namespace}"
-
-         try:
-             return self.backend.list_dir(dir_path)
-         except Exception:
-             return []
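
FileManager.save dispatched on the filename suffix, falling back to raw bytes, raw text, or pickle for unknown extensions. A usage sketch of the removed API, continuing the wiring sketch above (all names illustrative):

    files.save({"lr": 3e-4, "epochs": 10}, "config.json")   # JSON, by suffix
    files.save_pkl({"seen": 120}, "state")                  # becomes state.pkl

    ckpt = files("checkpoints")                 # namespaced: files/checkpoints/
    ckpt.save(b"\x00\x01", "weights.bin")       # unknown suffix + bytes -> raw write

    files.load("config.json")                   # -> {'lr': 0.0003, 'epochs': 10}
    files.exists("config.json")                 # -> True
    ckpt.list()                                 # -> ['weights.bin']
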
@@ -1,104 +0,0 @@
- """Text logging component for ML-Logger."""
-
- import json
- import time
- from typing import Any, Dict, Optional, List
-
- from ..backends.base import StorageBackend
-
-
- class LogManager:
-     """Manages structured text logging.
-
-     Logs are stored in a JSONL file (logs.jsonl).
-
-     Args:
-         backend: Storage backend
-         prefix: Experiment prefix path
-     """
-
-     def __init__(self, backend: StorageBackend, prefix: str):
-         """Initialize log manager.
-
-         Args:
-             backend: Storage backend
-             prefix: Experiment prefix path
-         """
-         self.backend = backend
-         self.prefix = prefix
-         self.logs_file = f"{prefix}/logs.jsonl"
-
-     def log(self, message: str, level: str = "INFO", **context) -> None:
-         """Log a message with context.
-
-         Args:
-             message: Log message
-             level: Log level (DEBUG, INFO, WARNING, ERROR)
-             **context: Additional context fields
-         """
-         entry = {
-             "timestamp": time.time(),
-             "level": level.upper(),
-             "message": message,
-         }
-
-         if context:
-             entry["context"] = context
-
-         line = json.dumps(entry) + "\n"
-         self.backend.append_text(self.logs_file, line)
-
-     def info(self, message: str, **context) -> None:
-         """Log an info message.
-
-         Args:
-             message: Log message
-             **context: Additional context fields
-         """
-         self.log(message, level="INFO", **context)
-
-     def warning(self, message: str, **context) -> None:
-         """Log a warning message.
-
-         Args:
-             message: Log message
-             **context: Additional context fields
-         """
-         self.log(message, level="WARNING", **context)
-
-     def error(self, message: str, **context) -> None:
-         """Log an error message.
-
-         Args:
-             message: Log message
-             **context: Additional context fields
-         """
-         self.log(message, level="ERROR", **context)
-
-     def debug(self, message: str, **context) -> None:
-         """Log a debug message.
-
-         Args:
-             message: Log message
-             **context: Additional context fields
-         """
-         self.log(message, level="DEBUG", **context)
-
-     def read(self) -> List[Dict[str, Any]]:
-         """Read all logs from file.
-
-         Returns:
-             List of log entries
-         """
-         if not self.backend.exists(self.logs_file):
-             return []
-
-         content = self.backend.read_text(self.logs_file)
-         logs = []
-
-         for line in content.strip().split("\n"):
-             if not line:
-                 continue
-             logs.append(json.loads(line))
-
-         return logs
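
Each LogManager call appended one JSON object per line to logs.jsonl, and read() parsed the lines back. A short sketch, continuing the wiring sketch above (names illustrative):

    logs.info("training started", epochs=10)
    logs.warning("loss plateaued", epoch=4)
    logs.error("NaN loss encountered", step=1201)

    for entry in logs.read():
        print(entry["level"], entry["message"], entry.get("context", {}))
    # INFO training started {'epochs': 10}
    # WARNING loss plateaued {'epoch': 4}
    # ERROR NaN loss encountered {'step': 1201}
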
@@ -1,169 +0,0 @@
- """Metrics logging component for ML-Logger."""
-
- import json
- import time
- from typing import Any, Dict, Optional, List
- from collections import defaultdict
-
- from ..backends.base import StorageBackend
-
-
- class MetricsLogger:
-     """Logs metrics with support for namespacing and aggregation.
-
-     Metrics are stored in a single JSONL file (metrics.jsonl).
-
-     Args:
-         backend: Storage backend
-         prefix: Experiment prefix path
-         namespace: Optional namespace for metrics (e.g., "train", "val")
-     """
-
-     def __init__(
-         self,
-         backend: StorageBackend,
-         prefix: str,
-         namespace: str = ""
-     ):
-         """Initialize metrics logger.
-
-         Args:
-             backend: Storage backend
-             prefix: Experiment prefix path
-             namespace: Optional namespace prefix
-         """
-         self.backend = backend
-         self.prefix = prefix
-         self.namespace = namespace
-         self.metrics_file = f"{prefix}/metrics.jsonl"
-         self._collect_buffer: Dict[str, List[float]] = defaultdict(list)
-
-     def log(self, step: Optional[int] = None, **metrics) -> None:
-         """Log metrics immediately.
-
-         Args:
-             step: Step number (epoch, iteration, etc.)
-             **metrics: Metric name-value pairs
-         """
-         # Apply namespace to metric names
-         namespaced_metrics = {}
-         for key, value in metrics.items():
-             if self.namespace:
-                 key = f"{self.namespace}.{key}"
-             namespaced_metrics[key] = value
-
-         entry = {
-             "timestamp": time.time(),
-             "metrics": namespaced_metrics
-         }
-
-         if step is not None:
-             entry["step"] = step
-
-         line = json.dumps(entry) + "\n"
-         self.backend.append_text(self.metrics_file, line)
-
-     def collect(self, step: Optional[int] = None, **metrics) -> None:
-         """Collect metrics for later aggregation.
-
-         Args:
-             step: Step number (optional, used by flush)
-             **metrics: Metric name-value pairs
-         """
-         for key, value in metrics.items():
-             if self.namespace:
-                 key = f"{self.namespace}.{key}"
-             self._collect_buffer[key].append(float(value))
-
-     def flush(
-         self,
-         _aggregation: str = "mean",
-         step: Optional[int] = None,
-         **additional_metrics
-     ) -> None:
-         """Flush collected metrics with aggregation.
-
-         Args:
-             _aggregation: Aggregation method ("mean", "sum", "min", "max", "last")
-             step: Step number for logged metrics
-             **additional_metrics: Additional metrics to log (not aggregated)
-         """
-         if not self._collect_buffer and not additional_metrics:
-             return
-
-         aggregated = {}
-
-         # Aggregate collected metrics
-         for key, values in self._collect_buffer.items():
-             if not values:
-                 continue
-
-             if _aggregation == "mean":
-                 aggregated[key] = sum(values) / len(values)
-             elif _aggregation == "sum":
-                 aggregated[key] = sum(values)
-             elif _aggregation == "min":
-                 aggregated[key] = min(values)
-             elif _aggregation == "max":
-                 aggregated[key] = max(values)
-             elif _aggregation == "last":
-                 aggregated[key] = values[-1]
-             else:
-                 raise ValueError(f"Unknown aggregation method: {_aggregation}")
-
-         # Add non-aggregated metrics
-         for key, value in additional_metrics.items():
-             if self.namespace:
-                 key = f"{self.namespace}.{key}"
-             aggregated[key] = value
-
-         # Log aggregated metrics
-         if aggregated:
-             entry = {
-                 "timestamp": time.time(),
-                 "metrics": aggregated
-             }
-
-             if step is not None:
-                 entry["step"] = step
-
-             line = json.dumps(entry) + "\n"
-             self.backend.append_text(self.metrics_file, line)
-
-         # Clear buffer
-         self._collect_buffer.clear()
-
-     def __call__(self, namespace: str) -> "MetricsLogger":
-         """Create a namespaced metrics logger.
-
-         Args:
-             namespace: Namespace name (e.g., "train", "val")
-
-         Returns:
-             New MetricsLogger with the namespace
-         """
-         new_namespace = f"{self.namespace}.{namespace}" if self.namespace else namespace
-         return MetricsLogger(
-             backend=self.backend,
-             prefix=self.prefix,
-             namespace=new_namespace
-         )
-
-     def read(self) -> List[Dict[str, Any]]:
-         """Read all metrics from file.
-
-         Returns:
-             List of metric entries
-         """
-         if not self.backend.exists(self.metrics_file):
-             return []
-
-         content = self.backend.read_text(self.metrics_file)
-         metrics = []
-
-         for line in content.strip().split("\n"):
-             if not line:
-                 continue
-             metrics.append(json.loads(line))
-
-         return metrics
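
The removed MetricsLogger separated immediate writes (log) from buffered aggregation (collect, then flush, which defaults to the mean). A sketch of that flow, continuing the wiring sketch above (names illustrative):

    train = metrics("train")               # keys are namespaced to 'train.loss', etc.

    for batch_loss in (0.9, 0.7, 0.5):
        train.collect(loss=batch_loss)     # buffered only; nothing written yet

    train.flush(step=1, lr=3e-4)           # mean(buffer) -> 'train.loss': 0.7,
                                           # plus pass-through 'train.lr': 0.0003
    train.log(step=2, loss=0.4)            # immediate, unbuffered write
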