ml-dash 0.6.14__tar.gz → 0.6.15__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37):
  1. {ml_dash-0.6.14 → ml_dash-0.6.15}/PKG-INFO +1 -1
  2. {ml_dash-0.6.14 → ml_dash-0.6.15}/pyproject.toml +1 -1
  3. {ml_dash-0.6.14 → ml_dash-0.6.15}/src/ml_dash/buffer.py +132 -12
  4. {ml_dash-0.6.14 → ml_dash-0.6.15}/LICENSE +0 -0
  5. {ml_dash-0.6.14 → ml_dash-0.6.15}/README.md +0 -0
  6. {ml_dash-0.6.14 → ml_dash-0.6.15}/src/ml_dash/__init__.py +0 -0
  7. {ml_dash-0.6.14 → ml_dash-0.6.15}/src/ml_dash/auth/__init__.py +0 -0
  8. {ml_dash-0.6.14 → ml_dash-0.6.15}/src/ml_dash/auth/constants.py +0 -0
  9. {ml_dash-0.6.14 → ml_dash-0.6.15}/src/ml_dash/auth/device_flow.py +0 -0
  10. {ml_dash-0.6.14 → ml_dash-0.6.15}/src/ml_dash/auth/device_secret.py +0 -0
  11. {ml_dash-0.6.14 → ml_dash-0.6.15}/src/ml_dash/auth/exceptions.py +0 -0
  12. {ml_dash-0.6.14 → ml_dash-0.6.15}/src/ml_dash/auth/token_storage.py +0 -0
  13. {ml_dash-0.6.14 → ml_dash-0.6.15}/src/ml_dash/auto_start.py +0 -0
  14. {ml_dash-0.6.14 → ml_dash-0.6.15}/src/ml_dash/cli.py +0 -0
  15. {ml_dash-0.6.14 → ml_dash-0.6.15}/src/ml_dash/cli_commands/__init__.py +0 -0
  16. {ml_dash-0.6.14 → ml_dash-0.6.15}/src/ml_dash/cli_commands/api.py +0 -0
  17. {ml_dash-0.6.14 → ml_dash-0.6.15}/src/ml_dash/cli_commands/create.py +0 -0
  18. {ml_dash-0.6.14 → ml_dash-0.6.15}/src/ml_dash/cli_commands/download.py +0 -0
  19. {ml_dash-0.6.14 → ml_dash-0.6.15}/src/ml_dash/cli_commands/list.py +0 -0
  20. {ml_dash-0.6.14 → ml_dash-0.6.15}/src/ml_dash/cli_commands/login.py +0 -0
  21. {ml_dash-0.6.14 → ml_dash-0.6.15}/src/ml_dash/cli_commands/logout.py +0 -0
  22. {ml_dash-0.6.14 → ml_dash-0.6.15}/src/ml_dash/cli_commands/profile.py +0 -0
  23. {ml_dash-0.6.14 → ml_dash-0.6.15}/src/ml_dash/cli_commands/remove.py +0 -0
  24. {ml_dash-0.6.14 → ml_dash-0.6.15}/src/ml_dash/cli_commands/upload.py +0 -0
  25. {ml_dash-0.6.14 → ml_dash-0.6.15}/src/ml_dash/client.py +0 -0
  26. {ml_dash-0.6.14 → ml_dash-0.6.15}/src/ml_dash/config.py +0 -0
  27. {ml_dash-0.6.14 → ml_dash-0.6.15}/src/ml_dash/experiment.py +0 -0
  28. {ml_dash-0.6.14 → ml_dash-0.6.15}/src/ml_dash/files.py +0 -0
  29. {ml_dash-0.6.14 → ml_dash-0.6.15}/src/ml_dash/log.py +0 -0
  30. {ml_dash-0.6.14 → ml_dash-0.6.15}/src/ml_dash/metric.py +0 -0
  31. {ml_dash-0.6.14 → ml_dash-0.6.15}/src/ml_dash/params.py +0 -0
  32. {ml_dash-0.6.14 → ml_dash-0.6.15}/src/ml_dash/py.typed +0 -0
  33. {ml_dash-0.6.14 → ml_dash-0.6.15}/src/ml_dash/remote_auto_start.py +0 -0
  34. {ml_dash-0.6.14 → ml_dash-0.6.15}/src/ml_dash/run.py +0 -0
  35. {ml_dash-0.6.14 → ml_dash-0.6.15}/src/ml_dash/snowflake.py +0 -0
  36. {ml_dash-0.6.14 → ml_dash-0.6.15}/src/ml_dash/storage.py +0 -0
  37. {ml_dash-0.6.14 → ml_dash-0.6.15}/src/ml_dash/track.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: ml-dash
3
- Version: 0.6.14
3
+ Version: 0.6.15
4
4
  Summary: ML experiment tracking and data storage
5
5
  Keywords: machine-learning,experiment-tracking,mlops,data-storage
6
6
  Author: Ge Yang, Tom Tao
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "ml-dash"
3
- version = "0.6.14"
3
+ version = "0.6.15"
4
4
  description = "ML experiment tracking and data storage"
5
5
  readme = "README.md"
6
6
  requires-python = ">=3.9"
@@ -57,6 +57,11 @@ def _serialize_value(value: Any) -> Any:
57
57
  class BufferConfig:
58
58
  """Configuration for buffering behavior."""
59
59
 
60
+ # Internal constants for queue management (not exposed to users)
61
+ _MAX_QUEUE_SIZE = 100000 # Maximum items before blocking
62
+ _WARNING_THRESHOLD = 80000 # Warn at 80% capacity
63
+ _AGGRESSIVE_FLUSH_THRESHOLD = 50000 # Trigger immediate flush at 50% capacity
64
+
60
65
  def __init__(
61
66
  self,
62
67
  flush_interval: float = 5.0,
@@ -114,17 +119,20 @@ class BackgroundBufferManager:
114
119
  self._experiment = experiment
115
120
  self._config = config
116
121
 
117
- # Resource-specific queues
118
- self._log_queue: Queue = Queue()
122
+ # Resource-specific queues with bounded size to prevent OOM
123
+ self._log_queue: Queue = Queue(maxsize=config._MAX_QUEUE_SIZE)
119
124
  self._metric_queues: Dict[Optional[str], Queue] = {} # Per-metric queues
120
125
  self._track_buffers: Dict[str, Dict[float, Dict[str, Any]]] = {} # Per-topic: {timestamp: merged_data}
121
- self._file_queue: Queue = Queue()
126
+ self._file_queue: Queue = Queue(maxsize=config._MAX_QUEUE_SIZE)
122
127
 
123
128
  # Track last flush times per resource type
124
129
  self._last_log_flush = time.time()
125
130
  self._last_metric_flush: Dict[Optional[str], float] = {}
126
131
  self._last_track_flush: Dict[str, float] = {} # Per-topic flush times
127
132
 
133
+ # Track warnings to avoid spamming
134
+ self._warned_queues: set = set()
135
+
128
136
  # Background thread control
129
137
  self._thread: Optional[threading.Thread] = None
130
138
  self._stop_event = threading.Event()
@@ -184,6 +192,34 @@ class BackgroundBufferManager:
184
192
 
185
193
  self._thread = None
186
194
 
195
+ def _check_queue_pressure(self, queue: Queue, queue_name: str) -> None:
196
+ """
197
+ Check queue size and trigger aggressive flushing if needed.
198
+
199
+ This prevents OOM by flushing immediately when queue fills up.
200
+
201
+ Args:
202
+ queue: The queue to check
203
+ queue_name: Name for warning messages
204
+ """
205
+ qsize = queue.qsize()
206
+
207
+ # Trigger immediate flush if queue is getting full
208
+ if qsize >= self._config._AGGRESSIVE_FLUSH_THRESHOLD:
209
+ self._flush_event.set()
210
+
211
+ # Warn once if queue is filling up (80% capacity)
212
+ if qsize >= self._config._WARNING_THRESHOLD:
213
+ if queue_name not in self._warned_queues:
214
+ warnings.warn(
215
+ f"[ML-Dash] {queue_name} queue is {qsize}/{self._config._MAX_QUEUE_SIZE} full. "
216
+ f"Data is being generated faster than it can be flushed. "
217
+ f"Consider reducing logging frequency or the background flush will block to prevent OOM.",
218
+ RuntimeWarning,
219
+ stacklevel=3
220
+ )
221
+ self._warned_queues.add(queue_name)
222
+
187
223
  def buffer_log(
188
224
  self,
189
225
  message: str,
@@ -192,7 +228,10 @@ class BackgroundBufferManager:
192
228
  timestamp: Optional[datetime],
193
229
  ) -> None:
194
230
  """
195
- Add log to buffer (non-blocking).
231
+ Add log to buffer with automatic backpressure.
232
+
233
+ If queue is full, this will block until space is available.
234
+ This prevents OOM when logs are generated faster than they can be flushed.
196
235
 
197
236
  Args:
198
237
  message: Log message
@@ -200,6 +239,9 @@ class BackgroundBufferManager:
200
239
  metadata: Optional metadata
201
240
  timestamp: Optional timestamp
202
241
  """
242
+ # Check queue pressure and trigger aggressive flushing if needed
243
+ self._check_queue_pressure(self._log_queue, "Log")
244
+
203
245
  log_entry = {
204
246
  "timestamp": (timestamp or datetime.utcnow()).isoformat() + "Z",
205
247
  "level": level,
@@ -209,6 +251,7 @@ class BackgroundBufferManager:
209
251
  if metadata:
210
252
  log_entry["metadata"] = metadata
211
253
 
254
+ # Will block if queue is full (backpressure to prevent OOM)
212
255
  self._log_queue.put(log_entry)
213
256
 
214
257
  def buffer_metric(
@@ -220,7 +263,10 @@ class BackgroundBufferManager:
220
263
  metadata: Optional[Dict[str, Any]],
221
264
  ) -> None:
222
265
  """
223
- Add metric datapoint to buffer (non-blocking).
266
+ Add metric datapoint to buffer with automatic backpressure.
267
+
268
+ If queue is full, this will block until space is available.
269
+ This prevents OOM when metrics are generated faster than they can be flushed.
224
270
 
225
271
  Args:
226
272
  metric_name: Metric name (can be None for unnamed metrics)
@@ -229,11 +275,18 @@ class BackgroundBufferManager:
229
275
  tags: Optional tags
230
276
  metadata: Optional metadata
231
277
  """
232
- # Get or create queue for this metric
278
+ # Get or create queue for this metric (with bounded size)
233
279
  if metric_name not in self._metric_queues:
234
- self._metric_queues[metric_name] = Queue()
280
+ self._metric_queues[metric_name] = Queue(maxsize=self._config._MAX_QUEUE_SIZE)
235
281
  self._last_metric_flush[metric_name] = time.time()
236
282
 
283
+ # Check queue pressure and trigger aggressive flushing if needed
284
+ metric_display = f"'{metric_name}'" if metric_name else "unnamed"
285
+ self._check_queue_pressure(
286
+ self._metric_queues[metric_name],
287
+ f"Metric {metric_display}"
288
+ )
289
+
237
290
  metric_entry = {
238
291
  "data": data,
239
292
  "description": description,
@@ -241,6 +294,7 @@ class BackgroundBufferManager:
241
294
  "metadata": metadata,
242
295
  }
243
296
 
297
+ # Will block if queue is full (backpressure to prevent OOM)
244
298
  self._metric_queues[metric_name].put(metric_entry)
245
299
 
246
300
  def buffer_track(
@@ -286,7 +340,9 @@ class BackgroundBufferManager:
286
340
  size_bytes: int,
287
341
  ) -> None:
288
342
  """
289
- Add file upload to queue (non-blocking).
343
+ Add file upload to queue with automatic backpressure.
344
+
345
+ If queue is full, this will block until space is available.
290
346
 
291
347
  Args:
292
348
  file_path: Local file path
@@ -299,6 +355,9 @@ class BackgroundBufferManager:
299
355
  content_type: MIME type
300
356
  size_bytes: File size in bytes
301
357
  """
358
+ # Check queue pressure and trigger aggressive flushing if needed
359
+ self._check_queue_pressure(self._file_queue, "File")
360
+
302
361
  file_entry = {
303
362
  "file_path": file_path,
304
363
  "prefix": prefix,
@@ -311,6 +370,7 @@ class BackgroundBufferManager:
311
370
  "size_bytes": size_bytes,
312
371
  }
313
372
 
373
+ # Will block if queue is full (backpressure to prevent OOM)
314
374
  self._file_queue.put(file_entry)
315
375
 
316
376
  def flush_all(self) -> None:
@@ -425,13 +485,73 @@ class BackgroundBufferManager:
425
485
  if triggered:
426
486
  self._flush_event.clear()
427
487
 
428
- # Final flush on shutdown
429
- self._flush_logs()
488
+ # Final flush on shutdown - loop until all queues are empty
489
+ # This ensures no data is lost when shutting down with large queues
490
+ # Show progress bar for large flushes
491
+ initial_counts = {
492
+ 'logs': self._log_queue.qsize(),
493
+ 'metrics': {name: q.qsize() for name, q in self._metric_queues.items()},
494
+ 'tracks': {topic: len(entries) for topic, entries in self._track_buffers.items()},
495
+ 'files': self._file_queue.qsize(),
496
+ }
497
+
498
+ total_items = (
499
+ initial_counts['logs'] +
500
+ sum(initial_counts['metrics'].values()) +
501
+ sum(initial_counts['tracks'].values()) +
502
+ initial_counts['files']
503
+ )
504
+
505
+ # Show progress bar if there are many items to flush
506
+ show_progress = total_items > 200
507
+ items_flushed = 0
508
+
509
+ def update_progress():
510
+ nonlocal items_flushed
511
+ if show_progress:
512
+ progress = items_flushed / total_items
513
+ bar_length = 40
514
+ filled = int(bar_length * progress)
515
+ bar = '█' * filled + '░' * (bar_length - filled)
516
+ percent = progress * 100
517
+ print(f'\r[ML-Dash] Flushing: |{bar}| {percent:.1f}% ({items_flushed}/{total_items})', end='', flush=True)
518
+
519
+ # Flush logs
520
+ log_batch_size = self._config.log_batch_size
521
+ while not self._log_queue.empty():
522
+ before = self._log_queue.qsize()
523
+ self._flush_logs()
524
+ after = self._log_queue.qsize()
525
+ items_flushed += before - after
526
+ update_progress()
527
+
528
+ # Flush metrics
529
+ metric_batch_size = self._config.metric_batch_size
430
530
  for metric_name in list(self._metric_queues.keys()):
431
- self._flush_metric(metric_name)
531
+ while not self._metric_queues[metric_name].empty():
532
+ before = self._metric_queues[metric_name].qsize()
533
+ self._flush_metric(metric_name)
534
+ after = self._metric_queues[metric_name].qsize()
535
+ items_flushed += before - after
536
+ update_progress()
537
+
538
+ # Flush tracks
432
539
  for topic in list(self._track_buffers.keys()):
540
+ track_count = len(self._track_buffers.get(topic, {}))
433
541
  self._flush_track(topic)
434
- self._flush_files()
542
+ items_flushed += track_count
543
+ update_progress()
544
+
545
+ # Flush files
546
+ while not self._file_queue.empty():
547
+ before = self._file_queue.qsize()
548
+ self._flush_files()
549
+ after = self._file_queue.qsize()
550
+ items_flushed += before - after
551
+ update_progress()
552
+
553
+ if show_progress:
554
+ print() # New line after progress bar
435
555
 
436
556
  def _flush_logs(self) -> None:
437
557
  """Batch flush logs using client.create_log_entries()."""
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes