additory 0.1.0a3-py3-none-any.whl → 0.1.1a1-py3-none-any.whl

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (120)
  1. additory/__init__.py +58 -14
  2. additory/common/__init__.py +31 -147
  3. additory/common/column_selector.py +255 -0
  4. additory/common/distributions.py +286 -613
  5. additory/common/extractors.py +313 -0
  6. additory/common/knn_imputation.py +332 -0
  7. additory/common/result.py +380 -0
  8. additory/common/strategy_parser.py +243 -0
  9. additory/common/unit_conversions.py +338 -0
  10. additory/common/validation.py +283 -103
  11. additory/core/__init__.py +34 -22
  12. additory/core/backend.py +258 -0
  13. additory/core/config.py +177 -305
  14. additory/core/logging.py +230 -24
  15. additory/core/memory_manager.py +157 -495
  16. additory/expressions/__init__.py +2 -23
  17. additory/expressions/compiler.py +457 -0
  18. additory/expressions/engine.py +264 -487
  19. additory/expressions/integrity.py +179 -0
  20. additory/expressions/loader.py +263 -0
  21. additory/expressions/parser.py +363 -167
  22. additory/expressions/resolver.py +274 -0
  23. additory/functions/__init__.py +1 -0
  24. additory/functions/analyze/__init__.py +144 -0
  25. additory/functions/analyze/cardinality.py +58 -0
  26. additory/functions/analyze/correlations.py +66 -0
  27. additory/functions/analyze/distributions.py +53 -0
  28. additory/functions/analyze/duplicates.py +49 -0
  29. additory/functions/analyze/features.py +61 -0
  30. additory/functions/analyze/imputation.py +66 -0
  31. additory/functions/analyze/outliers.py +65 -0
  32. additory/functions/analyze/patterns.py +65 -0
  33. additory/functions/analyze/presets.py +72 -0
  34. additory/functions/analyze/quality.py +59 -0
  35. additory/functions/analyze/timeseries.py +53 -0
  36. additory/functions/analyze/types.py +45 -0
  37. additory/functions/expressions/__init__.py +161 -0
  38. additory/functions/snapshot/__init__.py +82 -0
  39. additory/functions/snapshot/filter.py +119 -0
  40. additory/functions/synthetic/__init__.py +113 -0
  41. additory/functions/synthetic/mode_detector.py +47 -0
  42. additory/functions/synthetic/strategies/__init__.py +1 -0
  43. additory/functions/synthetic/strategies/advanced.py +35 -0
  44. additory/functions/synthetic/strategies/augmentative.py +160 -0
  45. additory/functions/synthetic/strategies/generative.py +168 -0
  46. additory/functions/synthetic/strategies/presets.py +116 -0
  47. additory/functions/to/__init__.py +188 -0
  48. additory/functions/to/lookup.py +351 -0
  49. additory/functions/to/merge.py +189 -0
  50. additory/functions/to/sort.py +91 -0
  51. additory/functions/to/summarize.py +170 -0
  52. additory/functions/transform/__init__.py +140 -0
  53. additory/functions/transform/datetime.py +79 -0
  54. additory/functions/transform/extract.py +85 -0
  55. additory/functions/transform/harmonize.py +105 -0
  56. additory/functions/transform/knn.py +62 -0
  57. additory/functions/transform/onehotencoding.py +68 -0
  58. additory/functions/transform/transpose.py +42 -0
  59. additory-0.1.1a1.dist-info/METADATA +83 -0
  60. additory-0.1.1a1.dist-info/RECORD +62 -0
  61. additory/analysis/__init__.py +0 -48
  62. additory/analysis/cardinality.py +0 -126
  63. additory/analysis/correlations.py +0 -124
  64. additory/analysis/distributions.py +0 -376
  65. additory/analysis/quality.py +0 -158
  66. additory/analysis/scan.py +0 -400
  67. additory/common/backend.py +0 -371
  68. additory/common/column_utils.py +0 -191
  69. additory/common/exceptions.py +0 -62
  70. additory/common/lists.py +0 -229
  71. additory/common/patterns.py +0 -240
  72. additory/common/resolver.py +0 -567
  73. additory/common/sample_data.py +0 -182
  74. additory/core/ast_builder.py +0 -165
  75. additory/core/backends/__init__.py +0 -23
  76. additory/core/backends/arrow_bridge.py +0 -483
  77. additory/core/backends/cudf_bridge.py +0 -355
  78. additory/core/column_positioning.py +0 -358
  79. additory/core/compiler_polars.py +0 -166
  80. additory/core/enhanced_cache_manager.py +0 -1119
  81. additory/core/enhanced_matchers.py +0 -473
  82. additory/core/enhanced_version_manager.py +0 -325
  83. additory/core/executor.py +0 -59
  84. additory/core/integrity_manager.py +0 -477
  85. additory/core/loader.py +0 -190
  86. additory/core/namespace_manager.py +0 -657
  87. additory/core/parser.py +0 -176
  88. additory/core/polars_expression_engine.py +0 -601
  89. additory/core/registry.py +0 -176
  90. additory/core/sample_data_manager.py +0 -492
  91. additory/core/user_namespace.py +0 -751
  92. additory/core/validator.py +0 -27
  93. additory/dynamic_api.py +0 -304
  94. additory/expressions/proxy.py +0 -549
  95. additory/expressions/registry.py +0 -313
  96. additory/expressions/samples.py +0 -492
  97. additory/synthetic/__init__.py +0 -13
  98. additory/synthetic/column_name_resolver.py +0 -149
  99. additory/synthetic/distributions.py +0 -22
  100. additory/synthetic/forecast.py +0 -1132
  101. additory/synthetic/linked_list_parser.py +0 -415
  102. additory/synthetic/namespace_lookup.py +0 -129
  103. additory/synthetic/smote.py +0 -320
  104. additory/synthetic/strategies.py +0 -850
  105. additory/synthetic/synthesizer.py +0 -713
  106. additory/utilities/__init__.py +0 -53
  107. additory/utilities/encoding.py +0 -600
  108. additory/utilities/games.py +0 -300
  109. additory/utilities/keys.py +0 -8
  110. additory/utilities/lookup.py +0 -103
  111. additory/utilities/matchers.py +0 -216
  112. additory/utilities/resolvers.py +0 -286
  113. additory/utilities/settings.py +0 -167
  114. additory/utilities/units.py +0 -749
  115. additory/utilities/validators.py +0 -153
  116. additory-0.1.0a3.dist-info/METADATA +0 -288
  117. additory-0.1.0a3.dist-info/RECORD +0 -71
  118. additory-0.1.0a3.dist-info/licenses/LICENSE +0 -21
  119. {additory-0.1.0a3.dist-info → additory-0.1.1a1.dist-info}/WHEEL +0 -0
  120. {additory-0.1.0a3.dist-info → additory-0.1.1a1.dist-info}/top_level.txt +0 -0
@@ -1,547 +1,209 @@
- # memory_manager.py
- # Centralized memory management for enhanced expressions system
+ """
+ Memory management for Additory operations.

- import gc
- import os
- import psutil
- import threading
- import time
- from typing import Dict, List, Optional, Callable, Any
- from dataclasses import dataclass, field
- from datetime import datetime, timedelta
- from contextlib import contextmanager
-
- from .logging import log_info, log_warning
-
-
- @dataclass
- class MemorySnapshot:
-     """Memory usage snapshot at a point in time"""
-     timestamp: datetime
-     process_memory_mb: float
-     arrow_allocated_bytes: int
-     python_objects_count: int
-     gc_collections: Dict[int, int]
-     custom_metrics: Dict[str, Any] = field(default_factory=dict)
+ Provides automatic cleanup of intermediate objects to free memory:
+ - Tracks temporary DataFrames
+ - Cleans up after operations
+ - Can be enabled/disabled for debugging
+ """

-
- @dataclass
- class MemoryThresholds:
-     """Memory usage thresholds for cleanup triggers"""
-     process_memory_mb: float = 500.0  # Process memory threshold
-     arrow_memory_bytes: int = 100 * 1024 * 1024  # 100MB Arrow memory
-     python_objects_count: int = 100000  # Python object count
-     cleanup_interval_seconds: float = 30.0  # Periodic cleanup interval
-
-
- class MemoryLeakDetector:
-     """Detects potential memory leaks by tracking memory growth patterns"""
-
-     def __init__(self, window_size: int = 10, growth_threshold: float = 1.5):
-         self.window_size = window_size
-         self.growth_threshold = growth_threshold
-         self.snapshots: List[MemorySnapshot] = []
-         self.leak_warnings = 0
-
-     def add_snapshot(self, snapshot: MemorySnapshot):
-         """Add a memory snapshot for leak detection"""
-         self.snapshots.append(snapshot)
-
-         # Keep only the last window_size snapshots
-         if len(self.snapshots) > self.window_size:
-             self.snapshots.pop(0)
-
-         # Check for potential leaks
-         if len(self.snapshots) >= self.window_size:
-             self._check_for_leaks()
-
-     def _check_for_leaks(self):
-         """Check if memory usage shows signs of leaking"""
-         if len(self.snapshots) < 2:
-             return
-
-         # Calculate memory growth rate
-         first_snapshot = self.snapshots[0]
-         last_snapshot = self.snapshots[-1]
-
-         memory_growth = last_snapshot.process_memory_mb / max(first_snapshot.process_memory_mb, 1.0)  # Avoid division by zero
-         time_span = (last_snapshot.timestamp - first_snapshot.timestamp).total_seconds()
-
-         # Check for sustained memory growth (relaxed time threshold for testing)
-         if memory_growth > self.growth_threshold and time_span > 10:  # 10 seconds minimum
-             self.leak_warnings += 1
-             log_warning(f"[memory_manager] Potential memory leak detected: "
-                         f"{memory_growth:.2f}x growth over {time_span:.1f}s")
-
-     def get_leak_status(self) -> Dict[str, Any]:
-         """Get current leak detection status"""
-         return {
-             "leak_warnings": self.leak_warnings,
-             "snapshots_count": len(self.snapshots),
-             "monitoring_window": self.window_size,
-             "growth_threshold": self.growth_threshold
-         }
+ import gc
+ import sys
+ from typing import Any, Callable, List, Optional, Tuple
+ from functools import wraps


  class MemoryManager:
-     """Centralized memory management for enhanced expressions system"""
+     """
+     Manages memory cleanup after operations.
+
+     Tracks intermediate objects created during processing and
+     cleans them up automatically to free memory.
+     """

      def __init__(self):
-         # Configuration
-         self.thresholds = MemoryThresholds()
-         self.monitoring_enabled = True
-         self.auto_cleanup_enabled = True
-
-         # State tracking
-         self.snapshots: List[MemorySnapshot] = []
-         self.cleanup_callbacks: List[Callable[[], None]] = []
-         self.leak_detector = MemoryLeakDetector()
-
-         # Statistics
-         self.stats = {
-             "total_cleanups": 0,
-             "forced_cleanups": 0,
-             "auto_cleanups": 0,
-             "memory_freed_mb": 0.0,
-             "last_cleanup": None
-         }
-
-         # Background monitoring
-         self._monitoring_thread = None
-         self._stop_monitoring = threading.Event()
-
-         # Process reference for memory monitoring
-         try:
-             self.process = psutil.Process(os.getpid())
-         except Exception as e:
-             log_warning(f"[memory_manager] Failed to initialize process monitor: {e}")
-             self.process = None
-
-         log_info("[memory_manager] Memory Manager initialized")
+         """Initialize memory manager."""
+         self.tracked_objects: List[Tuple[Any, Optional[str]]] = []
+         self.cleanup_enabled: bool = True

-     def start_monitoring(self, interval_seconds: float = 30.0):
-         """Start background memory monitoring"""
-         if self._monitoring_thread and self._monitoring_thread.is_alive():
-             log_warning("[memory_manager] Monitoring already running")
-             return
-
-         self.thresholds.cleanup_interval_seconds = interval_seconds
-         self._stop_monitoring.clear()
-
-         self._monitoring_thread = threading.Thread(
-             target=self._monitoring_loop,
-             daemon=True,
-             name="MemoryMonitor"
-         )
-         self._monitoring_thread.start()
+     def track(self, obj: Any, name: Optional[str] = None) -> None:
+         """
+         Track object for cleanup.

-         log_info(f"[memory_manager] Started background monitoring (interval: {interval_seconds}s)")
-
-     def stop_monitoring(self):
-         """Stop background memory monitoring"""
-         if self._monitoring_thread and self._monitoring_thread.is_alive():
-             self._stop_monitoring.set()
-             self._monitoring_thread.join(timeout=5.0)
-             log_info("[memory_manager] Stopped background monitoring")
-
-     def _monitoring_loop(self):
-         """Background monitoring loop"""
-         while not self._stop_monitoring.is_set():
-             try:
-                 if self.monitoring_enabled:
-                     # Take memory snapshot
-                     snapshot = self.take_snapshot()
-
-                     # Check for cleanup needs
-                     if self.auto_cleanup_enabled and self._should_cleanup(snapshot):
-                         self.cleanup_if_needed()
-
-                 # Wait for next interval
-                 self._stop_monitoring.wait(self.thresholds.cleanup_interval_seconds)
-
-             except Exception as e:
-                 log_warning(f"[memory_manager] Monitoring loop error: {e}")
-                 time.sleep(5)  # Brief pause before retrying
-
-     def register_cleanup_callback(self, callback: Callable[[], None]):
-         """Register a callback function for memory cleanup"""
-         self.cleanup_callbacks.append(callback)
-         log_info(f"[memory_manager] Registered cleanup callback: {callback.__name__}")
-
-     def unregister_cleanup_callback(self, callback: Callable[[], None]):
-         """Unregister a cleanup callback"""
-         if callback in self.cleanup_callbacks:
-             self.cleanup_callbacks.remove(callback)
-             log_info(f"[memory_manager] Unregistered cleanup callback: {callback.__name__}")
-
-     def take_snapshot(self) -> MemorySnapshot:
-         """Take a snapshot of current memory usage"""
-         try:
-             # Process memory
-             process_memory_mb = 0.0
-             if self.process:
-                 memory_info = self.process.memory_info()
-                 process_memory_mb = memory_info.rss / 1024 / 1024
-
-             # Arrow memory (if available)
-             arrow_allocated_bytes = 0
-             try:
-                 import pyarrow as pa
-                 arrow_allocated_bytes = pa.default_memory_pool().bytes_allocated()
-             except ImportError:
-                 pass
-
-             # Python objects
-             python_objects_count = len(gc.get_objects())
-
-             # GC statistics
-             gc_collections = {i: gc.get_count()[i] for i in range(3)}
-
-             snapshot = MemorySnapshot(
-                 timestamp=datetime.now(),
-                 process_memory_mb=process_memory_mb,
-                 arrow_allocated_bytes=arrow_allocated_bytes,
-                 python_objects_count=python_objects_count,
-                 gc_collections=gc_collections
-             )
-
-             # Store snapshot
-             self.snapshots.append(snapshot)
-
-             # Keep only recent snapshots (last 100)
-             if len(self.snapshots) > 100:
-                 self.snapshots.pop(0)
-
-             # Add to leak detector
-             self.leak_detector.add_snapshot(snapshot)
+         Args:
+             obj: Object to track (typically DataFrame or Series)
+             name: Optional name for debugging

-             return snapshot
-
-         except Exception as e:
-             log_warning(f"[memory_manager] Failed to take memory snapshot: {e}")
-             return MemorySnapshot(
-                 timestamp=datetime.now(),
-                 process_memory_mb=0.0,
-                 arrow_allocated_bytes=0,
-                 python_objects_count=0,
-                 gc_collections={}
-             )
-
-     def _should_cleanup(self, snapshot: MemorySnapshot) -> bool:
-         """Check if cleanup is needed based on thresholds"""
-         return (
-             snapshot.process_memory_mb > self.thresholds.process_memory_mb or
-             snapshot.arrow_allocated_bytes > self.thresholds.arrow_memory_bytes or
-             snapshot.python_objects_count > self.thresholds.python_objects_count
-         )
+         Example:
+             memory_manager.track(temp_df, 'intermediate_result')
+         """
+         if self.cleanup_enabled:
+             self.tracked_objects.append((obj, name))

-     def cleanup_if_needed(self) -> bool:
-         """Perform cleanup if memory usage exceeds thresholds"""
-         snapshot = self.take_snapshot()
-
-         if not self._should_cleanup(snapshot):
-             return False
-
-         log_info(f"[memory_manager] Auto cleanup triggered - "
-                  f"Memory: {snapshot.process_memory_mb:.1f}MB, "
-                  f"Arrow: {snapshot.arrow_allocated_bytes} bytes, "
-                  f"Objects: {snapshot.python_objects_count}")
-
-         memory_before = snapshot.process_memory_mb
-         self._perform_cleanup()
-
-         # Take another snapshot to measure cleanup effectiveness
-         after_snapshot = self.take_snapshot()
-         memory_freed = max(0, memory_before - after_snapshot.process_memory_mb)
-
-         self.stats["auto_cleanups"] += 1
-         self.stats["memory_freed_mb"] += memory_freed
-         self.stats["last_cleanup"] = datetime.now()
-
-         log_info(f"[memory_manager] Auto cleanup completed - "
-                  f"Freed: {memory_freed:.1f}MB")
-
-         return True
-
-     def force_cleanup(self) -> Dict[str, float]:
-         """Force immediate memory cleanup"""
-         log_info("[memory_manager] Forcing memory cleanup")
-
-         before_snapshot = self.take_snapshot()
-         memory_before = before_snapshot.process_memory_mb
-
-         self._perform_cleanup()
-
-         after_snapshot = self.take_snapshot()
-         memory_after = after_snapshot.process_memory_mb
-         memory_freed = max(0, memory_before - memory_after)
-
-         self.stats["forced_cleanups"] += 1
-         self.stats["memory_freed_mb"] += memory_freed
-         self.stats["last_cleanup"] = datetime.now()
-
-         cleanup_stats = {
-             "memory_before_mb": memory_before,
-             "memory_after_mb": memory_after,
-             "memory_freed_mb": memory_freed,
-             "arrow_freed_bytes": before_snapshot.arrow_allocated_bytes - after_snapshot.arrow_allocated_bytes
-         }
+     def cleanup(self) -> None:
+         """
+         Clean up all tracked objects.

-         log_info(f"[memory_manager] Forced cleanup completed - "
-                  f"Freed: {memory_freed:.1f}MB")
+         Deletes references to tracked objects and runs garbage collection.
+         Called automatically at the end of every operation.

-         return cleanup_stats
-
-     def _perform_cleanup(self):
-         """Perform the actual cleanup operations"""
-         # Call registered cleanup callbacks
-         for callback in self.cleanup_callbacks:
+         Example:
              try:
-                 callback()
-             except Exception as e:
-                 log_warning(f"[memory_manager] Cleanup callback failed: {e}")
-
-         # Force garbage collection
-         collected = gc.collect()
-
-         # Additional cleanup for specific libraries
-         self._cleanup_arrow_memory()
-         self._cleanup_polars_memory()
+                 result = process_data(df)
+                 return result
+             finally:
+                 memory_manager.cleanup()
+         """
+         if not self.cleanup_enabled:
+             return

-         self.stats["total_cleanups"] += 1
+         # Count objects before cleanup
+         count = len(self.tracked_objects)

-         log_info(f"[memory_manager] Cleanup performed - "
-                  f"GC collected: {collected} objects")
-
-     def _cleanup_arrow_memory(self):
-         """Cleanup Arrow-specific memory"""
-         try:
-             import pyarrow as pa
-             # Force Arrow memory pool cleanup
-             pool = pa.default_memory_pool()
-             allocated_before = pool.bytes_allocated()
-
-             # Trigger garbage collection to free Arrow objects
-             gc.collect()
+         if count > 0:
+             # Estimate memory before cleanup (optional, for logging)
+             total_memory = 0
+             for obj, name in self.tracked_objects:
+                 try:
+                     memory = estimate_memory_usage(obj)
+                     total_memory += memory
+                 except:
+                     pass  # Ignore errors in memory estimation

-             allocated_after = pool.bytes_allocated()
-             freed = allocated_before - allocated_after
+             # Clear references
+             self.tracked_objects.clear()

-             if freed > 0:
-                 log_info(f"[memory_manager] Arrow cleanup freed {freed} bytes")
-
-         except ImportError:
-             pass
-         except Exception as e:
-             log_warning(f"[memory_manager] Arrow cleanup failed: {e}")
-
-     def _cleanup_polars_memory(self):
-         """Cleanup Polars-specific memory"""
-         try:
-             import polars as pl
-             # Polars cleanup is mostly handled by Rust's memory management
-             # But we can clear any cached data
+             # Force garbage collection
              gc.collect()

-         except ImportError:
-             pass
-         except Exception as e:
-             log_warning(f"[memory_manager] Polars cleanup failed: {e}")
-
-     def get_memory_stats(self) -> Dict[str, Any]:
-         """Get comprehensive memory statistics"""
-         current_snapshot = self.take_snapshot()
-
-         # Calculate memory trends
-         memory_trend = "stable"
-         if len(self.snapshots) >= 10:
-             recent_memory = sum(s.process_memory_mb for s in self.snapshots[-5:]) / min(5, len(self.snapshots))
-             older_memory = sum(s.process_memory_mb for s in self.snapshots[-10:-5]) / max(1, min(5, len(self.snapshots) - 5))
-
-             if recent_memory > older_memory * 1.1:
-                 memory_trend = "increasing"
-             elif recent_memory < older_memory * 0.9:
-                 memory_trend = "decreasing"
-         elif len(self.snapshots) >= 2:
-             # Simple trend for fewer snapshots
-             if self.snapshots[-1].process_memory_mb > self.snapshots[0].process_memory_mb * 1.1:
-                 memory_trend = "increasing"
-             elif self.snapshots[-1].process_memory_mb < self.snapshots[0].process_memory_mb * 0.9:
-                 memory_trend = "decreasing"
-
-         return {
-             "current": {
-                 "process_memory_mb": current_snapshot.process_memory_mb,
-                 "arrow_allocated_bytes": current_snapshot.arrow_allocated_bytes,
-                 "python_objects_count": current_snapshot.python_objects_count,
-                 "gc_collections": current_snapshot.gc_collections
-             },
-             "thresholds": {
-                 "process_memory_mb": self.thresholds.process_memory_mb,
-                 "arrow_memory_bytes": self.thresholds.arrow_memory_bytes,
-                 "python_objects_count": self.thresholds.python_objects_count
-             },
-             "trends": {
-                 "memory_trend": memory_trend,
-                 "snapshots_count": len(self.snapshots),
-                 "monitoring_enabled": self.monitoring_enabled
-             },
-             "cleanup_stats": self.stats.copy(),
-             "leak_detection": self.leak_detector.get_leak_status()
-         }
-
-     def get_memory_usage_mb(self) -> float:
-         """Get current memory usage in MB"""
-         if self.process:
+             # Log cleanup (if logger is available)
              try:
-                 return self.process.memory_info().rss / 1024 / 1024
-             except Exception:
-                 pass
-         return 0.0
-
-     def set_thresholds(self, **kwargs):
-         """Update memory thresholds"""
-         for key, value in kwargs.items():
-             if hasattr(self.thresholds, key):
-                 setattr(self.thresholds, key, value)
-                 log_info(f"[memory_manager] Updated threshold {key} = {value}")
-             else:
-                 log_warning(f"[memory_manager] Unknown threshold: {key}")
-
-     def enable_monitoring(self):
-         """Enable memory monitoring"""
-         self.monitoring_enabled = True
-         log_info("[memory_manager] Memory monitoring enabled")
+                 from .logging import get_logger
+                 logger = get_logger()
+                 if total_memory > 0:
+                     memory_mb = total_memory / (1024 * 1024)
+                     logger.info(
+                         f"Cleaned up {count} intermediate objects (~{memory_mb:.2f} MB)"
+                     )
+             except:
+                 pass  # Ignore if logger not available

-     def disable_monitoring(self):
-         """Disable memory monitoring"""
-         self.monitoring_enabled = False
-         log_info("[memory_manager] Memory monitoring disabled")
-
-     def enable_auto_cleanup(self):
-         """Enable automatic cleanup"""
-         self.auto_cleanup_enabled = True
-         log_info("[memory_manager] Auto cleanup enabled")
-
-     def disable_auto_cleanup(self):
-         """Disable automatic cleanup"""
-         self.auto_cleanup_enabled = False
-         log_info("[memory_manager] Auto cleanup disabled")
+     def enable(self) -> None:
+         """
+         Enable automatic cleanup.
+
+         Called by user code via add.enable_memory_cleanup()
+         """
+         self.cleanup_enabled = True

-     @contextmanager
-     def memory_context(self, description: str = "operation"):
-         """Context manager for monitoring memory usage during operations"""
-         before_snapshot = self.take_snapshot()
-         start_time = datetime.now()
+     def disable(self) -> None:
+         """
+         Disable automatic cleanup.

-         log_info(f"[memory_manager] Starting {description} - "
-                  f"Memory: {before_snapshot.process_memory_mb:.1f}MB")
+         Useful for debugging to inspect intermediate objects.
+         Called by user code via add.disable_memory_cleanup()
+         """
+         self.cleanup_enabled = False
+
+     def get_tracked_count(self) -> int:
+         """
+         Get number of tracked objects.

-         try:
-             yield self
-         finally:
-             after_snapshot = self.take_snapshot()
-             duration = (datetime.now() - start_time).total_seconds()
-             memory_delta = after_snapshot.process_memory_mb - before_snapshot.process_memory_mb
+         Returns:
+             Number of tracked objects

-             log_info(f"[memory_manager] Completed {description} - "
-                      f"Duration: {duration:.2f}s, "
-                      f"Memory delta: {memory_delta:+.1f}MB")
-
-             # Auto cleanup if needed
-             if self.auto_cleanup_enabled and memory_delta > 50:  # 50MB increase
-                 self.cleanup_if_needed()
-
-     def reset_stats(self):
-         """Reset memory management statistics"""
-         self.stats = {
-             "total_cleanups": 0,
-             "forced_cleanups": 0,
-             "auto_cleanups": 0,
-             "memory_freed_mb": 0.0,
-             "last_cleanup": None
-         }
-         self.leak_detector = MemoryLeakDetector()
-         log_info("[memory_manager] Statistics reset")
+         Used for testing and debugging.
+         """
+         return len(self.tracked_objects)

-     def __del__(self):
-         """Cleanup when memory manager is destroyed"""
-         try:
-             self.stop_monitoring()
-         except Exception:
-             pass
+     def clear_tracking(self) -> None:
+         """
+         Clear tracking list without cleanup.
+
+         Called after successful cleanup or when resetting state.
+         """
+         self.tracked_objects.clear()


  # Global memory manager instance
- _global_memory_manager = None
+ _global_memory_manager: Optional[MemoryManager] = None


  def get_memory_manager() -> MemoryManager:
-     """Get the global memory manager instance"""
+     """
+     Get the global memory manager instance.
+
+     Returns:
+         Global MemoryManager instance
+
+     Example:
+         memory_manager = get_memory_manager()
+         memory_manager.track(temp_df)
+     """
      global _global_memory_manager
      if _global_memory_manager is None:
          _global_memory_manager = MemoryManager()
      return _global_memory_manager


- def cleanup_memory():
-     """Convenience function for forcing memory cleanup"""
-     return get_memory_manager().force_cleanup()
-
-
- def get_memory_stats():
-     """Convenience function for getting memory statistics"""
-     return get_memory_manager().get_memory_stats()
-
-
- def memory_context(description: str = "operation"):
-     """Convenience function for memory monitoring context"""
-     return get_memory_manager().memory_context(description)
-
-
- def memory_profile(description: str = None):
+ def cleanup_after_operation(func: Callable) -> Callable:
      """
-     Decorator for profiling memory usage of functions.
+     Decorator to ensure cleanup after operation.
+
+     Wraps a function to automatically clean up tracked objects
+     after execution, even if an exception occurs.

      Args:
-         description: Optional description for the operation
+         func: Function to wrap
+
+     Returns:
+         Wrapped function with automatic cleanup

      Example:
-         @memory_profile("data processing")
-         def process_data(df):
-             return df.with_columns(...)
-     """
-     def decorator(func):
-         def wrapper(*args, **kwargs):
-             func_description = description or f"{func.__module__}.{func.__name__}"
-             with memory_context(func_description):
-                 return func(*args, **kwargs)
-         return wrapper
-     return decorator
-
-
- def track_memory_usage(func):
-     """
-     Simple decorator to track memory usage of a function.
-
-     Example:
-         @track_memory_usage
-         def expensive_operation():
-             # ... do work
+         @cleanup_after_operation
+         def to(df, ...):
+             # Function implementation
              pass
      """
+     @wraps(func)
      def wrapper(*args, **kwargs):
-         manager = get_memory_manager()
-         before = manager.get_memory_usage_mb()
-
+         memory_manager = get_memory_manager()
          try:
              result = func(*args, **kwargs)
              return result
          finally:
-             after = manager.get_memory_usage_mb()
-             delta = after - before
-
-             from .logging import log_info
-             log_info(f"[memory_profile] {func.__name__}: {delta:+.1f}MB")
+             memory_manager.cleanup()
+
+     return wrapper
+
+
+ def estimate_memory_usage(obj: Any) -> int:
+     """
+     Estimate memory usage of an object.

-     return wrapper
+     Args:
+         obj: Object to estimate (typically DataFrame or Series)
+
+     Returns:
+         Estimated memory usage in bytes
+
+     Note:
+         This is an approximation. Actual memory usage may vary.
+     """
+     try:
+         # Try to get memory usage from object (Polars/pandas)
+         if hasattr(obj, 'estimated_size'):
+             # Polars DataFrame
+             return obj.estimated_size()
+         elif hasattr(obj, 'memory_usage'):
+             # pandas DataFrame or Series
+             memory_usage = obj.memory_usage(deep=True)
+             if hasattr(memory_usage, 'sum'):
+                 return int(memory_usage.sum())
+             return int(memory_usage)
+         else:
+             # Fallback to sys.getsizeof
+             return sys.getsizeof(obj)
+     except:
+         # If all else fails, return 0
+         return 0
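
For orientation, here is a minimal usage sketch of the new memory-management API, pieced together from the docstrings in the diff above. Only get_memory_manager, MemoryManager.track, and cleanup_after_operation come from additory.core.memory_manager; the summarize function, the CSV path, and the Polars calls are hypothetical placeholders used purely to illustrate the call pattern.

# Hypothetical usage sketch -- only the additory imports reflect the diff above;
# polars, read_csv, and the summarize function are illustrative assumptions.
import polars as pl

from additory.core.memory_manager import cleanup_after_operation, get_memory_manager


@cleanup_after_operation                # tracked objects are released even on error
def summarize(path: str) -> pl.DataFrame:
    manager = get_memory_manager()

    df = pl.read_csv(path)              # hypothetical input frame
    temp = df.drop_nulls()              # intermediate result that is not needed afterwards
    manager.track(temp, "intermediate_result")  # registered for cleanup

    return temp.describe()              # cleanup() runs in the decorator's finally block

The docstrings also refer to add.enable_memory_cleanup() and add.disable_memory_cleanup() as the user-facing toggles for MemoryManager.enable() and disable(), which is the hook to use when intermediate objects should be kept around for debugging instead of being released.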