additory-0.1.0a1-py3-none-any.whl
This diff represents the content of publicly available package versions released to one of the supported registries. The information is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- additory/__init__.py +15 -0
- additory/analysis/__init__.py +48 -0
- additory/analysis/cardinality.py +126 -0
- additory/analysis/correlations.py +124 -0
- additory/analysis/distributions.py +376 -0
- additory/analysis/quality.py +158 -0
- additory/analysis/scan.py +400 -0
- additory/augment/__init__.py +24 -0
- additory/augment/augmentor.py +653 -0
- additory/augment/builtin_lists.py +430 -0
- additory/augment/distributions.py +22 -0
- additory/augment/forecast.py +1132 -0
- additory/augment/list_registry.py +177 -0
- additory/augment/smote.py +320 -0
- additory/augment/strategies.py +883 -0
- additory/common/__init__.py +157 -0
- additory/common/backend.py +355 -0
- additory/common/column_utils.py +191 -0
- additory/common/distributions.py +737 -0
- additory/common/exceptions.py +62 -0
- additory/common/lists.py +229 -0
- additory/common/patterns.py +240 -0
- additory/common/resolver.py +567 -0
- additory/common/sample_data.py +182 -0
- additory/common/validation.py +197 -0
- additory/core/__init__.py +27 -0
- additory/core/ast_builder.py +165 -0
- additory/core/backends/__init__.py +23 -0
- additory/core/backends/arrow_bridge.py +476 -0
- additory/core/backends/cudf_bridge.py +355 -0
- additory/core/column_positioning.py +358 -0
- additory/core/compiler_polars.py +166 -0
- additory/core/config.py +342 -0
- additory/core/enhanced_cache_manager.py +1119 -0
- additory/core/enhanced_matchers.py +473 -0
- additory/core/enhanced_version_manager.py +325 -0
- additory/core/executor.py +59 -0
- additory/core/integrity_manager.py +477 -0
- additory/core/loader.py +190 -0
- additory/core/logging.py +24 -0
- additory/core/memory_manager.py +547 -0
- additory/core/namespace_manager.py +657 -0
- additory/core/parser.py +176 -0
- additory/core/polars_expression_engine.py +551 -0
- additory/core/registry.py +176 -0
- additory/core/sample_data_manager.py +492 -0
- additory/core/user_namespace.py +751 -0
- additory/core/validator.py +27 -0
- additory/dynamic_api.py +308 -0
- additory/expressions/__init__.py +26 -0
- additory/expressions/engine.py +551 -0
- additory/expressions/parser.py +176 -0
- additory/expressions/proxy.py +546 -0
- additory/expressions/registry.py +313 -0
- additory/expressions/samples.py +492 -0
- additory/synthetic/__init__.py +101 -0
- additory/synthetic/api.py +220 -0
- additory/synthetic/common_integration.py +314 -0
- additory/synthetic/config.py +262 -0
- additory/synthetic/engines.py +529 -0
- additory/synthetic/exceptions.py +180 -0
- additory/synthetic/file_managers.py +518 -0
- additory/synthetic/generator.py +702 -0
- additory/synthetic/generator_parser.py +68 -0
- additory/synthetic/integration.py +319 -0
- additory/synthetic/models.py +241 -0
- additory/synthetic/pattern_resolver.py +573 -0
- additory/synthetic/performance.py +469 -0
- additory/synthetic/polars_integration.py +464 -0
- additory/synthetic/proxy.py +60 -0
- additory/synthetic/schema_parser.py +685 -0
- additory/synthetic/validator.py +553 -0
- additory/utilities/__init__.py +53 -0
- additory/utilities/encoding.py +600 -0
- additory/utilities/games.py +300 -0
- additory/utilities/keys.py +8 -0
- additory/utilities/lookup.py +103 -0
- additory/utilities/matchers.py +216 -0
- additory/utilities/resolvers.py +286 -0
- additory/utilities/settings.py +167 -0
- additory/utilities/units.py +746 -0
- additory/utilities/validators.py +153 -0
- additory-0.1.0a1.dist-info/METADATA +293 -0
- additory-0.1.0a1.dist-info/RECORD +87 -0
- additory-0.1.0a1.dist-info/WHEEL +5 -0
- additory-0.1.0a1.dist-info/licenses/LICENSE +21 -0
- additory-0.1.0a1.dist-info/top_level.txt +1 -0
additory/core/memory_manager.py
@@ -0,0 +1,547 @@
# memory_manager.py
# Centralized memory management for enhanced expressions system

import gc
import os
import psutil
import threading
import time
from typing import Dict, List, Optional, Callable, Any
from dataclasses import dataclass, field
from datetime import datetime, timedelta
from contextlib import contextmanager

from .logging import log_info, log_warning


@dataclass
class MemorySnapshot:
    """Memory usage snapshot at a point in time"""
    timestamp: datetime
    process_memory_mb: float
    arrow_allocated_bytes: int
    python_objects_count: int
    gc_collections: Dict[int, int]
    custom_metrics: Dict[str, Any] = field(default_factory=dict)


@dataclass
class MemoryThresholds:
    """Memory usage thresholds for cleanup triggers"""
    process_memory_mb: float = 500.0  # Process memory threshold
    arrow_memory_bytes: int = 100 * 1024 * 1024  # 100MB Arrow memory
    python_objects_count: int = 100000  # Python object count
    cleanup_interval_seconds: float = 30.0  # Periodic cleanup interval


class MemoryLeakDetector:
    """Detects potential memory leaks by tracking memory growth patterns"""

    def __init__(self, window_size: int = 10, growth_threshold: float = 1.5):
        self.window_size = window_size
        self.growth_threshold = growth_threshold
        self.snapshots: List[MemorySnapshot] = []
        self.leak_warnings = 0

    def add_snapshot(self, snapshot: MemorySnapshot):
        """Add a memory snapshot for leak detection"""
        self.snapshots.append(snapshot)

        # Keep only the last window_size snapshots
        if len(self.snapshots) > self.window_size:
            self.snapshots.pop(0)

        # Check for potential leaks
        if len(self.snapshots) >= self.window_size:
            self._check_for_leaks()

    def _check_for_leaks(self):
        """Check if memory usage shows signs of leaking"""
        if len(self.snapshots) < 2:
            return

        # Calculate memory growth rate
        first_snapshot = self.snapshots[0]
        last_snapshot = self.snapshots[-1]

        memory_growth = last_snapshot.process_memory_mb / max(first_snapshot.process_memory_mb, 1.0)  # Avoid division by zero
        time_span = (last_snapshot.timestamp - first_snapshot.timestamp).total_seconds()

        # Check for sustained memory growth (relaxed time threshold for testing)
        if memory_growth > self.growth_threshold and time_span > 10:  # 10 seconds minimum
            self.leak_warnings += 1
            log_warning(f"[memory_manager] Potential memory leak detected: "
                        f"{memory_growth:.2f}x growth over {time_span:.1f}s")

    def get_leak_status(self) -> Dict[str, Any]:
        """Get current leak detection status"""
        return {
            "leak_warnings": self.leak_warnings,
            "snapshots_count": len(self.snapshots),
            "monitoring_window": self.window_size,
            "growth_threshold": self.growth_threshold
        }


class MemoryManager:
    """Centralized memory management for enhanced expressions system"""

    def __init__(self):
        # Configuration
        self.thresholds = MemoryThresholds()
        self.monitoring_enabled = True
        self.auto_cleanup_enabled = True

        # State tracking
        self.snapshots: List[MemorySnapshot] = []
        self.cleanup_callbacks: List[Callable[[], None]] = []
        self.leak_detector = MemoryLeakDetector()

        # Statistics
        self.stats = {
            "total_cleanups": 0,
            "forced_cleanups": 0,
            "auto_cleanups": 0,
            "memory_freed_mb": 0.0,
            "last_cleanup": None
        }

        # Background monitoring
        self._monitoring_thread = None
        self._stop_monitoring = threading.Event()

        # Process reference for memory monitoring
        try:
            self.process = psutil.Process(os.getpid())
        except Exception as e:
            log_warning(f"[memory_manager] Failed to initialize process monitor: {e}")
            self.process = None

        log_info("[memory_manager] Memory Manager initialized")

    def start_monitoring(self, interval_seconds: float = 30.0):
        """Start background memory monitoring"""
        if self._monitoring_thread and self._monitoring_thread.is_alive():
            log_warning("[memory_manager] Monitoring already running")
            return

        self.thresholds.cleanup_interval_seconds = interval_seconds
        self._stop_monitoring.clear()

        self._monitoring_thread = threading.Thread(
            target=self._monitoring_loop,
            daemon=True,
            name="MemoryMonitor"
        )
        self._monitoring_thread.start()

        log_info(f"[memory_manager] Started background monitoring (interval: {interval_seconds}s)")

    def stop_monitoring(self):
        """Stop background memory monitoring"""
        if self._monitoring_thread and self._monitoring_thread.is_alive():
            self._stop_monitoring.set()
            self._monitoring_thread.join(timeout=5.0)
            log_info("[memory_manager] Stopped background monitoring")

    def _monitoring_loop(self):
        """Background monitoring loop"""
        while not self._stop_monitoring.is_set():
            try:
                if self.monitoring_enabled:
                    # Take memory snapshot
                    snapshot = self.take_snapshot()

                    # Check for cleanup needs
                    if self.auto_cleanup_enabled and self._should_cleanup(snapshot):
                        self.cleanup_if_needed()

                # Wait for next interval
                self._stop_monitoring.wait(self.thresholds.cleanup_interval_seconds)

            except Exception as e:
                log_warning(f"[memory_manager] Monitoring loop error: {e}")
                time.sleep(5)  # Brief pause before retrying

    def register_cleanup_callback(self, callback: Callable[[], None]):
        """Register a callback function for memory cleanup"""
        self.cleanup_callbacks.append(callback)
        log_info(f"[memory_manager] Registered cleanup callback: {callback.__name__}")

    def unregister_cleanup_callback(self, callback: Callable[[], None]):
        """Unregister a cleanup callback"""
        if callback in self.cleanup_callbacks:
            self.cleanup_callbacks.remove(callback)
            log_info(f"[memory_manager] Unregistered cleanup callback: {callback.__name__}")

    def take_snapshot(self) -> MemorySnapshot:
        """Take a snapshot of current memory usage"""
        try:
            # Process memory
            process_memory_mb = 0.0
            if self.process:
                memory_info = self.process.memory_info()
                process_memory_mb = memory_info.rss / 1024 / 1024

            # Arrow memory (if available)
            arrow_allocated_bytes = 0
            try:
                import pyarrow as pa
                arrow_allocated_bytes = pa.default_memory_pool().bytes_allocated()
            except ImportError:
                pass

            # Python objects
            python_objects_count = len(gc.get_objects())

            # GC statistics
            gc_collections = {i: gc.get_count()[i] for i in range(3)}

            snapshot = MemorySnapshot(
                timestamp=datetime.now(),
                process_memory_mb=process_memory_mb,
                arrow_allocated_bytes=arrow_allocated_bytes,
                python_objects_count=python_objects_count,
                gc_collections=gc_collections
            )

            # Store snapshot
            self.snapshots.append(snapshot)

            # Keep only recent snapshots (last 100)
            if len(self.snapshots) > 100:
                self.snapshots.pop(0)

            # Add to leak detector
            self.leak_detector.add_snapshot(snapshot)

            return snapshot

        except Exception as e:
            log_warning(f"[memory_manager] Failed to take memory snapshot: {e}")
            return MemorySnapshot(
                timestamp=datetime.now(),
                process_memory_mb=0.0,
                arrow_allocated_bytes=0,
                python_objects_count=0,
                gc_collections={}
            )

    def _should_cleanup(self, snapshot: MemorySnapshot) -> bool:
        """Check if cleanup is needed based on thresholds"""
        return (
            snapshot.process_memory_mb > self.thresholds.process_memory_mb or
            snapshot.arrow_allocated_bytes > self.thresholds.arrow_memory_bytes or
            snapshot.python_objects_count > self.thresholds.python_objects_count
        )

    def cleanup_if_needed(self) -> bool:
        """Perform cleanup if memory usage exceeds thresholds"""
        snapshot = self.take_snapshot()

        if not self._should_cleanup(snapshot):
            return False

        log_info(f"[memory_manager] Auto cleanup triggered - "
                 f"Memory: {snapshot.process_memory_mb:.1f}MB, "
                 f"Arrow: {snapshot.arrow_allocated_bytes} bytes, "
                 f"Objects: {snapshot.python_objects_count}")

        memory_before = snapshot.process_memory_mb
        self._perform_cleanup()

        # Take another snapshot to measure cleanup effectiveness
        after_snapshot = self.take_snapshot()
        memory_freed = max(0, memory_before - after_snapshot.process_memory_mb)

        self.stats["auto_cleanups"] += 1
        self.stats["memory_freed_mb"] += memory_freed
        self.stats["last_cleanup"] = datetime.now()

        log_info(f"[memory_manager] Auto cleanup completed - "
                 f"Freed: {memory_freed:.1f}MB")

        return True

    def force_cleanup(self) -> Dict[str, float]:
        """Force immediate memory cleanup"""
        log_info("[memory_manager] Forcing memory cleanup")

        before_snapshot = self.take_snapshot()
        memory_before = before_snapshot.process_memory_mb

        self._perform_cleanup()

        after_snapshot = self.take_snapshot()
        memory_after = after_snapshot.process_memory_mb
        memory_freed = max(0, memory_before - memory_after)

        self.stats["forced_cleanups"] += 1
        self.stats["memory_freed_mb"] += memory_freed
        self.stats["last_cleanup"] = datetime.now()

        cleanup_stats = {
            "memory_before_mb": memory_before,
            "memory_after_mb": memory_after,
            "memory_freed_mb": memory_freed,
            "arrow_freed_bytes": before_snapshot.arrow_allocated_bytes - after_snapshot.arrow_allocated_bytes
        }

        log_info(f"[memory_manager] Forced cleanup completed - "
                 f"Freed: {memory_freed:.1f}MB")

        return cleanup_stats

    def _perform_cleanup(self):
        """Perform the actual cleanup operations"""
        # Call registered cleanup callbacks
        for callback in self.cleanup_callbacks:
            try:
                callback()
            except Exception as e:
                log_warning(f"[memory_manager] Cleanup callback failed: {e}")

        # Force garbage collection
        collected = gc.collect()

        # Additional cleanup for specific libraries
        self._cleanup_arrow_memory()
        self._cleanup_polars_memory()

        self.stats["total_cleanups"] += 1

        log_info(f"[memory_manager] Cleanup performed - "
                 f"GC collected: {collected} objects")

    def _cleanup_arrow_memory(self):
        """Cleanup Arrow-specific memory"""
        try:
            import pyarrow as pa
            # Force Arrow memory pool cleanup
            pool = pa.default_memory_pool()
            allocated_before = pool.bytes_allocated()

            # Trigger garbage collection to free Arrow objects
            gc.collect()

            allocated_after = pool.bytes_allocated()
            freed = allocated_before - allocated_after

            if freed > 0:
                log_info(f"[memory_manager] Arrow cleanup freed {freed} bytes")

        except ImportError:
            pass
        except Exception as e:
            log_warning(f"[memory_manager] Arrow cleanup failed: {e}")

    def _cleanup_polars_memory(self):
        """Cleanup Polars-specific memory"""
        try:
            import polars as pl
            # Polars cleanup is mostly handled by Rust's memory management
            # But we can clear any cached data
            gc.collect()

        except ImportError:
            pass
        except Exception as e:
            log_warning(f"[memory_manager] Polars cleanup failed: {e}")

    def get_memory_stats(self) -> Dict[str, Any]:
        """Get comprehensive memory statistics"""
        current_snapshot = self.take_snapshot()

        # Calculate memory trends
        memory_trend = "stable"
        if len(self.snapshots) >= 10:
            recent_memory = sum(s.process_memory_mb for s in self.snapshots[-5:]) / min(5, len(self.snapshots))
            older_memory = sum(s.process_memory_mb for s in self.snapshots[-10:-5]) / max(1, min(5, len(self.snapshots) - 5))

            if recent_memory > older_memory * 1.1:
                memory_trend = "increasing"
            elif recent_memory < older_memory * 0.9:
                memory_trend = "decreasing"
        elif len(self.snapshots) >= 2:
            # Simple trend for fewer snapshots
            if self.snapshots[-1].process_memory_mb > self.snapshots[0].process_memory_mb * 1.1:
                memory_trend = "increasing"
            elif self.snapshots[-1].process_memory_mb < self.snapshots[0].process_memory_mb * 0.9:
                memory_trend = "decreasing"

        return {
            "current": {
                "process_memory_mb": current_snapshot.process_memory_mb,
                "arrow_allocated_bytes": current_snapshot.arrow_allocated_bytes,
                "python_objects_count": current_snapshot.python_objects_count,
                "gc_collections": current_snapshot.gc_collections
            },
            "thresholds": {
                "process_memory_mb": self.thresholds.process_memory_mb,
                "arrow_memory_bytes": self.thresholds.arrow_memory_bytes,
                "python_objects_count": self.thresholds.python_objects_count
            },
            "trends": {
                "memory_trend": memory_trend,
                "snapshots_count": len(self.snapshots),
                "monitoring_enabled": self.monitoring_enabled
            },
            "cleanup_stats": self.stats.copy(),
            "leak_detection": self.leak_detector.get_leak_status()
        }

    def get_memory_usage_mb(self) -> float:
        """Get current memory usage in MB"""
        if self.process:
            try:
                return self.process.memory_info().rss / 1024 / 1024
            except Exception:
                pass
        return 0.0

    def set_thresholds(self, **kwargs):
        """Update memory thresholds"""
        for key, value in kwargs.items():
            if hasattr(self.thresholds, key):
                setattr(self.thresholds, key, value)
                log_info(f"[memory_manager] Updated threshold {key} = {value}")
            else:
                log_warning(f"[memory_manager] Unknown threshold: {key}")

    def enable_monitoring(self):
        """Enable memory monitoring"""
        self.monitoring_enabled = True
        log_info("[memory_manager] Memory monitoring enabled")

    def disable_monitoring(self):
        """Disable memory monitoring"""
        self.monitoring_enabled = False
        log_info("[memory_manager] Memory monitoring disabled")

    def enable_auto_cleanup(self):
        """Enable automatic cleanup"""
        self.auto_cleanup_enabled = True
        log_info("[memory_manager] Auto cleanup enabled")

    def disable_auto_cleanup(self):
        """Disable automatic cleanup"""
        self.auto_cleanup_enabled = False
        log_info("[memory_manager] Auto cleanup disabled")

    @contextmanager
    def memory_context(self, description: str = "operation"):
        """Context manager for monitoring memory usage during operations"""
        before_snapshot = self.take_snapshot()
        start_time = datetime.now()

        log_info(f"[memory_manager] Starting {description} - "
                 f"Memory: {before_snapshot.process_memory_mb:.1f}MB")

        try:
            yield self
        finally:
            after_snapshot = self.take_snapshot()
            duration = (datetime.now() - start_time).total_seconds()
            memory_delta = after_snapshot.process_memory_mb - before_snapshot.process_memory_mb

            log_info(f"[memory_manager] Completed {description} - "
                     f"Duration: {duration:.2f}s, "
                     f"Memory delta: {memory_delta:+.1f}MB")

            # Auto cleanup if needed
            if self.auto_cleanup_enabled and memory_delta > 50:  # 50MB increase
                self.cleanup_if_needed()

    def reset_stats(self):
        """Reset memory management statistics"""
        self.stats = {
            "total_cleanups": 0,
            "forced_cleanups": 0,
            "auto_cleanups": 0,
            "memory_freed_mb": 0.0,
            "last_cleanup": None
        }
        self.leak_detector = MemoryLeakDetector()
        log_info("[memory_manager] Statistics reset")

    def __del__(self):
        """Cleanup when memory manager is destroyed"""
        try:
            self.stop_monitoring()
        except Exception:
            pass


# Global memory manager instance
_global_memory_manager = None


def get_memory_manager() -> MemoryManager:
    """Get the global memory manager instance"""
    global _global_memory_manager
    if _global_memory_manager is None:
        _global_memory_manager = MemoryManager()
    return _global_memory_manager


def cleanup_memory():
    """Convenience function for forcing memory cleanup"""
    return get_memory_manager().force_cleanup()


def get_memory_stats():
    """Convenience function for getting memory statistics"""
    return get_memory_manager().get_memory_stats()


def memory_context(description: str = "operation"):
    """Convenience function for memory monitoring context"""
    return get_memory_manager().memory_context(description)


def memory_profile(description: str = None):
    """
    Decorator for profiling memory usage of functions.

    Args:
        description: Optional description for the operation

    Example:
        @memory_profile("data processing")
        def process_data(df):
            return df.with_columns(...)
    """
    def decorator(func):
        def wrapper(*args, **kwargs):
            func_description = description or f"{func.__module__}.{func.__name__}"
            with memory_context(func_description):
                return func(*args, **kwargs)
        return wrapper
    return decorator


def track_memory_usage(func):
    """
    Simple decorator to track memory usage of a function.

    Example:
        @track_memory_usage
        def expensive_operation():
            # ... do work
            pass
    """
    def wrapper(*args, **kwargs):
        manager = get_memory_manager()
        before = manager.get_memory_usage_mb()

        try:
            result = func(*args, **kwargs)
            return result
        finally:
            after = manager.get_memory_usage_mb()
            delta = after - before

            from .logging import log_info
            log_info(f"[memory_profile] {func.__name__}: {delta:+.1f}MB")

    return wrapper
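The module's public surface is small: get_memory_manager, memory_context, memory_profile, track_memory_usage, cleanup_memory, and get_memory_stats. The sketch below shows one way it might be wired up; it assumes the module is importable as additory.core.memory_manager (the path implied by the wheel layout) and that psutil is installed, and the surrounding workflow is illustrative only, not documented behaviour of the package.

from additory.core.memory_manager import (
    cleanup_memory,
    get_memory_manager,
    get_memory_stats,
    memory_context,
    memory_profile,
)

# Configure the singleton manager and start the background monitor thread.
manager = get_memory_manager()
manager.set_thresholds(process_memory_mb=1024.0)  # raise the 500 MB default
manager.start_monitoring(interval_seconds=60.0)

# Wrap a block whose memory footprint should be logged; a delta above 50 MB
# also triggers cleanup_if_needed() when auto cleanup is enabled.
with memory_context("bulk load"):
    data = [bytes(1024) for _ in range(10_000)]

# Or decorate a function instead of using the context manager directly.
@memory_profile("transform step")
def transform(rows):
    return [r * 2 for r in rows]

transform(list(range(1_000)))

stats = get_memory_stats()   # nested dict: "current", "thresholds", "trends", ...
report = cleanup_memory()    # force a cleanup pass; returns before/after figures
manager.stop_monitoring()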