M3Drop 0.4.44__tar.gz → 0.4.45__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {m3drop-0.4.44 → m3drop-0.4.45/M3Drop.egg-info}/PKG-INFO +1 -1
- {m3drop-0.4.44 → m3drop-0.4.45}/M3Drop.egg-info/SOURCES.txt +2 -0
- {m3drop-0.4.44/M3Drop.egg-info → m3drop-0.4.45}/PKG-INFO +1 -1
- m3drop-0.4.45/m3Drop/ControlDeviceCPU.py +218 -0
- m3drop-0.4.45/m3Drop/ControlDeviceGPU.py +236 -0
- {m3drop-0.4.44 → m3drop-0.4.45}/m3Drop/CoreCPU.py +4 -6
- {m3drop-0.4.44 → m3drop-0.4.45}/m3Drop/DiagnosticsCPU.py +3 -9
- {m3drop-0.4.44 → m3drop-0.4.45}/m3Drop/NormalizationCPU.py +2 -5
- {m3drop-0.4.44 → m3drop-0.4.45}/setup.py +1 -1
- {m3drop-0.4.44 → m3drop-0.4.45}/LICENSE +0 -0
- {m3drop-0.4.44 → m3drop-0.4.45}/M3Drop.egg-info/dependency_links.txt +0 -0
- {m3drop-0.4.44 → m3drop-0.4.45}/M3Drop.egg-info/requires.txt +0 -0
- {m3drop-0.4.44 → m3drop-0.4.45}/M3Drop.egg-info/top_level.txt +0 -0
- {m3drop-0.4.44 → m3drop-0.4.45}/README.md +0 -0
- {m3drop-0.4.44 → m3drop-0.4.45}/m3Drop/CoreGPU.py +0 -0
- {m3drop-0.4.44 → m3drop-0.4.45}/m3Drop/DiagnosticsGPU.py +0 -0
- {m3drop-0.4.44 → m3drop-0.4.45}/m3Drop/NormalizationGPU.py +0 -0
- {m3drop-0.4.44 → m3drop-0.4.45}/m3Drop/__init__.py +0 -0
- {m3drop-0.4.44 → m3drop-0.4.45}/pyproject.toml +0 -0
- {m3drop-0.4.44 → m3drop-0.4.45}/setup.cfg +0 -0
m3drop-0.4.45/m3Drop/ControlDeviceCPU.py (new file)
@@ -0,0 +1,218 @@
+import numpy as np
+import h5py
+import os
+import re
+import sys
+
+# Try to import cpuinfo for L3 cache detection
+try:
+    import cpuinfo
+except ImportError:
+    cpuinfo = None
+
+# Try to import psutil for System RAM detection
+try:
+    import psutil
+except ImportError:
+    psutil = None
+
+class ControlDevice:
+    def __init__(
+        self,
+        indptr: np.ndarray,
+        total_rows: int,
+        n_genes: int,
+        l3_cache_mb: float = None,
+        ram_limit_mb: float = None,
+        os_floor: int = 2048,
+        mode: str = "auto",
+        manual_target: int = 5000,
+        **kwargs
+    ):
+        """
+        CPU-Optimized Resource Governor.
+        Manages chunk sizes based on L3 Cache (Speed) and System RAM (Safety).
+        """
+        self.indptr = indptr
+        self.total_rows = total_rows
+        self.n_genes = n_genes
+        self.mode = mode.lower()
+        self.manual_target = manual_target
+
+        # --- AUTO-DETECT HARDWARE (SLURM PRIORITY) ---
+        # On CPU, the "Limit" is the System RAM (Host Memory)
+        if ram_limit_mb is None:
+            self.ram_limit_mb = self._detect_real_memory_limit()
+        else:
+            self.ram_limit_mb = ram_limit_mb
+
+        if l3_cache_mb is None:
+            self.l3_cache_mb = self._detect_l3_cache()
+        else:
+            self.l3_cache_mb = l3_cache_mb
+
+        # --- BUDGETS ---
+        # 1. L3 Budget (Speed Target) - 90% of L3
+        # Keeping chunks inside L3 prevents cache-thrashing, speeding up numpy ops.
+        self.l3_budget_bytes = (self.l3_cache_mb * 1024 * 1024) * 0.90
+
+        # 2. RAM Budget (Safety Net) - 85% of Total Available
+        # Slightly more conservative on CPU to leave room for OS/Python overhead
+        self.ram_budget_bytes = (self.ram_limit_mb * 1024 * 1024) * 0.85
+
+        self.os_floor = os_floor
+        self.bytes_per_item = 16 # Float64 Sparse estimate
+
+        # --- DIAGNOSTICS PRINT ---
+        print(f"\n-------------- CONTROL DEVICE (CPU) --------------")
+        if self.mode == "manual":
+            print(f" > Mode: MANUAL")
+        else:
+            print(f" > Mode: AUTO (L3 Cache Optimized)")
+
+        print(f" > L3 Cache: {self.l3_budget_bytes / (1024**2):.2f} MB / {self.l3_cache_mb:.2f} MB")
+
+        # RAM Reporting
+        ram_total_mb = self.ram_limit_mb
+        ram_budget_mb = self.ram_budget_bytes / (1024**2)
+        print(f" > RAM Budget: {ram_budget_mb:.2f} MB / {ram_total_mb:.2f} MB")
+        print(f"--------------------------------------------------\n")
+
+    def _detect_real_memory_limit(self) -> float:
+        """
+        Detects the TRUE memory limit, prioritizing SLURM Env Vars.
+        Crucial for HPC environments where psutil sees the whole node, not the job limit.
+        """
+        limits = []
+        # 1. Check SLURM (HPC)
+        if 'SLURM_MEM_PER_NODE' in os.environ:
+            try: limits.append(float(os.environ['SLURM_MEM_PER_NODE']))
+            except: pass
+        if 'SLURM_MEM_PER_CPU' in os.environ and 'SLURM_CPUS_ON_NODE' in os.environ:
+            try: limits.append(float(os.environ['SLURM_MEM_PER_CPU']) * float(os.environ['SLURM_CPUS_ON_NODE']))
+            except: pass
+
+        # 2. Check Cgroups (Docker/Containers)
+        if os.path.exists('/sys/fs/cgroup/memory/memory.limit_in_bytes'):
+            try:
+                with open('/sys/fs/cgroup/memory/memory.limit_in_bytes', 'r') as f:
+                    val = float(f.read().strip())
+                    if val < 1e15: limits.append(val / (1024**2))
+            except: pass
+        if os.path.exists('/sys/fs/cgroup/memory.max'):
+            try:
+                with open('/sys/fs/cgroup/memory.max', 'r') as f:
+                    val_str = f.read().strip()
+                    if val_str != "max":
+                        val = float(val_str)
+                        if val < 1e15: limits.append(val / (1024**2))
+            except: pass
+
+        # 3. Check Physical RAM (Laptop/Desktop)
+        if psutil:
+            # Use .total because we calculate budget as % of total.
+            # On a shared laptop, you might prefer .available, but .total is safer for consistency.
+            limits.append(psutil.virtual_memory().total / (1024**2))
+
+        if not limits: return 4096.0 # Default fallback
+        return min(limits)
+
+    def _detect_l3_cache(self) -> float:
+        try:
+            if cpuinfo:
+                info = cpuinfo.get_cpu_info()
+                if 'l3_cache_size' in info:
+                    value = info['l3_cache_size']
+                    if isinstance(value, int): return value / (1024 * 1024)
+                    elif isinstance(value, str):
+                        digits = float(re.findall(r"[\d\.]+", value)[0])
+                        if "KB" in value.upper(): return digits / 1024
+                        elif "MB" in value.upper(): return digits
+        except Exception: pass
+        return 8.0 # Conservative default for Laptops
+
+    @classmethod
+    def from_h5ad(cls, filepath: str, mode: str = "auto", manual_target: int = 5000, **kwargs):
+        if not os.path.exists(filepath): raise FileNotFoundError(f"File not found: {filepath}")
+        with h5py.File(filepath, "r") as f:
+            if isinstance(f['X'], h5py.Group) and 'indptr' in f['X']:
+                indptr_loaded = f['X']['indptr'][:]
+                if 'shape' in f['X'].attrs:
+                    shape = f['X'].attrs['shape']
+                    total_rows, n_genes = shape[0], shape[1]
+                else:
+                    total_rows = len(indptr_loaded) - 1
+                    n_genes = len(f['var']) if 'var' in f else 1
+                return cls(
+                    indptr=indptr_loaded,
+                    total_rows=total_rows,
+                    n_genes=n_genes,
+                    mode=mode,
+                    manual_target=manual_target,
+                    **kwargs
+                )
+            else: raise ValueError("ControlDevice requires SPARSE (CSR) data.")
+
+    def get_next_chunk(self, start_row: int, mode: str = 'sparse', overhead_multiplier: float = 1.0) -> int:
+        if start_row >= self.total_rows: return None
+
+        overhead_multiplier = max(overhead_multiplier, 1.0)
+
+        # ==========================================
+        # STEP 1: DETERMINE TENTATIVE END ROW
+        # ==========================================
+
+        if self.mode == "manual" and self.manual_target > 0:
+            # --- MANUAL MODE ---
+            end_row = start_row + self.manual_target
+        else:
+            # --- AUTO MODE (L3 Optimized) ---
+            # Try to fit the working set into L3 Cache for speed
+            limit_bytes = self.l3_budget_bytes
+            max_items_capacity = int(limit_bytes / self.bytes_per_item)
+            current_ptr = self.indptr[start_row]
+            target_ptr = current_ptr + max_items_capacity
+
+            # Binary search for the row containing the target pointer
+            soft_limit_row = np.searchsorted(self.indptr, target_ptr, side='right') - 1
+            if soft_limit_row <= start_row: soft_limit_row = start_row + 1
+            end_row = soft_limit_row
+
+        # --- HARD CONSTRAINTS (Bounds Check) ---
+        if (end_row - start_row) < self.os_floor:
+            end_row = min(start_row + self.os_floor, self.total_rows)
+
+        if end_row > self.total_rows:
+            end_row = self.total_rows
+
+        # ==========================================
+        # STEP 2: RAM SAFETY CHECK (THE GATEKEEPER)
+        # ==========================================
+        # This prevents OOM (Out of Memory) kills.
+
+        chunk_rows = end_row - start_row
+
+        if mode == 'dense':
+            # Dense cost: Rows * Genes * 8 bytes (float64)
+            total_ram_cost = (chunk_rows * self.n_genes * 8) * overhead_multiplier
+            if total_ram_cost > self.ram_budget_bytes:
+                # Calculate safe max rows
+                bytes_per_row = self.n_genes * 8 * overhead_multiplier
+                max_ram_rows = int(self.ram_budget_bytes / bytes_per_row)
+
+                # Override the tentative end_row
+                end_row = start_row + max_ram_rows
+
+        else: # Sparse
+            # Sparse cost: NNZ * 16 bytes (val+idx)
+            actual_nnz = self.indptr[end_row] - self.indptr[start_row]
+            sparse_cost = (actual_nnz * 16) * overhead_multiplier
+            if sparse_cost > self.ram_budget_bytes:
+                # Calculate safe ratio
+                ratio = self.ram_budget_bytes / sparse_cost
+                new_count = int(chunk_rows * ratio)
+
+                # Override the tentative end_row
+                end_row = start_row + max(32, new_count)
+
+        return end_row
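Taken together, the new module is a chunk-size governor: ControlDevice.from_h5ad reads only the CSR indptr and shape metadata from an .h5ad file, and get_next_chunk returns the exclusive end row of the next chunk (or None once the matrix is exhausted). A minimal, illustrative driver loop based only on the signatures above (the file name and the per-chunk work are placeholders, not part of the package):

# Illustrative sketch; "counts.h5ad" and the per-chunk processing are placeholders.
from m3Drop.ControlDeviceCPU import ControlDevice

governor = ControlDevice.from_h5ad("counts.h5ad", mode="auto")

start = 0
while start < governor.total_rows:
    end = governor.get_next_chunk(start, mode="sparse", overhead_multiplier=1.5)
    if end is None:
        break
    # ... load and analyse CSR rows [start, end) here ...
    start = end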
m3drop-0.4.45/m3Drop/ControlDeviceGPU.py (new file)
@@ -0,0 +1,236 @@
+import numpy as np
+import h5py
+import os
+import subprocess
+import re
+import sys
+
+# Try to import CuPy for robust VRAM detection
+try:
+    import cupy
+    HAS_CUPY = True
+except ImportError:
+    cupy = None
+    HAS_CUPY = False
+
+# Try to import cpuinfo for L3 cache detection
+try:
+    import cpuinfo
+except ImportError:
+    cpuinfo = None
+
+# Try to import psutil for System RAM detection
+try:
+    import psutil
+except ImportError:
+    psutil = None
+
+class ControlDevice:
+    def __init__(
+        self,
+        indptr: np.ndarray,
+        total_rows: int,
+        n_genes: int,
+        l3_cache_mb: float = None,
+        vram_limit_mb: float = None,
+        os_floor: int = 2048,
+        mode: str = "auto",
+        manual_target: int = 5000,
+        **kwargs
+    ):
+        self.indptr = indptr
+        self.total_rows = total_rows
+        self.n_genes = n_genes
+        self.mode = mode.lower()
+        self.manual_target = manual_target
+
+        # --- AUTO-DETECT HARDWARE (SLURM PRIORITY) ---
+        self.sys_ram_limit_mb = self._detect_real_memory_limit()
+
+        if l3_cache_mb is None:
+            self.l3_cache_mb = self._detect_l3_cache()
+        else:
+            self.l3_cache_mb = l3_cache_mb
+
+        if vram_limit_mb is None:
+            self.vram_limit_mb = self._detect_vram()
+        else:
+            self.vram_limit_mb = vram_limit_mb
+
+        # --- BUDGETS ---
+        # 1. L3 Budget (Default/Baseline) - 90% of L3
+        self.l3_budget_bytes = (self.l3_cache_mb * 1024 * 1024) * 0.90
+
+        # 2. VRAM Budget (Safety Net) - 95% of Total
+        self.vram_budget_bytes = (self.vram_limit_mb * 1024 * 1024) * 0.95
+
+        self.os_floor = os_floor
+        self.bytes_per_item = 16 # Float64 Sparse estimate
+
+        # --- DIAGNOSTICS PRINT ---
+        print(f"\n-------------- CONTROL DEVICE --------------")
+
+        # UI UPDATE: Clean "Plane Cockpit" Feel
+        if self.mode == "manual":
+            print(f" > Mode: MANUAL")
+        else:
+            print(f" > Mode: AUTO (L3 Optimized)")
+
+        print(f" > L3 Cache: {self.l3_budget_bytes / (1024**2):.2f} MB / {self.l3_cache_mb:.2f} MB")
+
+        # VRAM Reporting
+        vram_mb = self.vram_limit_mb
+        vram_budget_mb = self.vram_budget_bytes / (1024**2)
+        print(f" > VRAM Budget: {vram_budget_mb:.2f} MB / {vram_mb:.2f} MB")
+        print(f"--------------------------------------------\n")
+
+    def _detect_real_memory_limit(self) -> float:
+        """ Detects the TRUE memory limit, prioritizing SLURM Env Vars. """
+        limits = []
+        if 'SLURM_MEM_PER_NODE' in os.environ:
+            try: limits.append(float(os.environ['SLURM_MEM_PER_NODE']))
+            except: pass
+        if 'SLURM_MEM_PER_CPU' in os.environ and 'SLURM_CPUS_ON_NODE' in os.environ:
+            try: limits.append(float(os.environ['SLURM_MEM_PER_CPU']) * float(os.environ['SLURM_CPUS_ON_NODE']))
+            except: pass
+        if os.path.exists('/sys/fs/cgroup/memory/memory.limit_in_bytes'):
+            try:
+                with open('/sys/fs/cgroup/memory/memory.limit_in_bytes', 'r') as f:
+                    val = float(f.read().strip())
+                    if val < 1e15: limits.append(val / (1024**2))
+            except: pass
+        if os.path.exists('/sys/fs/cgroup/memory.max'):
+            try:
+                with open('/sys/fs/cgroup/memory.max', 'r') as f:
+                    val_str = f.read().strip()
+                    if val_str != "max":
+                        val = float(val_str)
+                        if val < 1e15: limits.append(val / (1024**2))
+            except: pass
+        if psutil: limits.append(psutil.virtual_memory().total / (1024**2))
+        if not limits: return 4096.0
+        return min(limits)
+
+    def _detect_l3_cache(self) -> float:
+        try:
+            if cpuinfo:
+                info = cpuinfo.get_cpu_info()
+                if 'l3_cache_size' in info:
+                    value = info['l3_cache_size']
+                    if isinstance(value, int): return value / (1024 * 1024)
+                    elif isinstance(value, str):
+                        digits = float(re.findall(r"[\d\.]+", value)[0])
+                        if "KB" in value.upper(): return digits / 1024
+                        elif "MB" in value.upper(): return digits
+        except Exception: pass
+        return 16.0
+
+    def _detect_vram(self) -> float:
+        if HAS_CUPY:
+            try:
+                mempool = cupy.get_default_memory_pool()
+                mempool.free_all_blocks()
+                return float(cupy.cuda.Device(0).mem_info[1]) / (1024 * 1024)
+            except Exception: pass
+        try:
+            result = subprocess.check_output(
+                ["nvidia-smi", "--query-gpu=memory.total", "--format=csv,noheader,nounits"],
+                encoding="utf-8"
+            )
+            return float(result.strip().split('\n')[0])
+        except Exception: return 4000.0
+
+    @classmethod
+    def from_h5ad(cls, filepath: str, mode: str = "auto", manual_target: int = 5000, **kwargs):
+        if not os.path.exists(filepath): raise FileNotFoundError(f"File not found: {filepath}")
+        with h5py.File(filepath, "r") as f:
+            if isinstance(f['X'], h5py.Group) and 'indptr' in f['X']:
+                indptr_loaded = f['X']['indptr'][:]
+                if 'shape' in f['X'].attrs:
+                    shape = f['X'].attrs['shape']
+                    total_rows, n_genes = shape[0], shape[1]
+                else:
+                    total_rows = len(indptr_loaded) - 1
+                    n_genes = len(f['var']) if 'var' in f else 1
+                return cls(
+                    indptr=indptr_loaded,
+                    total_rows=total_rows,
+                    n_genes=n_genes,
+                    mode=mode,
+                    manual_target=manual_target,
+                    **kwargs
+                )
+            else: raise ValueError("ControlDevice requires SPARSE (CSR) data.")
+
+    def get_next_chunk(self, start_row: int, mode: str = 'sparse', overhead_multiplier: float = 1.0) -> int:
+        if start_row >= self.total_rows: return None
+
+        overhead_multiplier = max(overhead_multiplier, 1.0)
+
+        # ==========================================
+        # STEP 1: DETERMINE TENTATIVE END ROW
+        # ==========================================
+
+        if self.mode == "manual" and self.manual_target > 0:
+            # --- MANUAL MODE ---
+            # Set tentative end based on user input
+            end_row = start_row + self.manual_target
+        else:
+            # --- AUTO MODE (L3 Optimized) ---
+            limit_bytes = self.l3_budget_bytes
+            max_items_capacity = int(limit_bytes / self.bytes_per_item)
+            current_ptr = self.indptr[start_row]
+            target_ptr = current_ptr + max_items_capacity
+
+            # Binary search for the row containing the target pointer
+            soft_limit_row = np.searchsorted(self.indptr, target_ptr, side='right') - 1
+            if soft_limit_row <= start_row: soft_limit_row = start_row + 1
+            end_row = soft_limit_row
+
+        # --- HARD CONSTRAINTS (Bounds Check) ---
+        if (end_row - start_row) < self.os_floor:
+            end_row = min(start_row + self.os_floor, self.total_rows)
+
+        if end_row > self.total_rows:
+            end_row = self.total_rows
+
+        # Align to Warp 32
+        count = end_row - start_row
+        if count > 32:
+            aligned_count = (count // 32) * 32
+            end_row = start_row + aligned_count
+
+        # ==========================================
+        # STEP 2: VRAM SAFETY CHECK (THE GATEKEEPER)
+        # ==========================================
+        # This logic is non-negotiable. It scales down ANY request (Manual or Auto)
+        # if it exceeds the VRAM budget.
+
+        chunk_rows = end_row - start_row
+
+        if mode == 'dense':
+            total_vram_cost = (chunk_rows * self.n_genes * 8) * overhead_multiplier
+            if total_vram_cost > self.vram_budget_bytes:
+                # Calculate safe max rows
+                bytes_per_row = self.n_genes * 8 * overhead_multiplier
+                max_vram_rows = int(self.vram_budget_bytes / bytes_per_row)
+
+                # Override the tentative end_row
+                end_row = start_row + max_vram_rows
+
+                # Re-align
+                if (end_row - start_row) > 32:
+                    end_row = start_row + ((end_row - start_row) // 32 * 32)
+
+        else: # Sparse
+            actual_nnz = self.indptr[end_row] - self.indptr[start_row]
+            sparse_cost = (actual_nnz * 16) * overhead_multiplier
+            if sparse_cost > self.vram_budget_bytes:
+                # Calculate safe ratio
+                ratio = self.vram_budget_bytes / sparse_cost
+                new_count = int(chunk_rows * ratio)
+
+                # Override the tentative end_row
+                end_row = start_row + max(32, new_count)
+
+        return end_row
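The GPU variant mirrors the CPU governor but budgets against 95% of detected VRAM instead of 85% of system RAM, and it floors each chunk's row count to a multiple of 32 so chunk boundaries line up with the CUDA warp size. A standalone sketch of that rounding rule as it appears in get_next_chunk (the helper name is ours, for illustration only):

# Same arithmetic as the "Align to Warp 32" block in get_next_chunk above.
def floor_to_warp(count: int, warp: int = 32) -> int:
    # Counts above one warp are floored to a warp multiple; smaller counts pass through.
    return (count // warp) * warp if count > warp else count

assert floor_to_warp(5000) == 4992  # 156 warps of 32 rows
assert floor_to_warp(20) == 20      # below one warp, unchanged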
{m3drop-0.4.44 → m3drop-0.4.45}/m3Drop/CoreCPU.py
@@ -22,12 +22,10 @@ from scipy.stats import norm
 from scipy import sparse
 from statsmodels.stats.multitest import multipletests
 
-# [
-
-
-
-# Fallback for running script directly
-from ControlDeviceCPU import ControlDevice
+# [FIX] Strict Relative Import
+# This ensures that if ControlDeviceCPU fails to load (e.g. missing dependency),
+# the real error is shown instead of being masked.
+from .ControlDeviceCPU import ControlDevice
 
 # ==========================================
 # NUMBA KERNELS (CPU OPTIMIZED)
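The import change above (and the matching ones in DiagnosticsCPU.py and NormalizationCPU.py below) drops the bare from ControlDeviceCPU import ... fallback in favour of a strict relative import. One consequence worth noting, which is standard Python packaging behaviour rather than anything specific to this release: the module now has to be loaded as part of the m3Drop package, not executed as a loose script.

# Standard Python behaviour, sketched for context (not code from the package).
import importlib

core = importlib.import_module("m3Drop.CoreCPU")  # relative import resolves here
print(core.ControlDevice)

# Running the file directly instead ("python m3Drop/CoreCPU.py") would raise:
#   ImportError: attempted relative import with no known parent package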
{m3drop-0.4.44 → m3drop-0.4.45}/m3Drop/DiagnosticsCPU.py
@@ -14,15 +14,9 @@ import statsmodels.api as sm
 from scipy.stats import norm
 from statsmodels.stats.multitest import multipletests
 
-# [
-
-
-# Import the Numba-optimized kernel from CoreCPU
-from .CoreCPU import hidden_calc_valsCPU, NBumiFitModelCPU, NBumiFitDispVsMeanCPU, dropout_prob_kernel_cpu
-except ImportError:
-# Fallback
-from ControlDeviceCPU import ControlDevice
-from CoreCPU import hidden_calc_valsCPU, NBumiFitModelCPU, NBumiFitDispVsMeanCPU, dropout_prob_kernel_cpu
+# [FIX] Strict Relative Imports
+from .ControlDeviceCPU import ControlDevice
+from .CoreCPU import hidden_calc_valsCPU, NBumiFitModelCPU, NBumiFitDispVsMeanCPU, dropout_prob_kernel_cpu
 
 # ==========================================
 # DIAGNOSTICS & COMPARISON (CPU)
{m3drop-0.4.44 → m3drop-0.4.45}/m3Drop/NormalizationCPU.py
@@ -14,11 +14,8 @@ except ImportError:
     print("CRITICAL ERROR: 'numba' not found. Please install it (pip install numba).")
     sys.exit(1)
 
-# [
-
-from .ControlDeviceCPU import ControlDevice
-except ImportError:
-from ControlDeviceCPU import ControlDevice
+# [FIX] Strict Relative Import
+from .ControlDeviceCPU import ControlDevice
 
 # ==========================================
 # NUMBA KERNELS (CPU)
{m3drop-0.4.44 → m3drop-0.4.45}/setup.py
@@ -5,7 +5,7 @@ with open("README.md", "r", encoding="utf-8") as fh:
 
 setuptools.setup(
     name="M3Drop", # Name for pip (pip install M3Drop)
-    version="0.4.44",
+    version="0.4.45",
     author="Tallulah Andrews",
     author_email="tandrew6@uwo.ca",
     description="A Python implementation of the M3Drop single-cell RNA-seq analysis tool.",