mlarray 0.0.39__tar.gz → 0.0.41__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {mlarray-0.0.39 → mlarray-0.0.41}/PKG-INFO +5 -5
- {mlarray-0.0.39 → mlarray-0.0.41}/README.md +4 -4
- {mlarray-0.0.39 → mlarray-0.0.41}/docs/optimization.md +6 -6
- {mlarray-0.0.39 → mlarray-0.0.41}/docs/usage.md +4 -4
- mlarray-0.0.41/examples/example_asarray.py +168 -0
- {mlarray-0.0.39 → mlarray-0.0.41}/examples/example_open.py +1 -1
- {mlarray-0.0.39 → mlarray-0.0.41}/mlarray/mlarray.py +376 -101
- {mlarray-0.0.39 → mlarray-0.0.41}/mlarray.egg-info/PKG-INFO +5 -5
- {mlarray-0.0.39 → mlarray-0.0.41}/mlarray.egg-info/SOURCES.txt +4 -0
- mlarray-0.0.41/tests/test_asarray.py +76 -0
- mlarray-0.0.41/tests/test_create.py +110 -0
- {mlarray-0.0.39 → mlarray-0.0.41}/tests/test_metadata.py +4 -4
- mlarray-0.0.41/tests/test_open.py +93 -0
- {mlarray-0.0.39 → mlarray-0.0.41}/tests/test_optimization.py +4 -4
- {mlarray-0.0.39 → mlarray-0.0.41}/tests/test_usage.py +1 -1
- {mlarray-0.0.39 → mlarray-0.0.41}/.github/workflows/workflow.yml +0 -0
- {mlarray-0.0.39 → mlarray-0.0.41}/.gitignore +0 -0
- {mlarray-0.0.39 → mlarray-0.0.41}/LICENSE +0 -0
- {mlarray-0.0.39 → mlarray-0.0.41}/MANIFEST.in +0 -0
- {mlarray-0.0.39 → mlarray-0.0.41}/assets/banner.png +0 -0
- {mlarray-0.0.39 → mlarray-0.0.41}/assets/banner.png~ +0 -0
- {mlarray-0.0.39 → mlarray-0.0.41}/docs/api.md +0 -0
- {mlarray-0.0.39 → mlarray-0.0.41}/docs/cli.md +0 -0
- {mlarray-0.0.39 → mlarray-0.0.41}/docs/index.md +0 -0
- {mlarray-0.0.39 → mlarray-0.0.41}/docs/schema.md +0 -0
- {mlarray-0.0.39 → mlarray-0.0.41}/docs/why.md +0 -0
- {mlarray-0.0.39 → mlarray-0.0.41}/examples/example_channel.py +0 -0
- {mlarray-0.0.39 → mlarray-0.0.41}/examples/example_metadata_only.py +0 -0
- {mlarray-0.0.39 → mlarray-0.0.41}/examples/example_non_spatial.py +0 -0
- {mlarray-0.0.39 → mlarray-0.0.41}/examples/example_save_load.py +0 -0
- {mlarray-0.0.39 → mlarray-0.0.41}/mkdocs.yml +0 -0
- {mlarray-0.0.39 → mlarray-0.0.41}/mlarray/__init__.py +0 -0
- {mlarray-0.0.39 → mlarray-0.0.41}/mlarray/cli.py +0 -0
- {mlarray-0.0.39 → mlarray-0.0.41}/mlarray/meta.py +0 -0
- {mlarray-0.0.39 → mlarray-0.0.41}/mlarray/utils.py +0 -0
- {mlarray-0.0.39 → mlarray-0.0.41}/mlarray.egg-info/dependency_links.txt +0 -0
- {mlarray-0.0.39 → mlarray-0.0.41}/mlarray.egg-info/entry_points.txt +0 -0
- {mlarray-0.0.39 → mlarray-0.0.41}/mlarray.egg-info/requires.txt +0 -0
- {mlarray-0.0.39 → mlarray-0.0.41}/mlarray.egg-info/top_level.txt +0 -0
- {mlarray-0.0.39 → mlarray-0.0.41}/pyproject.toml +0 -0
- {mlarray-0.0.39 → mlarray-0.0.41}/setup.cfg +0 -0
- {mlarray-0.0.39 → mlarray-0.0.41}/tests/test_bboxes.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: mlarray
|
|
3
|
-
Version: 0.0.39
|
|
3
|
+
Version: 0.0.41
|
|
4
4
|
Summary: Array format specialized for Machine Learning with Blosc2 backend and standardized metadata.
|
|
5
5
|
Author-email: Karol Gotkowski <karol.gotkowski@dkfz.de>
|
|
6
6
|
License: MIT
|
|
@@ -88,16 +88,16 @@ from mlarray import MLArray
|
|
|
88
88
|
import numpy as np
|
|
89
89
|
|
|
90
90
|
# read-only, partial access (default)
|
|
91
|
-
image = MLArray.open("sample.mla",
|
|
91
|
+
image = MLArray.open("sample.mla", mmap_mode='r')
|
|
92
92
|
crop = image[10:20, 50:60] # Read crop
|
|
93
93
|
|
|
94
94
|
# read/write, partial access
|
|
95
|
-
image = MLArray.open("sample.mla",
|
|
95
|
+
image = MLArray.open("sample.mla", mmap_mode='r+')
|
|
96
96
|
image[10:20, 50:60] *= 5 # Modify crop in memory and disk
|
|
97
97
|
|
|
98
98
|
# read/write, partial access, create/overwrite
|
|
99
99
|
array = np.random.random((128, 256, 256))
|
|
100
|
-
image = MLArray.
|
|
100
|
+
image = MLArray.create("sample.mla", shape=array.shape, dtype=array.dtype, mmap_mode='w+')
|
|
101
101
|
image[...] = array # Modify image in memory and disk
|
|
102
102
|
```
|
|
103
103
|
|
|
@@ -125,7 +125,7 @@ image.meta.source["study_id"] = "study-001"
|
|
|
125
125
|
image.save("with-metadata.mla")
|
|
126
126
|
|
|
127
127
|
# Open memory-mapped
|
|
128
|
-
image = MLArray.open("with-metadata.mla",
|
|
128
|
+
image = MLArray.open("with-metadata.mla", mmap_mode='r+')
|
|
129
129
|
image.meta.source["study_id"] = "new-study" # Modify metadata
|
|
130
130
|
image.close() # Close and save metadata, only necessary to save modified metadata
|
|
131
131
|
```
|
|
@@ -54,16 +54,16 @@ from mlarray import MLArray
|
|
|
54
54
|
import numpy as np
|
|
55
55
|
|
|
56
56
|
# read-only, partial access (default)
|
|
57
|
-
image = MLArray.open("sample.mla",
|
|
57
|
+
image = MLArray.open("sample.mla", mmap_mode='r')
|
|
58
58
|
crop = image[10:20, 50:60] # Read crop
|
|
59
59
|
|
|
60
60
|
# read/write, partial access
|
|
61
|
-
image = MLArray.open("sample.mla",
|
|
61
|
+
image = MLArray.open("sample.mla", mmap_mode='r+')
|
|
62
62
|
image[10:20, 50:60] *= 5 # Modify crop in memory and disk
|
|
63
63
|
|
|
64
64
|
# read/write, partial access, create/overwrite
|
|
65
65
|
array = np.random.random((128, 256, 256))
|
|
66
|
-
image = MLArray.
|
|
66
|
+
image = MLArray.create("sample.mla", shape=array.shape, dtype=array.dtype, mmap_mode='w+')
|
|
67
67
|
image[...] = array # Modify image in memory and disk
|
|
68
68
|
```
|
|
69
69
|
|
|
@@ -91,7 +91,7 @@ image.meta.source["study_id"] = "study-001"
|
|
|
91
91
|
image.save("with-metadata.mla")
|
|
92
92
|
|
|
93
93
|
# Open memory-mapped
|
|
94
|
-
image = MLArray.open("with-metadata.mla",
|
|
94
|
+
image = MLArray.open("with-metadata.mla", mmap_mode='r+')
|
|
95
95
|
image.meta.source["study_id"] = "new-study" # Modify metadata
|
|
96
96
|
image.close() # Close and save metadata, only necessary to save modified metadata
|
|
97
97
|
```
|
|
@@ -96,7 +96,7 @@ For large files, you typically want **mmap reads** so random patches don’t req
|
|
|
96
96
|
from mlarray import MLArray
|
|
97
97
|
|
|
98
98
|
# read-only mmap: fast random access without loading the full volume
|
|
99
|
-
image = MLArray.open("patch-non-iso.mla",
|
|
99
|
+
image = MLArray.open("patch-non-iso.mla", mmap_mode='r')
|
|
100
100
|
|
|
101
101
|
patch = image[10:20, 50:60] # Read a crop/patch (partial read)
|
|
102
102
|
```
|
|
@@ -110,7 +110,7 @@ When to use:
|
|
|
110
110
|
|
|
111
111
|
### 4) Memory-mapped in-place modification (advanced)
|
|
112
112
|
|
|
113
|
-
You can modify regions in-place with `mmap='r+'`. This is useful for workflows like:
|
|
113
|
+
You can modify regions in-place with `mmap_mode='r+'`. This is useful for workflows like:
|
|
114
114
|
|
|
115
115
|
* writing derived arrays (e.g., post-processing outputs),
|
|
116
116
|
* patch-wise updates,
|
|
@@ -119,7 +119,7 @@ You can modify regions in-place with `mmap='r+'`. This is useful for workflows l
|
|
|
119
119
|
```python
|
|
120
120
|
from mlarray import MLArray
|
|
121
121
|
|
|
122
|
-
image = MLArray.open("patch-non-iso.mla", mmap='r+')
|
|
122
|
+
image = MLArray.open("patch-non-iso.mla", mmap_mode='r+')
|
|
123
123
|
image[10:20, 50:60] *= 5 # Modify crop in memory and on disk
|
|
124
124
|
image.close()
|
|
125
125
|
```
|
|
@@ -128,7 +128,7 @@ image.close()
|
|
|
128
128
|
|
|
129
129
|
### 5) Create a new memory-mapped file (streaming write)
|
|
130
130
|
|
|
131
|
-
If you want to create a file on disk and then fill it (without holding the full array in memory), use `
|
|
131
|
+
If you want to create a file on disk and then fill it (without holding the full array in memory), use `create(..., shape=..., dtype=..., mmap_mode='w+')`. MLArray will compute and store the optimized layout up front.
|
|
132
132
|
|
|
133
133
|
```python
|
|
134
134
|
import numpy as np
|
|
@@ -137,11 +137,11 @@ from mlarray import MLArray
|
|
|
137
137
|
shape = (128, 256, 256)
|
|
138
138
|
dtype = np.float32
|
|
139
139
|
|
|
140
|
-
image = MLArray.
|
|
140
|
+
image = MLArray.create(
|
|
141
141
|
"streamed-write.mla",
|
|
142
142
|
shape=shape,
|
|
143
143
|
dtype=dtype,
|
|
144
|
-
|
|
144
|
+
mmap_mode='w+',
|
|
145
145
|
patch_size=192, # optimize for your training patch size
|
|
146
146
|
)
|
|
147
147
|
|
|
@@ -34,16 +34,16 @@ from mlarray import MLArray
|
|
|
34
34
|
import numpy as np
|
|
35
35
|
|
|
36
36
|
# read-only, partial access (default)
|
|
37
|
-
image = MLArray.open("sample.mla",
|
|
37
|
+
image = MLArray.open("sample.mla", mmap_mode='r')
|
|
38
38
|
crop = image[10:20, 50:60] # Read crop
|
|
39
39
|
|
|
40
40
|
# read/write, partial access
|
|
41
|
-
image = MLArray.open("sample.mla",
|
|
41
|
+
image = MLArray.open("sample.mla", mmap_mode='r+')
|
|
42
42
|
image[10:20, 50:60] *= 5 # Modify crop in memory and disk
|
|
43
43
|
|
|
44
44
|
# read/write, partial access, create/overwrite
|
|
45
45
|
array = np.random.random((128, 256, 256))
|
|
46
|
-
image = MLArray.
|
|
46
|
+
image = MLArray.create("sample.mla", shape=array.shape, dtype=array.dtype, mmap_mode='w+')
|
|
47
47
|
image[...] = array # Modify image in memory and disk
|
|
48
48
|
```
|
|
49
49
|
|
|
@@ -75,7 +75,7 @@ image.meta.source["study_id"] = "study-001"
|
|
|
75
75
|
image.save("with-metadata.mla")
|
|
76
76
|
|
|
77
77
|
# Open memory-mapped
|
|
78
|
-
image = MLArray.open("with-metadata.mla",
|
|
78
|
+
image = MLArray.open("with-metadata.mla", mmap_mode='r+')
|
|
79
79
|
image.meta.source["study_id"] = "new-study" # Modify metadata
|
|
80
80
|
image.close() # Close and save metadata, only necessary to save modified metadata
|
|
81
81
|
```
|
|
@@ -0,0 +1,168 @@
|
|
|
1
|
+
import gc
|
|
2
|
+
import os
|
|
3
|
+
import numpy as np
|
|
4
|
+
|
|
5
|
+
from mlarray import MLArray
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def to_mib(num_bytes: int) -> float:
    """Convert a byte count to mebibytes (1 MiB = 1024 * 1024 bytes)."""
    bytes_per_mib = 1024 * 1024
    return num_bytes / bytes_per_mib
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def get_process_rss_bytes():
    """Return the current resident set size of this process in bytes.

    ``psutil`` is tried first. If it is missing or fails for any reason, the
    ``VmRSS`` entry of ``/proc/self/status`` is used instead. Returns ``None``
    when neither source yields a value.
    """
    try:
        import psutil  # type: ignore

        current_process = psutil.Process(os.getpid())
        return int(current_process.memory_info().rss)
    except Exception:
        # Best effort: any psutil problem just means we try the fallback.
        pass

    # Linux fallback without extra dependency.
    try:
        with open("/proc/self/status", "r", encoding="utf-8") as status_file:
            vm_rss_line = next(
                (entry for entry in status_file if entry.startswith("VmRSS:")),
                None,
            )
            if vm_rss_line is not None:
                # Second whitespace-separated field is the size, scaled by 1024.
                return int(vm_rss_line.split()[1]) * 1024
    except Exception:
        pass

    return None
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def get_process_peak_rss_bytes():
    """Return the peak resident set size of this process in bytes.

    The ``VmHWM`` entry of ``/proc/self/status`` is preferred (Linux). When it
    is unavailable, ``resource.getrusage`` is used as a portable POSIX
    fallback so that non-Linux systems (e.g. macOS) also report a value.

    Returns:
        The peak RSS in bytes, or ``None`` if no source is available
        (for example on Windows, where ``resource`` does not exist).
    """
    # Primary source: Linux procfs high-water mark.
    try:
        with open("/proc/self/status", "r", encoding="utf-8") as f:
            for line in f:
                if line.startswith("VmHWM:"):
                    kb = int(line.split()[1])
                    return kb * 1024
    except (OSError, ValueError, IndexError):
        # Missing/unreadable procfs or an unexpected line layout.
        pass

    # Fallback: POSIX getrusage. Imported lazily because the module is
    # absent on non-POSIX platforms.
    import sys

    try:
        import resource

        peak = int(resource.getrusage(resource.RUSAGE_SELF).ru_maxrss)
    except (ImportError, OSError, ValueError):
        return None

    if peak <= 0:
        return None
    # ru_maxrss is reported in bytes on macOS but in kilobytes on Linux/BSD.
    return peak if sys.platform == "darwin" else peak * 1024
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def format_bytes_mib(num_bytes):
    """Render a byte count as ``"<bytes> (<MiB> MiB)"``, or ``"n/a"`` for ``None``."""
    return "n/a" if num_bytes is None else f"{num_bytes} ({to_mib(num_bytes):.2f} MiB)"
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
if __name__ == "__main__":
    # Demo script: build a mostly-zero float32 volume, convert it to an
    # in-memory compressed MLArray, and report payload sizes alongside
    # process RSS snapshots taken at each stage.
    # NOTE: the RSS snapshots depend on the exact order of the allocations
    # below; do not reorder statements without re-checking the numbers.
    rng = np.random.default_rng(0)
    rss_start = get_process_rss_bytes()

    # Build a sparse array with mostly zeros.
    shape = (512, 512, 512)
    density = 0.01 # 1% non-zero values
    total = int(np.prod(shape))
    nnz = int(total * density)

    array = np.zeros(shape, dtype=np.float32)
    # Sample distinct flat positions (replace=False) and fill them with
    # random float32 values.
    non_zero_indices = rng.choice(total, size=nnz, replace=False)
    array.flat[non_zero_indices] = rng.random(nnz, dtype=np.float32)
    rss_after_numpy = get_process_rss_bytes()

    # NumPy in-memory payload size.
    numpy_bytes = array.nbytes

    # Convert to in-memory compressed MLArray.
    image = MLArray.asarray(array, memory_compressed=True)
    rss_after_compressed = get_process_rss_bytes()

    # Compressed payload size stored by Blosc2 in RAM.
    # NOTE(review): `_store` is a private attribute of MLArray; presumably it
    # exposes the underlying Blosc2 super-chunk -- confirm against mlarray.py.
    compressed_bytes = image._store.schunk.cbytes
    uncompressed_bytes = image._store.schunk.nbytes

    # Verify data integrity.
    equal = bool(np.allclose(array, image.to_numpy()))

    # Size-based savings, derived from the payload sizes (not from RSS).
    saved_bytes = numpy_bytes - compressed_bytes
    saved_pct = 100.0 * saved_bytes / numpy_bytes
    ratio = numpy_bytes / compressed_bytes

    # Optional: drop the original NumPy array and force GC, then re-check RSS.
    del array
    gc.collect()
    rss_after_drop_numpy = get_process_rss_bytes()
    peak_rss = get_process_peak_rss_bytes()

    # RSS-based savings. Every value stays None when a snapshot is missing,
    # which the printing code below reports as "n/a".
    rss_saved_bytes = None
    rss_saved_pct = None
    rss_saved_workload_bytes = None
    rss_saved_workload_pct = None
    if rss_after_numpy is not None and rss_after_drop_numpy is not None and rss_after_numpy > 0:
        # "Raw" saving: relative to the total RSS after the NumPy allocation.
        rss_saved_bytes = rss_after_numpy - rss_after_drop_numpy
        rss_saved_pct = 100.0 * rss_saved_bytes / rss_after_numpy
    if (
        rss_start is not None
        and rss_after_numpy is not None
        and rss_after_drop_numpy is not None
        and (rss_after_numpy - rss_start) > 0
    ):
        # "Workload" saving: rss_start cancels in the byte difference, so the
        # byte value equals the raw saving; only the percentage differs because
        # it is taken relative to the RSS growth since start.
        rss_saved_workload_bytes = (rss_after_numpy - rss_start) - (rss_after_drop_numpy - rss_start)
        rss_saved_workload_pct = 100.0 * rss_saved_workload_bytes / (rss_after_numpy - rss_start)

    # RSS growth attributable to each representation, measured from rss_start.
    rss_numpy_consumption = None
    rss_mlarray_compressed_consumption = None
    rss_compression_ratio = None
    if rss_start is not None and rss_after_numpy is not None:
        rss_numpy_consumption = rss_after_numpy - rss_start
    if rss_start is not None and rss_after_drop_numpy is not None:
        rss_mlarray_compressed_consumption = rss_after_drop_numpy - rss_start
    if (
        rss_numpy_consumption is not None
        and rss_mlarray_compressed_consumption is not None
        and rss_mlarray_compressed_consumption > 0
    ):
        # The > 0 guard above avoids dividing by zero or a negative growth.
        rss_compression_ratio = rss_numpy_consumption / rss_mlarray_compressed_consumption

    # Report: payload sizes first, then RSS snapshots, then derived values.
    print("Sparse array compression demo (in-memory)")
    print(f"shape: {shape}")
    print(f"density (non-zero): {density:.2%}")
    print(f"numpy bytes: {numpy_bytes} ({to_mib(numpy_bytes):.2f} MiB)")
    print(f"mlarray cbytes: {compressed_bytes} ({to_mib(compressed_bytes):.2f} MiB)")
    print(f"mlarray nbytes: {uncompressed_bytes} ({to_mib(uncompressed_bytes):.2f} MiB)")
    print(f"compression ratio: {ratio:.2f}x")
    print(f"memory saved: {saved_bytes} ({to_mib(saved_bytes):.2f} MiB, {saved_pct:.2f}%)")
    print(f"roundtrip equal: {equal}")
    print()
    print("Process RSS snapshots (real memory in RAM):")
    print(f"rss start: {format_bytes_mib(rss_start)}")
    print(f"rss after numpy: {format_bytes_mib(rss_after_numpy)}")
    print(f"rss after compressed: {format_bytes_mib(rss_after_compressed)}")
    print(f"rss after del numpy: {format_bytes_mib(rss_after_drop_numpy)}")
    print(f"rss peak (VmHWM): {format_bytes_mib(peak_rss)}")
    if rss_saved_bytes is not None and rss_saved_pct is not None:
        print(
            f"rss saved (raw): {rss_saved_bytes} ({to_mib(rss_saved_bytes):.2f} MiB, {rss_saved_pct:.2f}%)"
        )
    else:
        print("rss saved (raw): n/a")
    if rss_saved_workload_bytes is not None and rss_saved_workload_pct is not None:
        print(
            "rss saved (workload): "
            f"{rss_saved_workload_bytes} ({to_mib(rss_saved_workload_bytes):.2f} MiB, {rss_saved_workload_pct:.2f}%)"
        )
    else:
        print("rss saved (workload): n/a")
    print()
    print("RSS-derived memory consumption summary:")
    if rss_numpy_consumption is not None:
        print(
            f"rss numpy memory consumption: {rss_numpy_consumption} ({to_mib(rss_numpy_consumption):.2f} MiB)"
        )
    else:
        print("rss numpy memory consumption: n/a")
    if rss_mlarray_compressed_consumption is not None:
        print(
            "rss mlarray compressed memory consumption: "
            f"{rss_mlarray_compressed_consumption} ({to_mib(rss_mlarray_compressed_consumption):.2f} MiB)"
        )
    else:
        print("rss mlarray compressed memory consumption: n/a")
    if rss_compression_ratio is not None:
        print(f"rss compression ratio: {rss_compression_ratio:.2f}x")
    else:
        print("rss compression ratio: n/a")
|
|
@@ -19,7 +19,7 @@ if __name__ == '__main__':
|
|
|
19
19
|
os.remove(filepath)
|
|
20
20
|
|
|
21
21
|
print("Initializing image...")
|
|
22
|
-
image = MLArray.
|
|
22
|
+
image = MLArray.create(filepath, array.shape, array.dtype)
|
|
23
23
|
print("Saving image...")
|
|
24
24
|
image[...] = array
|
|
25
25
|
image.meta.copy_from(Meta(source=source_meta, spatial=MetaSpatial(spacing=spacing, origin=origin, direction=direction), bbox=MetaBbox(bboxes)))
|