mlarray 0.0.39__tar.gz → 0.0.41__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. {mlarray-0.0.39 → mlarray-0.0.41}/PKG-INFO +5 -5
  2. {mlarray-0.0.39 → mlarray-0.0.41}/README.md +4 -4
  3. {mlarray-0.0.39 → mlarray-0.0.41}/docs/optimization.md +6 -6
  4. {mlarray-0.0.39 → mlarray-0.0.41}/docs/usage.md +4 -4
  5. mlarray-0.0.41/examples/example_asarray.py +168 -0
  6. {mlarray-0.0.39 → mlarray-0.0.41}/examples/example_open.py +1 -1
  7. {mlarray-0.0.39 → mlarray-0.0.41}/mlarray/mlarray.py +376 -101
  8. {mlarray-0.0.39 → mlarray-0.0.41}/mlarray.egg-info/PKG-INFO +5 -5
  9. {mlarray-0.0.39 → mlarray-0.0.41}/mlarray.egg-info/SOURCES.txt +4 -0
  10. mlarray-0.0.41/tests/test_asarray.py +76 -0
  11. mlarray-0.0.41/tests/test_create.py +110 -0
  12. {mlarray-0.0.39 → mlarray-0.0.41}/tests/test_metadata.py +4 -4
  13. mlarray-0.0.41/tests/test_open.py +93 -0
  14. {mlarray-0.0.39 → mlarray-0.0.41}/tests/test_optimization.py +4 -4
  15. {mlarray-0.0.39 → mlarray-0.0.41}/tests/test_usage.py +1 -1
  16. {mlarray-0.0.39 → mlarray-0.0.41}/.github/workflows/workflow.yml +0 -0
  17. {mlarray-0.0.39 → mlarray-0.0.41}/.gitignore +0 -0
  18. {mlarray-0.0.39 → mlarray-0.0.41}/LICENSE +0 -0
  19. {mlarray-0.0.39 → mlarray-0.0.41}/MANIFEST.in +0 -0
  20. {mlarray-0.0.39 → mlarray-0.0.41}/assets/banner.png +0 -0
  21. {mlarray-0.0.39 → mlarray-0.0.41}/assets/banner.png~ +0 -0
  22. {mlarray-0.0.39 → mlarray-0.0.41}/docs/api.md +0 -0
  23. {mlarray-0.0.39 → mlarray-0.0.41}/docs/cli.md +0 -0
  24. {mlarray-0.0.39 → mlarray-0.0.41}/docs/index.md +0 -0
  25. {mlarray-0.0.39 → mlarray-0.0.41}/docs/schema.md +0 -0
  26. {mlarray-0.0.39 → mlarray-0.0.41}/docs/why.md +0 -0
  27. {mlarray-0.0.39 → mlarray-0.0.41}/examples/example_channel.py +0 -0
  28. {mlarray-0.0.39 → mlarray-0.0.41}/examples/example_metadata_only.py +0 -0
  29. {mlarray-0.0.39 → mlarray-0.0.41}/examples/example_non_spatial.py +0 -0
  30. {mlarray-0.0.39 → mlarray-0.0.41}/examples/example_save_load.py +0 -0
  31. {mlarray-0.0.39 → mlarray-0.0.41}/mkdocs.yml +0 -0
  32. {mlarray-0.0.39 → mlarray-0.0.41}/mlarray/__init__.py +0 -0
  33. {mlarray-0.0.39 → mlarray-0.0.41}/mlarray/cli.py +0 -0
  34. {mlarray-0.0.39 → mlarray-0.0.41}/mlarray/meta.py +0 -0
  35. {mlarray-0.0.39 → mlarray-0.0.41}/mlarray/utils.py +0 -0
  36. {mlarray-0.0.39 → mlarray-0.0.41}/mlarray.egg-info/dependency_links.txt +0 -0
  37. {mlarray-0.0.39 → mlarray-0.0.41}/mlarray.egg-info/entry_points.txt +0 -0
  38. {mlarray-0.0.39 → mlarray-0.0.41}/mlarray.egg-info/requires.txt +0 -0
  39. {mlarray-0.0.39 → mlarray-0.0.41}/mlarray.egg-info/top_level.txt +0 -0
  40. {mlarray-0.0.39 → mlarray-0.0.41}/pyproject.toml +0 -0
  41. {mlarray-0.0.39 → mlarray-0.0.41}/setup.cfg +0 -0
  42. {mlarray-0.0.39 → mlarray-0.0.41}/tests/test_bboxes.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mlarray
3
- Version: 0.0.39
3
+ Version: 0.0.41
4
4
  Summary: Array format specialized for Machine Learning with Blosc2 backend and standardized metadata.
5
5
  Author-email: Karol Gotkowski <karol.gotkowski@dkfz.de>
6
6
  License: MIT
@@ -88,16 +88,16 @@ from mlarray import MLArray
88
88
  import numpy as np
89
89
 
90
90
  # read-only, partial access (default)
91
- image = MLArray.open("sample.mla", mmap='r')
91
+ image = MLArray.open("sample.mla", mmap_mode='r')
92
92
  crop = image[10:20, 50:60] # Read crop
93
93
 
94
94
  # read/write, partial access
95
- image = MLArray.open("sample.mla", mmap='r+')
95
+ image = MLArray.open("sample.mla", mmap_mode='r+')
96
96
  image[10:20, 50:60] *= 5 # Modify crop in memory and disk
97
97
 
98
98
  # read/write, partial access, create/overwrite
99
99
  array = np.random.random((128, 256, 256))
100
- image = MLArray.open("sample.mla", shape=array.shape, dtype=array.dtype, mmap='w+')
100
+ image = MLArray.create("sample.mla", shape=array.shape, dtype=array.dtype, mmap_mode='w+')
101
101
  image[...] = array # Modify image in memory and disk
102
102
  ```
103
103
 
@@ -125,7 +125,7 @@ image.meta.source["study_id"] = "study-001"
125
125
  image.save("with-metadata.mla")
126
126
 
127
127
  # Open memory-mapped
128
- image = MLArray.open("with-metadata.mla", mmap='r+')
128
+ image = MLArray.open("with-metadata.mla", mmap_mode='r+')
129
129
  image.meta.source["study_id"] = "new-study" # Modify metadata
130
130
  image.close() # Close and save metadata, only necessary to save modified metadata
131
131
  ```
@@ -54,16 +54,16 @@ from mlarray import MLArray
54
54
  import numpy as np
55
55
 
56
56
  # read-only, partial access (default)
57
- image = MLArray.open("sample.mla", mmap='r')
57
+ image = MLArray.open("sample.mla", mmap_mode='r')
58
58
  crop = image[10:20, 50:60] # Read crop
59
59
 
60
60
  # read/write, partial access
61
- image = MLArray.open("sample.mla", mmap='r+')
61
+ image = MLArray.open("sample.mla", mmap_mode='r+')
62
62
  image[10:20, 50:60] *= 5 # Modify crop in memory and disk
63
63
 
64
64
  # read/write, partial access, create/overwrite
65
65
  array = np.random.random((128, 256, 256))
66
- image = MLArray.open("sample.mla", shape=array.shape, dtype=array.dtype, mmap='w+')
66
+ image = MLArray.create("sample.mla", shape=array.shape, dtype=array.dtype, mmap_mode='w+')
67
67
  image[...] = array # Modify image in memory and disk
68
68
  ```
69
69
 
@@ -91,7 +91,7 @@ image.meta.source["study_id"] = "study-001"
91
91
  image.save("with-metadata.mla")
92
92
 
93
93
  # Open memory-mapped
94
- image = MLArray.open("with-metadata.mla", mmap='r+')
94
+ image = MLArray.open("with-metadata.mla", mmap_mode='r+')
95
95
  image.meta.source["study_id"] = "new-study" # Modify metadata
96
96
  image.close() # Close and save metadata, only necessary to save modified metadata
97
97
  ```
@@ -96,7 +96,7 @@ For large files, you typically want **mmap reads** so random patches don’t req
96
96
  from mlarray import MLArray
97
97
 
98
98
  # read-only mmap: fast random access without loading the full volume
99
- image = MLArray.open("patch-non-iso.mla", mmap='r')
99
+ image = MLArray.open("patch-non-iso.mla", mmap_mode='r')
100
100
 
101
101
  patch = image[10:20, 50:60] # Read a crop/patch (partial read)
102
102
  ```
@@ -110,7 +110,7 @@ When to use:
110
110
 
111
111
  ### 4) Memory-mapped in-place modification (advanced)
112
112
 
113
- You can modify regions in-place with `mmap='r+'`. This is useful for workflows like:
113
+ You can modify regions in-place with `mmap_mode='r+'`. This is useful for workflows like:
114
114
 
115
115
  * writing derived arrays (e.g., post-processing outputs),
116
116
  * patch-wise updates,
@@ -119,7 +119,7 @@ You can modify regions in-place with `mmap='r+'`. This is useful for workflows l
119
119
  ```python
120
120
  from mlarray import MLArray
121
121
 
122
- image = MLArray.open("patch-non-iso.mla", mmap='r+')
122
+ image = MLArray.open("patch-non-iso.mla", mmap_mode='r+')
123
123
  image[10:20, 50:60] *= 5 # Modify crop in memory and on disk
124
124
  image.close()
125
125
  ```
@@ -128,7 +128,7 @@ image.close()
128
128
 
129
129
  ### 5) Create a new memory-mapped file (streaming write)
130
130
 
131
- If you want to create a file on disk and then fill it (without holding the full array in memory), use `open(..., shape=..., dtype=..., mmap='w+')`. MLArray will compute and store the optimized layout up front.
131
+ If you want to create a file on disk and then fill it (without holding the full array in memory), use `create(..., shape=..., dtype=..., mmap_mode='w+')`. MLArray will compute and store the optimized layout up front.
132
132
 
133
133
  ```python
134
134
  import numpy as np
@@ -137,11 +137,11 @@ from mlarray import MLArray
137
137
  shape = (128, 256, 256)
138
138
  dtype = np.float32
139
139
 
140
- image = MLArray.open(
140
+ image = MLArray.create(
141
141
  "streamed-write.mla",
142
142
  shape=shape,
143
143
  dtype=dtype,
144
- mmap='w+',
144
+ mmap_mode='w+',
145
145
  patch_size=192, # optimize for your training patch size
146
146
  )
147
147
 
@@ -34,16 +34,16 @@ from mlarray import MLArray
34
34
  import numpy as np
35
35
 
36
36
  # read-only, partial access (default)
37
- image = MLArray.open("sample.mla", mmap='r')
37
+ image = MLArray.open("sample.mla", mmap_mode='r')
38
38
  crop = image[10:20, 50:60] # Read crop
39
39
 
40
40
  # read/write, partial access
41
- image = MLArray.open("sample.mla", mmap='r+')
41
+ image = MLArray.open("sample.mla", mmap_mode='r+')
42
42
  image[10:20, 50:60] *= 5 # Modify crop in memory and disk
43
43
 
44
44
  # read/write, partial access, create/overwrite
45
45
  array = np.random.random((128, 256, 256))
46
- image = MLArray.open("sample.mla", shape=array.shape, dtype=array.dtype, mmap='w+')
46
+ image = MLArray.create("sample.mla", shape=array.shape, dtype=array.dtype, mmap_mode='w+')
47
47
  image[...] = array # Modify image in memory and disk
48
48
  ```
49
49
 
@@ -75,7 +75,7 @@ image.meta.source["study_id"] = "study-001"
75
75
  image.save("with-metadata.mla")
76
76
 
77
77
  # Open memory-mapped
78
- image = MLArray.open("with-metadata.mla", mmap='r+')
78
+ image = MLArray.open("with-metadata.mla", mmap_mode='r+')
79
79
  image.meta.source["study_id"] = "new-study" # Modify metadata
80
80
  image.close() # Close and save metadata, only necessary to save modified metadata
81
81
  ```
@@ -0,0 +1,168 @@
1
+ import gc
2
+ import os
3
+ import numpy as np
4
+
5
+ from mlarray import MLArray
6
+
7
+
8
+ def to_mib(num_bytes: int) -> float:
9
+ return num_bytes / (1024 * 1024)
10
+
11
+
12
+ def get_process_rss_bytes():
13
+ try:
14
+ import psutil # type: ignore
15
+
16
+ return int(psutil.Process(os.getpid()).memory_info().rss)
17
+ except Exception:
18
+ pass
19
+
20
+ # Linux fallback without extra dependency.
21
+ try:
22
+ with open("/proc/self/status", "r", encoding="utf-8") as f:
23
+ for line in f:
24
+ if line.startswith("VmRSS:"):
25
+ kb = int(line.split()[1])
26
+ return kb * 1024
27
+ except Exception:
28
+ pass
29
+
30
+ return None
31
+
32
+
33
+ def get_process_peak_rss_bytes():
34
+ # Linux fallback for peak resident set size.
35
+ try:
36
+ with open("/proc/self/status", "r", encoding="utf-8") as f:
37
+ for line in f:
38
+ if line.startswith("VmHWM:"):
39
+ kb = int(line.split()[1])
40
+ return kb * 1024
41
+ except Exception:
42
+ pass
43
+ return None
44
+
45
+
46
+ def format_bytes_mib(num_bytes):
47
+ if num_bytes is None:
48
+ return "n/a"
49
+ return f"{num_bytes} ({to_mib(num_bytes):.2f} MiB)"
50
+
51
+
52
+ if __name__ == "__main__":
53
+ rng = np.random.default_rng(0)
54
+ rss_start = get_process_rss_bytes()
55
+
56
+ # Build a sparse array with mostly zeros.
57
+ shape = (512, 512, 512)
58
+ density = 0.01 # 1% non-zero values
59
+ total = int(np.prod(shape))
60
+ nnz = int(total * density)
61
+
62
+ array = np.zeros(shape, dtype=np.float32)
63
+ non_zero_indices = rng.choice(total, size=nnz, replace=False)
64
+ array.flat[non_zero_indices] = rng.random(nnz, dtype=np.float32)
65
+ rss_after_numpy = get_process_rss_bytes()
66
+
67
+ # NumPy in-memory payload size.
68
+ numpy_bytes = array.nbytes
69
+
70
+ # Convert to in-memory compressed MLArray.
71
+ image = MLArray.asarray(array, memory_compressed=True)
72
+ rss_after_compressed = get_process_rss_bytes()
73
+
74
+ # Compressed payload size stored by Blosc2 in RAM.
75
+ compressed_bytes = image._store.schunk.cbytes
76
+ uncompressed_bytes = image._store.schunk.nbytes
77
+
78
+ # Verify data integrity.
79
+ equal = bool(np.allclose(array, image.to_numpy()))
80
+
81
+ saved_bytes = numpy_bytes - compressed_bytes
82
+ saved_pct = 100.0 * saved_bytes / numpy_bytes
83
+ ratio = numpy_bytes / compressed_bytes
84
+
85
+ # Optional: drop the original NumPy array and force GC, then re-check RSS.
86
+ del array
87
+ gc.collect()
88
+ rss_after_drop_numpy = get_process_rss_bytes()
89
+ peak_rss = get_process_peak_rss_bytes()
90
+
91
+ rss_saved_bytes = None
92
+ rss_saved_pct = None
93
+ rss_saved_workload_bytes = None
94
+ rss_saved_workload_pct = None
95
+ if rss_after_numpy is not None and rss_after_drop_numpy is not None and rss_after_numpy > 0:
96
+ rss_saved_bytes = rss_after_numpy - rss_after_drop_numpy
97
+ rss_saved_pct = 100.0 * rss_saved_bytes / rss_after_numpy
98
+ if (
99
+ rss_start is not None
100
+ and rss_after_numpy is not None
101
+ and rss_after_drop_numpy is not None
102
+ and (rss_after_numpy - rss_start) > 0
103
+ ):
104
+ rss_saved_workload_bytes = (rss_after_numpy - rss_start) - (rss_after_drop_numpy - rss_start)
105
+ rss_saved_workload_pct = 100.0 * rss_saved_workload_bytes / (rss_after_numpy - rss_start)
106
+
107
+ rss_numpy_consumption = None
108
+ rss_mlarray_compressed_consumption = None
109
+ rss_compression_ratio = None
110
+ if rss_start is not None and rss_after_numpy is not None:
111
+ rss_numpy_consumption = rss_after_numpy - rss_start
112
+ if rss_start is not None and rss_after_drop_numpy is not None:
113
+ rss_mlarray_compressed_consumption = rss_after_drop_numpy - rss_start
114
+ if (
115
+ rss_numpy_consumption is not None
116
+ and rss_mlarray_compressed_consumption is not None
117
+ and rss_mlarray_compressed_consumption > 0
118
+ ):
119
+ rss_compression_ratio = rss_numpy_consumption / rss_mlarray_compressed_consumption
120
+
121
+ print("Sparse array compression demo (in-memory)")
122
+ print(f"shape: {shape}")
123
+ print(f"density (non-zero): {density:.2%}")
124
+ print(f"numpy bytes: {numpy_bytes} ({to_mib(numpy_bytes):.2f} MiB)")
125
+ print(f"mlarray cbytes: {compressed_bytes} ({to_mib(compressed_bytes):.2f} MiB)")
126
+ print(f"mlarray nbytes: {uncompressed_bytes} ({to_mib(uncompressed_bytes):.2f} MiB)")
127
+ print(f"compression ratio: {ratio:.2f}x")
128
+ print(f"memory saved: {saved_bytes} ({to_mib(saved_bytes):.2f} MiB, {saved_pct:.2f}%)")
129
+ print(f"roundtrip equal: {equal}")
130
+ print()
131
+ print("Process RSS snapshots (real memory in RAM):")
132
+ print(f"rss start: {format_bytes_mib(rss_start)}")
133
+ print(f"rss after numpy: {format_bytes_mib(rss_after_numpy)}")
134
+ print(f"rss after compressed: {format_bytes_mib(rss_after_compressed)}")
135
+ print(f"rss after del numpy: {format_bytes_mib(rss_after_drop_numpy)}")
136
+ print(f"rss peak (VmHWM): {format_bytes_mib(peak_rss)}")
137
+ if rss_saved_bytes is not None and rss_saved_pct is not None:
138
+ print(
139
+ f"rss saved (raw): {rss_saved_bytes} ({to_mib(rss_saved_bytes):.2f} MiB, {rss_saved_pct:.2f}%)"
140
+ )
141
+ else:
142
+ print("rss saved (raw): n/a")
143
+ if rss_saved_workload_bytes is not None and rss_saved_workload_pct is not None:
144
+ print(
145
+ "rss saved (workload): "
146
+ f"{rss_saved_workload_bytes} ({to_mib(rss_saved_workload_bytes):.2f} MiB, {rss_saved_workload_pct:.2f}%)"
147
+ )
148
+ else:
149
+ print("rss saved (workload): n/a")
150
+ print()
151
+ print("RSS-derived memory consumption summary:")
152
+ if rss_numpy_consumption is not None:
153
+ print(
154
+ f"rss numpy memory consumption: {rss_numpy_consumption} ({to_mib(rss_numpy_consumption):.2f} MiB)"
155
+ )
156
+ else:
157
+ print("rss numpy memory consumption: n/a")
158
+ if rss_mlarray_compressed_consumption is not None:
159
+ print(
160
+ "rss mlarray compressed memory consumption: "
161
+ f"{rss_mlarray_compressed_consumption} ({to_mib(rss_mlarray_compressed_consumption):.2f} MiB)"
162
+ )
163
+ else:
164
+ print("rss mlarray compressed memory consumption: n/a")
165
+ if rss_compression_ratio is not None:
166
+ print(f"rss compression ratio: {rss_compression_ratio:.2f}x")
167
+ else:
168
+ print("rss compression ratio: n/a")
@@ -19,7 +19,7 @@ if __name__ == '__main__':
19
19
  os.remove(filepath)
20
20
 
21
21
  print("Initializing image...")
22
- image = MLArray.open(filepath, shape=array.shape, dtype=array.dtype, mmap='w+')
22
+ image = MLArray.create(filepath, array.shape, array.dtype)
23
23
  print("Saving image...")
24
24
  image[...] = array
25
25
  image.meta.copy_from(Meta(source=source_meta, spatial=MetaSpatial(spacing=spacing, origin=origin, direction=direction), bbox=MetaBbox(bboxes)))