careamics 0.0.11__py3-none-any.whl → 0.0.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of careamics might be problematic. Click here for more details.
- careamics/careamist.py +24 -7
- careamics/cli/utils.py +1 -1
- careamics/config/algorithms/n2v_algorithm_model.py +1 -1
- careamics/config/architectures/unet_model.py +3 -0
- careamics/config/callback_model.py +23 -34
- careamics/config/configuration.py +55 -4
- careamics/config/configuration_factories.py +288 -23
- careamics/config/data/__init__.py +2 -0
- careamics/config/data/data_model.py +41 -4
- careamics/config/data/ng_data_model.py +381 -0
- careamics/config/data/patching_strategies/__init__.py +14 -0
- careamics/config/data/patching_strategies/_overlapping_patched_model.py +103 -0
- careamics/config/data/patching_strategies/_patched_model.py +56 -0
- careamics/config/data/patching_strategies/random_patching_model.py +21 -0
- careamics/config/data/patching_strategies/sequential_patching_model.py +25 -0
- careamics/config/data/patching_strategies/tiled_patching_model.py +40 -0
- careamics/config/data/patching_strategies/whole_patching_model.py +12 -0
- careamics/config/inference_model.py +6 -3
- careamics/config/optimizer_models.py +1 -3
- careamics/config/support/supported_data.py +7 -0
- careamics/config/support/supported_patching_strategies.py +22 -0
- careamics/config/training_model.py +0 -2
- careamics/config/validators/validator_utils.py +4 -3
- careamics/dataset/dataset_utils/iterate_over_files.py +2 -2
- careamics/dataset/in_memory_dataset.py +2 -1
- careamics/dataset/iterable_dataset.py +2 -2
- careamics/dataset/iterable_pred_dataset.py +2 -2
- careamics/dataset/iterable_tiled_pred_dataset.py +2 -2
- careamics/dataset/patching/patching.py +3 -2
- careamics/dataset/tiling/lvae_tiled_patching.py +16 -6
- careamics/dataset/tiling/tiled_patching.py +2 -1
- careamics/dataset_ng/README.md +212 -0
- careamics/dataset_ng/dataset.py +229 -0
- careamics/dataset_ng/demos/bsd68_demo.ipynb +361 -0
- careamics/dataset_ng/demos/care_U2OS_demo.ipynb +330 -0
- careamics/dataset_ng/demos/demo_custom_image_stack.ipynb +734 -0
- careamics/dataset_ng/demos/demo_datamodule.ipynb +447 -0
- careamics/dataset_ng/{demo_dataset.ipynb → demos/demo_dataset.ipynb} +60 -53
- careamics/dataset_ng/{demo_patch_extractor.py → demos/demo_patch_extractor.py} +7 -9
- careamics/dataset_ng/demos/mouse_nuclei_demo.ipynb +292 -0
- careamics/dataset_ng/factory.py +451 -0
- careamics/dataset_ng/legacy_interoperability.py +170 -0
- careamics/dataset_ng/patch_extractor/__init__.py +3 -8
- careamics/dataset_ng/patch_extractor/demo_custom_image_stack_loader.py +7 -5
- careamics/dataset_ng/patch_extractor/image_stack/__init__.py +4 -1
- careamics/dataset_ng/patch_extractor/image_stack/czi_image_stack.py +360 -0
- careamics/dataset_ng/patch_extractor/image_stack/image_stack_protocol.py +5 -1
- careamics/dataset_ng/patch_extractor/image_stack/in_memory_image_stack.py +1 -1
- careamics/dataset_ng/patch_extractor/image_stack_loader.py +5 -75
- careamics/dataset_ng/patch_extractor/patch_extractor.py +5 -4
- careamics/dataset_ng/patch_extractor/patch_extractor_factory.py +114 -105
- careamics/dataset_ng/patching_strategies/__init__.py +6 -1
- careamics/dataset_ng/patching_strategies/patching_strategy_protocol.py +31 -0
- careamics/dataset_ng/patching_strategies/random_patching.py +5 -1
- careamics/dataset_ng/patching_strategies/sequential_patching.py +5 -5
- careamics/dataset_ng/patching_strategies/tiling_strategy.py +172 -0
- careamics/dataset_ng/patching_strategies/whole_sample.py +36 -0
- careamics/file_io/read/get_func.py +2 -1
- careamics/lightning/dataset_ng/__init__.py +1 -0
- careamics/lightning/dataset_ng/data_module.py +678 -0
- careamics/lightning/dataset_ng/lightning_modules/__init__.py +9 -0
- careamics/lightning/dataset_ng/lightning_modules/care_module.py +97 -0
- careamics/lightning/dataset_ng/lightning_modules/n2v_module.py +106 -0
- careamics/lightning/dataset_ng/lightning_modules/unet_module.py +212 -0
- careamics/lightning/lightning_module.py +5 -1
- careamics/lightning/predict_data_module.py +2 -1
- careamics/lightning/train_data_module.py +2 -1
- careamics/losses/loss_factory.py +2 -1
- careamics/lvae_training/dataset/__init__.py +8 -3
- careamics/lvae_training/dataset/config.py +3 -3
- careamics/lvae_training/dataset/ms_dataset_ref.py +1067 -0
- careamics/lvae_training/dataset/multich_dataset.py +46 -17
- careamics/lvae_training/dataset/multicrop_dset.py +196 -0
- careamics/lvae_training/dataset/types.py +3 -3
- careamics/lvae_training/dataset/utils/index_manager.py +259 -0
- careamics/lvae_training/eval_utils.py +93 -3
- careamics/model_io/bioimage/bioimage_utils.py +1 -1
- careamics/model_io/bioimage/model_description.py +1 -1
- careamics/model_io/bmz_io.py +1 -1
- careamics/model_io/model_io_utils.py +2 -2
- careamics/models/activation.py +2 -1
- careamics/prediction_utils/prediction_outputs.py +1 -1
- careamics/prediction_utils/stitch_prediction.py +1 -1
- careamics/transforms/compose.py +1 -0
- careamics/transforms/n2v_manipulate_torch.py +15 -9
- careamics/transforms/normalize.py +18 -7
- careamics/transforms/pixel_manipulation_torch.py +59 -92
- careamics/utils/lightning_utils.py +25 -11
- careamics/utils/metrics.py +2 -1
- careamics/utils/torch_utils.py +23 -0
- {careamics-0.0.11.dist-info → careamics-0.0.13.dist-info}/METADATA +12 -11
- {careamics-0.0.11.dist-info → careamics-0.0.13.dist-info}/RECORD +95 -69
- careamics/dataset_ng/dataset/__init__.py +0 -3
- careamics/dataset_ng/dataset/dataset.py +0 -184
- careamics/dataset_ng/demo_patch_extractor_factory.py +0 -37
- {careamics-0.0.11.dist-info → careamics-0.0.13.dist-info}/WHEEL +0 -0
- {careamics-0.0.11.dist-info → careamics-0.0.13.dist-info}/entry_points.txt +0 -0
- {careamics-0.0.11.dist-info → careamics-0.0.13.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,451 @@
|
|
|
1
|
+
from collections.abc import Sequence
|
|
2
|
+
from enum import Enum
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from typing import Any, Optional
|
|
5
|
+
|
|
6
|
+
from numpy.typing import NDArray
|
|
7
|
+
from typing_extensions import ParamSpec
|
|
8
|
+
|
|
9
|
+
from careamics.config.data.ng_data_model import NGDataConfig
|
|
10
|
+
from careamics.config.support import SupportedData
|
|
11
|
+
from careamics.dataset_ng.patch_extractor import ImageStackLoader, PatchExtractor
|
|
12
|
+
from careamics.dataset_ng.patch_extractor.image_stack import (
|
|
13
|
+
CziImageStack,
|
|
14
|
+
GenericImageStack,
|
|
15
|
+
ImageStack,
|
|
16
|
+
InMemoryImageStack,
|
|
17
|
+
ZarrImageStack,
|
|
18
|
+
)
|
|
19
|
+
from careamics.dataset_ng.patch_extractor.patch_extractor_factory import (
|
|
20
|
+
create_array_extractor,
|
|
21
|
+
create_custom_file_extractor,
|
|
22
|
+
create_custom_image_stack_extractor,
|
|
23
|
+
create_czi_extractor,
|
|
24
|
+
create_ome_zarr_extractor,
|
|
25
|
+
create_tiff_extractor,
|
|
26
|
+
)
|
|
27
|
+
from careamics.file_io.read import ReadFunc
|
|
28
|
+
|
|
29
|
+
from .dataset import CareamicsDataset, Mode
|
|
30
|
+
|
|
31
|
+
P = ParamSpec("P")
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
# Enum class used to determine which loading functions should be used
|
|
35
|
+
class DatasetType(Enum):
    """Label for a dataset, based on the underlying data and how it is loaded.

    The label is what the factory functions use to decide which loading
    function should build the dataset.
    """

    # Data supplied directly as arrays.
    ARRAY = "array"

    # TIFF files, either fully loaded into memory or loaded lazily.
    IN_MEM_TIFF = "in_mem_tiff"
    LAZY_TIFF = "lazy_tiff"

    # Custom file type read through a user-supplied read function, in memory.
    IN_MEM_CUSTOM_FILE = "in_mem_custom_file"

    OME_ZARR = "ome_zarr"
    CZI = "czi"

    # Data loaded through a user-supplied image stack loader.
    CUSTOM_IMAGE_STACK = "custom_image_stack"
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
# bit of a mess of if-else statements
|
|
48
|
+
def determine_dataset_type(
    data_type: SupportedData,
    in_memory: bool,
    read_func: Optional[ReadFunc] = None,
    image_stack_loader: Optional[ImageStackLoader] = None,
) -> DatasetType:
    """Map the data type and loading options onto a `DatasetType` label.

    Parameters
    ----------
    data_type : SupportedData
        The underlying datatype.
    in_memory : bool
        Whether all the data should be loaded into memory. This argument is ignored
        unless the `data_type` is "tiff" or "custom".
    read_func : ReadFunc, optional
        A function that can be used to load custom data. This argument is
        ignored unless the `data_type` is "custom".
    image_stack_loader : ImageStackLoader, optional
        A function for custom image stack loading. This argument is ignored unless
        the `data_type` is "custom". When both `read_func` and `image_stack_loader`
        are given, `read_func` takes precedence.

    Returns
    -------
    DatasetType
        The Dataset type.

    Raises
    ------
    NotImplementedError
        For lazy-loading (`in_memory=False`) of a custom file type.
    ValueError
        If the `data_type` is "custom" but both `read_func` and `image_stack_loader`
        are None.
    ValueError
        If the `data_type` is unrecognized.
    """
    if data_type == SupportedData.ARRAY:
        # TODO: ignoring in_memory arg, error if False?
        return DatasetType.ARRAY

    if data_type == SupportedData.TIFF:
        return DatasetType.IN_MEM_TIFF if in_memory else DatasetType.LAZY_TIFF

    if data_type == SupportedData.CZI:
        return DatasetType.CZI

    if data_type == SupportedData.CUSTOM:
        # a read function is checked first, then an image stack loader
        if read_func is not None:
            if not in_memory:
                raise NotImplementedError(
                    "Lazy loading has not been implemented for custom file types yet."
                )
            return DatasetType.IN_MEM_CUSTOM_FILE

        if image_stack_loader is not None:
            # TODO: ignoring in_memory arg
            return DatasetType.CUSTOM_IMAGE_STACK

        raise ValueError(
            "Found `data_type='custom'` but no `read_func` or `image_stack_loader` "
            "has been provided."
        )

    # TODO: ZARR
    raise ValueError(f"Unrecognized `data_type`, '{data_type}'.")
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
# convenience function but should use `create_dataloader` function instead
|
|
117
|
+
# For lazy loading custom batch sampler also needs to be set.
|
|
118
|
+
def create_dataset(
    config: NGDataConfig,
    mode: Mode,
    inputs: Any,
    targets: Any,
    in_memory: bool,
    read_func: Optional[ReadFunc] = None,
    read_kwargs: Optional[dict[str, Any]] = None,
    image_stack_loader: Optional[ImageStackLoader] = None,
    image_stack_loader_kwargs: Optional[dict[str, Any]] = None,
) -> CareamicsDataset[ImageStack]:
    """
    Convenience function to create the CAREamicsDataset.

    The dataset type is resolved with `determine_dataset_type` and the call is
    forwarded to the matching `create_*_dataset` function.

    Parameters
    ----------
    config : NGDataConfig
        The data configuration.
    mode : Mode
        Whether to create the dataset in "training", "validation" or "predicting" mode.
    inputs : Any
        The input sources to the dataset.
    targets : Any, optional
        The target sources to the dataset.
    in_memory : bool
        Whether all the data should be loaded into memory. This argument is ignored
        unless the `data_type` in `config` is "tiff" or "custom".
    read_func : ReadFunc, optional
        A function that can be used to load custom data. This argument is
        ignored unless the `data_type` in the `config` is "custom".
    read_kwargs : dict of {str, Any}, optional
        Additional key-word arguments to pass to the `read_func`.
    image_stack_loader : ImageStackLoader, optional
        A function for custom image stack loading. This argument is ignored unless the
        `data_type` in the `config` is "custom".
    image_stack_loader_kwargs : dict of {str, Any}, optional
        Additional key-word arguments to pass to the `image_stack_loader`.

    Returns
    -------
    CareamicsDataset[ImageStack]
        The CAREamicsDataset.

    Raises
    ------
    ValueError
        For an unrecognized `data_type` in the `config`, or for a resolved dataset
        type that has no creation function here (e.g. lazily loaded tiff).
    """
    dataset_type = determine_dataset_type(
        SupportedData(config.data_type), in_memory, read_func, image_stack_loader
    )

    if dataset_type == DatasetType.ARRAY:
        return create_array_dataset(config, mode, inputs, targets)

    if dataset_type == DatasetType.IN_MEM_TIFF:
        return create_tiff_dataset(config, mode, inputs, targets)

    # TODO: Lazy tiff

    if dataset_type == DatasetType.CZI:
        return create_czi_dataset(config, mode, inputs, targets)

    if dataset_type == DatasetType.IN_MEM_CUSTOM_FILE:
        assert read_func is not None  # should be true from `determine_dataset_type`
        return create_custom_file_dataset(
            config,
            mode,
            inputs,
            targets,
            read_func=read_func,
            read_kwargs={} if read_kwargs is None else read_kwargs,
        )

    if dataset_type == DatasetType.CUSTOM_IMAGE_STACK:
        assert image_stack_loader is not None  # should be true
        loader_kwargs = (
            {} if image_stack_loader_kwargs is None else image_stack_loader_kwargs
        )
        return create_custom_image_stack_dataset(
            config,
            mode,
            inputs,
            targets,
            image_stack_loader,
            **loader_kwargs,
        )

    raise ValueError(f"Unrecognized dataset type, {dataset_type}.")
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
def create_array_dataset(
    config: NGDataConfig,
    mode: Mode,
    inputs: Sequence[NDArray[Any]],
    targets: Optional[Sequence[NDArray[Any]]],
) -> CareamicsDataset[InMemoryImageStack]:
    """
    Create a CAREamicsDataset from array data.

    Parameters
    ----------
    config : NGDataConfig
        The data configuration; its `axes` are passed to the patch extractors.
    mode : Mode
        Whether to create the dataset in "training", "validation" or "predicting" mode.
    inputs : sequence of numpy.ndarray
        The input arrays of the dataset.
    targets : sequence of numpy.ndarray, optional
        The target arrays of the dataset. If None, no target extractor is created.

    Returns
    -------
    CareamicsDataset[InMemoryImageStack]
        A CAREamicsDataset.
    """
    # the input extractor is always built first, the target one only on demand
    input_extractor = create_array_extractor(source=inputs, axes=config.axes)
    target_extractor: Optional[PatchExtractor[InMemoryImageStack]] = (
        create_array_extractor(source=targets, axes=config.axes)
        if targets is not None
        else None
    )
    return CareamicsDataset(config, mode, input_extractor, target_extractor)
|
|
232
|
+
|
|
233
|
+
|
|
234
|
+
def create_tiff_dataset(
    config: NGDataConfig,
    mode: Mode,
    inputs: Sequence[Path],
    targets: Optional[Sequence[Path]],
) -> CareamicsDataset[InMemoryImageStack]:
    """
    Create a CAREamicsDataset from tiff files that will be all loaded into memory.

    Parameters
    ----------
    config : NGDataConfig
        The data configuration; its `axes` are passed to the patch extractors.
    mode : Mode
        Whether to create the dataset in "training", "validation" or "predicting" mode.
    inputs : sequence of Path
        The paths to the input files.
    targets : sequence of Path, optional
        The paths to the target files. If None, no target extractor is created.

    Returns
    -------
    CareamicsDataset[InMemoryImageStack]
        A CAREamicsDataset.
    """
    # the input extractor is always built first, the target one only on demand
    input_extractor = create_tiff_extractor(source=inputs, axes=config.axes)
    target_extractor: Optional[PatchExtractor[InMemoryImageStack]] = (
        create_tiff_extractor(source=targets, axes=config.axes)
        if targets is not None
        else None
    )
    return CareamicsDataset(config, mode, input_extractor, target_extractor)
|
|
270
|
+
|
|
271
|
+
|
|
272
|
+
def create_czi_dataset(
    config: NGDataConfig,
    mode: Mode,
    inputs: Sequence[Path],
    targets: Optional[Sequence[Path]],
) -> CareamicsDataset[CziImageStack]:
    """
    Create a dataset from CZI files.

    Parameters
    ----------
    config : NGDataConfig
        The data configuration; its `axes` are passed to the patch extractors.
    mode : Mode
        Whether to create the dataset in "training", "validation" or "predicting" mode.
    inputs : sequence of Path
        The paths to the input files.
    targets : sequence of Path, optional
        The paths to the target files. If None, no target extractor is created.

    Returns
    -------
    CareamicsDataset[CziImageStack]
        A CAREamicsDataset.
    """
    # the input extractor is always built first, the target one only on demand
    input_extractor = create_czi_extractor(source=inputs, axes=config.axes)
    target_extractor: Optional[PatchExtractor[CziImageStack]] = (
        create_czi_extractor(source=targets, axes=config.axes)
        if targets is not None
        else None
    )
    return CareamicsDataset(config, mode, input_extractor, target_extractor)
|
|
306
|
+
|
|
307
|
+
|
|
308
|
+
def create_ome_zarr_dataset(
    config: NGDataConfig,
    mode: Mode,
    inputs: Sequence[Path],
    targets: Optional[Sequence[Path]],
) -> CareamicsDataset[ZarrImageStack]:
    """
    Create a dataset from OME ZARR files.

    Parameters
    ----------
    config : NGDataConfig
        The data configuration; its `axes` are passed to the patch extractors.
    mode : Mode
        Whether to create the dataset in "training", "validation" or "predicting" mode.
    inputs : sequence of Path
        The paths to the input files.
    targets : sequence of Path, optional
        The paths to the target files. If None, no target extractor is created.

    Returns
    -------
    CareamicsDataset[ZarrImageStack]
        A CAREamicsDataset.
    """
    # the input extractor is always built first, the target one only on demand
    input_extractor = create_ome_zarr_extractor(source=inputs, axes=config.axes)
    target_extractor: Optional[PatchExtractor[ZarrImageStack]] = (
        create_ome_zarr_extractor(source=targets, axes=config.axes)
        if targets is not None
        else None
    )
    return CareamicsDataset(config, mode, input_extractor, target_extractor)
|
|
342
|
+
|
|
343
|
+
|
|
344
|
+
def create_custom_file_dataset(
    config: NGDataConfig,
    mode: Mode,
    inputs: Sequence[Path],
    targets: Optional[Sequence[Path]],
    *,
    read_func: ReadFunc,
    read_kwargs: dict[str, Any],
) -> CareamicsDataset[InMemoryImageStack]:
    """
    Create a CAREamicsDataset from custom files that will be all loaded into memory.

    Parameters
    ----------
    config : NGDataConfig
        The data configuration; its `axes` are passed to the patch extractors.
    mode : Mode
        Whether to create the dataset in "training", "validation" or "predicting" mode.
    inputs : sequence of Path
        The paths to the input files.
    targets : sequence of Path, optional
        The paths to the target files. If None, no target extractor is created.
    read_func : ReadFunc
        A function that can be used to load the custom data. It is used for both
        the inputs and the targets.
    read_kwargs : dict of {str, Any}
        Additional key-word arguments to pass to the `read_func`.

    Returns
    -------
    CareamicsDataset[InMemoryImageStack]
        A CAREamicsDataset.
    """
    # the input extractor is always built first, the target one only on demand
    input_extractor = create_custom_file_extractor(
        source=inputs,
        axes=config.axes,
        read_func=read_func,
        read_kwargs=read_kwargs,
    )
    target_extractor: Optional[PatchExtractor[InMemoryImageStack]] = (
        create_custom_file_extractor(
            source=targets,
            axes=config.axes,
            read_func=read_func,
            read_kwargs=read_kwargs,
        )
        if targets is not None
        else None
    )
    return CareamicsDataset(config, mode, input_extractor, target_extractor)
|
|
393
|
+
|
|
394
|
+
|
|
395
|
+
def create_custom_image_stack_dataset(
    config: NGDataConfig,
    mode: Mode,
    inputs: Any,
    targets: Optional[Any],
    image_stack_loader: ImageStackLoader[P, GenericImageStack],
    *args: P.args,
    **kwargs: P.kwargs,
) -> CareamicsDataset[GenericImageStack]:
    """
    Create a CAREamicsDataset from a custom `ImageStack` class.

    The custom `ImageStack` class can be loaded using the `image_stack_loader` function.

    Parameters
    ----------
    config : NGDataConfig
        The data configuration; its `axes` are passed to the patch extractors.
    mode : Mode
        Whether to create the dataset in "training", "validation" or "predicting" mode.
    inputs : Any
        The input sources to the dataset.
    targets : Any, optional
        The target sources to the dataset. If None, no target extractor is created.
    image_stack_loader : ImageStackLoader
        A function for custom image stack loading. It is used for both the inputs
        and the targets.
    *args : Any
        Positional arguments to pass to the `image_stack_loader`.
    **kwargs : Any
        Key-word arguments to pass to the `image_stack_loader`.

    Returns
    -------
    CareamicsDataset[GenericImageStack]
        A CAREamicsDataset.
    """
    # the input extractor is always built first, the target one only on demand
    input_extractor = create_custom_image_stack_extractor(
        inputs, config.axes, image_stack_loader, *args, **kwargs
    )
    target_extractor: Optional[PatchExtractor[GenericImageStack]] = (
        create_custom_image_stack_extractor(
            targets, config.axes, image_stack_loader, *args, **kwargs
        )
        if targets is not None
        else None
    )
    return CareamicsDataset(config, mode, input_extractor, target_extractor)
|
|
@@ -0,0 +1,170 @@
|
|
|
1
|
+
"""
|
|
2
|
+
A module for utility functions that adapts the new dataset outputs to work with previous
|
|
3
|
+
code until it is updated.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from collections.abc import Sequence
|
|
7
|
+
from typing import cast
|
|
8
|
+
|
|
9
|
+
import numpy as np
|
|
10
|
+
from numpy.typing import NDArray
|
|
11
|
+
|
|
12
|
+
from careamics.config.tile_information import TileInformation
|
|
13
|
+
|
|
14
|
+
from .dataset import ImageRegionData
|
|
15
|
+
from .patching_strategies import TileSpecs
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def imageregions_to_tileinfos(
    image_regions: Sequence[ImageRegionData],
) -> list[tuple[NDArray, list[TileInformation]]]:
    """
    Convert a series of `ImageRegionData` to data paired with `TileInformation`.

    The regions are grouped by `data_idx` (in ascending order) and, within each
    group, stably sorted by `sample_idx`. The last tile of every sample in a
    group is flagged with `last_tile=True`. Each region's data is emitted
    together with its own tile information, so the pairing stays correct even
    when `image_regions` is not already ordered by `data_idx` and `sample_idx`.

    Parameters
    ----------
    image_regions : sequence of ImageRegionData
        A list of ImageRegionData, each must have an instance of `TileSpecs` as its
        `region_spec` field.

    Returns
    -------
    list of (numpy.ndarray, list of TileInformation)
        One tuple per image region, containing the region's data and a
        single-element list with the converted tile information.
    """
    # data_idx denotes which image stack a patch belongs to
    data_indices: NDArray[np.int_] = np.array(
        [image_region.region_spec["data_idx"] for image_region in image_regions],
        dtype=int,
    )

    outputs: list[tuple[NDArray, list[TileInformation]]] = []

    # separate the regions by image stack
    for data_idx in np.unique(data_indices):
        # collect all ImageRegions belonging to this image stack
        data_image_regions: list[ImageRegionData] = [
            image_region
            for image_region in image_regions
            if image_region.region_spec["data_idx"] == data_idx
        ]

        # --- find last indices
        # make sure tiles belonging to the same sample are together
        # (list.sort is stable, the order within a sample is preserved)
        data_image_regions.sort(
            key=lambda image_region: image_region.region_spec["sample_idx"]
        )
        sample_indices = np.array(
            [
                image_region.region_spec["sample_idx"]
                for image_region in data_image_regions
            ]
        )
        # np.unique returns the first occurrence of each value, so the array is
        # reversed to obtain the index at the far edge of each sample
        _, unique_indices = np.unique(sample_indices[::-1], return_index=True)
        # un-reverse the indices; a set gives O(1) membership tests below
        last_indices = set((len(sample_indices) - 1 - unique_indices).tolist())

        # convert each ImageRegionData and keep it paired with its own data
        for i, image_region in enumerate(data_image_regions):
            tile_info = _imageregion_to_tileinfo(image_region, i in last_indices)
            outputs.append((image_region.data, [tile_info]))

    return outputs
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def _imageregion_to_tileinfo(
    image_region: ImageRegionData, last_tile: bool
) -> TileInformation:
    """
    Build a `TileInformation` from one `ImageRegionData` instance.

    The caller has to state whether the region is the last tile of a sequence.

    Parameters
    ----------
    image_region : ImageRegionData
        An instance of `ImageRegionData`, it must have an instance of `TileSpecs` as
        its `region_spec` field.
    last_tile : bool
        Whether a tile is the last tile in a sequence, for stitching.

    Returns
    -------
    TileInformation
        A tile information object.

    Raises
    ------
    KeyError
        If `image_region.region_spec` does not contain the keys: {'crop_coords',
        'crop_size', 'stitch_coords'}.
    """
    region_spec = image_region.region_spec
    source_shape = image_region.data_shape

    # TODO: In python 3.11 and greater, NamedTuples can inherit from Generic
    #   so we could do image_region: ImageRegionData[TileSpecs]
    #   and not have to do this check here + cast
    # make sure image_region.region_spec is TileSpec
    required_keys = ("crop_coords", "crop_size", "stitch_coords")
    if not all(key in region_spec for key in required_keys):
        raise KeyError(
            "Could not find all keys: {'crop_coords', 'crop_size', 'stitch_coords'} in "
            "`image_region.region_spec`."
        )

    # ugly cast for mypy
    return _tilespec_to_tileinfo(cast(TileSpecs, region_spec), source_shape, last_tile)
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
def _tilespec_to_tileinfo(
    tile_spec: TileSpecs, data_shape: Sequence[int], last_tile: bool
) -> TileInformation:
    """
    Build a `TileInformation` from one `TileSpecs` dictionary.

    The caller has to state whether the tile is the last one of a sequence.

    Parameters
    ----------
    tile_spec : TileSpecs
        A tile spec dictionary.
    data_shape : sequence of int
        The original shape of the data the tile came from, labeling the dimensions of
        axes SC(Z)YX.
    last_tile : bool
        Whether a tile is the last tile in a sequence, for stitching.

    Returns
    -------
    TileInformation
        A tile information object.
    """
    # (start, stop) of the crop along every dimension: start + crop size
    crop_intervals = []
    for dim in range(len(tile_spec["crop_coords"])):
        crop_start = tile_spec["crop_coords"][dim]
        crop_intervals.append((crop_start, crop_start + tile_spec["crop_size"][dim]))

    # (start, stop) of the stitching location, spanning the same crop size
    stitch_intervals = []
    for dim in range(len(tile_spec["crop_coords"])):
        stitch_start = tile_spec["stitch_coords"][dim]
        stitch_intervals.append(
            (stitch_start, stitch_start + tile_spec["crop_size"][dim])
        )

    return TileInformation(
        array_shape=tuple(data_shape[1:]),  # remove sample dimension
        last_tile=last_tile,
        overlap_crop_coords=tuple(crop_intervals),
        stitch_coords=tuple(stitch_intervals),
        sample_id=tile_spec["sample_idx"],
    )
|
|
@@ -1,10 +1,5 @@
|
|
|
1
|
-
__all__ = [
|
|
2
|
-
"ImageStackLoader",
|
|
3
|
-
"PatchExtractor",
|
|
4
|
-
"create_patch_extractor",
|
|
5
|
-
"get_image_stack_loader",
|
|
6
|
-
]
|
|
1
|
+
__all__ = ["GenericImageStack", "ImageStackLoader", "PatchExtractor"]
|
|
7
2
|
|
|
8
|
-
from .
|
|
3
|
+
from .image_stack import GenericImageStack
|
|
4
|
+
from .image_stack_loader import ImageStackLoader
|
|
9
5
|
from .patch_extractor import PatchExtractor
|
|
10
|
-
from .patch_extractor_factory import create_patch_extractor
|
|
@@ -11,9 +11,11 @@ from zarr.storage import FSStore
|
|
|
11
11
|
|
|
12
12
|
from careamics.config import DataConfig
|
|
13
13
|
from careamics.config.support import SupportedData
|
|
14
|
-
from careamics.dataset_ng.patch_extractor import create_patch_extractor
|
|
15
14
|
from careamics.dataset_ng.patch_extractor.image_stack import ZarrImageStack
|
|
16
15
|
from careamics.dataset_ng.patch_extractor.image_stack_loader import ImageStackLoader
|
|
16
|
+
from careamics.dataset_ng.patch_extractor.patch_extractor_factory import (
|
|
17
|
+
create_custom_image_stack_extractor,
|
|
18
|
+
)
|
|
17
19
|
|
|
18
20
|
|
|
19
21
|
# %%
|
|
@@ -33,7 +35,7 @@ def create_zarr_array(file_path: Path, data_path: str, data: NDArray):
|
|
|
33
35
|
|
|
34
36
|
|
|
35
37
|
def create_zarr(file_path: Path, data_paths: Sequence[str], data: Sequence[NDArray]):
|
|
36
|
-
for data_path, array in zip(data_paths, data):
|
|
38
|
+
for data_path, array in zip(data_paths, data, strict=False):
|
|
37
39
|
create_zarr_array(file_path=file_path, data_path=data_path, data=array)
|
|
38
40
|
|
|
39
41
|
|
|
@@ -94,12 +96,12 @@ image_stack_loader: ImageStackLoader = custom_image_stack_loader
|
|
|
94
96
|
|
|
95
97
|
# %%
|
|
96
98
|
# So pylance knows that datatype is custom to match function overloads
|
|
97
|
-
assert data_config.data_type is SupportedData.CUSTOM
|
|
99
|
+
assert SupportedData(data_config.data_type) is SupportedData.CUSTOM
|
|
98
100
|
|
|
99
|
-
patch_extractor =
|
|
101
|
+
patch_extractor = create_custom_image_stack_extractor(
|
|
100
102
|
source={"store": store, "data_paths": data_paths},
|
|
101
103
|
axes=data_config.axes,
|
|
102
|
-
data_type=data_config.data_type,
|
|
104
|
+
data_type=SupportedData(data_config.data_type),
|
|
103
105
|
image_stack_loader=custom_image_stack_loader,
|
|
104
106
|
)
|
|
105
107
|
|
|
@@ -1,9 +1,12 @@
|
|
|
1
1
|
__all__ = [
|
|
2
|
+
"CziImageStack",
|
|
3
|
+
"GenericImageStack",
|
|
2
4
|
"ImageStack",
|
|
3
5
|
"InMemoryImageStack",
|
|
4
6
|
"ZarrImageStack",
|
|
5
7
|
]
|
|
6
8
|
|
|
7
|
-
from .
|
|
9
|
+
from .czi_image_stack import CziImageStack
|
|
10
|
+
from .image_stack_protocol import GenericImageStack, ImageStack
|
|
8
11
|
from .in_memory_image_stack import InMemoryImageStack
|
|
9
12
|
from .zarr_image_stack import ZarrImageStack
|