kaiko-eva 0.2.1-py3-none-any.whl → 0.3.0-py3-none-any.whl
This diff compares publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in the public registry.
- eva/core/data/dataloaders/__init__.py +2 -1
- eva/core/data/dataloaders/collate_fn/__init__.py +5 -0
- eva/core/data/dataloaders/collate_fn/collate.py +24 -0
- eva/core/data/dataloaders/dataloader.py +4 -0
- eva/core/interface/interface.py +34 -1
- eva/core/metrics/defaults/classification/multiclass.py +45 -35
- eva/core/models/modules/__init__.py +2 -1
- eva/core/models/modules/scheduler.py +51 -0
- eva/core/models/transforms/extract_cls_features.py +1 -1
- eva/core/models/transforms/extract_patch_features.py +1 -1
- eva/core/models/wrappers/base.py +17 -14
- eva/core/models/wrappers/from_function.py +5 -4
- eva/core/models/wrappers/from_torchhub.py +5 -6
- eva/core/models/wrappers/huggingface.py +8 -5
- eva/core/models/wrappers/onnx.py +4 -4
- eva/core/trainers/_recorder.py +4 -1
- eva/core/trainers/functional.py +40 -43
- eva/core/utils/factory.py +66 -0
- eva/core/utils/registry.py +42 -0
- eva/core/utils/requirements.py +26 -0
- eva/language/__init__.py +13 -0
- eva/language/data/__init__.py +5 -0
- eva/language/data/datasets/__init__.py +9 -0
- eva/language/data/datasets/classification/__init__.py +7 -0
- eva/language/data/datasets/classification/base.py +63 -0
- eva/language/data/datasets/classification/pubmedqa.py +149 -0
- eva/language/data/datasets/language.py +13 -0
- eva/language/models/__init__.py +25 -0
- eva/language/models/modules/__init__.py +5 -0
- eva/language/models/modules/text.py +85 -0
- eva/language/models/modules/typings.py +16 -0
- eva/language/models/wrappers/__init__.py +11 -0
- eva/language/models/wrappers/huggingface.py +69 -0
- eva/language/models/wrappers/litellm.py +77 -0
- eva/language/models/wrappers/vllm.py +149 -0
- eva/language/utils/__init__.py +5 -0
- eva/language/utils/str_to_int_tensor.py +95 -0
- eva/vision/data/dataloaders/__init__.py +2 -1
- eva/vision/data/dataloaders/worker_init.py +35 -0
- eva/vision/data/datasets/__init__.py +5 -5
- eva/vision/data/datasets/segmentation/__init__.py +4 -4
- eva/vision/data/datasets/segmentation/btcv.py +3 -0
- eva/vision/data/datasets/segmentation/consep.py +5 -4
- eva/vision/data/datasets/segmentation/lits17.py +231 -0
- eva/vision/data/datasets/segmentation/metadata/__init__.py +1 -0
- eva/vision/data/datasets/segmentation/metadata/_msd_task7_pancreas.py +287 -0
- eva/vision/data/datasets/segmentation/msd_task7_pancreas.py +243 -0
- eva/vision/data/datasets/segmentation/total_segmentator_2d.py +1 -1
- eva/vision/data/transforms/__init__.py +11 -2
- eva/vision/data/transforms/base/__init__.py +5 -0
- eva/vision/data/transforms/base/monai.py +27 -0
- eva/vision/data/transforms/common/__init__.py +2 -1
- eva/vision/data/transforms/common/squeeze.py +24 -0
- eva/vision/data/transforms/croppad/__init__.py +4 -0
- eva/vision/data/transforms/croppad/rand_crop_by_label_classes.py +74 -0
- eva/vision/data/transforms/croppad/rand_crop_by_pos_neg_label.py +6 -2
- eva/vision/data/transforms/croppad/rand_spatial_crop.py +89 -0
- eva/vision/data/transforms/intensity/rand_scale_intensity.py +6 -2
- eva/vision/data/transforms/intensity/rand_shift_intensity.py +8 -4
- eva/vision/models/modules/semantic_segmentation.py +27 -11
- eva/vision/models/networks/backbones/__init__.py +2 -3
- eva/vision/models/networks/backbones/_utils.py +1 -1
- eva/vision/models/networks/backbones/pathology/bioptimus.py +4 -4
- eva/vision/models/networks/backbones/pathology/gigapath.py +2 -2
- eva/vision/models/networks/backbones/pathology/histai.py +3 -3
- eva/vision/models/networks/backbones/pathology/hkust.py +2 -2
- eva/vision/models/networks/backbones/pathology/kaiko.py +7 -7
- eva/vision/models/networks/backbones/pathology/lunit.py +3 -3
- eva/vision/models/networks/backbones/pathology/mahmood.py +3 -3
- eva/vision/models/networks/backbones/pathology/owkin.py +3 -3
- eva/vision/models/networks/backbones/pathology/paige.py +3 -3
- eva/vision/models/networks/backbones/radiology/swin_unetr.py +2 -2
- eva/vision/models/networks/backbones/radiology/voco.py +5 -5
- eva/vision/models/networks/backbones/registry.py +2 -44
- eva/vision/models/networks/backbones/timm/backbones.py +2 -2
- eva/vision/models/networks/backbones/universal/__init__.py +8 -1
- eva/vision/models/networks/backbones/universal/vit.py +53 -3
- eva/vision/models/networks/decoders/segmentation/decoder2d.py +1 -1
- eva/vision/models/networks/decoders/segmentation/linear.py +1 -1
- eva/vision/models/networks/decoders/segmentation/semantic/common.py +2 -2
- eva/vision/models/networks/decoders/segmentation/typings.py +1 -1
- eva/vision/models/wrappers/from_registry.py +14 -9
- eva/vision/models/wrappers/from_timm.py +6 -5
- {kaiko_eva-0.2.1.dist-info → kaiko_eva-0.3.0.dist-info}/METADATA +22 -12
- {kaiko_eva-0.2.1.dist-info → kaiko_eva-0.3.0.dist-info}/RECORD +89 -58
- {kaiko_eva-0.2.1.dist-info → kaiko_eva-0.3.0.dist-info}/WHEEL +1 -1
- eva/vision/data/datasets/segmentation/lits.py +0 -199
- eva/vision/data/datasets/segmentation/lits_balanced.py +0 -94
- /eva/vision/data/datasets/segmentation/{_total_segmentator.py → metadata/_total_segmentator.py} +0 -0
- {kaiko_eva-0.2.1.dist-info → kaiko_eva-0.3.0.dist-info}/entry_points.txt +0 -0
- {kaiko_eva-0.2.1.dist-info → kaiko_eva-0.3.0.dist-info}/licenses/LICENSE +0 -0
eva/vision/data/datasets/segmentation/lits.py
DELETED
@@ -1,199 +0,0 @@
-"""LiTS dataset."""
-
-import functools
-import glob
-import os
-from typing import Any, Callable, Dict, List, Literal, Tuple
-
-import numpy as np
-import numpy.typing as npt
-import torch
-from torchvision import tv_tensors
-from typing_extensions import override
-
-from eva.core import utils
-from eva.core.data import splitting
-from eva.vision.data.datasets import _validators, vision
-from eva.vision.utils import io
-
-
-class LiTS(vision.VisionDataset[tv_tensors.Image, tv_tensors.Mask]):
-    """LiTS - Liver Tumor Segmentation Challenge.
-
-    Webpage: https://competitions.codalab.org/competitions/17094
-    """
-
-    _train_ratio: float = 0.7
-    _val_ratio: float = 0.15
-    _test_ratio: float = 0.15
-    """Index ranges per split."""
-
-    _fix_orientation: bool = True
-    """Whether to fix the orientation of the images to match the default for radiologists."""
-
-    _sample_every_n_slices: int | None = None
-    """The amount of slices to sub-sample per 3D CT scan image."""
-
-    _expected_dataset_lengths: Dict[str | None, int] = {
-        "train": 38686,
-        "val": 11192,
-        "test": 8760,
-        None: 58638,
-    }
-    """Dataset version and split to the expected size."""
-
-    _license: str = (
-        "Creative Commons Attribution-NonCommercial-NoDerivatives 4.0 International License "
-        "(https://creativecommons.org/licenses/by-nc-nd/4.0/deed.en)"
-    )
-    """Dataset license."""
-
-    def __init__(
-        self,
-        root: str,
-        split: Literal["train", "val", "test"] | None = None,
-        transforms: Callable | None = None,
-        seed: int = 8,
-    ) -> None:
-        """Initialize dataset.
-
-        Args:
-            root: Path to the root directory of the dataset. The dataset will
-                be downloaded and extracted here, if it does not already exist.
-            split: Dataset split to use.
-            transforms: A function/transforms that takes in an image and a target
-                mask and returns the transformed versions of both.
-            seed: Seed used for generating the dataset splits.
-        """
-        super().__init__(transforms=transforms)
-
-        self._root = root
-        self._split = split
-        self._seed = seed
-        self._indices: List[Tuple[int, int]] = []
-
-    @property
-    @override
-    def classes(self) -> List[str]:
-        return ["background", "liver", "tumor"]
-
-    @functools.cached_property
-    @override
-    def class_to_idx(self) -> Dict[str, int]:
-        return {label: index for index, label in enumerate(self.classes)}
-
-    @override
-    def filename(self, index: int) -> str:
-        sample_index, _ = self._indices[index]
-        volume_file_path = self._volume_files[sample_index]
-        return os.path.relpath(volume_file_path, self._root)
-
-    @override
-    def configure(self) -> None:
-        self._indices = self._create_indices()
-
-    @override
-    def validate(self) -> None:
-        for i in range(len(self._volume_files)):
-            seg_path = self._segmentation_file(i)
-            if not os.path.exists(seg_path):
-                raise FileNotFoundError(
-                    f"Segmentation file {seg_path} not found for volume {self._volume_files[i]}."
-                )
-
-        _validators.check_dataset_integrity(
-            self,
-            length=self._expected_dataset_lengths.get(self._split, 0),
-            n_classes=3,
-            first_and_last_labels=("background", "tumor"),
-        )
-
-    @override
-    def load_data(self, index: int) -> tv_tensors.Image:
-        sample_index, slice_index = self._indices[index]
-        volume_path = self._volume_files[sample_index]
-        image_nii = io.read_nifti(volume_path, slice_index)
-        image_array = io.nifti_to_array(image_nii)
-        if self._fix_orientation:
-            image_array = self._orientation(image_array, sample_index)
-        return tv_tensors.Image(image_array.transpose(2, 0, 1))
-
-    @override
-    def load_target(self, index: int) -> tv_tensors.Mask:
-        sample_index, slice_index = self._indices[index]
-        segmentation_path = self._segmentation_file(sample_index)
-        mask_nii = io.read_nifti(segmentation_path, slice_index)
-        mask_array = io.nifti_to_array(mask_nii)
-        if self._fix_orientation:
-            semantic_labels = self._orientation(mask_array, sample_index)
-        return tv_tensors.Mask(semantic_labels.squeeze(), dtype=torch.int64)  # type: ignore[reportCallIssue]
-
-    def _orientation(self, array: npt.NDArray, sample_index: int) -> npt.NDArray:
-        volume_path = self._volume_files[sample_index]
-        orientation = io.fetch_nifti_axis_direction_code(volume_path)
-        array = np.rot90(array, axes=(0, 1))
-        if orientation == "LPS":
-            array = np.flip(array, axis=0)
-        return array.copy()
-
-    @override
-    def load_metadata(self, index: int) -> Dict[str, Any]:
-        _, slice_index = self._indices[index]
-        return {"slice_index": slice_index}
-
-    @override
-    def __len__(self) -> int:
-        return len(self._indices)
-
-    def _get_number_of_slices_per_volume(self, sample_index: int) -> int:
-        """Returns the total amount of slices of a volume."""
-        file_path = self._volume_files[sample_index]
-        volume_shape = io.fetch_nifti_shape(file_path)
-        return volume_shape[-1]
-
-    @functools.cached_property
-    def _volume_files(self) -> List[str]:
-        files_pattern = os.path.join(self._root, "**", "volume-*.nii")
-        files = glob.glob(files_pattern, recursive=True)
-        return utils.numeric_sort(files)
-
-    def _segmentation_file(self, index: int) -> str:
-        volume_file_path = self._volume_files[index]
-        segmentation_file = os.path.basename(volume_file_path).replace("volume", "segmentation")
-        return os.path.join(os.path.dirname(volume_file_path), segmentation_file)
-
-    def _create_indices(self) -> List[Tuple[int, int]]:
-        """Builds the dataset indices for the specified split.
-
-        Returns:
-            A list of tuples, where the first value indicates the
-            sample index which the second its corresponding slice
-            index.
-        """
-        indices = [
-            (sample_idx, slide_idx)
-            for sample_idx in self._get_split_indices()
-            for slide_idx in range(self._get_number_of_slices_per_volume(sample_idx))
-            if slide_idx % (self._sample_every_n_slices or 1) == 0
-        ]
-        return indices
-
-    def _get_split_indices(self) -> List[int]:
-        """Returns the sample indices for the specified dataset split."""
-        indices = list(range(len(self._volume_files)))
-        train_indices, val_indices, test_indices = splitting.random_split(
-            indices, self._train_ratio, self._val_ratio, self._test_ratio, seed=self._seed
-        )
-        split_indices_dict = {
-            "train": train_indices,
-            "val": val_indices,
-            "test": test_indices,
-            None: indices,
-        }
-        if self._split not in split_indices_dict:
-            raise ValueError("Invalid data split. Use 'train', 'val', 'test' or `None`.")
-        return list(split_indices_dict[self._split])
-
-    def _print_license(self) -> None:
-        """Prints the dataset license."""
-        print(f"Dataset license: {self._license}")
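A minimal usage sketch of the removed class, reconstructed from the deleted code above (the root path is illustrative, and this import no longer resolves in 0.3.0):

    from eva.vision.data.datasets.segmentation import lits

    dataset = lits.LiTS(root="/data/lits", split="train", seed=8)
    dataset.configure()              # builds the (volume, slice) index pairs
    image = dataset.load_data(0)     # tv_tensors.Image, channels-first 2D slice
    mask = dataset.load_target(0)    # tv_tensors.Mask with labels 0/1/2
    meta = dataset.load_metadata(0)  # {"slice_index": ...}

Judging from the file list, this dataset appears to be superseded by the new eva/vision/data/datasets/segmentation/lits17.py module added in this release.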
eva/vision/data/datasets/segmentation/lits_balanced.py
DELETED
@@ -1,94 +0,0 @@
-"""Balanced LiTS dataset."""
-
-from typing import Callable, Dict, List, Literal, Tuple
-
-import numpy as np
-from typing_extensions import override
-
-from eva.vision.data.datasets.segmentation import lits
-from eva.vision.utils import io
-
-
-class LiTSBalanced(lits.LiTS):
-    """Balanced version of the LiTS - Liver Tumor Segmentation Challenge dataset.
-
-    For each volume in the dataset, we sample the same number of slices where
-    only the liver and where both liver and tumor are present.
-
-    Webpage: https://competitions.codalab.org/competitions/17094
-
-    For the splits we follow: https://arxiv.org/pdf/2010.01663v2
-    """
-
-    _expected_dataset_lengths: Dict[str | None, int] = {
-        "train": 5514,
-        "val": 1332,
-        "test": 1530,
-        None: 8376,
-    }
-    """Dataset version and split to the expected size."""
-
-    def __init__(
-        self,
-        root: str,
-        split: Literal["train", "val", "test"] | None = None,
-        transforms: Callable | None = None,
-        seed: int = 8,
-    ) -> None:
-        """Initialize dataset.
-
-        Args:
-            root: Path to the root directory of the dataset. The dataset will
-                be downloaded and extracted here, if it does not already exist.
-            split: Dataset split to use.
-            transforms: A function/transforms that takes in an image and a target
-                mask and returns the transformed versions of both.
-            seed: Seed used for generating the dataset splits and sampling of the slices.
-        """
-        super().__init__(root=root, split=split, transforms=transforms, seed=seed)
-
-    @override
-    def _create_indices(self) -> List[Tuple[int, int]]:
-        """Builds the dataset indices for the specified split.
-
-        Returns:
-            A list of tuples, where the first value indicates the
-            sample index which the second its corresponding slice
-            index.
-        """
-        split_indices = set(self._get_split_indices())
-        indices: List[Tuple[int, int]] = []
-        random_generator = np.random.default_rng(seed=self._seed)
-
-        for sample_idx in range(len(self._volume_files)):
-            if sample_idx not in split_indices:
-                continue
-
-            segmentation_nii = io.read_nifti(self._segmentation_file(sample_idx))
-            segmentation = io.nifti_to_array(segmentation_nii)
-            tumor_filter = segmentation == 2
-            tumor_slice_filter = tumor_filter.sum(axis=(0, 1)) > 0
-
-            if tumor_filter.sum() == 0:
-                continue
-
-            liver_filter = segmentation == 1
-            liver_slice_filter = liver_filter.sum(axis=(0, 1)) > 0
-
-            liver_and_tumor_filter = liver_slice_filter & tumor_slice_filter
-            liver_only_filter = liver_slice_filter & ~tumor_slice_filter
-
-            n_slice_samples = min(liver_and_tumor_filter.sum(), liver_only_filter.sum())
-            tumor_indices = list(np.where(liver_and_tumor_filter)[0])
-            tumor_indices = list(
-                random_generator.choice(tumor_indices, size=n_slice_samples, replace=False)
-            )
-
-            liver_indices = list(np.where(liver_only_filter)[0])
-            liver_indices = list(
-                random_generator.choice(liver_indices, size=n_slice_samples, replace=False)
-            )
-
-            indices.extend([(sample_idx, slice_idx) for slice_idx in tumor_indices + liver_indices])
-
-        return list(indices)
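The balancing step in _create_indices above can be checked in isolation; a runnable toy reproduction of its slice selection (shapes and label values invented here for illustration) is:

    import numpy as np

    rng = np.random.default_rng(seed=8)
    seg = np.zeros((4, 4, 10), dtype=np.int64)  # toy (H, W, n_slices) mask
    seg[1:3, 1:3, :6] = 1                       # liver on slices 0-5
    seg[2, 2, 4:6] = 2                          # tumor on slices 4-5

    tumor_slices = (seg == 2).sum(axis=(0, 1)) > 0
    liver_slices = (seg == 1).sum(axis=(0, 1)) > 0
    both = np.where(liver_slices & tumor_slices)[0]   # -> [4, 5]
    only = np.where(liver_slices & ~tumor_slices)[0]  # -> [0, 1, 2, 3]
    n_samples = min(len(both), len(only))             # equal counts per group
    picked = np.concatenate([
        rng.choice(both, size=n_samples, replace=False),
        rng.choice(only, size=n_samples, replace=False),
    ])

As in the removed implementation, volumes whose masks contain no tumor voxels contribute no slices at all.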
/eva/vision/data/datasets/segmentation/{_total_segmentator.py → metadata/_total_segmentator.py}
RENAMED
File without changes

{kaiko_eva-0.2.1.dist-info → kaiko_eva-0.3.0.dist-info}/entry_points.txt
File without changes

{kaiko_eva-0.2.1.dist-info → kaiko_eva-0.3.0.dist-info}/licenses/LICENSE
File without changes