kaiko-eva 0.2.1__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
Files changed (91)
  1. eva/core/data/dataloaders/__init__.py +2 -1
  2. eva/core/data/dataloaders/collate_fn/__init__.py +5 -0
  3. eva/core/data/dataloaders/collate_fn/collate.py +24 -0
  4. eva/core/data/dataloaders/dataloader.py +4 -0
  5. eva/core/interface/interface.py +34 -1
  6. eva/core/metrics/defaults/classification/multiclass.py +45 -35
  7. eva/core/models/modules/__init__.py +2 -1
  8. eva/core/models/modules/scheduler.py +51 -0
  9. eva/core/models/transforms/extract_cls_features.py +1 -1
  10. eva/core/models/transforms/extract_patch_features.py +1 -1
  11. eva/core/models/wrappers/base.py +17 -14
  12. eva/core/models/wrappers/from_function.py +5 -4
  13. eva/core/models/wrappers/from_torchhub.py +5 -6
  14. eva/core/models/wrappers/huggingface.py +8 -5
  15. eva/core/models/wrappers/onnx.py +4 -4
  16. eva/core/trainers/_recorder.py +4 -1
  17. eva/core/trainers/functional.py +40 -43
  18. eva/core/utils/factory.py +66 -0
  19. eva/core/utils/registry.py +42 -0
  20. eva/core/utils/requirements.py +26 -0
  21. eva/language/__init__.py +13 -0
  22. eva/language/data/__init__.py +5 -0
  23. eva/language/data/datasets/__init__.py +9 -0
  24. eva/language/data/datasets/classification/__init__.py +7 -0
  25. eva/language/data/datasets/classification/base.py +63 -0
  26. eva/language/data/datasets/classification/pubmedqa.py +149 -0
  27. eva/language/data/datasets/language.py +13 -0
  28. eva/language/models/__init__.py +25 -0
  29. eva/language/models/modules/__init__.py +5 -0
  30. eva/language/models/modules/text.py +85 -0
  31. eva/language/models/modules/typings.py +16 -0
  32. eva/language/models/wrappers/__init__.py +11 -0
  33. eva/language/models/wrappers/huggingface.py +69 -0
  34. eva/language/models/wrappers/litellm.py +77 -0
  35. eva/language/models/wrappers/vllm.py +149 -0
  36. eva/language/utils/__init__.py +5 -0
  37. eva/language/utils/str_to_int_tensor.py +95 -0
  38. eva/vision/data/dataloaders/__init__.py +2 -1
  39. eva/vision/data/dataloaders/worker_init.py +35 -0
  40. eva/vision/data/datasets/__init__.py +5 -5
  41. eva/vision/data/datasets/segmentation/__init__.py +4 -4
  42. eva/vision/data/datasets/segmentation/btcv.py +3 -0
  43. eva/vision/data/datasets/segmentation/consep.py +5 -4
  44. eva/vision/data/datasets/segmentation/lits17.py +231 -0
  45. eva/vision/data/datasets/segmentation/metadata/__init__.py +1 -0
  46. eva/vision/data/datasets/segmentation/metadata/_msd_task7_pancreas.py +287 -0
  47. eva/vision/data/datasets/segmentation/msd_task7_pancreas.py +243 -0
  48. eva/vision/data/datasets/segmentation/total_segmentator_2d.py +1 -1
  49. eva/vision/data/transforms/__init__.py +11 -2
  50. eva/vision/data/transforms/base/__init__.py +5 -0
  51. eva/vision/data/transforms/base/monai.py +27 -0
  52. eva/vision/data/transforms/common/__init__.py +2 -1
  53. eva/vision/data/transforms/common/squeeze.py +24 -0
  54. eva/vision/data/transforms/croppad/__init__.py +4 -0
  55. eva/vision/data/transforms/croppad/rand_crop_by_label_classes.py +74 -0
  56. eva/vision/data/transforms/croppad/rand_crop_by_pos_neg_label.py +6 -2
  57. eva/vision/data/transforms/croppad/rand_spatial_crop.py +89 -0
  58. eva/vision/data/transforms/intensity/rand_scale_intensity.py +6 -2
  59. eva/vision/data/transforms/intensity/rand_shift_intensity.py +8 -4
  60. eva/vision/models/modules/semantic_segmentation.py +27 -11
  61. eva/vision/models/networks/backbones/__init__.py +2 -3
  62. eva/vision/models/networks/backbones/_utils.py +1 -1
  63. eva/vision/models/networks/backbones/pathology/bioptimus.py +4 -4
  64. eva/vision/models/networks/backbones/pathology/gigapath.py +2 -2
  65. eva/vision/models/networks/backbones/pathology/histai.py +3 -3
  66. eva/vision/models/networks/backbones/pathology/hkust.py +2 -2
  67. eva/vision/models/networks/backbones/pathology/kaiko.py +7 -7
  68. eva/vision/models/networks/backbones/pathology/lunit.py +3 -3
  69. eva/vision/models/networks/backbones/pathology/mahmood.py +3 -3
  70. eva/vision/models/networks/backbones/pathology/owkin.py +3 -3
  71. eva/vision/models/networks/backbones/pathology/paige.py +3 -3
  72. eva/vision/models/networks/backbones/radiology/swin_unetr.py +2 -2
  73. eva/vision/models/networks/backbones/radiology/voco.py +5 -5
  74. eva/vision/models/networks/backbones/registry.py +2 -44
  75. eva/vision/models/networks/backbones/timm/backbones.py +2 -2
  76. eva/vision/models/networks/backbones/universal/__init__.py +8 -1
  77. eva/vision/models/networks/backbones/universal/vit.py +53 -3
  78. eva/vision/models/networks/decoders/segmentation/decoder2d.py +1 -1
  79. eva/vision/models/networks/decoders/segmentation/linear.py +1 -1
  80. eva/vision/models/networks/decoders/segmentation/semantic/common.py +2 -2
  81. eva/vision/models/networks/decoders/segmentation/typings.py +1 -1
  82. eva/vision/models/wrappers/from_registry.py +14 -9
  83. eva/vision/models/wrappers/from_timm.py +6 -5
  84. {kaiko_eva-0.2.1.dist-info → kaiko_eva-0.3.0.dist-info}/METADATA +22 -12
  85. {kaiko_eva-0.2.1.dist-info → kaiko_eva-0.3.0.dist-info}/RECORD +89 -58
  86. {kaiko_eva-0.2.1.dist-info → kaiko_eva-0.3.0.dist-info}/WHEEL +1 -1
  87. eva/vision/data/datasets/segmentation/lits.py +0 -199
  88. eva/vision/data/datasets/segmentation/lits_balanced.py +0 -94
  89. /eva/vision/data/datasets/segmentation/{_total_segmentator.py → metadata/_total_segmentator.py} +0 -0
  90. {kaiko_eva-0.2.1.dist-info → kaiko_eva-0.3.0.dist-info}/entry_points.txt +0 -0
  91. {kaiko_eva-0.2.1.dist-info → kaiko_eva-0.3.0.dist-info}/licenses/LICENSE +0 -0
eva/vision/data/datasets/segmentation/lits.py
@@ -1,199 +0,0 @@
- """LiTS dataset."""
-
- import functools
- import glob
- import os
- from typing import Any, Callable, Dict, List, Literal, Tuple
-
- import numpy as np
- import numpy.typing as npt
- import torch
- from torchvision import tv_tensors
- from typing_extensions import override
-
- from eva.core import utils
- from eva.core.data import splitting
- from eva.vision.data.datasets import _validators, vision
- from eva.vision.utils import io
-
-
- class LiTS(vision.VisionDataset[tv_tensors.Image, tv_tensors.Mask]):
-     """LiTS - Liver Tumor Segmentation Challenge.
-
-     Webpage: https://competitions.codalab.org/competitions/17094
-     """
-
-     _train_ratio: float = 0.7
-     _val_ratio: float = 0.15
-     _test_ratio: float = 0.15
-     """Index ranges per split."""
-
-     _fix_orientation: bool = True
-     """Whether to fix the orientation of the images to match the default for radiologists."""
-
-     _sample_every_n_slices: int | None = None
-     """The amount of slices to sub-sample per 3D CT scan image."""
-
-     _expected_dataset_lengths: Dict[str | None, int] = {
-         "train": 38686,
-         "val": 11192,
-         "test": 8760,
-         None: 58638,
-     }
-     """Dataset version and split to the expected size."""
-
-     _license: str = (
-         "Creative Commons Attribution-NonCommercial-NoDerivatives 4.0 International License "
-         "(https://creativecommons.org/licenses/by-nc-nd/4.0/deed.en)"
-     )
-     """Dataset license."""
-
-     def __init__(
-         self,
-         root: str,
-         split: Literal["train", "val", "test"] | None = None,
-         transforms: Callable | None = None,
-         seed: int = 8,
-     ) -> None:
-         """Initialize dataset.
-
-         Args:
-             root: Path to the root directory of the dataset. The dataset will
-                 be downloaded and extracted here, if it does not already exist.
-             split: Dataset split to use.
-             transforms: A function/transforms that takes in an image and a target
-                 mask and returns the transformed versions of both.
-             seed: Seed used for generating the dataset splits.
-         """
-         super().__init__(transforms=transforms)
-
-         self._root = root
-         self._split = split
-         self._seed = seed
-         self._indices: List[Tuple[int, int]] = []
-
-     @property
-     @override
-     def classes(self) -> List[str]:
-         return ["background", "liver", "tumor"]
-
-     @functools.cached_property
-     @override
-     def class_to_idx(self) -> Dict[str, int]:
-         return {label: index for index, label in enumerate(self.classes)}
-
-     @override
-     def filename(self, index: int) -> str:
-         sample_index, _ = self._indices[index]
-         volume_file_path = self._volume_files[sample_index]
-         return os.path.relpath(volume_file_path, self._root)
-
-     @override
-     def configure(self) -> None:
-         self._indices = self._create_indices()
-
-     @override
-     def validate(self) -> None:
-         for i in range(len(self._volume_files)):
-             seg_path = self._segmentation_file(i)
-             if not os.path.exists(seg_path):
-                 raise FileNotFoundError(
-                     f"Segmentation file {seg_path} not found for volume {self._volume_files[i]}."
-                 )
-
-         _validators.check_dataset_integrity(
-             self,
-             length=self._expected_dataset_lengths.get(self._split, 0),
-             n_classes=3,
-             first_and_last_labels=("background", "tumor"),
-         )
-
-     @override
-     def load_data(self, index: int) -> tv_tensors.Image:
-         sample_index, slice_index = self._indices[index]
-         volume_path = self._volume_files[sample_index]
-         image_nii = io.read_nifti(volume_path, slice_index)
-         image_array = io.nifti_to_array(image_nii)
-         if self._fix_orientation:
-             image_array = self._orientation(image_array, sample_index)
-         return tv_tensors.Image(image_array.transpose(2, 0, 1))
-
-     @override
-     def load_target(self, index: int) -> tv_tensors.Mask:
-         sample_index, slice_index = self._indices[index]
-         segmentation_path = self._segmentation_file(sample_index)
-         mask_nii = io.read_nifti(segmentation_path, slice_index)
-         mask_array = io.nifti_to_array(mask_nii)
-         if self._fix_orientation:
-             semantic_labels = self._orientation(mask_array, sample_index)
-         return tv_tensors.Mask(semantic_labels.squeeze(), dtype=torch.int64)  # type: ignore[reportCallIssue]
-
-     def _orientation(self, array: npt.NDArray, sample_index: int) -> npt.NDArray:
-         volume_path = self._volume_files[sample_index]
-         orientation = io.fetch_nifti_axis_direction_code(volume_path)
-         array = np.rot90(array, axes=(0, 1))
-         if orientation == "LPS":
-             array = np.flip(array, axis=0)
-         return array.copy()
-
-     @override
-     def load_metadata(self, index: int) -> Dict[str, Any]:
-         _, slice_index = self._indices[index]
-         return {"slice_index": slice_index}
-
-     @override
-     def __len__(self) -> int:
-         return len(self._indices)
-
-     def _get_number_of_slices_per_volume(self, sample_index: int) -> int:
-         """Returns the total amount of slices of a volume."""
-         file_path = self._volume_files[sample_index]
-         volume_shape = io.fetch_nifti_shape(file_path)
-         return volume_shape[-1]
-
-     @functools.cached_property
-     def _volume_files(self) -> List[str]:
-         files_pattern = os.path.join(self._root, "**", "volume-*.nii")
-         files = glob.glob(files_pattern, recursive=True)
-         return utils.numeric_sort(files)
-
-     def _segmentation_file(self, index: int) -> str:
-         volume_file_path = self._volume_files[index]
-         segmentation_file = os.path.basename(volume_file_path).replace("volume", "segmentation")
-         return os.path.join(os.path.dirname(volume_file_path), segmentation_file)
-
-     def _create_indices(self) -> List[Tuple[int, int]]:
-         """Builds the dataset indices for the specified split.
-
-         Returns:
-             A list of tuples, where the first value indicates the
-             sample index which the second its corresponding slice
-             index.
-         """
-         indices = [
-             (sample_idx, slide_idx)
-             for sample_idx in self._get_split_indices()
-             for slide_idx in range(self._get_number_of_slices_per_volume(sample_idx))
-             if slide_idx % (self._sample_every_n_slices or 1) == 0
-         ]
-         return indices
-
-     def _get_split_indices(self) -> List[int]:
-         """Returns the sample indices for the specified dataset split."""
-         indices = list(range(len(self._volume_files)))
-         train_indices, val_indices, test_indices = splitting.random_split(
-             indices, self._train_ratio, self._val_ratio, self._test_ratio, seed=self._seed
-         )
-         split_indices_dict = {
-             "train": train_indices,
-             "val": val_indices,
-             "test": test_indices,
-             None: indices,
-         }
-         if self._split not in split_indices_dict:
-             raise ValueError("Invalid data split. Use 'train', 'val', 'test' or `None`.")
-         return list(split_indices_dict[self._split])
-
-     def _print_license(self) -> None:
-         """Prints the dataset license."""
-         print(f"Dataset license: {self._license}")
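Note: the removed `LiTS` class treats each 3D CT volume as a stack of 2D samples: `_create_indices` expands every volume of the active split into `(sample_index, slice_index)` pairs, so `__len__` and item loading operate per slice while `io.read_nifti` fetches one slice at a time. A minimal, self-contained sketch of that expansion (the slice counts below are hypothetical stand-ins for `io.fetch_nifti_shape(path)[-1]` on real NIfTI files):

```python
from typing import List, Tuple

# Hypothetical slice counts per volume, standing in for
# io.fetch_nifti_shape(path)[-1] on the real NIfTI files.
slices_per_volume = [120, 80, 95]


def create_indices(
    volume_slices: List[int], sample_every_n: int | None = None
) -> List[Tuple[int, int]]:
    """Expands volumes into (volume_index, slice_index) pairs."""
    step = sample_every_n or 1
    return [
        (volume_idx, slice_idx)
        for volume_idx, n_slices in enumerate(volume_slices)
        for slice_idx in range(n_slices)
        if slice_idx % step == 0
    ]


indices = create_indices(slices_per_volume)
assert len(indices) == 120 + 80 + 95  # one sample per 2D slice

# _sample_every_n_slices keeps every n-th slice via the modulo test.
assert len(create_indices(slices_per_volume, sample_every_n=10)) == 12 + 8 + 10
```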
eva/vision/data/datasets/segmentation/lits_balanced.py
@@ -1,94 +0,0 @@
- """Balanced LiTS dataset."""
-
- from typing import Callable, Dict, List, Literal, Tuple
-
- import numpy as np
- from typing_extensions import override
-
- from eva.vision.data.datasets.segmentation import lits
- from eva.vision.utils import io
-
-
- class LiTSBalanced(lits.LiTS):
-     """Balanced version of the LiTS - Liver Tumor Segmentation Challenge dataset.
-
-     For each volume in the dataset, we sample the same number of slices where
-     only the liver and where both liver and tumor are present.
-
-     Webpage: https://competitions.codalab.org/competitions/17094
-
-     For the splits we follow: https://arxiv.org/pdf/2010.01663v2
-     """
-
-     _expected_dataset_lengths: Dict[str | None, int] = {
-         "train": 5514,
-         "val": 1332,
-         "test": 1530,
-         None: 8376,
-     }
-     """Dataset version and split to the expected size."""
-
-     def __init__(
-         self,
-         root: str,
-         split: Literal["train", "val", "test"] | None = None,
-         transforms: Callable | None = None,
-         seed: int = 8,
-     ) -> None:
-         """Initialize dataset.
-
-         Args:
-             root: Path to the root directory of the dataset. The dataset will
-                 be downloaded and extracted here, if it does not already exist.
-             split: Dataset split to use.
-             transforms: A function/transforms that takes in an image and a target
-                 mask and returns the transformed versions of both.
-             seed: Seed used for generating the dataset splits and sampling of the slices.
-         """
-         super().__init__(root=root, split=split, transforms=transforms, seed=seed)
-
-     @override
-     def _create_indices(self) -> List[Tuple[int, int]]:
-         """Builds the dataset indices for the specified split.
-
-         Returns:
-             A list of tuples, where the first value indicates the
-             sample index which the second its corresponding slice
-             index.
-         """
-         split_indices = set(self._get_split_indices())
-         indices: List[Tuple[int, int]] = []
-         random_generator = np.random.default_rng(seed=self._seed)
-
-         for sample_idx in range(len(self._volume_files)):
-             if sample_idx not in split_indices:
-                 continue
-
-             segmentation_nii = io.read_nifti(self._segmentation_file(sample_idx))
-             segmentation = io.nifti_to_array(segmentation_nii)
-             tumor_filter = segmentation == 2
-             tumor_slice_filter = tumor_filter.sum(axis=(0, 1)) > 0
-
-             if tumor_filter.sum() == 0:
-                 continue
-
-             liver_filter = segmentation == 1
-             liver_slice_filter = liver_filter.sum(axis=(0, 1)) > 0
-
-             liver_and_tumor_filter = liver_slice_filter & tumor_slice_filter
-             liver_only_filter = liver_slice_filter & ~tumor_slice_filter
-
-             n_slice_samples = min(liver_and_tumor_filter.sum(), liver_only_filter.sum())
-             tumor_indices = list(np.where(liver_and_tumor_filter)[0])
-             tumor_indices = list(
-                 random_generator.choice(tumor_indices, size=n_slice_samples, replace=False)
-             )
-
-             liver_indices = list(np.where(liver_only_filter)[0])
-             liver_indices = list(
-                 random_generator.choice(liver_indices, size=n_slice_samples, replace=False)
-             )
-
-             indices.extend([(sample_idx, slice_idx) for slice_idx in tumor_indices + liver_indices])
-
-         return list(indices)
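Note: the balancing in the removed `LiTSBalanced._create_indices` boils down to: per volume, compute per-slice presence masks for tumor (label 2) and liver (label 1), split the liver slices into "liver and tumor" versus "liver only", and draw the same number of slices from each group with a seeded generator. A self-contained sketch on a synthetic segmentation volume (shape and label placement are illustrative, not from the dataset):

```python
import numpy as np

rng = np.random.default_rng(seed=8)

# Synthetic (H, W, D) segmentation volume: 0=background, 1=liver, 2=tumor.
segmentation = np.zeros((64, 64, 30), dtype=np.int64)
segmentation[20:40, 20:40, 5:25] = 1   # liver present in slices 5..24
segmentation[28:32, 28:32, 10:15] = 2  # tumor present in slices 10..14

# Per-slice presence masks, reducing over the two spatial axes.
liver_slices = (segmentation == 1).sum(axis=(0, 1)) > 0
tumor_slices = (segmentation == 2).sum(axis=(0, 1)) > 0

liver_and_tumor = liver_slices & tumor_slices
liver_only = liver_slices & ~tumor_slices

# Sample the same number of slices from both groups.
n_samples = min(liver_and_tumor.sum(), liver_only.sum())
tumor_idx = rng.choice(np.where(liver_and_tumor)[0], size=n_samples, replace=False)
liver_idx = rng.choice(np.where(liver_only)[0], size=n_samples, replace=False)

print(sorted(tumor_idx.tolist() + liver_idx.tolist()))  # 5 + 5 balanced slice indices
```

Volumes without any tumor voxels are skipped entirely, which is why the balanced dataset (8376 slices overall) is far smaller than the full LiTS dataset's 58638.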