slide2vec 2.0.0__tar.gz → 2.0.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {slide2vec-2.0.0/slide2vec.egg-info → slide2vec-2.0.1}/PKG-INFO +38 -6
- slide2vec-2.0.1/README.md +84 -0
- {slide2vec-2.0.0 → slide2vec-2.0.1}/pyproject.toml +1 -1
- {slide2vec-2.0.0 → slide2vec-2.0.1}/setup.cfg +1 -1
- slide2vec-2.0.1/slide2vec/__init__.py +6 -0
- slide2vec-2.0.1/slide2vec/configs/__init__.py +20 -0
- slide2vec-2.0.1/slide2vec/data/dataset.py +127 -0
- {slide2vec-2.0.0 → slide2vec-2.0.1}/slide2vec/embed.py +61 -9
- {slide2vec-2.0.0 → slide2vec-2.0.1}/slide2vec/models/models.py +21 -19
- {slide2vec-2.0.0 → slide2vec-2.0.1}/slide2vec/utils/__init__.py +0 -1
- {slide2vec-2.0.0 → slide2vec-2.0.1}/slide2vec/utils/config.py +7 -3
- {slide2vec-2.0.0 → slide2vec-2.0.1}/slide2vec/utils/utils.py +0 -15
- {slide2vec-2.0.0 → slide2vec-2.0.1/slide2vec.egg-info}/PKG-INFO +38 -6
- slide2vec-2.0.0/README.md +0 -52
- slide2vec-2.0.0/slide2vec/__init__.py +0 -1
- slide2vec-2.0.0/slide2vec/configs/__init__.py +0 -17
- slide2vec-2.0.0/slide2vec/data/dataset.py +0 -66
- {slide2vec-2.0.0 → slide2vec-2.0.1}/LICENSE +0 -0
- {slide2vec-2.0.0 → slide2vec-2.0.1}/MANIFEST.in +0 -0
- {slide2vec-2.0.0 → slide2vec-2.0.1}/setup.py +0 -0
- {slide2vec-2.0.0 → slide2vec-2.0.1}/slide2vec/aggregate.py +0 -0
- {slide2vec-2.0.0 → slide2vec-2.0.1}/slide2vec/data/__init__.py +0 -0
- {slide2vec-2.0.0 → slide2vec-2.0.1}/slide2vec/data/augmentations.py +0 -0
- {slide2vec-2.0.0 → slide2vec-2.0.1}/slide2vec/distributed/__init__.py +0 -0
- {slide2vec-2.0.0 → slide2vec-2.0.1}/slide2vec/main.py +0 -0
- {slide2vec-2.0.0 → slide2vec-2.0.1}/slide2vec/models/__init__.py +0 -0
- {slide2vec-2.0.0 → slide2vec-2.0.1}/slide2vec/models/layers/__init__.py +0 -0
- {slide2vec-2.0.0 → slide2vec-2.0.1}/slide2vec/models/layers/attention.py +0 -0
- {slide2vec-2.0.0 → slide2vec-2.0.1}/slide2vec/models/layers/block.py +0 -0
- {slide2vec-2.0.0 → slide2vec-2.0.1}/slide2vec/models/layers/dino_head.py +0 -0
- {slide2vec-2.0.0 → slide2vec-2.0.1}/slide2vec/models/layers/drop_path.py +0 -0
- {slide2vec-2.0.0 → slide2vec-2.0.1}/slide2vec/models/layers/layer_scale.py +0 -0
- {slide2vec-2.0.0 → slide2vec-2.0.1}/slide2vec/models/layers/mlp.py +0 -0
- {slide2vec-2.0.0 → slide2vec-2.0.1}/slide2vec/models/layers/patch_embed.py +0 -0
- {slide2vec-2.0.0 → slide2vec-2.0.1}/slide2vec/models/layers/swiglu_ffn.py +0 -0
- {slide2vec-2.0.0 → slide2vec-2.0.1}/slide2vec/models/vision_transformer_dino.py +0 -0
- {slide2vec-2.0.0 → slide2vec-2.0.1}/slide2vec/models/vision_transformer_dinov2.py +0 -0
- {slide2vec-2.0.0 → slide2vec-2.0.1}/slide2vec/utils/log_utils.py +0 -0
- {slide2vec-2.0.0 → slide2vec-2.0.1}/slide2vec.egg-info/SOURCES.txt +0 -0
- {slide2vec-2.0.0 → slide2vec-2.0.1}/slide2vec.egg-info/dependency_links.txt +0 -0
- {slide2vec-2.0.0 → slide2vec-2.0.1}/slide2vec.egg-info/not-zip-safe +0 -0
- {slide2vec-2.0.0 → slide2vec-2.0.1}/slide2vec.egg-info/requires.txt +0 -0
- {slide2vec-2.0.0 → slide2vec-2.0.1}/slide2vec.egg-info/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: slide2vec
|
|
3
|
-
Version: 2.0.0
|
|
3
|
+
Version: 2.0.1
|
|
4
4
|
Summary: Embedding of whole slide images with Foundation Models
|
|
5
5
|
Home-page: https://github.com/clemsgrs/slide2vec
|
|
6
6
|
Author: Clément Grisi
|
|
@@ -58,6 +58,37 @@ Dynamic: project-url
|
|
|
58
58
|
[Docker Hub](https://hub.docker.com/r/waticlems/slide2vec)
|
|
59
59
|
|
|
60
60
|
|
|
61
|
+
## Supported Models
|
|
62
|
+
|
|
63
|
+
### Tile-level models
|
|
64
|
+
|
|
65
|
+
| **Model** | **Architecture** | **Parameters** |
|
|
66
|
+
|:---------:|:----------------:|:--------------:|
|
|
67
|
+
| [CONCH](https://huggingface.co/MahmoodLab/conch) | ViT-B/16 | 86M |
|
|
68
|
+
| [H0-mini](https://huggingface.co/bioptimus/H0-mini) | ViT-B/16 | 86M |
|
|
69
|
+
| [Hibou-B](https://huggingface.co/histai/hibou-b) | ViT-B/16 | 86M |
|
|
70
|
+
| [Hibou-L](https://huggingface.co/histai/hibou-L) | ViT-L/16 | 307M |
|
|
71
|
+
| [MUSK](https://huggingface.co/xiangjx/musk) | ViT-L/16 | 307M |
|
|
72
|
+
| [Phikon-v2](https://huggingface.co/owkin/phikon-v2) | ViT-L/16 | 307M |
|
|
73
|
+
| [UNI](https://huggingface.co/MahmoodLab/UNI) | ViT-L/16 | 307M |
|
|
74
|
+
| [Virchow](https://huggingface.co/paige-ai/Virchow) | ViT-H/14 | 632M |
|
|
75
|
+
| [Virchow2](https://huggingface.co/paige-ai/Virchow2) | ViT-H/14 | 632M |
|
|
76
|
+
| [MidNight12k](https://huggingface.co/kaiko-ai/midnight) | ViT-G/14 | 1.1B |
|
|
77
|
+
| [UNI2](https://huggingface.co/MahmoodLab/UNI2-h) | ViT-G/14 | 1.1B |
|
|
78
|
+
| [Prov-GigaPath](https://huggingface.co/prov-gigapath/prov-gigapath) | ViT-G/14 | 1.1B |
|
|
79
|
+
| [H-optimus-0](https://huggingface.co/bioptimus/H-optimus-0) | ViT-G/14 | 1.1B |
|
|
80
|
+
| [H-optimus-1](https://huggingface.co/bioptimus/H-optimus-1) | ViT-G/14 | 1.1B |
|
|
81
|
+
| [Kaiko](https://github.com/kaiko-ai/towards_large_pathology_fms) | Various | 86M - 307M |
|
|
82
|
+
|
|
83
|
+
### Slide-level models
|
|
84
|
+
|
|
85
|
+
| **Model** | **Architecture** | **Parameters** |
|
|
86
|
+
|:---------:|:----------------:|:--------------:|
|
|
87
|
+
| [TITAN](https://huggingface.co/MahmoodLab/TITAN) | Transformer | 49M |
|
|
88
|
+
| [Prov-GigaPath](https://huggingface.co/prov-gigapath/prov-gigapath) | Transformer (LongNet) | 87M |
|
|
89
|
+
| [PRISM](https://huggingface.co/paige-ai/PRISM) | Perceiver Resampler | 99M |
|
|
90
|
+
|
|
91
|
+
|
|
61
92
|
## 🛠️ Installation
|
|
62
93
|
|
|
63
94
|
System requirements: Linux-based OS (e.g., Ubuntu 22.04) with Python 3.10+ and Docker installed.
|
|
@@ -77,7 +108,7 @@ Replace `/path/to/your/data` with your local data directory.
|
|
|
77
108
|
Alternatively, you can install `slide2vec` via pip:
|
|
78
109
|
|
|
79
110
|
```shell
|
|
80
|
-
pip install slide2vec
|
|
111
|
+
pip install slide2vec
|
|
81
112
|
```
|
|
82
113
|
|
|
83
114
|
## 🚀 Extract features
|
|
@@ -93,10 +124,11 @@ pip install slide2vec
|
|
|
93
124
|
|
|
94
125
|
2. Create a configuration file
|
|
95
126
|
|
|
96
|
-
A good starting point
|
|
97
|
-
|
|
98
|
-
-
|
|
99
|
-
|
|
127
|
+
Good starting points are the default configuration files, where parameters are documented:<br>
|
|
128
|
+
- for preprocessing options: `slide2vec/configs/default_tiling.yaml`
|
|
129
|
+
- for model options: `slide2vec/configs/default_model.yaml`
|
|
130
|
+
|
|
131
|
+
We've also added default configuration files for each of the foundation models currently supported (see above).
|
|
100
132
|
|
|
101
133
|
|
|
102
134
|
3. Kick off distributed feature extraction
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
# slide2vec
|
|
2
|
+
|
|
3
|
+
[PyPI](https://pypi.org/project/slide2vec/)
|
|
4
|
+
[Docker Hub](https://hub.docker.com/r/waticlems/slide2vec)
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
## Supported Models
|
|
8
|
+
|
|
9
|
+
### Tile-level models
|
|
10
|
+
|
|
11
|
+
| **Model** | **Architecture** | **Parameters** |
|
|
12
|
+
|:---------:|:----------------:|:--------------:|
|
|
13
|
+
| [CONCH](https://huggingface.co/MahmoodLab/conch) | ViT-B/16 | 86M |
|
|
14
|
+
| [H0-mini](https://huggingface.co/bioptimus/H0-mini) | ViT-B/16 | 86M |
|
|
15
|
+
| [Hibou-B](https://huggingface.co/histai/hibou-b) | ViT-B/16 | 86M |
|
|
16
|
+
| [Hibou-L](https://huggingface.co/histai/hibou-L) | ViT-L/16 | 307M |
|
|
17
|
+
| [MUSK](https://huggingface.co/xiangjx/musk) | ViT-L/16 | 307M |
|
|
18
|
+
| [Phikon-v2](https://huggingface.co/owkin/phikon-v2) | ViT-L/16 | 307M |
|
|
19
|
+
| [UNI](https://huggingface.co/MahmoodLab/UNI) | ViT-L/16 | 307M |
|
|
20
|
+
| [Virchow](https://huggingface.co/paige-ai/Virchow) | ViT-H/14 | 632M |
|
|
21
|
+
| [Virchow2](https://huggingface.co/paige-ai/Virchow2) | ViT-H/14 | 632M |
|
|
22
|
+
| [MidNight12k](https://huggingface.co/kaiko-ai/midnight) | ViT-G/14 | 1.1B |
|
|
23
|
+
| [UNI2](https://huggingface.co/MahmoodLab/UNI2-h) | ViT-G/14 | 1.1B |
|
|
24
|
+
| [Prov-GigaPath](https://huggingface.co/prov-gigapath/prov-gigapath) | ViT-G/14 | 1.1B |
|
|
25
|
+
| [H-optimus-0](https://huggingface.co/bioptimus/H-optimus-0) | ViT-G/14 | 1.1B |
|
|
26
|
+
| [H-optimus-1](https://huggingface.co/bioptimus/H-optimus-1) | ViT-G/14 | 1.1B |
|
|
27
|
+
| [Kaiko](https://github.com/kaiko-ai/towards_large_pathology_fms) | Various | 86M - 307M |
|
|
28
|
+
|
|
29
|
+
### Slide-level models
|
|
30
|
+
|
|
31
|
+
| **Model** | **Architecture** | **Parameters** |
|
|
32
|
+
|:---------:|:----------------:|:--------------:|
|
|
33
|
+
| [TITAN](https://huggingface.co/MahmoodLab/TITAN) | Transformer | 49M |
|
|
34
|
+
| [Prov-GigaPath](https://huggingface.co/prov-gigapath/prov-gigapath) | Transformer (LongNet) | 87M |
|
|
35
|
+
| [PRISM](https://huggingface.co/paige-ai/PRISM) | Perceiver Resampler | 99M |
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
## 🛠️ Installation
|
|
39
|
+
|
|
40
|
+
System requirements: Linux-based OS (e.g., Ubuntu 22.04) with Python 3.10+ and Docker installed.
|
|
41
|
+
|
|
42
|
+
We recommend running the script inside a container using the latest `slide2vec` image from Docker Hub:
|
|
43
|
+
|
|
44
|
+
```shell
|
|
45
|
+
docker pull waticlems/slide2vec:latest
|
|
46
|
+
docker run --rm -it \
|
|
47
|
+
-v /path/to/your/data:/data \
|
|
48
|
+
-e HF_TOKEN=<your-huggingface-api-token> \
|
|
49
|
+
waticlems/slide2vec:latest
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
Replace `/path/to/your/data` with your local data directory.
|
|
53
|
+
|
|
54
|
+
Alternatively, you can install `slide2vec` via pip:
|
|
55
|
+
|
|
56
|
+
```shell
|
|
57
|
+
pip install slide2vec
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
## 🚀 Extract features
|
|
61
|
+
|
|
62
|
+
1. Create a `.csv` file with slide paths. Optionally, you can provide paths to pre-computed tissue masks.
|
|
63
|
+
|
|
64
|
+
```csv
|
|
65
|
+
wsi_path,mask_path
|
|
66
|
+
/path/to/slide1.tif,/path/to/mask1.tif
|
|
67
|
+
/path/to/slide2.tif,/path/to/mask2.tif
|
|
68
|
+
...
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
2. Create a configuration file
|
|
72
|
+
|
|
73
|
+
Good starting points are the default configuration files, where parameters are documented:<br>
|
|
74
|
+
- for preprocessing options: `slide2vec/configs/default_tiling.yaml`
|
|
75
|
+
- for model options: `slide2vec/configs/default_model.yaml`
|
|
76
|
+
|
|
77
|
+
We've also added default configuration files for each of the foundation models currently supported (see above).
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
3. Kick off distributed feature extraction
|
|
81
|
+
|
|
82
|
+
```shell
|
|
83
|
+
python3 -m slide2vec.main --config-file </path/to/config.yaml>
|
|
84
|
+
```
|
|
@@ -23,7 +23,7 @@ warn_unused_configs = true
|
|
|
23
23
|
no_implicit_reexport = true
|
|
24
24
|
|
|
25
25
|
[tool.bumpver]
|
|
26
|
-
current_version = "2.0.0"
|
|
26
|
+
current_version = "2.0.1"
|
|
27
27
|
version_pattern = "MAJOR.MINOR.PATCH"
|
|
28
28
|
commit = false # We do version bumping in CI, not as a commit
|
|
29
29
|
tag = false # Git tag already exists — we don't auto-tag
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
import pathlib
|
|
2
|
+
|
|
3
|
+
from omegaconf import OmegaConf
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def load_config(config_name: str):
|
|
7
|
+
config_filename = config_name + ".yaml"
|
|
8
|
+
return OmegaConf.load(pathlib.Path(__file__).parent.resolve() / config_filename)
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
default_tiling_config = load_config("default_tiling")
|
|
12
|
+
default_model_config = load_config("default_model")
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def load_and_merge_config(config_name: str):
|
|
16
|
+
default_tiling_config = OmegaConf.create(default_tiling_config)
|
|
17
|
+
default_model_config = OmegaConf.create(default_model_config)
|
|
18
|
+
default_config = OmegaConf.merge(default_tiling_config, default_model_config)
|
|
19
|
+
loaded_config = load_config(config_name)
|
|
20
|
+
return OmegaConf.merge(default_config, loaded_config)
|
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
import cv2
|
|
2
|
+
import torch
|
|
3
|
+
import numpy as np
|
|
4
|
+
import wholeslidedata as wsd
|
|
5
|
+
|
|
6
|
+
from transformers.image_processing_utils import BaseImageProcessor
|
|
7
|
+
from PIL import Image
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
from typing import Callable
|
|
10
|
+
|
|
11
|
+
from slide2vec.hs2p.hs2p.wsi import WholeSlideImage, SegmentationParameters, SamplingParameters, FilterParameters
|
|
12
|
+
from slide2vec.hs2p.hs2p.wsi.utils import HasEnoughTissue
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class TileDataset(torch.utils.data.Dataset):
|
|
16
|
+
def __init__(
|
|
17
|
+
self,
|
|
18
|
+
wsi_path: Path,
|
|
19
|
+
mask_path: Path,
|
|
20
|
+
coordinates_dir: Path,
|
|
21
|
+
target_spacing: float,
|
|
22
|
+
tolerance: float,
|
|
23
|
+
backend: str,
|
|
24
|
+
segment_params: SegmentationParameters | None = None,
|
|
25
|
+
sampling_params: SamplingParameters | None = None,
|
|
26
|
+
filter_params: FilterParameters | None = None,
|
|
27
|
+
transforms: BaseImageProcessor | Callable | None = None,
|
|
28
|
+
restrict_to_tissue: bool = False,
|
|
29
|
+
):
|
|
30
|
+
self.path = wsi_path
|
|
31
|
+
self.mask_path = mask_path
|
|
32
|
+
self.target_spacing = target_spacing
|
|
33
|
+
self.backend = backend
|
|
34
|
+
self.name = wsi_path.stem.replace(" ", "_")
|
|
35
|
+
self.load_coordinates(coordinates_dir)
|
|
36
|
+
self.transforms = transforms
|
|
37
|
+
self.restrict_to_tissue = restrict_to_tissue
|
|
38
|
+
|
|
39
|
+
if restrict_to_tissue:
|
|
40
|
+
_wsi = WholeSlideImage(
|
|
41
|
+
path=self.path,
|
|
42
|
+
mask_path=self.mask_path,
|
|
43
|
+
backend=self.backend,
|
|
44
|
+
segment=self.mask_path is None,
|
|
45
|
+
segment_params=segment_params,
|
|
46
|
+
sampling_params=sampling_params,
|
|
47
|
+
)
|
|
48
|
+
contours, holes = _wsi.detect_contours(
|
|
49
|
+
target_spacing=target_spacing,
|
|
50
|
+
tolerance=tolerance,
|
|
51
|
+
filter_params=filter_params,
|
|
52
|
+
)
|
|
53
|
+
scale = _wsi.level_downsamples[_wsi.seg_level]
|
|
54
|
+
self.contours = _wsi.scaleContourDim(contours, (1.0 / scale[0], 1.0 / scale[1]))
|
|
55
|
+
self.holes = _wsi.scaleHolesDim(holes, (1.0 / scale[0], 1.0 / scale[1]))
|
|
56
|
+
self.tissue_mask = _wsi.annotation_mask["tissue"]
|
|
57
|
+
self.seg_spacing = _wsi.get_level_spacing(_wsi.seg_level)
|
|
58
|
+
self.spacing_at_level_0 = _wsi.get_level_spacing(0)
|
|
59
|
+
|
|
60
|
+
def load_coordinates(self, coordinates_dir):
|
|
61
|
+
coordinates = np.load(Path(coordinates_dir, f"{self.name}.npy"), allow_pickle=True)
|
|
62
|
+
self.x = coordinates["x"]
|
|
63
|
+
self.y = coordinates["y"]
|
|
64
|
+
self.coordinates = (np.array([self.x, self.y]).T).astype(int)
|
|
65
|
+
self.scaled_coordinates = self.scale_coordinates()
|
|
66
|
+
self.contour_index = coordinates["contour_index"]
|
|
67
|
+
self.target_tile_size = coordinates["target_tile_size"]
|
|
68
|
+
self.tile_level = coordinates["tile_level"]
|
|
69
|
+
self.resize_factor = coordinates["resize_factor"]
|
|
70
|
+
self.tile_size_resized = coordinates["tile_size_resized"]
|
|
71
|
+
self.tile_size_lv0 = coordinates["tile_size_lv0"][0]
|
|
72
|
+
|
|
73
|
+
def scale_coordinates(self):
|
|
74
|
+
# coordinates are defined w.r.t. level 0
|
|
75
|
+
# i need to scale them to target_spacing
|
|
76
|
+
wsi = wsd.WholeSlideImage(self.path, backend=self.backend)
|
|
77
|
+
min_spacing = wsi.spacings[0]
|
|
78
|
+
scale = min_spacing / self.target_spacing
|
|
79
|
+
# create a [N, 2] array with x and y coordinates
|
|
80
|
+
scaled_coordinates = (self.coordinates * scale).astype(int)
|
|
81
|
+
return scaled_coordinates
|
|
82
|
+
|
|
83
|
+
def __len__(self):
|
|
84
|
+
return len(self.x)
|
|
85
|
+
|
|
86
|
+
def __getitem__(self, idx):
|
|
87
|
+
wsi = wsd.WholeSlideImage(
|
|
88
|
+
self.path, backend=self.backend
|
|
89
|
+
) # cannot be defined in __init__ because of multiprocessing
|
|
90
|
+
tile_level = self.tile_level[idx]
|
|
91
|
+
tile_spacing = wsi.spacings[tile_level]
|
|
92
|
+
tile_arr = wsi.get_patch(
|
|
93
|
+
self.x[idx],
|
|
94
|
+
self.y[idx],
|
|
95
|
+
self.tile_size_resized[idx],
|
|
96
|
+
self.tile_size_resized[idx],
|
|
97
|
+
spacing=tile_spacing,
|
|
98
|
+
center=False,
|
|
99
|
+
)
|
|
100
|
+
if self.restrict_to_tissue:
|
|
101
|
+
contour_idx = self.contour_index[idx]
|
|
102
|
+
contour = self.contours[contour_idx]
|
|
103
|
+
holes = self.holes[contour_idx]
|
|
104
|
+
tissue_checker = HasEnoughTissue(
|
|
105
|
+
contour=contour,
|
|
106
|
+
contour_holes=holes,
|
|
107
|
+
tissue_mask=self.tissue_mask,
|
|
108
|
+
tile_size=self.target_tile_size[idx],
|
|
109
|
+
tile_spacing=tile_spacing,
|
|
110
|
+
resize_factor=self.resize_factor[idx],
|
|
111
|
+
seg_spacing=self.seg_spacing,
|
|
112
|
+
spacing_at_level_0=self.spacing_at_level_0,
|
|
113
|
+
)
|
|
114
|
+
tissue_mask = tissue_checker.get_tile_mask(self.x[idx], self.y[idx])
|
|
115
|
+
# ensure mask is the same size as the tile
|
|
116
|
+
assert tissue_mask.shape[:2] == tile_arr.shape[:2], "Mask and tile shapes do not match"
|
|
117
|
+
# apply mask
|
|
118
|
+
tile_arr = cv2.bitwise_and(tile_arr, tile_arr, mask=tissue_mask)
|
|
119
|
+
tile = Image.fromarray(tile_arr).convert("RGB")
|
|
120
|
+
if self.target_tile_size[idx] != self.tile_size_resized[idx]:
|
|
121
|
+
tile = tile.resize((self.target_tile_size[idx], self.target_tile_size[idx]))
|
|
122
|
+
if self.transforms:
|
|
123
|
+
if isinstance(self.transforms, BaseImageProcessor): # Hugging Face (`transformer`)
|
|
124
|
+
tile = self.transforms(tile, return_tensors="pt")["pixel_values"].squeeze(0)
|
|
125
|
+
else: # general callable such as torchvision transforms
|
|
126
|
+
tile = self.transforms(tile)
|
|
127
|
+
return idx, tile
|
|
@@ -18,6 +18,7 @@ from slide2vec.utils import fix_random_seeds
|
|
|
18
18
|
from slide2vec.utils.config import get_cfg_from_file, setup_distributed
|
|
19
19
|
from slide2vec.models import ModelFactory
|
|
20
20
|
from slide2vec.data import TileDataset, RegionUnfolding
|
|
21
|
+
from slide2vec.hs2p.hs2p.wsi import SamplingParameters
|
|
21
22
|
|
|
22
23
|
torchvision.disable_beta_transforms_warning()
|
|
23
24
|
|
|
@@ -60,13 +61,31 @@ def create_transforms(cfg, model):
|
|
|
60
61
|
raise ValueError(f"Unknown model level: {cfg.model.level}")
|
|
61
62
|
|
|
62
63
|
|
|
63
|
-
def create_dataset(
|
|
64
|
+
def create_dataset(
|
|
65
|
+
wsi_path,
|
|
66
|
+
mask_path,
|
|
67
|
+
coordinates_dir,
|
|
68
|
+
target_spacing,
|
|
69
|
+
tolerance,
|
|
70
|
+
backend,
|
|
71
|
+
segment_params,
|
|
72
|
+
sampling_params,
|
|
73
|
+
filter_params,
|
|
74
|
+
transforms,
|
|
75
|
+
restrict_to_tissue: bool,
|
|
76
|
+
):
|
|
64
77
|
return TileDataset(
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
78
|
+
wsi_path=wsi_path,
|
|
79
|
+
mask_path=mask_path,
|
|
80
|
+
coordinates_dir=coordinates_dir,
|
|
81
|
+
target_spacing=target_spacing,
|
|
82
|
+
tolerance=tolerance,
|
|
68
83
|
backend=backend,
|
|
84
|
+
segment_params=segment_params,
|
|
85
|
+
sampling_params=sampling_params,
|
|
86
|
+
filter_params=filter_params,
|
|
69
87
|
transforms=transforms,
|
|
88
|
+
restrict_to_tissue=restrict_to_tissue,
|
|
70
89
|
)
|
|
71
90
|
|
|
72
91
|
|
|
@@ -154,10 +173,13 @@ def main(args):
|
|
|
154
173
|
process_list.is_file()
|
|
155
174
|
), "Process list CSV not found. Ensure tiling has been run."
|
|
156
175
|
process_df = pd.read_csv(process_list)
|
|
176
|
+
cols = ["wsi_name", "wsi_path", "tiling_status", "error", "traceback"]
|
|
157
177
|
if "feature_status" not in process_df.columns:
|
|
158
178
|
process_df["feature_status"] = ["tbp"] * len(process_df)
|
|
159
|
-
|
|
160
|
-
process_df =
|
|
179
|
+
if "mask_path" not in process_df.columns:
|
|
180
|
+
process_df["mask_path"] = [None] * len(process_df)
|
|
181
|
+
cols = ["wsi_name", "wsi_path", "mask_path", "tiling_status", "feature_status", "error", "traceback"]
|
|
182
|
+
process_df = process_df[cols]
|
|
161
183
|
|
|
162
184
|
skip_feature_extraction = process_df["feature_status"].str.contains("success").all()
|
|
163
185
|
|
|
@@ -176,12 +198,30 @@ def main(args):
|
|
|
176
198
|
if not run_on_cpu:
|
|
177
199
|
torch.distributed.barrier()
|
|
178
200
|
|
|
201
|
+
pixel_mapping = {k: v for e in cfg.tiling.sampling_params.pixel_mapping for k, v in e.items()}
|
|
202
|
+
tissue_percentage = {k: v for e in cfg.tiling.sampling_params.tissue_percentage for k, v in e.items()}
|
|
203
|
+
if "tissue" not in tissue_percentage:
|
|
204
|
+
tissue_percentage["tissue"] = cfg.tiling.params.min_tissue_percentage
|
|
205
|
+
if cfg.tiling.sampling_params.color_mapping is not None:
|
|
206
|
+
color_mapping = {k: v for e in cfg.tiling.sampling_params.color_mapping for k, v in e.items()}
|
|
207
|
+
else:
|
|
208
|
+
color_mapping = None
|
|
209
|
+
|
|
210
|
+
sampling_params = SamplingParameters(
|
|
211
|
+
pixel_mapping=pixel_mapping,
|
|
212
|
+
color_mapping=color_mapping,
|
|
213
|
+
tissue_percentage=tissue_percentage,
|
|
214
|
+
)
|
|
215
|
+
|
|
179
216
|
# select slides that were successfully tiled but not yet processed for feature extraction
|
|
180
217
|
tiled_df = process_df[process_df.tiling_status == "success"]
|
|
181
218
|
mask = tiled_df["feature_status"] != "success"
|
|
182
219
|
process_stack = tiled_df[mask]
|
|
183
220
|
total = len(process_stack)
|
|
221
|
+
|
|
184
222
|
wsi_paths_to_process = [Path(x) for x in process_stack.wsi_path.values.tolist()]
|
|
223
|
+
mask_paths_to_process = [Path(x) if x is not None and not pd.isna(x) else None for x in process_stack.mask_path.values.tolist()]
|
|
224
|
+
combined_paths = zip(wsi_paths_to_process, mask_paths_to_process)
|
|
185
225
|
|
|
186
226
|
features_dir = Path(cfg.output_dir, "features")
|
|
187
227
|
if distributed.is_main_process():
|
|
@@ -201,8 +241,8 @@ def main(args):
|
|
|
201
241
|
transforms = create_transforms(cfg, model)
|
|
202
242
|
print(f"transforms: {transforms}")
|
|
203
243
|
|
|
204
|
-
for wsi_fp in tqdm.tqdm(
|
|
205
|
-
|
|
244
|
+
for wsi_fp, mask_fp in tqdm.tqdm(
|
|
245
|
+
combined_paths,
|
|
206
246
|
desc="Inference",
|
|
207
247
|
unit="slide",
|
|
208
248
|
total=total,
|
|
@@ -211,7 +251,19 @@ def main(args):
|
|
|
211
251
|
position=1,
|
|
212
252
|
):
|
|
213
253
|
try:
|
|
214
|
-
dataset = create_dataset(
|
|
254
|
+
dataset = create_dataset(
|
|
255
|
+
wsi_path=wsi_fp,
|
|
256
|
+
mask_path=mask_fp,
|
|
257
|
+
coordinates_dir=coordinates_dir,
|
|
258
|
+
target_spacing=cfg.tiling.params.spacing,
|
|
259
|
+
tolerance=cfg.tiling.params.tolerance,
|
|
260
|
+
backend=cfg.tiling.backend,
|
|
261
|
+
segment_params=cfg.tiling.seg_params,
|
|
262
|
+
sampling_params=sampling_params,
|
|
263
|
+
filter_params=cfg.tiling.filter_params,
|
|
264
|
+
transforms=transforms,
|
|
265
|
+
restrict_to_tissue=cfg.model.restrict_to_tissue,
|
|
266
|
+
)
|
|
215
267
|
if distributed.is_enabled_and_multiple_gpus():
|
|
216
268
|
sampler = torch.utils.data.DistributedSampler(
|
|
217
269
|
dataset,
|
|
@@ -13,7 +13,6 @@ from timm.data.constants import IMAGENET_INCEPTION_MEAN, IMAGENET_INCEPTION_STD
|
|
|
13
13
|
from timm.data.transforms_factory import create_transform
|
|
14
14
|
|
|
15
15
|
from conch.open_clip_custom import create_model_from_pretrained
|
|
16
|
-
from musk import modeling as musk_modeling
|
|
17
16
|
from musk import utils as musk_utils
|
|
18
17
|
|
|
19
18
|
import slide2vec.distributed as distributed
|
|
@@ -70,11 +69,12 @@ class ModelFactory:
|
|
|
70
69
|
pretrained_weights=options.pretrained_weights,
|
|
71
70
|
input_size=options.tile_size,
|
|
72
71
|
)
|
|
73
|
-
elif options.name
|
|
72
|
+
elif options.name == "dino" and options.arch:
|
|
74
73
|
model = DINOViT(
|
|
75
74
|
arch=options.arch,
|
|
76
75
|
pretrained_weights=options.pretrained_weights,
|
|
77
76
|
input_size=options.tile_size,
|
|
77
|
+
patch_size=options.token_size,
|
|
78
78
|
)
|
|
79
79
|
elif options.level == "region":
|
|
80
80
|
if options.name == "virchow":
|
|
@@ -259,7 +259,17 @@ class DINOViT(FeatureExtractor):
|
|
|
259
259
|
def load_weights(self):
|
|
260
260
|
if distributed.is_main_process():
|
|
261
261
|
print(f"Loading pretrained weights from: {self.pretrained_weights}")
|
|
262
|
-
|
|
262
|
+
|
|
263
|
+
# Fix for loading checkpoints saved with numpy 2.0+ in an environment with numpy < 2.0
|
|
264
|
+
try:
|
|
265
|
+
import numpy._core
|
|
266
|
+
except ImportError:
|
|
267
|
+
import numpy as np
|
|
268
|
+
import sys
|
|
269
|
+
sys.modules["numpy._core"] = np.core
|
|
270
|
+
sys.modules["numpy._core.multiarray"] = np.core.multiarray
|
|
271
|
+
|
|
272
|
+
state_dict = torch.load(self.pretrained_weights, map_location="cpu", weights_only=False)
|
|
263
273
|
if self.ckpt_key:
|
|
264
274
|
state_dict = state_dict[self.ckpt_key]
|
|
265
275
|
nn.modules.utils.consume_prefix_in_state_dict_if_present(
|
|
@@ -282,21 +292,13 @@ class DINOViT(FeatureExtractor):
|
|
|
282
292
|
return encoder
|
|
283
293
|
|
|
284
294
|
def get_transforms(self):
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
)
|
|
293
|
-
else:
|
|
294
|
-
transforms.Compose(
|
|
295
|
-
[
|
|
296
|
-
MaybeToTensor(),
|
|
297
|
-
make_normalize_transform(),
|
|
298
|
-
]
|
|
299
|
-
)
|
|
295
|
+
transform = transforms.Compose(
|
|
296
|
+
[
|
|
297
|
+
MaybeToTensor(),
|
|
298
|
+
transforms.CenterCrop(self.input_size),
|
|
299
|
+
make_normalize_transform(),
|
|
300
|
+
]
|
|
301
|
+
)
|
|
300
302
|
return transform
|
|
301
303
|
|
|
302
304
|
def forward(self, x):
|
|
@@ -344,7 +346,7 @@ class CustomViT(FeatureExtractor):
|
|
|
344
346
|
def load_weights(self):
|
|
345
347
|
if distributed.is_main_process():
|
|
346
348
|
print(f"Loading pretrained weights from: {self.pretrained_weights}")
|
|
347
|
-
state_dict = torch.load(self.pretrained_weights, map_location="cpu")
|
|
349
|
+
state_dict = torch.load(self.pretrained_weights, map_location="cpu", weights_only=False)
|
|
348
350
|
if self.ckpt_key:
|
|
349
351
|
state_dict = state_dict[self.ckpt_key]
|
|
350
352
|
nn.modules.utils.consume_prefix_in_state_dict_if_present(
|
|
@@ -11,7 +11,7 @@ from omegaconf import OmegaConf
|
|
|
11
11
|
|
|
12
12
|
import slide2vec.distributed as distributed
|
|
13
13
|
from slide2vec.utils import initialize_wandb, fix_random_seeds, get_sha, setup_logging
|
|
14
|
-
from slide2vec.configs import
|
|
14
|
+
from slide2vec.configs import default_tiling_config, default_model_config
|
|
15
15
|
|
|
16
16
|
logger = logging.getLogger("slide2vec")
|
|
17
17
|
|
|
@@ -25,7 +25,9 @@ def write_config(cfg, output_dir, name="config.yaml"):
|
|
|
25
25
|
|
|
26
26
|
|
|
27
27
|
def get_cfg_from_file(config_file):
|
|
28
|
-
|
|
28
|
+
default_tiling_cfg = OmegaConf.create(default_tiling_config)
|
|
29
|
+
default_embedding_cfg = OmegaConf.create(default_model_config)
|
|
30
|
+
default_cfg = OmegaConf.merge(default_tiling_cfg, default_embedding_cfg)
|
|
29
31
|
cfg = OmegaConf.load(config_file)
|
|
30
32
|
cfg = OmegaConf.merge(default_cfg, cfg)
|
|
31
33
|
OmegaConf.resolve(cfg)
|
|
@@ -36,7 +38,9 @@ def get_cfg_from_args(args):
|
|
|
36
38
|
if args.output_dir is not None:
|
|
37
39
|
args.output_dir = os.path.abspath(args.output_dir)
|
|
38
40
|
args.opts += [f"output_dir={args.output_dir}"]
|
|
39
|
-
|
|
41
|
+
default_tiling_cfg = OmegaConf.create(default_tiling_config)
|
|
42
|
+
default_embedding_cfg = OmegaConf.create(default_model_config)
|
|
43
|
+
default_cfg = OmegaConf.merge(default_tiling_cfg, default_embedding_cfg)
|
|
40
44
|
cfg = OmegaConf.load(args.config_file)
|
|
41
45
|
cfg = OmegaConf.merge(default_cfg, cfg, OmegaConf.from_cli(args.opts))
|
|
42
46
|
OmegaConf.resolve(cfg)
|
|
@@ -111,21 +111,6 @@ def initialize_wandb(
|
|
|
111
111
|
return run
|
|
112
112
|
|
|
113
113
|
|
|
114
|
-
def load_csv(cfg):
|
|
115
|
-
df = pd.read_csv(cfg.csv)
|
|
116
|
-
if "wsi_path" in df.columns:
|
|
117
|
-
wsi_paths = [Path(x) for x in df.wsi_path.values.tolist()]
|
|
118
|
-
elif "slide_path" in df.columns:
|
|
119
|
-
wsi_paths = [Path(x) for x in df.slide_path.values.tolist()]
|
|
120
|
-
if "mask_path" in df.columns:
|
|
121
|
-
mask_paths = [Path(x) for x in df.mask_path.values.tolist()]
|
|
122
|
-
elif "segmentation_mask_path" in df.columns:
|
|
123
|
-
mask_paths = [Path(x) for x in df.segmentation_mask_path.values.tolist()]
|
|
124
|
-
else:
|
|
125
|
-
mask_paths = [None for _ in wsi_paths]
|
|
126
|
-
return wsi_paths, mask_paths
|
|
127
|
-
|
|
128
|
-
|
|
129
114
|
def update_state_dict(
|
|
130
115
|
*,
|
|
131
116
|
model_dict: dict,
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: slide2vec
|
|
3
|
-
Version: 2.0.0
|
|
3
|
+
Version: 2.0.1
|
|
4
4
|
Summary: Embedding of whole slide images with Foundation Models
|
|
5
5
|
Home-page: https://github.com/clemsgrs/slide2vec
|
|
6
6
|
Author: Clément Grisi
|
|
@@ -58,6 +58,37 @@ Dynamic: project-url
|
|
|
58
58
|
[Docker Hub](https://hub.docker.com/r/waticlems/slide2vec)
|
|
59
59
|
|
|
60
60
|
|
|
61
|
+
## Supported Models
|
|
62
|
+
|
|
63
|
+
### Tile-level models
|
|
64
|
+
|
|
65
|
+
| **Model** | **Architecture** | **Parameters** |
|
|
66
|
+
|:---------:|:----------------:|:--------------:|
|
|
67
|
+
| [CONCH](https://huggingface.co/MahmoodLab/conch) | ViT-B/16 | 86M |
|
|
68
|
+
| [H0-mini](https://huggingface.co/bioptimus/H0-mini) | ViT-B/16 | 86M |
|
|
69
|
+
| [Hibou-B](https://huggingface.co/histai/hibou-b) | ViT-B/16 | 86M |
|
|
70
|
+
| [Hibou-L](https://huggingface.co/histai/hibou-L) | ViT-L/16 | 307M |
|
|
71
|
+
| [MUSK](https://huggingface.co/xiangjx/musk) | ViT-L/16 | 307M |
|
|
72
|
+
| [Phikon-v2](https://huggingface.co/owkin/phikon-v2) | ViT-L/16 | 307M |
|
|
73
|
+
| [UNI](https://huggingface.co/MahmoodLab/UNI) | ViT-L/16 | 307M |
|
|
74
|
+
| [Virchow](https://huggingface.co/paige-ai/Virchow) | ViT-H/14 | 632M |
|
|
75
|
+
| [Virchow2](https://huggingface.co/paige-ai/Virchow2) | ViT-H/14 | 632M |
|
|
76
|
+
| [MidNight12k](https://huggingface.co/kaiko-ai/midnight) | ViT-G/14 | 1.1B |
|
|
77
|
+
| [UNI2](https://huggingface.co/MahmoodLab/UNI2-h) | ViT-G/14 | 1.1B |
|
|
78
|
+
| [Prov-GigaPath](https://huggingface.co/prov-gigapath/prov-gigapath) | ViT-G/14 | 1.1B |
|
|
79
|
+
| [H-optimus-0](https://huggingface.co/bioptimus/H-optimus-0) | ViT-G/14 | 1.1B |
|
|
80
|
+
| [H-optimus-1](https://huggingface.co/bioptimus/H-optimus-1) | ViT-G/14 | 1.1B |
|
|
81
|
+
| [Kaiko](https://github.com/kaiko-ai/towards_large_pathology_fms) | Various | 86M - 307M |
|
|
82
|
+
|
|
83
|
+
### Slide-level models
|
|
84
|
+
|
|
85
|
+
| **Model** | **Architecture** | **Parameters** |
|
|
86
|
+
|:---------:|:----------------:|:--------------:|
|
|
87
|
+
| [TITAN](https://huggingface.co/MahmoodLab/TITAN) | Transformer | 49M |
|
|
88
|
+
| [Prov-GigaPath](https://huggingface.co/prov-gigapath/prov-gigapath) | Transformer (LongNet) | 87M |
|
|
89
|
+
| [PRISM](https://huggingface.co/paige-ai/PRISM) | Perceiver Resampler | 99M |
|
|
90
|
+
|
|
91
|
+
|
|
61
92
|
## 🛠️ Installation
|
|
62
93
|
|
|
63
94
|
System requirements: Linux-based OS (e.g., Ubuntu 22.04) with Python 3.10+ and Docker installed.
|
|
@@ -77,7 +108,7 @@ Replace `/path/to/your/data` with your local data directory.
|
|
|
77
108
|
Alternatively, you can install `slide2vec` via pip:
|
|
78
109
|
|
|
79
110
|
```shell
|
|
80
|
-
pip install slide2vec
|
|
111
|
+
pip install slide2vec
|
|
81
112
|
```
|
|
82
113
|
|
|
83
114
|
## 🚀 Extract features
|
|
@@ -93,10 +124,11 @@ pip install slide2vec
|
|
|
93
124
|
|
|
94
125
|
2. Create a configuration file
|
|
95
126
|
|
|
96
|
-
A good starting point
|
|
97
|
-
|
|
98
|
-
-
|
|
99
|
-
|
|
127
|
+
Good starting points are the default configuration files, where parameters are documented:<br>
|
|
128
|
+
- for preprocessing options: `slide2vec/configs/default_tiling.yaml`
|
|
129
|
+
- for model options: `slide2vec/configs/default_model.yaml`
|
|
130
|
+
|
|
131
|
+
We've also added default configuration files for each of the foundation models currently supported (see above).
|
|
100
132
|
|
|
101
133
|
|
|
102
134
|
3. Kick off distributed feature extraction
|
slide2vec-2.0.0/README.md
DELETED
|
@@ -1,52 +0,0 @@
|
|
|
1
|
-
# slide2vec
|
|
2
|
-
|
|
3
|
-
[PyPI package](https://pypi.org/project/slide2vec/)
|
|
4
|
-
[Docker Hub image](https://hub.docker.com/r/waticlems/slide2vec)
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
## 🛠️ Installation
|
|
8
|
-
|
|
9
|
-
System requirements: Linux-based OS (e.g., Ubuntu 22.04) with Python 3.10+ and Docker installed.
|
|
10
|
-
|
|
11
|
-
We recommend running the script inside a container using the latest `slide2vec` image from Docker Hub:
|
|
12
|
-
|
|
13
|
-
```shell
|
|
14
|
-
docker pull waticlems/slide2vec:latest
|
|
15
|
-
docker run --rm -it \
|
|
16
|
-
-v /path/to/your/data:/data \
|
|
17
|
-
-e HF_TOKEN=<your-huggingface-api-token> \
|
|
18
|
-
waticlems/slide2vec:latest
|
|
19
|
-
```
|
|
20
|
-
|
|
21
|
-
Replace `/path/to/your/data` with your local data directory.
|
|
22
|
-
|
|
23
|
-
Alternatively, you can install `slide2vec` via pip:
|
|
24
|
-
|
|
25
|
-
```shell
|
|
26
|
-
pip install slide2vec
|
|
27
|
-
```
|
|
28
|
-
|
|
29
|
-
## 🚀 Extract features
|
|
30
|
-
|
|
31
|
-
1. Create a `.csv` file with slide paths. Optionally, you can provide paths to pre-computed tissue masks.
|
|
32
|
-
|
|
33
|
-
```csv
|
|
34
|
-
wsi_path,mask_path
|
|
35
|
-
/path/to/slide1.tif,/path/to/mask1.tif
|
|
36
|
-
/path/to/slide2.tif,/path/to/mask2.tif
|
|
37
|
-
...
|
|
38
|
-
```
|
|
39
|
-
|
|
40
|
-
2. Create a configuration file
|
|
41
|
-
|
|
42
|
-
A good starting point is the default configuration file `slide2vec/configs/default.yaml` where parameters are documented.<br>
|
|
43
|
-
We've also added default configuration files for each of the foundation models currently supported:
|
|
44
|
-
- tile-level: `uni`, `uni2`, `virchow`, `virchow2`, `prov-gigapath`, `h-optimus-0`, `h-optimus-1`, `h0-mini`, `conch`, `musk`, `phikonv2`, `hibou-b`, `hibou-L`, [`kaiko`](https://github.com/kaiko-ai/towards_large_pathology_fms)
|
|
45
|
-
- slide-level: `prov-gigapath`, `titan`, `prism`
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
3. Kick off distributed feature extraction
|
|
49
|
-
|
|
50
|
-
```shell
|
|
51
|
-
python3 -m slide2vec.main --config-file </path/to/config.yaml>
|
|
52
|
-
```
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
__version__ = "2.0.0"
|
|
@@ -1,17 +0,0 @@
|
|
|
1
|
-
import pathlib
|
|
2
|
-
|
|
3
|
-
from omegaconf import OmegaConf
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
def load_config(config_name: str):
|
|
7
|
-
config_filename = config_name + ".yaml"
|
|
8
|
-
return OmegaConf.load(pathlib.Path(__file__).parent.resolve() / config_filename)
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
default_config = load_config("default")
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
def load_and_merge_config(config_name: str):
|
|
15
|
-
default_config = OmegaConf.create(default_config)
|
|
16
|
-
loaded_config = load_config(config_name)
|
|
17
|
-
return OmegaConf.merge(default_config, loaded_config)
|
|
@@ -1,66 +0,0 @@
|
|
|
1
|
-
import torch
|
|
2
|
-
import numpy as np
|
|
3
|
-
import wholeslidedata as wsd
|
|
4
|
-
|
|
5
|
-
from transformers.image_processing_utils import BaseImageProcessor
|
|
6
|
-
from PIL import Image
|
|
7
|
-
from pathlib import Path
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
class TileDataset(torch.utils.data.Dataset):
|
|
11
|
-
def __init__(self, wsi_path, tile_dir, target_spacing, backend, transforms=None):
|
|
12
|
-
self.path = wsi_path
|
|
13
|
-
self.target_spacing = target_spacing
|
|
14
|
-
self.backend = backend
|
|
15
|
-
self.name = wsi_path.stem.replace(" ", "_")
|
|
16
|
-
self.load_coordinates(tile_dir)
|
|
17
|
-
self.transforms = transforms
|
|
18
|
-
|
|
19
|
-
def load_coordinates(self, tile_dir):
|
|
20
|
-
coordinates = np.load(Path(tile_dir, f"{self.name}.npy"), allow_pickle=True)
|
|
21
|
-
self.x = coordinates["x"]
|
|
22
|
-
self.y = coordinates["y"]
|
|
23
|
-
self.coordinates = (np.array([self.x, self.y]).T).astype(int)
|
|
24
|
-
self.scaled_coordinates = self.scale_coordinates()
|
|
25
|
-
self.tile_level = coordinates["tile_level"]
|
|
26
|
-
self.tile_size_resized = coordinates["tile_size_resized"]
|
|
27
|
-
resize_factor = coordinates["resize_factor"]
|
|
28
|
-
self.tile_size = np.round(self.tile_size_resized / resize_factor).astype(int)
|
|
29
|
-
self.tile_size_lv0 = coordinates["tile_size_lv0"][0]
|
|
30
|
-
|
|
31
|
-
def scale_coordinates(self):
|
|
32
|
-
# coordinates are defined w.r.t. level 0
|
|
33
|
-
# i need to scale them to target_spacing
|
|
34
|
-
wsi = wsd.WholeSlideImage(self.path, backend=self.backend)
|
|
35
|
-
min_spacing = wsi.spacings[0]
|
|
36
|
-
scale = min_spacing / self.target_spacing
|
|
37
|
-
# create a [N, 2] array with x and y coordinates
|
|
38
|
-
scaled_coordinates = (self.coordinates * scale).astype(int)
|
|
39
|
-
return scaled_coordinates
|
|
40
|
-
|
|
41
|
-
def __len__(self):
|
|
42
|
-
return len(self.x)
|
|
43
|
-
|
|
44
|
-
def __getitem__(self, idx):
|
|
45
|
-
wsi = wsd.WholeSlideImage(
|
|
46
|
-
self.path, backend=self.backend
|
|
47
|
-
) # cannot be defined in __init__ because of multiprocessing
|
|
48
|
-
tile_level = self.tile_level[idx]
|
|
49
|
-
tile_spacing = wsi.spacings[tile_level]
|
|
50
|
-
tile_arr = wsi.get_patch(
|
|
51
|
-
self.x[idx],
|
|
52
|
-
self.y[idx],
|
|
53
|
-
self.tile_size_resized[idx],
|
|
54
|
-
self.tile_size_resized[idx],
|
|
55
|
-
spacing=tile_spacing,
|
|
56
|
-
center=False,
|
|
57
|
-
)
|
|
58
|
-
tile = Image.fromarray(tile_arr).convert("RGB")
|
|
59
|
-
if self.tile_size[idx] != self.tile_size_resized[idx]:
|
|
60
|
-
tile = tile.resize((self.tile_size[idx], self.tile_size[idx]))
|
|
61
|
-
if self.transforms:
|
|
62
|
-
if isinstance(self.transforms, BaseImageProcessor): # Hugging Face (`transformer`)
|
|
63
|
-
tile = self.transforms(tile, return_tensors="pt")["pixel_values"].squeeze(0)
|
|
64
|
-
else: # general callable such as torchvision transforms
|
|
65
|
-
tile = self.transforms(tile)
|
|
66
|
-
return idx, tile
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|