slide2vec 2.0.0__tar.gz → 2.0.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. {slide2vec-2.0.0/slide2vec.egg-info → slide2vec-2.0.1}/PKG-INFO +38 -6
  2. slide2vec-2.0.1/README.md +84 -0
  3. {slide2vec-2.0.0 → slide2vec-2.0.1}/pyproject.toml +1 -1
  4. {slide2vec-2.0.0 → slide2vec-2.0.1}/setup.cfg +1 -1
  5. slide2vec-2.0.1/slide2vec/__init__.py +6 -0
  6. slide2vec-2.0.1/slide2vec/configs/__init__.py +20 -0
  7. slide2vec-2.0.1/slide2vec/data/dataset.py +127 -0
  8. {slide2vec-2.0.0 → slide2vec-2.0.1}/slide2vec/embed.py +61 -9
  9. {slide2vec-2.0.0 → slide2vec-2.0.1}/slide2vec/models/models.py +21 -19
  10. {slide2vec-2.0.0 → slide2vec-2.0.1}/slide2vec/utils/__init__.py +0 -1
  11. {slide2vec-2.0.0 → slide2vec-2.0.1}/slide2vec/utils/config.py +7 -3
  12. {slide2vec-2.0.0 → slide2vec-2.0.1}/slide2vec/utils/utils.py +0 -15
  13. {slide2vec-2.0.0 → slide2vec-2.0.1/slide2vec.egg-info}/PKG-INFO +38 -6
  14. slide2vec-2.0.0/README.md +0 -52
  15. slide2vec-2.0.0/slide2vec/__init__.py +0 -1
  16. slide2vec-2.0.0/slide2vec/configs/__init__.py +0 -17
  17. slide2vec-2.0.0/slide2vec/data/dataset.py +0 -66
  18. {slide2vec-2.0.0 → slide2vec-2.0.1}/LICENSE +0 -0
  19. {slide2vec-2.0.0 → slide2vec-2.0.1}/MANIFEST.in +0 -0
  20. {slide2vec-2.0.0 → slide2vec-2.0.1}/setup.py +0 -0
  21. {slide2vec-2.0.0 → slide2vec-2.0.1}/slide2vec/aggregate.py +0 -0
  22. {slide2vec-2.0.0 → slide2vec-2.0.1}/slide2vec/data/__init__.py +0 -0
  23. {slide2vec-2.0.0 → slide2vec-2.0.1}/slide2vec/data/augmentations.py +0 -0
  24. {slide2vec-2.0.0 → slide2vec-2.0.1}/slide2vec/distributed/__init__.py +0 -0
  25. {slide2vec-2.0.0 → slide2vec-2.0.1}/slide2vec/main.py +0 -0
  26. {slide2vec-2.0.0 → slide2vec-2.0.1}/slide2vec/models/__init__.py +0 -0
  27. {slide2vec-2.0.0 → slide2vec-2.0.1}/slide2vec/models/layers/__init__.py +0 -0
  28. {slide2vec-2.0.0 → slide2vec-2.0.1}/slide2vec/models/layers/attention.py +0 -0
  29. {slide2vec-2.0.0 → slide2vec-2.0.1}/slide2vec/models/layers/block.py +0 -0
  30. {slide2vec-2.0.0 → slide2vec-2.0.1}/slide2vec/models/layers/dino_head.py +0 -0
  31. {slide2vec-2.0.0 → slide2vec-2.0.1}/slide2vec/models/layers/drop_path.py +0 -0
  32. {slide2vec-2.0.0 → slide2vec-2.0.1}/slide2vec/models/layers/layer_scale.py +0 -0
  33. {slide2vec-2.0.0 → slide2vec-2.0.1}/slide2vec/models/layers/mlp.py +0 -0
  34. {slide2vec-2.0.0 → slide2vec-2.0.1}/slide2vec/models/layers/patch_embed.py +0 -0
  35. {slide2vec-2.0.0 → slide2vec-2.0.1}/slide2vec/models/layers/swiglu_ffn.py +0 -0
  36. {slide2vec-2.0.0 → slide2vec-2.0.1}/slide2vec/models/vision_transformer_dino.py +0 -0
  37. {slide2vec-2.0.0 → slide2vec-2.0.1}/slide2vec/models/vision_transformer_dinov2.py +0 -0
  38. {slide2vec-2.0.0 → slide2vec-2.0.1}/slide2vec/utils/log_utils.py +0 -0
  39. {slide2vec-2.0.0 → slide2vec-2.0.1}/slide2vec.egg-info/SOURCES.txt +0 -0
  40. {slide2vec-2.0.0 → slide2vec-2.0.1}/slide2vec.egg-info/dependency_links.txt +0 -0
  41. {slide2vec-2.0.0 → slide2vec-2.0.1}/slide2vec.egg-info/not-zip-safe +0 -0
  42. {slide2vec-2.0.0 → slide2vec-2.0.1}/slide2vec.egg-info/requires.txt +0 -0
  43. {slide2vec-2.0.0 → slide2vec-2.0.1}/slide2vec.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: slide2vec
3
- Version: 2.0.0
3
+ Version: 2.0.1
4
4
  Summary: Embedding of whole slide images with Foundation Models
5
5
  Home-page: https://github.com/clemsgrs/slide2vec
6
6
  Author: Clément Grisi
@@ -58,6 +58,37 @@ Dynamic: project-url
58
58
  [![Docker Version](https://img.shields.io/docker/v/waticlems/slide2vec?sort=semver&label=docker&logo=docker&color=2496ED)](https://hub.docker.com/r/waticlems/slide2vec)
59
59
 
60
60
 
61
+ ## Supported Models
62
+
63
+ ### Tile-level models
64
+
65
+ | **Model** | **Architecture** | **Parameters** |
66
+ |:---------:|:----------------:|:--------------:|
67
+ | [CONCH](https://huggingface.co/MahmoodLab/conch) | ViT-B/16 | 86M |
68
+ | [H0-mini](https://huggingface.co/bioptimus/H0-mini) | ViT-B/16 | 86M |
69
+ | [Hibou-B](https://huggingface.co/histai/hibou-b) | ViT-B/16 | 86M |
70
+ | [Hibou-L](https://huggingface.co/histai/hibou-L) | ViT-L/16 | 307M |
71
+ | [MUSK](https://huggingface.co/xiangjx/musk) | ViT-L/16 | 307M |
72
+ | [Phikon-v2](https://huggingface.co/owkin/phikon-v2) | ViT-L/16 | 307M |
73
+ | [UNI](https://huggingface.co/MahmoodLab/UNI) | ViT-L/16 | 307M |
74
+ | [Virchow](https://huggingface.co/paige-ai/Virchow) | ViT-H/14 | 632M |
75
+ | [Virchow2](https://huggingface.co/paige-ai/Virchow2) | ViT-H/14 | 632M |
76
+ | [MidNight12k](https://huggingface.co/kaiko-ai/midnight) | ViT-G/14 | 1.1B |
77
+ | [UNI2](https://huggingface.co/MahmoodLab/UNI2-h) | ViT-G/14 | 1.1B |
78
+ | [Prov-GigaPath](https://huggingface.co/prov-gigapath/prov-gigapath) | ViT-G/14 | 1.1B |
79
+ | [H-optimus-0](https://huggingface.co/bioptimus/H-optimus-0) | ViT-G/14 | 1.1B |
80
+ | [H-optimus-1](https://huggingface.co/bioptimus/H-optimus-1) | ViT-G/14 | 1.1B |
81
+ | [Kaiko](https://github.com/kaiko-ai/towards_large_pathology_fms) | Various | 86M - 307M |
82
+
83
+ ### Slide-level models
84
+
85
+ | **Model** | **Architecture** | **Parameters** |
86
+ |:---------:|:----------------:|:--------------:|
87
+ | [TITAN](https://huggingface.co/MahmoodLab/TITAN) | Transformer | 49M |
88
+ | [Prov-GigaPath](https://huggingface.co/prov-gigapath/prov-gigapath) | Transformer (LongNet) | 87M |
89
+ | [PRISM](https://huggingface.co/paige-ai/PRISM) | Perceiver Resampler | 99M |
90
+
91
+
61
92
  ## 🛠️ Installation
62
93
 
63
94
  System requirements: Linux-based OS (e.g., Ubuntu 22.04) with Python 3.10+ and Docker installed.
@@ -77,7 +108,7 @@ Replace `/path/to/your/data` with your local data directory.
77
108
  Alternatively, you can install `slide2vec` via pip:
78
109
 
79
110
  ```shell
80
- pip install slide2vec
111
+ pip install slide2vec
81
112
  ```
82
113
 
83
114
  ## 🚀 Extract features
@@ -93,10 +124,11 @@ pip install slide2vec
93
124
 
94
125
  2. Create a configuration file
95
126
 
96
- A good starting point is the default configuration file `slide2vec/configs/default.yaml` where parameters are documented.<br>
97
- We've also added default configuration files for each of the foundation models currently supported:
98
- - tile-level: `uni`, `uni2`, `virchow`, `virchow2`, `prov-gigapath`, `h-optimus-0`, `h-optimus-1`, `h0-mini`, `conch`, `musk`, `phikonv2`, `hibou-b`, `hibou-L`, [`kaiko`](https://github.com/kaiko-ai/towards_large_pathology_fms)
99
- - slide-level: `prov-gigapath`, `titan`, `prism`
127
+ Good starting points are the default configuration files, where parameters are documented:<br>
128
+ - for preprocessing options: `slide2vec/configs/default_tiling.yaml`
129
+ - for model options: `slide2vec/configs/default_model.yaml`
130
+
131
+ We've also added default configuration files for each of the foundation models currently supported (see above).
100
132
 
101
133
 
102
134
  3. Kick off distributed feature extraction
@@ -0,0 +1,84 @@
1
+ # slide2vec
2
+
3
+ [![PyPI version](https://img.shields.io/pypi/v/slide2vec?label=pypi&logo=pypi&color=3776AB)](https://pypi.org/project/slide2vec/)
4
+ [![Docker Version](https://img.shields.io/docker/v/waticlems/slide2vec?sort=semver&label=docker&logo=docker&color=2496ED)](https://hub.docker.com/r/waticlems/slide2vec)
5
+
6
+
7
+ ## Supported Models
8
+
9
+ ### Tile-level models
10
+
11
+ | **Model** | **Architecture** | **Parameters** |
12
+ |:---------:|:----------------:|:--------------:|
13
+ | [CONCH](https://huggingface.co/MahmoodLab/conch) | ViT-B/16 | 86M |
14
+ | [H0-mini](https://huggingface.co/bioptimus/H0-mini) | ViT-B/16 | 86M |
15
+ | [Hibou-B](https://huggingface.co/histai/hibou-b) | ViT-B/16 | 86M |
16
+ | [Hibou-L](https://huggingface.co/histai/hibou-L) | ViT-L/16 | 307M |
17
+ | [MUSK](https://huggingface.co/xiangjx/musk) | ViT-L/16 | 307M |
18
+ | [Phikon-v2](https://huggingface.co/owkin/phikon-v2) | ViT-L/16 | 307M |
19
+ | [UNI](https://huggingface.co/MahmoodLab/UNI) | ViT-L/16 | 307M |
20
+ | [Virchow](https://huggingface.co/paige-ai/Virchow) | ViT-H/14 | 632M |
21
+ | [Virchow2](https://huggingface.co/paige-ai/Virchow2) | ViT-H/14 | 632M |
22
+ | [MidNight12k](https://huggingface.co/kaiko-ai/midnight) | ViT-G/14 | 1.1B |
23
+ | [UNI2](https://huggingface.co/MahmoodLab/UNI2-h) | ViT-G/14 | 1.1B |
24
+ | [Prov-GigaPath](https://huggingface.co/prov-gigapath/prov-gigapath) | ViT-G/14 | 1.1B |
25
+ | [H-optimus-0](https://huggingface.co/bioptimus/H-optimus-0) | ViT-G/14 | 1.1B |
26
+ | [H-optimus-1](https://huggingface.co/bioptimus/H-optimus-1) | ViT-G/14 | 1.1B |
27
+ | [Kaiko](https://github.com/kaiko-ai/towards_large_pathology_fms) | Various | 86M - 307M |
28
+
29
+ ### Slide-level models
30
+
31
+ | **Model** | **Architecture** | **Parameters** |
32
+ |:---------:|:----------------:|:--------------:|
33
+ | [TITAN](https://huggingface.co/MahmoodLab/TITAN) | Transformer | 49M |
34
+ | [Prov-GigaPath](https://huggingface.co/prov-gigapath/prov-gigapath) | Transformer (LongNet) | 87M |
35
+ | [PRISM](https://huggingface.co/paige-ai/PRISM) | Perceiver Resampler | 99M |
36
+
37
+
38
+ ## 🛠️ Installation
39
+
40
+ System requirements: Linux-based OS (e.g., Ubuntu 22.04) with Python 3.10+ and Docker installed.
41
+
42
+ We recommend running the script inside a container using the latest `slide2vec` image from Docker Hub:
43
+
44
+ ```shell
45
+ docker pull waticlems/slide2vec:latest
46
+ docker run --rm -it \
47
+ -v /path/to/your/data:/data \
48
+ -e HF_TOKEN=<your-huggingface-api-token> \
49
+ waticlems/slide2vec:latest
50
+ ```
51
+
52
+ Replace `/path/to/your/data` with your local data directory.
53
+
54
+ Alternatively, you can install `slide2vec` via pip:
55
+
56
+ ```shell
57
+ pip install slide2vec
58
+ ```
59
+
60
+ ## 🚀 Extract features
61
+
62
+ 1. Create a `.csv` file with slide paths. Optionally, you can provide paths to pre-computed tissue masks.
63
+
64
+ ```csv
65
+ wsi_path,mask_path
66
+ /path/to/slide1.tif,/path/to/mask1.tif
67
+ /path/to/slide2.tif,/path/to/mask2.tif
68
+ ...
69
+ ```
70
+
71
+ 2. Create a configuration file
72
+
73
+ Good starting points are the default configuration files, where parameters are documented:<br>
74
+ - for preprocessing options: `slide2vec/configs/default_tiling.yaml`
75
+ - for model options: `slide2vec/configs/default_model.yaml`
76
+
77
+ We've also added default configuration files for each of the foundation models currently supported (see above).
78
+
79
+
80
+ 3. Kick off distributed feature extraction
81
+
82
+ ```shell
83
+ python3 -m slide2vec.main --config-file </path/to/config.yaml>
84
+ ```
@@ -23,7 +23,7 @@ warn_unused_configs = true
23
23
  no_implicit_reexport = true
24
24
 
25
25
  [tool.bumpver]
26
- current_version = "2.0.0"
26
+ current_version = "2.0.1"
27
27
  version_pattern = "MAJOR.MINOR.PATCH"
28
28
  commit = false # We do version bumping in CI, not as a commit
29
29
  tag = false # Git tag already exists — we don't auto-tag
@@ -1,6 +1,6 @@
1
1
  [metadata]
2
2
  name = slide2vec
3
- version = 2.0.0
3
+ version = 2.0.1
4
4
  description = Embedding of whole slide images with Foundation Models
5
5
  author = Clément Grisi
6
6
  platforms = unix, linux, osx, cygwin, win32
@@ -0,0 +1,6 @@
1
+ __version__ = "2.0.1"
2
+
3
+ import sys
4
+ import os
5
+
6
+ sys.path.append(os.path.join(os.path.dirname(__file__), "hs2p"))
@@ -0,0 +1,20 @@
1
+ import pathlib
2
+
3
+ from omegaconf import OmegaConf
4
+
5
+
6
def load_config(config_name: str):
    """Load one of the YAML configuration files stored next to this module.

    Args:
        config_name: Base name of the configuration file, without the
            ``.yaml`` suffix.

    Returns:
        The configuration object returned by ``OmegaConf.load``.
    """
    configs_dir = pathlib.Path(__file__).parent.resolve()
    return OmegaConf.load(configs_dir / (config_name + ".yaml"))
9
+
10
+
11
# Packaged defaults, loaded once at import time from the YAML files that sit
# next to this module.
default_tiling_config = load_config("default_tiling")
default_model_config = load_config("default_model")
13
+
14
+
15
def load_and_merge_config(config_name: str):
    """Load a named configuration layered on top of the packaged defaults.

    The default tiling and default model configurations are merged first;
    the named configuration is then merged on top of that result.

    Args:
        config_name: Base name of the configuration file to load, without
            the ``.yaml`` suffix (passed to ``load_config``).

    Returns:
        The merged configuration produced by ``OmegaConf.merge``.
    """
    # The local names must differ from the module-level defaults: assigning to
    # ``default_tiling_config`` / ``default_model_config`` inside the function
    # would make them function-local, and reading them on the right-hand side
    # would then raise ``UnboundLocalError`` on every call.
    tiling_defaults = OmegaConf.create(default_tiling_config)
    model_defaults = OmegaConf.create(default_model_config)
    merged_defaults = OmegaConf.merge(tiling_defaults, model_defaults)
    return OmegaConf.merge(merged_defaults, load_config(config_name))
@@ -0,0 +1,127 @@
1
+ import cv2
2
+ import torch
3
+ import numpy as np
4
+ import wholeslidedata as wsd
5
+
6
+ from transformers.image_processing_utils import BaseImageProcessor
7
+ from PIL import Image
8
+ from pathlib import Path
9
+ from typing import Callable
10
+
11
+ from slide2vec.hs2p.hs2p.wsi import WholeSlideImage, SegmentationParameters, SamplingParameters, FilterParameters
12
+ from slide2vec.hs2p.hs2p.wsi.utils import HasEnoughTissue
13
+
14
+
15
class TileDataset(torch.utils.data.Dataset):
    """Dataset over the tiles of a single whole slide image.

    Tile coordinates are read from ``<coordinates_dir>/<slide name>.npy``.
    Each item is the pair ``(idx, tile)``: the tile is read from the slide on
    demand, optionally masked with the tissue mask, resized to the target
    tile size when the read size differs, and passed through ``transforms``.
    """

    def __init__(
        self,
        wsi_path: Path,
        mask_path: Path,
        coordinates_dir: Path,
        target_spacing: float,
        tolerance: float,
        backend: str,
        segment_params: SegmentationParameters | None = None,
        sampling_params: SamplingParameters | None = None,
        filter_params: FilterParameters | None = None,
        transforms: BaseImageProcessor | Callable | None = None,
        restrict_to_tissue: bool = False,
    ):
        """Load tile coordinates and, if requested, tissue contours.

        Args:
            wsi_path: Path to the slide; its stem (spaces replaced by
                underscores) is the name of the coordinates file.
            mask_path: Path to a tissue mask, or ``None``. When it is ``None``
                and ``restrict_to_tissue`` is set, segmentation is requested
                from ``WholeSlideImage``.
            coordinates_dir: Directory holding ``<name>.npy``.
            target_spacing: Spacing used to scale the level-0 coordinates and
                forwarded to contour detection.
            tolerance: Forwarded to contour detection.
            backend: Backend name forwarded to the slide readers.
            segment_params: Forwarded to ``WholeSlideImage``.
            sampling_params: Forwarded to ``WholeSlideImage``.
            filter_params: Forwarded to contour detection.
            transforms: Hugging Face image processor or any callable applied
                to each tile.
            restrict_to_tissue: When true, each tile is combined with its
                tissue mask before being returned.
        """
        self.path = wsi_path
        self.mask_path = mask_path
        self.target_spacing = target_spacing
        self.backend = backend
        self.name = wsi_path.stem.replace(" ", "_")
        self.load_coordinates(coordinates_dir)
        self.transforms = transforms
        self.restrict_to_tissue = restrict_to_tissue

        if not restrict_to_tissue:
            return

        # Contours, holes and the tissue mask are only needed for masking.
        slide = WholeSlideImage(
            path=self.path,
            mask_path=self.mask_path,
            backend=self.backend,
            segment=self.mask_path is None,
            segment_params=segment_params,
            sampling_params=sampling_params,
        )
        detected_contours, detected_holes = slide.detect_contours(
            target_spacing=target_spacing,
            tolerance=tolerance,
            filter_params=filter_params,
        )
        downsample = slide.level_downsamples[slide.seg_level]
        inverse_scale = (1.0 / downsample[0], 1.0 / downsample[1])
        self.contours = slide.scaleContourDim(detected_contours, inverse_scale)
        self.holes = slide.scaleHolesDim(detected_holes, inverse_scale)
        self.tissue_mask = slide.annotation_mask["tissue"]
        self.seg_spacing = slide.get_level_spacing(slide.seg_level)
        self.spacing_at_level_0 = slide.get_level_spacing(0)

    def load_coordinates(self, coordinates_dir):
        """Read ``<name>.npy`` and expose its fields as attributes."""
        records = np.load(Path(coordinates_dir, f"{self.name}.npy"), allow_pickle=True)
        self.x = records["x"]
        self.y = records["y"]
        # [N, 2] integer array of (x, y) pairs
        self.coordinates = (np.array([self.x, self.y]).T).astype(int)
        self.scaled_coordinates = self.scale_coordinates()
        for field in (
            "contour_index",
            "target_tile_size",
            "tile_level",
            "resize_factor",
            "tile_size_resized",
        ):
            setattr(self, field, records[field])
        self.tile_size_lv0 = records["tile_size_lv0"][0]

    def scale_coordinates(self):
        """Return the level-0 coordinates rescaled to ``target_spacing``."""
        # Coordinates are defined w.r.t. level 0, so the ratio between the
        # slide's first spacing and the target spacing is the scale factor.
        slide = wsd.WholeSlideImage(self.path, backend=self.backend)
        factor = slide.spacings[0] / self.target_spacing
        return (self.coordinates * factor).astype(int)

    def __len__(self):
        """Return the number of tiles."""
        return len(self.x)

    def _mask_non_tissue(self, pixels, idx, spacing):
        """Combine the tile pixels with the tissue mask of tile ``idx``."""
        contour_idx = self.contour_index[idx]
        checker = HasEnoughTissue(
            contour=self.contours[contour_idx],
            contour_holes=self.holes[contour_idx],
            tissue_mask=self.tissue_mask,
            tile_size=self.target_tile_size[idx],
            tile_spacing=spacing,
            resize_factor=self.resize_factor[idx],
            seg_spacing=self.seg_spacing,
            spacing_at_level_0=self.spacing_at_level_0,
        )
        tile_mask = checker.get_tile_mask(self.x[idx], self.y[idx])
        # the mask has to cover exactly the same pixels as the tile
        assert tile_mask.shape[:2] == pixels.shape[:2], "Mask and tile shapes do not match"
        return cv2.bitwise_and(pixels, pixels, mask=tile_mask)

    def __getitem__(self, idx):
        """Return ``(idx, tile)`` for the tile at position ``idx``."""
        # The reader is opened per item; it cannot be created in __init__
        # because of multiprocessing.
        slide = wsd.WholeSlideImage(self.path, backend=self.backend)
        spacing = slide.spacings[self.tile_level[idx]]
        read_size = self.tile_size_resized[idx]
        pixels = slide.get_patch(
            self.x[idx],
            self.y[idx],
            read_size,
            read_size,
            spacing=spacing,
            center=False,
        )
        if self.restrict_to_tissue:
            pixels = self._mask_non_tissue(pixels, idx, spacing)

        tile = Image.fromarray(pixels).convert("RGB")
        target_size = self.target_tile_size[idx]
        if target_size != read_size:
            tile = tile.resize((target_size, target_size))

        if not self.transforms:
            return idx, tile
        if isinstance(self.transforms, BaseImageProcessor):
            # Hugging Face (`transformers`) image processor
            tile = self.transforms(tile, return_tensors="pt")["pixel_values"].squeeze(0)
        else:
            # general callable such as torchvision transforms
            tile = self.transforms(tile)
        return idx, tile
@@ -18,6 +18,7 @@ from slide2vec.utils import fix_random_seeds
18
18
  from slide2vec.utils.config import get_cfg_from_file, setup_distributed
19
19
  from slide2vec.models import ModelFactory
20
20
  from slide2vec.data import TileDataset, RegionUnfolding
21
+ from slide2vec.hs2p.hs2p.wsi import SamplingParameters
21
22
 
22
23
  torchvision.disable_beta_transforms_warning()
23
24
 
@@ -60,13 +61,31 @@ def create_transforms(cfg, model):
60
61
  raise ValueError(f"Unknown model level: {cfg.model.level}")
61
62
 
62
63
 
63
- def create_dataset(wsi_fp, coordinates_dir, spacing, backend, transforms):
64
+ def create_dataset(
65
+ wsi_path,
66
+ mask_path,
67
+ coordinates_dir,
68
+ target_spacing,
69
+ tolerance,
70
+ backend,
71
+ segment_params,
72
+ sampling_params,
73
+ filter_params,
74
+ transforms,
75
+ restrict_to_tissue: bool,
76
+ ):
64
77
  return TileDataset(
65
- wsi_fp,
66
- coordinates_dir,
67
- spacing,
78
+ wsi_path=wsi_path,
79
+ mask_path=mask_path,
80
+ coordinates_dir=coordinates_dir,
81
+ target_spacing=target_spacing,
82
+ tolerance=tolerance,
68
83
  backend=backend,
84
+ segment_params=segment_params,
85
+ sampling_params=sampling_params,
86
+ filter_params=filter_params,
69
87
  transforms=transforms,
88
+ restrict_to_tissue=restrict_to_tissue,
70
89
  )
71
90
 
72
91
 
@@ -154,10 +173,13 @@ def main(args):
154
173
  process_list.is_file()
155
174
  ), "Process list CSV not found. Ensure tiling has been run."
156
175
  process_df = pd.read_csv(process_list)
176
+ cols = ["wsi_name", "wsi_path", "tiling_status", "error", "traceback"]
157
177
  if "feature_status" not in process_df.columns:
158
178
  process_df["feature_status"] = ["tbp"] * len(process_df)
159
- cols = ["wsi_name", "wsi_path", "mask_path", "tiling_status", "feature_status", "error", "traceback"]
160
- process_df = process_df[cols]
179
+ if "mask_path" not in process_df.columns:
180
+ process_df["mask_path"] = [None] * len(process_df)
181
+ cols = ["wsi_name", "wsi_path", "mask_path", "tiling_status", "feature_status", "error", "traceback"]
182
+ process_df = process_df[cols]
161
183
 
162
184
  skip_feature_extraction = process_df["feature_status"].str.contains("success").all()
163
185
 
@@ -176,12 +198,30 @@ def main(args):
176
198
  if not run_on_cpu:
177
199
  torch.distributed.barrier()
178
200
 
201
+ pixel_mapping = {k: v for e in cfg.tiling.sampling_params.pixel_mapping for k, v in e.items()}
202
+ tissue_percentage = {k: v for e in cfg.tiling.sampling_params.tissue_percentage for k, v in e.items()}
203
+ if "tissue" not in tissue_percentage:
204
+ tissue_percentage["tissue"] = cfg.tiling.params.min_tissue_percentage
205
+ if cfg.tiling.sampling_params.color_mapping is not None:
206
+ color_mapping = {k: v for e in cfg.tiling.sampling_params.color_mapping for k, v in e.items()}
207
+ else:
208
+ color_mapping = None
209
+
210
+ sampling_params = SamplingParameters(
211
+ pixel_mapping=pixel_mapping,
212
+ color_mapping=color_mapping,
213
+ tissue_percentage=tissue_percentage,
214
+ )
215
+
179
216
  # select slides that were successfully tiled but not yet processed for feature extraction
180
217
  tiled_df = process_df[process_df.tiling_status == "success"]
181
218
  mask = tiled_df["feature_status"] != "success"
182
219
  process_stack = tiled_df[mask]
183
220
  total = len(process_stack)
221
+
184
222
  wsi_paths_to_process = [Path(x) for x in process_stack.wsi_path.values.tolist()]
223
+ mask_paths_to_process = [Path(x) if x is not None and not pd.isna(x) else None for x in process_stack.mask_path.values.tolist()]
224
+ combined_paths = zip(wsi_paths_to_process, mask_paths_to_process)
185
225
 
186
226
  features_dir = Path(cfg.output_dir, "features")
187
227
  if distributed.is_main_process():
@@ -201,8 +241,8 @@ def main(args):
201
241
  transforms = create_transforms(cfg, model)
202
242
  print(f"transforms: {transforms}")
203
243
 
204
- for wsi_fp in tqdm.tqdm(
205
- wsi_paths_to_process,
244
+ for wsi_fp, mask_fp in tqdm.tqdm(
245
+ combined_paths,
206
246
  desc="Inference",
207
247
  unit="slide",
208
248
  total=total,
@@ -211,7 +251,19 @@ def main(args):
211
251
  position=1,
212
252
  ):
213
253
  try:
214
- dataset = create_dataset(wsi_fp, coordinates_dir, cfg.tiling.params.spacing, cfg.tiling.backend, transforms)
254
+ dataset = create_dataset(
255
+ wsi_path=wsi_fp,
256
+ mask_path=mask_fp,
257
+ coordinates_dir=coordinates_dir,
258
+ target_spacing=cfg.tiling.params.spacing,
259
+ tolerance=cfg.tiling.params.tolerance,
260
+ backend=cfg.tiling.backend,
261
+ segment_params=cfg.tiling.seg_params,
262
+ sampling_params=sampling_params,
263
+ filter_params=cfg.tiling.filter_params,
264
+ transforms=transforms,
265
+ restrict_to_tissue=cfg.model.restrict_to_tissue,
266
+ )
215
267
  if distributed.is_enabled_and_multiple_gpus():
216
268
  sampler = torch.utils.data.DistributedSampler(
217
269
  dataset,
@@ -13,7 +13,6 @@ from timm.data.constants import IMAGENET_INCEPTION_MEAN, IMAGENET_INCEPTION_STD
13
13
  from timm.data.transforms_factory import create_transform
14
14
 
15
15
  from conch.open_clip_custom import create_model_from_pretrained
16
- from musk import modeling as musk_modeling
17
16
  from musk import utils as musk_utils
18
17
 
19
18
  import slide2vec.distributed as distributed
@@ -70,11 +69,12 @@ class ModelFactory:
70
69
  pretrained_weights=options.pretrained_weights,
71
70
  input_size=options.tile_size,
72
71
  )
73
- elif options.name is None and options.arch:
72
+ elif options.name == "dino" and options.arch:
74
73
  model = DINOViT(
75
74
  arch=options.arch,
76
75
  pretrained_weights=options.pretrained_weights,
77
76
  input_size=options.tile_size,
77
+ patch_size=options.token_size,
78
78
  )
79
79
  elif options.level == "region":
80
80
  if options.name == "virchow":
@@ -259,7 +259,17 @@ class DINOViT(FeatureExtractor):
259
259
  def load_weights(self):
260
260
  if distributed.is_main_process():
261
261
  print(f"Loading pretrained weights from: {self.pretrained_weights}")
262
- state_dict = torch.load(self.pretrained_weights, map_location="cpu")
262
+
263
+ # Fix for loading checkpoints saved with numpy 2.0+ in an environment with numpy < 2.0
264
+ try:
265
+ import numpy._core
266
+ except ImportError:
267
+ import numpy as np
268
+ import sys
269
+ sys.modules["numpy._core"] = np.core
270
+ sys.modules["numpy._core.multiarray"] = np.core.multiarray
271
+
272
+ state_dict = torch.load(self.pretrained_weights, map_location="cpu", weights_only=False)
263
273
  if self.ckpt_key:
264
274
  state_dict = state_dict[self.ckpt_key]
265
275
  nn.modules.utils.consume_prefix_in_state_dict_if_present(
@@ -282,21 +292,13 @@ class DINOViT(FeatureExtractor):
282
292
  return encoder
283
293
 
284
294
  def get_transforms(self):
285
- if self.input_size > 224:
286
- transform = transforms.Compose(
287
- [
288
- MaybeToTensor(),
289
- transforms.CenterCrop(224),
290
- make_normalize_transform(),
291
- ]
292
- )
293
- else:
294
- transforms.Compose(
295
- [
296
- MaybeToTensor(),
297
- make_normalize_transform(),
298
- ]
299
- )
295
+ transform = transforms.Compose(
296
+ [
297
+ MaybeToTensor(),
298
+ transforms.CenterCrop(self.input_size),
299
+ make_normalize_transform(),
300
+ ]
301
+ )
300
302
  return transform
301
303
 
302
304
  def forward(self, x):
@@ -344,7 +346,7 @@ class CustomViT(FeatureExtractor):
344
346
  def load_weights(self):
345
347
  if distributed.is_main_process():
346
348
  print(f"Loading pretrained weights from: {self.pretrained_weights}")
347
- state_dict = torch.load(self.pretrained_weights, map_location="cpu")
349
+ state_dict = torch.load(self.pretrained_weights, map_location="cpu", weights_only=False)
348
350
  if self.ckpt_key:
349
351
  state_dict = state_dict[self.ckpt_key]
350
352
  nn.modules.utils.consume_prefix_in_state_dict_if_present(
@@ -2,7 +2,6 @@ from .utils import (
2
2
  initialize_wandb,
3
3
  fix_random_seeds,
4
4
  get_sha,
5
- load_csv,
6
5
  update_state_dict,
7
6
  )
8
7
  from .log_utils import setup_logging
@@ -11,7 +11,7 @@ from omegaconf import OmegaConf
11
11
 
12
12
  import slide2vec.distributed as distributed
13
13
  from slide2vec.utils import initialize_wandb, fix_random_seeds, get_sha, setup_logging
14
- from slide2vec.configs import default_config
14
+ from slide2vec.configs import default_tiling_config, default_model_config
15
15
 
16
16
  logger = logging.getLogger("slide2vec")
17
17
 
@@ -25,7 +25,9 @@ def write_config(cfg, output_dir, name="config.yaml"):
25
25
 
26
26
 
27
27
  def get_cfg_from_file(config_file):
28
- default_cfg = OmegaConf.create(default_config)
28
+ default_tiling_cfg = OmegaConf.create(default_tiling_config)
29
+ default_embedding_cfg = OmegaConf.create(default_model_config)
30
+ default_cfg = OmegaConf.merge(default_tiling_cfg, default_embedding_cfg)
29
31
  cfg = OmegaConf.load(config_file)
30
32
  cfg = OmegaConf.merge(default_cfg, cfg)
31
33
  OmegaConf.resolve(cfg)
@@ -36,7 +38,9 @@ def get_cfg_from_args(args):
36
38
  if args.output_dir is not None:
37
39
  args.output_dir = os.path.abspath(args.output_dir)
38
40
  args.opts += [f"output_dir={args.output_dir}"]
39
- default_cfg = OmegaConf.create(default_config)
41
+ default_tiling_cfg = OmegaConf.create(default_tiling_config)
42
+ default_embedding_cfg = OmegaConf.create(default_model_config)
43
+ default_cfg = OmegaConf.merge(default_tiling_cfg, default_embedding_cfg)
40
44
  cfg = OmegaConf.load(args.config_file)
41
45
  cfg = OmegaConf.merge(default_cfg, cfg, OmegaConf.from_cli(args.opts))
42
46
  OmegaConf.resolve(cfg)
@@ -111,21 +111,6 @@ def initialize_wandb(
111
111
  return run
112
112
 
113
113
 
114
- def load_csv(cfg):
115
- df = pd.read_csv(cfg.csv)
116
- if "wsi_path" in df.columns:
117
- wsi_paths = [Path(x) for x in df.wsi_path.values.tolist()]
118
- elif "slide_path" in df.columns:
119
- wsi_paths = [Path(x) for x in df.slide_path.values.tolist()]
120
- if "mask_path" in df.columns:
121
- mask_paths = [Path(x) for x in df.mask_path.values.tolist()]
122
- elif "segmentation_mask_path" in df.columns:
123
- mask_paths = [Path(x) for x in df.segmentation_mask_path.values.tolist()]
124
- else:
125
- mask_paths = [None for _ in wsi_paths]
126
- return wsi_paths, mask_paths
127
-
128
-
129
114
  def update_state_dict(
130
115
  *,
131
116
  model_dict: dict,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: slide2vec
3
- Version: 2.0.0
3
+ Version: 2.0.1
4
4
  Summary: Embedding of whole slide images with Foundation Models
5
5
  Home-page: https://github.com/clemsgrs/slide2vec
6
6
  Author: Clément Grisi
@@ -58,6 +58,37 @@ Dynamic: project-url
58
58
  [![Docker Version](https://img.shields.io/docker/v/waticlems/slide2vec?sort=semver&label=docker&logo=docker&color=2496ED)](https://hub.docker.com/r/waticlems/slide2vec)
59
59
 
60
60
 
61
+ ## Supported Models
62
+
63
+ ### Tile-level models
64
+
65
+ | **Model** | **Architecture** | **Parameters** |
66
+ |:---------:|:----------------:|:--------------:|
67
+ | [CONCH](https://huggingface.co/MahmoodLab/conch) | ViT-B/16 | 86M |
68
+ | [H0-mini](https://huggingface.co/bioptimus/H0-mini) | ViT-B/16 | 86M |
69
+ | [Hibou-B](https://huggingface.co/histai/hibou-b) | ViT-B/16 | 86M |
70
+ | [Hibou-L](https://huggingface.co/histai/hibou-L) | ViT-L/16 | 307M |
71
+ | [MUSK](https://huggingface.co/xiangjx/musk) | ViT-L/16 | 307M |
72
+ | [Phikon-v2](https://huggingface.co/owkin/phikon-v2) | ViT-L/16 | 307M |
73
+ | [UNI](https://huggingface.co/MahmoodLab/UNI) | ViT-L/16 | 307M |
74
+ | [Virchow](https://huggingface.co/paige-ai/Virchow) | ViT-H/14 | 632M |
75
+ | [Virchow2](https://huggingface.co/paige-ai/Virchow2) | ViT-H/14 | 632M |
76
+ | [MidNight12k](https://huggingface.co/kaiko-ai/midnight) | ViT-G/14 | 1.1B |
77
+ | [UNI2](https://huggingface.co/MahmoodLab/UNI2-h) | ViT-G/14 | 1.1B |
78
+ | [Prov-GigaPath](https://huggingface.co/prov-gigapath/prov-gigapath) | ViT-G/14 | 1.1B |
79
+ | [H-optimus-0](https://huggingface.co/bioptimus/H-optimus-0) | ViT-G/14 | 1.1B |
80
+ | [H-optimus-1](https://huggingface.co/bioptimus/H-optimus-1) | ViT-G/14 | 1.1B |
81
+ | [Kaiko](https://github.com/kaiko-ai/towards_large_pathology_fms) | Various | 86M - 307M |
82
+
83
+ ### Slide-level models
84
+
85
+ | **Model** | **Architecture** | **Parameters** |
86
+ |:---------:|:----------------:|:--------------:|
87
+ | [TITAN](https://huggingface.co/MahmoodLab/TITAN) | Transformer | 49M |
88
+ | [Prov-GigaPath](https://huggingface.co/prov-gigapath/prov-gigapath) | Transformer (LongNet) | 87M |
89
+ | [PRISM](https://huggingface.co/paige-ai/PRISM) | Perceiver Resampler | 99M |
90
+
91
+
61
92
  ## 🛠️ Installation
62
93
 
63
94
  System requirements: Linux-based OS (e.g., Ubuntu 22.04) with Python 3.10+ and Docker installed.
@@ -77,7 +108,7 @@ Replace `/path/to/your/data` with your local data directory.
77
108
  Alternatively, you can install `slide2vec` via pip:
78
109
 
79
110
  ```shell
80
- pip install slide2vec
111
+ pip install slide2vec
81
112
  ```
82
113
 
83
114
  ## 🚀 Extract features
@@ -93,10 +124,11 @@ pip install slide2vec
93
124
 
94
125
  2. Create a configuration file
95
126
 
96
- A good starting point is the default configuration file `slide2vec/configs/default.yaml` where parameters are documented.<br>
97
- We've also added default configuration files for each of the foundation models currently supported:
98
- - tile-level: `uni`, `uni2`, `virchow`, `virchow2`, `prov-gigapath`, `h-optimus-0`, `h-optimus-1`, `h0-mini`, `conch`, `musk`, `phikonv2`, `hibou-b`, `hibou-L`, [`kaiko`](https://github.com/kaiko-ai/towards_large_pathology_fms)
99
- - slide-level: `prov-gigapath`, `titan`, `prism`
127
+ Good starting points are the default configuration files, where parameters are documented:<br>
128
+ - for preprocessing options: `slide2vec/configs/default_tiling.yaml`
129
+ - for model options: `slide2vec/configs/default_model.yaml`
130
+
131
+ We've also added default configuration files for each of the foundation models currently supported (see above).
100
132
 
101
133
 
102
134
  3. Kick off distributed feature extraction
slide2vec-2.0.0/README.md DELETED
@@ -1,52 +0,0 @@
1
- # slide2vec
2
-
3
- [![PyPI version](https://img.shields.io/pypi/v/slide2vec?label=pypi&logo=pypi&color=3776AB)](https://pypi.org/project/slide2vec/)
4
- [![Docker Version](https://img.shields.io/docker/v/waticlems/slide2vec?sort=semver&label=docker&logo=docker&color=2496ED)](https://hub.docker.com/r/waticlems/slide2vec)
5
-
6
-
7
- ## 🛠️ Installation
8
-
9
- System requirements: Linux-based OS (e.g., Ubuntu 22.04) with Python 3.10+ and Docker installed.
10
-
11
- We recommend running the script inside a container using the latest `slide2vec` image from Docker Hub:
12
-
13
- ```shell
14
- docker pull waticlems/slide2vec:latest
15
- docker run --rm -it \
16
- -v /path/to/your/data:/data \
17
- -e HF_TOKEN=<your-huggingface-api-token> \
18
- waticlems/slide2vec:latest
19
- ```
20
-
21
- Replace `/path/to/your/data` with your local data directory.
22
-
23
- Alternatively, you can install `slide2vec` via pip:
24
-
25
- ```shell
26
- pip install slide2vec
27
- ```
28
-
29
- ## 🚀 Extract features
30
-
31
- 1. Create a `.csv` file with slide paths. Optionally, you can provide paths to pre-computed tissue masks.
32
-
33
- ```csv
34
- wsi_path,mask_path
35
- /path/to/slide1.tif,/path/to/mask1.tif
36
- /path/to/slide2.tif,/path/to/mask2.tif
37
- ...
38
- ```
39
-
40
- 2. Create a configuration file
41
-
42
- A good starting point is the default configuration file `slide2vec/configs/default.yaml` where parameters are documented.<br>
43
- We've also added default configuration files for each of the foundation models currently supported:
44
- - tile-level: `uni`, `uni2`, `virchow`, `virchow2`, `prov-gigapath`, `h-optimus-0`, `h-optimus-1`, `h0-mini`, `conch`, `musk`, `phikonv2`, `hibou-b`, `hibou-L`, [`kaiko`](https://github.com/kaiko-ai/towards_large_pathology_fms)
45
- - slide-level: `prov-gigapath`, `titan`, `prism`
46
-
47
-
48
- 3. Kick off distributed feature extraction
49
-
50
- ```shell
51
- python3 -m slide2vec.main --config-file </path/to/config.yaml>
52
- ```
@@ -1 +0,0 @@
1
- __version__ = "2.0.0"
@@ -1,17 +0,0 @@
1
- import pathlib
2
-
3
- from omegaconf import OmegaConf
4
-
5
-
6
- def load_config(config_name: str):
7
- config_filename = config_name + ".yaml"
8
- return OmegaConf.load(pathlib.Path(__file__).parent.resolve() / config_filename)
9
-
10
-
11
- default_config = load_config("default")
12
-
13
-
14
- def load_and_merge_config(config_name: str):
15
- default_config = OmegaConf.create(default_config)
16
- loaded_config = load_config(config_name)
17
- return OmegaConf.merge(default_config, loaded_config)
@@ -1,66 +0,0 @@
1
- import torch
2
- import numpy as np
3
- import wholeslidedata as wsd
4
-
5
- from transformers.image_processing_utils import BaseImageProcessor
6
- from PIL import Image
7
- from pathlib import Path
8
-
9
-
10
- class TileDataset(torch.utils.data.Dataset):
11
- def __init__(self, wsi_path, tile_dir, target_spacing, backend, transforms=None):
12
- self.path = wsi_path
13
- self.target_spacing = target_spacing
14
- self.backend = backend
15
- self.name = wsi_path.stem.replace(" ", "_")
16
- self.load_coordinates(tile_dir)
17
- self.transforms = transforms
18
-
19
- def load_coordinates(self, tile_dir):
20
- coordinates = np.load(Path(tile_dir, f"{self.name}.npy"), allow_pickle=True)
21
- self.x = coordinates["x"]
22
- self.y = coordinates["y"]
23
- self.coordinates = (np.array([self.x, self.y]).T).astype(int)
24
- self.scaled_coordinates = self.scale_coordinates()
25
- self.tile_level = coordinates["tile_level"]
26
- self.tile_size_resized = coordinates["tile_size_resized"]
27
- resize_factor = coordinates["resize_factor"]
28
- self.tile_size = np.round(self.tile_size_resized / resize_factor).astype(int)
29
- self.tile_size_lv0 = coordinates["tile_size_lv0"][0]
30
-
31
- def scale_coordinates(self):
32
- # coordinates are defined w.r.t. level 0
33
- # i need to scale them to target_spacing
34
- wsi = wsd.WholeSlideImage(self.path, backend=self.backend)
35
- min_spacing = wsi.spacings[0]
36
- scale = min_spacing / self.target_spacing
37
- # create a [N, 2] array with x and y coordinates
38
- scaled_coordinates = (self.coordinates * scale).astype(int)
39
- return scaled_coordinates
40
-
41
- def __len__(self):
42
- return len(self.x)
43
-
44
- def __getitem__(self, idx):
45
- wsi = wsd.WholeSlideImage(
46
- self.path, backend=self.backend
47
- ) # cannot be defined in __init__ because of multiprocessing
48
- tile_level = self.tile_level[idx]
49
- tile_spacing = wsi.spacings[tile_level]
50
- tile_arr = wsi.get_patch(
51
- self.x[idx],
52
- self.y[idx],
53
- self.tile_size_resized[idx],
54
- self.tile_size_resized[idx],
55
- spacing=tile_spacing,
56
- center=False,
57
- )
58
- tile = Image.fromarray(tile_arr).convert("RGB")
59
- if self.tile_size[idx] != self.tile_size_resized[idx]:
60
- tile = tile.resize((self.tile_size[idx], self.tile_size[idx]))
61
- if self.transforms:
62
- if isinstance(self.transforms, BaseImageProcessor): # Hugging Face (`transformer`)
63
- tile = self.transforms(tile, return_tensors="pt")["pixel_values"].squeeze(0)
64
- else: # general callable such as torchvision transforms
65
- tile = self.transforms(tile)
66
- return idx, tile
File without changes
File without changes
File without changes
File without changes