slide2vec 1.3.0__tar.gz → 2.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. {slide2vec-1.3.0/slide2vec.egg-info → slide2vec-2.0.0}/PKG-INFO +2 -2
  2. {slide2vec-1.3.0 → slide2vec-2.0.0}/README.md +1 -1
  3. {slide2vec-1.3.0 → slide2vec-2.0.0}/pyproject.toml +1 -1
  4. {slide2vec-1.3.0 → slide2vec-2.0.0}/setup.cfg +1 -1
  5. slide2vec-2.0.0/slide2vec/__init__.py +1 -0
  6. {slide2vec-1.3.0 → slide2vec-2.0.0}/slide2vec/aggregate.py +27 -7
  7. {slide2vec-1.3.0 → slide2vec-2.0.0}/slide2vec/data/dataset.py +5 -1
  8. {slide2vec-1.3.0 → slide2vec-2.0.0}/slide2vec/embed.py +19 -6
  9. {slide2vec-1.3.0 → slide2vec-2.0.0}/slide2vec/main.py +43 -65
  10. {slide2vec-1.3.0 → slide2vec-2.0.0}/slide2vec/models/models.py +335 -28
  11. {slide2vec-1.3.0 → slide2vec-2.0.0}/slide2vec/utils/config.py +16 -5
  12. {slide2vec-1.3.0 → slide2vec-2.0.0/slide2vec.egg-info}/PKG-INFO +2 -2
  13. {slide2vec-1.3.0 → slide2vec-2.0.0}/slide2vec.egg-info/SOURCES.txt +1 -5
  14. slide2vec-1.3.0/slide2vec/__init__.py +0 -1
  15. slide2vec-1.3.0/slide2vec/tiling.py +0 -225
  16. slide2vec-1.3.0/slide2vec/wsi/__init__.py +0 -262
  17. slide2vec-1.3.0/slide2vec/wsi/utils.py +0 -111
  18. slide2vec-1.3.0/slide2vec/wsi/wsi.py +0 -979
  19. {slide2vec-1.3.0 → slide2vec-2.0.0}/LICENSE +0 -0
  20. {slide2vec-1.3.0 → slide2vec-2.0.0}/MANIFEST.in +0 -0
  21. {slide2vec-1.3.0 → slide2vec-2.0.0}/setup.py +0 -0
  22. {slide2vec-1.3.0 → slide2vec-2.0.0}/slide2vec/configs/__init__.py +0 -0
  23. {slide2vec-1.3.0 → slide2vec-2.0.0}/slide2vec/data/__init__.py +0 -0
  24. {slide2vec-1.3.0 → slide2vec-2.0.0}/slide2vec/data/augmentations.py +0 -0
  25. {slide2vec-1.3.0 → slide2vec-2.0.0}/slide2vec/distributed/__init__.py +0 -0
  26. {slide2vec-1.3.0 → slide2vec-2.0.0}/slide2vec/models/__init__.py +0 -0
  27. {slide2vec-1.3.0 → slide2vec-2.0.0}/slide2vec/models/layers/__init__.py +0 -0
  28. {slide2vec-1.3.0 → slide2vec-2.0.0}/slide2vec/models/layers/attention.py +0 -0
  29. {slide2vec-1.3.0 → slide2vec-2.0.0}/slide2vec/models/layers/block.py +0 -0
  30. {slide2vec-1.3.0 → slide2vec-2.0.0}/slide2vec/models/layers/dino_head.py +0 -0
  31. {slide2vec-1.3.0 → slide2vec-2.0.0}/slide2vec/models/layers/drop_path.py +0 -0
  32. {slide2vec-1.3.0 → slide2vec-2.0.0}/slide2vec/models/layers/layer_scale.py +0 -0
  33. {slide2vec-1.3.0 → slide2vec-2.0.0}/slide2vec/models/layers/mlp.py +0 -0
  34. {slide2vec-1.3.0 → slide2vec-2.0.0}/slide2vec/models/layers/patch_embed.py +0 -0
  35. {slide2vec-1.3.0 → slide2vec-2.0.0}/slide2vec/models/layers/swiglu_ffn.py +0 -0
  36. {slide2vec-1.3.0 → slide2vec-2.0.0}/slide2vec/models/vision_transformer_dino.py +0 -0
  37. {slide2vec-1.3.0 → slide2vec-2.0.0}/slide2vec/models/vision_transformer_dinov2.py +0 -0
  38. {slide2vec-1.3.0 → slide2vec-2.0.0}/slide2vec/utils/__init__.py +0 -0
  39. {slide2vec-1.3.0 → slide2vec-2.0.0}/slide2vec/utils/log_utils.py +0 -0
  40. {slide2vec-1.3.0 → slide2vec-2.0.0}/slide2vec/utils/utils.py +0 -0
  41. {slide2vec-1.3.0 → slide2vec-2.0.0}/slide2vec.egg-info/dependency_links.txt +0 -0
  42. {slide2vec-1.3.0 → slide2vec-2.0.0}/slide2vec.egg-info/not-zip-safe +0 -0
  43. {slide2vec-1.3.0 → slide2vec-2.0.0}/slide2vec.egg-info/requires.txt +0 -0
  44. {slide2vec-1.3.0 → slide2vec-2.0.0}/slide2vec.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: slide2vec
3
- Version: 1.3.0
3
+ Version: 2.0.0
4
4
  Summary: Embedding of whole slide images with Foundation Models
5
5
  Home-page: https://github.com/clemsgrs/slide2vec
6
6
  Author: Clément Grisi
@@ -95,7 +95,7 @@ pip install slide2vec
95
95
 
96
96
  A good starting point is the default configuration file `slide2vec/configs/default.yaml` where parameters are documented.<br>
97
97
  We've also added default configuration files for each of the foundation models currently supported:
98
- - tile-level: `uni`, `uni2`, `virchow`, `virchow2`, `prov-gigapath`, `h-optimus-0`, `h-optimus-1`
98
+ - tile-level: `uni`, `uni2`, `virchow`, `virchow2`, `prov-gigapath`, `h-optimus-0`, `h-optimus-1`, `h0-mini`, `conch`, `musk`, `phikonv2`, `hibou-b`, `hibou-L`, [`kaiko`](https://github.com/kaiko-ai/towards_large_pathology_fms)
99
99
  - slide-level: `prov-gigapath`, `titan`, `prism`
100
100
 
101
101
 
@@ -41,7 +41,7 @@ pip install slide2vec
41
41
 
42
42
  A good starting point is the default configuration file `slide2vec/configs/default.yaml` where parameters are documented.<br>
43
43
  We've also added default configuration files for each of the foundation models currently supported:
44
- - tile-level: `uni`, `uni2`, `virchow`, `virchow2`, `prov-gigapath`, `h-optimus-0`, `h-optimus-1`
44
+ - tile-level: `uni`, `uni2`, `virchow`, `virchow2`, `prov-gigapath`, `h-optimus-0`, `h-optimus-1`, `h0-mini`, `conch`, `musk`, `phikonv2`, `hibou-b`, `hibou-L`, [`kaiko`](https://github.com/kaiko-ai/towards_large_pathology_fms)
45
45
  - slide-level: `prov-gigapath`, `titan`, `prism`
46
46
 
47
47
 
@@ -23,7 +23,7 @@ warn_unused_configs = true
23
23
  no_implicit_reexport = true
24
24
 
25
25
  [tool.bumpver]
26
- current_version = "1.3.0"
26
+ current_version = "2.0.0"
27
27
  version_pattern = "MAJOR.MINOR.PATCH"
28
28
  commit = false # We do version bumping in CI, not as a commit
29
29
  tag = false # Git tag already exists — we don't auto-tag
@@ -1,6 +1,6 @@
1
1
  [metadata]
2
2
  name = slide2vec
3
- version = 1.3.0
3
+ version = 2.0.0
4
4
  description = Embedding of whole slide images with Foundation Models
5
5
  author = Clément Grisi
6
6
  platforms = unix, linux, osx, cygwin, win32
@@ -0,0 +1 @@
1
+ __version__ = "2.0.0"
@@ -28,14 +28,20 @@ def get_args_parser(add_help: bool = True):
28
28
  "--config-file", default="", metavar="FILE", help="path to config file"
29
29
  )
30
30
  parser.add_argument(
31
- "--run-id",
31
+ "--output-dir",
32
32
  type=str,
33
- default="",
34
- help="Name of output subdirectory",
33
+ default=None,
34
+ help="output directory to save logs and checkpoints",
35
35
  )
36
36
  parser.add_argument(
37
37
  "--run-on-cpu", action="store_true", help="run inference on cpu"
38
38
  )
39
+ parser.add_argument(
40
+ "opts",
41
+ help="Modify config options at the end of the command using \"path.key=value\".",
42
+ default=None,
43
+ nargs=argparse.REMAINDER,
44
+ )
39
45
  return parser
40
46
 
41
47
 
@@ -54,7 +60,7 @@ def main(args):
54
60
  # setup configuration
55
61
  run_on_cpu = args.run_on_cpu
56
62
  cfg = get_cfg_from_file(args.config_file)
57
- output_dir = Path(cfg.output_dir, args.run_id)
63
+ output_dir = Path(cfg.output_dir, args.output_dir)
58
64
  cfg.output_dir = str(output_dir)
59
65
 
60
66
  coordinates_dir = Path(cfg.output_dir, "coordinates")
@@ -71,6 +77,11 @@ def main(args):
71
77
  process_list.is_file()
72
78
  ), "Process list CSV not found. Ensure tiling has been run."
73
79
  process_df = pd.read_csv(process_list)
80
+ if "aggregation_status" not in process_df.columns:
81
+ process_df["aggregation_status"] = ["tbp"] * len(process_df)
82
+ cols = ["wsi_name", "wsi_path", "mask_path", "tiling_status", "feature_status", "aggregation_status", "error", "traceback"]
83
+ process_df = process_df[cols]
84
+
74
85
  skip_feature_aggregation = process_df["aggregation_status"].str.contains("success").all()
75
86
 
76
87
  if skip_feature_aggregation and distributed.is_main_process():
@@ -111,6 +122,9 @@ def main(args):
111
122
  coordinates = (np.array([coordinates_arr["x"], coordinates_arr["y"]]).T).astype(int)
112
123
 
113
124
  feature_path = features_dir / f"{name}.pt"
125
+ output_path = features_dir / f"{name}.pt"
126
+ if cfg.model.save_tile_embeddings:
127
+ feature_path = features_dir / f"{name}-tiles.pt"
114
128
 
115
129
  # run forward pass with slide encoder
116
130
  if cfg.model.name == "prov-gigapath":
@@ -132,13 +146,19 @@ def main(args):
132
146
  with autocast_context:
133
147
  features = torch.load(feature_path).to(model.device)
134
148
  tile_size_lv0 = coordinates_arr["tile_size_lv0"][0]
135
- wsi_feature = model.forward_slide(
149
+ output = model.forward_slide(
136
150
  features,
137
151
  tile_coordinates=coordinates,
138
152
  tile_size_lv0=tile_size_lv0,
139
153
  )
140
-
141
- torch.save(wsi_feature, feature_path)
154
+ wsi_feature = output["embedding"].cpu()
155
+ if cfg.model.name == "prism" and cfg.model.save_latents:
156
+ latent_path = features_dir / f"{name}-latents.pt"
157
+ latents = output["latents"].cpu()
158
+ torch.save(latents, latent_path)
159
+ del latents
160
+
161
+ torch.save(wsi_feature, output_path)
142
162
  del wsi_feature
143
163
  if not run_on_cpu:
144
164
  torch.cuda.empty_cache()
@@ -2,6 +2,7 @@ import torch
2
2
  import numpy as np
3
3
  import wholeslidedata as wsd
4
4
 
5
+ from transformers.image_processing_utils import BaseImageProcessor
5
6
  from PIL import Image
6
7
  from pathlib import Path
7
8
 
@@ -58,5 +59,8 @@ class TileDataset(torch.utils.data.Dataset):
58
59
  if self.tile_size[idx] != self.tile_size_resized[idx]:
59
60
  tile = tile.resize((self.tile_size[idx], self.tile_size[idx]))
60
61
  if self.transforms:
61
- tile = self.transforms(tile)
62
+ if isinstance(self.transforms, BaseImageProcessor): # Hugging Face (`transformers`) image processor
63
+ tile = self.transforms(tile, return_tensors="pt")["pixel_values"].squeeze(0)
64
+ else: # general callable such as torchvision transforms
65
+ tile = self.transforms(tile)
62
66
  return idx, tile
@@ -28,14 +28,20 @@ def get_args_parser(add_help: bool = True):
28
28
  "--config-file", default="", metavar="FILE", help="path to config file"
29
29
  )
30
30
  parser.add_argument(
31
- "--run-id",
31
+ "--output-dir",
32
32
  type=str,
33
- default="",
34
- help="Name of output subdirectory",
33
+ default=None,
34
+ help="output directory to save logs and checkpoints",
35
35
  )
36
36
  parser.add_argument(
37
37
  "--run-on-cpu", action="store_true", help="run inference on cpu"
38
38
  )
39
+ parser.add_argument(
40
+ "opts",
41
+ help="Modify config options at the end of the command using \"path.key=value\".",
42
+ default=None,
43
+ nargs=argparse.REMAINDER,
44
+ )
39
45
  return parser
40
46
 
41
47
 
@@ -80,7 +86,7 @@ def run_inference(dataloader, model, device, autocast_context, unit, batch_size,
80
86
  ):
81
87
  idx, image = batch
82
88
  image = image.to(device, non_blocking=True)
83
- feature = model(image).cpu().numpy()
89
+ feature = model(image)["embedding"].cpu().numpy()
84
90
  features.resize(features.shape[0] + feature.shape[0], axis=0)
85
91
  features[-feature.shape[0]:] = feature
86
92
  indices.resize(indices.shape[0] + idx.shape[0], axis=0)
@@ -123,7 +129,7 @@ def main(args):
123
129
  # setup configuration
124
130
  run_on_cpu = args.run_on_cpu
125
131
  cfg = get_cfg_from_file(args.config_file)
126
- output_dir = Path(cfg.output_dir, args.run_id)
132
+ output_dir = Path(cfg.output_dir, args.output_dir)
127
133
  cfg.output_dir = str(output_dir)
128
134
 
129
135
  if not run_on_cpu:
@@ -148,6 +154,11 @@ def main(args):
148
154
  process_list.is_file()
149
155
  ), "Process list CSV not found. Ensure tiling has been run."
150
156
  process_df = pd.read_csv(process_list)
157
+ if "feature_status" not in process_df.columns:
158
+ process_df["feature_status"] = ["tbp"] * len(process_df)
159
+ cols = ["wsi_name", "wsi_path", "mask_path", "tiling_status", "feature_status", "error", "traceback"]
160
+ process_df = process_df[cols]
161
+
151
162
  skip_feature_extraction = process_df["feature_status"].str.contains("success").all()
152
163
 
153
164
  if skip_feature_extraction:
@@ -219,13 +230,15 @@ def main(args):
219
230
 
220
231
  name = wsi_fp.stem.replace(" ", "_")
221
232
  feature_path = features_dir / f"{name}.pt"
233
+ if cfg.model.save_tile_embeddings:
234
+ feature_path = features_dir / f"{name}-tiles.pt"
222
235
  tmp_feature_path = tmp_dir / f"{name}-rank_{distributed.get_global_rank()}.h5"
223
236
 
224
237
  # get feature dimension and dtype using a dry run
225
238
  with torch.inference_mode(), autocast_context:
226
239
  sample_batch = next(iter(dataloader))
227
240
  sample_image = sample_batch[1].to(model.device)
228
- sample_feature = model(sample_image).cpu().numpy()
241
+ sample_feature = model(sample_image)["embedding"].cpu().numpy()
229
242
  feature_dim = sample_feature.shape[1:]
230
243
  dtype = sample_feature.dtype
231
244
 
@@ -24,6 +24,17 @@ def get_args_parser(add_help: bool = True):
24
24
  parser.add_argument(
25
25
  "--run-on-cpu", action="store_true", help="run inference on cpu"
26
26
  )
27
+ parser.add_argument(
28
+ "--output-dir",
29
+ type=str,
30
+ help="output directory to save logs and checkpoints",
31
+ )
32
+ parser.add_argument(
33
+ "opts",
34
+ help="Modify config options at the end of the command using \"path.key=value\".",
35
+ default=None,
36
+ nargs=argparse.REMAINDER,
37
+ )
27
38
  return parser
28
39
 
29
40
 
@@ -37,35 +48,26 @@ def log_progress(features_dir: Path, stop_event: threading.Event, log_interval:
37
48
  time.sleep(log_interval)
38
49
 
39
50
 
40
- def run_tiling(config_file, run_id):
41
- print("Running tiling.py...")
51
+ def run_tiling(root_dir, config_file, output_dir):
52
+ print(f"Running tiling.py from {root_dir}...")
42
53
  cmd = [
43
54
  sys.executable,
44
- "slide2vec/tiling.py",
45
- "--run-id",
46
- run_id,
55
+ "hs2p/tiling.py",
47
56
  "--config-file",
48
- config_file,
57
+ os.path.abspath(config_file),
58
+ "--output-dir",
59
+ os.path.abspath(output_dir),
60
+ "--skip-datetime",
61
+ "--skip-logging",
62
+ "wandb.enable=false", # disable wandb to avoid duplicate logging
49
63
  ]
50
- proc = subprocess.Popen(
51
- cmd,
52
- stdout=subprocess.PIPE,
53
- stderr=subprocess.STDOUT,
54
- text=True,
55
- bufsize=1,
56
- universal_newlines=True
57
- )
58
- # forward output in real-time
59
- for line in proc.stdout:
60
- print(line.rstrip())
61
- sys.stdout.flush()
62
- proc.wait()
64
+ proc = subprocess.run(cmd, cwd=root_dir)
63
65
  if proc.returncode != 0:
64
66
  print("Slide tiling failed. Exiting.")
65
67
  sys.exit(proc.returncode)
66
68
 
67
69
 
68
- def run_feature_extraction(config_file, run_id, run_on_cpu: False):
70
+ def run_feature_extraction(config_file, output_dir, run_on_cpu: False):
69
71
  print("Running embed.py...")
70
72
  # find a free port
71
73
  with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
@@ -78,36 +80,24 @@ def run_feature_extraction(config_file, run_id, run_on_cpu: False):
78
80
  f"--master_port={free_port}",
79
81
  "--nproc_per_node=gpu",
80
82
  "slide2vec/embed.py",
81
- "--run-id",
82
- run_id,
83
83
  "--config-file",
84
- config_file,
84
+ os.path.abspath(config_file),
85
+ "--output-dir",
86
+ os.path.abspath(output_dir),
85
87
  ]
86
88
  if run_on_cpu:
87
89
  cmd = [
88
90
  sys.executable,
89
91
  "slide2vec/embed.py",
90
- "--run-id",
91
- run_id,
92
92
  "--config-file",
93
- config_file,
93
+ os.path.abspath(config_file),
94
+ "--output-dir",
95
+ os.path.abspath(output_dir),
94
96
  "--run-on-cpu",
95
97
  ]
96
98
  # launch in its own process group.
97
- proc = subprocess.Popen(
98
- cmd,
99
- preexec_fn=os.setsid,
100
- stdout=subprocess.PIPE,
101
- stderr=subprocess.STDOUT,
102
- text=True,
103
- bufsize=1,
104
- universal_newlines=True
105
- )
99
+ proc = subprocess.Popen(cmd)
106
100
  try:
107
- # forward output in real-time
108
- for line in proc.stdout:
109
- print(line.rstrip())
110
- sys.stdout.flush()
111
101
  proc.wait()
112
102
  except KeyboardInterrupt:
113
103
  print("Received CTRL+C, terminating embed.py process group...")
@@ -119,34 +109,22 @@ def run_feature_extraction(config_file, run_id, run_on_cpu: False):
119
109
  sys.exit(proc.returncode)
120
110
 
121
111
 
122
- def run_feature_aggregation(config_file, run_id, run_on_cpu: False):
112
+ def run_feature_aggregation(config_file, output_dir, run_on_cpu: False):
123
113
  print("Running aggregate.py...")
124
114
  # find a free port
125
115
  cmd = [
126
116
  sys.executable,
127
117
  "slide2vec/aggregate.py",
128
- "--run-id",
129
- run_id,
130
118
  "--config-file",
131
- config_file,
119
+ os.path.abspath(config_file),
120
+ "--output-dir",
121
+ os.path.abspath(output_dir),
132
122
  ]
133
123
  if run_on_cpu:
134
124
  cmd.append("--run-on-cpu")
135
125
  # launch in its own process group.
136
- proc = subprocess.Popen(
137
- cmd,
138
- preexec_fn=os.setsid,
139
- stdout=subprocess.PIPE,
140
- stderr=subprocess.STDOUT,
141
- text=True,
142
- bufsize=1,
143
- universal_newlines=True
144
- )
126
+ proc = subprocess.Popen(cmd)
145
127
  try:
146
- # forward output in real-time
147
- for line in proc.stdout:
148
- print(line.rstrip())
149
- sys.stdout.flush()
150
128
  proc.wait()
151
129
  except KeyboardInterrupt:
152
130
  print("Received CTRL+C, terminating aggregate.py process group...")
@@ -159,19 +137,19 @@ def run_feature_aggregation(config_file, run_id, run_on_cpu: False):
159
137
 
160
138
 
161
139
  def main(args):
162
- config_file = args.config_file
163
- skip_datetime = args.skip_datetime
164
140
  run_on_cpu = args.run_on_cpu
165
141
 
166
- cfg, run_id = setup(config_file, skip_datetime=skip_datetime)
142
+ cfg, cfg_path = setup(args)
143
+ output_dir = Path(cfg.output_dir)
144
+
167
145
  hf_login()
168
146
 
169
- run_tiling(config_file, run_id)
147
+ root_dir = "slide2vec/hs2p"
148
+ run_tiling(root_dir, cfg_path, output_dir)
170
149
 
171
150
  print("Tiling completed.")
172
151
  print("=+=" * 10)
173
152
 
174
- output_dir = Path(cfg.output_dir)
175
153
  features_dir = output_dir / "features"
176
154
  if cfg.wandb.enable:
177
155
  stop_event = threading.Event()
@@ -180,10 +158,10 @@ def main(args):
180
158
  )
181
159
  log_thread.start()
182
160
 
183
- run_feature_extraction(config_file, run_id, run_on_cpu)
161
+ run_feature_extraction(cfg_path, output_dir, run_on_cpu)
184
162
 
185
163
  if cfg.model.level == "slide":
186
- run_feature_aggregation(config_file, run_id, run_on_cpu)
164
+ run_feature_aggregation(cfg_path, output_dir, run_on_cpu)
187
165
  print("Feature extraction completed.")
188
166
  print("=+=" * 10)
189
167
  else:
@@ -203,9 +181,9 @@ if __name__ == "__main__":
203
181
 
204
182
  import warnings
205
183
  import torchvision
206
-
184
+
207
185
  torchvision.disable_beta_transforms_warning()
208
-
186
+
209
187
  warnings.filterwarnings("ignore", message=".*Could not set the permissions.*")
210
188
  warnings.filterwarnings("ignore", message=".*antialias.*", category=UserWarning)
211
189
  warnings.filterwarnings("ignore", message=".*TypedStorage.*", category=UserWarning)