deepliif-1.1.5-py3-none-any.whl → deepliif-1.1.6-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cli.py +52 -9
- deepliif/models/__init__.py +77 -15
- deepliif/models/base_model.py +2 -0
- deepliif/util/__init__.py +66 -0
- {deepliif-1.1.5.dist-info → deepliif-1.1.6.dist-info}/LICENSE.md +0 -0
- {deepliif-1.1.5.dist-info → deepliif-1.1.6.dist-info}/METADATA +403 -403
- {deepliif-1.1.5.dist-info → deepliif-1.1.6.dist-info}/RECORD +10 -10
- {deepliif-1.1.5.dist-info → deepliif-1.1.6.dist-info}/WHEEL +1 -1
- {deepliif-1.1.5.dist-info → deepliif-1.1.6.dist-info}/entry_points.txt +0 -0
- {deepliif-1.1.5.dist-info → deepliif-1.1.6.dist-info}/top_level.txt +0 -0
cli.py
CHANGED
@@ -11,7 +11,7 @@ from PIL import Image
 
 from deepliif.data import create_dataset, transform
 from deepliif.models import inference, postprocess, compute_overlap, init_nets, DeepLIIFModel, infer_modalities, infer_results_for_wsi
-from deepliif.util import allowed_file, Visualizer, get_information
+from deepliif.util import allowed_file, Visualizer, get_information, test_diff_original_serialized, disable_batchnorm_tracking_stats
 from deepliif.util.util import mkdirs, check_multi_scale
 # from deepliif.util import infer_results_for_wsi
 
@@ -461,21 +461,51 @@ def trainlaunch(**kwargs):
 @cli.command()
 @click.option('--models-dir', default='./model-server/DeepLIIF_Latest_Model', help='reads models from here')
 @click.option('--output-dir', help='saves results here.')
-def serialize(models_dir, output_dir):
+@click.option('--device', default='cpu', type=str, help='device to load model, either cpu or gpu')
+@click.option('--verbose', default=0, type=int,help='saves results here.')
+def serialize(models_dir, output_dir, device, verbose):
     """Serialize DeepLIIF models using Torchscript
     """
     output_dir = output_dir or models_dir
+    ensure_exists(output_dir)
 
     sample = transform(Image.new('RGB', (512, 512)))
-
+
     with click.progressbar(
             init_nets(models_dir, eager_mode=True).items(),
             label='Tracing nets',
            item_show_func=lambda n: n[0] if n else n
    ) as bar:
        for name, net in bar:
-            traced_net = torch.jit.script(net)
+            # the model should be in eval model so that there won't be randomness in tracking brought by dropout etc. layers
+            # https://github.com/pytorch/pytorch/issues/23999#issuecomment-747832122
+            net = net.eval()
+            net = disable_batchnorm_tracking_stats(net)
+            net = net.cpu()
+            if name.startswith('GS'):
+                traced_net = torch.jit.trace(net, torch.cat([sample, sample, sample], 1))
+            else:
+                traced_net = torch.jit.trace(net, sample)
+            # traced_net = torch.jit.script(net)
            traced_net.save(f'{output_dir}/{name}.pt')
+
+    # test: whether the original and the serialized model produces highly similar predictions
+    print('testing similarity between prediction from original vs serialized models...')
+    models_original = init_nets(models_dir,eager_mode=True)
+    models_serialized = init_nets(output_dir,eager_mode=False)
+    if device == 'gpu':
+        sample = sample.cuda()
+    else:
+        sample = sample.cpu()
+    for name in models_serialized.keys():
+        print(name,':')
+        model_original = models_original[name].cuda().eval() if device=='gpu' else models_original[name].cpu().eval()
+        model_serialized = models_serialized[name].cuda() if device=='gpu' else models_serialized[name].cpu().eval()
+        if name.startswith('GS'):
+            test_diff_original_serialized(model_original,model_serialized,torch.cat([sample, sample, sample], 1),verbose)
+        else:
+            test_diff_original_serialized(model_original,model_serialized,sample,verbose)
+        print('PASS')
 
 
 @cli.command()
@@ -486,7 +516,11 @@ def serialize(models_dir, output_dir)
 @click.option('--region-size', default=20000, help='Due to limits in the resources, the whole slide image cannot be processed in whole.'
                                                    'So the WSI image is read region by region. '
                                                    'This parameter specifies the size each region to be read into GPU for inferrence.')
-def test(input_dir, output_dir, tile_size, model_dir, region_size):
+@click.option('--eager-mode', is_flag=True, help='use eager mode (loading original models, otherwise serialized ones)')
+@click.option('--color-dapi', is_flag=True, help='color dapi image to produce the same coloring as in the paper')
+@click.option('--color-marker', is_flag=True, help='color marker image to produce the same coloring as in the paper')
+def test(input_dir, output_dir, tile_size, model_dir, region_size, eager_mode,
+         color_dapi, color_marker):
 
     """Test trained models
     """
@@ -507,7 +541,7 @@ def test(input_dir, output_dir, tile_size, model_dir, region_size):
             print(time.time() - start_time)
         else:
             img = Image.open(os.path.join(input_dir, filename)).convert('RGB')
-            images, scoring = infer_modalities(img, tile_size, model_dir)
+            images, scoring = infer_modalities(img, tile_size, model_dir, eager_mode, color_dapi, color_marker)
 
             for name, i in images.items():
                 i.save(os.path.join(
@@ -589,6 +623,15 @@ def prepare_testing_data(input_dir, dataset_dir):
             cv2.imwrite(os.path.join(test_dir, img), np.concatenate([image, image, image, image, image, image], 1))
 
 
+# to load pickle file saved from gpu in a cpu environment: https://github.com/pytorch/pytorch/issues/16797#issuecomment-633423219
+from io import BytesIO
+class CPU_Unpickler(pickle.Unpickler):
+    def find_class(self, module, name):
+        if module == 'torch.storage' and name == '_load_from_bytes':
+            return lambda b: torch.load(BytesIO(b), map_location='cpu')
+        else: return super().find_class(module, name)
+
+
 @cli.command()
 @click.option('--pickle-dir', required=True, help='directory where the pickled snapshots are stored')
 def visualize(pickle_dir):
@@ -599,8 +642,8 @@ def visualize(pickle_dir):
         time.sleep(1)
 
     params_opt = pickle.load(open(path_init,'rb'))
-    params_opt
-    visualizer = Visualizer(
+    params_opt.remote = False
+    visualizer = Visualizer(params_opt)   # create a visualizer that display/save images and plots
 
     paths_plot = {'display_current_results':os.path.join(pickle_dir,'display_current_results.pickle'),
                   'plot_current_losses':os.path.join(pickle_dir,'plot_current_losses.pickle')}
@@ -612,7 +655,7 @@ def visualize(pickle_dir):
         try:
             last_modified_time_plot = os.path.getmtime(path_plot)
             if last_modified_time_plot > last_modified_time[method]:
-                params_plot =
+                params_plot = CPU_Unpickler(open(path_plot,'rb')).load()
                 last_modified_time[method] = last_modified_time_plot
                 getattr(visualizer,method)(**params_plot)
                 print(f'{method} refreshed, last modified time {time.ctime(last_modified_time[method])}')
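The reworked `serialize` command now traces each net in eval mode and then checks the traced copy against the original. Below is a minimal, self-contained sketch of that trace → save → reload → compare round trip; the toy network, the dummy sample, and the `toy_net.pt` path are placeholders and not part of the package.

```python
import torch
import torch.nn as nn

# Stand-in for one DeepLIIF generator net (placeholder architecture).
net = nn.Sequential(
    nn.Conv2d(3, 8, 3, padding=1),
    nn.BatchNorm2d(8),
    nn.ReLU(),
    nn.Conv2d(8, 3, 3, padding=1),
)
net = net.eval().cpu()  # eval() so dropout/batchnorm add no randomness during tracing

sample = torch.rand(1, 3, 512, 512)    # dummy 512x512 RGB batch
traced = torch.jit.trace(net, sample)  # trace instead of script, as in the new serialize()
traced.save('toy_net.pt')

# Reload the serialized net and confirm its predictions stay close to the original,
# mirroring the kind of check test_diff_original_serialized() performs.
reloaded = torch.jit.load('toy_net.pt', map_location='cpu').eval()
diff = torch.sum(torch.abs(net(sample) - reloaded(sample)))
assert diff <= 10, f'serialized model diverged from the original: {diff}'
```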
deepliif/models/__init__.py
CHANGED
@@ -88,7 +88,10 @@ def create_model(opt):
 
 
 def load_torchscript_model(model_pt_path, device):
-    return torch.jit.load(model_pt_path, map_location=device)
+    net = torch.jit.load(model_pt_path, map_location=device)
+    net = disable_batchnorm_tracking_stats(net)
+    net.eval()
+    return net
 
 
 def read_model_params(file_addr):
@@ -132,7 +135,8 @@ def load_eager_models(model_dir, devices):
             os.path.join(model_dir, f'latest_net_{n}.pth'),
             map_location=devices[n]
         ))
-        nets[n] = net
+        nets[n] = disable_batchnorm_tracking_stats(net)
+        nets[n].eval()
 
     for n in ['G51', 'G52', 'G53', 'G54', 'G55']:
         net = UnetGenerator(input_nc, output_nc, 9, ngf, norm_layer=norm_layer, use_dropout=use_dropout)
@@ -140,7 +144,8 @@ def load_eager_models(model_dir, devices):
             os.path.join(model_dir, f'latest_net_{n}.pth'),
             map_location=devices[n]
         ))
-        nets[n] = net
+        nets[n] = disable_batchnorm_tracking_stats(net)
+        nets[n].eval()
 
     return nets
 
@@ -185,7 +190,12 @@ def compute_overlap(img_size, tile_size):
     return tile_size // 4
 
 
-def run_torchserve(img, model_path=None):
+def run_torchserve(img, model_path=None, eager_mode=False):
+    """
+    eager_mode: not used in this function; put in place to be consistent with run_dask
+                so that run_wrapper() could call either this function or run_dask with
+                same syntax
+    """
     buffer = BytesIO()
     torch.save(transform(img.resize((512, 512))), buffer)
 
@@ -203,9 +213,9 @@ def run_torchserve(img, model_path=None):
     return {k: tensor_to_pil(deserialize_tensor(v)) for k, v in res.json().items()}
 
 
-def run_dask(img, model_path):
+def run_dask(img, model_path, eager_mode=False):
     model_dir = os.getenv('DEEPLIIF_MODEL_DIR', model_path)
-    nets = init_nets(model_dir)
+    nets = init_nets(model_dir, eager_mode)
 
     ts = transform(img.resize((512, 512)))
 
@@ -237,7 +247,7 @@ def is_empty(tile):
     return True if calculate_background_area(tile) > 98 else False
 
 
-def run_wrapper(tile, run_fn, model_path):
+def run_wrapper(tile, run_fn, model_path, eager_mode=False):
     if is_empty(tile):
         return {
             'G1': Image.new(mode='RGB', size=(512, 512), color=(201, 211, 208)),
@@ -247,17 +257,17 @@ def run_wrapper(tile, run_fn, model_path):
             'G5': Image.new(mode='RGB', size=(512, 512), color=(0, 0, 0))
         }
     else:
-        return run_fn(tile, model_path)
-
+        return run_fn(tile, model_path, eager_mode)
 
-def inference(img, tile_size, overlap_size, model_path, use_torchserve=False):
 
+def inference_old(img, tile_size, overlap_size, model_path, use_torchserve=False, eager_mode=False,
+                  color_dapi=False, color_marker=False):
 
     tiles = list(generate_tiles(img, tile_size, overlap_size))
 
     run_fn = run_torchserve if use_torchserve else run_dask
     # res = [Tile(t.i, t.j, run_fn(t.img, model_path)) for t in tiles]
-    res = [Tile(t.i, t.j, run_wrapper(t.img, run_fn, model_path)) for t in tiles]
+    res = [Tile(t.i, t.j, run_wrapper(t.img, run_fn, model_path, eager_mode)) for t in tiles]
 
     def get_net_tiles(n):
         return [Tile(t.i, t.j, t.img[n]) for t in res]
@@ -276,12 +286,14 @@ def inference(img, tile_size, overlap_size, model_path, use_torchserve=False):
 
     images['DAPI'] = stitch(get_net_tiles('G2'), tile_size, overlap_size).resize(img.size)
     dapi_pix = np.array(images['DAPI'].convert('L').convert('RGB'))
-    dapi_pix[:, :, 0] = 0
+    if color_dapi:
+        dapi_pix[:, :, 0] = 0
     images['DAPI'] = Image.fromarray(dapi_pix)
     images['Lap2'] = stitch(get_net_tiles('G3'), tile_size, overlap_size).resize(img.size)
    images['Marker'] = stitch(get_net_tiles('G4'), tile_size, overlap_size).resize(img.size)
    marker_pix = np.array(images['Marker'].convert('L').convert('RGB'))
-    marker_pix[:, :, 2] = 0
+    if color_marker:
+        marker_pix[:, :, 2] = 0
    images['Marker'] = Image.fromarray(marker_pix)
 
     # images['Marker'] = stitch(
@@ -294,6 +306,52 @@ def inference(img, tile_size, overlap_size, model_path, use_torchserve=False):
     return images
 
 
+def inference(img, tile_size, overlap_size, model_path, use_torchserve=False, eager_mode=False,
+              color_dapi=False, color_marker=False):
+
+    rescaled, rows, cols = format_image_for_tiling(img, tile_size, overlap_size)
+
+    run_fn = run_torchserve if use_torchserve else run_dask
+
+    images = {}
+    images['Hema'] = create_image_for_stitching(tile_size, rows, cols)
+    images['DAPI'] = create_image_for_stitching(tile_size, rows, cols)
+    images['Lap2'] = create_image_for_stitching(tile_size, rows, cols)
+    images['Marker'] = create_image_for_stitching(tile_size, rows, cols)
+    images['Seg'] = create_image_for_stitching(tile_size, rows, cols)
+
+    for i in range(cols):
+        for j in range(rows):
+            tile = extract_tile(rescaled, tile_size, overlap_size, i, j)
+            res = run_fn(tile, model_path, eager_mode)
+
+            stitch_tile(images['Hema'], res['G1'], tile_size, overlap_size, i, j)
+            stitch_tile(images['DAPI'], res['G2'], tile_size, overlap_size, i, j)
+            stitch_tile(images['Lap2'], res['G3'], tile_size, overlap_size, i, j)
+            stitch_tile(images['Marker'], res['G4'], tile_size, overlap_size, i, j)
+            stitch_tile(images['Seg'], res['G5'], tile_size, overlap_size, i, j)
+
+    images['Hema'] = images['Hema'].resize(img.size)
+    images['DAPI'] = images['DAPI'].resize(img.size)
+    images['Lap2'] = images['Lap2'].resize(img.size)
+    images['Marker'] = images['Marker'].resize(img.size)
+    images['Seg'] = images['Seg'].resize(img.size)
+
+    if color_dapi:
+        matrix = (       0,        0,        0, 0,
+                  299/1000, 587/1000, 114/1000, 0,
+                  299/1000, 587/1000, 114/1000, 0)
+        images['DAPI'] = images['DAPI'].convert('RGB', matrix)
+
+    if color_marker:
+        matrix = (299/1000, 587/1000, 114/1000, 0,
+                  299/1000, 587/1000, 114/1000, 0,
+                         0,        0,        0, 0)
+        images['Marker'] = images['Marker'].convert('RGB', matrix)
+
+    return images
+
+
 def postprocess(img, seg_img, thresh=80, noise_objects_size=20, small_object_size=50):
     mask_image = create_basic_segmentation_mask(np.array(img), np.array(seg_img),
                                                 thresh, noise_objects_size, small_object_size)
@@ -312,7 +370,8 @@ def postprocess(img, seg_img, thresh=80, noise_objects_size=20, small_object_siz
     return images, scoring
 
 
-def infer_modalities(img, tile_size, model_dir):
+def infer_modalities(img, tile_size, model_dir, eager_mode=False,
+                     color_dapi=False, color_marker=False):
     """
     This function is used to infer modalities for the given image using a trained model.
     :param img: The input image.
@@ -329,7 +388,10 @@ def infer_modalities(img, tile_size, model_dir):
         img,
         tile_size=tile_size,
         overlap_size=compute_overlap(img.size, tile_size),
-        model_path=model_dir
+        model_path=model_dir,
+        eager_mode=eager_mode,
+        color_dapi=color_dapi,
+        color_marker=color_marker
     )
 
     post_images, scoring = postprocess(img, images['Seg'], small_object_size=20)
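For reference, a usage sketch of the extended `infer_modalities` signature introduced above. The input file name and model directory are placeholders; per the diff, the serialized Torchscript nets are loaded by default, while `eager_mode=True` loads the original models.

```python
from PIL import Image
from deepliif.models import infer_modalities

img = Image.open('./Sample_Large_Tissues/ROI_1.png').convert('RGB')  # placeholder input
images, scoring = infer_modalities(
    img,
    tile_size=512,
    model_dir='./model-server/DeepLIIF_Latest_Model',  # placeholder model directory
    eager_mode=False,   # False: load serialized models; True: load the original ones
    color_dapi=True,    # apply the new matrix-based DAPI coloring
    color_marker=True,  # apply the new matrix-based marker coloring
)
for name, modality in images.items():  # one PIL image per returned modality
    modality.save(f'ROI_1_{name}.png')
```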
deepliif/models/base_model.py
CHANGED
@@ -3,6 +3,7 @@ import torch
 from collections import OrderedDict
 from abc import ABC, abstractmethod
 from . import networks
+from ..util import disable_batchnorm_tracking_stats
 
 
 class BaseModel(ABC):
@@ -90,6 +91,7 @@ class BaseModel(ABC):
             if isinstance(name, str):
                 net = getattr(self, 'net' + name)
                 net.eval()
+                net = disable_batchnorm_tracking_stats(net)
 
     def test(self):
         """Forward function used in test time.
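Both this change and the loader changes above route the nets through `disable_batchnorm_tracking_stats` after `eval()`. A small, self-contained illustration (not package code) of what clearing the running statistics changes for a `BatchNorm2d` layer in eval mode:

```python
import torch
import torch.nn as nn

bn = nn.BatchNorm2d(3).eval()
x = torch.randn(2, 3, 8, 8) * 5 + 2  # toy activations with non-zero mean and variance

y_running = bn(x)  # eval(): normalizes with the stored running stats (fresh: mean 0, var 1)

# Same manipulation as disable_batchnorm_tracking_stats(): with the buffers cleared,
# eval-mode batch norm falls back to the statistics of the current batch.
bn.track_running_stats = False
bn.running_mean = None
bn.running_var = None
y_batch = bn(x)

print(y_running.mean().item())  # roughly the input mean, since the untrained running stats are used
print(y_batch.mean().item())    # roughly 0: normalized with the batch's own statistics
```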
deepliif/util/__init__.py
CHANGED
@@ -88,6 +88,36 @@ def stitch(tiles, tile_size, overlap_size):
     return new_im
 
 
+def format_image_for_tiling(img, tile_size, overlap_size):
+    mean_background_val = calculate_background_mean_value(img)
+    img = img.resize(output_size(img, tile_size))
+    # Adding borders with size of given overlap around the whole slide image
+    img = ImageOps.expand(img, border=overlap_size, fill=tuple(mean_background_val))
+    rows = int(img.height / tile_size)
+    cols = int(img.width / tile_size)
+    return img, rows, cols
+
+
+def extract_tile(img, tile_size, overlap_size, i, j):
+    return img.crop((
+        i * tile_size, j * tile_size,
+        i * tile_size + tile_size + 2 * overlap_size,
+        j * tile_size + tile_size + 2 * overlap_size
+    ))
+
+
+def create_image_for_stitching(tile_size, rows, cols):
+    width = tile_size * cols
+    height = tile_size * rows
+    return Image.new('RGB', (width, height))
+
+
+def stitch_tile(img, tile, tile_size, overlap_size, i, j):
+    tile = tile.resize((tile_size + 2 * overlap_size, tile_size + 2 * overlap_size))
+    tile = tile.crop((overlap_size, overlap_size, overlap_size + tile_size, overlap_size + tile_size))
+    img.paste(tile, (i * tile_size, j * tile_size))
+
+
 def calculate_background_mean_value(img):
     img = cv2.fastNlMeansDenoisingColored(np.array(img), None, 10, 10, 7, 21)
     img = np.array(img, dtype=float)
@@ -349,3 +379,39 @@ def read_results_from_pickle_file(input_addr):
     pickle_obj.close()
     return results
 
+def test_diff_original_serialized(model_original,model_serialized,example,verbose=0):
+    threshold = 10
+
+    orig_res = model_original(example)
+    if verbose > 0:
+        print('Original:')
+        print(orig_res.shape)
+        print(orig_res[0, 0:10])
+        print('min abs value:{}'.format(torch.min(torch.abs(orig_res))))
+
+    ts_res = model_serialized(example)
+    if verbose > 0:
+        print('Torchscript:')
+        print(ts_res.shape)
+        print(ts_res[0, 0:10])
+        print('min abs value:{}'.format(torch.min(torch.abs(ts_res))))
+
+    abs_diff = torch.abs(orig_res-ts_res)
+    if verbose > 0:
+        print('Dif sum:')
+        print(torch.sum(abs_diff))
+        print('max dif:{}'.format(torch.max(abs_diff)))
+
+    assert torch.sum(abs_diff) <= threshold, f"Sum of difference in predicted values {torch.sum(abs_diff)} is larger than threshold {threshold}"
+
+def disable_batchnorm_tracking_stats(model):
+    # https://discuss.pytorch.org/t/performance-highly-degraded-when-eval-is-activated-in-the-test-phase/3323/16
+    # https://discuss.pytorch.org/t/performance-highly-degraded-when-eval-is-activated-in-the-test-phase/3323/67
+    # https://github.com/pytorch/pytorch/blob/ca39c5b04e30a67512589cafbd9d063cc17168a5/torch/nn/modules/batchnorm.py#L158
+    for m in model.modules():
+        for child in m.children():
+            if type(child) == torch.nn.BatchNorm2d:
+                child.track_running_stats = False
+                child.running_mean = None
+                child.running_var = None
+    return model
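A short sketch of how the new tiling helpers fit together, using raw tiles instead of model outputs; the input path, output path, and sizes are placeholders, and the new `inference()` in `deepliif/models/__init__.py` runs the nets on each tile in this same loop.

```python
from PIL import Image
from deepliif.util import (format_image_for_tiling, extract_tile,
                           create_image_for_stitching, stitch_tile)

tile_size, overlap_size = 512, 128  # placeholder sizes (compute_overlap returns tile_size // 4)
img = Image.open('./Sample_Large_Tissues/ROI_1.png').convert('RGB')  # placeholder input

rescaled, rows, cols = format_image_for_tiling(img, tile_size, overlap_size)
canvas = create_image_for_stitching(tile_size, rows, cols)

for i in range(cols):
    for j in range(rows):
        tile = extract_tile(rescaled, tile_size, overlap_size, i, j)
        # A real pipeline would run a model on `tile` here; pasting the raw tile back
        # just demonstrates the crop/stitch geometry (overlap borders are discarded).
        stitch_tile(canvas, tile, tile_size, overlap_size, i, j)

canvas = canvas.resize(img.size)
canvas.save('roundtrip.png')  # placeholder output path
```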
File without changes

{deepliif-1.1.5.dist-info → deepliif-1.1.6.dist-info}/METADATA
CHANGED
@@ -1,403 +1,403 @@
-Metadata-Version: 2.1
-Name: deepliif
-Version: 1.1.5
-Summary: DeepLIIF: Deep-Learning Inferred Multiplex Immunofluorescence for Immunohistochemical Image Quantification
-Home-page: https://github.com/nadeemlab/DeepLIIF
-Author: Parmida93
-Author-email: ghahremani.parmida@gmail.com
-Keywords: DeepLIIF,IHC,Segmentation,Classification
-Description-Content-Type: text/markdown
-License-File: LICENSE.md
-Requires-Dist: opencv-python (==4.5.3.56)
-Requires-Dist: torchvision (==0.10.0)
-Requires-Dist: scikit-image (==0.18.3)
-Requires-Dist: dominate (==2.6.0)
-Requires-Dist: numba (==0.53.1)
-Requires-Dist: Click (==8.0.3)
-Requires-Dist: requests (==2.26.0)
-Requires-Dist: dask (==2021.11.2)
-Requires-Dist: visdom (>=0.1.8.3)
-Requires-Dist: python-bioformats (>=4.0.6)
-
-
-<!-- PROJECT LOGO -->
-<br />
-<p align="center">
-  <img src="./images/DeepLIIF_logo.png" width="50%">
-  <h3 align="center"><strong>Deep-Learning Inferred Multiplex Immunofluorescence for Immunohistochemical Image Quantification</strong></h3>
-  <p align="center">
-    <a href="https://doi.org/10.1101/2021.05.01.442219">Journal Preprint</a>
-    |
-    <a href="https://rdcu.be/cKSBz">Journal Link</a>
-    |
-    <a href="https://openaccess.thecvf.com/content/CVPR2022/html/Ghahremani_DeepLIIF_An_Online_Platform_for_Quantification_of_Clinical_Pathology_Slides_CVPR_2022_paper.html">CVPR Link</a>
-    |
-    <a href="https://deepliif.org/">Cloud Deployment</a>
-    |
-    <a href="https://nadeemlab.github.io/DeepLIIF/">Documentation</a>
-    |
-    <a href="#docker">Docker</a>
-    |
-    <a href="https://github.com/nadeemlab/DeepLIIF/tree/main/ImageJ_Plugin">ImageJ Plugin</a>
-    |
-    <a href="#support">Support</a>
-  </p>
-</p>
-
-*Reporting biomarkers assessed by routine immunohistochemical (IHC) staining of tissue is broadly used in diagnostic
-pathology laboratories for patient care. To date, clinical reporting is predominantly qualitative or semi-quantitative.
-By creating a multitask deep learning framework referred to as DeepLIIF, we present a single-step solution to stain
-deconvolution/separation, cell segmentation, and quantitative single-cell IHC scoring. Leveraging a unique de novo
-dataset of co-registered IHC and multiplex immunofluorescence (mpIF) staining of the same slides, we segment and
-translate low-cost and prevalent IHC slides to more expensive-yet-informative mpIF images, while simultaneously
-providing the essential ground truth for the superimposed brightfield IHC channels. Moreover, a new nuclear-envelop
-stain, LAP2beta, with high (>95%) cell coverage is introduced to improve cell delineation/segmentation and protein
-expression quantification on IHC slides. By simultaneously translating input IHC images to clean/separated mpIF channels
-and performing cell segmentation/classification, we show that our model trained on clean IHC Ki67 data can generalize to
-more noisy and artifact-ridden images as well as other nuclear and non-nuclear markers such as CD3, CD8, BCL2, BCL6,
-MYC, MUM1, CD10, and TP53. We thoroughly evaluate our method on publicly available benchmark datasets as well as against
-pathologists' semi-quantitative scoring. Trained on IHC, DeepLIIF generalizes well to H&E images for out-of-the-box nuclear
-segmentation.*
-
-**DeepLIIF** is deployed as a free publicly available cloud-native platform (https://deepliif.org) with Bioformats (more than 150 input formats supported) and MLOps pipeline. We also release **DeepLIIF** implementations for single/multi-GPU training, Torchserve/Dask+Torchscript deployment, and auto-scaling via Pulumi (1000s of concurrent connections supported); details can be found in our [documentation](https://nadeemlab.github.io/DeepLIIF/). **DeepLIIF** can be run locally (GPU required) by [pip installing the package](https://github.com/nadeemlab/DeepLIIF/edit/main/README.md#installing-deepliif) and using the deepliif CLI command. **DeepLIIF** can be used remotely (no GPU required) through the https://deepliif.org website, calling the [cloud API via Python](https://github.com/nadeemlab/DeepLIIF/edit/main/README.md#cloud-deployment), or via the [ImageJ/Fiji plugin](https://github.com/nadeemlab/DeepLIIF/edit/main/README.md#imagej-plugin); details for the free cloud-native platform can be found in our [CVPR'22 paper](https://arxiv.org/pdf/2204.04494.pdf).
-
-
-
-*Overview of DeepLIIF pipeline and sample input IHCs (different
-brown/DAB markers -- BCL2, BCL6, CD10, CD3/CD8, Ki67) with corresponding DeepLIIF-generated hematoxylin/mpIF modalities
-and classified (positive (red) and negative (blue) cell) segmentation masks. (a) Overview of DeepLIIF. Given an IHC
-input, our multitask deep learning framework simultaneously infers corresponding Hematoxylin channel, mpIF DAPI, mpIF
-protein expression (Ki67, CD3, CD8, etc.), and the positive/negative protein cell segmentation, baking explainability
-and interpretability into the model itself rather than relying on coarse activation/attention maps. In the segmentation
-mask, the red cells denote cells with positive protein expression (brown/DAB cells in the input IHC), whereas blue cells
-represent negative cells (blue cells in the input IHC). (b) Example DeepLIIF-generated hematoxylin/mpIF modalities and
-segmentation masks for different IHC markers. DeepLIIF, trained on clean IHC Ki67 nuclear marker images, can generalize
-to noisier as well as other IHC nuclear/cytoplasmic marker images.*
-
-## Prerequisites
-1. Python 3.8
-2. Docker
-
-## Installing `deepliif`
-
-DeepLIIF can be `pip` installed:
-```shell
-$ conda create --name deepliif_env python=3.8
-$ conda activate deepliif_env
-(deepliif_env) $
-(deepliif_env) $
-```
-
-The package is composed of two parts:
-1. A library that implements the core functions used to train and test DeepLIIF models.
-2. A CLI to run common batch operations including training, batch testing and Torchscipt models serialization.
-
-You can list all available commands:
-
-```
-(venv) $ deepliif --help
-Usage: deepliif [OPTIONS] COMMAND [ARGS]...
-
-Options:
-  --help  Show this message and exit.
-
-Commands:
-  prepare-testing-data  Preparing data for testing
-  serialize             Serialize DeepLIIF models using Torchscript
-  test                  Test trained models
-  train                 General-purpose training script for multi-task...
-```
-
-## Training Dataset
-For training, all image sets must be 512x512 and combined together in 3072x512 images (six images of size 512x512 stitched
-together horizontally).
-The data need to be arranged in the following order:
-```
-XXX_Dataset
-
-
-```
-We have provided a simple function in the CLI for preparing data for training.
-
-* **To prepare data for training**, you need to have the image dataset for each image (including IHC, Hematoxylin Channel, mpIF DAPI, mpIF Lap2, mpIF marker, and segmentation mask) in the input directory.
-Each of the six images for a single image set must have the same naming format, with only the name of the label for the type of image differing between them. The label names must be, respectively: IHC, Hematoxylin, DAPI, Lap2, Marker, Seg.
-The command takes the address of the directory containing image set data and the address of the output dataset directory.
-It first creates the train and validation directories inside the given output dataset directory.
-It then reads all of the images in the input directory and saves the combined image in the train or validation directory, based on the given `validation_ratio`.
-```
-deepliif prepare-training-data --input-dir /path/to/input/images
-                               --output-dir /path/to/output/images
-                               --validation-ratio 0.2
-```
-
-## Training
-To train a model:
-```
-deepliif train --dataroot /path/to/input/images
-               --name Model_Name
-```
-or
-```
-python train.py --dataroot /path/to/input/images
-                --name Model_Name
-```
-
-* To view training losses and results, open the URL http://localhost:8097. For cloud servers replace localhost with your IP.
-* Epoch-wise intermediate training results are in `DeepLIIF/checkpoints/Model_Name/web/index.html`.
-* Trained models will be by default be saved in `DeepLIIF/checkpoints/Model_Name`.
-* Training datasets can be downloaded [here](https://zenodo.org/record/4751737#.YKRTS0NKhH4).
-
-**DP**: To train a model you can use DP. DP is single-process. It means that **all the GPUs you want to use must be on the same machine** so that they can be included in the same process - you cannot distribute the training across multiple GPU machines, unless you write your own code to handle inter-node (node = machine) communication.
-To split and manage the workload for multiple GPUs within the same process, DP uses multi-threading.
-You can find more information on DP [here](https://github.com/nadeemlab/DeepLIIF/blob/main/Multi-GPU%20Training.md).
-
-To train a model with DP (Example with 2 GPUs (on 1 machine)):
-```
-deepliif train --dataroot <data_dir> --batch-size 6 --gpu-ids 0 --gpu-ids 1
-```
-Note that `batch-size` is defined per process. Since DP is a single-process method, the `batch-size` you set is the **effective** batch size.
-
-**DDP**: To train a model you can use DDP. DDP usually spawns multiple processes.
-**DeepLIIF's code follows the PyTorch recommendation to spawn 1 process per GPU** ([doc](https://github.com/pytorch/examples/blob/master/distributed/ddp/README.md#application-process-topologies)). If you want to assign multiple GPUs to each process, you will need to make modifications to DeepLIIF's code (see [doc](https://pytorch.org/tutorials/intermediate/ddp_tutorial.html#combine-ddp-with-model-parallelism)).
-Despite all the benefits of DDP, one drawback is the extra GPU memory needed for dedicated CUDA buffer for communication. See a short discussion [here](https://discuss.pytorch.org/t/do-dataparallel-and-distributeddataparallel-affect-the-batch-size-and-gpu-memory-consumption/97194/2). In the context of DeepLIIF, this means that there might be situations where you could use a *bigger batch size with DP* as compared to DDP, which may actually train faster than using DDP with a smaller batch size.
-You can find more information on DDP [here](https://github.com/nadeemlab/DeepLIIF/blob/main/Multi-GPU%20Training.md).
-
-To launch training using DDP on a local machine, use `deepliif trainlaunch`. Example with 2 GPUs (on 1 machine):
-```
-deepliif trainlaunch --dataroot <data_dir> --batch-size 3 --gpu-ids 0 --gpu-ids 1 --use-torchrun "--nproc_per_node 2"
-```
-Note that
-1. `batch-size` is defined per process. Since DDP is a single-process method, the `batch-size` you set is the batch size for each process, and the **effective** batch size will be `batch-size` multiplied by the number of processes you started. In the above example, it will be 3 * 2 = 6.
-2. You still need to provide **all GPU ids to use** to the training command. Internally, in each process DeepLIIF picks the device using `gpu_ids[local_rank]`. If you provide `--gpu-ids 2 --gpu-ids 3`, the process with local rank 0 will use gpu id 2 and that with local rank 1 will use gpu id 3.
-3. `-t 3 --log_dir <log_dir>` is not required, but is a useful setting in `torchrun` that saves the log from each process to your target log directory. For example:
-```
-deepliif trainlaunch --dataroot <data_dir> --batch-size 3 --gpu-ids 0 --gpu-ids 1 --use-torchrun "-t 3 --log_dir <log_dir> --nproc_per_node 2"
-```
-4. If your PyTorch is older than 1.10, DeepLIIF calls `torch.distributed.launch` in the backend. Otherwise, DeepLIIF calls `torchrun`.
-
-## Serialize Model
-The installed `deepliif` uses Dask to perform inference on the input IHC images.
-Before running the `test` command, the model files must be serialized using Torchscript.
-To serialize the model files:
-```
-deepliif serialize --models-dir /path/to/input/model/files
-                   --output-dir /path/to/output/model/files
-```
-* By default, the model files are expected to be located in `DeepLIIF/model-server/DeepLIIF_Latest_Model`.
-* By default, the serialized files will be saved to the same directory as the input model files.
-
-## Testing
-To test the model:
-```
-deepliif test --input-dir /path/to/input/images
-              --output-dir /path/to/output/images
-              --model-dir path/to/the/serialized/model
-              --tile-size 512
-```
-or
-```
-python test.py --dataroot /path/to/input/images
-               --name Model_Name
-```
-* The latest version of the pretrained models can be downloaded [here](https://zenodo.org/record/4751737#.YKRTS0NKhH4).
-* Before running test on images, the model files must be serialized as described above.
-* The serialized model files are expected to be located in `DeepLIIF/model-server/DeepLIIF_Latest_Model`.
-* The test results will be saved to the specified output directory, which defaults to the input directory.
-* The default tile size is 512.
-* Testing datasets can be downloaded [here](https://zenodo.org/record/4751737#.YKRTS0NKhH4).
-
-**Whole Slide Image (WSI) Inference:**
-For translation and segmentation of whole slide images,
-you can simply use the same test command
-giving path to the directory containing your whole slide images as the input-dir.
-DeepLIIF automatically reads the WSI region by region,
-and translate and segment each region separately and stitches the regions
-to create the translation and segmentation for whole slide image,
-then saves all masks in the format of ome.tiff in the given output-dir.
-Based on the available GPU resources, the region-size can be changed.
-```
-deepliif test --input-dir /path/to/input/images
-              --output-dir /path/to/output/images
-              --model-dir path/to/the/serialized/model
-              --tile-size 512
-              --region-size 20000
-```
-
-If you prefer, it is possible to run the models using Torchserve.
-Please see [the documentation](https://nadeemlab.github.io/DeepLIIF/deployment/#deploying-deepliif-with-torchserve)
-on how to deploy the model with Torchserve and for an example of how to run the inference.
-
-## Docker
-We provide a Dockerfile that can be used to run the DeepLIIF models inside a container.
-First, you need to install the [Docker Engine](https://docs.docker.com/engine/install/ubuntu/).
-After installing the Docker, you need to follow these steps:
-* Download the pretrained model and place them in DeepLIIF/checkpoints/DeepLIIF_Latest_Model.
-* Change XXX of the **WORKDIR** line in the **DockerFile** to the directory containing the DeepLIIF project.
-* To create a docker image from the docker file:
-```
-docker build -t cuda/deepliif .
-```
-The image is then used as a base. You can copy and use it to run an application. The application needs an isolated
-environment in which to run, referred to as a container.
-* To create and run a container:
-```
-docker run -it -v `pwd`:`pwd` -w `pwd` cuda/deepliif deepliif test --input-dir Sample_Large_Tissues
-```
-When you run a container from the image, the `deepliif` CLI will be available.
-You can easily run any CLI command in the activated environment and copy the results from the docker container to the host.
-
-## ImageJ Plugin
-If you don't have access to GPU or appropriate hardware and just want to use ImageJ to run inference, we have also created an [ImageJ plugin](https://github.com/nadeemlab/DeepLIIF/tree/main/ImageJ_Plugin) for your convenience.
-
-
-
-The plugin also supports submitting multiple ROIs at once:
-
-
-
-## Cloud Deployment
-If you don't have access to GPU or appropriate hardware and don't want to install ImageJ, we have also created a [cloud-native DeepLIIF deployment](https://deepliif.org) with a user-friendly interface to upload images, visualize, interact, and download the final results.
-
-
-
-DeepLIIF can also be accessed programmatically through an endpoint by posting a multipart-encoded request
-containing the original image file:
-
-```
-POST /api/infer
-
-Parameters
-
-img (required)
-file: image to run the models on
-
-resolution
-string: resolution used to scan the slide (10x, 20x, 40x), defaults to 20x
-
-pil
-boolean: if true, use PIL.Image.open() to load the image, instead of python-bioformats
-
-slim
-boolean: if true, return only the segmentation result image
-```
-
-For example, in Python:
-
-```python
-import os
-import json
-import base64
-from io import BytesIO
-
-import requests
-from PIL import Image
-
-# Use the sample images from the main DeepLIIF repo
-images_dir = './Sample_Large_Tissues'
-filename = 'ROI_1.png'
-
-res = requests.post(
-    url='https://deepliif.org/api/infer',
-    files={
-        'img': open(f'{images_dir}/{filename}', 'rb')
-    },
-    # optional param that can be 10x, 20x (default) or 40x
-    params={
-        'resolution': '20x'
-    }
-)
-
-data = res.json()
-
-def b64_to_pil(b):
-    return Image.open(BytesIO(base64.b64decode(b.encode())))
-
-for name, img in data['images'].items():
-    output_filepath = f'{images_dir}/{os.path.splitext(filename)[0]}_{name}.png'
-    with open(output_filepath, 'wb') as f:
-        b64_to_pil(img).save(f, format='PNG')
-
-print(json.dumps(data['scoring'], indent=2))
-```
-
-## Synthetic Data Generation
-The first version of DeepLIIF model suffered from its inability to separate IHC positive cells in some large clusters,
-resulting from the absence of clustered positive cells in our training data. To infuse more information about the
-clustered positive cells into our model, we present a novel approach for the synthetic generation of IHC images using
-co-registered data.
-We design a GAN-based model that receives the Hematoxylin channel, the mpIF DAPI image, and the segmentation mask and
-generates the corresponding IHC image. The model converts the Hematoxylin channel to gray-scale to infer more helpful
-information such as the texture and discard unnecessary information such as color. The Hematoxylin image guides the
-network to synthesize the background of the IHC image by preserving the shape and texture of the cells and artifacts in
-the background. The DAPI image assists the network in identifying the location, shape, and texture of the cells to
-better isolate the cells from the background. The segmentation mask helps the network specify the color of cells based
-on the type of the cell (positive cell: a brown hue, negative: a blue hue).
-
-In the next step, we generate synthetic IHC images with more clustered positive cells. To do so, we change the
-segmentation mask by choosing a percentage of random negative cells in the segmentation mask (called as Neg-to-Pos) and
-converting them into positive cells. Some samples of the synthesized IHC images along with the original IHC image are
-shown below.
-
-*Overview of synthetic IHC image generation. (a) A training sample
-of the IHC-generator model. (b) Some samples of synthesized IHC images using the trained IHC-Generator model. The
-Neg-to-Pos shows the percentage of the negative cells in the segmentation mask converted to positive cells.*
-
-We created a new dataset using the original IHC images and synthetic IHC images. We synthesize each image in the dataset
-two times by setting the Neg-to-Pos parameter to %50 and %70. We re-trained our network with the new dataset. You can
-find the new trained model [here](https://zenodo.org/record/4751737/files/DeepLIIF_Latest_Model.zip?download=1).
-
-## Registration
-To register the de novo stained mpIF and IHC images, you can use the registration framework in the 'Registration'
-directory. Please refer to the README file provided in the same directory for more details.
-
-## Contributing Training Data
-To train DeepLIIF, we used a dataset of lung and bladder tissues containing IHC, hematoxylin, mpIF DAPI, mpIF Lap2, and
-mpIF Ki67 of the same tissue scanned using ZEISS Axioscan. These images were scaled and co-registered with the fixed IHC
-images using affine transformations, resulting in 1264 co-registered sets of IHC and corresponding multiplex images of
-size 512x512. We randomly selected 575 sets for training, 91 sets for validation, and 598 sets for testing the model.
-We also randomly selected and manually segmented 41 images of size 640x640 from recently released [BCDataset](https://sites.google.com/view/bcdataset)
-which contains Ki67 stained sections of breast carcinoma with Ki67+ and Ki67- cell centroid annotations (for cell
-detection rather than cell instance segmentation task). We split these tiles into 164 images of size 512x512; the test
-set varies widely in the density of tumor cells and the Ki67 index. You can find this dataset [here](https://zenodo.org/record/4751737#.YKRTS0NKhH4).
-
-We are also creating a self-configurable version of DeepLIIF which will take as input any co-registered H&E/IHC and
-multiplex images and produce the optimal output. If you are generating or have generated H&E/IHC and multiplex staining
-for the same slide (de novo staining) and would like to contribute that data for DeepLIIF, we can perform
-co-registration, whole-cell multiplex segmentation via [ImPartial](https://github.com/nadeemlab/ImPartial), train the
-DeepLIIF model and release back to the community with full credit to the contributors.
-
-## Support
-Please use the [Image.sc Forum](https://forum.image.sc/tag/deepliif) for discussion and questions related to DeepLIIF.
-
-Bugs can be reported in the [GitHub Issues](https://github.com/nadeemlab/DeepLIIF/issues) tab.
-
-## License
-
-and is available for non-commercial academic purposes.
-
-## Acknowledgments
-* This code is inspired by [CycleGAN and pix2pix in PyTorch](https://github.com/junyanz/pytorch-CycleGAN-and-pix2pix).
-
-## Reference
-If you find our work useful in your research or if you use parts of this code, please cite our paper:
-```
-@article{ghahremani2022deep,
-  title={Deep learning-inferred multiplex immunofluorescence for immunohistochemical image quantification},
-  author={Ghahremani, Parmida and Li, Yanyun and Kaufman, Arie and Vanguri, Rami and Greenwald, Noah and Angelo, Michael and Hollmann, Travis J and Nadeem, Saad},
-  journal={Nature Machine Intelligence},
-  volume={4},
-  number={4},
-  pages={401--412},
-  year={2022},
-  publisher={Nature Publishing Group}
-}
-
-@article{ghahremani2022deepliifui,
-  title={DeepLIIF: An Online Platform for Quantification of Clinical Pathology Slides},
-  author={Ghahremani, Parmida and Marino, Joseph and Dodds, Ricardo and Nadeem, Saad},
-  journal={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
-  pages={21399--21405},
-  year={2022}
-}
-
-```
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: deepliif
|
|
3
|
+
Version: 1.1.6
|
|
4
|
+
Summary: DeepLIIF: Deep-Learning Inferred Multiplex Immunofluorescence for Immunohistochemical Image Quantification
|
|
5
|
+
Home-page: https://github.com/nadeemlab/DeepLIIF
|
|
6
|
+
Author: Parmida93
|
|
7
|
+
Author-email: ghahremani.parmida@gmail.com
|
|
8
|
+
Keywords: DeepLIIF,IHC,Segmentation,Classification
|
|
9
|
+
Description-Content-Type: text/markdown
|
|
10
|
+
License-File: LICENSE.md
|
|
11
|
+
Requires-Dist: opencv-python (==4.5.3.56)
|
|
12
|
+
Requires-Dist: torchvision (==0.10.0)
|
|
13
|
+
Requires-Dist: scikit-image (==0.18.3)
|
|
14
|
+
Requires-Dist: dominate (==2.6.0)
|
|
15
|
+
Requires-Dist: numba (==0.53.1)
|
|
16
|
+
Requires-Dist: Click (==8.0.3)
|
|
17
|
+
Requires-Dist: requests (==2.26.0)
|
|
18
|
+
Requires-Dist: dask (==2021.11.2)
|
|
19
|
+
Requires-Dist: visdom (>=0.1.8.3)
|
|
20
|
+
Requires-Dist: python-bioformats (>=4.0.6)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
<!-- PROJECT LOGO -->
|
|
24
|
+
<br />
|
|
25
|
+
<p align="center">
|
|
26
|
+
<img src="./images/DeepLIIF_logo.png" width="50%">
|
|
27
|
+
<h3 align="center"><strong>Deep-Learning Inferred Multiplex Immunofluorescence for Immunohistochemical Image Quantification</strong></h3>
|
|
28
|
+
<p align="center">
|
|
29
|
+
<a href="https://doi.org/10.1101/2021.05.01.442219">Journal Preprint</a>
|
|
30
|
+
|
|
|
31
|
+
<a href="https://rdcu.be/cKSBz">Journal Link</a>
|
|
32
|
+
|
|
|
33
|
+
<a href="https://openaccess.thecvf.com/content/CVPR2022/html/Ghahremani_DeepLIIF_An_Online_Platform_for_Quantification_of_Clinical_Pathology_Slides_CVPR_2022_paper.html">CVPR Link</a>
|
|
34
|
+
|
|
|
35
|
+
<a href="https://deepliif.org/">Cloud Deployment</a>
|
|
36
|
+
|
|
|
37
|
+
<a href="https://nadeemlab.github.io/DeepLIIF/">Documentation</a>
|
|
38
|
+
|
|
|
39
|
+
<a href="#docker">Docker</a>
|
|
40
|
+
|
|
|
41
|
+
<a href="https://github.com/nadeemlab/DeepLIIF/tree/main/ImageJ_Plugin">ImageJ Plugin</a>
|
|
42
|
+
|
|
|
43
|
+
<a href="#support">Support</a>
|
|
44
|
+
</p>
|
|
45
|
+
</p>
|
|
46
|
+
|
|
47
|
+
*Reporting biomarkers assessed by routine immunohistochemical (IHC) staining of tissue is broadly used in diagnostic
|
|
48
|
+
pathology laboratories for patient care. To date, clinical reporting is predominantly qualitative or semi-quantitative.
|
|
49
|
+
By creating a multitask deep learning framework referred to as DeepLIIF, we present a single-step solution to stain
|
|
50
|
+
deconvolution/separation, cell segmentation, and quantitative single-cell IHC scoring. Leveraging a unique de novo
|
|
51
|
+
dataset of co-registered IHC and multiplex immunofluorescence (mpIF) staining of the same slides, we segment and
|
|
52
|
+
translate low-cost and prevalent IHC slides to more expensive-yet-informative mpIF images, while simultaneously
|
|
53
|
+
providing the essential ground truth for the superimposed brightfield IHC channels. Moreover, a new nuclear-envelop
|
|
54
|
+
stain, LAP2beta, with high (>95%) cell coverage is introduced to improve cell delineation/segmentation and protein
|
|
55
|
+
expression quantification on IHC slides. By simultaneously translating input IHC images to clean/separated mpIF channels
|
|
56
|
+
and performing cell segmentation/classification, we show that our model trained on clean IHC Ki67 data can generalize to
|
|
57
|
+
more noisy and artifact-ridden images as well as other nuclear and non-nuclear markers such as CD3, CD8, BCL2, BCL6,
|
|
58
|
+
MYC, MUM1, CD10, and TP53. We thoroughly evaluate our method on publicly available benchmark datasets as well as against
|
|
59
|
+
pathologists' semi-quantitative scoring. Trained on IHC, DeepLIIF generalizes well to H&E images for out-of-the-box nuclear
|
|
60
|
+
segmentation.*
|
|
61
|
+
|
|
62
|
+
**DeepLIIF** is deployed as a free publicly available cloud-native platform (https://deepliif.org) with Bioformats (more than 150 input formats supported) and MLOps pipeline. We also release **DeepLIIF** implementations for single/multi-GPU training, Torchserve/Dask+Torchscript deployment, and auto-scaling via Pulumi (1000s of concurrent connections supported); details can be found in our [documentation](https://nadeemlab.github.io/DeepLIIF/). **DeepLIIF** can be run locally (GPU required) by [pip installing the package](https://github.com/nadeemlab/DeepLIIF/edit/main/README.md#installing-deepliif) and using the deepliif CLI command. **DeepLIIF** can be used remotely (no GPU required) through the https://deepliif.org website, calling the [cloud API via Python](https://github.com/nadeemlab/DeepLIIF/edit/main/README.md#cloud-deployment), or via the [ImageJ/Fiji plugin](https://github.com/nadeemlab/DeepLIIF/edit/main/README.md#imagej-plugin); details for the free cloud-native platform can be found in our [CVPR'22 paper](https://arxiv.org/pdf/2204.04494.pdf).
|
|
63
|
+
|
|
64
|
+
© This code is made available for non-commercial academic purposes.
|
|
65
|
+
|
|
66
|
+
*Overview of DeepLIIF pipeline and sample input IHCs (different
|
|
67
|
+
brown/DAB markers -- BCL2, BCL6, CD10, CD3/CD8, Ki67) with corresponding DeepLIIF-generated hematoxylin/mpIF modalities
|
|
68
|
+
and classified (positive (red) and negative (blue) cell) segmentation masks. (a) Overview of DeepLIIF. Given an IHC
|
|
69
|
+
input, our multitask deep learning framework simultaneously infers corresponding Hematoxylin channel, mpIF DAPI, mpIF
|
|
70
|
+
protein expression (Ki67, CD3, CD8, etc.), and the positive/negative protein cell segmentation, baking explainability
|
|
71
|
+
and interpretability into the model itself rather than relying on coarse activation/attention maps. In the segmentation
|
|
72
|
+
mask, the red cells denote cells with positive protein expression (brown/DAB cells in the input IHC), whereas blue cells
|
|
73
|
+
represent negative cells (blue cells in the input IHC). (b) Example DeepLIIF-generated hematoxylin/mpIF modalities and
|
|
74
|
+
segmentation masks for different IHC markers. DeepLIIF, trained on clean IHC Ki67 nuclear marker images, can generalize
|
|
75
|
+
to noisier as well as other IHC nuclear/cytoplasmic marker images.*
|
|
76
|
+
|
|
77
|
+
## Prerequisites
|
|
78
|
+
1. Python 3.8
|
|
79
|
+
2. Docker
|
|
80
|
+
|
|
81
|
+
## Installing `deepliif`
|
|
82
|
+
|
|
83
|
+
DeepLIIF can be `pip` installed:
|
|
84
|
+
```shell
|
|
85
|
+
$ conda create --name deepliif_env python=3.8
|
|
86
|
+
$ conda activate deepliif_env
|
|
87
|
+
(deepliif_env) $ conda install -c conda-forge openjdk
|
|
88
|
+
(deepliif_env) $ pip install deepliif
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
The package is composed of two parts:
|
|
92
|
+
1. A library that implements the core functions used to train and test DeepLIIF models.
|
|
93
|
+
2. A CLI to run common batch operations including training, batch testing and Torchscipt models serialization.
|
|
94
|
+
|
|
95
|
+
You can list all available commands:
|
|
96
|
+
|
|
97
|
+
```
|
|
98
|
+
(venv) $ deepliif --help
|
|
99
|
+
Usage: deepliif [OPTIONS] COMMAND [ARGS]...
|
|
100
|
+
|
|
101
|
+
Options:
|
|
102
|
+
--help Show this message and exit.
|
|
103
|
+
|
|
104
|
+
Commands:
|
|
105
|
+
prepare-testing-data Preparing data for testing
|
|
106
|
+
serialize Serialize DeepLIIF models using Torchscript
|
|
107
|
+
test Test trained models
|
|
108
|
+
train General-purpose training script for multi-task...
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
## Training Dataset
|
|
112
|
+
For training, all image sets must be 512x512 and combined together in 3072x512 images (six images of size 512x512 stitched
|
|
113
|
+
together horizontally).
|
|
114
|
+
The data need to be arranged in the following order:
|
|
115
|
+
```
|
|
116
|
+
XXX_Dataset
|
|
117
|
+
├── train
|
|
118
|
+
└── val
|
|
119
|
+
```
|
|
120
|
+
We have provided a simple function in the CLI for preparing data for training.
|
|
121
|
+
|
|
122
|
+
* **To prepare data for training**, you need to have the image dataset for each image (including IHC, Hematoxylin Channel, mpIF DAPI, mpIF Lap2, mpIF marker, and segmentation mask) in the input directory.
|
|
123
|
+
Each of the six images for a single image set must have the same naming format, with only the name of the label for the type of image differing between them. The label names must be, respectively: IHC, Hematoxylin, DAPI, Lap2, Marker, Seg.
|
|
124
|
+
The command takes the address of the directory containing image set data and the address of the output dataset directory.
|
|
125
|
+
It first creates the train and validation directories inside the given output dataset directory.
|
|
126
|
+
It then reads all of the images in the input directory and saves the combined image in the train or validation directory, based on the given `validation_ratio`.
|
|
127
|
+
```
|
|
128
|
+
deepliif prepare-training-data --input-dir /path/to/input/images
|
|
129
|
+
--output-dir /path/to/output/images
|
|
130
|
+
--validation-ratio 0.2
|
|
131
|
+
```

## Training
To train a model:
```
deepliif train --dataroot /path/to/input/images
               --name Model_Name
```
or
```
python train.py --dataroot /path/to/input/images
                --name Model_Name
```

* To view training losses and results, open the URL http://localhost:8097. For cloud servers, replace localhost with your IP.
* Epoch-wise intermediate training results are in `DeepLIIF/checkpoints/Model_Name/web/index.html`.
* Trained models will by default be saved in `DeepLIIF/checkpoints/Model_Name`.
* Training datasets can be downloaded [here](https://zenodo.org/record/4751737#.YKRTS0NKhH4).

**DP**: To train a model you can use DP (DataParallel). DP is single-process: **all the GPUs you want to use must be on the same machine** so that they can be included in the same process; you cannot distribute the training across multiple GPU machines unless you write your own code to handle inter-node (node = machine) communication.
To split and manage the workload for multiple GPUs within the same process, DP uses multi-threading.
You can find more information on DP [here](https://github.com/nadeemlab/DeepLIIF/blob/main/Multi-GPU%20Training.md).

To train a model with DP (example with 2 GPUs on 1 machine):
```
deepliif train --dataroot <data_dir> --batch-size 6 --gpu-ids 0 --gpu-ids 1
```
Note that `batch-size` is defined per process. Since DP is a single-process method, the `batch-size` you set is the **effective** batch size.

**DDP**: To train a model you can use DDP (DistributedDataParallel). DDP usually spawns multiple processes.
**DeepLIIF's code follows the PyTorch recommendation to spawn 1 process per GPU** ([doc](https://github.com/pytorch/examples/blob/master/distributed/ddp/README.md#application-process-topologies)). If you want to assign multiple GPUs to each process, you will need to make modifications to DeepLIIF's code (see [doc](https://pytorch.org/tutorials/intermediate/ddp_tutorial.html#combine-ddp-with-model-parallelism)).
Despite all the benefits of DDP, one drawback is the extra GPU memory needed for the dedicated CUDA buffers used for communication. See a short discussion [here](https://discuss.pytorch.org/t/do-dataparallel-and-distributeddataparallel-affect-the-batch-size-and-gpu-memory-consumption/97194/2). In the context of DeepLIIF, this means that there might be situations where you could use a *bigger batch size with DP* as compared to DDP, which may actually train faster than using DDP with a smaller batch size.
You can find more information on DDP [here](https://github.com/nadeemlab/DeepLIIF/blob/main/Multi-GPU%20Training.md).

To launch training using DDP on a local machine, use `deepliif trainlaunch`. Example with 2 GPUs (on 1 machine):
```
deepliif trainlaunch --dataroot <data_dir> --batch-size 3 --gpu-ids 0 --gpu-ids 1 --use-torchrun "--nproc_per_node 2"
```
Note that
1. `batch-size` is defined per process. Since DDP is a multi-process method, the `batch-size` you set is the batch size for each process, and the **effective** batch size will be `batch-size` multiplied by the number of processes you started. In the above example, it will be 3 * 2 = 6.
2. You still need to provide **all GPU ids to use** to the training command. Internally, in each process DeepLIIF picks the device using `gpu_ids[local_rank]`. If you provide `--gpu-ids 2 --gpu-ids 3`, the process with local rank 0 will use gpu id 2 and that with local rank 1 will use gpu id 3 (see the sketch after this list).
3. `-t 3 --log_dir <log_dir>` is not required, but is a useful setting in `torchrun` that saves the log from each process to your target log directory. For example:
```
deepliif trainlaunch --dataroot <data_dir> --batch-size 3 --gpu-ids 0 --gpu-ids 1 --use-torchrun "-t 3 --log_dir <log_dir> --nproc_per_node 2"
```
4. If your PyTorch is older than 1.10, DeepLIIF calls `torch.distributed.launch` in the backend. Otherwise, DeepLIIF calls `torchrun`.
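
To illustrate point 2, here is a minimal sketch of per-process device selection under `torchrun`; the variable names are illustrative only, not DeepLIIF's actual code.

```python
import os
import torch

# torchrun sets LOCAL_RANK in the environment of each process it spawns.
gpu_ids = [2, 3]                                   # e.g. from --gpu-ids 2 --gpu-ids 3
local_rank = int(os.environ.get('LOCAL_RANK', 0))

# The process with local rank 0 picks cuda:2, the one with local rank 1 picks cuda:3.
device = torch.device(f'cuda:{gpu_ids[local_rank]}')
print(f'local rank {local_rank} -> {device}')
```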

## Serialize Model
The installed `deepliif` uses Dask to perform inference on the input IHC images.
Before running the `test` command, the model files must be serialized using Torchscript.
To serialize the model files:
```
deepliif serialize --models-dir /path/to/input/model/files
                   --output-dir /path/to/output/model/files
```
* By default, the model files are expected to be located in `DeepLIIF/model-server/DeepLIIF_Latest_Model`.
* By default, the serialized files will be saved to the same directory as the input model files.
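
Each serialized network is a standalone Torchscript file that can be loaded without the original Python class definitions. A minimal sanity-check sketch is shown below; `G1.pt` is a placeholder name (use whichever `<name>.pt` files the serialize command wrote to your output directory), and note that different networks in the ensemble may expect different inputs.

```python
import torch
from PIL import Image

from deepliif.data import transform

# Placeholder path/file name: point this at one of the .pt files produced by
# `deepliif serialize`.
net = torch.jit.load('./model-server/DeepLIIF_Latest_Model/G1.pt')
net.eval()

# Preprocess a 512x512 RGB tile into the tensor format the network expects.
tile = transform(Image.open('./Sample_Large_Tissues/ROI_1.png').convert('RGB').resize((512, 512)))
with torch.no_grad():
    out = net(tile)
print(out.shape)
```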

## Testing
To test the model:
```
deepliif test --input-dir /path/to/input/images
              --output-dir /path/to/output/images
              --model-dir /path/to/the/serialized/model
              --tile-size 512
```
or
```
python test.py --dataroot /path/to/input/images
               --name Model_Name
```
* The latest version of the pretrained models can be downloaded [here](https://zenodo.org/record/4751737#.YKRTS0NKhH4).
* Before running the test on images, the model files must be serialized as described above.
* The serialized model files are expected to be located in `DeepLIIF/model-server/DeepLIIF_Latest_Model`.
* The test results will be saved to the specified output directory, which defaults to the input directory.
* The default tile size is 512.
* Testing datasets can be downloaded [here](https://zenodo.org/record/4751737#.YKRTS0NKhH4).

**Whole Slide Image (WSI) Inference:**
For translation and segmentation of whole slide images, you can simply use the same test command,
giving the path to the directory containing your whole slide images as the `input-dir`.
DeepLIIF automatically reads the WSI region by region, translates and segments each region separately,
and stitches the regions to create the translation and segmentation for the whole slide image,
then saves all masks in ome.tiff format in the given `output-dir`.
The `region-size` can be changed based on the available GPU resources.
```
deepliif test --input-dir /path/to/input/images
              --output-dir /path/to/output/images
              --model-dir /path/to/the/serialized/model
              --tile-size 512
              --region-size 20000
```
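
Conceptually, region-based inference walks the slide in fixed-size windows and then reassembles the per-region outputs. The sketch below illustrates only that idea on an ordinary image, with a placeholder `run_inference` function; it is not DeepLIIF's actual WSI reader, which works on whole-slide formats and writes `ome.tiff`.

```python
from PIL import Image

Image.MAX_IMAGE_PIXELS = None      # allow very large images in this toy example
region_size = 20000

slide = Image.open('large_image.png').convert('RGB')
width, height = slide.size
stitched = Image.new('RGB', (width, height))

def run_inference(region):
    # Placeholder: this is where each region would be translated/segmented.
    return region

# Walk the image region by region and paste the results back together.
for x in range(0, width, region_size):
    for y in range(0, height, region_size):
        box = (x, y, min(x + region_size, width), min(y + region_size, height))
        stitched.paste(run_inference(slide.crop(box)), (x, y))

stitched.save('stitched_output.png')
```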

If you prefer, it is possible to run the models using TorchServe.
Please see [the documentation](https://nadeemlab.github.io/DeepLIIF/deployment/#deploying-deepliif-with-torchserve)
on how to deploy the model with TorchServe and for an example of how to run the inference.

## Docker
We provide a Dockerfile that can be used to run the DeepLIIF models inside a container.
First, you need to install the [Docker Engine](https://docs.docker.com/engine/install/ubuntu/).
After installing Docker, follow these steps:
* Download the pretrained model and place it in `DeepLIIF/checkpoints/DeepLIIF_Latest_Model`.
* Change XXX of the **WORKDIR** line in the **Dockerfile** to the directory containing the DeepLIIF project.
* To create a Docker image from the Dockerfile:
```
docker build -t cuda/deepliif .
```
The image is then used as a base. You can copy and use it to run an application. The application needs an isolated
environment in which to run, referred to as a container.
* To create and run a container:
```
docker run -it -v `pwd`:`pwd` -w `pwd` cuda/deepliif deepliif test --input-dir Sample_Large_Tissues
```
When you run a container from the image, the `deepliif` CLI will be available.
You can easily run any CLI command in the activated environment and copy the results from the Docker container to the host.

## ImageJ Plugin
If you don't have access to a GPU or appropriate hardware and just want to use ImageJ to run inference, we have also created an [ImageJ plugin](https://github.com/nadeemlab/DeepLIIF/tree/main/ImageJ_Plugin) for your convenience.



The plugin also supports submitting multiple ROIs at once:



## Cloud Deployment
If you don't have access to a GPU or appropriate hardware and don't want to install ImageJ, we have also created a [cloud-native DeepLIIF deployment](https://deepliif.org) with a user-friendly interface to upload images, visualize, interact, and download the final results.



DeepLIIF can also be accessed programmatically through an endpoint by posting a multipart-encoded request
containing the original image file:

```
POST /api/infer

Parameters

img (required)
  file: image to run the models on

resolution
  string: resolution used to scan the slide (10x, 20x, 40x), defaults to 20x

pil
  boolean: if true, use PIL.Image.open() to load the image, instead of python-bioformats

slim
  boolean: if true, return only the segmentation result image
```

For example, in Python:

```python
import os
import json
import base64
from io import BytesIO

import requests
from PIL import Image

# Use the sample images from the main DeepLIIF repo
images_dir = './Sample_Large_Tissues'
filename = 'ROI_1.png'

res = requests.post(
    url='https://deepliif.org/api/infer',
    files={
        'img': open(f'{images_dir}/{filename}', 'rb')
    },
    # optional param that can be 10x, 20x (default) or 40x
    params={
        'resolution': '20x'
    }
)

data = res.json()

def b64_to_pil(b):
    # Decode a base64-encoded image returned by the API into a PIL Image
    return Image.open(BytesIO(base64.b64decode(b.encode())))

# Save each returned image next to the input file
for name, img in data['images'].items():
    output_filepath = f'{images_dir}/{os.path.splitext(filename)[0]}_{name}.png'
    with open(output_filepath, 'wb') as f:
        b64_to_pil(img).save(f, format='PNG')

print(json.dumps(data['scoring'], indent=2))
```

## Synthetic Data Generation
The first version of the DeepLIIF model suffered from its inability to separate IHC positive cells in some large clusters,
resulting from the absence of clustered positive cells in our training data. To infuse more information about the
clustered positive cells into our model, we present a novel approach for the synthetic generation of IHC images using
co-registered data.
We design a GAN-based model that receives the Hematoxylin channel, the mpIF DAPI image, and the segmentation mask and
generates the corresponding IHC image. The model converts the Hematoxylin channel to gray-scale to infer more helpful
information, such as texture, and discard unnecessary information, such as color. The Hematoxylin image guides the
network to synthesize the background of the IHC image by preserving the shape and texture of the cells and artifacts in
the background. The DAPI image assists the network in identifying the location, shape, and texture of the cells to
better isolate the cells from the background. The segmentation mask helps the network specify the color of cells based
on the type of the cell (positive cell: a brown hue, negative: a blue hue).

In the next step, we generate synthetic IHC images with more clustered positive cells. To do so, we change the
segmentation mask by choosing a percentage of random negative cells in the segmentation mask (called Neg-to-Pos) and
converting them into positive cells. Some samples of the synthesized IHC images along with the original IHC image are
shown below.

*Overview of synthetic IHC image generation. (a) A training sample
of the IHC-generator model. (b) Some samples of synthesized IHC images using the trained IHC-Generator model.
Neg-to-Pos shows the percentage of the negative cells in the segmentation mask converted to positive cells.*
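
The Neg-to-Pos relabeling itself is simple. Below is a toy sketch of the idea on a list of cell records; the real pipeline operates on the segmentation mask images, and this is not the actual DeepLIIF code.

```python
import random

def neg_to_pos(cells, ratio, seed=0):
    """Flip `ratio` of the negative cells to positive (toy illustration)."""
    random.seed(seed)
    negatives = [i for i, c in enumerate(cells) if c['label'] == 'negative']
    for i in random.sample(negatives, int(len(negatives) * ratio)):
        cells[i]['label'] = 'positive'
    return cells

# Toy example: 10 cells, flip 50% of the negatives to positive.
cells = [{'id': i, 'label': 'negative' if i % 2 else 'positive'} for i in range(10)]
cells = neg_to_pos(cells, ratio=0.5)
print(sum(c['label'] == 'positive' for c in cells), 'positive cells')
```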

We created a new dataset using the original IHC images and synthetic IHC images. We synthesized each image in the dataset
twice, setting the Neg-to-Pos parameter to 50% and 70%. We re-trained our network with the new dataset. You can
find the newly trained model [here](https://zenodo.org/record/4751737/files/DeepLIIF_Latest_Model.zip?download=1).

## Registration
To register the de novo stained mpIF and IHC images, you can use the registration framework in the 'Registration'
directory. Please refer to the README file provided in the same directory for more details.

## Contributing Training Data
To train DeepLIIF, we used a dataset of lung and bladder tissues containing IHC, hematoxylin, mpIF DAPI, mpIF Lap2, and
mpIF Ki67 of the same tissue scanned using ZEISS Axioscan. These images were scaled and co-registered with the fixed IHC
images using affine transformations, resulting in 1264 co-registered sets of IHC and corresponding multiplex images of
size 512x512. We randomly selected 575 sets for training, 91 sets for validation, and 598 sets for testing the model.
We also randomly selected and manually segmented 41 images of size 640x640 from the recently released [BCDataset](https://sites.google.com/view/bcdataset),
which contains Ki67-stained sections of breast carcinoma with Ki67+ and Ki67- cell centroid annotations (for cell
detection rather than cell instance segmentation). We split these tiles into 164 images of size 512x512; the test
set varies widely in the density of tumor cells and the Ki67 index. You can find this dataset [here](https://zenodo.org/record/4751737#.YKRTS0NKhH4).

We are also creating a self-configurable version of DeepLIIF, which will take as input any co-registered H&E/IHC and
multiplex images and produce the optimal output. If you are generating or have generated H&E/IHC and multiplex staining
for the same slide (de novo staining) and would like to contribute that data for DeepLIIF, we can perform
co-registration, whole-cell multiplex segmentation via [ImPartial](https://github.com/nadeemlab/ImPartial), train the
DeepLIIF model, and release it back to the community with full credit to the contributors.

## Support
Please use the [Image.sc Forum](https://forum.image.sc/tag/deepliif) for discussion and questions related to DeepLIIF.

Bugs can be reported in the [GitHub Issues](https://github.com/nadeemlab/DeepLIIF/issues) tab.

## License
© [Nadeem Lab](https://nadeemlab.org/) - DeepLIIF code is distributed under the **Apache 2.0 with Commons Clause** license
and is available for non-commercial academic purposes.

## Acknowledgments
* This code is inspired by [CycleGAN and pix2pix in PyTorch](https://github.com/junyanz/pytorch-CycleGAN-and-pix2pix).

## Reference
If you find our work useful in your research or if you use parts of this code, please cite our paper:
```
@article{ghahremani2022deep,
  title={Deep learning-inferred multiplex immunofluorescence for immunohistochemical image quantification},
  author={Ghahremani, Parmida and Li, Yanyun and Kaufman, Arie and Vanguri, Rami and Greenwald, Noah and Angelo, Michael and Hollmann, Travis J and Nadeem, Saad},
  journal={Nature Machine Intelligence},
  volume={4},
  number={4},
  pages={401--412},
  year={2022},
  publisher={Nature Publishing Group}
}

@article{ghahremani2022deepliifui,
  title={DeepLIIF: An Online Platform for Quantification of Clinical Pathology Slides},
  author={Ghahremani, Parmida and Marino, Joseph and Dodds, Ricardo and Nadeem, Saad},
  journal={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  pages={21399--21405},
  year={2022}
}

```
@@ -1,4 +1,4 @@
-cli.py,sha256=
+cli.py,sha256=o6nxKM8WzSS-AzqmvJkm8NDb203oucBzutRvqWHeKWk,40524
 deepliif/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 deepliif/postprocessing.py,sha256=fvRjOAQeHcCpKxqJfBgEozx5H7fqOPWaG9D689XkMQ0,16985
 deepliif/train.py,sha256=-ZORL5vQrD0_Jq2Adgr3w8vJ7L1QcAgNTqMnBgtixgk,15757
@@ -11,23 +11,23 @@ deepliif/data/single_dataset.py,sha256=hWjqTkRESEMppZj_r8bi3G0hAZ5EfvXYgE_qRbpiE
 deepliif/data/template_dataset.py,sha256=PCDBnFRzRKReaeWgKUZmW0LrzRByI9adrKDJ6SN2KMs,3592
 deepliif/data/unaligned_dataset.py,sha256=m7j-CX-hkXbhg96NSEcaCagNVhTuXKkMsBADdMEJDBA,3393
 deepliif/models/DeepLIIF_model.py,sha256=CE-fs9g9zaeUtBKGEYtEsVVMRRQ8V-i9cOWO7cy4Z0U,20669
-deepliif/models/__init__.py,sha256=
-deepliif/models/base_model.py,sha256=
+deepliif/models/__init__.py,sha256=4L842F6d6T2ULPalv_aJgZhu5rMgZvb5Sa4cNz3IKm4,17765
+deepliif/models/base_model.py,sha256=MGIsgMbhbfJyKMW_IiM4TCxvvHSioqKjdbti1k9u4ko,12951
 deepliif/models/networks.py,sha256=bN4yjRdE413efUESq8pvhzPDgFCTwFKXyQOrRqHckWY,32177
 deepliif/options/__init__.py,sha256=WEkvROZkYWDVDCrB_P66wPYYU2cMgBmVx2i7_BpEKq0,137
 deepliif/options/base_options.py,sha256=YZsU4GGccyknMChjCdIr8x7sk8MaWj3XU0E8gIz36hc,9794
 deepliif/options/processing_options.py,sha256=OnNT-ytoTQzetFiMEKrWvrsrhZlupRK4smcnIk0MbqY,2947
 deepliif/options/test_options.py,sha256=4ZbQC5U-nTbUz8jvdDIbse5TK_mjw4D5yNjpVevWD5M,1114
 deepliif/options/train_options.py,sha256=5eA_oxpRj2-HiuMMvC5-HLapxNFG_JXOQ3K132JjpR8,3580
-deepliif/util/__init__.py,sha256=
+deepliif/util/__init__.py,sha256=dPkYGAy8s8JL7srZIkIhDuKdpQwVyf2Nsy5ABWlLFtg,16924
 deepliif/util/get_data.py,sha256=HaRoQYb2u0LUgLT7ES-w35AmJ4BrlBEJWU4Cok29pxI,3749
 deepliif/util/html.py,sha256=RNAONZ4opP-bViahgmpSbHwOc6jXKQRnWRAVIaeIvac,3309
 deepliif/util/image_pool.py,sha256=M89Hc7DblRWroNP71S9mAdRn7h3DrhPFPjqFxxZYSgw,2280
 deepliif/util/util.py,sha256=bTArzuhIMGgGweH0v5rkiHrqBxc24BDv12rssOE9OoI,4636
 deepliif/util/visualizer.py,sha256=5V1lWidHqssJX21jn1P5-bOVgtrEXKVaQgnMWAsMfqg,15636
-deepliif-1.1.
-deepliif-1.1.
-deepliif-1.1.
-deepliif-1.1.
-deepliif-1.1.
-deepliif-1.1.
+deepliif-1.1.6.dist-info/LICENSE.md,sha256=HlZw_UPS6EtJimJ_Ci7xKh-S5Iubs0Z8y8E6EZ3ZNyE,956
+deepliif-1.1.6.dist-info/METADATA,sha256=6eDCIj2ragujJw-K_PpMBKcoppZbH5BHw7q_GVsJSL8,23363
+deepliif-1.1.6.dist-info/WHEEL,sha256=pkctZYzUS4AYVn6dJ-7367OJZivF2e8RA9b_ZBjif18,92
+deepliif-1.1.6.dist-info/entry_points.txt,sha256=f70-10j2q68o_rDlsE3hspnv4ejlDnXwwGZ9JJ-3yF4,37
+deepliif-1.1.6.dist-info/top_level.txt,sha256=vLDK5YKmDz08E7PywuvEjAo7dM5rnIpsjR4c0ubQCnc,13
+deepliif-1.1.6.dist-info/RECORD,,
File without changes
File without changes