konfai 1.0.8__tar.gz → 1.0.9__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {konfai-1.0.8 → konfai-1.0.9}/PKG-INFO +1 -1
- konfai-1.0.9/konfai/__init__.py +16 -0
- {konfai-1.0.8 → konfai-1.0.9}/konfai/data/augmentation.py +4 -4
- konfai-1.0.8/konfai/data/dataset.py → konfai-1.0.9/konfai/data/data_manager.py +21 -25
- konfai-1.0.8/konfai/data/HDF5.py → konfai-1.0.9/konfai/data/patching.py +6 -6
- {konfai-1.0.8 → konfai-1.0.9}/konfai/data/transform.py +3 -3
- {konfai-1.0.8 → konfai-1.0.9}/konfai/evaluator.py +6 -6
- konfai-1.0.9/konfai/main.py +52 -0
- {konfai-1.0.8 → konfai-1.0.9}/konfai/metric/measure.py +7 -4
- {konfai-1.0.8 → konfai-1.0.9}/konfai/models/classification/convNeXt.py +1 -1
- {konfai-1.0.8 → konfai-1.0.9}/konfai/models/classification/resnet.py +1 -1
- {konfai-1.0.8 → konfai-1.0.9}/konfai/models/generation/cStyleGan.py +1 -1
- {konfai-1.0.8 → konfai-1.0.9}/konfai/models/generation/ddpm.py +1 -1
- {konfai-1.0.8 → konfai-1.0.9}/konfai/models/generation/diffusionGan.py +1 -1
- {konfai-1.0.8 → konfai-1.0.9}/konfai/models/generation/gan.py +1 -1
- {konfai-1.0.8 → konfai-1.0.9}/konfai/models/segmentation/NestedUNet.py +1 -1
- {konfai-1.0.8 → konfai-1.0.9}/konfai/models/segmentation/UNet.py +1 -1
- {konfai-1.0.8 → konfai-1.0.9}/konfai/network/network.py +12 -12
- {konfai-1.0.8 → konfai-1.0.9}/konfai/predictor.py +10 -10
- {konfai-1.0.8 → konfai-1.0.9}/konfai/trainer.py +9 -9
- {konfai-1.0.8 → konfai-1.0.9}/konfai/utils/config.py +52 -19
- {konfai-1.0.8 → konfai-1.0.9}/konfai/utils/dataset.py +1 -1
- {konfai-1.0.8 → konfai-1.0.9}/konfai/utils/utils.py +74 -59
- {konfai-1.0.8 → konfai-1.0.9}/konfai.egg-info/PKG-INFO +1 -1
- {konfai-1.0.8 → konfai-1.0.9}/konfai.egg-info/SOURCES.txt +2 -2
- {konfai-1.0.8 → konfai-1.0.9}/pyproject.toml +1 -1
- konfai-1.0.8/konfai/__init__.py +0 -16
- konfai-1.0.8/konfai/main.py +0 -45
- {konfai-1.0.8 → konfai-1.0.9}/LICENSE +0 -0
- {konfai-1.0.8 → konfai-1.0.9}/README.md +0 -0
- {konfai-1.0.8 → konfai-1.0.9}/konfai/data/__init__.py +0 -0
- {konfai-1.0.8 → konfai-1.0.9}/konfai/metric/__init__.py +0 -0
- {konfai-1.0.8 → konfai-1.0.9}/konfai/metric/schedulers.py +0 -0
- {konfai-1.0.8 → konfai-1.0.9}/konfai/models/generation/vae.py +0 -0
- {konfai-1.0.8 → konfai-1.0.9}/konfai/models/registration/registration.py +0 -0
- {konfai-1.0.8 → konfai-1.0.9}/konfai/models/representation/representation.py +0 -0
- {konfai-1.0.8 → konfai-1.0.9}/konfai/network/__init__.py +0 -0
- {konfai-1.0.8 → konfai-1.0.9}/konfai/network/blocks.py +0 -0
- {konfai-1.0.8 → konfai-1.0.9}/konfai/utils/ITK.py +0 -0
- {konfai-1.0.8 → konfai-1.0.9}/konfai/utils/__init__.py +0 -0
- {konfai-1.0.8 → konfai-1.0.9}/konfai/utils/registration.py +0 -0
- {konfai-1.0.8 → konfai-1.0.9}/konfai.egg-info/dependency_links.txt +0 -0
- {konfai-1.0.8 → konfai-1.0.9}/konfai.egg-info/entry_points.txt +0 -0
- {konfai-1.0.8 → konfai-1.0.9}/konfai.egg-info/requires.txt +0 -0
- {konfai-1.0.8 → konfai-1.0.9}/konfai.egg-info/top_level.txt +0 -0
- {konfai-1.0.8 → konfai-1.0.9}/setup.cfg +0 -0
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import datetime
|
|
3
|
+
|
|
4
|
+
def _env(name: str) -> str:
    """Return the value of the environment variable *name*.

    The lookup happens on every call, so changes made to ``os.environ``
    after import are always observed.

    Raises:
        KeyError: if the variable is not set. The message names the
            missing variable so the failure is understandable when only
            ``str(exception)`` is shown to the user.
    """
    try:
        return os.environ[name]
    except KeyError:
        raise KeyError("Environment variable {} is not set".format(name)) from None


def MODELS_DIRECTORY() -> str:
    """Return the current value of ``KONFAI_MODELS_DIRECTORY``."""
    return _env("KONFAI_MODELS_DIRECTORY")


def CHECKPOINTS_DIRECTORY() -> str:
    """Return the current value of ``KONFAI_CHECKPOINTS_DIRECTORY``."""
    return _env("KONFAI_CHECKPOINTS_DIRECTORY")


def MODEL() -> str:
    """Return the current value of ``KONFAI_MODEL``."""
    return _env("KONFAI_MODEL")


def PREDICTIONS_DIRECTORY() -> str:
    """Return the current value of ``KONFAI_PREDICTIONS_DIRECTORY``."""
    return _env("KONFAI_PREDICTIONS_DIRECTORY")


def EVALUATIONS_DIRECTORY() -> str:
    """Return the current value of ``KONFAI_EVALUATIONS_DIRECTORY``."""
    return _env("KONFAI_EVALUATIONS_DIRECTORY")


def STATISTICS_DIRECTORY() -> str:
    """Return the current value of ``KONFAI_STATISTICS_DIRECTORY``."""
    return _env("KONFAI_STATISTICS_DIRECTORY")


def SETUPS_DIRECTORY() -> str:
    """Return the current value of ``KONFAI_SETUPS_DIRECTORY``."""
    return _env("KONFAI_SETUPS_DIRECTORY")


def CONFIG_FILE() -> str:
    """Return the current value of ``KONFAI_CONFIG_FILE``."""
    return _env("KONFAI_CONFIG_FILE")


def KONFAI_STATE() -> str:
    """Return the current value of ``KONFAI_STATE``."""
    return _env("KONFAI_STATE")


def KONFAI_ROOT() -> str:
    """Return the current value of ``KONFAI_ROOT``."""
    return _env("KONFAI_ROOT")


def CUDA_VISIBLE_DEVICES() -> str:
    """Return the current value of ``CUDA_VISIBLE_DEVICES``."""
    return _env("CUDA_VISIBLE_DEVICES")


def DATE() -> str:
    """Return the current local time formatted as ``YYYY_MM_DD_HH_MM_SS``."""
    return datetime.datetime.now().strftime("%Y_%m_%d_%H_%M_%S")
|
|
@@ -6,7 +6,7 @@ import SimpleITK as sitk
|
|
|
6
6
|
import torch.nn.functional as F
|
|
7
7
|
from typing import Union
|
|
8
8
|
import os
|
|
9
|
-
from konfai import
|
|
9
|
+
from konfai import KONFAI_ROOT
|
|
10
10
|
from konfai.utils.config import config
|
|
11
11
|
from konfai.utils.utils import _getModule
|
|
12
12
|
from konfai.utils.dataset import Attribute, data_to_image
|
|
@@ -51,7 +51,7 @@ class Prob():
|
|
|
51
51
|
class DataAugmentationsList():
|
|
52
52
|
|
|
53
53
|
@config()
|
|
54
|
-
def __init__(self, nb : int = 10, dataAugmentations: dict[str, Prob] = {"default:
|
|
54
|
+
def __init__(self, nb : int = 10, dataAugmentations: dict[str, Prob] = {"default:Flip" : Prob(1)}) -> None:
|
|
55
55
|
self.nb = nb
|
|
56
56
|
self.dataAugmentations : list[DataAugmentation] = []
|
|
57
57
|
self.dataAugmentationsLoader = dataAugmentations
|
|
@@ -59,7 +59,7 @@ class DataAugmentationsList():
|
|
|
59
59
|
def load(self, key: str):
|
|
60
60
|
for augmentation, prob in self.dataAugmentationsLoader.items():
|
|
61
61
|
module, name = _getModule(augmentation, "data.augmentation")
|
|
62
|
-
dataAugmentation: DataAugmentation = getattr(importlib.import_module(module), name)(config = None, DL_args="{}.Dataset.augmentations.{}.dataAugmentations".format(
|
|
62
|
+
dataAugmentation: DataAugmentation = getattr(importlib.import_module(module), name)(config = None, DL_args="{}.Dataset.augmentations.{}.dataAugmentations".format(KONFAI_ROOT(), key))
|
|
63
63
|
dataAugmentation.load(prob.prob)
|
|
64
64
|
self.dataAugmentations.append(dataAugmentation)
|
|
65
65
|
|
|
@@ -525,7 +525,7 @@ class Elastix(DataAugmentation):
|
|
|
525
525
|
class Permute(DataAugmentation):
|
|
526
526
|
|
|
527
527
|
@config("Permute")
|
|
528
|
-
def __init__(self, prob_permute: Union[list[float], None] = [0.
|
|
528
|
+
def __init__(self, prob_permute: Union[list[float], None] = [0.5 ,0.5]) -> None:
|
|
529
529
|
super().__init__()
|
|
530
530
|
self._permute_dims = torch.tensor([[0, 2, 1, 3], [0, 3, 1, 2]])
|
|
531
531
|
self.prob_permute = prob_permute
|
|
@@ -12,8 +12,8 @@ from concurrent.futures import ThreadPoolExecutor, as_completed
|
|
|
12
12
|
import threading
|
|
13
13
|
from torch.cuda import device_count
|
|
14
14
|
|
|
15
|
-
from konfai import
|
|
16
|
-
from konfai.data.
|
|
15
|
+
from konfai import KONFAI_STATE, KONFAI_ROOT
|
|
16
|
+
from konfai.data.patching import DatasetPatch, DatasetManager
|
|
17
17
|
from konfai.utils.config import config
|
|
18
18
|
from konfai.utils.utils import memoryInfo, cpuInfo, memoryForecast, getMemory, State
|
|
19
19
|
from konfai.utils.dataset import Dataset, Attribute
|
|
@@ -36,7 +36,7 @@ class GroupTransform:
|
|
|
36
36
|
if self._pre_transforms is not None:
|
|
37
37
|
if isinstance(self._pre_transforms, dict):
|
|
38
38
|
for classpath, transform in self._pre_transforms.items():
|
|
39
|
-
transform = transform.getTransform(classpath, DL_args = "{}.Dataset.groups_src.{}.groups_dest.{}.pre_transforms".format(
|
|
39
|
+
transform = transform.getTransform(classpath, DL_args = "{}.Dataset.groups_src.{}.groups_dest.{}.pre_transforms".format(KONFAI_ROOT(), group_src, group_dest))
|
|
40
40
|
transform.setDatasets(datasets)
|
|
41
41
|
self.pre_transforms.append(transform)
|
|
42
42
|
else:
|
|
@@ -47,7 +47,7 @@ class GroupTransform:
|
|
|
47
47
|
if self._post_transforms is not None:
|
|
48
48
|
if isinstance(self._post_transforms, dict):
|
|
49
49
|
for classpath, transform in self._post_transforms.items():
|
|
50
|
-
transform = transform.getTransform(classpath, DL_args = "{}.Dataset.groups_src.{}.groups_dest.{}.post_transforms".format(
|
|
50
|
+
transform = transform.getTransform(classpath, DL_args = "{}.Dataset.groups_src.{}.groups_dest.{}.post_transforms".format(KONFAI_ROOT(), group_src, group_dest))
|
|
51
51
|
transform.setDatasets(datasets)
|
|
52
52
|
self.post_transforms.append(transform)
|
|
53
53
|
else:
|
|
@@ -64,7 +64,7 @@ class GroupTransform:
|
|
|
64
64
|
class Group(dict[str, GroupTransform]):
|
|
65
65
|
|
|
66
66
|
@config()
|
|
67
|
-
def __init__(self, groups_dest: dict[str, GroupTransform] = {"default": GroupTransform()}):
|
|
67
|
+
def __init__(self, groups_dest: dict[str, GroupTransform] = {"default:group_dest": GroupTransform()}):
|
|
68
68
|
super().__init__(groups_dest)
|
|
69
69
|
|
|
70
70
|
class CustomSampler(Sampler[int]):
|
|
@@ -127,7 +127,7 @@ class DatasetIter(data.Dataset):
|
|
|
127
127
|
total=len(indexs),
|
|
128
128
|
desc="Caching : init | {} | {}".format(memoryForecast(memory_init, 0, self.nb_dataset), cpuInfo()),
|
|
129
129
|
leave=False,
|
|
130
|
-
disable=self.rank != 0 and "
|
|
130
|
+
disable=self.rank != 0 and "KONFAI_CLUSTER" not in os.environ
|
|
131
131
|
)
|
|
132
132
|
|
|
133
133
|
def process(index):
|
|
@@ -135,7 +135,7 @@ class DatasetIter(data.Dataset):
|
|
|
135
135
|
with memory_lock:
|
|
136
136
|
pbar.set_description("Caching : {} | {} | {}".format(memoryInfo(), memoryForecast(memory_init, index, self.nb_dataset), cpuInfo()))
|
|
137
137
|
pbar.update(1)
|
|
138
|
-
with ThreadPoolExecutor(max_workers=os.cpu_count()//device_count()) as executor:
|
|
138
|
+
with ThreadPoolExecutor(max_workers=os.cpu_count()//(device_count() if device_count() > 0 else 1)) as executor:
|
|
139
139
|
futures = [executor.submit(process, index) for index in indexs]
|
|
140
140
|
for _ in as_completed(futures):
|
|
141
141
|
pass
|
|
@@ -295,7 +295,7 @@ class Data(ABC):
|
|
|
295
295
|
return [[] for _ in range(world_size)]
|
|
296
296
|
|
|
297
297
|
maps = []
|
|
298
|
-
if
|
|
298
|
+
if KONFAI_STATE() == str(State.PREDICTION) or KONFAI_STATE() == str(State.EVALUATION):
|
|
299
299
|
np_map = np.asarray(map)
|
|
300
300
|
unique_index = np.unique(np_map[:, 0])
|
|
301
301
|
offset = int(np.ceil(len(unique_index)/world_size))
|
|
@@ -314,7 +314,11 @@ class Data(ABC):
|
|
|
314
314
|
def getData(self, world_size: int) -> list[list[DataLoader]]:
|
|
315
315
|
datasets: dict[str, list[(str, bool)]] = {}
|
|
316
316
|
for dataset_filename in self.dataset_filenames:
|
|
317
|
-
if len(dataset_filename.split(":")) ==
|
|
317
|
+
if len(dataset_filename.split(":")) == 1:
|
|
318
|
+
filename = dataset_filename
|
|
319
|
+
format = "mha"
|
|
320
|
+
append = True
|
|
321
|
+
elif len(dataset_filename.split(":")) == 2:
|
|
318
322
|
filename, format = dataset_filename.split(":")
|
|
319
323
|
append = True
|
|
320
324
|
else:
|
|
@@ -331,8 +335,9 @@ class Data(ABC):
|
|
|
331
335
|
else:
|
|
332
336
|
datasets[group] = [(filename, append)]
|
|
333
337
|
for group_src in self.groups_src:
|
|
334
|
-
|
|
335
|
-
|
|
338
|
+
if group_src not in datasets:
|
|
339
|
+
raise ValueError("[DatasetManager] Error: group source {} not found. Available groups: {}".format(group_src, list(datasets.keys())))
|
|
340
|
+
|
|
336
341
|
for group_dest in self.groups_src[group_src]:
|
|
337
342
|
self.groups_src[group_src][group_dest].load(group_src, group_dest, [self.datasets[filename] for filename, _ in datasets[group_src]])
|
|
338
343
|
for key, dataAugmentations in self.dataAugmentationsList.items():
|
|
@@ -422,8 +427,8 @@ class Data(ABC):
|
|
|
422
427
|
class DataTrain(Data):
|
|
423
428
|
|
|
424
429
|
@config("Dataset")
|
|
425
|
-
def __init__(self, dataset_filenames : list[str] = ["default
|
|
426
|
-
groups_src : dict[str, Group] = {"default" : Group()},
|
|
430
|
+
def __init__(self, dataset_filenames : list[str] = ["default:./Dataset"],
|
|
431
|
+
groups_src : dict[str, Group] = {"default:group_src" : Group()},
|
|
427
432
|
augmentations : Union[dict[str, DataAugmentationsList], None] = {"DataAugmentation_0" : DataAugmentationsList()},
|
|
428
433
|
inlineAugmentations: bool = False,
|
|
429
434
|
patch : Union[DatasetPatch, None] = DatasetPatch(),
|
|
@@ -437,7 +442,7 @@ class DataTrain(Data):
|
|
|
437
442
|
class DataPrediction(Data):
|
|
438
443
|
|
|
439
444
|
@config("Dataset")
|
|
440
|
-
def __init__(self, dataset_filenames : list[str] = ["default
|
|
445
|
+
def __init__(self, dataset_filenames : list[str] = ["default:./Dataset"],
|
|
441
446
|
groups_src : dict[str, Group] = {"default" : Group()},
|
|
442
447
|
augmentations : Union[dict[str, DataAugmentationsList], None] = {"DataAugmentation_0" : DataAugmentationsList()},
|
|
443
448
|
inlineAugmentations: bool = False,
|
|
@@ -452,19 +457,10 @@ class DataPrediction(Data):
|
|
|
452
457
|
class DataMetric(Data):
|
|
453
458
|
|
|
454
459
|
@config("Dataset")
|
|
455
|
-
def __init__(self, dataset_filenames : list[str] = ["default
|
|
460
|
+
def __init__(self, dataset_filenames : list[str] = ["default:./Dataset"],
|
|
456
461
|
groups_src : dict[str, Group] = {"default" : Group()},
|
|
457
462
|
subset : Union[PredictionSubset, dict[str, PredictionSubset]] = PredictionSubset(),
|
|
458
463
|
validation: Union[str, None] = None,
|
|
459
464
|
num_workers : int = 4) -> None:
|
|
460
465
|
|
|
461
|
-
super().__init__(dataset_filenames=dataset_filenames, groups_src=groups_src, patch=None, use_cache=False, subset=subset, num_workers=num_workers, batch_size=1, train_size=1 if validation is None else validation)
|
|
462
|
-
|
|
463
|
-
class DataHyperparameter(Data):
|
|
464
|
-
|
|
465
|
-
@config("Dataset")
|
|
466
|
-
def __init__(self, dataset_filenames : list[str] = ["default:Dataset.h5"],
|
|
467
|
-
groups_src : dict[str, Group] = {"default" : Group()},
|
|
468
|
-
patch : Union[DatasetPatch, None] = DatasetPatch()) -> None:
|
|
469
|
-
|
|
470
|
-
super().__init__(dataset_filenames, groups_src, patch, False, PredictionSubset(), 0, False, 1)
|
|
466
|
+
super().__init__(dataset_filenames=dataset_filenames, groups_src=groups_src, patch=None, use_cache=False, subset=subset, num_workers=num_workers, batch_size=1, train_size=1 if validation is None else validation)
|
|
@@ -37,13 +37,13 @@ class PathCombine(ABC):
|
|
|
37
37
|
3D :
|
|
38
38
|
AAA+AAB+ABA+ABB+BAA+BAB+BBA+BBB
|
|
39
39
|
|
|
40
|
-
AAC+ABC+BAC+BBC
|
|
41
|
-
ACA+ACB+BCA+BCB
|
|
42
40
|
CAA+CAB+CBA+CBB
|
|
41
|
+
ACA+ACB+BCA+BCB
|
|
42
|
+
AAC+ABC+BAC+BBC
|
|
43
43
|
|
|
44
|
-
ACC+BCC
|
|
45
|
-
CAC+CBC
|
|
46
44
|
CCA+CCB
|
|
45
|
+
CAC+CBC
|
|
46
|
+
ACC+BCC
|
|
47
47
|
|
|
48
48
|
"""
|
|
49
49
|
def setPatchConfig(self, patch_size: list[int], overlap: int):
|
|
@@ -214,13 +214,13 @@ class Patch(ABC):
|
|
|
214
214
|
class DatasetPatch(Patch):
|
|
215
215
|
|
|
216
216
|
@config("Patch")
|
|
217
|
-
def __init__(self, patch_size : list[int] = [128,
|
|
217
|
+
def __init__(self, patch_size : list[int] = [128, 128, 128], overlap : Union[int, None] = None, mask: Union[str, None] = None, padValue: float = 0, extend_slice: int = 0) -> None:
|
|
218
218
|
super().__init__(patch_size, overlap, mask, padValue, extend_slice)
|
|
219
219
|
|
|
220
220
|
class ModelPatch(Patch):
|
|
221
221
|
|
|
222
222
|
@config("Patch")
|
|
223
|
-
def __init__(self, patch_size : list[int] = [128,
|
|
223
|
+
def __init__(self, patch_size : list[int] = [128, 128, 128], overlap : Union[int, None] = None, patchCombine: Union[str, None] = None, mask: Union[str, None] = None, padValue: float = 0, extend_slice: int = 0) -> None:
|
|
224
224
|
super().__init__(patch_size, overlap, mask, padValue, extend_slice)
|
|
225
225
|
self.patchCombine = patchCombine
|
|
226
226
|
|
|
@@ -130,7 +130,7 @@ class Standardize(Transform):
|
|
|
130
130
|
|
|
131
131
|
class TensorCast(Transform):
|
|
132
132
|
|
|
133
|
-
def __init__(self, dtype : str = "
|
|
133
|
+
def __init__(self, dtype : str = "float32") -> None:
|
|
134
134
|
self.dtype : torch.dtype = getattr(torch, dtype)
|
|
135
135
|
|
|
136
136
|
def __call__(self, name: str, input : torch.Tensor, cache_attribute: Attribute) -> torch.Tensor:
|
|
@@ -142,7 +142,7 @@ class TensorCast(Transform):
|
|
|
142
142
|
|
|
143
143
|
class Padding(Transform):
|
|
144
144
|
|
|
145
|
-
def __init__(self, padding : list[int] = [0,0,0,0,0,0], mode : str = "
|
|
145
|
+
def __init__(self, padding : list[int] = [0,0,0,0,0,0], mode : str = "constant") -> None:
|
|
146
146
|
self.padding = padding
|
|
147
147
|
self.mode = mode
|
|
148
148
|
|
|
@@ -332,7 +332,7 @@ class ResampleTransform(Transform):
|
|
|
332
332
|
|
|
333
333
|
class Mask(Transform):
|
|
334
334
|
|
|
335
|
-
def __init__(self, path : str = "default
|
|
335
|
+
def __init__(self, path : str = "./default.mha", value_outside: int = 0) -> None:
|
|
336
336
|
self.path = path
|
|
337
337
|
self.value_outside = value_outside
|
|
338
338
|
|
|
@@ -7,10 +7,10 @@ import json
|
|
|
7
7
|
import shutil
|
|
8
8
|
import builtins
|
|
9
9
|
import importlib
|
|
10
|
-
from konfai import EVALUATIONS_DIRECTORY, PREDICTIONS_DIRECTORY,
|
|
10
|
+
from konfai import EVALUATIONS_DIRECTORY, PREDICTIONS_DIRECTORY, KONFAI_ROOT, CONFIG_FILE
|
|
11
11
|
from konfai.utils.config import config
|
|
12
12
|
from konfai.utils.utils import _getModule, DistributedObject, synchronize_data
|
|
13
|
-
from konfai.data.
|
|
13
|
+
from konfai.data.data_manager import DataMetric
|
|
14
14
|
|
|
15
15
|
class CriterionsAttr():
|
|
16
16
|
|
|
@@ -28,7 +28,7 @@ class CriterionsLoader():
|
|
|
28
28
|
criterions = {}
|
|
29
29
|
for module_classpath, criterionsAttr in self.criterionsLoader.items():
|
|
30
30
|
module, name = _getModule(module_classpath, "metric.measure")
|
|
31
|
-
criterions[config("{}.metrics.{}.targetsCriterions.{}.criterionsLoader.{}".format(
|
|
31
|
+
criterions[config("{}.metrics.{}.targetsCriterions.{}.criterionsLoader.{}".format(KONFAI_ROOT(), output_group, target_group, module_classpath))(getattr(importlib.import_module(module), name))(config = None)] = criterionsAttr
|
|
32
32
|
return criterions
|
|
33
33
|
|
|
34
34
|
class TargetCriterionsLoader():
|
|
@@ -86,8 +86,8 @@ class Statistics():
|
|
|
86
86
|
class Evaluator(DistributedObject):
|
|
87
87
|
|
|
88
88
|
@config("Evaluator")
|
|
89
|
-
def __init__(self, train_name: str = "default:
|
|
90
|
-
if os.environ["
|
|
89
|
+
def __init__(self, train_name: str = "default:TRAIN_01", metrics: dict[str, TargetCriterionsLoader] = {"default": TargetCriterionsLoader()}, dataset : DataMetric = DataMetric(),) -> None:
|
|
90
|
+
if os.environ["KONFAI_CONFIG_MODE"] != "Done":
|
|
91
91
|
exit(0)
|
|
92
92
|
super().__init__(train_name)
|
|
93
93
|
self.metric_path = EVALUATIONS_DIRECTORY()+self.name+"/"
|
|
@@ -111,7 +111,7 @@ class Evaluator(DistributedObject):
|
|
|
111
111
|
|
|
112
112
|
def setup(self, world_size: int):
|
|
113
113
|
if os.path.exists(self.metric_path):
|
|
114
|
-
if os.environ["
|
|
114
|
+
if os.environ["KONFAI_OVERWRITE"] != "True":
|
|
115
115
|
accept = builtins.input("The metric {} already exists ! Do you want to overwrite it (yes,no) : ".format(self.name))
|
|
116
116
|
if accept != "yes":
|
|
117
117
|
return
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
import argparse
|
|
2
|
+
import os
|
|
3
|
+
from torch.cuda import device_count
|
|
4
|
+
import torch.multiprocessing as mp
|
|
5
|
+
from konfai.utils.utils import setup, TensorBoard, Log
|
|
6
|
+
|
|
7
|
+
import sys
|
|
8
|
+
sys.path.insert(0, os.getcwd())
|
|
9
|
+
|
|
10
|
+
def main():
    """Console entry point: run the configured KonfAI task on this machine.

    Builds the argument parser, hands it to ``setup`` (from
    ``konfai.utils.utils``) to obtain the object to run, then spawns one
    worker process per CUDA device reported by ``device_count()``, or a
    single process when no device is reported.

    Any ``Exception`` is reported on stderr and the interpreter exits
    with status 1.
    """
    parser = argparse.ArgumentParser(description="KonfAI", formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    try:
        with setup(parser) as distributedObject:
            with Log(distributedObject.name):
                # Always run at least one process, even without a GPU.
                world_size = max(device_count(), 1)
                distributedObject.setup(world_size)
                with TensorBoard(distributedObject.name):
                    mp.spawn(distributedObject, nprocs=world_size)
    except Exception as e:
        # Report failures on stderr and use sys.exit rather than the
        # site-provided ``exit`` helper, which is not guaranteed to exist.
        print(e, file=sys.stderr)
        sys.exit(1)
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def cluster():
    """Console entry point: submit the configured KonfAI task to a cluster.

    Adds the cluster-manager options (``--name``, ``--num-nodes``,
    ``--memory``, ``--time-limit``, ``--resubmit``) to the parser, lets
    ``setup`` (from ``konfai.utils.utils``) build the object to run, and
    submits that object through a ``submitit.AutoExecutor`` writing to
    ``./Cluster/``.

    Any ``Exception`` is reported on stderr and the interpreter exits
    with status 1.
    """
    parser = argparse.ArgumentParser(description="KonfAI", formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    # Cluster manager arguments
    cluster_args = parser.add_argument_group('Cluster manager arguments')
    cluster_args.add_argument('--name', type=str, help='Task name', required=True)
    cluster_args.add_argument('--num-nodes', '--num_nodes', default=1, type=int, help='Number of nodes')
    cluster_args.add_argument('--memory', type=int, default=16, help='Amount of memory per node')
    cluster_args.add_argument('--time-limit', '--time_limit', type=int, default=1440, help='Job time limit in minutes')
    cluster_args.add_argument('--resubmit', action='store_true', help='Automatically resubmit job just before timeout')
    try:
        with setup(parser) as distributedObject:
            args = parser.parse_args()
            config = vars(args)
            # These flags are read elsewhere in the package via os.environ.
            os.environ["KONFAI_OVERWRITE"] = "True"
            os.environ["KONFAI_CLUSTER"] = "True"

            # NOTE(review): "gpu" and "num_workers" are not declared here;
            # presumably setup(parser) registers them - confirm.
            n_gpu = len(config["gpu"].split(","))
            distributedObject.setup(n_gpu*int(config["num_nodes"]))

            # Imported lazily so submitit is only needed for cluster runs.
            import submitit
            executor = submitit.AutoExecutor(folder="./Cluster/")
            executor.update_parameters(
                name=config["name"],
                mem_gb=config["memory"],
                gpus_per_node=n_gpu,
                tasks_per_node=n_gpu//distributedObject.size,
                cpus_per_task=config["num_workers"],
                nodes=config["num_nodes"],
                timeout_min=config["time_limit"],
            )
            with TensorBoard(distributedObject.name):
                executor.submit(distributedObject)
    except Exception as e:
        # Report failures on stderr and use sys.exit rather than the
        # site-provided ``exit`` helper, which is not guaranteed to exist.
        print(e, file=sys.stderr)
        sys.exit(1)
|
|
@@ -17,7 +17,7 @@ from abc import abstractmethod
|
|
|
17
17
|
|
|
18
18
|
from konfai.utils.config import config
|
|
19
19
|
from konfai.utils.utils import _getModule
|
|
20
|
-
from konfai.data.
|
|
20
|
+
from konfai.data.patching import ModelPatch
|
|
21
21
|
from konfai.network.blocks import LatentDistribution
|
|
22
22
|
from konfai.network.network import ModelLoader, Network
|
|
23
23
|
|
|
@@ -92,7 +92,7 @@ class PSNR(MaskedLoss):
|
|
|
92
92
|
return psnr
|
|
93
93
|
|
|
94
94
|
def __init__(self, dynamic_range: Union[float, None] = None) -> None:
|
|
95
|
-
dynamic_range = dynamic_range if dynamic_range else 1024+
|
|
95
|
+
dynamic_range = dynamic_range if dynamic_range else 1024+3071
|
|
96
96
|
super().__init__(partial(PSNR._loss, dynamic_range), False)
|
|
97
97
|
|
|
98
98
|
class SSIM(MaskedLoss):
|
|
@@ -143,8 +143,11 @@ class Dice(Criterion):
|
|
|
143
143
|
target = self.flatten(target)
|
|
144
144
|
return (2.*(input * target).sum() + self.smooth)/(input.sum() + target.sum() + self.smooth)
|
|
145
145
|
|
|
146
|
+
|
|
146
147
|
def forward(self, output: torch.Tensor, *targets : list[torch.Tensor]) -> torch.Tensor:
|
|
147
148
|
target = targets[0]
|
|
149
|
+
if output.shape[1] == 1:
|
|
150
|
+
output = F.one_hot(output.type(torch.int64), num_classes=torch.max(output).item()+1).permute(0, len(target.shape), *[i+1 for i in range(len(target.shape)-1)]).float()
|
|
148
151
|
target = F.one_hot(target.type(torch.int64), num_classes=output.shape[1]).permute(0, len(target.shape), *[i+1 for i in range(len(target.shape)-1)]).float().squeeze(2)
|
|
149
152
|
return 1-torch.mean(self.dice_per_channel(output, target))
|
|
150
153
|
|
|
@@ -239,7 +242,7 @@ class PerceptualLoss(Criterion):
|
|
|
239
242
|
@config(None)
|
|
240
243
|
def __init__(self, losses: dict[str, float] = {"Gram": 1, "torch_nn_L1Loss": 1}) -> None:
|
|
241
244
|
self.losses = losses
|
|
242
|
-
self.DL_args = os.environ['
|
|
245
|
+
self.DL_args = os.environ['KONFAI_CONFIG_PATH'] if "KONFAI_CONFIG_PATH" in os.environ else ""
|
|
243
246
|
|
|
244
247
|
def getLoss(self) -> dict[torch.nn.Module, float]:
|
|
245
248
|
result: dict[torch.nn.Module, float] = {}
|
|
@@ -252,7 +255,7 @@ class PerceptualLoss(Criterion):
|
|
|
252
255
|
super().__init__()
|
|
253
256
|
self.path_model = path_model
|
|
254
257
|
if self.path_model not in modelsRegister:
|
|
255
|
-
self.model = modelLoader.getModel(train=False, DL_args=os.environ['
|
|
258
|
+
self.model = modelLoader.getModel(train=False, DL_args=os.environ['KONFAI_CONFIG_PATH'].split("PerceptualLoss")[0]+"PerceptualLoss.Model", DL_without=["optimizer", "schedulers", "nb_batch_per_step", "init_type", "init_gain", "outputsCriterions", "drop_p"])
|
|
256
259
|
if path_model.startswith("https"):
|
|
257
260
|
state_dict = torch.hub.load_state_dict_from_url(path_model)
|
|
258
261
|
state_dict = {"Model": {self.model.getName() : state_dict["model"]}}
|
|
@@ -2,7 +2,7 @@ import torch
|
|
|
2
2
|
import torch.nn.functional as F
|
|
3
3
|
from konfai.network import network, blocks
|
|
4
4
|
from konfai.utils.config import config
|
|
5
|
-
from konfai.data.
|
|
5
|
+
from konfai.data.patching import ModelPatch
|
|
6
6
|
|
|
7
7
|
"""
|
|
8
8
|
"convnext_tiny_1k": "https://dl.fbaipublicfiles.com/convnext/convnext_tiny_1k_224_ema.pth", depths=[3, 3, 9, 3], dims=[96, 192, 384, 768]
|
|
@@ -3,7 +3,7 @@ from typing import Type
|
|
|
3
3
|
import torch
|
|
4
4
|
from konfai.network import network, blocks
|
|
5
5
|
from konfai.utils.config import config
|
|
6
|
-
from konfai.data.
|
|
6
|
+
from konfai.data.patching import ModelPatch
|
|
7
7
|
|
|
8
8
|
"""
|
|
9
9
|
'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth', dim = 2, in_channels = 3, depths=[2, 2, 2, 2], widths = [64, 64, 128, 256, 512], num_classes=1000, useBottleneck=False
|
|
@@ -3,7 +3,7 @@ import torch
|
|
|
3
3
|
|
|
4
4
|
from konfai.network import network, blocks
|
|
5
5
|
from konfai.utils.config import config
|
|
6
|
-
from konfai.data.
|
|
6
|
+
from konfai.data.patching import ModelPatch
|
|
7
7
|
|
|
8
8
|
class MappingNetwork(network.ModuleArgsDict):
|
|
9
9
|
def __init__(self, z_dim: int, c_dim: int, w_dim: int, num_layers: int, embed_features: int, layer_features: int):
|
|
@@ -8,7 +8,7 @@ import numpy as np
|
|
|
8
8
|
|
|
9
9
|
from konfai.network import network, blocks
|
|
10
10
|
from konfai.utils.config import config
|
|
11
|
-
from konfai.data.
|
|
11
|
+
from konfai.data.patching import ModelPatch
|
|
12
12
|
from konfai.utils.utils import gpuInfo
|
|
13
13
|
from konfai.metric.measure import Criterion
|
|
14
14
|
|
|
@@ -5,7 +5,7 @@ import numpy as np
|
|
|
5
5
|
|
|
6
6
|
from konfai.network import network, blocks
|
|
7
7
|
from konfai.utils.config import config
|
|
8
|
-
from konfai.data.
|
|
8
|
+
from konfai.data.patching import ModelPatch, Attribute
|
|
9
9
|
from konfai.data import augmentation
|
|
10
10
|
from konfai.models.segmentation import UNet, NestedUNet
|
|
11
11
|
from konfai.models.generation.ddpm import DDPM
|
|
@@ -13,11 +13,11 @@ from torch.utils.checkpoint import checkpoint
|
|
|
13
13
|
from typing import Union
|
|
14
14
|
from enum import Enum
|
|
15
15
|
|
|
16
|
-
from konfai import
|
|
16
|
+
from konfai import KONFAI_ROOT
|
|
17
17
|
from konfai.metric.schedulers import Scheduler
|
|
18
18
|
from konfai.utils.config import config
|
|
19
19
|
from konfai.utils.utils import State, _getModule, getDevice, getGPUMemory
|
|
20
|
-
from konfai.data.
|
|
20
|
+
from konfai.data.patching import Accumulator, ModelPatch
|
|
21
21
|
|
|
22
22
|
class NetState(Enum):
|
|
23
23
|
TRAIN = 0,
|
|
@@ -40,7 +40,7 @@ class OptimizerLoader():
|
|
|
40
40
|
|
|
41
41
|
def getOptimizer(self, key: str, parameter: Iterator[torch.nn.parameter.Parameter]) -> torch.optim.Optimizer:
|
|
42
42
|
torch.optim.AdamW
|
|
43
|
-
return config("{}.Model.{}.Optimizer".format(
|
|
43
|
+
return config("{}.Model.{}.Optimizer".format(KONFAI_ROOT(), key))(getattr(importlib.import_module('torch.optim'), self.name))(parameter, config = None)
|
|
44
44
|
|
|
45
45
|
class SchedulerStep():
|
|
46
46
|
|
|
@@ -98,8 +98,8 @@ class CriterionsLoader():
|
|
|
98
98
|
for module_classpath, criterionsAttr in self.criterionsLoader.items():
|
|
99
99
|
module, name = _getModule(module_classpath, "metric.measure")
|
|
100
100
|
criterionsAttr.isTorchCriterion = module.startswith("torch")
|
|
101
|
-
criterionsAttr.sheduler = criterionsAttr.l.getShedulers("{}.Model.{}.outputsCriterions.{}.targetsCriterions.{}.criterionsLoader.{}".format(
|
|
102
|
-
criterions[config("{}.Model.{}.outputsCriterions.{}.targetsCriterions.{}.criterionsLoader.{}".format(
|
|
101
|
+
criterionsAttr.sheduler = criterionsAttr.l.getShedulers("{}.Model.{}.outputsCriterions.{}.targetsCriterions.{}.criterionsLoader.{}".format(KONFAI_ROOT(), model_classname, output_group, target_group, module_classpath))
|
|
102
|
+
criterions[config("{}.Model.{}.outputsCriterions.{}.targetsCriterions.{}.criterionsLoader.{}".format(KONFAI_ROOT(), model_classname, output_group, target_group, module_classpath))(getattr(importlib.import_module(module), name))(config = None)] = criterionsAttr
|
|
103
103
|
return criterions
|
|
104
104
|
|
|
105
105
|
class TargetCriterionsLoader():
|
|
@@ -753,14 +753,14 @@ class Network(ModuleArgsDict, ABC):
|
|
|
753
753
|
output_layer_accumulator : dict[str, Accumulator] = {}
|
|
754
754
|
output_layer_patch_indexed : dict[str, Patch_Indexed] = {}
|
|
755
755
|
it = 0
|
|
756
|
-
debug = "
|
|
756
|
+
debug = "KONFAI_DEBUG" in os.environ
|
|
757
757
|
for (nameTmp, output_layer) in self.named_forward(*inputs):
|
|
758
758
|
name = nameTmp.replace(";accu;", "")
|
|
759
759
|
if debug:
|
|
760
|
-
if "
|
|
761
|
-
os.environ["
|
|
760
|
+
if "KONFAI_DEBUG_LAST_LAYER" in os.environ:
|
|
761
|
+
os.environ["KONFAI_DEBUG_LAST_LAYER"] = "{}|{}:{}:{}".format(os.environ["KONFAI_DEBUG_LAST_LAYER"], name, getGPUMemory(output_layer.device), str(output_layer.device).replace("cuda:", ""))
|
|
762
762
|
else:
|
|
763
|
-
os.environ["
|
|
763
|
+
os.environ["KONFAI_DEBUG_LAST_LAYER"] = "{}:{}:{}".format(name, getGPUMemory(output_layer.device), str(output_layer.device).replace("cuda:", ""))
|
|
764
764
|
it += 1
|
|
765
765
|
if name in layers_name or nameTmp in layers_name:
|
|
766
766
|
if ";accu;" in nameTmp:
|
|
@@ -918,12 +918,12 @@ class Network(ModuleArgsDict, ABC):
|
|
|
918
918
|
class ModelLoader():
|
|
919
919
|
|
|
920
920
|
@config("Model")
|
|
921
|
-
def __init__(self, classpath : str = "default:segmentation.UNet") -> None:
|
|
922
|
-
self.module, self.name = _getModule(classpath
|
|
921
|
+
def __init__(self, classpath : str = "default:segmentation.UNet.UNet") -> None:
|
|
922
|
+
self.module, self.name = _getModule(classpath, "models")
|
|
923
923
|
|
|
924
924
|
def getModel(self, train : bool = True, DL_args: Union[str, None] = None, DL_without=["optimizer", "schedulers", "nb_batch_per_step", "init_type", "init_gain"]) -> Network:
|
|
925
925
|
if not DL_args:
|
|
926
|
-
DL_args="{}.Model".format(
|
|
926
|
+
DL_args="{}.Model".format(KONFAI_ROOT())
|
|
927
927
|
model = partial(getattr(importlib.import_module(self.module), self.name), config = None, DL_args=DL_args)
|
|
928
928
|
if not train:
|
|
929
929
|
model = partial(model, DL_without = DL_without)
|
|
@@ -6,12 +6,12 @@ import torch
|
|
|
6
6
|
import tqdm
|
|
7
7
|
import os
|
|
8
8
|
|
|
9
|
-
from konfai import MODELS_DIRECTORY, PREDICTIONS_DIRECTORY, CONFIG_FILE, MODEL,
|
|
9
|
+
from konfai import MODELS_DIRECTORY, PREDICTIONS_DIRECTORY, CONFIG_FILE, MODEL, KONFAI_ROOT
|
|
10
10
|
from konfai.utils.config import config
|
|
11
11
|
from konfai.utils.utils import State, get_patch_slices_from_nb_patch_per_dim, NeedDevice, _getModule, DistributedObject, DataLog, description
|
|
12
12
|
from konfai.utils.dataset import Dataset, Attribute
|
|
13
|
-
from konfai.data.
|
|
14
|
-
from konfai.data.
|
|
13
|
+
from konfai.data.data_manager import DataPrediction, DatasetIter
|
|
14
|
+
from konfai.data.patching import Accumulator, PathCombine
|
|
15
15
|
from konfai.network.network import ModelLoader, Network, NetState, CPU_Model
|
|
16
16
|
from konfai.data.transform import Transform, TransformLoader
|
|
17
17
|
|
|
@@ -52,13 +52,13 @@ class OutDataset(Dataset, NeedDevice, ABC):
|
|
|
52
52
|
|
|
53
53
|
if _transform_type is not None:
|
|
54
54
|
for classpath, transform in _transform_type.items():
|
|
55
|
-
transform = transform.getTransform(classpath, DL_args = "{}.outsDataset.{}.OutDataset.{}".format(
|
|
55
|
+
transform = transform.getTransform(classpath, DL_args = "{}.outsDataset.{}.OutDataset.{}".format(KONFAI_ROOT(), name_layer, name))
|
|
56
56
|
transform.setDatasets(datasets)
|
|
57
57
|
transform_type.append(transform)
|
|
58
58
|
|
|
59
59
|
if self._patchCombine is not None:
|
|
60
|
-
module, name = _getModule(self._patchCombine, "data.
|
|
61
|
-
self.patchCombine = getattr(importlib.import_module(module), name)(config = None, DL_args = "{}.outsDataset.{}.OutDataset".format(
|
|
60
|
+
module, name = _getModule(self._patchCombine, "data.patching")
|
|
61
|
+
self.patchCombine = getattr(importlib.import_module(module), name)(config = None, DL_args = "{}.outsDataset.{}.OutDataset".format(KONFAI_ROOT(), name_layer))
|
|
62
62
|
|
|
63
63
|
def setPatchConfig(self, patchSize: Union[list[int], None], overlap: Union[int, None], nb_data_augmentation: int) -> None:
|
|
64
64
|
if patchSize is not None and overlap is not None:
|
|
@@ -94,7 +94,7 @@ class OutDataset(Dataset, NeedDevice, ABC):
|
|
|
94
94
|
class OutSameAsGroupDataset(OutDataset):
|
|
95
95
|
|
|
96
96
|
@config("OutDataset")
|
|
97
|
-
def __init__(self, dataset_filename: str = "Dataset:
|
|
97
|
+
def __init__(self, dataset_filename: str = "./Dataset:mha", group: str = "default", sameAsGroup: str = "default", pre_transforms : dict[str, TransformLoader] = {"default:Normalize": TransformLoader()}, post_transforms : dict[str, TransformLoader] = {"default:Normalize": TransformLoader()}, final_transforms : dict[str, TransformLoader] = {"default:Normalize": TransformLoader()}, patchCombine: Union[str, None] = None, redution: str = "mean", inverse_transform: bool = True) -> None:
|
|
98
98
|
super().__init__(dataset_filename, group, pre_transforms, post_transforms, final_transforms, patchCombine)
|
|
99
99
|
self.group_src, self.group_dest = sameAsGroup.split(":")
|
|
100
100
|
self.redution = redution
|
|
@@ -240,7 +240,7 @@ class _Predictor():
|
|
|
240
240
|
self.modelComposite.module.setState(NetState.PREDICTION)
|
|
241
241
|
desc = lambda : "Prediction : {}".format(description(self.modelComposite))
|
|
242
242
|
self.dataloader_prediction.dataset.load()
|
|
243
|
-
with tqdm.tqdm(iterable = enumerate(self.dataloader_prediction), leave=False, desc = desc(), total=len(self.dataloader_prediction), disable=self.global_rank != 0 and "
|
|
243
|
+
with tqdm.tqdm(iterable = enumerate(self.dataloader_prediction), leave=False, desc = desc(), total=len(self.dataloader_prediction), disable=self.global_rank != 0 and "KONFAI_CLUSTER" not in os.environ) as batch_iter:
|
|
244
244
|
dist.barrier()
|
|
245
245
|
for it, data_dict in batch_iter:
|
|
246
246
|
input = self.getInput(data_dict)
|
|
@@ -322,7 +322,7 @@ class Predictor(DistributedObject):
|
|
|
322
322
|
gpu_checkpoints: Union[list[str], None] = None,
|
|
323
323
|
outsDataset: Union[dict[str, OutDatasetLoader], None] = {"default:Default" : OutDatasetLoader()},
|
|
324
324
|
images_log: list[str] = []) -> None:
|
|
325
|
-
if os.environ["
|
|
325
|
+
if os.environ["KONFAI_CONFIG_MODE"] != "Done":
|
|
326
326
|
exit(0)
|
|
327
327
|
super().__init__(train_name)
|
|
328
328
|
self.manual_seed = manual_seed
|
|
@@ -374,7 +374,7 @@ class Predictor(DistributedObject):
|
|
|
374
374
|
for dataset_filename in self.datasets_filename:
|
|
375
375
|
path = self.predict_path +dataset_filename
|
|
376
376
|
if os.path.exists(path):
|
|
377
|
-
if os.environ["
|
|
377
|
+
if os.environ["KONFAI_OVERWRITE"] != "True":
|
|
378
378
|
accept = builtins.input("The prediction {} already exists ! Do you want to overwrite it (yes,no) : ".format(path))
|
|
379
379
|
if accept != "yes":
|
|
380
380
|
return
|
|
@@ -12,8 +12,8 @@ from torch.utils.tensorboard.writer import SummaryWriter
|
|
|
12
12
|
from torch.optim.swa_utils import AveragedModel
|
|
13
13
|
import torch.distributed as dist
|
|
14
14
|
|
|
15
|
-
from konfai import MODELS_DIRECTORY, CHECKPOINTS_DIRECTORY, STATISTICS_DIRECTORY, SETUPS_DIRECTORY, CONFIG_FILE, MODEL, DATE,
|
|
16
|
-
from konfai.data.
|
|
15
|
+
from konfai import MODELS_DIRECTORY, CHECKPOINTS_DIRECTORY, STATISTICS_DIRECTORY, SETUPS_DIRECTORY, CONFIG_FILE, MODEL, DATE, KONFAI_STATE
|
|
16
|
+
from konfai.data.data_manager import DataTrain
|
|
17
17
|
from konfai.utils.config import config
|
|
18
18
|
from konfai.utils.utils import State, DataLog, DistributedObject, description
|
|
19
19
|
from konfai.network.network import Network, ModelLoader, NetState, CPU_Model
|
|
@@ -72,7 +72,7 @@ class _Trainer():
|
|
|
72
72
|
self.modelEMA.module.setState(NetState.TRAIN)
|
|
73
73
|
|
|
74
74
|
desc = lambda : "Training : {}".format(description(self.model, self.modelEMA))
|
|
75
|
-
with tqdm.tqdm(iterable = enumerate(self.dataloader_training), desc = desc(), total=len(self.dataloader_training), leave=False, disable=self.global_rank != 0 and "
|
|
75
|
+
with tqdm.tqdm(iterable = enumerate(self.dataloader_training), desc = desc(), total=len(self.dataloader_training), leave=False, disable=self.global_rank != 0 and "KONFAI_CLUSTER" not in os.environ) as batch_iter:
|
|
76
76
|
for _, data_dict in batch_iter:
|
|
77
77
|
with torch.amp.autocast('cuda', enabled=self.autocast):
|
|
78
78
|
input = self.getInput(data_dict)
|
|
@@ -103,7 +103,7 @@ class _Trainer():
|
|
|
103
103
|
desc = lambda : "Validation : {}".format(description(self.model, self.modelEMA))
|
|
104
104
|
data_dict = None
|
|
105
105
|
self.dataloader_validation.dataset.load()
|
|
106
|
-
with tqdm.tqdm(iterable = enumerate(self.dataloader_validation), desc = desc(), total=len(self.dataloader_validation), leave=False, disable=self.global_rank != 0 and "
|
|
106
|
+
with tqdm.tqdm(iterable = enumerate(self.dataloader_validation), desc = desc(), total=len(self.dataloader_validation), leave=False, disable=self.global_rank != 0 and "KONFAI_CLUSTER" not in os.environ) as batch_iter:
|
|
107
107
|
for _, data_dict in batch_iter:
|
|
108
108
|
input = self.getInput(data_dict)
|
|
109
109
|
self.model(input)
|
|
@@ -205,7 +205,7 @@ class Trainer(DistributedObject):
|
|
|
205
205
|
def __init__( self,
|
|
206
206
|
model : ModelLoader = ModelLoader(),
|
|
207
207
|
dataset : DataTrain = DataTrain(),
|
|
208
|
-
train_name : str = "default:
|
|
208
|
+
train_name : str = "default:TRAIN_01",
|
|
209
209
|
manual_seed : Union[int, None] = None,
|
|
210
210
|
epochs: int = 100,
|
|
211
211
|
it_validation : Union[int, None] = None,
|
|
@@ -215,7 +215,7 @@ class Trainer(DistributedObject):
|
|
|
215
215
|
ema_decay : float = 0,
|
|
216
216
|
data_log: Union[list[str], None] = None,
|
|
217
217
|
save_checkpoint_mode: str= "BEST") -> None:
|
|
218
|
-
if os.environ["
|
|
218
|
+
if os.environ["KONFAI_CONFIG_MODE"] != "Done":
|
|
219
219
|
exit(0)
|
|
220
220
|
super().__init__(train_name)
|
|
221
221
|
self.manual_seed = manual_seed
|
|
@@ -292,9 +292,9 @@ class Trainer(DistributedObject):
|
|
|
292
292
|
return (1-self.ema_decay) * averaged_model_parameter + self.ema_decay * model_parameter
|
|
293
293
|
|
|
294
294
|
def setup(self, world_size: int):
|
|
295
|
-
state = State._member_map_[
|
|
296
|
-
if state != State.RESUME and os.path.exists(
|
|
297
|
-
if os.environ["
|
|
295
|
+
state = State._member_map_[KONFAI_STATE()]
|
|
296
|
+
if state != State.RESUME and os.path.exists(CHECKPOINTS_DIRECTORY()+self.name+"/"):
|
|
297
|
+
if os.environ["KONFAI_OVERWRITE"] != "True":
|
|
298
298
|
accept = input("The model {} already exists ! Do you want to overwrite it (yes,no) : ".format(self.name))
|
|
299
299
|
if accept != "yes":
|
|
300
300
|
return
|
|
@@ -3,9 +3,8 @@ import ruamel.yaml
|
|
|
3
3
|
import inspect
|
|
4
4
|
import collections
|
|
5
5
|
from copy import deepcopy
|
|
6
|
-
from typing import Union
|
|
6
|
+
from typing import Union, Literal, get_origin, get_args
|
|
7
7
|
import torch
|
|
8
|
-
|
|
9
8
|
from konfai import CONFIG_FILE
|
|
10
9
|
|
|
11
10
|
yaml = ruamel.yaml.YAML()
|
|
@@ -26,7 +25,7 @@ class Config():
|
|
|
26
25
|
if not os.path.exists(self.filename):
|
|
27
26
|
result = input("Create a new config file ? [no,yes,interactive] : ")
|
|
28
27
|
if result in ["yes", "interactive"]:
|
|
29
|
-
os.environ["
|
|
28
|
+
os.environ["KONFAI_CONFIG_MODE"] = "interactive" if result == "interactive" else "default"
|
|
30
29
|
else:
|
|
31
30
|
exit(0)
|
|
32
31
|
with open(self.filename, "w") as f:
|
|
@@ -69,7 +68,7 @@ class Config():
|
|
|
69
68
|
|
|
70
69
|
def __exit__(self, type, value, traceback) -> None:
|
|
71
70
|
self.yml.close()
|
|
72
|
-
if os.environ["
|
|
71
|
+
if os.environ["KONFAI_CONFIG_MODE"] == "remove":
|
|
73
72
|
if os.path.exists(CONFIG_FILE()):
|
|
74
73
|
os.remove(CONFIG_FILE())
|
|
75
74
|
return
|
|
@@ -87,22 +86,22 @@ class Config():
|
|
|
87
86
|
except:
|
|
88
87
|
result = input("\nKeep a default configuration file ? (yes,no) : ")
|
|
89
88
|
if result == "yes":
|
|
90
|
-
os.environ["
|
|
89
|
+
os.environ["KONFAI_CONFIG_MODE"] = "default"
|
|
91
90
|
else:
|
|
92
|
-
os.environ["
|
|
91
|
+
os.environ["KONFAI_CONFIG_MODE"] = "remove"
|
|
93
92
|
exit(0)
|
|
94
93
|
return default.split(":")[1] if len(default.split(":")) > 1 else default
|
|
95
94
|
|
|
96
95
|
@staticmethod
|
|
97
96
|
def _getInputDefault(name : str, default : Union[str, None], isList : bool = False) -> Union[list[Union[str, None]], str, None]:
|
|
98
97
|
if isinstance(default, str) and (default == "default" or (len(default.split(":")) > 1 and default.split(":")[0] == "default")):
|
|
99
|
-
if os.environ["
|
|
98
|
+
if os.environ["KONFAI_CONFIG_MODE"] == "interactive":
|
|
100
99
|
if isList:
|
|
101
100
|
list_tmp = []
|
|
102
101
|
key_tmp = "OK"
|
|
103
|
-
while key_tmp != "!" and os.environ["
|
|
102
|
+
while (key_tmp != "!" and key_tmp != " ") and os.environ["KONFAI_CONFIG_MODE"] == "interactive":
|
|
104
103
|
key_tmp = Config._getInput(name, default)
|
|
105
|
-
if key_tmp != "!":
|
|
104
|
+
if (key_tmp != "!" and key_tmp != " "):
|
|
106
105
|
if key_tmp == "":
|
|
107
106
|
key_tmp = default.split(":")[1] if len(default.split(":")) > 1 else default
|
|
108
107
|
list_tmp.append(key_tmp)
|
|
@@ -134,6 +133,7 @@ class Config():
|
|
|
134
133
|
list_tmp = []
|
|
135
134
|
for key in value_config:
|
|
136
135
|
list_tmp.extend(Config._getInputDefault(name, key, isList=True))
|
|
136
|
+
|
|
137
137
|
|
|
138
138
|
value = list_tmp
|
|
139
139
|
value_config = list_tmp
|
|
@@ -155,7 +155,7 @@ class Config():
|
|
|
155
155
|
dict_value[key] = value_tmp
|
|
156
156
|
value = dict_value
|
|
157
157
|
if isinstance(self.config, str):
|
|
158
|
-
os.environ['
|
|
158
|
+
os.environ['KONFAI_CONFIG_VARIABLE'] = "True"
|
|
159
159
|
return None
|
|
160
160
|
|
|
161
161
|
self.config[name] = value_config if value_config is not None else "None"
|
|
@@ -169,17 +169,30 @@ def config(key : Union[str, None] = None):
|
|
|
169
169
|
if "config" in kwargs:
|
|
170
170
|
filename = kwargs["config"]
|
|
171
171
|
if filename == None:
|
|
172
|
-
filename = os.environ['
|
|
172
|
+
filename = os.environ['KONFAI_CONFIG_FILE']
|
|
173
173
|
else:
|
|
174
|
-
os.environ['
|
|
174
|
+
os.environ['KONFAI_CONFIG_FILE'] = filename
|
|
175
175
|
key_tmp = kwargs["DL_args"]+("."+key if key is not None else "") if "DL_args" in kwargs else key
|
|
176
176
|
without = kwargs["DL_without"] if "DL_without" in kwargs else []
|
|
177
|
-
os.environ['
|
|
177
|
+
os.environ['KONFAI_CONFIG_PATH'] = key_tmp
|
|
178
178
|
with Config(filename, key_tmp) as config:
|
|
179
|
-
os.environ['
|
|
179
|
+
os.environ['KONFAI_CONFIG_VARIABLE'] = "False"
|
|
180
180
|
kwargs = {}
|
|
181
181
|
for param in list(inspect.signature(function).parameters.values())[len(args):]:
|
|
182
|
+
|
|
182
183
|
annotation = param.annotation
|
|
184
|
+
# --- support Literal ---
|
|
185
|
+
if get_origin(annotation) is Literal:
|
|
186
|
+
allowed_values = get_args(annotation)
|
|
187
|
+
default_value = param.default if param.default != inspect._empty else allowed_values[0]
|
|
188
|
+
value = config.getValue(param.name, f"default:{default_value}")
|
|
189
|
+
if value not in allowed_values:
|
|
190
|
+
raise ValueError(
|
|
191
|
+
f"[Config] Invalid value '{value}' for parameter '{param.name}'. "
|
|
192
|
+
f"Expected one of: {allowed_values}."
|
|
193
|
+
)
|
|
194
|
+
kwargs[param.name] = value
|
|
195
|
+
continue
|
|
183
196
|
if str(annotation).startswith("typing.Union") or str(annotation).startswith("typing.Optional"):
|
|
184
197
|
for i in annotation.__args__:
|
|
185
198
|
annotation = i
|
|
@@ -188,8 +201,24 @@ def config(key : Union[str, None] = None):
|
|
|
188
201
|
continue
|
|
189
202
|
if not annotation == inspect._empty:
|
|
190
203
|
if annotation not in [int, str, bool, float, torch.Tensor]:
|
|
191
|
-
if str(annotation).startswith("list") or str(annotation).startswith("tuple") or str(annotation).startswith("typing.Tuple"):
|
|
192
|
-
|
|
204
|
+
if str(annotation).startswith("list") or str(annotation).startswith("tuple") or str(annotation).startswith("typing.Tuple") or str(annotation).startswith("typing.List"):
|
|
205
|
+
elem_type = annotation.__args__[0]
|
|
206
|
+
values = config.getValue(param.name, param.default)
|
|
207
|
+
if getattr(elem_type, '__origin__', None) is Union:
|
|
208
|
+
valid_types = elem_type.__args__
|
|
209
|
+
result = []
|
|
210
|
+
for v in values:
|
|
211
|
+
for t in valid_types:
|
|
212
|
+
try:
|
|
213
|
+
if t == torch.Tensor and not isinstance(v, torch.Tensor):
|
|
214
|
+
v = torch.tensor(v)
|
|
215
|
+
result.append(t(v) if t != torch.Tensor else v)
|
|
216
|
+
break
|
|
217
|
+
except Exception:
|
|
218
|
+
continue
|
|
219
|
+
kwargs[param.name] = result
|
|
220
|
+
|
|
221
|
+
elif annotation.__args__[0] in [int, str, bool, float]:
|
|
193
222
|
values = config.getValue(param.name, param.default)
|
|
194
223
|
kwargs[param.name] = values
|
|
195
224
|
else:
|
|
@@ -204,9 +233,13 @@ def config(key : Union[str, None] = None):
|
|
|
204
233
|
else:
|
|
205
234
|
raise ConfigError()
|
|
206
235
|
else:
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
236
|
+
try:
|
|
237
|
+
kwargs[param.name] = annotation(config = filename, DL_args = key_tmp)
|
|
238
|
+
except Exception as e:
|
|
239
|
+
raise ValueError("[Config] Failed to instantiate {} with type {}".format(param.name, annotation.__name__))
|
|
240
|
+
|
|
241
|
+
if os.environ['KONFAI_CONFIG_VARIABLE'] == "True":
|
|
242
|
+
os.environ['KONFAI_CONFIG_VARIABLE'] = "False"
|
|
210
243
|
kwargs[param.name] = None
|
|
211
244
|
else:
|
|
212
245
|
kwargs[param.name] = config.getValue(param.name, param.default)
|
|
@@ -750,7 +750,7 @@ class Dataset():
|
|
|
750
750
|
else:
|
|
751
751
|
with Dataset.File(self.filename, True, self.format) as file:
|
|
752
752
|
names = file.getNames(groups)
|
|
753
|
-
return [name for i, name in enumerate(names) if index is None or i in index]
|
|
753
|
+
return [name for i, name in enumerate(sorted(names)) if index is None or i in index]
|
|
754
754
|
|
|
755
755
|
def getInfos(self, groups: str, name: str) -> tuple[list[int], Attribute]:
|
|
756
756
|
if self.is_directory:
|
|
@@ -10,7 +10,7 @@ from abc import ABC, abstractmethod
|
|
|
10
10
|
from enum import Enum
|
|
11
11
|
from typing import Any, Union
|
|
12
12
|
|
|
13
|
-
from konfai import CONFIG_FILE, STATISTICS_DIRECTORY, PREDICTIONS_DIRECTORY,
|
|
13
|
+
from konfai import CONFIG_FILE, EVALUATIONS_DIRECTORY, STATISTICS_DIRECTORY, PREDICTIONS_DIRECTORY, KONFAI_STATE, CUDA_VISIBLE_DEVICES
|
|
14
14
|
import torch.distributed as dist
|
|
15
15
|
import argparse
|
|
16
16
|
import subprocess
|
|
@@ -35,8 +35,8 @@ def _getModule(classpath : str, type : str) -> tuple[str, str]:
|
|
|
35
35
|
module = ".".join(classpath.split("_")[:-1])
|
|
36
36
|
name = classpath.split("_")[-1]
|
|
37
37
|
else:
|
|
38
|
-
module = "konfai."+type
|
|
39
|
-
name = classpath
|
|
38
|
+
module = "konfai."+type+("." if len(classpath.split(".")) > 2 else "")+".".join(classpath.split(".")[:-1])
|
|
39
|
+
name = classpath.split(".")[-1]
|
|
40
40
|
return module, name
|
|
41
41
|
|
|
42
42
|
def cpuInfo() -> str:
|
|
@@ -236,36 +236,51 @@ class DataLog(Enum):
|
|
|
236
236
|
VIDEO = lambda tb, name, layer, it : tb.add_video(name, _logVideoFormat(layer), it),
|
|
237
237
|
AUDIO = lambda tb, name, layer, it : tb.add_audio(name, _logImageFormat(layer), it)
|
|
238
238
|
|
|
239
|
-
class Log
|
|
240
|
-
|
|
239
|
+
class Log:
|
|
241
240
|
def __init__(self, name: str) -> None:
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
241
|
+
if KONFAI_STATE() == "PREDICTION":
|
|
242
|
+
path = PREDICTIONS_DIRECTORY()
|
|
243
|
+
elif KONFAI_STATE() == "EVALUATION":
|
|
244
|
+
path = EVALUATIONS_DIRECTORY()
|
|
245
|
+
else:
|
|
246
|
+
path = STATISTICS_DIRECTORY()
|
|
247
|
+
|
|
248
|
+
self.verbose = os.environ.get("KONFAI_VERBOSE", "True") == "True"
|
|
249
|
+
self.log_path = os.path.join(path, name)
|
|
250
|
+
os.makedirs(self.log_path, exist_ok=True)
|
|
251
|
+
|
|
252
|
+
self.file = open(os.path.join(self.log_path, "log.txt"), "w", buffering=1)
|
|
246
253
|
self.stdout_bak = sys.stdout
|
|
247
254
|
self.stderr_bak = sys.stderr
|
|
248
|
-
self.verbose = os.environ["DEEP_LEARNING_VERBOSE"] == "True"
|
|
249
255
|
|
|
250
256
|
def __enter__(self):
|
|
251
257
|
self.file.__enter__()
|
|
252
258
|
sys.stdout = self
|
|
253
259
|
sys.stderr = self
|
|
254
260
|
return self
|
|
255
|
-
|
|
256
|
-
def __exit__(self,
|
|
257
|
-
self.file.__exit__(
|
|
261
|
+
|
|
262
|
+
def __exit__(self, exc_type, exc_val, exc_tb):
|
|
263
|
+
self.file.__exit__(exc_type, exc_val, exc_tb)
|
|
258
264
|
sys.stdout = self.stdout_bak
|
|
259
265
|
sys.stderr = self.stderr_bak
|
|
260
|
-
|
|
266
|
+
|
|
261
267
|
def write(self, msg):
|
|
262
|
-
if msg
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
268
|
+
if not msg:
|
|
269
|
+
return
|
|
270
|
+
self.file.write(msg)
|
|
271
|
+
self.file.flush()
|
|
272
|
+
if self.verbose:
|
|
273
|
+
sys.__stdout__.write(msg)
|
|
274
|
+
sys.__stdout__.flush()
|
|
266
275
|
|
|
267
276
|
def flush(self):
|
|
268
|
-
|
|
277
|
+
self.file.flush()
|
|
278
|
+
|
|
279
|
+
def isatty(self):
|
|
280
|
+
return False
|
|
281
|
+
|
|
282
|
+
def fileno(self):
|
|
283
|
+
return sys.__stdout__.fileno()
|
|
269
284
|
|
|
270
285
|
class TensorBoard():
|
|
271
286
|
|
|
@@ -274,8 +289,8 @@ class TensorBoard():
|
|
|
274
289
|
self.name = name
|
|
275
290
|
|
|
276
291
|
def __enter__(self):
|
|
277
|
-
if "
|
|
278
|
-
command = ["tensorboard", "--logdir", PREDICTIONS_DIRECTORY() if
|
|
292
|
+
if "KONFAI_TENSORBOARD_PORT" in os.environ:
|
|
293
|
+
command = ["tensorboard", "--logdir", PREDICTIONS_DIRECTORY() if KONFAI_STATE() == "PREDICTION" else STATISTICS_DIRECTORY() + self.name + "/", "--port", os.environ["KONFAI_TENSORBOARD_PORT"], "--bind_all"]
|
|
279
294
|
self.process = subprocess.Popen(command, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
|
|
280
295
|
try:
|
|
281
296
|
s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
|
|
@@ -285,7 +300,7 @@ class TensorBoard():
|
|
|
285
300
|
IP = '127.0.0.1'
|
|
286
301
|
finally:
|
|
287
302
|
s.close()
|
|
288
|
-
print("Tensorboard : http://{}:{}/".format(IP, os.environ["
|
|
303
|
+
print("Tensorboard : http://{}:{}/".format(IP, os.environ["KONFAI_TENSORBOARD_PORT"]))
|
|
289
304
|
return self
|
|
290
305
|
|
|
291
306
|
def __exit__(self, type, value, traceback):
|
|
@@ -361,72 +376,72 @@ class DistributedObject():
|
|
|
361
376
|
pynvml.nvmlShutdown()
|
|
362
377
|
cleanup()
|
|
363
378
|
|
|
364
|
-
def
|
|
365
|
-
#
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
379
|
+
def setup(parser: argparse.ArgumentParser) -> DistributedObject:
|
|
380
|
+
# KONFAI arguments
|
|
381
|
+
KONFAI_args = parser.add_argument_group('KONFAI arguments')
|
|
382
|
+
KONFAI_args.add_argument("type", type=State, choices=list(State))
|
|
383
|
+
KONFAI_args.add_argument('-y', action='store_true', help="Accept overwrite")
|
|
384
|
+
KONFAI_args.add_argument('-tb', action='store_true', help='Start TensorBoard')
|
|
385
|
+
KONFAI_args.add_argument("-c", "--config", type=str, default="None", help="Configuration file location")
|
|
386
|
+
KONFAI_args.add_argument("-g", "--gpu", type=str, default=os.environ["CUDA_VISIBLE_DEVICES"] if "CUDA_VISIBLE_DEVICES" in os.environ else "", help="List of GPU")
|
|
387
|
+
KONFAI_args.add_argument('--num-workers', '--num_workers', default=4, type=int, help='No. of workers per DataLoader & GPU')
|
|
388
|
+
KONFAI_args.add_argument("-models_dir", "--MODELS_DIRECTORY", type=str, default="./Models/", help="Models location")
|
|
389
|
+
KONFAI_args.add_argument("-checkpoints_dir", "--CHECKPOINTS_DIRECTORY", type=str, default="./Checkpoints/", help="Checkpoints location")
|
|
390
|
+
KONFAI_args.add_argument("-model", "--MODEL", type=str, default="", help="URL Model")
|
|
391
|
+
KONFAI_args.add_argument("-predictions_dir", "--PREDICTIONS_DIRECTORY", type=str, default="./Predictions/", help="Predictions location")
|
|
392
|
+
KONFAI_args.add_argument("-evaluation_dir", "--EVALUATIONS_DIRECTORY", type=str, default="./Evaluations/", help="Evaluations location")
|
|
393
|
+
KONFAI_args.add_argument("-statistics_dir", "--STATISTICS_DIRECTORY", type=str, default="./Statistics/", help="Statistics location")
|
|
394
|
+
KONFAI_args.add_argument("-setups_dir", "--SETUPS_DIRECTORY", type=str, default="./Setups/", help="Setups location")
|
|
395
|
+
KONFAI_args.add_argument('-log', action='store_true', help='Save log')
|
|
396
|
+
KONFAI_args.add_argument('-quiet', action='store_false', help='')
|
|
382
397
|
|
|
383
398
|
|
|
384
399
|
args = parser.parse_args()
|
|
385
400
|
config = vars(args)
|
|
386
401
|
|
|
387
402
|
os.environ["CUDA_VISIBLE_DEVICES"] = config["gpu"]
|
|
388
|
-
os.environ["
|
|
389
|
-
os.environ["
|
|
390
|
-
os.environ["
|
|
391
|
-
os.environ["
|
|
392
|
-
os.environ["
|
|
403
|
+
os.environ["KONFAI_MODELS_DIRECTORY"] = config["MODELS_DIRECTORY"]
|
|
404
|
+
os.environ["KONFAI_CHECKPOINTS_DIRECTORY"] = config["CHECKPOINTS_DIRECTORY"]
|
|
405
|
+
os.environ["KONFAI_PREDICTIONS_DIRECTORY"] = config["PREDICTIONS_DIRECTORY"]
|
|
406
|
+
os.environ["KONFAI_EVALUATIONS_DIRECTORY"] = config["EVALUATIONS_DIRECTORY"]
|
|
407
|
+
os.environ["KONFAI_STATISTICS_DIRECTORY"] = config["STATISTICS_DIRECTORY"]
|
|
393
408
|
|
|
394
|
-
os.environ["
|
|
409
|
+
os.environ["KONFAI_STATE"] = str(config["type"])
|
|
395
410
|
|
|
396
|
-
os.environ["
|
|
411
|
+
os.environ["KONFAI_MODEL"] = config["MODEL"]
|
|
397
412
|
|
|
398
|
-
os.environ["
|
|
413
|
+
os.environ["KONFAI_SETUPS_DIRECTORY"] = config["SETUPS_DIRECTORY"]
|
|
399
414
|
|
|
400
|
-
os.environ["
|
|
401
|
-
os.environ["
|
|
415
|
+
os.environ["KONFAI_OVERWRITE"] = "{}".format(config["y"])
|
|
416
|
+
os.environ["KONFAI_CONFIG_MODE"] = "Done"
|
|
402
417
|
if config["tb"]:
|
|
403
|
-
os.environ["
|
|
418
|
+
os.environ["KONFAI_TENSORBOARD_PORT"] = str(find_free_port())
|
|
404
419
|
|
|
405
|
-
os.environ["
|
|
420
|
+
os.environ["KONFAI_VERBOSE"] = str(config["quiet"])
|
|
406
421
|
|
|
407
422
|
if config["config"] == "None":
|
|
408
423
|
if config["type"] is State.PREDICTION:
|
|
409
|
-
os.environ["
|
|
424
|
+
os.environ["KONFAI_CONFIG_FILE"] = "Prediction.yml"
|
|
410
425
|
elif config["type"] is State.EVALUATION:
|
|
411
|
-
os.environ["
|
|
426
|
+
os.environ["KONFAI_CONFIG_FILE"] = "Evaluation.yml"
|
|
412
427
|
else:
|
|
413
|
-
os.environ["
|
|
428
|
+
os.environ["KONFAI_CONFIG_FILE"] = "Config.yml"
|
|
414
429
|
else:
|
|
415
|
-
os.environ["
|
|
430
|
+
os.environ["KONFAI_CONFIG_FILE"] = config["config"]
|
|
416
431
|
torch.autograd.set_detect_anomaly(True)
|
|
417
432
|
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"
|
|
418
433
|
|
|
419
434
|
if config["type"] is State.PREDICTION:
|
|
420
435
|
from konfai.predictor import Predictor
|
|
421
|
-
os.environ["
|
|
436
|
+
os.environ["KONFAI_ROOT"] = "Predictor"
|
|
422
437
|
return Predictor(config=CONFIG_FILE())
|
|
423
438
|
elif config["type"] is State.EVALUATION:
|
|
424
439
|
from konfai.evaluator import Evaluator
|
|
425
|
-
os.environ["
|
|
440
|
+
os.environ["KONFAI_ROOT"] = "Evaluator"
|
|
426
441
|
return Evaluator(config=CONFIG_FILE())
|
|
427
442
|
else:
|
|
428
443
|
from konfai.trainer import Trainer
|
|
429
|
-
os.environ["
|
|
444
|
+
os.environ["KONFAI_ROOT"] = "Trainer"
|
|
430
445
|
return Trainer(config=CONFIG_FILE())
|
|
431
446
|
|
|
432
447
|
|
|
@@ -12,10 +12,10 @@ konfai.egg-info/dependency_links.txt
|
|
|
12
12
|
konfai.egg-info/entry_points.txt
|
|
13
13
|
konfai.egg-info/requires.txt
|
|
14
14
|
konfai.egg-info/top_level.txt
|
|
15
|
-
konfai/data/HDF5.py
|
|
16
15
|
konfai/data/__init__.py
|
|
17
16
|
konfai/data/augmentation.py
|
|
18
|
-
konfai/data/
|
|
17
|
+
konfai/data/data_manager.py
|
|
18
|
+
konfai/data/patching.py
|
|
19
19
|
konfai/data/transform.py
|
|
20
20
|
konfai/metric/__init__.py
|
|
21
21
|
konfai/metric/measure.py
|
konfai-1.0.8/konfai/__init__.py
DELETED
|
@@ -1,16 +0,0 @@
|
|
|
1
|
-
import os
|
|
2
|
-
import datetime
|
|
3
|
-
|
|
4
|
-
MODELS_DIRECTORY = lambda : os.environ["DL_API_MODELS_DIRECTORY"]
|
|
5
|
-
CHECKPOINTS_DIRECTORY =lambda : os.environ["DL_API_CHECKPOINTS_DIRECTORY"]
|
|
6
|
-
MODEL = lambda : os.environ["DL_API_MODEL"]
|
|
7
|
-
PREDICTIONS_DIRECTORY =lambda : os.environ["DL_API_PREDICTIONS_DIRECTORY"]
|
|
8
|
-
EVALUATIONS_DIRECTORY =lambda : os.environ["DL_API_EVALUATIONS_DIRECTORY"]
|
|
9
|
-
STATISTICS_DIRECTORY = lambda : os.environ["DL_API_STATISTICS_DIRECTORY"]
|
|
10
|
-
SETUPS_DIRECTORY = lambda : os.environ["DL_API_SETUPS_DIRECTORY"]
|
|
11
|
-
CONFIG_FILE = lambda : os.environ["DEEP_LEARNING_API_CONFIG_FILE"]
|
|
12
|
-
DL_API_STATE = lambda : os.environ["DL_API_STATE"]
|
|
13
|
-
DEEP_LEARNING_API_ROOT = lambda : os.environ["DEEP_LEARNING_API_ROOT"]
|
|
14
|
-
CUDA_VISIBLE_DEVICES = lambda : os.environ["CUDA_VISIBLE_DEVICES"]
|
|
15
|
-
|
|
16
|
-
DATE = lambda : datetime.datetime.now().strftime("%Y_%m_%d_%H_%M_%S")
|
konfai-1.0.8/konfai/main.py
DELETED
|
@@ -1,45 +0,0 @@
|
|
|
1
|
-
import argparse
|
|
2
|
-
import os
|
|
3
|
-
from torch.cuda import device_count
|
|
4
|
-
import torch.multiprocessing as mp
|
|
5
|
-
from konfai.utils.utils import setupAPI, TensorBoard, Log
|
|
6
|
-
|
|
7
|
-
import sys
|
|
8
|
-
sys.path.insert(0, os.getcwd())
|
|
9
|
-
|
|
10
|
-
def main():
|
|
11
|
-
parser = argparse.ArgumentParser(description="KonfAI", formatter_class=argparse.ArgumentDefaultsHelpFormatter)
|
|
12
|
-
with setupAPI(parser) as distributedObject:
|
|
13
|
-
with Log(distributedObject.name):
|
|
14
|
-
world_size = device_count()
|
|
15
|
-
if world_size == 0:
|
|
16
|
-
world_size = 1
|
|
17
|
-
distributedObject.setup(world_size)
|
|
18
|
-
with TensorBoard(distributedObject.name):
|
|
19
|
-
mp.spawn(distributedObject, nprocs=world_size)
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
def cluster():
|
|
23
|
-
parser = argparse.ArgumentParser(description="KonfAI", formatter_class=argparse.ArgumentDefaultsHelpFormatter)
|
|
24
|
-
|
|
25
|
-
# Cluster manager arguments
|
|
26
|
-
cluster_args = parser.add_argument_group('Cluster manager arguments')
|
|
27
|
-
cluster_args.add_argument('--name', type=str, help='Task name', required=True)
|
|
28
|
-
cluster_args.add_argument('--num-nodes', '--num_nodes', default=1, type=int, help='Number of nodes')
|
|
29
|
-
cluster_args.add_argument('--memory', type=int, default=16, help='Amount of memory per node')
|
|
30
|
-
cluster_args.add_argument('--time-limit', '--time_limit', type=int, default=1440, help='Job time limit in minute')
|
|
31
|
-
cluster_args.add_argument('--resubmit', action='store_true', help='Automatically resubmit job just before timout')
|
|
32
|
-
|
|
33
|
-
with setupAPI(parser) as distributedObject:
|
|
34
|
-
args = parser.parse_args()
|
|
35
|
-
config = vars(args)
|
|
36
|
-
os.environ["DL_API_OVERWRITE"] = "True"
|
|
37
|
-
os.environ["DL_API_CLUSTER"] = "True"
|
|
38
|
-
|
|
39
|
-
n_gpu = len(config["gpu"].split(","))
|
|
40
|
-
distributedObject.setup(n_gpu*int(config["num_nodes"]))
|
|
41
|
-
import submitit
|
|
42
|
-
executor = submitit.AutoExecutor(folder="./Cluster/")
|
|
43
|
-
executor.update_parameters(name=config["name"], mem_gb=config["memory"], gpus_per_node=n_gpu, tasks_per_node=n_gpu//distributedObject.size, cpus_per_task=config["num_workers"], nodes=config["num_nodes"], timeout_min=config["time_limit"])
|
|
44
|
-
with TensorBoard(distributedObject.name):
|
|
45
|
-
executor.submit(distributedObject)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|