radnn 0.0.8-py3-none-any.whl → 0.1.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- radnn/__init__.py +5 -5
- radnn/benchmark/__init__.py +1 -0
- radnn/benchmark/latency.py +55 -0
- radnn/core.py +146 -2
- radnn/data/__init__.py +5 -10
- radnn/data/dataset_base.py +100 -260
- radnn/data/dataset_base_legacy.py +280 -0
- radnn/data/errors.py +32 -0
- radnn/data/sample_preprocessor.py +58 -0
- radnn/data/sample_set.py +203 -90
- radnn/data/sample_set_kind.py +126 -0
- radnn/data/sequence_dataset.py +25 -30
- radnn/data/structs/__init__.py +1 -0
- radnn/data/structs/tree.py +322 -0
- radnn/data_beta/__init__.py +12 -0
- radnn/{data → data_beta}/data_feed.py +1 -1
- radnn/data_beta/dataset_base.py +337 -0
- radnn/data_beta/sample_set.py +166 -0
- radnn/data_beta/sequence_dataset.py +134 -0
- radnn/data_beta/structures/__init__.py +2 -0
- radnn/data_beta/structures/dictionary.py +41 -0
- radnn/{data → data_beta}/tf_classification_data_feed.py +5 -2
- radnn/errors.py +10 -2
- radnn/experiment/__init__.py +2 -0
- radnn/experiment/identification.py +7 -0
- radnn/experiment/ml_experiment.py +7 -2
- radnn/experiment/ml_experiment_log.py +47 -0
- radnn/images/image_processor.py +4 -1
- radnn/learn/__init__.py +0 -7
- radnn/learn/keras/__init__.py +4 -0
- radnn/learn/{state → keras}/keras_best_state_saver.py +5 -1
- radnn/learn/{learning_algorithm.py → keras/keras_learning_algorithm.py} +5 -9
- radnn/learn/{keras_learning_rate_scheduler.py → keras/keras_learning_rate_scheduler.py} +4 -1
- radnn/learn/{keras_optimization_algorithm.py → keras/keras_optimization_combo.py} +7 -3
- radnn/learn/torch/__init__.py +3 -0
- radnn/learn/torch/ml_model_freezer.py +330 -0
- radnn/learn/torch/ml_trainer.py +461 -0
- radnn/learn/torch/staircase_lr_scheduler.py +21 -0
- radnn/ml_system.py +68 -52
- radnn/models/__init__.py +5 -0
- radnn/models/cnn/__init__.py +0 -0
- radnn/models/cnn/cnn_stem_setup.py +35 -0
- radnn/models/model_factory.py +85 -0
- radnn/models/model_hyperparams.py +128 -0
- radnn/models/model_info.py +91 -0
- radnn/plots/plot_learning_curve.py +19 -8
- radnn/system/__init__.py +1 -0
- radnn/system/files/__init__.py +1 -1
- radnn/system/files/csvfile.py +37 -5
- radnn/system/files/filelist.py +30 -0
- radnn/system/files/fileobject.py +11 -1
- radnn/system/files/imgfile.py +1 -1
- radnn/system/files/jsonfile.py +37 -9
- radnn/system/files/picklefile.py +3 -3
- radnn/system/files/textfile.py +39 -10
- radnn/system/files/zipfile.py +96 -0
- radnn/system/filestore.py +147 -47
- radnn/system/filesystem.py +3 -3
- radnn/test/__init__.py +1 -0
- radnn/test/tensor_hash.py +130 -0
- radnn/utils.py +16 -2
- radnn-0.1.0.dist-info/METADATA +30 -0
- radnn-0.1.0.dist-info/RECORD +99 -0
- {radnn-0.0.8.dist-info → radnn-0.1.0.dist-info}/WHEEL +1 -1
- {radnn-0.0.8.dist-info → radnn-0.1.0.dist-info/licenses}/LICENSE.txt +1 -1
- radnn/learn/state/__init__.py +0 -4
- radnn-0.0.8.dist-info/METADATA +0 -58
- radnn-0.0.8.dist-info/RECORD +0 -70
- /radnn/{data → data_beta}/dataset_folder.py +0 -0
- /radnn/{data → data_beta}/image_dataset.py +0 -0
- /radnn/{data → data_beta}/image_dataset_files.py +0 -0
- /radnn/{data → data_beta}/preprocess/__init__.py +0 -0
- /radnn/{data → data_beta}/preprocess/normalizer.py +0 -0
- /radnn/{data → data_beta}/preprocess/standardizer.py +0 -0
- /radnn/{data → data_beta}/subset_type.py +0 -0
- {radnn-0.0.8.dist-info → radnn-0.1.0.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,461 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
from radnn import mlsys, FileStore
|
|
3
|
+
|
|
4
|
+
# -----------------------------
|
|
5
|
+
# Standard Libraries
|
|
6
|
+
# -----------------------------
|
|
7
|
+
import time
|
|
8
|
+
|
|
9
|
+
# -----------------------------
|
|
10
|
+
# PyTorch
|
|
11
|
+
# -----------------------------
|
|
12
|
+
import torch
|
|
13
|
+
import torch.nn as nn
|
|
14
|
+
import torch.optim as optim
|
|
15
|
+
from torch.optim import lr_scheduler
|
|
16
|
+
|
|
17
|
+
from radnn.evaluation import EvaluateClassification
|
|
18
|
+
from radnn.plots import PlotConfusionMatrix, PlotLearningCurve
|
|
19
|
+
from radnn.experiment import MLExperimentLog, experiment_fold_number, experiment_name_with_fold
|
|
20
|
+
import matplotlib.pyplot as plt
|
|
21
|
+
from radnn.learn.torch import StairCaseLR
|
|
22
|
+
from radnn.errors import *
|
|
23
|
+
|
|
24
|
+
# -----------------------------
|
|
25
|
+
# Progress Bar
|
|
26
|
+
# -----------------------------
|
|
27
|
+
from tqdm import tqdm
|
|
28
|
+
|
|
29
|
+
# ----------------------------------------------------------------------------------------------------------------------
|
|
30
|
+
def seed_everything(seed=42):
  """Seed all pseudo-random generators (python, numpy, torch) for reproducible runs.

  Also configures cuDNN for deterministic behavior.

  :param seed: integer seed applied to every generator (default 42).
  """
  import os
  import random
  os.environ['PYTHONHASHSEED'] = str(seed)
  np.random.seed(seed)
  random.seed(seed)
  torch.manual_seed(seed)
  torch.cuda.manual_seed(seed)
  torch.backends.cudnn.deterministic = True
  # BUGFIX: benchmark was True, which lets cuDNN auto-tune and pick potentially
  # non-deterministic algorithms, defeating the determinism requested above.
  # Per the PyTorch reproducibility notes it must be disabled.
  torch.backends.cudnn.benchmark = False
|
|
40
|
+
# ----------------------------------------------------------------------------------------------------------------------
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
class MLModelTrainer():
  """Supervised training driver for torch classification models.

  Builds the loss criterion, optimizer and learning-rate scheduler from a
  hyperparameter collection, runs the train/validation loop with per-epoch
  logging, checkpoints the weights with the best validation F1 score, exports
  metric reports and can export the trained model to ONNX. All file locations
  are resolved through the global `mlsys` filesystem object.
  """
  # --------------------------------------------------------------------------------------------------------------------
  def __init__(self, hyperparams, dataset, model, device):
    """
    :param hyperparams: dict-like hyperparameter collection keyed by dotted paths (e.g. "Training.Epochs").
    :param dataset: dataset exposing `.ts`/`.vs` subsets with `.loader`, `.minibatch_count`, `.sample_count`.
    :param model: the torch.nn.Module to train.
    :param device: torch device minibatch tensors are moved to during training.
    """
    self.hprm = hyperparams

    # The python/numpy generators might have been used prior to the start of training.
    # We need to re-seed here to reset to the start of the pseudo-random sequence,
    # plus encapsulating the reproducibility for torch in case mlsys.random_seed_all has not been explicitly called
    seed_everything(self.hprm["Experiment.RandomSeed"])

    self.dataset = dataset
    self.model = model
    self.device = device
    self.criterion = None
    self.optimizer = None
    self.scheduler = None
    self.best_model_state = None  # filename stem shared by the .pth/.onnx state files
    self.best_model_state_file = None
    self.training_logs_file = None
    self.experiment_hyperparams_file = None
    self.get_model_paths()
    self.mlflow_run_id = None
    self.registered_model = None

  # --------------------------------------------------------------------------------------------------------------------
  def get_lr(self):
    """Returns the current learning rate of the first optimizer parameter group."""
    return self.optimizer.param_groups[0]["lr"]

  # --------------------------------------------------------------------------------------------------------------------
  def get_model_paths(self):
    """Resolves the experiment filestore and all output file paths for this run."""
    hprm = self.hprm
    sExperimentName = hprm["Experiment.Name"]
    self.best_model_state = f'{hprm["Dataset.Name"]}_{hprm["Model.Name"]}_pipeline{hprm["Data.Pipeline.Type"]}_{sExperimentName}'
    sExperimentWithFoldNumber = experiment_name_with_fold(hprm)
    self.experiment_fs: FileStore = mlsys.filesys.models.subfs(sExperimentWithFoldNumber)
    self.best_model_state_file = self.experiment_fs.file(f'{self.best_model_state}.pth')
    self.best_model_state_onnx_file = self.experiment_fs.file(f'{self.best_model_state}.onnx')
    self.training_logs_file = self.experiment_fs.file(f"training_logs_{sExperimentName}.json")
    self.experiment_hyperparams_file = self.experiment_fs.file(f"hyperparams_{sExperimentWithFoldNumber}.json")

  # --------------------------------------------------------------------------------------------------------------------
  def build_optimizer(self):
    """Creates self.optimizer according to "Training.Optimizer" (SGD / RMSProp / Adam / AdamW).

    :raises Exception: when the optimizer name is not one of the supported values.
    """
    hprm = self.hprm
    sExtra = ""
    sOptimizerName = hprm["Training.Optimizer"].upper()
    if sOptimizerName == "SGD":
      self.optimizer = optim.SGD(self.model.parameters(), lr=hprm["Training.LearningRate"],
                                 momentum=hprm.get("Training.Momentum", 0.0),
                                 nesterov=hprm.get("Training.Momentum.Nesterov", False),
                                 weight_decay=hprm["Training.Regularize.WeightDecay"])
      sExtra = f'momentum={self.optimizer.defaults["momentum"]}'
      if self.optimizer.defaults["nesterov"]:
        sExtra += " (Nesterov)"
    elif sOptimizerName == "RMSPROP":
      self.optimizer = optim.RMSprop(self.model.parameters(), lr=hprm["Training.LearningRate"],
                                     weight_decay=hprm["Training.Regularize.WeightDecay"],
                                     momentum=hprm.get("Training.Momentum", 0.0),
                                     eps=hprm.get("Training.RMSProp.Epsilon", 1e-8)
                                     )
    elif sOptimizerName == "ADAM":
      self.optimizer = optim.Adam(self.model.parameters(), lr=hprm["Training.LearningRate"],
                                  weight_decay=hprm["Training.Regularize.WeightDecay"])
    elif sOptimizerName == "ADAMW":
      self.optimizer = optim.AdamW(self.model.parameters(), lr=hprm["Training.LearningRate"],
                                   weight_decay=hprm["Training.Regularize.WeightDecay"])
    else:
      # BUGFIX: an unsupported name used to leave self.optimizer silently as None,
      # deferring the failure to the first scheduler/step call.
      raise Exception(f'Unsupported optimizer "{hprm["Training.Optimizer"]}"')

    print(f'Using {hprm["Training.Optimizer"].upper()} optimizer {sExtra}')
  # --------------------------------------------------------------------------------------------------------------------
  def build_lr_scheduler(self):
    """Creates self.scheduler according to "Training.LearningRateSchedule".

    Supported values (case-insensitive): "MultiStepDivisor" (torch MultiStepLR),
    "StairCase" (StairCaseLR with a [[epoch, lr], ...] setup) and "CosineAnnealing".

    :raises Exception: when the schedule setup keys are missing or malformed.
    """
    hprm = self.hprm
    sSchedulingType = hprm.get("Training.LearningRateSchedule", "MultiStepDivisor")
    sSchedulingType = sSchedulingType.upper()
    nDefaultSetup = [[0, hprm["Training.LearningRate"]], [hprm["Training.Epochs"], 0.00001]]
    # The last [epoch, lr] pair defines the end point of the cosine annealing curve.
    nFinalChangeEpoch, nFinalLR = hprm.get("Training.LearningRateSchedule.Setup", nDefaultSetup)[-1]

    self.scheduler = None
    if sSchedulingType == "MultiStepDivisor".upper():
      if "Training.LearningRateSchedule.Epochs" in hprm:
        self.scheduler = lr_scheduler.MultiStepLR(self.optimizer,
                                                  milestones=hprm["Training.LearningRateSchedule.Epochs"],
                                                  gamma=hprm["Training.LearningRateSchedule.StepRatio"])
      else:
        raise Exception(TRAINER_LR_SCHEDULER_INVALID_MILESTONE_SETUP)
    elif sSchedulingType == "StairCase".upper():
      if "Training.LearningRateSchedule.Setup" in hprm:
        oLRSetup = hprm["Training.LearningRateSchedule.Setup"]
        if not isinstance(oLRSetup, list):
          raise Exception(TRAINER_LR_SCHEDULER_INVALID_SETUP)
        self.scheduler = StairCaseLR(self.optimizer, oLRSetup)
      else:
        raise Exception(TRAINER_LR_SCHEDULER_INVALID_SETUP)

    # BUGFIX: this branch compared the upper-cased type to the mixed-case literal
    # "CosineAnnealing", which could never match, making cosine annealing unreachable.
    elif sSchedulingType == "CosineAnnealing".upper():
      self.scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(self.optimizer,
                                                                  T_max=nFinalChangeEpoch,
                                                                  eta_min=nFinalLR)

    assert self.scheduler is not None, TRAINER_LR_SCHEDULER_UNSUPPORTED
  # --------------------------------------------------------------------------------------------------------------------
  def prepare(self):
    """Builds the loss criterion, optimizer and LR scheduler; persists the hyperparams."""
    hprm = self.hprm
    hprm["Model.State.Best"] = self.best_model_state
    sExperimentName = hprm["Experiment.Name"]
    # NOTE(review): the "run_6classes" config subfolder looks experiment-specific/hard-coded — confirm.
    mlsys.filesys.configs.subfs("run_6classes").json.save(hprm, f"{sExperimentName}_hyperparams.json",
                                                          is_sorted_keys=False)

    if "Training.CrossEntropy.UseClassWeights" in hprm:
      class_weights_tensor = torch.tensor(self.dataset.ts.class_weights, dtype=torch.float)
      class_weights_tensor = class_weights_tensor.to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))
      self.criterion = nn.CrossEntropyLoss(weight=class_weights_tensor)
    else:
      self.criterion = nn.CrossEntropyLoss()

    self.build_optimizer()
    self.build_lr_scheduler()
  # --------------------------------------------------------------------------------------------------------------------
  def fit(self, device):
    """Runs the full training loop.

    Per epoch: one training pass, one validation pass, log update, evaluation
    report, and a checkpoint whenever the validation F1 score improves.

    :param device: device the model is moved to. NOTE(review): minibatch tensors
                   are moved to self.device — confirm both refer to the same device.
    """
    self.model.to(device)
    hprm = self.hprm
    dInfo = {
      "experiment_name": hprm["Experiment.Name"],
      "experiment_fold_number": experiment_name_with_fold(hprm),
      "model_name": hprm["Model.Name"],
      "model_variants": hprm["Model.Variants"]
    }
    oLog: MLExperimentLog = MLExperimentLog(self.training_logs_file, dInfo)

    best_val_f1_score = 0.0  # Track the best validation F1 score
    patience = 8  # Number of epochs to wait for improvement (early stopping currently disabled below)
    epochs_without_improvement = 0  # Counter for early stopping

    nTSBatchCount = self.dataset.ts.minibatch_count
    nVSBatchCount = self.dataset.vs.minibatch_count
    nEpochCount = hprm["Training.Epochs"]
    bInitialInfoSave = False

    nLR = hprm["Training.LearningRate"]
    self.experiment_fs.json.save(hprm, f'hyperparams_{hprm["Experiment.Name"]}.json')

    oStepLoss = []
    oStepAccuracy = []
    all_labels = None
    all_predictions = None

    for nEpochIndex in range(nEpochCount):
      print(f"\nEpoch {nEpochIndex + 1}/{nEpochCount}")

      # -------------------- Training --------------------
      self.model.train()
      train_loss, train_correct = 0.0, 0
      # BUGFIX: the running sample counter is now reset per epoch; previously it was
      # initialized once before the epoch loop and accumulated across epochs,
      # skewing the in-loop running accuracy from the second epoch onward.
      nEpochSampleCount = 0

      nLR = self.scheduler.get_last_lr()[0]
      progress_bar = tqdm(self.dataset.ts.loader, desc=f"Epoch {nEpochIndex + 1}/{nEpochCount} LR={nLR:.5f}", leave=False)
      nStart = time.perf_counter()

      for inputs, labels, ids in progress_bar:
        inputs, labels = inputs.to(self.device), labels.to(self.device)

        self.optimizer.zero_grad()
        outputs = self.model(inputs)
        loss = self.criterion(outputs, labels)
        loss.backward()
        self.optimizer.step()

        # Accumulate minibatch statistics
        mb_loss = loss.item()
        train_loss += mb_loss
        _, predicted = torch.max(outputs, 1)
        mb_correct = (predicted == labels).sum().item()
        mb_count = len(labels)
        mb_accuracy = mb_correct / mb_count

        oStepLoss.append(mb_loss)
        oStepAccuracy.append(mb_accuracy)

        train_correct += mb_correct
        nEpochSampleCount += mb_count
        train_accuracy = train_correct / nEpochSampleCount  # running accuracy within the epoch

        progress_bar.set_postfix(loss=f"{mb_loss:.4f}", accuracy=f"{mb_accuracy:.4f}")
        if not bInitialInfoSave:
          bInitialInfoSave = True

      nElapsedSecs = time.perf_counter() - nStart
      nStart = time.perf_counter()

      train_loss /= nTSBatchCount
      train_accuracy = train_correct / self.dataset.ts.sample_count
      print(f"Train Loss: {train_loss:.4f} | Train Acc: {train_accuracy:.4f}")

      # -------------------- Validation --------------------
      self.model.eval()
      val_loss, val_correct = 0.0, 0

      all_labels = []
      all_predictions = []
      progress_bar = tqdm(self.dataset.vs.loader, desc=f"Validating {nEpochIndex + 1}/{nEpochCount}", leave=False)
      with torch.no_grad():
        for inputs, labels, ids in progress_bar:
          inputs, labels = inputs.to(self.device), labels.to(self.device)
          outputs = self.model(inputs)
          loss = self.criterion(outputs, labels).double()

          val_loss += loss.item()
          _, predicted = torch.max(outputs, 1)
          all_labels.extend(labels.cpu().numpy().tolist())
          all_predictions.extend(predicted.cpu().numpy().tolist())
          val_correct += (predicted == labels).sum().item()

          progress_bar.set_postfix(loss=loss.item())

      val_loss /= nVSBatchCount
      val_accuracy = val_correct / self.dataset.vs.sample_count

      if self.scheduler is not None:
        self.scheduler.step()
        nLR = self.scheduler.get_last_lr()[0]
      print(f"Val Loss: {val_loss:.4f} | Val Acc: {val_accuracy:.4f} | LR: {nLR:.5f}")

      # -------------------- Update logs / Evaluation Report --------------------
      oLog.append(epoch=nEpochIndex+1,
                  epoch_time=nElapsedSecs,
                  train_loss=train_loss,
                  train_accuracy=train_accuracy,
                  val_loss=val_loss,
                  val_accuracy=val_accuracy,)
      oLog.assign_series(train_step_loss=oStepLoss, train_step_accuracy=oStepAccuracy)
      oLog.save(self.experiment_fs)
      print(f"📊 Training logs saved to {self.training_logs_file}")

      oEvaluator = self.evaluation_report(all_labels, all_predictions, oLog.logs, is_showing_plots=False)

      # -------------------- Checkpoint & Early Stopping --------------------
      val_f1_score = oEvaluator.average_f1score
      if (val_f1_score > best_val_f1_score):
        best_val_f1_score = val_f1_score
        # (removed a no-op self-assignment of self.best_model_state_file here)
        torch.save(self.model.state_dict(), self.best_model_state_file)
        hprm["Model.State.BestEpoch"] = f"Epoch{nEpochIndex + 1}"
        self.experiment_fs.json.save(hprm, f'hyperparams_{hprm["Experiment.Name"]}.json')
        print(f'✅ Best model updated with F1 score: {best_val_f1_score:.4f}')
        self.export_metrics(oEvaluator, nEpochIndex+1)

        epochs_without_improvement = 0
      else:
        epochs_without_improvement += 1

      nElapsedSecs = time.perf_counter() - nStart

      # Early stopping is deliberately disabled for now; `patience` is kept for when it returns.
      #if epochs_without_improvement >= patience:
      #  print(f'⏹ Early stopping after {nEpochIndex + 1} epochs without improvement.')
      #  break

    print("🎉 Training complete!")

  # --------------------------------------------------------------------------------------------------------------
  def print_trainable_blocks(self):
    """Prints the names of all model parameters that currently require gradients."""
    for sName, oParams in self.model.named_parameters():
      bIsTrainable = oParams.requires_grad
      if bIsTrainable:
        print(f" |__ TRAINABLE: {sName}")
  # --------------------------------------------------------------------------------------------------------------
  def export_per_class_metrics(self, evaluator, opened_file, class_names=None):
    """Writes a per-class recall/precision table to an open text file.

    NOTE(review): when `class_names` is given, the labels are read from
    `evaluator.class_names` rather than the argument — confirm this is intended.

    :param evaluator: EvaluateClassification holding the computed metrics.
    :param opened_file: an open, writable text file object.
    :param class_names: optional mapping used only as a presence flag (see note).
    """
    if class_names is not None:
      nClassCount = len(evaluator.class_names.keys())
      oClasses = [f"{evaluator.class_names[x]:7}" for x in list(range(nClassCount))]
    else:
      oClasses = sorted(np.unique(evaluator.actual_classes))
      nClassCount = len(oClasses)
      oClasses = [f"{x:^7}" for x in oClasses]
    evaluator.class_count = nClassCount

    sClasses = " |".join(oClasses)
    nRepeat = 28 + (7+2)*evaluator.class_count
    print(f" |{sClasses}|", file=opened_file)
    print("-"*nRepeat, file=opened_file)
    print(f"Per Class Recall % |{evaluator.format_series_as_pc(evaluator.recall[:])}|", file=opened_file)
    print(f"Per Class Precision % |{evaluator.format_series_as_pc(evaluator.precision[:])}|", file=opened_file)
    print("-" * nRepeat, file=opened_file)
  # --------------------------------------------------------------------------------------------------------------
  def export_overall_metrics(self, evaluator, opened_file):
    """Writes aggregate metrics (accuracy, F1, recall, precision; AUC for binary) to an open file."""
    print(f"Accuracy % :{evaluator.accuracy*100.0 :.3f}", file=opened_file)
    print(f"Average F1 Score % :{evaluator.average_f1score*100.0:.3f}", file=opened_file)
    print(f"Weighted Average Recall % :{evaluator.average_recall*100.0:.3f}", file=opened_file)
    print(f"Weighted Average Precision %:{evaluator.average_precision*100.0:.3f}", file=opened_file)
    if (evaluator.class_count == 2) and (evaluator.auc is not None):
      print(f"Area Under the Curve (AUC):{evaluator.auc:.4f}", file=opened_file)
    print("", file=opened_file)

  # --------------------------------------------------------------------------------------------------------------
  def export_metrics(self, evaluator, epoch=None):
    """Writes the metrics text report for the current experiment fold.

    :param evaluator: EvaluateClassification holding the computed metrics.
    :param epoch: best epoch number when called mid-training; None for the final report.
    """
    hprm = self.hprm
    nFoldNumber = experiment_fold_number(hprm)
    nRepeat = 80
    sMetricsFileName = self.experiment_fs.file(f'metrics_{experiment_name_with_fold(hprm)}.txt')
    with open(sMetricsFileName, "w") as oFile:
      print("="*nRepeat, file=oFile)
      if epoch is None:
        print(f'Experiment [{hprm["Experiment.Name"]}] fold {nFoldNumber} trained.', file=oFile)
      else:
        print(f'Experiment [{hprm["Experiment.Name"]}] fold {nFoldNumber} training in progress, best epoch {epoch}.', file=oFile)

      print("="*nRepeat, file=oFile)
      self.export_overall_metrics(evaluator, oFile)
      self.export_per_class_metrics(evaluator, oFile)
  # --------------------------------------------------------------------------------------------------------------
  def inspect_learned_params(self):
    """Collects and prints any learned "clip"/"temperature" parameters (diagnostic aid)."""
    oParams = dict()
    nClipCount = 0
    nTempCount = 0
    for nIndex, (name, tensor) in enumerate(self.model.state_dict().items()):
      if "clip" in name:
        nClipCount += 1
        oParams[f"clip{nClipCount}"] = tensor.detach().cpu().numpy()
      elif "temperature" in name:
        nTempCount += 1
        oParams[f"temp{nTempCount}"] = tensor.detach().cpu().numpy()

    print(oParams)
  # --------------------------------------------------------------------------------------------------------------
  def evaluation_report(self, all_labels, all_preds, logs: dict = None, is_showing_plots=False, class_names=None):
    """Prints metrics, saves the confusion-matrix and learning-curve plots, and
    returns the EvaluateClassification object.

    :param all_labels: ground-truth class indices.
    :param all_preds: predicted class indices.
    :param logs: training logs dict; when given, learning-curve plots are produced.
    :param is_showing_plots: also display the plots interactively.
    :param class_names: optional {class_index: name} mapping stored on the evaluator.
    """
    oEvaluator = EvaluateClassification(all_labels, all_preds)
    oEvaluator.print_overall()
    oEvaluator.print_confusion_matrix()
    oEvaluator.class_names = class_names

    oPlot = PlotConfusionMatrix(oEvaluator.confusion_matrix)
    oPlot = oPlot.prepare().save(self.experiment_fs.file("Confusion Matrix.png"))
    if is_showing_plots:
      oPlot.show()

    if logs is not None:
      oTrainingLogPlot = PlotLearningCurve(logs, f'Experiment {self.hprm["Experiment.Name"]}')
      oTrainingLogPlot = oTrainingLogPlot.prepare(metric_key="accuracy").save(self.experiment_fs.file("LearningCurve_Accuracy.png"))
      if is_showing_plots:
        oTrainingLogPlot.show()

      oTrainingLogPlot = PlotLearningCurve(logs, f'Experiment {self.hprm["Experiment.Name"]}')
      oTrainingLogPlot = oTrainingLogPlot.prepare(metric_key="loss").save(self.experiment_fs.file("LearningCurve_Loss.png"))
      if is_showing_plots:
        oTrainingLogPlot.show()
    plt.close()
    self.inspect_learned_params()

    return oEvaluator

  # --------------------------------------------------------------------------------------------------------------
  def load(self, filename=None):
    """Loads model weights from `filename` (defaults to the best checkpoint) and sets eval mode."""
    if filename is None:
      filename = self.best_model_state_file

    oCheckpoint = torch.load(filename)
    self.model.load_state_dict(oCheckpoint)
    self.model.eval()

  # --------------------------------------------------------------------------------------------------------------
  def evaluate(self, class_names: dict=None, filename=None):
    """Loads a checkpoint, runs inference on the validation set, and exports the metrics report.

    :param class_names: optional {class_index: name} mapping used in the report.
    :param filename: checkpoint file; defaults to the best model state file.
    """
    if filename is None:
      filename = self.best_model_state_file

    oCheckpoint = torch.load(filename)
    self.model.load_state_dict(oCheckpoint)
    self.model.eval()

    all_preds, all_labels = [], []
    with torch.no_grad():
      for inputs, labels, ids in tqdm(self.dataset.vs.loader, desc="Final Evaluation"):
        inputs, labels = inputs.to(self.device), labels.to(self.device)
        outputs = self.model(inputs)
        _, preds = torch.max(outputs, 1)
        all_preds.extend(preds.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

    oLog: MLExperimentLog = MLExperimentLog(self.training_logs_file)
    oLog.load(self.experiment_fs)

    oEvaluator = self.evaluation_report(all_labels, all_preds, oLog.logs, is_showing_plots=False, class_names=class_names)
    # TODO: Keep epoch number for best
    self.export_metrics(oEvaluator)

  # --------------------------------------------------------------------------------------------------------------
  def export_model(self):
    """Exports the model to ONNX (on CPU, with a dynamic batch axis) at self.best_model_state_onnx_file."""
    nInputDim = self.hprm["Data.ModelInputSize"]
    cpu_device = torch.device("cpu")
    self.model.to(cpu_device)
    self.model.eval()
    tInput = torch.randn(self.hprm["Training.BatchSize"], 3, nInputDim, nInputDim, requires_grad=True)
    # BUGFIX: Tensor.to() is not in-place; the returned tensor must be kept.
    tInput = tInput.to(cpu_device)
    # NOTE(review): output_names is empty while dynamic_axes references "output" —
    # confirm the exported graph's output name before relying on it downstream.
    torch.onnx.export(self.model, tInput, self.best_model_state_onnx_file,
                      export_params=True, opset_version=12, do_constant_folding=True,
                      input_names=['input'], output_names=[], dynamo=False,
                      dynamic_axes={
                        "input": {0: "batch"},
                        "output": {0: "batch"} }
                      )
    print('Model has been converted to ONNX')
  # --------------------------------------------------------------------------------------------------------------
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
from torch.optim.lr_scheduler import LRScheduler
|
|
2
|
+
|
|
3
|
+
class StairCaseLR(LRScheduler):
  """Piecewise-constant ("staircase") learning-rate schedule.

  `setup` is a list of [epoch, lr] pairs; from each milestone epoch onward the
  paired learning rate applies, until the next milestone is reached. The same
  rate is applied to every optimizer parameter group.
  """
  def __init__(self, optimizer, setup, last_epoch=-1):
    # Keep milestones ascending so get_lr can scan them front to back.
    self.setup = sorted(setup, key=lambda pair: pair[0])
    self.lrs = [rate for _, rate in self.setup]
    self.lrs_count = len(self.lrs)
    super().__init__(optimizer, last_epoch)

  def get_lr(self):
    """Return, for each param group, the lr of the last milestone <= current epoch."""
    current_epoch = self.last_epoch if self.last_epoch > 0 else 0

    active_lr = self.setup[0][1]
    for milestone, rate in self.setup:
      if milestone > current_epoch:
        break
      active_lr = rate

    return [active_lr] * len(self.optimizer.param_groups)
|
|
21
|
+
|
radnn/ml_system.py
CHANGED
|
@@ -6,7 +6,7 @@
|
|
|
6
6
|
# ______________________________________________________________________________________
|
|
7
7
|
# ......................................................................................
|
|
8
8
|
|
|
9
|
-
# Copyright (c) 2018-
|
|
9
|
+
# Copyright (c) 2018-2026 Pantelis I. Kaplanoglou
|
|
10
10
|
|
|
11
11
|
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
12
12
|
# of this software and associated documentation files (the "Software"), to deal
|
|
@@ -30,35 +30,19 @@
|
|
|
30
30
|
import os
|
|
31
31
|
import random
|
|
32
32
|
import numpy as np
|
|
33
|
-
import
|
|
33
|
+
from .core import AIGridInfo, RequiredLibs
|
|
34
|
+
from .utils import classproperty
|
|
35
|
+
from radnn.system import FileSystem
|
|
34
36
|
|
|
35
37
|
class MLSystem(object):
|
|
36
|
-
# --------------------------------------------------------------------------------------
|
|
37
38
|
_instance = None
|
|
38
|
-
|
|
39
|
-
|
|
39
|
+
|
|
40
|
+
@classproperty
|
|
41
|
+
def instance(cls):
|
|
40
42
|
if cls._instance is None:
|
|
41
43
|
cls._instance = cls()
|
|
42
|
-
mlsys = cls._instance
|
|
43
44
|
return cls._instance
|
|
44
|
-
|
|
45
|
-
@property
|
|
46
|
-
def is_using_tensorflow(self):
|
|
47
|
-
return self.is_tensorflow_installed and self._is_using_tensorflow
|
|
48
|
-
# --------------------------------------------------------------------------------------
|
|
49
|
-
@is_using_tensorflow.setter
|
|
50
|
-
def is_using_tensorflow(self, value):
|
|
51
|
-
self._is_using_tensorflow = value
|
|
52
|
-
self._is_using_torch = not value
|
|
53
|
-
# --------------------------------------------------------------------------------------
|
|
54
|
-
@property
|
|
55
|
-
def is_using_torch(self):
|
|
56
|
-
return self.is_torch_installed and self.is_using_torch
|
|
57
|
-
# --------------------------------------------------------------------------------------
|
|
58
|
-
@is_using_torch.setter
|
|
59
|
-
def is_using_torch(self, value):
|
|
60
|
-
self._is_using_torch = value
|
|
61
|
-
self._is_using_tensorflow = not value
|
|
45
|
+
|
|
62
46
|
# --------------------------------------------------------------------------------------
|
|
63
47
|
def __init__(self):
|
|
64
48
|
self._is_random_seed_initialized = False
|
|
@@ -66,48 +50,87 @@ class MLSystem(object):
|
|
|
66
50
|
self._seed = None
|
|
67
51
|
self.switches = dict()
|
|
68
52
|
self.switches["IsDebuggable"] = False
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
self.
|
|
72
|
-
self.
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
self.
|
|
53
|
+
self.req_libs: RequiredLibs = RequiredLibs()
|
|
54
|
+
|
|
55
|
+
self.framework = "other"
|
|
56
|
+
self.device = "CPU"
|
|
57
|
+
|
|
58
|
+
# Ensure cuBLAS reproducibility for torch and/or tensorflow
|
|
59
|
+
if self.req_libs.is_torch_installed:
|
|
60
|
+
os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8" # https://docs.nvidia.com/cuda/cublas/index.html#cublasApi_reproducibility
|
|
61
|
+
import torch
|
|
62
|
+
if self.req_libs.is_tensorflow_installed:
|
|
63
|
+
os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8"
|
|
64
|
+
import tensorflow as tf
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
if self.req_libs.is_tensorflow_installed:
|
|
68
|
+
self.framework = "tensorflow"
|
|
69
|
+
# By priority use torch for model trainers and data iterators (overrides co-existing tensorflow)
|
|
70
|
+
if self.req_libs.is_torch_installed:
|
|
71
|
+
self.framework = "torch"
|
|
72
|
+
|
|
73
|
+
self._info = None
|
|
74
|
+
|
|
75
|
+
# Initialize default device
|
|
76
|
+
if self.framework == "torch":
|
|
77
|
+
self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
|
78
|
+
elif self.framework == "tensorflow":
|
|
79
|
+
gpus = tf.config.list_physical_devices("GPU")
|
|
80
|
+
if gpus:
|
|
81
|
+
tf.config.set_visible_devices(gpus[0], "GPU")
|
|
82
|
+
tf.config.experimental.set_memory_growth(gpus[0], True)
|
|
83
|
+
self.device = "/GPU:0"
|
|
84
|
+
else:
|
|
85
|
+
self.device = "/CPU:0"
|
|
76
86
|
# --------------------------------------------------------------------------------------
|
|
77
87
|
@property
|
|
78
|
-
def
|
|
88
|
+
def info(self):
|
|
89
|
+
if self._info is None:
|
|
90
|
+
self._info = AIGridInfo()
|
|
91
|
+
self.info.discover_devices(self.framework)
|
|
92
|
+
return self._info
|
|
93
|
+
# --------------------------------------------------------------------------------------
|
|
94
|
+
@property
|
|
95
|
+
def filesys(self) -> FileSystem:
|
|
79
96
|
return self._filesys
|
|
80
|
-
|
|
97
|
+
|
|
81
98
|
@filesys.setter
|
|
82
99
|
def filesys(self, value):
|
|
83
100
|
self._filesys = value
|
|
84
|
-
|
|
85
101
|
# --------------------------------------------------------------------------------------
|
|
86
102
|
@property
|
|
87
103
|
def seed(self):
|
|
88
104
|
return self._seed
|
|
89
105
|
# --------------------------------------------------------------------------------------
|
|
90
|
-
# We are seeding the number generators to get some amount of determinism for the whole ML training process.
|
|
91
|
-
# For Tensorflow it is not ensuring 100% deterministic reproduction of an experiment on the GPU.
|
|
92
106
|
def random_seed_all(self, seed, is_done_once=False, is_parallel_deterministic=False):
|
|
107
|
+
'''
|
|
108
|
+
We are seeding the number generators to get some amount of determinism for the whole ML training process.
|
|
109
|
+
For Tensorflow it is not ensuring 100% deterministic reproduction of an experiment that runs on the GPU.
|
|
110
|
+
|
|
111
|
+
:param seed:
|
|
112
|
+
:param is_done_once:
|
|
113
|
+
:param is_parallel_deterministic:
|
|
114
|
+
:return:
|
|
115
|
+
'''
|
|
93
116
|
self._seed = seed
|
|
94
|
-
|
|
117
|
+
|
|
95
118
|
bContinue = True
|
|
96
119
|
if is_done_once:
|
|
97
120
|
bContinue = (not self._is_random_seed_initialized)
|
|
98
|
-
|
|
121
|
+
|
|
99
122
|
if bContinue:
|
|
100
123
|
random.seed(seed)
|
|
101
124
|
os.environ['PYTHONHASHSEED'] = str(seed)
|
|
102
125
|
np.random.seed(seed)
|
|
103
|
-
if
|
|
126
|
+
if self.req_libs.is_tensorflow_installed:
|
|
104
127
|
import tensorflow as tf
|
|
105
128
|
tf.compat.v1.reset_default_graph()
|
|
106
129
|
if is_parallel_deterministic:
|
|
107
130
|
tf.config.experimental.enable_op_determinism() # Enable determinism for num_parallel_calls
|
|
108
131
|
tf.random.set_seed(seed)
|
|
109
132
|
tf.keras.utils.set_random_seed(seed)
|
|
110
|
-
if
|
|
133
|
+
if self.req_libs.is_torch_installed:
|
|
111
134
|
import torch
|
|
112
135
|
torch.manual_seed(seed)
|
|
113
136
|
# GPU and multi-GPU
|
|
@@ -116,22 +139,15 @@ class MLSystem(object):
|
|
|
116
139
|
# For GPU determinism
|
|
117
140
|
torch.backends.cudnn.deterministic = True
|
|
118
141
|
torch.backends.cudnn.benchmark = False
|
|
119
|
-
|
|
142
|
+
torch.use_deterministic_algorithms(True)
|
|
143
|
+
|
|
120
144
|
self._is_random_seed_initialized = True
|
|
121
145
|
print("(>) Random seed set to %d" % seed)
|
|
122
146
|
# --------------------------------------------------------------------------------------
|
|
123
147
|
|
|
124
148
|
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
if not is_tensorflow_installed:
|
|
129
|
-
bIsInstalled = importlib.util.find_spec("tensorflow-gpu") is not None
|
|
130
|
-
return bIsInstalled
|
|
131
|
-
# ----------------------------------------------------------------------------------------------------------------------
|
|
149
|
+
mlsys: MLSystem = MLSystem.instance
|
|
150
|
+
|
|
151
|
+
|
|
132
152
|
|
|
133
153
|
|
|
134
|
-
mlsys: MLSystem = MLSystem.Instance()
|
|
135
|
-
mlsys.is_tensorflow_installed = is_tensorflow_installed()
|
|
136
|
-
mlsys.is_torch_installed = importlib.util.find_spec("torch") is not None
|
|
137
|
-
mlsys.is_opencv_installed = importlib.util.find_spec("cv2") is not None
|
radnn/models/__init__.py
ADDED
|
File without changes
|