gpbench-1.0.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (188)
  1. gp_agent_tool/compute_dataset_feature.py +67 -0
  2. gp_agent_tool/config.py +65 -0
  3. gp_agent_tool/experience/create_masked_dataset_summary.py +97 -0
  4. gp_agent_tool/experience/dataset_summary_info.py +13 -0
  5. gp_agent_tool/experience/experience_info.py +12 -0
  6. gp_agent_tool/experience/get_matched_experience.py +111 -0
  7. gp_agent_tool/llm_client.py +119 -0
  8. gp_agent_tool/logging_utils.py +24 -0
  9. gp_agent_tool/main.py +347 -0
  10. gp_agent_tool/read_agent/__init__.py +46 -0
  11. gp_agent_tool/read_agent/nodes.py +674 -0
  12. gp_agent_tool/read_agent/prompts.py +547 -0
  13. gp_agent_tool/read_agent/python_repl_tool.py +165 -0
  14. gp_agent_tool/read_agent/state.py +101 -0
  15. gp_agent_tool/read_agent/workflow.py +54 -0
  16. gpbench/__init__.py +25 -0
  17. gpbench/_selftest.py +104 -0
  18. gpbench/method_class/BayesA/BayesA_class.py +141 -0
  19. gpbench/method_class/BayesA/__init__.py +5 -0
  20. gpbench/method_class/BayesA/_bayesfromR.py +96 -0
  21. gpbench/method_class/BayesA/_param_free_base_model.py +84 -0
  22. gpbench/method_class/BayesA/bayesAfromR.py +16 -0
  23. gpbench/method_class/BayesB/BayesB_class.py +140 -0
  24. gpbench/method_class/BayesB/__init__.py +5 -0
  25. gpbench/method_class/BayesB/_bayesfromR.py +96 -0
  26. gpbench/method_class/BayesB/_param_free_base_model.py +84 -0
  27. gpbench/method_class/BayesB/bayesBfromR.py +16 -0
  28. gpbench/method_class/BayesC/BayesC_class.py +141 -0
  29. gpbench/method_class/BayesC/__init__.py +4 -0
  30. gpbench/method_class/BayesC/_bayesfromR.py +96 -0
  31. gpbench/method_class/BayesC/_param_free_base_model.py +84 -0
  32. gpbench/method_class/BayesC/bayesCfromR.py +16 -0
  33. gpbench/method_class/CropARNet/CropARNet_class.py +186 -0
  34. gpbench/method_class/CropARNet/CropARNet_he_class.py +154 -0
  35. gpbench/method_class/CropARNet/__init__.py +5 -0
  36. gpbench/method_class/CropARNet/base_CropARNet_class.py +178 -0
  37. gpbench/method_class/Cropformer/Cropformer_class.py +308 -0
  38. gpbench/method_class/Cropformer/__init__.py +5 -0
  39. gpbench/method_class/Cropformer/cropformer_he_class.py +221 -0
  40. gpbench/method_class/DL_GWAS/DL_GWAS_class.py +250 -0
  41. gpbench/method_class/DL_GWAS/DL_GWAS_he_class.py +169 -0
  42. gpbench/method_class/DL_GWAS/__init__.py +5 -0
  43. gpbench/method_class/DNNGP/DNNGP_class.py +163 -0
  44. gpbench/method_class/DNNGP/DNNGP_he_class.py +138 -0
  45. gpbench/method_class/DNNGP/__init__.py +5 -0
  46. gpbench/method_class/DNNGP/base_dnngp_class.py +116 -0
  47. gpbench/method_class/DeepCCR/DeepCCR_class.py +172 -0
  48. gpbench/method_class/DeepCCR/DeepCCR_he_class.py +161 -0
  49. gpbench/method_class/DeepCCR/__init__.py +5 -0
  50. gpbench/method_class/DeepCCR/base_DeepCCR_class.py +209 -0
  51. gpbench/method_class/DeepGS/DeepGS_class.py +184 -0
  52. gpbench/method_class/DeepGS/DeepGS_he_class.py +150 -0
  53. gpbench/method_class/DeepGS/__init__.py +5 -0
  54. gpbench/method_class/DeepGS/base_deepgs_class.py +153 -0
  55. gpbench/method_class/EIR/EIR_class.py +276 -0
  56. gpbench/method_class/EIR/EIR_he_class.py +184 -0
  57. gpbench/method_class/EIR/__init__.py +5 -0
  58. gpbench/method_class/EIR/utils/__init__.py +0 -0
  59. gpbench/method_class/EIR/utils/array_output_modules.py +97 -0
  60. gpbench/method_class/EIR/utils/common.py +65 -0
  61. gpbench/method_class/EIR/utils/lcl_layers.py +235 -0
  62. gpbench/method_class/EIR/utils/logging.py +59 -0
  63. gpbench/method_class/EIR/utils/mlp_layers.py +92 -0
  64. gpbench/method_class/EIR/utils/models_locally_connected.py +642 -0
  65. gpbench/method_class/EIR/utils/transformer_models.py +546 -0
  66. gpbench/method_class/ElasticNet/ElasticNet_class.py +133 -0
  67. gpbench/method_class/ElasticNet/ElasticNet_he_class.py +91 -0
  68. gpbench/method_class/ElasticNet/__init__.py +5 -0
  69. gpbench/method_class/G2PDeep/G2PDeep_he_class.py +217 -0
  70. gpbench/method_class/G2PDeep/G2Pdeep_class.py +205 -0
  71. gpbench/method_class/G2PDeep/__init__.py +5 -0
  72. gpbench/method_class/G2PDeep/base_G2PDeep_class.py +209 -0
  73. gpbench/method_class/GBLUP/GBLUP_class.py +183 -0
  74. gpbench/method_class/GBLUP/__init__.py +5 -0
  75. gpbench/method_class/GEFormer/GEFormer_class.py +169 -0
  76. gpbench/method_class/GEFormer/GEFormer_he_class.py +137 -0
  77. gpbench/method_class/GEFormer/__init__.py +5 -0
  78. gpbench/method_class/GEFormer/gMLP_class.py +357 -0
  79. gpbench/method_class/LightGBM/LightGBM_class.py +224 -0
  80. gpbench/method_class/LightGBM/LightGBM_he_class.py +121 -0
  81. gpbench/method_class/LightGBM/__init__.py +5 -0
  82. gpbench/method_class/RF/RF_GPU_class.py +165 -0
  83. gpbench/method_class/RF/RF_GPU_he_class.py +124 -0
  84. gpbench/method_class/RF/__init__.py +5 -0
  85. gpbench/method_class/SVC/SVC_GPU.py +181 -0
  86. gpbench/method_class/SVC/SVC_GPU_he.py +106 -0
  87. gpbench/method_class/SVC/__init__.py +5 -0
  88. gpbench/method_class/SoyDNGP/AlexNet_206_class.py +179 -0
  89. gpbench/method_class/SoyDNGP/SoyDNGP_class.py +189 -0
  90. gpbench/method_class/SoyDNGP/SoyDNGP_he_class.py +112 -0
  91. gpbench/method_class/SoyDNGP/__init__.py +5 -0
  92. gpbench/method_class/XGBoost/XGboost_GPU_class.py +198 -0
  93. gpbench/method_class/XGBoost/XGboost_GPU_he_class.py +178 -0
  94. gpbench/method_class/XGBoost/__init__.py +5 -0
  95. gpbench/method_class/__init__.py +52 -0
  96. gpbench/method_class/rrBLUP/__init__.py +5 -0
  97. gpbench/method_class/rrBLUP/rrBLUP_class.py +140 -0
  98. gpbench/method_reg/BayesA/BayesA.py +116 -0
  99. gpbench/method_reg/BayesA/__init__.py +5 -0
  100. gpbench/method_reg/BayesA/_bayesfromR.py +96 -0
  101. gpbench/method_reg/BayesA/_param_free_base_model.py +84 -0
  102. gpbench/method_reg/BayesA/bayesAfromR.py +16 -0
  103. gpbench/method_reg/BayesB/BayesB.py +117 -0
  104. gpbench/method_reg/BayesB/__init__.py +5 -0
  105. gpbench/method_reg/BayesB/_bayesfromR.py +96 -0
  106. gpbench/method_reg/BayesB/_param_free_base_model.py +84 -0
  107. gpbench/method_reg/BayesB/bayesBfromR.py +16 -0
  108. gpbench/method_reg/BayesC/BayesC.py +115 -0
  109. gpbench/method_reg/BayesC/__init__.py +5 -0
  110. gpbench/method_reg/BayesC/_bayesfromR.py +96 -0
  111. gpbench/method_reg/BayesC/_param_free_base_model.py +84 -0
  112. gpbench/method_reg/BayesC/bayesCfromR.py +16 -0
  113. gpbench/method_reg/CropARNet/CropARNet.py +159 -0
  114. gpbench/method_reg/CropARNet/CropARNet_Hyperparameters.py +109 -0
  115. gpbench/method_reg/CropARNet/__init__.py +5 -0
  116. gpbench/method_reg/CropARNet/base_CropARNet.py +137 -0
  117. gpbench/method_reg/Cropformer/Cropformer.py +313 -0
  118. gpbench/method_reg/Cropformer/Cropformer_Hyperparameters.py +250 -0
  119. gpbench/method_reg/Cropformer/__init__.py +5 -0
  120. gpbench/method_reg/DL_GWAS/DL_GWAS.py +186 -0
  121. gpbench/method_reg/DL_GWAS/DL_GWAS_Hyperparameters.py +125 -0
  122. gpbench/method_reg/DL_GWAS/__init__.py +5 -0
  123. gpbench/method_reg/DNNGP/DNNGP.py +157 -0
  124. gpbench/method_reg/DNNGP/DNNGP_Hyperparameters.py +118 -0
  125. gpbench/method_reg/DNNGP/__init__.py +5 -0
  126. gpbench/method_reg/DNNGP/base_dnngp.py +101 -0
  127. gpbench/method_reg/DeepCCR/DeepCCR.py +149 -0
  128. gpbench/method_reg/DeepCCR/DeepCCR_Hyperparameters.py +110 -0
  129. gpbench/method_reg/DeepCCR/__init__.py +5 -0
  130. gpbench/method_reg/DeepCCR/base_DeepCCR.py +171 -0
  131. gpbench/method_reg/DeepGS/DeepGS.py +165 -0
  132. gpbench/method_reg/DeepGS/DeepGS_Hyperparameters.py +114 -0
  133. gpbench/method_reg/DeepGS/__init__.py +5 -0
  134. gpbench/method_reg/DeepGS/base_deepgs.py +98 -0
  135. gpbench/method_reg/EIR/EIR.py +258 -0
  136. gpbench/method_reg/EIR/EIR_Hyperparameters.py +178 -0
  137. gpbench/method_reg/EIR/__init__.py +5 -0
  138. gpbench/method_reg/EIR/utils/__init__.py +0 -0
  139. gpbench/method_reg/EIR/utils/array_output_modules.py +97 -0
  140. gpbench/method_reg/EIR/utils/common.py +65 -0
  141. gpbench/method_reg/EIR/utils/lcl_layers.py +235 -0
  142. gpbench/method_reg/EIR/utils/logging.py +59 -0
  143. gpbench/method_reg/EIR/utils/mlp_layers.py +92 -0
  144. gpbench/method_reg/EIR/utils/models_locally_connected.py +642 -0
  145. gpbench/method_reg/EIR/utils/transformer_models.py +546 -0
  146. gpbench/method_reg/ElasticNet/ElasticNet.py +123 -0
  147. gpbench/method_reg/ElasticNet/ElasticNet_he.py +83 -0
  148. gpbench/method_reg/ElasticNet/__init__.py +5 -0
  149. gpbench/method_reg/G2PDeep/G2PDeep_Hyperparameters.py +107 -0
  150. gpbench/method_reg/G2PDeep/G2Pdeep.py +166 -0
  151. gpbench/method_reg/G2PDeep/__init__.py +5 -0
  152. gpbench/method_reg/G2PDeep/base_G2PDeep.py +209 -0
  153. gpbench/method_reg/GBLUP/GBLUP_R.py +182 -0
  154. gpbench/method_reg/GBLUP/__init__.py +5 -0
  155. gpbench/method_reg/GEFormer/GEFormer.py +164 -0
  156. gpbench/method_reg/GEFormer/GEFormer_Hyperparameters.py +106 -0
  157. gpbench/method_reg/GEFormer/__init__.py +5 -0
  158. gpbench/method_reg/GEFormer/gMLP.py +341 -0
  159. gpbench/method_reg/LightGBM/LightGBM.py +237 -0
  160. gpbench/method_reg/LightGBM/LightGBM_Hyperparameters.py +77 -0
  161. gpbench/method_reg/LightGBM/__init__.py +5 -0
  162. gpbench/method_reg/MVP/MVP.py +182 -0
  163. gpbench/method_reg/MVP/MVP_Hyperparameters.py +126 -0
  164. gpbench/method_reg/MVP/__init__.py +5 -0
  165. gpbench/method_reg/MVP/base_MVP.py +113 -0
  166. gpbench/method_reg/RF/RF_GPU.py +174 -0
  167. gpbench/method_reg/RF/RF_Hyperparameters.py +163 -0
  168. gpbench/method_reg/RF/__init__.py +5 -0
  169. gpbench/method_reg/SVC/SVC_GPU.py +194 -0
  170. gpbench/method_reg/SVC/SVC_Hyperparameters.py +107 -0
  171. gpbench/method_reg/SVC/__init__.py +5 -0
  172. gpbench/method_reg/SoyDNGP/AlexNet_206.py +185 -0
  173. gpbench/method_reg/SoyDNGP/SoyDNGP.py +179 -0
  174. gpbench/method_reg/SoyDNGP/SoyDNGP_Hyperparameters.py +105 -0
  175. gpbench/method_reg/SoyDNGP/__init__.py +5 -0
  176. gpbench/method_reg/XGBoost/XGboost_GPU.py +188 -0
  177. gpbench/method_reg/XGBoost/XGboost_Hyperparameters.py +167 -0
  178. gpbench/method_reg/XGBoost/__init__.py +5 -0
  179. gpbench/method_reg/__init__.py +55 -0
  180. gpbench/method_reg/rrBLUP/__init__.py +5 -0
  181. gpbench/method_reg/rrBLUP/rrBLUP.py +123 -0
  182. gpbench-1.0.0.dist-info/METADATA +379 -0
  183. gpbench-1.0.0.dist-info/RECORD +188 -0
  184. gpbench-1.0.0.dist-info/WHEEL +5 -0
  185. gpbench-1.0.0.dist-info/entry_points.txt +2 -0
  186. gpbench-1.0.0.dist-info/top_level.txt +3 -0
  187. tests/test_import.py +80 -0
  188. tests/test_method.py +232 -0
@@ -0,0 +1,276 @@
+ # by ww
+ import time
+ import argparse
+ import pynvml
+ import psutil
+ import os
+ import torch
+ import random
+ import swanlab
+ import numpy as np
+ import pandas as pd
+ from torch.utils.data import DataLoader, TensorDataset
+ from sklearn.model_selection import StratifiedKFold, train_test_split
+ from sklearn.preprocessing import LabelEncoder
+ import sys
+ sys.path.append("..")
+ from .utils.models_locally_connected import LCLModel
+ from .utils.common import DataDimensions
+ from sklearn.metrics import accuracy_score, precision_recall_fscore_support
+ from .EIR_he_class import Hyperparameter
+ os.environ['CUDA_VISIBLE_DEVICES'] = '0'
+
+ def parse_args():
+     parser = argparse.ArgumentParser(description="Argument parser")
+     # Add arguments
+     parser.add_argument('--methods', type=str, default='EIR/', help='Method name (result subdirectory)')
+     parser.add_argument('--species', type=str, default='', help='Species name')
+     parser.add_argument('--phe', type=str, default='', help='Phenotype name')
+     parser.add_argument('--data_dir', type=str, default='../../data/')
+     parser.add_argument('--result_dir', type=str, default='result/')
+
+     parser.add_argument('--epochs', type=int, default=1000, help='Number of training rounds')
+     parser.add_argument('--batch_size', type=int, default=32, help='Batch size')
+     parser.add_argument('--learning_rate', type=float, default=0.01, help='Learning rate')
+     parser.add_argument('--patience', type=int, default=50, help='Patience for early stopping')
+     args = parser.parse_args()
+     return args
+
+
+ def load_data(args):
+     xData = np.load(os.path.join(args.data_dir, args.species, 'genotype.npz'))["arr_0"]
+     yData = np.load(os.path.join(args.data_dir, args.species, 'phenotype.npz'))["arr_0"]
+     names = np.load(os.path.join(args.data_dir, args.species, 'phenotype.npz'))["arr_1"]
+
+     nsample = xData.shape[0]
+     nsnp = xData.shape[1]
+     print("Number of samples: ", nsample)
+     print("Number of SNPs: ", nsnp)
+     return xData, yData, nsample, nsnp, names
+
+ def get_gpu_mem_by_pid(pid, handle=None):
+     if handle is None:
+         return 0.0
+     try:
+         procs = pynvml.nvmlDeviceGetComputeRunningProcesses(handle)
+         for p in procs:
+             if p.pid == pid:
+                 return p.usedGpuMemory / 1024**2
+         return 0.0
+     except Exception:
+         return 0.0
+
+ def init():
+     seed = 42
+     os.environ['PYTHONHASHSEED'] = str(seed)
+     random.seed(seed)
+     np.random.seed(seed)
+     torch.manual_seed(seed)
+     torch.cuda.manual_seed(seed)
+
+ def set_seed(seed=42):
+     random.seed(seed)
+     np.random.seed(seed)
+     torch.manual_seed(seed)
+     torch.cuda.manual_seed_all(seed)
+     torch.backends.cudnn.deterministic = True
+     torch.backends.cudnn.benchmark = False
+
+ def one_hot_encode_ATCG(char):
+     c = char
+     if char == 0:
+         return [1, 0, 0, 0]
+     elif char == 1:
+         return [0, 1, 0, 0]
+     elif char == 2:
+         return [0, 0, 1, 0]
+     else:
+         return [0, 0, 0, 1]
+
+ def one_hot_seq(df: pd.DataFrame, nsnp: int):
+     one_hot_df = df.applymap(lambda x: one_hot_encode_ATCG(x))
+     one_hot_df = one_hot_df.values.tolist()
+     one_hot_df = np.array(one_hot_df)
+     one_hot_df = np.reshape(one_hot_df, (one_hot_df.shape[0], -1))
+     tensor_data = torch.Tensor(one_hot_df)
+     return tensor_data
+
+
+ def train_model(model, train_loader, valid_loader, optimizer, criterion, num_epochs, patience, device):
+
+     model.to(device)
+     best_loss = float('inf')
+     best_state = None
+     trigger_times = 0
+
+     for epoch in range(num_epochs):
+         model.train()
+         train_loss = 0.0
+         for inputs, labels in train_loader:
+             inputs, labels = inputs.to(device), labels.to(device)
+             optimizer.zero_grad()
+             outputs = model(inputs)
+             loss = criterion(outputs, labels)
+             loss.backward()
+             optimizer.step()
+             train_loss += loss.item() * inputs.size(0)
+         #scheduler.step()
+         # ---------- Validation ----------
+         model.eval()
+         valid_loss = 0.0
+         with torch.no_grad():
+             for inputs, labels in valid_loader:
+                 inputs, labels = inputs.to(device), labels.to(device)
+                 outputs = model(inputs)
+                 loss = criterion(outputs, labels)
+                 valid_loss += loss.item() * inputs.size(0)
+
+         train_loss /= len(train_loader.dataset)
+         valid_loss /= len(valid_loader.dataset)
+
+         # ---------- Early stopping ----------
+         if valid_loss < best_loss:
+             best_loss = valid_loss
+             best_state = model.state_dict()
+             trigger_times = 0
+         else:
+             trigger_times += 1
+             if trigger_times >= patience:
+                 print(f"Early stopping at epoch {epoch+1}")
+                 break
+     if best_state is not None:
+         model.load_state_dict(best_state)
+     return best_loss
+
+ def predict(model, test_loader, device):
+     model.eval()
+     y_pred = []
+     with torch.no_grad():
+         for inputs, _ in test_loader:
+             inputs = inputs.to(device)
+             outputs = model(inputs)  # (batch_size, num_classes)
+             preds = torch.argmax(outputs, dim=1)
+             y_pred.append(preds.cpu().numpy())
+     y_pred = np.concatenate(y_pred, axis=0)
+     return y_pred
+
+
+ def run_nested_cv(args, data, label, nsnp, num_classes, device, gpu_handle=None):
+     result_dir = os.path.join(args.result_dir, args.methods + args.species + args.phe)
+     os.makedirs(result_dir, exist_ok=True)
+     print("Starting 10-fold cross-validation...")
+     kf = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)
+     data = data.reshape(data.shape[0], 1, nsnp, -1)
+
+     all_acc, all_prec, all_rec, all_f1 = [], [], [], []
+     time_star = time.time()
+     for fold, (train_index, test_index) in enumerate(kf.split(data, label)):
+         print(f"Running fold {fold}...")
+         process = psutil.Process(os.getpid())
+         fold_start_time = time.time()
+
+         X_train, X_test = data[train_index], data[test_index]
+         y_train, y_test = label[train_index], label[test_index]
+
+         X_train_sub, X_valid, y_train_sub, y_valid = train_test_split(
+             X_train, y_train, test_size=0.1, stratify=y_train, random_state=42
+         )
+
+         x_train_tensor = torch.from_numpy(X_train_sub).float().to(device)
+         y_train_tensor = torch.from_numpy(y_train_sub).long().to(device)
+         x_valid_tensor = torch.from_numpy(X_valid).float().to(device)
+         y_valid_tensor = torch.from_numpy(y_valid).long().to(device)
+         x_test_tensor = torch.from_numpy(X_test).float().to(device)
+         y_test_tensor = torch.from_numpy(y_test).long().to(device)
+
+         train_data = TensorDataset(x_train_tensor, y_train_tensor)
+         valid_data = TensorDataset(x_valid_tensor, y_valid_tensor)
+         test_data = TensorDataset(x_test_tensor, y_test_tensor)
+
+         train_loader = DataLoader(train_data, args.batch_size, shuffle=True)
+         valid_loader = DataLoader(valid_data, args.batch_size, shuffle=False)
+         test_loader = DataLoader(test_data, args.batch_size, shuffle=False)
+
+         model = LCLModel(DataDimensions(channels=1, height=nsnp, width=1)).to(device)
+         in_features = model.fc_2.in_features
+         model.fc_2 = torch.nn.Linear(in_features, num_classes).to(device)
+         if isinstance(model.downsample_identity, torch.nn.Linear):
+             identity_in_features = model.downsample_identity.in_features
+             model.downsample_identity = torch.nn.Linear(identity_in_features, num_classes).to(device)
+         else:
+             identity_in_features = model.lcl_blocks[-1].out_features
+             model.downsample_identity = torch.nn.Linear(identity_in_features, num_classes).to(device)
+
+         optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate, weight_decay=1e-4)
+         loss_fn = torch.nn.CrossEntropyLoss()
+
+         train_model(model, train_loader, valid_loader, optimizer, loss_fn, args.epochs, args.patience, device)
+         y_pred = predict(model, test_loader, device)
+
+         acc = accuracy_score(y_test, y_pred)
+         prec, rec, f1, _ = precision_recall_fscore_support(
+             y_test, y_pred, average="macro", zero_division=0
+         )
+
+         all_acc.append(acc)
+         all_prec.append(prec)
+         all_rec.append(rec)
+         all_f1.append(f1)
+
+         fold_time = time.time() - fold_start_time
+         fold_gpu_mem = get_gpu_mem_by_pid(os.getpid(), gpu_handle)
+         fold_cpu_mem = process.memory_info().rss / 1024**2
+         print(f'Fold {fold}: ACC={acc:.4f}, PREC={prec:.4f}, REC={rec:.4f}, F1={f1:.4f}, Time={fold_time:.2f}s, '
+               f'GPU={fold_gpu_mem:.2f}MB, CPU={fold_cpu_mem:.2f}MB')
+
+         if torch.cuda.is_available():
+             torch.cuda.empty_cache()
+             torch.cuda.reset_peak_memory_stats()
+         results_df = pd.DataFrame({'Y_test': y_test, 'Y_pred': y_pred})
+         results_df.to_csv(os.path.join(result_dir, f"fold{fold}.csv"), index=False)
+
+     print("\n===== Cross-validation summary =====")
+     print(f"ACC : {np.mean(all_acc):.4f} ± {np.std(all_acc):.4f}")
+     print(f"PREC: {np.mean(all_prec):.4f} ± {np.std(all_prec):.4f}")
+     print(f"REC : {np.mean(all_rec):.4f} ± {np.std(all_rec):.4f}")
+     print(f"F1 : {np.mean(all_f1):.4f} ± {np.std(all_f1):.4f}")
+     print(f"Time: {time.time() - time_star:.2f}s")
+
+
+ def EIR_class():
+     start = time.time()
+     set_seed(42)
+     pynvml.nvmlInit()
+     gpu_handle = pynvml.nvmlDeviceGetHandleByIndex(0)
+
+     args = parse_args()
+     all_species = ["Human/Sim/"]
+     for i in range(len(all_species)):
+         args.species = all_species[i]
+         device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+         args.device = device
+         X, Y, nsamples, nsnp, names = load_data(args)
+
+         print("starting run " + args.methods + args.species)
+         label_raw = np.nan_to_num(Y[:, 0])
+         le = LabelEncoder()
+         label = le.fit_transform(label_raw)
+         num_classes = len(le.classes_)
+
+         best_params = Hyperparameter(X, label, nsnp, num_classes)
+         args.learning_rate = best_params['learning_rate']
+         args.batch_size = best_params['batch_size']
+         args.patience = best_params['patience']
+         start_time = time.time()
+         if torch.cuda.is_available():
+             torch.cuda.reset_peak_memory_stats()
+         process = psutil.Process(os.getpid())
+         run_nested_cv(args, data=X, label=label, nsnp=nsnp, num_classes=num_classes, device=args.device, gpu_handle=gpu_handle)
+
+         elapsed_time = time.time() - start_time
+         print(f"Running time: {elapsed_time:.2f}s")
+         print("successfully")
+
+
+ if __name__ == '__main__':
+     EIR_class()
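
For context, a minimal sketch of the data layout that load_data() in the hunk above (gpbench/method_class/EIR/EIR_class.py) expects. This is inferred from the np.load calls, not from documentation shipped with the wheel: <data_dir>/<species>/genotype.npz holds a samples x SNPs matrix as arr_0, and phenotype.npz holds phenotype values as arr_0 plus phenotype names as arr_1. The toy sizes below are arbitrary:

import os
import numpy as np

data_dir, species = "../../data/", "Human/Sim/"   # argparse defaults used by EIR_class()
os.makedirs(os.path.join(data_dir, species), exist_ok=True)

genotype = np.random.randint(0, 3, size=(100, 500))                 # 100 samples x 500 SNPs
phenotype = np.random.randint(0, 2, size=(100, 1)).astype(float)    # one binary trait column
names = np.array(["trait_sim"])                                     # phenotype column names (hypothetical)

# Positional arrays are stored as arr_0, arr_1, matching the keys load_data() reads.
np.savez(os.path.join(data_dir, species, "genotype.npz"), genotype)
np.savez(os.path.join(data_dir, species, "phenotype.npz"), phenotype, names)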
@@ -0,0 +1,184 @@
+ import os
+ import time
+ import psutil
+ import random
+ import torch
+ import numpy as np
+ import optuna
+ from sklearn.model_selection import StratifiedKFold, train_test_split
+ from sklearn.metrics import accuracy_score, precision_recall_fscore_support
+ from torch.utils.data import DataLoader, TensorDataset
+ from optuna.exceptions import TrialPruned
+ from .utils.models_locally_connected import LCLModel
+ from .utils.common import DataDimensions
+ os.environ['CUDA_VISIBLE_DEVICES'] = '0'
+
+ device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+
+ def train_model(model, train_loader, valid_loader, optimizer, criterion, num_epochs, patience, device):
+     model.to(device)
+     best_loss = float('inf')
+     best_state = None
+     trigger_times = 0
+
+     for epoch in range(num_epochs):
+         model.train()
+         train_loss = 0.0
+         for inputs, labels in train_loader:
+             inputs, labels = inputs.to(device), labels.to(device)
+             optimizer.zero_grad()
+             outputs = model(inputs)
+             loss = criterion(outputs, labels)
+             loss.backward()
+             optimizer.step()
+             train_loss += loss.item() * inputs.size(0)
+
+         model.eval()
+         valid_loss = 0.0
+         with torch.no_grad():
+             for inputs, labels in valid_loader:
+                 inputs, labels = inputs.to(device), labels.to(device)
+                 outputs = model(inputs)
+                 loss = criterion(outputs, labels)
+                 valid_loss += loss.item() * inputs.size(0)
+
+         train_loss /= len(train_loader.dataset)
+         valid_loss /= len(valid_loader.dataset)
+
+         # ---------- Early stopping ----------
+         if valid_loss < best_loss:
+             best_loss = valid_loss
+             best_state = model.state_dict()
+             trigger_times = 0
+         else:
+             trigger_times += 1
+             if trigger_times >= patience:
+                 print(f"Early stopping at epoch {epoch+1}")
+                 break
+
+     if best_state is not None:
+         model.load_state_dict(best_state)
+     return best_loss
+
+ def predict(model, test_loader, device):
+     model.eval()
+     y_pred = []
+     with torch.no_grad():
+         for inputs, _ in test_loader:
+             inputs = inputs.to(device)
+             outputs = model(inputs)  # (batch_size, num_classes)
+             preds = torch.argmax(outputs, dim=1)
+             y_pred.append(preds.cpu().numpy())
+     y_pred = np.concatenate(y_pred, axis=0)
+     return y_pred
+
+ def run_nested_cv_with_early_stopping(data, label, nsnp, num_classes, learning_rate, patience, batch_size, epochs=1000):
+     device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+     print("Starting 10-fold cross-validation...")
+     kf = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)
+     all_acc, all_prec, all_rec, all_f1 = [], [], [], []
+
+     for fold, (train_index, test_index) in enumerate(kf.split(data, label)):
+         print(f"Running fold {fold}...")
+         process = psutil.Process(os.getpid())
+         fold_start_time = time.time()
+
+         X_train, X_test = data[train_index], data[test_index]
+         y_train, y_test = label[train_index], label[test_index]
+
+         X_train_sub, X_valid, y_train_sub, y_valid = train_test_split(
+             X_train, y_train, test_size=0.1, stratify=y_train, random_state=42
+         )
+
+         x_train_tensor = torch.from_numpy(X_train_sub).float().to(device)
+         y_train_tensor = torch.from_numpy(y_train_sub).long().to(device)
+         x_valid_tensor = torch.from_numpy(X_valid).float().to(device)
+         y_valid_tensor = torch.from_numpy(y_valid).long().to(device)
+         x_test_tensor = torch.from_numpy(X_test).float().to(device)
+         y_test_tensor = torch.from_numpy(y_test).long().to(device)
+
+         train_data = TensorDataset(x_train_tensor, y_train_tensor)
+         valid_data = TensorDataset(x_valid_tensor, y_valid_tensor)
+         test_data = TensorDataset(x_test_tensor, y_test_tensor)
+
+         train_loader = DataLoader(train_data, batch_size, shuffle=True)
+         valid_loader = DataLoader(valid_data, batch_size, shuffle=False)
+         test_loader = DataLoader(test_data, batch_size, shuffle=False)
+
+         model = LCLModel(DataDimensions(channels=1, height=nsnp, width=1)).to(device)
+         in_features = model.fc_2.in_features
+         model.fc_2 = torch.nn.Linear(in_features, num_classes).to(device)
+         if isinstance(model.downsample_identity, torch.nn.Linear):
+             identity_in_features = model.downsample_identity.in_features
+             model.downsample_identity = torch.nn.Linear(identity_in_features, num_classes).to(device)
+         else:
+             identity_in_features = model.lcl_blocks[-1].out_features
+             model.downsample_identity = torch.nn.Linear(identity_in_features, num_classes).to(device)
+
+         optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=1e-4)
+         loss_fn = torch.nn.CrossEntropyLoss()
+
+         train_model(model, train_loader, valid_loader, optimizer, loss_fn, epochs, patience, device)
+         y_pred = predict(model, test_loader, device)
+         acc = accuracy_score(y_test, y_pred)
+         prec, rec, f1, _ = precision_recall_fscore_support(
+             y_test, y_pred, average="macro", zero_division=0
+         )
+
+         if np.isnan(f1) or f1 <= 0:
+             print(f"Fold {fold} resulted in NaN or zero F1, pruning the trial...")
+             raise TrialPruned()
+         all_acc.append(acc)
+         all_prec.append(prec)
+         all_rec.append(rec)
+         all_f1.append(f1)
+
+         fold_time = time.time() - fold_start_time
+         fold_cpu_mem = process.memory_info().rss / 1024**2
+         print(f'Fold {fold}: ACC={acc:.4f}, PREC={prec:.4f}, REC={rec:.4f}, F1={f1:.4f}, '
+               f'Time={fold_time:.2f}s, CPU={fold_cpu_mem:.2f}MB')
+
+     print("\n===== Cross-validation summary =====")
+     print(f"Average ACC: {np.mean(all_acc):.4f} ± {np.std(all_acc):.4f}")
+     print(f"Average PREC: {np.mean(all_prec):.4f} ± {np.std(all_prec):.4f}")
+     print(f"Average REC: {np.mean(all_rec):.4f} ± {np.std(all_rec):.4f}")
+     print(f"Average F1 : {np.mean(all_f1):.4f} ± {np.std(all_f1):.4f}")
+
+     return float(np.mean(all_f1)) if all_f1 else 0.0
+
+ def set_seed(seed=42):
+     random.seed(seed)
+     np.random.seed(seed)
+     torch.manual_seed(seed)
+     if torch.cuda.is_available():
+         torch.cuda.manual_seed_all(seed)
+     torch.backends.cudnn.deterministic = True
+     torch.backends.cudnn.benchmark = False
+
+ def Hyperparameter(data, label, nsnp, num_classes):
+     set_seed(42)
+
+     def objective(trial):
+         learning_rate = trial.suggest_float("learning_rate", 1e-4, 0.1, log=True)
+         batch_size = trial.suggest_categorical("batch_size", [32, 64, 128])
+         patience = trial.suggest_int("patience", 10, 100, step=10)
+         try:
+             f1_score = run_nested_cv_with_early_stopping(
+                 data=data,
+                 label=label,
+                 nsnp=nsnp,
+                 num_classes=num_classes,
+                 learning_rate=learning_rate,
+                 patience=patience,
+                 batch_size=batch_size
+             )
+         except TrialPruned:
+             return float("-inf")
+         return f1_score
+
+     study = optuna.create_study(direction="maximize")
+     study.optimize(objective, n_trials=20)
+
+     print("Best hyperparameters:", study.best_params)
+     print("successfully")
+     return study.best_params
@@ -0,0 +1,5 @@
+ from .EIR_class import EIR_class
+
+ EIR = EIR_class
+
+ __all__ = ["EIR", "EIR_class"]
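
Given the re-export above (gpbench/method_class/EIR/__init__.py), a minimal usage sketch, assuming the wheel is installed and the data layout described earlier exists. EIR() parses its flags with argparse, so running it in a plain script picks up the defaults:

# Programmatic use of the classification-mode EIR runner (sketch, not shipped documentation).
from gpbench.method_class.EIR import EIR  # alias for EIR_class

# Runs the Optuna hyperparameter search followed by 10-fold cross-validation,
# using the argparse defaults (data under ../../data/, results under result/).
EIR()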
File without changes
@@ -0,0 +1,97 @@
+ from dataclasses import dataclass
+ from typing import TYPE_CHECKING, Literal, Type
+
+ import torch
+ from torch import nn
+
+ from eir.models.input.array.array_models import al_pre_normalization
+ from eir.models.input.array.models_locally_connected import LCLModel, LCLModelConfig
+ from eir.models.layers.projection_layers import get_projection_layer
+ from eir.models.output.array.output_array_models_cnn import (
+     CNNUpscaleModel,
+     CNNUpscaleModelConfig,
+ )
+
+ if TYPE_CHECKING:
+     from eir.setup.input_setup_modules.common import DataDimensions
+
+ al_array_model_types = Literal["lcl", "cnn"]
+ al_output_array_model_classes = Type[LCLModel] | Type[CNNUpscaleModel]
+ al_output_array_models = LCLModel | CNNUpscaleModel
+ al_output_array_model_config_classes = (
+     Type["LCLOutputModelConfig"] | Type[CNNUpscaleModelConfig]
+ )
+
+
+ @dataclass
+ class LCLOutputModelConfig(LCLModelConfig):
+     cutoff: int | Literal["auto"] = "auto"
+
+
+ @dataclass
+ class ArrayOutputModuleConfig:
+
+     """
+     :param model_type:
+         Which type of image model to use.
+
+     :param model_init_config:
+         Configuration used to initialise model.
+     """
+
+     model_type: al_array_model_types
+     model_init_config: LCLOutputModelConfig
+     pre_normalization: al_pre_normalization = None
+
+
+ class ArrayOutputWrapperModule(nn.Module):
+     def __init__(
+         self,
+         feature_extractor: al_output_array_models,
+         output_name: str,
+         target_data_dimensions: "DataDimensions",
+     ):
+         super().__init__()
+         self.feature_extractor = feature_extractor
+         self.output_name = output_name
+         self.data_dimensions = target_data_dimensions
+
+         self.target_width = self.data_dimensions.num_elements()
+         self.target_shape = self.data_dimensions.full_shape()
+
+         diff_tolerance = get_diff_tolerance(num_target_elements=self.target_width)
+
+         self.projection_head = get_projection_layer(
+             input_dimension=self.feature_extractor.num_out_features,
+             target_dimension=self.target_width,
+             projection_layer_type="lcl_residual",
+             lcl_diff_tolerance=diff_tolerance,
+         )
+
+     def forward(self, x: torch.Tensor) -> dict[str, torch.Tensor]:
+         out = self.feature_extractor(x)
+
+         out = out.reshape(out.shape[0], -1)
+         out = self.projection_head(out)
+
+         out = out[:, : self.target_width]
+
+         out = out.reshape(-1, *self.target_shape)
+
+         return {self.output_name: out}
+
+
+ def get_diff_tolerance(num_target_elements: int) -> int:
+     return int(0.001 * num_target_elements)
+
+
+ def get_array_output_module(
+     feature_extractor: al_output_array_models,
+     output_name: str,
+     target_data_dimensions: "DataDimensions",
+ ) -> ArrayOutputWrapperModule:
+     return ArrayOutputWrapperModule(
+         feature_extractor=feature_extractor,
+         output_name=output_name,
+         target_data_dimensions=target_data_dimensions,
+     )
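
The projection head above is allowed a small mismatch between its output size and the target size; get_diff_tolerance() caps that slack at 0.1% of the target element count. A quick arithmetic check (the function body is copied from the hunk above so the snippet runs on its own):

def get_diff_tolerance(num_target_elements: int) -> int:
    # 0.1% of the target size, truncated to an integer.
    return int(0.001 * num_target_elements)

print(get_diff_tolerance(10_000))  # 10 -> up to 10 elements of slack
print(get_diff_tolerance(500))     # 0  -> small targets must match exactly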
@@ -0,0 +1,65 @@
+ from dataclasses import dataclass
+ from pathlib import Path
+ from typing import List, Optional, Tuple
+
+ import numpy as np
+
+ # from eir.data_load.data_source_modules.deeplake_ops import (
+ #     get_deeplake_input_source_iterable,
+ #     is_deeplake_dataset,
+ #     load_deeplake_dataset,
+ # )
+ # from eir.data_load.label_setup import get_file_path_iterator
+
+
+ @dataclass
+ class DataDimensions:
+     channels: int
+     height: int
+     width: int
+     extra_dims: tuple[int, ...] = tuple()
+
+     def num_elements(self) -> int:
+         base = self.channels * self.height * self.width
+         return int(base * np.prod(self.extra_dims))
+
+     def full_shape(self) -> Tuple[int, ...]:
+         return (self.channels, self.height, self.width) + self.extra_dims
+
+
+ # def get_data_dimension_from_data_source(
+ #     data_source: Path,
+ #     deeplake_inner_key: Optional[str] = None,
+ # ) -> DataDimensions:
+ #     """
+ #     TODO: Make more dynamic / robust. Also weird to say "width" for a 1D vector.
+ #     """
+ #
+ #     if is_deeplake_dataset(data_source=str(data_source)):
+ #         assert deeplake_inner_key is not None, data_source
+ #         deeplake_ds = load_deeplake_dataset(data_source=str(data_source))
+ #         deeplake_iter = get_deeplake_input_source_iterable(
+ #             deeplake_dataset=deeplake_ds, inner_key=deeplake_inner_key
+ #         )
+ #         shape = next(deeplake_iter).shape
+ #     else:
+ #         iterator = get_file_path_iterator(data_source=data_source)
+ #         path = next(iterator)
+ #         shape = np.load(file=path).shape
+ #
+ #     extra_dims: tuple[int, ...] = tuple()
+ #     if len(shape) == 1:
+ #         channels, height, width = 1, 1, shape[0]
+ #     elif len(shape) == 2:
+ #         channels, height, width = 1, shape[0], shape[1]
+ #     elif len(shape) == 3:
+ #         channels, height, width = shape
+ #     else:
+ #         channels, height, width = shape[0], shape[1], shape[2]
+ #         extra_dims = shape[3:]
+ #
+ #     return DataDimensions(
+ #         channels=channels, height=height, width=width, extra_dims=extra_dims
+ #     )
+
+
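
For orientation, a short sketch of how this DataDimensions helper is consumed: the EIR classes above build the model with DataDimensions(channels=1, height=nsnp, width=1), so a genotype matrix with nsnp SNPs maps to a (1, nsnp, 1) input shape. The SNP count below is illustrative; the import path follows the file list at the top of this page and assumes the wheel is installed:

from gpbench.method_class.EIR.utils.common import DataDimensions

dims = DataDimensions(channels=1, height=500, width=1)  # one channel x 500 SNPs x width 1
print(dims.num_elements())  # 500
print(dims.full_shape())    # (1, 500, 1)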