gpbench 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (188) hide show
  1. gp_agent_tool/compute_dataset_feature.py +67 -0
  2. gp_agent_tool/config.py +65 -0
  3. gp_agent_tool/experience/create_masked_dataset_summary.py +97 -0
  4. gp_agent_tool/experience/dataset_summary_info.py +13 -0
  5. gp_agent_tool/experience/experience_info.py +12 -0
  6. gp_agent_tool/experience/get_matched_experience.py +111 -0
  7. gp_agent_tool/llm_client.py +119 -0
  8. gp_agent_tool/logging_utils.py +24 -0
  9. gp_agent_tool/main.py +347 -0
  10. gp_agent_tool/read_agent/__init__.py +46 -0
  11. gp_agent_tool/read_agent/nodes.py +674 -0
  12. gp_agent_tool/read_agent/prompts.py +547 -0
  13. gp_agent_tool/read_agent/python_repl_tool.py +165 -0
  14. gp_agent_tool/read_agent/state.py +101 -0
  15. gp_agent_tool/read_agent/workflow.py +54 -0
  16. gpbench/__init__.py +25 -0
  17. gpbench/_selftest.py +104 -0
  18. gpbench/method_class/BayesA/BayesA_class.py +141 -0
  19. gpbench/method_class/BayesA/__init__.py +5 -0
  20. gpbench/method_class/BayesA/_bayesfromR.py +96 -0
  21. gpbench/method_class/BayesA/_param_free_base_model.py +84 -0
  22. gpbench/method_class/BayesA/bayesAfromR.py +16 -0
  23. gpbench/method_class/BayesB/BayesB_class.py +140 -0
  24. gpbench/method_class/BayesB/__init__.py +5 -0
  25. gpbench/method_class/BayesB/_bayesfromR.py +96 -0
  26. gpbench/method_class/BayesB/_param_free_base_model.py +84 -0
  27. gpbench/method_class/BayesB/bayesBfromR.py +16 -0
  28. gpbench/method_class/BayesC/BayesC_class.py +141 -0
  29. gpbench/method_class/BayesC/__init__.py +4 -0
  30. gpbench/method_class/BayesC/_bayesfromR.py +96 -0
  31. gpbench/method_class/BayesC/_param_free_base_model.py +84 -0
  32. gpbench/method_class/BayesC/bayesCfromR.py +16 -0
  33. gpbench/method_class/CropARNet/CropARNet_class.py +186 -0
  34. gpbench/method_class/CropARNet/CropARNet_he_class.py +154 -0
  35. gpbench/method_class/CropARNet/__init__.py +5 -0
  36. gpbench/method_class/CropARNet/base_CropARNet_class.py +178 -0
  37. gpbench/method_class/Cropformer/Cropformer_class.py +308 -0
  38. gpbench/method_class/Cropformer/__init__.py +5 -0
  39. gpbench/method_class/Cropformer/cropformer_he_class.py +221 -0
  40. gpbench/method_class/DL_GWAS/DL_GWAS_class.py +250 -0
  41. gpbench/method_class/DL_GWAS/DL_GWAS_he_class.py +169 -0
  42. gpbench/method_class/DL_GWAS/__init__.py +5 -0
  43. gpbench/method_class/DNNGP/DNNGP_class.py +163 -0
  44. gpbench/method_class/DNNGP/DNNGP_he_class.py +138 -0
  45. gpbench/method_class/DNNGP/__init__.py +5 -0
  46. gpbench/method_class/DNNGP/base_dnngp_class.py +116 -0
  47. gpbench/method_class/DeepCCR/DeepCCR_class.py +172 -0
  48. gpbench/method_class/DeepCCR/DeepCCR_he_class.py +161 -0
  49. gpbench/method_class/DeepCCR/__init__.py +5 -0
  50. gpbench/method_class/DeepCCR/base_DeepCCR_class.py +209 -0
  51. gpbench/method_class/DeepGS/DeepGS_class.py +184 -0
  52. gpbench/method_class/DeepGS/DeepGS_he_class.py +150 -0
  53. gpbench/method_class/DeepGS/__init__.py +5 -0
  54. gpbench/method_class/DeepGS/base_deepgs_class.py +153 -0
  55. gpbench/method_class/EIR/EIR_class.py +276 -0
  56. gpbench/method_class/EIR/EIR_he_class.py +184 -0
  57. gpbench/method_class/EIR/__init__.py +5 -0
  58. gpbench/method_class/EIR/utils/__init__.py +0 -0
  59. gpbench/method_class/EIR/utils/array_output_modules.py +97 -0
  60. gpbench/method_class/EIR/utils/common.py +65 -0
  61. gpbench/method_class/EIR/utils/lcl_layers.py +235 -0
  62. gpbench/method_class/EIR/utils/logging.py +59 -0
  63. gpbench/method_class/EIR/utils/mlp_layers.py +92 -0
  64. gpbench/method_class/EIR/utils/models_locally_connected.py +642 -0
  65. gpbench/method_class/EIR/utils/transformer_models.py +546 -0
  66. gpbench/method_class/ElasticNet/ElasticNet_class.py +133 -0
  67. gpbench/method_class/ElasticNet/ElasticNet_he_class.py +91 -0
  68. gpbench/method_class/ElasticNet/__init__.py +5 -0
  69. gpbench/method_class/G2PDeep/G2PDeep_he_class.py +217 -0
  70. gpbench/method_class/G2PDeep/G2Pdeep_class.py +205 -0
  71. gpbench/method_class/G2PDeep/__init__.py +5 -0
  72. gpbench/method_class/G2PDeep/base_G2PDeep_class.py +209 -0
  73. gpbench/method_class/GBLUP/GBLUP_class.py +183 -0
  74. gpbench/method_class/GBLUP/__init__.py +5 -0
  75. gpbench/method_class/GEFormer/GEFormer_class.py +169 -0
  76. gpbench/method_class/GEFormer/GEFormer_he_class.py +137 -0
  77. gpbench/method_class/GEFormer/__init__.py +5 -0
  78. gpbench/method_class/GEFormer/gMLP_class.py +357 -0
  79. gpbench/method_class/LightGBM/LightGBM_class.py +224 -0
  80. gpbench/method_class/LightGBM/LightGBM_he_class.py +121 -0
  81. gpbench/method_class/LightGBM/__init__.py +5 -0
  82. gpbench/method_class/RF/RF_GPU_class.py +165 -0
  83. gpbench/method_class/RF/RF_GPU_he_class.py +124 -0
  84. gpbench/method_class/RF/__init__.py +5 -0
  85. gpbench/method_class/SVC/SVC_GPU.py +181 -0
  86. gpbench/method_class/SVC/SVC_GPU_he.py +106 -0
  87. gpbench/method_class/SVC/__init__.py +5 -0
  88. gpbench/method_class/SoyDNGP/AlexNet_206_class.py +179 -0
  89. gpbench/method_class/SoyDNGP/SoyDNGP_class.py +189 -0
  90. gpbench/method_class/SoyDNGP/SoyDNGP_he_class.py +112 -0
  91. gpbench/method_class/SoyDNGP/__init__.py +5 -0
  92. gpbench/method_class/XGBoost/XGboost_GPU_class.py +198 -0
  93. gpbench/method_class/XGBoost/XGboost_GPU_he_class.py +178 -0
  94. gpbench/method_class/XGBoost/__init__.py +5 -0
  95. gpbench/method_class/__init__.py +52 -0
  96. gpbench/method_class/rrBLUP/__init__.py +5 -0
  97. gpbench/method_class/rrBLUP/rrBLUP_class.py +140 -0
  98. gpbench/method_reg/BayesA/BayesA.py +116 -0
  99. gpbench/method_reg/BayesA/__init__.py +5 -0
  100. gpbench/method_reg/BayesA/_bayesfromR.py +96 -0
  101. gpbench/method_reg/BayesA/_param_free_base_model.py +84 -0
  102. gpbench/method_reg/BayesA/bayesAfromR.py +16 -0
  103. gpbench/method_reg/BayesB/BayesB.py +117 -0
  104. gpbench/method_reg/BayesB/__init__.py +5 -0
  105. gpbench/method_reg/BayesB/_bayesfromR.py +96 -0
  106. gpbench/method_reg/BayesB/_param_free_base_model.py +84 -0
  107. gpbench/method_reg/BayesB/bayesBfromR.py +16 -0
  108. gpbench/method_reg/BayesC/BayesC.py +115 -0
  109. gpbench/method_reg/BayesC/__init__.py +5 -0
  110. gpbench/method_reg/BayesC/_bayesfromR.py +96 -0
  111. gpbench/method_reg/BayesC/_param_free_base_model.py +84 -0
  112. gpbench/method_reg/BayesC/bayesCfromR.py +16 -0
  113. gpbench/method_reg/CropARNet/CropARNet.py +159 -0
  114. gpbench/method_reg/CropARNet/CropARNet_Hyperparameters.py +109 -0
  115. gpbench/method_reg/CropARNet/__init__.py +5 -0
  116. gpbench/method_reg/CropARNet/base_CropARNet.py +137 -0
  117. gpbench/method_reg/Cropformer/Cropformer.py +313 -0
  118. gpbench/method_reg/Cropformer/Cropformer_Hyperparameters.py +250 -0
  119. gpbench/method_reg/Cropformer/__init__.py +5 -0
  120. gpbench/method_reg/DL_GWAS/DL_GWAS.py +186 -0
  121. gpbench/method_reg/DL_GWAS/DL_GWAS_Hyperparameters.py +125 -0
  122. gpbench/method_reg/DL_GWAS/__init__.py +5 -0
  123. gpbench/method_reg/DNNGP/DNNGP.py +157 -0
  124. gpbench/method_reg/DNNGP/DNNGP_Hyperparameters.py +118 -0
  125. gpbench/method_reg/DNNGP/__init__.py +5 -0
  126. gpbench/method_reg/DNNGP/base_dnngp.py +101 -0
  127. gpbench/method_reg/DeepCCR/DeepCCR.py +149 -0
  128. gpbench/method_reg/DeepCCR/DeepCCR_Hyperparameters.py +110 -0
  129. gpbench/method_reg/DeepCCR/__init__.py +5 -0
  130. gpbench/method_reg/DeepCCR/base_DeepCCR.py +171 -0
  131. gpbench/method_reg/DeepGS/DeepGS.py +165 -0
  132. gpbench/method_reg/DeepGS/DeepGS_Hyperparameters.py +114 -0
  133. gpbench/method_reg/DeepGS/__init__.py +5 -0
  134. gpbench/method_reg/DeepGS/base_deepgs.py +98 -0
  135. gpbench/method_reg/EIR/EIR.py +258 -0
  136. gpbench/method_reg/EIR/EIR_Hyperparameters.py +178 -0
  137. gpbench/method_reg/EIR/__init__.py +5 -0
  138. gpbench/method_reg/EIR/utils/__init__.py +0 -0
  139. gpbench/method_reg/EIR/utils/array_output_modules.py +97 -0
  140. gpbench/method_reg/EIR/utils/common.py +65 -0
  141. gpbench/method_reg/EIR/utils/lcl_layers.py +235 -0
  142. gpbench/method_reg/EIR/utils/logging.py +59 -0
  143. gpbench/method_reg/EIR/utils/mlp_layers.py +92 -0
  144. gpbench/method_reg/EIR/utils/models_locally_connected.py +642 -0
  145. gpbench/method_reg/EIR/utils/transformer_models.py +546 -0
  146. gpbench/method_reg/ElasticNet/ElasticNet.py +123 -0
  147. gpbench/method_reg/ElasticNet/ElasticNet_he.py +83 -0
  148. gpbench/method_reg/ElasticNet/__init__.py +5 -0
  149. gpbench/method_reg/G2PDeep/G2PDeep_Hyperparameters.py +107 -0
  150. gpbench/method_reg/G2PDeep/G2Pdeep.py +166 -0
  151. gpbench/method_reg/G2PDeep/__init__.py +5 -0
  152. gpbench/method_reg/G2PDeep/base_G2PDeep.py +209 -0
  153. gpbench/method_reg/GBLUP/GBLUP_R.py +182 -0
  154. gpbench/method_reg/GBLUP/__init__.py +5 -0
  155. gpbench/method_reg/GEFormer/GEFormer.py +164 -0
  156. gpbench/method_reg/GEFormer/GEFormer_Hyperparameters.py +106 -0
  157. gpbench/method_reg/GEFormer/__init__.py +5 -0
  158. gpbench/method_reg/GEFormer/gMLP.py +341 -0
  159. gpbench/method_reg/LightGBM/LightGBM.py +237 -0
  160. gpbench/method_reg/LightGBM/LightGBM_Hyperparameters.py +77 -0
  161. gpbench/method_reg/LightGBM/__init__.py +5 -0
  162. gpbench/method_reg/MVP/MVP.py +182 -0
  163. gpbench/method_reg/MVP/MVP_Hyperparameters.py +126 -0
  164. gpbench/method_reg/MVP/__init__.py +5 -0
  165. gpbench/method_reg/MVP/base_MVP.py +113 -0
  166. gpbench/method_reg/RF/RF_GPU.py +174 -0
  167. gpbench/method_reg/RF/RF_Hyperparameters.py +163 -0
  168. gpbench/method_reg/RF/__init__.py +5 -0
  169. gpbench/method_reg/SVC/SVC_GPU.py +194 -0
  170. gpbench/method_reg/SVC/SVC_Hyperparameters.py +107 -0
  171. gpbench/method_reg/SVC/__init__.py +5 -0
  172. gpbench/method_reg/SoyDNGP/AlexNet_206.py +185 -0
  173. gpbench/method_reg/SoyDNGP/SoyDNGP.py +179 -0
  174. gpbench/method_reg/SoyDNGP/SoyDNGP_Hyperparameters.py +105 -0
  175. gpbench/method_reg/SoyDNGP/__init__.py +5 -0
  176. gpbench/method_reg/XGBoost/XGboost_GPU.py +188 -0
  177. gpbench/method_reg/XGBoost/XGboost_Hyperparameters.py +167 -0
  178. gpbench/method_reg/XGBoost/__init__.py +5 -0
  179. gpbench/method_reg/__init__.py +55 -0
  180. gpbench/method_reg/rrBLUP/__init__.py +5 -0
  181. gpbench/method_reg/rrBLUP/rrBLUP.py +123 -0
  182. gpbench-1.0.0.dist-info/METADATA +379 -0
  183. gpbench-1.0.0.dist-info/RECORD +188 -0
  184. gpbench-1.0.0.dist-info/WHEEL +5 -0
  185. gpbench-1.0.0.dist-info/entry_points.txt +2 -0
  186. gpbench-1.0.0.dist-info/top_level.txt +3 -0
  187. tests/test_import.py +80 -0
  188. tests/test_method.py +232 -0
@@ -0,0 +1,165 @@
1
+ """
2
+ 简化版 Python 代码解释器工具。
3
+
4
+ 参考 textMSA 中的 PythonREPL 实现,仅保留「执行代码」部分逻辑:
5
+ - 接收一段 Python 代码字符串
6
+ - 在受控环境中执行(支持表达式和多行脚本)
7
+ - 捕获 stdout / stderr
8
+ - 返回结构化的执行结果对象,便于上层判断是否成功
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ import contextlib
14
+ import io
15
+ import time
16
+ from dataclasses import dataclass
17
+ from typing import Any, Optional
18
+
19
+ from logging_utils import get_logger
20
+
21
+
22
+ logger = get_logger(__name__)
23
+
24
+
25
@dataclass
class PythonREPLExecutionResult:
    """Structured result of executing one code snippet in the REPL."""

    # Captured standard output of the executed code.
    stdout: str
    # Captured standard error (plus appended exception text on failure).
    stderr: str
    # Wall-clock execution time in seconds (0.0 when rejected before running).
    execution_time: float
    # True when the code ran to completion without raising.
    success: bool
    # The raised exception, when execution failed with an Exception
    # (None for BaseException subclasses such as SystemExit).
    error: Optional[Exception] = None
34
+
35
+
36
class PythonREPL:
    """
    Lightweight Python code executor.

    Design goals:
    - Keep the interface compatible with
      ``langchain_experimental.utilities.PythonREPL`` (expose ``run(code)``).
    - Keep one shared global namespace across calls so variables defined in
      one execution are visible to the next.
    - Capture stdout / stderr for the caller to inspect.

    NOTE(security): code is executed with ``eval``/``exec`` and is NOT
    sandboxed -- only run trusted input.
    """

    def __init__(self, max_code_length: int = 10_000) -> None:
        """
        :param max_code_length: maximum accepted code-string length; longer
            inputs are rejected without being executed.
        """
        self._max_code_length = max_code_length
        # Shared globals so successive run() calls can reuse variables.
        self._exec_globals: dict[str, Any] = {}
        logger.info(
            "PythonREPL initialized",
            extra={"max_code_length": max_code_length},
        )

    def run(self, code: str) -> PythonREPLExecutionResult:
        """Execute a piece of Python code and return a structured result."""
        if not code:
            return PythonREPLExecutionResult(
                stdout="",
                stderr="代码为空",
                execution_time=0.0,
                success=False,
            )

        if len(code) > self._max_code_length:
            return PythonREPLExecutionResult(
                stdout="",
                stderr=f"代码长度超过限制 ({len(code)} > {self._max_code_length})",
                execution_time=0.0,
                success=False,
            )

        logger.info("Executing Python code", extra={"code_length": len(code)})
        logger.debug("Code to execute", extra={"code_preview": code[:500]})

        start_time = time.perf_counter()

        # Prefer eval-mode compilation (single expression) so the resulting
        # value can be surfaced; fall back to exec for statements.
        # BUGFIX: the exec-mode compile can itself raise SyntaxError for
        # invalid code; previously that exception escaped run() instead of
        # being reported as a failed execution result.
        try:
            code_obj = compile(code, "<python-repl>", "eval")
            use_eval = True
        except SyntaxError:
            try:
                code_obj = compile(code, "<python-repl>", "exec")
                use_eval = False
            except SyntaxError as exc:
                execution_time = time.perf_counter() - start_time
                logger.error(
                    "Code compilation failed",
                    extra={"execution_time": execution_time, "error": str(exc)},
                    exc_info=True,
                )
                return PythonREPLExecutionResult(
                    stdout="",
                    stderr=str(exc),
                    execution_time=execution_time,
                    success=False,
                    error=exc,
                )

        stdout_buf = io.StringIO()
        stderr_buf = io.StringIO()

        try:
            with contextlib.redirect_stdout(stdout_buf), contextlib.redirect_stderr(
                stderr_buf
            ):
                if use_eval:
                    result = eval(code_obj, self._exec_globals)  # noqa: S307
                else:
                    exec(code_obj, self._exec_globals)  # noqa: S102
                    result = None

            execution_time = time.perf_counter() - start_time

            stdout = stdout_buf.getvalue()
            stderr = stderr_buf.getvalue()
            success = True

            # In eval mode, append the expression's value to stdout
            # (mirroring an interactive REPL) so callers can see it.
            if use_eval and result is not None:
                result_str = result if isinstance(result, str) else str(result)
                if stdout and not stdout.endswith("\n"):
                    stdout += "\n"
                stdout += result_str

            logger.info(
                "Code execution completed",
                extra={
                    "execution_time": execution_time,
                    "stdout_length": len(stdout),
                    "stderr_length": len(stderr),
                    "success": success,
                },
            )
            if stdout:
                logger.info("Code execution stdout", extra={"stdout": stdout})
            if stderr:
                logger.warning("Code execution stderr", extra={"stderr": stderr})

            return PythonREPLExecutionResult(
                stdout=stdout,
                stderr=stderr,
                execution_time=execution_time,
                success=success,
            )

        except BaseException as exc:  # noqa: BLE001
            # Catch BaseException so SystemExit / KeyboardInterrupt raised by
            # the executed code cannot terminate the host process.
            execution_time = time.perf_counter() - start_time
            stdout = stdout_buf.getvalue()
            stderr = stderr_buf.getvalue()

            # Append the exception text to stderr for the caller to display.
            if stderr:
                stderr = f"{stderr}\n{exc}"
            else:
                stderr = str(exc)

            logger.error(
                "Code execution failed",
                extra={
                    "execution_time": execution_time,
                    "error": stderr,
                },
                exc_info=True,
            )

            return PythonREPLExecutionResult(
                stdout=stdout,
                stderr=stderr,
                execution_time=execution_time,
                success=False,
                error=exc if isinstance(exc, Exception) else None,
            )
161
+
162
+
163
# Public API of this module.
__all__ = ["PythonREPL", "PythonREPLExecutionResult"]
164
+
165
+
@@ -0,0 +1,101 @@
1
+ """
2
+ Read Agent 状态定义(独立于 textMSA 项目)。
3
+ """
4
+
5
+ from __future__ import annotations
6
+
7
+ from typing import Optional, TypedDict
8
+
9
+ try:
10
+ from typing import NotRequired # type: ignore[attr-defined]
11
+ except Exception: # pragma: no cover
12
+ from typing_extensions import NotRequired
13
+
14
+ from logging_utils import get_logger
15
+
16
+
17
+ logger = get_logger(__name__)
18
+
19
+
20
class FileInfo(TypedDict, total=False):
    """
    Externally supplied file information.

    Note: the fields follow the caller's contract exactly; do not add
    extra fields.
    """

    # Display name of the file.
    file_name: str
    # Filesystem path of the file.
    file_path: str
    # Human-readable description of the file.
    description: str
    # Short preview of the file contents.
    preview: str
31
+
32
+
33
class PlanHistory(TypedDict, total=False):
    """Record of one executed plan in the agent's history."""

    file_name: str  # file name
    file_path: str  # file path
    plan_detail: str  # plan details
    result: Optional[str]  # execution result, once available
    order_reasoning: NotRequired[str]  # rationale for the chosen ordering
41
+
42
+
43
class ReadAgentState(TypedDict, total=False):
    """State carried through the Read Agent graph (simplified version)."""

    # The user's query.
    user_query: str
    # File list (externally supplied FileInfo entries).
    files: list[FileInfo]
    # Pre-formatted overview string of the files (built by the caller).
    file_overview: str
    # Response language.
    language: NotRequired[str]
    # History of plans produced/executed so far.
    history_plans: list[PlanHistory]
    # Index of the plan currently being executed.
    current_plan_index: int
    # Final answer, once produced.
    final_answer: NotRequired[Optional[str]]
    # Routing decision for the next node.
    next_route: NotRequired[str]
    # Optional user / project identifiers.
    user_id: NotRequired[str]
    project_id: NotRequired[str]
65
+
66
+
67
def build_initial_state(
    user_query: str,
    files: list[FileInfo],
    file_overview: str,
    user_id: Optional[str] = None,
    project_id: Optional[str] = None,
    language: str = "zh",
) -> ReadAgentState:
    """Assemble and return the initial ReadAgentState for one run."""
    state: ReadAgentState = {
        "user_query": user_query,
        "files": files,
        "file_overview": file_overview,
        "language": language,
        "history_plans": [],
        "current_plan_index": 0,
    }

    # Optional identifiers are only stored when truthy.
    for key, value in (("user_id", user_id), ("project_id", project_id)):
        if value:
            state[key] = value

    logger.info(
        "Read Agent initial state ready",
        extra={
            "files_len": len(files),
            "user_id": user_id,
            "project_id": project_id,
            "language": language,
        },
    )
    return state
100
+
101
+
@@ -0,0 +1,54 @@
1
+ """
2
+ Read Agent 工作流(独立版本)
3
+ """
4
+
5
+ from langgraph.graph import END, StateGraph
6
+
7
+ from logging_utils import get_logger
8
+ from .nodes import plan_node, execute_plan_node, read_node, answer_node
9
+ from .state import ReadAgentState
10
+
11
+
12
+ logger = get_logger(__name__)
13
+
14
+
15
def _route_after_execute(state: ReadAgentState) -> str:
    """Routing function: choose the next node from ``next_route``.

    Falls back to "read" when no route (or an empty one) was set.
    """
    return state.get("next_route", "") or "read"
19
+
20
+
21
def build_read_agent_workflow() -> StateGraph:
    """
    Build the Read Agent workflow graph.

    Topology: plan -> execute_plan -> (read | answer); "read" loops back to
    "execute_plan" until the router selects "answer", which ends the run.
    """
    workflow = StateGraph(ReadAgentState)

    workflow.add_node("plan", plan_node)
    workflow.add_node("execute_plan", execute_plan_node)
    workflow.add_node("read", read_node)
    workflow.add_node("answer", answer_node)

    workflow.set_entry_point("plan")

    workflow.add_edge("plan", "execute_plan")

    # After executing a plan step, either read more files or produce the
    # final answer, depending on state["next_route"].
    workflow.add_conditional_edges(
        "execute_plan",
        _route_after_execute,
        {
            "read": "read",
            "answer": "answer",
        },
    )

    workflow.add_edge("read", "execute_plan")
    workflow.add_edge("answer", END)

    return workflow
47
+
48
+
49
def compile_read_agent_workflow():
    """Build the Read Agent workflow and compile it into a runnable graph."""
    return build_read_agent_workflow().compile()
53
+
54
+
gpbench/__init__.py ADDED
@@ -0,0 +1,25 @@
1
+ """
2
+ GPBench: A benchmarking toolkit for genomic prediction.
3
+
4
+ This package provides implementations of various genomic prediction methods
5
+ including classic linear statistical approaches and machine learning/deep learning methods.
6
+ """
7
+
8
+ __version__ = "1.0.0"
9
+ __author__ = "GPBench Contributors"
10
+ __email__ = ""
11
+
12
+
13
+ from ._selftest import run_import_test
14
def test(verbose=True):
    """
    Run the GPBench import self-test.

    :param verbose: when True, print one line per successful import.
    :return: True if every import succeeded, False otherwise.

    Usage:
        import gpbench
        gpbench.test()
    """
    return run_import_test(verbose=verbose)
23
+
24
# Names exported by ``from gpbench import *``.
__all__ = ["test"]
25
+
gpbench/_selftest.py ADDED
@@ -0,0 +1,104 @@
1
+ # gpbench/_selftest.py
2
+
3
+ import importlib
4
+
5
+
6
# All import paths under test: (module path, attribute name) pairs,
# collected from the 41 supported method entry points.
IMPORT_TESTS = [

    # =========================
    # method_reg methods
    # =========================
    ("gpbench.method_reg.BayesA", "BayesA_reg"),
    ("gpbench.method_reg.BayesB", "BayesB_reg"),
    ("gpbench.method_reg.BayesC", "BayesC_reg"),
    ("gpbench.method_reg.CropARNet", "CropARNet_reg"),
    ("gpbench.method_reg.Cropformer", "Cropformer_reg"),
    ("gpbench.method_reg.DeepCCR", "DeepCCR_reg"),
    ("gpbench.method_reg.DeepGS", "DeepGS_reg"),
    ("gpbench.method_reg.DL_GWAS", "DL_GWAS_reg"),
    ("gpbench.method_reg.DNNGP", "DNNGP_reg"),
    ("gpbench.method_reg.EIR", "EIR_reg"),
    ("gpbench.method_reg.ElasticNet", "ElasticNet_reg"),
    ("gpbench.method_reg.G2PDeep", "G2PDeep_reg"),
    ("gpbench.method_reg.GBLUP", "GBLUP_reg"),
    ("gpbench.method_reg.GEFormer", "GEFormer_reg"),
    ("gpbench.method_reg.LightGBM", "LightGBM_reg"),
    ("gpbench.method_reg.MVP", "MVP_reg"),
    ("gpbench.method_reg.RF", "RF_reg"),
    ("gpbench.method_reg.rrBLUP", "rrBLUP_reg"),
    ("gpbench.method_reg.SoyDNGP", "SoyDNGP_reg"),
    ("gpbench.method_reg.SVC", "SVC_reg"),
    ("gpbench.method_reg.XGBoost", "XGBoost_reg"),

    # =========================
    # method_class methods
    # =========================
    ("gpbench.method_class.BayesA", "BayesA_class"),
    ("gpbench.method_class.BayesB", "BayesB_class"),
    ("gpbench.method_class.BayesC", "BayesC_class"),
    ("gpbench.method_class.CropARNet", "CropARNet_class"),
    ("gpbench.method_class.Cropformer", "Cropformer_class"),
    ("gpbench.method_class.DeepCCR", "DeepCCR_class"),
    ("gpbench.method_class.DeepGS", "DeepGS_class"),
    ("gpbench.method_class.DL_GWAS", "DL_GWAS_class"),
    ("gpbench.method_class.DNNGP", "DNNGP_class"),
    ("gpbench.method_class.EIR", "EIR_class"),
    ("gpbench.method_class.ElasticNet", "ElasticNet_class"),
    ("gpbench.method_class.G2PDeep", "G2PDeep_class"),
    ("gpbench.method_class.GBLUP", "GBLUP_class"),
    ("gpbench.method_class.GEFormer", "GEFormer_class"),
    ("gpbench.method_class.LightGBM", "LightGBM_class"),
    ("gpbench.method_class.RF", "RF_class"),
    ("gpbench.method_class.rrBLUP", "rrBLUP_class"),
    ("gpbench.method_class.SoyDNGP", "SoyDNGP_class"),
    ("gpbench.method_class.SVC", "SVC_class"),
    ("gpbench.method_class.XGBoost", "XGBoost_class"),
]
58
+
59
+
60
def run_import_test(verbose=True):
    """
    Import every (module, attribute) pair in IMPORT_TESTS and report results.

    :param verbose: when True, print one "[OK]" line per successful import.
    :return: True if all imports passed, False otherwise.
    """

    print("\n==============================")
    print(" GPBench Import Self Test ")
    print("==============================\n")

    passed_count = 0
    failures = []

    for module_name, obj_name in IMPORT_TESTS:
        try:
            # Import the module and resolve the attribute; either step failing
            # counts as a failed import.
            getattr(importlib.import_module(module_name), obj_name)
        except Exception as err:
            failures.append((module_name, obj_name, str(err)))
            print(f"[FAIL] from {module_name} import {obj_name}")
            print(f" Error: {err}")
        else:
            passed_count += 1
            if verbose:
                print(f"[OK] from {module_name} import {obj_name}")

    # Summary
    print("\n==============================")
    print(" Test Summary ")
    print("==============================")
    print(f"Total Methods Tested: {len(IMPORT_TESTS)}")
    print(f"Passed: {passed_count}")
    print(f"Failed: {len(failures)}")

    if failures:
        print("\n❌ Failed Imports:")
        for mod, obj, err in failures:
            print(f" - {mod}.{obj}: {err}")
        print("\nSelf-test FAILED.\n")
        return False

    print("\n✅ All imports passed successfully!\n")
    return True
@@ -0,0 +1,141 @@
1
+ import os
2
+ import time
3
+ import psutil
4
+ import swanlab
5
+ import argparse
6
+ import random
7
+ import torch
8
+ import numpy as np
9
+ import pandas as pd
10
+ import sys
11
+ from .bayesAfromR import BayesA
12
+ from sklearn.model_selection import StratifiedKFold
13
+ from sklearn.metrics import accuracy_score, precision_recall_fscore_support, confusion_matrix
14
+ from sklearn.preprocessing import LabelEncoder
15
+
16
+
17
def parse_args():
    """Parse the command-line options for the BayesA classification benchmark."""
    cli = argparse.ArgumentParser(description="Argument parser")
    cli.add_argument('--methods', type=str, default='BayesA/', help='Model name')
    cli.add_argument('--species', type=str, default='Human/', help='Species name')
    cli.add_argument('--phe', type=str, default='', help='Phenotype name')
    cli.add_argument(
        '--task',
        type=str,
        default='classification',
        choices=['regression', 'classification'],
        help='Task: regression or classification',
    )
    cli.add_argument('--data_dir', type=str, default='../../data/', help='Path to data directory')
    cli.add_argument('--result_dir', type=str, default='result/', help='Path to result directory')
    return cli.parse_args()
26
+
27
+
28
def load_data(args):
    """
    Load genotype and phenotype arrays for the configured species.

    :param args: namespace providing ``data_dir`` and ``species`` attributes.
    :return: tuple (xData, yData, nsample, nsnp, names) where xData is the
        (nsample, nsnp) genotype matrix, yData the phenotype array stored
        under ``arr_0`` and names the companion array stored under ``arr_1``.
    """
    species_dir = os.path.join(args.data_dir, args.species)
    xData = np.load(os.path.join(species_dir, 'genotype.npz'))["arr_0"]
    # FIX: read the phenotype archive once instead of opening the same
    # .npz file twice (once per array).
    phenotype_npz = np.load(os.path.join(species_dir, 'phenotype.npz'))
    yData = phenotype_npz["arr_0"]
    names = phenotype_npz["arr_1"]
    nsample = xData.shape[0]
    nsnp = xData.shape[1]
    print("Number of samples: ", nsample)
    print("Number of SNPs: ", nsnp)
    return xData, yData, nsample, nsnp, names
37
+
38
+
39
def set_seed(seed=42):
    """Seed every RNG source (python, numpy, torch) for reproducible runs."""
    for seeder in (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed_all,
    ):
        seeder(seed)
    # Trade cuDNN autotuning speed for deterministic kernels.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
46
+
47
+
48
def run_nested_cv(args, data, label):
    """
    Run a 10-fold stratified one-vs-rest BayesA classification benchmark.

    For each fold, one BayesA regressor is fitted per class on a binary 0/1
    target; the class whose regressor scores highest wins.  Per-fold
    predictions go to ``fold{fold}.csv`` and the label-encoder mapping to
    ``label_mapping.npy`` inside the result directory.

    :param args: parsed CLI namespace (uses result_dir, methods, species).
    :param data: (n_samples, n_snps) genotype matrix.
    :param label: raw class labels, one per sample.
    """
    result_dir = os.path.join(args.result_dir, args.methods + args.species)
    os.makedirs(result_dir, exist_ok=True)
    print("Starting 10-fold cross-validation...")

    kf = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)
    le = LabelEncoder()
    label_all = le.fit_transform(label)

    # Persist the label encoding so saved predictions can be decoded later.
    np.save(os.path.join(result_dir, 'label_mapping.npy'), le.classes_)

    all_acc, all_prec, all_rec, all_f1 = [], [], [], []
    start_time = time.time()
    process = psutil.Process(os.getpid())

    for fold, (train_index, test_index) in enumerate(kf.split(data, label_all)):
        fold_start = time.time()
        print(f"\n===== Fold {fold} =====")
        X_train, X_test = data[train_index], data[test_index]
        Y_train, Y_test = label_all[train_index], label_all[test_index]

        if torch.cuda.is_available():
            torch.cuda.reset_peak_memory_stats()

        # One-vs-rest: fit one BayesA regressor per class on a binary target.
        classes = np.unique(Y_train)
        scores = np.zeros((len(classes), X_test.shape[0]))
        for idx, cls in enumerate(classes):
            y_train_bin = (Y_train == cls).astype(float)
            model_k = BayesA(task="regression")
            model_k.fit(X_train, y_train_bin)
            scores[idx, :] = model_k.predict(X_test)

        # Predicted class = the one whose regressor scored highest.
        Y_pred = np.argmax(scores, axis=0)

        acc = accuracy_score(Y_test, Y_pred)
        # FIX: dropped the per-fold confusion_matrix computation -- its
        # result was assigned but never used.
        prec, rec, f1, _ = precision_recall_fscore_support(Y_test, Y_pred, average='macro', zero_division=0)

        all_acc.append(acc)
        all_prec.append(prec)
        all_rec.append(rec)
        all_f1.append(f1)

        fold_time = time.time() - fold_start
        fold_gpu_mem = torch.cuda.max_memory_allocated() / 1024**2 if torch.cuda.is_available() else 0
        fold_cpu_mem = process.memory_info().rss / 1024**2
        print(f'Fold {fold}: ACC={acc:.4f}, PREC={prec:.4f}, REC={rec:.4f}, F1={f1:.4f}, Time={fold_time:.2f}s, '
              f'GPU={fold_gpu_mem:.2f}MB, CPU={fold_cpu_mem:.2f}MB')

        # ========== Save per-fold predictions ==========
        Y_test_orig = le.inverse_transform(Y_test)
        Y_pred_orig = le.inverse_transform(Y_pred)
        results_df = pd.DataFrame({'Y_test': Y_test_orig, 'Y_pred': Y_pred_orig})
        results_df.to_csv(os.path.join(result_dir, f"fold{fold}.csv"), index=False)

    print("\n===== Cross-validation summary =====")
    print(f"Average ACC: {np.mean(all_acc):.4f} ± {np.std(all_acc):.4f}")
    print(f"Average PREC: {np.mean(all_prec):.4f} ± {np.std(all_prec):.4f}")
    print(f"Average REC: {np.mean(all_rec):.4f} ± {np.std(all_rec):.4f}")
    print(f"Average F1 : {np.mean(all_f1):.4f} ± {np.std(all_f1):.4f}")
    print(f"Total time : {time.time() - start_time:.2f}s")
110
+
111
+
112
def BayesA_class():
    """
    Entry point: run the BayesA one-vs-rest classification benchmark for
    every configured species directory.
    """
    set_seed(42)
    # FIX: guard CUDA-only calls so the benchmark also runs on CPU-only
    # hosts (consistent with the is_available() guard in run_nested_cv);
    # the unguarded reset_peak_memory_stats below raised without CUDA.
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
    args = parse_args()
    all_species = ["Human/Sim/"]
    for species in all_species:
        args.species = species
        X, Y, nsamples, nsnp, names = load_data(args)
        args.phe = names
        print("Starting run " + args.methods + args.species)
        # Only the first phenotype column is used as the class label.
        label = Y[:, 0]

        if args.task == 'classification':
            # Replace missing labels with the mode (fall back to 0 when the
            # whole column is NaN).
            s = pd.Series(label)
            fill_val = s.mode().iloc[0] if not s.dropna().empty else 0
            label = np.nan_to_num(label, nan=fill_val)

        start_time = time.time()
        if torch.cuda.is_available():
            torch.cuda.reset_peak_memory_stats()
        process = psutil.Process(os.getpid())

        run_nested_cv(args, data=X, label=label)

        elapsed_time = time.time() - start_time
        print(f"Total running time: {elapsed_time:.2f} s")
        print("Successfully finished!")


if __name__ == "__main__":
    BayesA_class()
@@ -0,0 +1,5 @@
1
# Re-export the classification entry point.
from .BayesA_class import BayesA_class

# Convenience alias: ``BayesA`` refers to the same callable.
BayesA = BayesA_class

__all__ = ["BayesA","BayesA_class"]
@@ -0,0 +1,96 @@
1
+ import numpy as np
2
+ import rpy2
3
+ from rpy2.robjects import numpy2ri
4
+ rpy2.robjects.numpy2ri.activate()
5
+ import rpy2.robjects as robjects
6
+ from rpy2.robjects.packages import importr
7
+ from . import _param_free_base_model
8
+ from joblib import Parallel, delayed
9
+
10
class Bayes_R(_param_free_base_model.ParamFreeBaseModel):
    """
    Implementation of a class for the Bayesian alphabet (backed by R's BGLR).

    *Attributes*

    *Inherited attributes*

    See :obj:`~easypheno.model._param_free_base_model.ParamFreeBaseModel` for more information on the attributes.

    *Additional attributes*

    - mu (*np.array*): intercept
    - beta (*np.array*): effect size
    - model_name (*str*): model to use (BayesA, BayesB or BayesC)
    - n_iter (*int*): iterations for sampling
    - burn_in (*int*): warmup/burnin for sampling
    """
    standard_encoding = '012'
    possible_encodings = ['101']

    def __init__(self, task: str, model_name: str, encoding: str = None, n_iter: int = 1000, burn_in: int = 200):
        super().__init__(task=task, encoding=encoding)
        self.model_name = model_name
        self.n_iter = n_iter
        self.burn_in = burn_in
        # Number of MCMC chains run (and joblib workers used) by fit().
        self.n_jobs = 1
        self.mu = None
        self.beta = None

    def _run_chain(self, chain_num: int, R_X, R_y):
        """
        Run a single BGLR MCMC chain and return its (beta, mu) estimates.
        """
        BGLR = importr('BGLR')

        # Run BGLR for the configured model (BayesA/B/C) on a single chain.
        ETA = robjects.r['list'](robjects.r['list'](X=R_X, model=self.model_name))
        fmBB = BGLR.BGLR(y=R_y, ETA=ETA, verbose=False, nIter=self.n_iter, burnIn=self.burn_in)

        # Extract the results for this chain.
        beta_chain = np.asarray(fmBB.rx2('ETA').rx2(1).rx2('b'))
        mu_chain = np.asarray(fmBB.rx2('mu'))  # intercept for this chain
        return beta_chain, mu_chain

    def fit(self, X: np.array, y: np.array) -> np.array:
        """
        Fit the Bayesian model by averaging ``n_jobs`` independent chains.

        See :obj:`~easypheno.model._param_free_base_model.ParamFreeBaseModel` for more information.
        """
        # Create R objects for X and y.
        # (FIX: removed unused local importr('base')/importr('BGLR') handles
        # and the dead commented-out single-chain code path.)
        R_X = robjects.r['matrix'](X, nrow=X.shape[0], ncol=X.shape[1])
        R_y = robjects.FloatVector(y)

        results = Parallel(n_jobs=self.n_jobs)(
            delayed(self._run_chain)(chain_num, R_X, R_y) for chain_num in range(self.n_jobs)
        )

        # Aggregate results from all chains.
        beta_chains = [result[0] for result in results]
        mu_chains = [result[1] for result in results]

        # Average beta and mu over all chains.
        self.beta = np.mean(beta_chains, axis=0)
        self.mu = np.mean(mu_chains, axis=0)

        return self.predict(X_in=X)

    def predict(self, X_in: np.array) -> np.array:
        """
        Predict as intercept + X_in @ beta.

        See :obj:`~easypheno.model._param_free_base_model.ParamFreeBaseModel` for more information.
        """
        return self.mu + np.matmul(X_in, self.beta)