gpbench-1.0.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (188)
  1. gp_agent_tool/compute_dataset_feature.py +67 -0
  2. gp_agent_tool/config.py +65 -0
  3. gp_agent_tool/experience/create_masked_dataset_summary.py +97 -0
  4. gp_agent_tool/experience/dataset_summary_info.py +13 -0
  5. gp_agent_tool/experience/experience_info.py +12 -0
  6. gp_agent_tool/experience/get_matched_experience.py +111 -0
  7. gp_agent_tool/llm_client.py +119 -0
  8. gp_agent_tool/logging_utils.py +24 -0
  9. gp_agent_tool/main.py +347 -0
  10. gp_agent_tool/read_agent/__init__.py +46 -0
  11. gp_agent_tool/read_agent/nodes.py +674 -0
  12. gp_agent_tool/read_agent/prompts.py +547 -0
  13. gp_agent_tool/read_agent/python_repl_tool.py +165 -0
  14. gp_agent_tool/read_agent/state.py +101 -0
  15. gp_agent_tool/read_agent/workflow.py +54 -0
  16. gpbench/__init__.py +25 -0
  17. gpbench/_selftest.py +104 -0
  18. gpbench/method_class/BayesA/BayesA_class.py +141 -0
  19. gpbench/method_class/BayesA/__init__.py +5 -0
  20. gpbench/method_class/BayesA/_bayesfromR.py +96 -0
  21. gpbench/method_class/BayesA/_param_free_base_model.py +84 -0
  22. gpbench/method_class/BayesA/bayesAfromR.py +16 -0
  23. gpbench/method_class/BayesB/BayesB_class.py +140 -0
  24. gpbench/method_class/BayesB/__init__.py +5 -0
  25. gpbench/method_class/BayesB/_bayesfromR.py +96 -0
  26. gpbench/method_class/BayesB/_param_free_base_model.py +84 -0
  27. gpbench/method_class/BayesB/bayesBfromR.py +16 -0
  28. gpbench/method_class/BayesC/BayesC_class.py +141 -0
  29. gpbench/method_class/BayesC/__init__.py +4 -0
  30. gpbench/method_class/BayesC/_bayesfromR.py +96 -0
  31. gpbench/method_class/BayesC/_param_free_base_model.py +84 -0
  32. gpbench/method_class/BayesC/bayesCfromR.py +16 -0
  33. gpbench/method_class/CropARNet/CropARNet_class.py +186 -0
  34. gpbench/method_class/CropARNet/CropARNet_he_class.py +154 -0
  35. gpbench/method_class/CropARNet/__init__.py +5 -0
  36. gpbench/method_class/CropARNet/base_CropARNet_class.py +178 -0
  37. gpbench/method_class/Cropformer/Cropformer_class.py +308 -0
  38. gpbench/method_class/Cropformer/__init__.py +5 -0
  39. gpbench/method_class/Cropformer/cropformer_he_class.py +221 -0
  40. gpbench/method_class/DL_GWAS/DL_GWAS_class.py +250 -0
  41. gpbench/method_class/DL_GWAS/DL_GWAS_he_class.py +169 -0
  42. gpbench/method_class/DL_GWAS/__init__.py +5 -0
  43. gpbench/method_class/DNNGP/DNNGP_class.py +163 -0
  44. gpbench/method_class/DNNGP/DNNGP_he_class.py +138 -0
  45. gpbench/method_class/DNNGP/__init__.py +5 -0
  46. gpbench/method_class/DNNGP/base_dnngp_class.py +116 -0
  47. gpbench/method_class/DeepCCR/DeepCCR_class.py +172 -0
  48. gpbench/method_class/DeepCCR/DeepCCR_he_class.py +161 -0
  49. gpbench/method_class/DeepCCR/__init__.py +5 -0
  50. gpbench/method_class/DeepCCR/base_DeepCCR_class.py +209 -0
  51. gpbench/method_class/DeepGS/DeepGS_class.py +184 -0
  52. gpbench/method_class/DeepGS/DeepGS_he_class.py +150 -0
  53. gpbench/method_class/DeepGS/__init__.py +5 -0
  54. gpbench/method_class/DeepGS/base_deepgs_class.py +153 -0
  55. gpbench/method_class/EIR/EIR_class.py +276 -0
  56. gpbench/method_class/EIR/EIR_he_class.py +184 -0
  57. gpbench/method_class/EIR/__init__.py +5 -0
  58. gpbench/method_class/EIR/utils/__init__.py +0 -0
  59. gpbench/method_class/EIR/utils/array_output_modules.py +97 -0
  60. gpbench/method_class/EIR/utils/common.py +65 -0
  61. gpbench/method_class/EIR/utils/lcl_layers.py +235 -0
  62. gpbench/method_class/EIR/utils/logging.py +59 -0
  63. gpbench/method_class/EIR/utils/mlp_layers.py +92 -0
  64. gpbench/method_class/EIR/utils/models_locally_connected.py +642 -0
  65. gpbench/method_class/EIR/utils/transformer_models.py +546 -0
  66. gpbench/method_class/ElasticNet/ElasticNet_class.py +133 -0
  67. gpbench/method_class/ElasticNet/ElasticNet_he_class.py +91 -0
  68. gpbench/method_class/ElasticNet/__init__.py +5 -0
  69. gpbench/method_class/G2PDeep/G2PDeep_he_class.py +217 -0
  70. gpbench/method_class/G2PDeep/G2Pdeep_class.py +205 -0
  71. gpbench/method_class/G2PDeep/__init__.py +5 -0
  72. gpbench/method_class/G2PDeep/base_G2PDeep_class.py +209 -0
  73. gpbench/method_class/GBLUP/GBLUP_class.py +183 -0
  74. gpbench/method_class/GBLUP/__init__.py +5 -0
  75. gpbench/method_class/GEFormer/GEFormer_class.py +169 -0
  76. gpbench/method_class/GEFormer/GEFormer_he_class.py +137 -0
  77. gpbench/method_class/GEFormer/__init__.py +5 -0
  78. gpbench/method_class/GEFormer/gMLP_class.py +357 -0
  79. gpbench/method_class/LightGBM/LightGBM_class.py +224 -0
  80. gpbench/method_class/LightGBM/LightGBM_he_class.py +121 -0
  81. gpbench/method_class/LightGBM/__init__.py +5 -0
  82. gpbench/method_class/RF/RF_GPU_class.py +165 -0
  83. gpbench/method_class/RF/RF_GPU_he_class.py +124 -0
  84. gpbench/method_class/RF/__init__.py +5 -0
  85. gpbench/method_class/SVC/SVC_GPU.py +181 -0
  86. gpbench/method_class/SVC/SVC_GPU_he.py +106 -0
  87. gpbench/method_class/SVC/__init__.py +5 -0
  88. gpbench/method_class/SoyDNGP/AlexNet_206_class.py +179 -0
  89. gpbench/method_class/SoyDNGP/SoyDNGP_class.py +189 -0
  90. gpbench/method_class/SoyDNGP/SoyDNGP_he_class.py +112 -0
  91. gpbench/method_class/SoyDNGP/__init__.py +5 -0
  92. gpbench/method_class/XGBoost/XGboost_GPU_class.py +198 -0
  93. gpbench/method_class/XGBoost/XGboost_GPU_he_class.py +178 -0
  94. gpbench/method_class/XGBoost/__init__.py +5 -0
  95. gpbench/method_class/__init__.py +52 -0
  96. gpbench/method_class/rrBLUP/__init__.py +5 -0
  97. gpbench/method_class/rrBLUP/rrBLUP_class.py +140 -0
  98. gpbench/method_reg/BayesA/BayesA.py +116 -0
  99. gpbench/method_reg/BayesA/__init__.py +5 -0
  100. gpbench/method_reg/BayesA/_bayesfromR.py +96 -0
  101. gpbench/method_reg/BayesA/_param_free_base_model.py +84 -0
  102. gpbench/method_reg/BayesA/bayesAfromR.py +16 -0
  103. gpbench/method_reg/BayesB/BayesB.py +117 -0
  104. gpbench/method_reg/BayesB/__init__.py +5 -0
  105. gpbench/method_reg/BayesB/_bayesfromR.py +96 -0
  106. gpbench/method_reg/BayesB/_param_free_base_model.py +84 -0
  107. gpbench/method_reg/BayesB/bayesBfromR.py +16 -0
  108. gpbench/method_reg/BayesC/BayesC.py +115 -0
  109. gpbench/method_reg/BayesC/__init__.py +5 -0
  110. gpbench/method_reg/BayesC/_bayesfromR.py +96 -0
  111. gpbench/method_reg/BayesC/_param_free_base_model.py +84 -0
  112. gpbench/method_reg/BayesC/bayesCfromR.py +16 -0
  113. gpbench/method_reg/CropARNet/CropARNet.py +159 -0
  114. gpbench/method_reg/CropARNet/CropARNet_Hyperparameters.py +109 -0
  115. gpbench/method_reg/CropARNet/__init__.py +5 -0
  116. gpbench/method_reg/CropARNet/base_CropARNet.py +137 -0
  117. gpbench/method_reg/Cropformer/Cropformer.py +313 -0
  118. gpbench/method_reg/Cropformer/Cropformer_Hyperparameters.py +250 -0
  119. gpbench/method_reg/Cropformer/__init__.py +5 -0
  120. gpbench/method_reg/DL_GWAS/DL_GWAS.py +186 -0
  121. gpbench/method_reg/DL_GWAS/DL_GWAS_Hyperparameters.py +125 -0
  122. gpbench/method_reg/DL_GWAS/__init__.py +5 -0
  123. gpbench/method_reg/DNNGP/DNNGP.py +157 -0
  124. gpbench/method_reg/DNNGP/DNNGP_Hyperparameters.py +118 -0
  125. gpbench/method_reg/DNNGP/__init__.py +5 -0
  126. gpbench/method_reg/DNNGP/base_dnngp.py +101 -0
  127. gpbench/method_reg/DeepCCR/DeepCCR.py +149 -0
  128. gpbench/method_reg/DeepCCR/DeepCCR_Hyperparameters.py +110 -0
  129. gpbench/method_reg/DeepCCR/__init__.py +5 -0
  130. gpbench/method_reg/DeepCCR/base_DeepCCR.py +171 -0
  131. gpbench/method_reg/DeepGS/DeepGS.py +165 -0
  132. gpbench/method_reg/DeepGS/DeepGS_Hyperparameters.py +114 -0
  133. gpbench/method_reg/DeepGS/__init__.py +5 -0
  134. gpbench/method_reg/DeepGS/base_deepgs.py +98 -0
  135. gpbench/method_reg/EIR/EIR.py +258 -0
  136. gpbench/method_reg/EIR/EIR_Hyperparameters.py +178 -0
  137. gpbench/method_reg/EIR/__init__.py +5 -0
  138. gpbench/method_reg/EIR/utils/__init__.py +0 -0
  139. gpbench/method_reg/EIR/utils/array_output_modules.py +97 -0
  140. gpbench/method_reg/EIR/utils/common.py +65 -0
  141. gpbench/method_reg/EIR/utils/lcl_layers.py +235 -0
  142. gpbench/method_reg/EIR/utils/logging.py +59 -0
  143. gpbench/method_reg/EIR/utils/mlp_layers.py +92 -0
  144. gpbench/method_reg/EIR/utils/models_locally_connected.py +642 -0
  145. gpbench/method_reg/EIR/utils/transformer_models.py +546 -0
  146. gpbench/method_reg/ElasticNet/ElasticNet.py +123 -0
  147. gpbench/method_reg/ElasticNet/ElasticNet_he.py +83 -0
  148. gpbench/method_reg/ElasticNet/__init__.py +5 -0
  149. gpbench/method_reg/G2PDeep/G2PDeep_Hyperparameters.py +107 -0
  150. gpbench/method_reg/G2PDeep/G2Pdeep.py +166 -0
  151. gpbench/method_reg/G2PDeep/__init__.py +5 -0
  152. gpbench/method_reg/G2PDeep/base_G2PDeep.py +209 -0
  153. gpbench/method_reg/GBLUP/GBLUP_R.py +182 -0
  154. gpbench/method_reg/GBLUP/__init__.py +5 -0
  155. gpbench/method_reg/GEFormer/GEFormer.py +164 -0
  156. gpbench/method_reg/GEFormer/GEFormer_Hyperparameters.py +106 -0
  157. gpbench/method_reg/GEFormer/__init__.py +5 -0
  158. gpbench/method_reg/GEFormer/gMLP.py +341 -0
  159. gpbench/method_reg/LightGBM/LightGBM.py +237 -0
  160. gpbench/method_reg/LightGBM/LightGBM_Hyperparameters.py +77 -0
  161. gpbench/method_reg/LightGBM/__init__.py +5 -0
  162. gpbench/method_reg/MVP/MVP.py +182 -0
  163. gpbench/method_reg/MVP/MVP_Hyperparameters.py +126 -0
  164. gpbench/method_reg/MVP/__init__.py +5 -0
  165. gpbench/method_reg/MVP/base_MVP.py +113 -0
  166. gpbench/method_reg/RF/RF_GPU.py +174 -0
  167. gpbench/method_reg/RF/RF_Hyperparameters.py +163 -0
  168. gpbench/method_reg/RF/__init__.py +5 -0
  169. gpbench/method_reg/SVC/SVC_GPU.py +194 -0
  170. gpbench/method_reg/SVC/SVC_Hyperparameters.py +107 -0
  171. gpbench/method_reg/SVC/__init__.py +5 -0
  172. gpbench/method_reg/SoyDNGP/AlexNet_206.py +185 -0
  173. gpbench/method_reg/SoyDNGP/SoyDNGP.py +179 -0
  174. gpbench/method_reg/SoyDNGP/SoyDNGP_Hyperparameters.py +105 -0
  175. gpbench/method_reg/SoyDNGP/__init__.py +5 -0
  176. gpbench/method_reg/XGBoost/XGboost_GPU.py +188 -0
  177. gpbench/method_reg/XGBoost/XGboost_Hyperparameters.py +167 -0
  178. gpbench/method_reg/XGBoost/__init__.py +5 -0
  179. gpbench/method_reg/__init__.py +55 -0
  180. gpbench/method_reg/rrBLUP/__init__.py +5 -0
  181. gpbench/method_reg/rrBLUP/rrBLUP.py +123 -0
  182. gpbench-1.0.0.dist-info/METADATA +379 -0
  183. gpbench-1.0.0.dist-info/RECORD +188 -0
  184. gpbench-1.0.0.dist-info/WHEEL +5 -0
  185. gpbench-1.0.0.dist-info/entry_points.txt +2 -0
  186. gpbench-1.0.0.dist-info/top_level.txt +3 -0
  187. tests/test_import.py +80 -0
  188. tests/test_method.py +232 -0
gp_agent_tool/compute_dataset_feature.py
@@ -0,0 +1,67 @@
+ import numpy as np
+ from scipy.stats import skew, kurtosis
+ import os
+
+ def process_one_phenotype(dataset_path: str) -> dict:
+     """
+     Process a single phenotype and return a summary dict.
+     """
+     geno_path = os.path.join(dataset_path, "genotype.npz")
+     pheno_path = os.path.join(dataset_path, "phenotype.npz")
+
+     genotype = np.load(geno_path)['arr_0']
+     pheno_file = np.load(pheno_path)
+     phenotype = pheno_file['arr_0']
+     phe_name = pheno_file['arr_1']
+     sp_name = pheno_file['arr_2']
+     phe_data = phenotype[:, 0]
+
+     # Drop missing values
+     mask = ~np.isnan(phe_data)
+     phe_clean = phe_data[mask]
+     geno_clean = genotype[mask] if mask.sum() > 0 else genotype
+
+     summary = {
+         # Basic information
+         # 'species_phenotype': f"{sp_name}/{phe_name}",
+         'species': sp_name,
+         # 'phenotype_name': phe_name,
+
+         # Dimension information
+         'n_samples_total': genotype.shape[0],
+         'n_samples_valid': len(phe_clean),
+         'n_markers': genotype.shape[1] if genotype.ndim > 1 else 1,
+         'missing_rate': 1 - len(phe_clean) / genotype.shape[0],
+
+         # Phenotype summary statistics
+         'pheno_mean': np.mean(phe_clean) if len(phe_clean) > 0 else np.nan,
+         'pheno_std': np.std(phe_clean) if len(phe_clean) > 0 else np.nan,
+         'pheno_min': np.min(phe_clean) if len(phe_clean) > 0 else np.nan,
+         'pheno_max': np.max(phe_clean) if len(phe_clean) > 0 else np.nan,
+         'pheno_median': np.median(phe_clean) if len(phe_clean) > 0 else np.nan,
+         'pheno_skewness': skew(phe_clean) if len(phe_clean) > 3 else np.nan,
+         'pheno_kurtosis': kurtosis(phe_clean) if len(phe_clean) > 3 else np.nan,
+
+         # Genotype summary statistics
+         'geno_mean': np.mean(geno_clean) if geno_clean.size > 0 else np.nan,
+         'geno_std': np.std(geno_clean) if geno_clean.size > 0 else np.nan,
+         'geno_missing_rate': (
+             np.isnan(geno_clean).sum() / geno_clean.size
+             if geno_clean.size > 0 else np.nan
+         ),
+         'geno_maf': (
+             np.mean(
+                 np.minimum(
+                     np.mean(geno_clean, axis=0),
+                     1 - np.mean(geno_clean, axis=0)
+                 )
+             ) if geno_clean.ndim > 1 and geno_clean.size > 0 else np.nan
+         ),
+
+         # Type information
+         'geno_dtype': str(genotype.dtype),
+         'pheno_dtype': str(phe_data.dtype),
+         'is_pheno_binary': len(np.unique(phe_clean)) == 2 if len(phe_clean) > 0 else False
+     }
+
+     return summary
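
As a quick sanity check, the sketch below builds a throwaway dataset in the arr_0/arr_1/arr_2 layout that process_one_phenotype reads and runs it end to end. This is a hedged example: the .npz layout is inferred from the loader above, the import path assumes gp_agent_tool is importable from the working directory, and all values are synthetic.

    import os
    import tempfile
    import numpy as np

    from gp_agent_tool.compute_dataset_feature import process_one_phenotype

    dataset_path = tempfile.mkdtemp()
    rng = np.random.default_rng(0)

    # genotype.npz: arr_0 holds a (samples x markers) dosage matrix
    np.savez(os.path.join(dataset_path, "genotype.npz"),
             rng.integers(0, 3, size=(50, 100)))

    # phenotype.npz: arr_0 = (samples x 1) values, arr_1 = phenotype name, arr_2 = species
    np.savez(os.path.join(dataset_path, "phenotype.npz"),
             rng.normal(size=(50, 1)), np.array("mkg"), np.array("Cattle"))

    summary = process_one_phenotype(dataset_path)
    print(summary["n_samples_valid"], summary["n_markers"])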
gp_agent_tool/config.py
@@ -0,0 +1,65 @@
+ """
+ Minimal config loader for gwas-llm-judge.
+
+ The config file should be located at:
+ /home/common/hwluo/project/gwas-llm-judge/config/config.json
+
+ and contain at least the sections:
+ - "llm"
+ - "codegen_llm"
+ - "multimodal_llm"
+ """
+
+ from __future__ import annotations
+
+ import json
+ from pathlib import Path
+ from typing import Any, Dict, Optional
+
+ from logging_utils import get_logger
+
+
+ logger = get_logger(__name__)
+
+ _CONFIG_CACHE: Optional[Dict[str, Any]] = None
+
+
+ def _config_path() -> Path:
+     # Pinned to config/config.json under this project, to avoid depending on the old project's path
+     return Path(__file__).resolve().parent / "config" / "config.json"
+
+
+ def _load_config() -> Dict[str, Any]:
+     global _CONFIG_CACHE
+     if _CONFIG_CACHE is not None:
+         return _CONFIG_CACHE
+
+     path = _config_path()
+     try:
+         with path.open("r", encoding="utf-8") as f:
+             _CONFIG_CACHE = json.load(f)
+     except FileNotFoundError:
+         logger.warning("Config file not found: %s", path)
+         _CONFIG_CACHE = {}
+     except Exception as exc:  # noqa: BLE001
+         logger.error("Failed to load config %s: %s", path, exc)
+         _CONFIG_CACHE = {}
+     return _CONFIG_CACHE
+
+
+ def get_llm_config() -> Dict[str, Any]:
+     cfg = _load_config()
+     return dict(cfg.get("llm", {}))
+
+
+ def get_codegen_llm_config() -> Dict[str, Any]:
+     cfg = _load_config()
+     return dict(cfg.get("codegen_llm", {}))
+
+
+ def get_multimodal_llm_config() -> Dict[str, Any]:
+     cfg = _load_config()
+     return dict(cfg.get("multimodal_llm", {}))
+
+
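
Since _config_path() resolves to a config/ directory next to this module, a config.json must exist there before any of the getters return non-empty dicts. A minimal bootstrap sketch follows; the section names come from this module and the per-section keys mirror what llm_client.py looks up, but every value (and the exact directory) is a placeholder you would replace.

    import json
    from pathlib import Path

    # Assumed location: the config/ directory sits next to config.py inside gp_agent_tool
    config_dir = Path("gp_agent_tool") / "config"
    config_dir.mkdir(parents=True, exist_ok=True)

    placeholder = {
        "llm": {
            "model": "your-chat-model",
            "api_key": "YOUR-API-KEY",
            "base_url": "https://example.com/v1",  # OpenAI-compatible endpoint
            "timeout_seconds": 60,
            "max_retries": 2,
        },
        "codegen_llm": {
            "model": "your-codegen-model",
            "api_key": "YOUR-API-KEY",
            "base_url": "https://example.com/v1",
        },
        "multimodal_llm": {
            "model": "your-multimodal-model",
            "api_key": "YOUR-API-KEY",
        },
    }

    (config_dir / "config.json").write_text(
        json.dumps(placeholder, indent=2), encoding="utf-8"
    )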
gp_agent_tool/experience/create_masked_dataset_summary.py
@@ -0,0 +1,97 @@
+ import csv
+ import os
+ import uuid
+ from typing import List, Optional
+
+
+ def create_masked_dataset_summary(
+     excluded_species_phenotypes: List[str],
+     source_csv: str = "dataset_summary.csv",
+     output_suffix: str = "_masked",
+ ) -> tuple[str, str]:
+     """
+     Filter the given species_phenotype values out of dataset_summary.csv,
+     write the remaining rows to a new CSV file, and return the absolute path
+     of the new file together with a preview of its first 10 rows (including
+     the header).
+
+     Parameters
+     ----------
+     excluded_species_phenotypes : List[str]
+         species_phenotype values to filter out (matching the first column of the source CSV).
+     source_csv : str, optional
+         Path to the source CSV; defaults to dataset_summary.csv next to this script (relative path).
+     output_suffix : str, optional
+         Suffix for the generated file name, defaults to "_masked".
+
+     Returns
+     -------
+     tuple[str, str]
+         Absolute path of the newly written CSV, and the first 10 rows of the
+         masked data (including the header) serialized as a string.
+     """
+     # Resolve relative paths against this script's directory so the result
+     # does not depend on the runtime working directory
+     base_dir = os.path.dirname(__file__)
+
+     # If the caller passed a relative path, resolve it against the script
+     # directory (defaults to dataset_summary.csv)
+     if not os.path.isabs(source_csv):
+         source_csv = os.path.join(base_dir, source_csv)
+
+     if not os.path.exists(source_csv):
+         raise FileNotFoundError(f"Source CSV not found: {source_csv}")
+
+     # Read the source file and filter rows.
+     # Pre-build a lowercased exclusion set so matching is case-insensitive.
+     excluded_lower = {item.lower() for item in excluded_species_phenotypes}
+     kept_rows: List[List[str]] = []
+     header: Optional[List[str]] = None
+
+     with open(source_csv, "r", newline="", encoding="utf-8") as f:
+         reader = csv.reader(f)
+         for i, row in enumerate(reader):
+             if i == 0:
+                 header = row
+                 continue
+
+             # row[0] should be species_phenotype
+             species_pheno = row[0]
+             # Compare in lowercase for case-insensitive matching
+             if species_pheno.lower() in excluded_lower:
+                 continue
+             kept_rows.append(row)
+
+     # Preview the first 10 rows (including the header), serialized as a string
+     # (comma-separated fields, rows joined with \n)
+     preview_rows: List[List[str]] = []
+     if header is not None:
+         preview_rows.append(header)
+     data_limit = max(0, 10 - len(preview_rows))
+     if data_limit > 0:
+         preview_rows.extend(kept_rows[:data_limit])
+     preview_str = "\n".join([",".join(row) for row in preview_rows])
+
+     # Build the output path: write into the experience/tmp directory (relative to this script)
+     tmp_dir = os.path.join(base_dir, "tmp")
+     os.makedirs(tmp_dir, exist_ok=True)
+
+     _, src_name = os.path.split(source_csv)
+     name, ext = os.path.splitext(src_name)
+     uid = uuid.uuid4().hex
+     output_name = f"{name}{output_suffix}_{uid}{ext}"
+     output_path = os.path.join(tmp_dir, output_name)
+
+     # Write out the new CSV file
+     with open(output_path, "w", newline="", encoding="utf-8") as f:
+         writer = csv.writer(f)
+         if header is not None:
+             writer.writerow(header)
+         writer.writerows(kept_rows)
+
+     return os.path.abspath(output_path), preview_str
+
+
+ if __name__ == "__main__":
+     # Example: exclude a few species_phenotype values
+     example_excluded = ["Cattle/mkg", "Chicken/EW28"]
+     new_path, preview = create_masked_dataset_summary(example_excluded)
+     print(f"Masked dataset summary written to: {new_path}")
+     print("Preview (first 10 rows):")
+     print(preview)
+
gp_agent_tool/experience/dataset_summary_info.py
@@ -0,0 +1,13 @@
+
+ dataset_summary_info = {
+     "file_name": "dataset_summary.csv",
+     "file_path": "/home/common/xwzhang/Project/GPBench/gp_agent_tool/experience/dataset_summary.csv",
+     "description": "This is the summary of the dataset, including the number of samples, the number of missing values, the mean, the standard deviation, the minimum, the maximum, the median, the skewness, the kurtosis, the genotype mean, the genotype standard deviation, the genotype missing rate, the genotype minor allele frequency, the genotype data type, the phenotype data type, and whether the phenotype is binary.",
+     "preview": "\n".join(
+         [
+             "species,phenotype_name,description,n_samples_total,n_samples_valid,n_markers,missing_rate,pheno_mean,pheno_std,pheno_min,pheno_max,pheno_median,pheno_skewness,pheno_kurtosis,geno_mean,geno_std,geno_missing_rate,geno_maf,geno_dtype,pheno_dtype,is_pheno_binary",
+             "Cattle,mkg,Milk yield,5024,5024,42551,0.0,-2.587579633930134e-09,0.9999004678058506,-3.383414,3.318611,0.009681,0.0086577861867763,-0.0458583441004787,1.0713618086920649,0.8048099006002398,0.0,-0.1621657327056165,int64,float64,False",
+         ]
+     ),
+ }
+
gp_agent_tool/experience/experience_info.py
@@ -0,0 +1,12 @@
+ experience_info = {
+     "file_name": "experience_origin.csv",
+     "file_path": "/home/common/xwzhang/Project/GPBench/gp_agent_tool/experience/experience_origin.csv",
+     "description": "This is the performance of 18 methods on different species and phenotype datasets, including the correlation coefficient, the mean absolute error, the mean squared error, the R2, the running time, and the resource usage.",
+     "preview": "\n".join(
+         [
+             "method,species,phenotype,description,all_time_s,corr_mean,corr_std,mae_mean,mae_std,mse_mean,mse_std,r2_mean,r2_std,cpu_mem_MB,gpu_mem_MB",
+             "rrBLUP,Cattle,scs, Somatic cell score,2373.4649,0.7525,0.01936,0.5229,0.02173,0.4339,0.04136,0.5646,0.02846,12677.914,0",
+         ]
+     ),
+ }
+
gp_agent_tool/experience/get_matched_experience.py
@@ -0,0 +1,111 @@
+ import csv
+ import os
+ import uuid
+ from typing import List, Optional, Tuple
+
+
+ def get_matched_experience(
+     target_species_phenotypes: Optional[List[str]],
+     source_csv: Optional[str] = None,
+     output_suffix: str = "_matched",
+ ) -> Tuple[str, str]:
+     """
+     Select the rows of experience_origin.csv that match the given
+     species/phenotype list, write them to a new CSV file, and return the
+     absolute path of the new file together with a preview of its first
+     10 rows (including the header, as a string).
+
+     Parameters
+     ----------
+     target_species_phenotypes : List[str] | None
+         species/phenotype combinations to keep, formatted like "Cattle/fpro".
+         If None, no filtering is applied and all records from the source CSV are returned.
+     source_csv : str, optional
+         Path to the source CSV; defaults to experience_origin.csv next to this script (relative path).
+     output_suffix : str, optional
+         Suffix for the generated file name, defaults to "_matched".
+
+     Returns
+     -------
+     tuple[str, str]
+         Absolute path of the newly written CSV, and the first 10 rows of the
+         matched data (including the header) serialized as a string.
+     """
+     base_dir = os.path.dirname(__file__)
+
+     if source_csv is None:
+         source_csv = os.path.join(base_dir, "experience_origin.csv")
+     elif not os.path.isabs(source_csv):
+         source_csv = os.path.join(base_dir, source_csv)
+
+     if not os.path.exists(source_csv):
+         raise FileNotFoundError(f"Source CSV not found: {source_csv}")
+
+     # Match on lowercased (species, phenotype) pairs so the
+     # matching logic is case-insensitive.
+     match_set = set()
+     if target_species_phenotypes is not None:
+         for item in target_species_phenotypes:
+             if "/" not in item:
+                 raise ValueError(f"Invalid format (expected species/phenotype): {item}")
+             species, phenotype = item.split("/", 1)
+             species = species.strip()
+             phenotype = phenotype.strip()
+             if not species or not phenotype:
+                 raise ValueError(f"Invalid species or phenotype in: {item}")
+             # Store lowercased values in the set
+             match_set.add((species.lower(), phenotype.lower()))
+
+     kept_rows: List[List[str]] = []
+     header: Optional[List[str]] = None
+
+     with open(source_csv, "r", newline="", encoding="utf-8") as f:
+         reader = csv.reader(f)
+         for i, row in enumerate(reader):
+             if i == 0:
+                 header = row
+                 continue
+
+             if len(row) < 3:
+                 continue
+
+             # If target_species_phenotypes is None, keep every record without filtering
+             if target_species_phenotypes is None:
+                 kept_rows.append(row)
+             else:
+                 # Compare in lowercase for case-insensitive matching
+                 species_val = row[1].strip().lower()
+                 pheno_val = row[2].strip().lower()
+                 if (species_val, pheno_val) in match_set:
+                     kept_rows.append(row)
+
+     preview_rows: List[List[str]] = []
+     if header is not None:
+         preview_rows.append(header)
+     data_limit = max(0, 10 - len(preview_rows))
+     if data_limit > 0:
+         preview_rows.extend(kept_rows[:data_limit])
+     preview_str = "\n".join([",".join(row) for row in preview_rows])
+
+     tmp_dir = os.path.join(base_dir, "tmp")
+     os.makedirs(tmp_dir, exist_ok=True)
+
+     _, src_name = os.path.split(source_csv)
+     name, ext = os.path.splitext(src_name)
+     uid = uuid.uuid4().hex
+     output_name = f"{name}{output_suffix}_{uid}{ext}"
+     output_path = os.path.join(tmp_dir, output_name)
+
+     with open(output_path, "w", newline="", encoding="utf-8") as f:
+         writer = csv.writer(f)
+         if header is not None:
+             writer.writerow(header)
+         writer.writerows(kept_rows)
+
+     return os.path.abspath(output_path), preview_str
+
+
+ if __name__ == "__main__":
+     sample_targets = ["Rice/GYP_BLUP", "Mouse/weight", "Chickpea/Yield"]
+     new_path, preview = get_matched_experience(sample_targets)
+     print(f"Matched experience written to: {new_path}")
+     print("Preview (first 10 rows):")
+     print(preview)
gp_agent_tool/llm_client.py
@@ -0,0 +1,119 @@
+ from __future__ import annotations
+
+ from functools import lru_cache
+ from typing import Any, Dict, List
+
+ from dashscope import MultiModalConversation
+ from langchain_core.messages import HumanMessage
+ from langchain_openai import ChatOpenAI
+
+ from config import (
+     get_codegen_llm_config,
+     get_llm_config,
+     get_multimodal_llm_config,
+ )
+ from logging_utils import get_logger
+
+
+ logger = get_logger(__name__)
+
+
+ @lru_cache(maxsize=1)
+ def _base_llm_config() -> Dict[str, Any]:
+     return get_llm_config()
+
+
+ @lru_cache(maxsize=1)
+ def _base_codegen_llm_config() -> Dict[str, Any]:
+     return get_codegen_llm_config()
+
+
+ def _build_chat_llm(
+     *, temperature: float, max_tokens: int, use_codegen: bool = False
+ ) -> ChatOpenAI:
+     base_config = _base_codegen_llm_config() if use_codegen else _base_llm_config()
+     params: Dict[str, Any] = {
+         "model": base_config.get("model"),
+         "api_key": base_config.get("api_key"),
+         "base_url": base_config.get("base_url"),
+         "temperature": temperature,
+         "max_tokens": max_tokens,
+     }
+     if base_config.get("timeout_seconds") is not None:
+         params["timeout"] = base_config["timeout_seconds"]
+     if base_config.get("max_retries") is not None:
+         params["max_retries"] = base_config["max_retries"]
+     return ChatOpenAI(**params)
+
+
+ def run_llm(
+     prompt: str,
+     *,
+     temperature: float,
+     max_tokens: int,
+     use_codegen: bool = False,
+     node_name: str = "unknown",
+ ) -> str:
+     """Single-turn chat LLM call; returns the text content."""
+     base_config = _base_codegen_llm_config() if use_codegen else _base_llm_config()
+     model_name = base_config.get("model", "unknown")
+
+     logger.info(
+         "[LLM Input] Node: %s | Model: %s | UseCodegen: %s",
+         node_name,
+         model_name,
+         use_codegen,
+     )
+     logger.info("[LLM Input Full] Node: %s\n%s", node_name, prompt)
+
+     llm = _build_chat_llm(
+         temperature=temperature,
+         max_tokens=max_tokens,
+         use_codegen=use_codegen,
+     )
+     response = llm.invoke([HumanMessage(content=prompt)])
+     response_content = getattr(response, "content", "") or ""
+
+     logger.info("[LLM Output] Node: %s | Model: %s", node_name, model_name)
+     logger.info("[LLM Output Full] Node: %s\n%s", node_name, response_content)
+
+     return response_content
+
+
+ def run_multimodal_llm(
+     content_payload: List[dict],
+     *,
+     node_name: str = "unknown",
+ ) -> str:
+     """Multimodal LLM call, currently used for image analysis."""
+     multimodal_config = get_multimodal_llm_config()
+     model_name = multimodal_config.get("model", "unknown")
+
+     messages = [{"role": "user", "content": content_payload}]
+
+     logger.info(
+         "[Multimodal LLM Input] Node: %s | Model: %s",
+         node_name,
+         model_name,
+     )
+
+     response = MultiModalConversation.call(
+         api_key=multimodal_config["api_key"],
+         model=model_name,
+         messages=messages,
+     )
+
+     text = ""
+     if response.output and response.output.choices:
+         text = response.output.choices[0].message.content[0].get("text", "") or ""
+
+     logger.info(
+         "[Multimodal LLM Output] Node: %s | Model: %s",
+         node_name,
+         model_name,
+     )
+     logger.info("[Multimodal LLM Output Full] Node: %s\n%s", node_name, text)
+
+     return text
+
+
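
A minimal way to exercise run_llm once config.json is in place is sketched below. Because this module uses flat imports (from config import ..., from logging_utils import ...), the sketch assumes the interpreter is started from inside the gp_agent_tool/ directory; the prompt and parameter values are arbitrary.

    from llm_client import run_llm  # flat import; run from within gp_agent_tool/

    answer = run_llm(
        "In one sentence, what does genomic prediction estimate?",
        temperature=0.2,
        max_tokens=256,
        node_name="demo",
    )
    print(answer)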
gp_agent_tool/logging_utils.py
@@ -0,0 +1,24 @@
+ import logging
+ from typing import Optional
+
+
+ def get_logger(name: Optional[str] = None) -> logging.Logger:
+     """
+     Simple logger wrapper that avoids depending on the old project.
+
+     - Defaults to INFO level.
+     - Adds a StreamHandler only if the root logger has no handlers configured yet.
+     """
+     logger = logging.getLogger(name)
+     if not logging.getLogger().handlers:
+         handler = logging.StreamHandler()
+         formatter = logging.Formatter(
+             "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
+         )
+         handler.setFormatter(formatter)
+         logging.getLogger().addHandler(handler)
+         logging.getLogger().setLevel(logging.INFO)
+     return logger
+
+
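
For completeness, a two-line use of the helper above: the first call installs the root handler and level, and later calls reuse them. The flat import mirrors how config.py and llm_client.py import this module.

    from logging_utils import get_logger  # flat import, as in config.py and llm_client.py

    log = get_logger(__name__)
    log.info("gp_agent_tool logging initialized")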