@researai/deepscientist 1.5.17 → 1.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +309 -130
- package/AISB/catalog/aisb.b1.agentic_coding.yaml +244 -0
- package/AISB/catalog/aisb.b10.climate_earth.yaml +235 -0
- package/AISB/catalog/aisb.b11.model_efficiency.yaml +231 -0
- package/AISB/catalog/aisb.b12.embodied_ai.yaml +238 -0
- package/AISB/catalog/aisb.b2.agent_systems.yaml +229 -0
- package/AISB/catalog/aisb.b3.self_evolving_rl.yaml +237 -0
- package/AISB/catalog/aisb.b4.lm_reasoning.yaml +240 -0
- package/AISB/catalog/aisb.b5.math_proof.yaml +235 -0
- package/AISB/catalog/aisb.b6.research_process.yaml +243 -0
- package/AISB/catalog/aisb.b7.multimodal_fusion.yaml +232 -0
- package/AISB/catalog/aisb.b8.lifesci_drug.yaml +275 -0
- package/AISB/catalog/aisb.b9.material_science.yaml +237 -0
- package/AISB/catalog/aisb.t3.001_savvy.yaml +159 -0
- package/AISB/catalog/aisb.t3.001_savvy.zh.yaml +121 -0
- package/AISB/catalog/aisb.t3.002_pinet.yaml +189 -0
- package/AISB/catalog/aisb.t3.002_pinet.zh.yaml +130 -0
- package/AISB/catalog/aisb.t3.004_decentralattn.yaml +184 -0
- package/AISB/catalog/aisb.t3.004_decentralattn.zh.yaml +153 -0
- package/AISB/catalog/aisb.t3.005_tsae.yaml +193 -0
- package/AISB/catalog/aisb.t3.005_tsae.zh.yaml +139 -0
- package/AISB/catalog/aisb.t3.006_physense.yaml +194 -0
- package/AISB/catalog/aisb.t3.006_physense.zh.yaml +118 -0
- package/AISB/catalog/aisb.t3.007_reasoningiqa.yaml +169 -0
- package/AISB/catalog/aisb.t3.007_reasoningiqa.zh.yaml +133 -0
- package/AISB/catalog/aisb.t3.008_meanflows.yaml +188 -0
- package/AISB/catalog/aisb.t3.008_meanflows.zh.yaml +140 -0
- package/AISB/catalog/aisb.t3.009_scoremissing.yaml +179 -0
- package/AISB/catalog/aisb.t3.009_scoremissing.zh.yaml +119 -0
- package/AISB/catalog/aisb.t3.010_suitabilityfilter.yaml +221 -0
- package/AISB/catalog/aisb.t3.010_suitabilityfilter.zh.yaml +141 -0
- package/AISB/catalog/aisb.t3.011_osd.yaml +206 -0
- package/AISB/catalog/aisb.t3.011_osd.zh.yaml +163 -0
- package/AISB/catalog/aisb.t3.012_efficientqat.yaml +206 -0
- package/AISB/catalog/aisb.t3.012_efficientqat.zh.yaml +159 -0
- package/AISB/catalog/aisb.t3.013_appl.yaml +152 -0
- package/AISB/catalog/aisb.t3.013_appl.zh.yaml +126 -0
- package/AISB/catalog/aisb.t3.014_piguard.yaml +207 -0
- package/AISB/catalog/aisb.t3.014_piguard.zh.yaml +164 -0
- package/AISB/catalog/aisb.t3.015_frspec.yaml +209 -0
- package/AISB/catalog/aisb.t3.015_frspec.zh.yaml +163 -0
- package/AISB/catalog/aisb.t3.016_mathfusion.yaml +166 -0
- package/AISB/catalog/aisb.t3.016_mathfusion.zh.yaml +145 -0
- package/AISB/catalog/aisb.t3.017_multimodalglp.yaml +171 -0
- package/AISB/catalog/aisb.t3.017_multimodalglp.zh.yaml +122 -0
- package/AISB/catalog/aisb.t3.018_cotsynth.yaml +206 -0
- package/AISB/catalog/aisb.t3.018_cotsynth.zh.yaml +162 -0
- package/AISB/catalog/aisb.t3.019_dyscaleut.yaml +211 -0
- package/AISB/catalog/aisb.t3.019_dyscaleut.zh.yaml +148 -0
- package/AISB/catalog/aisb.t3.020_aristotle.yaml +173 -0
- package/AISB/catalog/aisb.t3.020_aristotle.zh.yaml +119 -0
- package/AISB/catalog/aisb.t3.021_tokenrecycling.yaml +160 -0
- package/AISB/catalog/aisb.t3.021_tokenrecycling.zh.yaml +129 -0
- package/AISB/catalog/aisb.t3.022_chainofreasoning.yaml +204 -0
- package/AISB/catalog/aisb.t3.022_chainofreasoning.zh.yaml +161 -0
- package/AISB/catalog/aisb.t3.023_guidedembed.yaml +211 -0
- package/AISB/catalog/aisb.t3.023_guidedembed.zh.yaml +189 -0
- package/AISB/catalog/aisb.t3.024_outputcentric.yaml +148 -0
- package/AISB/catalog/aisb.t3.024_outputcentric.zh.yaml +131 -0
- package/AISB/catalog/aisb.t3.025_deeper.yaml +143 -0
- package/AISB/catalog/aisb.t3.025_deeper.zh.yaml +116 -0
- package/AISB/catalog/aisb.t3.026_gartkg.yaml +195 -0
- package/AISB/catalog/aisb.t3.026_gartkg.zh.yaml +127 -0
- package/AISB/catalog/aisb.t3.027_citeeval.yaml +182 -0
- package/AISB/catalog/aisb.t3.027_citeeval.zh.yaml +135 -0
- package/AISB/catalog/aisb.t3.028_sbam.yaml +206 -0
- package/AISB/catalog/aisb.t3.028_sbam.zh.yaml +166 -0
- package/AISB/catalog/aisb.t3.029_cdqgeoembed.yaml +224 -0
- package/AISB/catalog/aisb.t3.029_cdqgeoembed.zh.yaml +142 -0
- package/AISB/catalog/aisb.t3.030_processrm.yaml +211 -0
- package/AISB/catalog/aisb.t3.030_processrm.zh.yaml +166 -0
- package/AISB/catalog/aisb.t3.031_circuitstability.yaml +172 -0
- package/AISB/catalog/aisb.t3.031_circuitstability.zh.yaml +134 -0
- package/AISB/catalog/aisb.t3.032_ptsolver.yaml +169 -0
- package/AISB/catalog/aisb.t3.032_ptsolver.zh.yaml +135 -0
- package/AISB/catalog/aisb.t3.033_gcse.yaml +144 -0
- package/AISB/catalog/aisb.t3.033_gcse.zh.yaml +126 -0
- package/AISB/catalog/aisb.t3.034_ensemblewm.yaml +183 -0
- package/AISB/catalog/aisb.t3.034_ensemblewm.zh.yaml +146 -0
- package/AISB/catalog/aisb.t3.035_moralvalueswa.yaml +207 -0
- package/AISB/catalog/aisb.t3.035_moralvalueswa.zh.yaml +165 -0
- package/AISB/catalog/aisb.t3.036_weakstrongpref.yaml +210 -0
- package/AISB/catalog/aisb.t3.036_weakstrongpref.zh.yaml +194 -0
- package/AISB/catalog/aisb.t3.037_dementiamask.yaml +172 -0
- package/AISB/catalog/aisb.t3.037_dementiamask.zh.yaml +132 -0
- package/AISB/catalog/aisb.t3.038_tinysam.yaml +284 -0
- package/AISB/catalog/aisb.t3.038_tinysam.zh.yaml +240 -0
- package/AISB/catalog/aisb.t3.039_calf.yaml +224 -0
- package/AISB/catalog/aisb.t3.039_calf.zh.yaml +194 -0
- package/AISB/catalog/aisb.t3.040_graniteguardian.yaml +199 -0
- package/AISB/catalog/aisb.t3.040_graniteguardian.zh.yaml +174 -0
- package/AISB/catalog/aisb.t3.041_amdm.yaml +149 -0
- package/AISB/catalog/aisb.t3.041_amdm.zh.yaml +137 -0
- package/AISB/catalog/aisb.t3.042_xpatch.yaml +216 -0
- package/AISB/catalog/aisb.t3.042_xpatch.zh.yaml +182 -0
- package/AISB/catalog/aisb.t3.043_vhm.yaml +268 -0
- package/AISB/catalog/aisb.t3.043_vhm.zh.yaml +193 -0
- package/AISB/catalog/aisb.t3.044_rgvi.yaml +224 -0
- package/AISB/catalog/aisb.t3.044_rgvi.zh.yaml +176 -0
- package/AISB/catalog/aisb.t3.045_pslstm.yaml +203 -0
- package/AISB/catalog/aisb.t3.045_pslstm.zh.yaml +179 -0
- package/AISB/catalog/aisb.t3.046_nonstatts.yaml +208 -0
- package/AISB/catalog/aisb.t3.046_nonstatts.zh.yaml +194 -0
- package/AISB/catalog/aisb.t3.047_timepfn.yaml +156 -0
- package/AISB/catalog/aisb.t3.047_timepfn.zh.yaml +124 -0
- package/AISB/catalog/aisb.t3.048_proxyspex.yaml +148 -0
- package/AISB/catalog/aisb.t3.048_proxyspex.zh.yaml +125 -0
- package/AISB/catalog/aisb.t3.049_hogwildinference.yaml +183 -0
- package/AISB/catalog/aisb.t3.049_hogwildinference.zh.yaml +138 -0
- package/AISB/catalog/aisb.t3.050_causalpfn.yaml +214 -0
- package/AISB/catalog/aisb.t3.050_causalpfn.zh.yaml +190 -0
- package/AISB/catalog/aisb.t3.051_flashtp.yaml +169 -0
- package/AISB/catalog/aisb.t3.051_flashtp.zh.yaml +124 -0
- package/AISB/catalog/aisb.t3.052_nsdiff.yaml +155 -0
- package/AISB/catalog/aisb.t3.052_nsdiff.zh.yaml +138 -0
- package/AISB/catalog/aisb.t3.053_k2vae.yaml +158 -0
- package/AISB/catalog/aisb.t3.053_k2vae.zh.yaml +132 -0
- package/AISB/catalog/aisb.t3.054_timebase.yaml +178 -0
- package/AISB/catalog/aisb.t3.054_timebase.zh.yaml +158 -0
- package/AISB/catalog/aisb.t3.055_csbrain.yaml +238 -0
- package/AISB/catalog/aisb.t3.055_csbrain.zh.yaml +184 -0
- package/AISB/catalog/aisb.t3.056_infosam.yaml +224 -0
- package/AISB/catalog/aisb.t3.056_infosam.zh.yaml +189 -0
- package/AISB/catalog/aisb.t3.057_mdreid.yaml +129 -0
- package/AISB/catalog/aisb.t3.057_mdreid.zh.yaml +117 -0
- package/AISB/catalog/aisb.t3.058_mindglitch.yaml +171 -0
- package/AISB/catalog/aisb.t3.058_mindglitch.zh.yaml +145 -0
- package/AISB/catalog/aisb.t3.059_selfsupervised.yaml +154 -0
- package/AISB/catalog/aisb.t3.059_selfsupervised.zh.yaml +125 -0
- package/AISB/catalog/aisb.t3.060_iaggad.yaml +121 -0
- package/AISB/catalog/aisb.t3.060_iaggad.zh.yaml +100 -0
- package/AISB/catalog/aisb.t3.061_hsgkn.yaml +136 -0
- package/AISB/catalog/aisb.t3.061_hsgkn.zh.yaml +113 -0
- package/AISB/catalog/aisb.t3.062_visionts.yaml +237 -0
- package/AISB/catalog/aisb.t3.062_visionts.zh.yaml +216 -0
- package/AISB/catalog/aisb.t3.063_tsrag.yaml +162 -0
- package/AISB/catalog/aisb.t3.063_tsrag.zh.yaml +138 -0
- package/AISB/catalog/aisb.t3.064_pir.yaml +221 -0
- package/AISB/catalog/aisb.t3.064_pir.zh.yaml +197 -0
- package/AISB/catalog/aisb.t3.065_proteinbinding.yaml +234 -0
- package/AISB/catalog/aisb.t3.065_proteinbinding.zh.yaml +167 -0
- package/AISB/catalog/aisb.t3.066_tropicalattention.yaml +267 -0
- package/AISB/catalog/aisb.t3.066_tropicalattention.zh.yaml +229 -0
- package/AISB/catalog/aisb.t3.067_kanad.yaml +193 -0
- package/AISB/catalog/aisb.t3.067_kanad.zh.yaml +167 -0
- package/AISB/catalog/aisb.t3.068_sempo.yaml +187 -0
- package/AISB/catalog/aisb.t3.068_sempo.zh.yaml +148 -0
- package/AISB/catalog/aisb.t3.069_treehfd.yaml +129 -0
- package/AISB/catalog/aisb.t3.069_treehfd.zh.yaml +111 -0
- package/AISB/catalog/aisb.t3.070_certifiedunlearning.yaml +224 -0
- package/AISB/catalog/aisb.t3.070_certifiedunlearning.zh.yaml +171 -0
- package/AISB/catalog/aisb.t3.071_neuralmjd.yaml +142 -0
- package/AISB/catalog/aisb.t3.071_neuralmjd.zh.yaml +120 -0
- package/AISB/catalog/aisb.t3.072_fedgmt.yaml +181 -0
- package/AISB/catalog/aisb.t3.072_fedgmt.zh.yaml +158 -0
- package/AISB/catalog/aisb.t3.073_rld.yaml +161 -0
- package/AISB/catalog/aisb.t3.073_rld.zh.yaml +129 -0
- package/AISB/catalog/aisb.t3.074_lsvi.yaml +163 -0
- package/AISB/catalog/aisb.t3.074_lsvi.zh.yaml +129 -0
- package/AISB/catalog/aisb.t3.075_treeslicedentropy.yaml +201 -0
- package/AISB/catalog/aisb.t3.075_treeslicedentropy.zh.yaml +148 -0
- package/AISB/catalog/aisb.t3.076_aanet.yaml +169 -0
- package/AISB/catalog/aisb.t3.076_aanet.zh.yaml +129 -0
- package/AISB/catalog/aisb.t3.077_cmnn.yaml +199 -0
- package/AISB/catalog/aisb.t3.077_cmnn.zh.yaml +165 -0
- package/AISB/catalog/aisb.t3.078_conformalanomaly.yaml +146 -0
- package/AISB/catalog/aisb.t3.078_conformalanomaly.zh.yaml +117 -0
- package/AISB/catalog/aisb.t3.079_dpfkmeans.yaml +131 -0
- package/AISB/catalog/aisb.t3.079_dpfkmeans.zh.yaml +104 -0
- package/AISB/catalog/aisb.t3.080_latentscorereweight.yaml +169 -0
- package/AISB/catalog/aisb.t3.080_latentscorereweight.zh.yaml +123 -0
- package/AISB/catalog/aisb.t3.081_qmamba.yaml +150 -0
- package/AISB/catalog/aisb.t3.081_qmamba.zh.yaml +117 -0
- package/AISB/catalog/aisb.t3.082_onlinellmrouting.yaml +160 -0
- package/AISB/catalog/aisb.t3.082_onlinellmrouting.zh.yaml +133 -0
- package/AISB/catalog/aisb.t3.083_starformer.yaml +178 -0
- package/AISB/catalog/aisb.t3.083_starformer.zh.yaml +140 -0
- package/AISB/catalog/aisb.t3.084_ift.yaml +139 -0
- package/AISB/catalog/aisb.t3.084_ift.zh.yaml +111 -0
- package/AISB/catalog/aisb.t3.085_neuralsurv.yaml +183 -0
- package/AISB/catalog/aisb.t3.085_neuralsurv.zh.yaml +143 -0
- package/AISB/catalog/aisb.t3.086_stella.yaml +197 -0
- package/AISB/catalog/aisb.t3.086_stella.zh.yaml +142 -0
- package/AISB/catalog/aisb.t3.087_moses.yaml +167 -0
- package/AISB/catalog/aisb.t3.087_moses.zh.yaml +132 -0
- package/AISB/catalog/aisb.t3.088_channelnorm.yaml +140 -0
- package/AISB/catalog/aisb.t3.088_channelnorm.zh.yaml +109 -0
- package/AISB/catalog/aisb.t3.089_causalvelocity.yaml +730 -0
- package/AISB/catalog/aisb.t3.089_causalvelocity.zh.yaml +668 -0
- package/AISB/catalog/aisb.t3.090_rstib.yaml +144 -0
- package/AISB/catalog/aisb.t3.090_rstib.zh.yaml +109 -0
- package/AISB/catalog/aisb.t3.091_timeawarecausal.yaml +132 -0
- package/AISB/catalog/aisb.t3.091_timeawarecausal.zh.yaml +107 -0
- package/AISB/catalog/aisb.t3.092_kmeanslocalopt.yaml +138 -0
- package/AISB/catalog/aisb.t3.092_kmeanslocalopt.zh.yaml +110 -0
- package/AISB/catalog/aisb.t3.093_fedwmsam.yaml +134 -0
- package/AISB/catalog/aisb.t3.093_fedwmsam.zh.yaml +106 -0
- package/AISB/catalog/aisb.t3.094_boundre.yaml +147 -0
- package/AISB/catalog/aisb.t3.094_boundre.zh.yaml +114 -0
- package/AISB/catalog/aisb.t3.095_fastfeaturecp.yaml +153 -0
- package/AISB/catalog/aisb.t3.095_fastfeaturecp.zh.yaml +118 -0
- package/AISB/catalog/aisb.t3.096_m3svm.yaml +189 -0
- package/AISB/catalog/aisb.t3.096_m3svm.zh.yaml +149 -0
- package/AISB/catalog/aisb.t3.097_wassersteintl.yaml +212 -0
- package/AISB/catalog/aisb.t3.097_wassersteintl.zh.yaml +169 -0
- package/AISB/catalog/aisb.t3.098_xmahalanobis.yaml +171 -0
- package/AISB/catalog/aisb.t3.098_xmahalanobis.zh.yaml +127 -0
- package/AISB/catalog/aisb.t3.099_ollalanding.yaml +248 -0
- package/AISB/catalog/aisb.t3.099_ollalanding.zh.yaml +182 -0
- package/AISB/catalog/aisb.t3.100_invmissingdata.yaml +179 -0
- package/AISB/catalog/aisb.t3.100_invmissingdata.zh.yaml +150 -0
- package/AISB/catalog/aisb.t3.101_acia.yaml +164 -0
- package/AISB/catalog/aisb.t3.101_acia.zh.yaml +109 -0
- package/AISB/catalog/aisb.t3.102_stochasticff.yaml +178 -0
- package/AISB/catalog/aisb.t3.102_stochasticff.zh.yaml +130 -0
- package/AISB/catalog/aisb.t3.103_qdcp.yaml +150 -0
- package/AISB/catalog/aisb.t3.103_qdcp.zh.yaml +116 -0
- package/AISB/catalog/aisb.t3.104_balancedactiveinf.yaml +137 -0
- package/AISB/catalog/aisb.t3.104_balancedactiveinf.zh.yaml +104 -0
- package/AISB/catalog/aisb.t3.105_binaryclasseval.yaml +161 -0
- package/AISB/catalog/aisb.t3.105_binaryclasseval.zh.yaml +130 -0
- package/AISB/image/001_aisb.t3.001_savvy.jpg +0 -0
- package/AISB/image/002_aisb.t3.002_pinet.jpg +0 -0
- package/AISB/image/003_aisb.t3.003_dmsqd.jpg +0 -0
- package/AISB/image/004_aisb.t3.004_decentralattn.jpg +0 -0
- package/AISB/image/005_aisb.t3.005_tsae.jpg +0 -0
- package/AISB/image/006_aisb.t3.006_physense.jpg +0 -0
- package/AISB/image/007_aisb.t3.007_reasoningiqa.jpg +0 -0
- package/AISB/image/008_aisb.t3.008_meanflows.jpg +0 -0
- package/AISB/image/009_aisb.t3.009_scoremissing.jpg +0 -0
- package/AISB/image/010_aisb.t3.010_suitabilityfilter.jpg +0 -0
- package/AISB/image/011_aisb.t3.011_osd.jpg +0 -0
- package/AISB/image/012_aisb.t3.012_efficientqat.jpg +0 -0
- package/AISB/image/013_aisb.t3.013_appl.jpg +0 -0
- package/AISB/image/014_aisb.t3.014_piguard.jpg +0 -0
- package/AISB/image/015_aisb.t3.015_frspec.jpg +0 -0
- package/AISB/image/016_aisb.t3.016_mathfusion.jpg +0 -0
- package/AISB/image/017_aisb.t3.017_multimodalglp.jpg +0 -0
- package/AISB/image/018_aisb.t3.018_cotsynth.jpg +0 -0
- package/AISB/image/019_aisb.t3.019_dyscaleut.jpg +0 -0
- package/AISB/image/020_aisb.t3.020_aristotle.jpg +0 -0
- package/AISB/image/021_aisb.t3.021_tokenrecycling.jpg +0 -0
- package/AISB/image/022_aisb.t3.022_chainofreasoning.jpg +0 -0
- package/AISB/image/023_aisb.t3.023_guidedembed.jpg +0 -0
- package/AISB/image/024_aisb.t3.024_outputcentric.jpg +0 -0
- package/AISB/image/025_aisb.t3.025_deeper.jpg +0 -0
- package/AISB/image/026_aisb.t3.026_gartkg.jpg +0 -0
- package/AISB/image/027_aisb.t3.027_citeeval.jpg +0 -0
- package/AISB/image/028_aisb.t3.028_sbam.jpg +0 -0
- package/AISB/image/029_aisb.t3.029_cdqgeoembed.jpg +0 -0
- package/AISB/image/030_aisb.t3.030_processrm.jpg +0 -0
- package/AISB/image/031_aisb.t3.031_circuitstability.jpg +0 -0
- package/AISB/image/032_aisb.t3.032_ptsolver.jpg +0 -0
- package/AISB/image/033_aisb.t3.033_gcse.jpg +0 -0
- package/AISB/image/034_aisb.t3.034_ensemblewm.jpg +0 -0
- package/AISB/image/035_aisb.t3.035_moralvalueswa.jpg +0 -0
- package/AISB/image/036_aisb.t3.036_weakstrongpref.jpg +0 -0
- package/AISB/image/037_aisb.t3.037_dementiamask.jpg +0 -0
- package/AISB/image/038_aisb.t3.038_tinysam.jpg +0 -0
- package/AISB/image/039_aisb.t3.039_calf.jpg +0 -0
- package/AISB/image/040_aisb.t3.040_graniteguardian.jpg +0 -0
- package/AISB/image/041_aisb.t3.041_amdm.jpg +0 -0
- package/AISB/image/042_aisb.t3.042_xpatch.jpg +0 -0
- package/AISB/image/043_aisb.t3.043_vhm.jpg +0 -0
- package/AISB/image/044_aisb.t3.044_rgvi.jpg +0 -0
- package/AISB/image/045_aisb.t3.045_pslstm.jpg +0 -0
- package/AISB/image/046_aisb.t3.046_nonstatts.jpg +0 -0
- package/AISB/image/047_aisb.t3.047_timepfn.jpg +0 -0
- package/AISB/image/048_aisb.t3.048_proxyspex.jpg +0 -0
- package/AISB/image/049_aisb.t3.049_hogwildinference.jpg +0 -0
- package/AISB/image/050_aisb.t3.050_causalpfn.jpg +0 -0
- package/AISB/image/051_aisb.t3.051_flashtp.jpg +0 -0
- package/AISB/image/052_aisb.t3.052_nsdiff.jpg +0 -0
- package/AISB/image/053_aisb.t3.053_k2vae.jpg +0 -0
- package/AISB/image/054_aisb.t3.054_timebase.jpg +0 -0
- package/AISB/image/055_aisb.t3.055_csbrain.jpg +0 -0
- package/AISB/image/056_aisb.t3.056_infosam.jpg +0 -0
- package/AISB/image/057_aisb.t3.057_mdreid.jpg +0 -0
- package/AISB/image/058_aisb.t3.058_mindglitch.jpg +0 -0
- package/AISB/image/059_aisb.t3.059_selfsupervised.jpg +0 -0
- package/AISB/image/060_aisb.t3.060_iaggad.jpg +0 -0
- package/AISB/image/061_aisb.t3.061_hsgkn.jpg +0 -0
- package/AISB/image/062_aisb.t3.062_visionts.jpg +0 -0
- package/AISB/image/063_aisb.t3.063_tsrag.jpg +0 -0
- package/AISB/image/064_aisb.t3.064_pir.jpg +0 -0
- package/AISB/image/065_aisb.t3.065_proteinbinding.jpg +0 -0
- package/AISB/image/066_aisb.t3.066_tropicalattention.jpg +0 -0
- package/AISB/image/067_aisb.t3.067_kanad.jpg +0 -0
- package/AISB/image/068_aisb.t3.068_sempo.jpg +0 -0
- package/AISB/image/069_aisb.t3.069_treehfd.jpg +0 -0
- package/AISB/image/070_aisb.t3.070_certifiedunlearning.jpg +0 -0
- package/AISB/image/071_aisb.t3.071_neuralmjd.jpg +0 -0
- package/AISB/image/072_aisb.t3.072_fedgmt.jpg +0 -0
- package/AISB/image/073_aisb.t3.073_rld.jpg +0 -0
- package/AISB/image/074_aisb.t3.074_lsvi.jpg +0 -0
- package/AISB/image/075_aisb.t3.075_treeslicedentropy.jpg +0 -0
- package/AISB/image/076_aisb.t3.076_aanet.jpg +0 -0
- package/AISB/image/077_aisb.t3.077_cmnn.jpg +0 -0
- package/AISB/image/078_aisb.t3.078_conformalanomaly.jpg +0 -0
- package/AISB/image/079_aisb.t3.079_dpfkmeans.jpg +0 -0
- package/AISB/image/080_aisb.t3.080_latentscorereweight.jpg +0 -0
- package/AISB/image/081_aisb.t3.081_qmamba.jpg +0 -0
- package/AISB/image/082_aisb.t3.082_onlinellmrouting.jpg +0 -0
- package/AISB/image/083_aisb.t3.083_starformer.jpg +0 -0
- package/AISB/image/084_aisb.t3.084_ift.jpg +0 -0
- package/AISB/image/085_aisb.t3.085_neuralsurv.jpg +0 -0
- package/AISB/image/086_aisb.t3.086_stella.jpg +0 -0
- package/AISB/image/087_aisb.t3.087_moses.jpg +0 -0
- package/AISB/image/088_aisb.t3.088_channelnorm.jpg +0 -0
- package/AISB/image/089_aisb.t3.089_causalvelocity.jpg +0 -0
- package/AISB/image/090_aisb.t3.090_rstib.jpg +0 -0
- package/AISB/image/091_aisb.t3.091_timeawarecausal.jpg +0 -0
- package/AISB/image/092_aisb.t3.092_kmeanslocalopt.jpg +0 -0
- package/AISB/image/093_aisb.t3.093_fedwmsam.jpg +0 -0
- package/AISB/image/094_aisb.t3.094_boundre.jpg +0 -0
- package/AISB/image/095_aisb.t3.095_fastfeaturecp.jpg +0 -0
- package/AISB/image/096_aisb.t3.096_m3svm.jpg +0 -0
- package/AISB/image/097_aisb.t3.097_wassersteintl.jpg +0 -0
- package/AISB/image/098_aisb.t3.098_xmahalanobis.jpg +0 -0
- package/AISB/image/099_aisb.t3.099_ollalanding.jpg +0 -0
- package/AISB/image/100_aisb.t3.100_invmissingdata.jpg +0 -0
- package/AISB/image/101_aisb.t3.101_acia.jpg +0 -0
- package/AISB/image/102_aisb.t3.102_stochasticff.jpg +0 -0
- package/AISB/image/103_aisb.t3.103_qdcp.jpg +0 -0
- package/AISB/image/104_aisb.t3.104_balancedactiveinf.jpg +0 -0
- package/AISB/image/105_aisb.t3.105_binaryclasseval.jpg +0 -0
- package/AISB/image/106_aisb.t1.reasoning_lite.jpg +0 -0
- package/AISB/image/107_aisb.t2.paper_audit.jpg +0 -0
- package/AISB/image/108_aisb.t3.multi_gpu_search.jpg +0 -0
- package/AISB/image/109_aisb.t3.tdc_admet.jpg +0 -0
- package/AISB/image/aisb.b1.agentic_coding.svg +16 -0
- package/AISB/image/aisb.b10.climate_earth.svg +16 -0
- package/AISB/image/aisb.b11.model_efficiency.svg +16 -0
- package/AISB/image/aisb.b12.embodied_ai.svg +16 -0
- package/AISB/image/aisb.b2.agent_systems.svg +16 -0
- package/AISB/image/aisb.b3.self_evolving_rl.svg +16 -0
- package/AISB/image/aisb.b4.lm_reasoning.svg +16 -0
- package/AISB/image/aisb.b5.math_proof.svg +16 -0
- package/AISB/image/aisb.b6.research_process.svg +16 -0
- package/AISB/image/aisb.b7.multimodal_fusion.svg +16 -0
- package/AISB/image/aisb.b8.lifesci_drug.svg +16 -0
- package/AISB/image/aisb.b9.material_science.svg +16 -0
- package/README.md +132 -11
- package/bin/ds.js +376 -49
- package/docs/en/00_QUICK_START.md +135 -18
- package/docs/en/01_SETTINGS_REFERENCE.md +468 -96
- package/docs/en/02_START_RESEARCH_GUIDE.md +26 -5
- package/docs/en/03_QQ_CONNECTOR_GUIDE.md +14 -3
- package/docs/en/04_LINGZHU_CONNECTOR_GUIDE.md +2 -0
- package/docs/en/05_TUI_GUIDE.md +171 -2
- package/docs/en/07_MEMORY_AND_MCP.md +38 -2
- package/docs/en/09_DOCTOR.md +64 -4
- package/docs/en/10_WEIXIN_CONNECTOR_GUIDE.md +38 -1
- package/docs/en/11_LICENSE_AND_RISK.md +4 -0
- package/docs/en/12_GUIDED_WORKFLOW_TOUR.md +15 -0
- package/docs/en/14_PROMPT_SKILLS_AND_MCP_GUIDE.md +9 -0
- package/docs/en/15_CODEX_PROVIDER_SETUP.md +622 -187
- package/docs/en/16_TELEGRAM_CONNECTOR_GUIDE.md +14 -0
- package/docs/en/17_WHATSAPP_CONNECTOR_GUIDE.md +14 -0
- package/docs/en/18_FEISHU_CONNECTOR_GUIDE.md +14 -0
- package/docs/en/21_LOCAL_MODEL_BACKENDS_GUIDE.md +105 -2
- package/docs/en/22_BENCHSTORE_YAML_REFERENCE.md +469 -0
- package/docs/en/23_BENCHSTORE_GITHUB_RELEASES_SPEC.md +316 -0
- package/docs/en/24_CLAUDE_CODE_PROVIDER_SETUP.md +469 -0
- package/docs/en/25_OPENCODE_PROVIDER_SETUP.md +653 -0
- package/docs/en/26_CITATION_AND_ATTRIBUTION.md +119 -0
- package/docs/en/27_KIMI_CODE_PROVIDER_SETUP.md +180 -0
- package/docs/en/28_DISCORD_CONNECTOR_GUIDE.md +61 -0
- package/docs/en/29_SLACK_CONNECTOR_GUIDE.md +60 -0
- package/docs/en/30_SETTINGS_CONTROL_CENTER_GUIDE.md +371 -0
- package/docs/en/{19_LOCAL_BROWSER_AUTH.md → 31_LOCAL_BROWSER_AUTH.md} +1 -1
- package/docs/en/32_WINDOWS_WSL2_DEPLOYMENT_GUIDE.md +273 -0
- package/docs/en/33_WORKSPACE_EXPLORER_QA.md +121 -0
- package/docs/en/91_DEVELOPMENT.md +29 -0
- package/docs/en/99_ACKNOWLEDGEMENTS.md +24 -19
- package/docs/en/README.md +44 -7
- package/docs/images/admin/admin-connectors-health-en.png +0 -0
- package/docs/images/admin/admin-controllers-en.png +0 -0
- package/docs/images/admin/admin-diagnostics-en.png +0 -0
- package/docs/images/admin/admin-errors-en.png +0 -0
- package/docs/images/admin/admin-issues-en.png +0 -0
- package/docs/images/admin/admin-logs-en.png +0 -0
- package/docs/images/admin/admin-quest-detail-en.png +0 -0
- package/docs/images/admin/admin-quests-en.png +0 -0
- package/docs/images/admin/admin-repairs-en.png +0 -0
- package/docs/images/admin/admin-runtime-en.png +0 -0
- package/docs/images/admin/admin-search-en.png +0 -0
- package/docs/images/admin/admin-stats-en.png +0 -0
- package/docs/images/admin/admin-summary-en.png +0 -0
- package/docs/images/connectors/connector-discord-en.png +0 -0
- package/docs/images/connectors/connector-feishu-en.png +0 -0
- package/docs/images/connectors/connector-lingzhu-en.png +0 -0
- package/docs/images/connectors/connector-qq-en.png +0 -0
- package/docs/images/connectors/connector-slack-en.png +0 -0
- package/docs/images/connectors/connector-telegram-en.png +0 -0
- package/docs/images/connectors/connector-weixin-en.png +0 -0
- package/docs/images/connectors/connector-whatsapp-en.png +0 -0
- package/docs/images/settings/settings-baselines-en.png +0 -0
- package/docs/images/settings/settings-config-en.png +0 -0
- package/docs/images/settings/settings-connectors-overview-en.png +0 -0
- package/docs/images/settings/settings-deepxiv-en.png +0 -0
- package/docs/images/settings/settings-mcp-servers-en.png +0 -0
- package/docs/images/settings/settings-plugins-en.png +0 -0
- package/docs/images/settings/settings-runners-en.png +0 -0
- package/docs/zh/00_QUICK_START.md +92 -17
- package/docs/zh/01_SETTINGS_REFERENCE.md +219 -98
- package/docs/zh/02_START_RESEARCH_GUIDE.md +26 -5
- package/docs/zh/05_TUI_GUIDE.md +171 -2
- package/docs/zh/07_MEMORY_AND_MCP.md +29 -2
- package/docs/zh/09_DOCTOR.md +39 -4
- package/docs/zh/10_WEIXIN_CONNECTOR_GUIDE.md +24 -1
- package/docs/zh/11_LICENSE_AND_RISK.md +4 -0
- package/docs/zh/12_GUIDED_WORKFLOW_TOUR.md +15 -0
- package/docs/zh/14_PROMPT_SKILLS_AND_MCP_GUIDE.md +9 -0
- package/docs/zh/15_CODEX_PROVIDER_SETUP.md +550 -188
- package/docs/zh/21_LOCAL_MODEL_BACKENDS_GUIDE.md +105 -2
- package/docs/zh/22_BENCHSTORE_YAML_REFERENCE.md +459 -0
- package/docs/zh/23_BENCHSTORE_GITHUB_RELEASES_SPEC.md +287 -0
- package/docs/zh/23_CLAUDE_RUNNER_GUIDE.md +103 -0
- package/docs/zh/24_CLAUDE_CODE_PROVIDER_SETUP.md +460 -0
- package/docs/zh/25_OPENCODE_PROVIDER_SETUP.md +660 -0
- package/docs/zh/26_CITATION_AND_ATTRIBUTION.md +102 -0
- package/docs/zh/27_KIMI_CODE_PROVIDER_SETUP.md +51 -0
- package/docs/zh/{19_LOCAL_BROWSER_AUTH.md → 31_LOCAL_BROWSER_AUTH.md} +1 -1
- package/docs/zh/32_WINDOWS_WSL2_DEPLOYMENT_GUIDE.md +264 -0
- package/docs/zh/33_WORKSPACE_EXPLORER_QA.md +127 -0
- package/docs/zh/99_ACKNOWLEDGEMENTS.md +23 -19
- package/docs/zh/README.md +29 -7
- package/install.sh +122 -16
- package/package.json +4 -1
- package/pyproject.toml +2 -1
- package/src/deepscientist/__init__.py +1 -1
- package/src/deepscientist/acp/envelope.py +13 -0
- package/src/deepscientist/admin/__init__.py +3 -0
- package/src/deepscientist/admin/charts.py +681 -0
- package/src/deepscientist/admin/logs.py +119 -0
- package/src/deepscientist/admin/repairs.py +217 -0
- package/src/deepscientist/admin/service.py +1310 -0
- package/src/deepscientist/admin/system_info.py +700 -0
- package/src/deepscientist/admin/tasks.py +465 -0
- package/src/deepscientist/admin/tool_metrics.py +600 -0
- package/src/deepscientist/artifact/guidance.py +8 -4
- package/src/deepscientist/artifact/schemas.py +115 -0
- package/src/deepscientist/artifact/service.py +4268 -260
- package/src/deepscientist/bash_exec/monitor.py +30 -3
- package/src/deepscientist/bash_exec/service.py +134 -1
- package/src/deepscientist/benchstore/__init__.py +4 -0
- package/src/deepscientist/benchstore/prompt_builder.py +224 -0
- package/src/deepscientist/benchstore/service.py +1716 -0
- package/src/deepscientist/channels/weixin_ilink.py +8 -1
- package/src/deepscientist/cli.py +92 -17
- package/src/deepscientist/codex_cli_compat.py +2 -2
- package/src/deepscientist/config/models.py +82 -11
- package/src/deepscientist/config/service.py +927 -91
- package/src/deepscientist/connector/weixin_support.py +48 -17
- package/src/deepscientist/daemon/api/handlers.py +697 -210
- package/src/deepscientist/daemon/api/router.py +76 -1
- package/src/deepscientist/daemon/app.py +1054 -51
- package/src/deepscientist/diagnostics/runner_failures.py +147 -0
- package/src/deepscientist/doctor.py +212 -65
- package/src/deepscientist/evidence_packets.py +590 -0
- package/src/deepscientist/home.py +52 -4
- package/src/deepscientist/kimi_cli_compat.py +50 -0
- package/src/deepscientist/latex_runtime.py +2 -2
- package/src/deepscientist/mcp/context.py +2 -0
- package/src/deepscientist/mcp/schemas.py +114 -0
- package/src/deepscientist/mcp/server.py +1566 -126
- package/src/deepscientist/memory/service.py +203 -16
- package/src/deepscientist/process_control.py +8 -1
- package/src/deepscientist/prompts/builder.py +836 -92
- package/src/deepscientist/quest/__init__.py +2 -2
- package/src/deepscientist/quest/layout.py +12 -1
- package/src/deepscientist/quest/node_traces.py +10 -0
- package/src/deepscientist/quest/service.py +1430 -139
- package/src/deepscientist/quest/stage_views.py +1 -1
- package/src/deepscientist/runners/__init__.py +18 -0
- package/src/deepscientist/runners/base.py +89 -1
- package/src/deepscientist/runners/builtins.py +13 -1
- package/src/deepscientist/runners/claude.py +391 -0
- package/src/deepscientist/runners/codex.py +421 -21
- package/src/deepscientist/runners/codex_telemetry.py +127 -0
- package/src/deepscientist/runners/kimi.py +334 -0
- package/src/deepscientist/runners/metadata.py +68 -0
- package/src/deepscientist/runners/opencode.py +414 -0
- package/src/deepscientist/runners/runtime_overrides.py +100 -0
- package/src/deepscientist/runners/simple_cli.py +538 -0
- package/src/deepscientist/runtime_storage.py +303 -0
- package/src/deepscientist/shared.py +61 -16
- package/src/deepscientist/skills/installer.py +37 -0
- package/src/deepscientist/skills/registry.py +2 -0
- package/src/deepscientist/tinytex.py +2 -2
- package/src/deepscientist/tui.py +10 -3
- package/src/prompts/benchstore/system.md +77 -0
- package/src/prompts/connectors/qq.md +33 -2
- package/src/prompts/connectors/weixin.md +208 -23
- package/src/prompts/contracts/admin_ops.md +74 -0
- package/src/prompts/contracts/admin_ops_knowledge.md +138 -0
- package/src/prompts/contracts/shared_interaction.md +5 -11
- package/src/prompts/start_setup/system.md +422 -0
- package/src/prompts/system.md +409 -315
- package/src/prompts/system_copilot.md +88 -12
- package/src/skills/analysis-campaign/SKILL.md +239 -578
- package/src/skills/analysis-campaign/references/artifact-flow-examples.md +102 -0
- package/src/skills/analysis-campaign/references/boundary-cases.md +98 -0
- package/src/skills/analysis-campaign/references/campaign-checklist-template.md +39 -24
- package/src/skills/analysis-campaign/references/campaign-design.md +26 -10
- package/src/skills/analysis-campaign/references/campaign-plan-template.md +53 -54
- package/src/skills/analysis-campaign/references/operational-guidance.md +97 -0
- package/src/skills/analysis-campaign/references/writing-facing-slice-examples.md +10 -20
- package/src/skills/baseline/SKILL.md +183 -461
- package/src/skills/baseline/references/artifact-flow-examples.md +106 -0
- package/src/skills/baseline/references/artifact-payload-examples.md +1 -1
- package/src/skills/baseline/references/baseline-checklist-template.md +27 -35
- package/src/skills/baseline/references/baseline-plan-template.md +37 -76
- package/src/skills/baseline/references/boundary-cases.md +86 -0
- package/src/skills/baseline/references/codebase-audit-checklist.md +2 -6
- package/src/skills/baseline/references/comparability-contract.md +7 -12
- package/src/skills/baseline/references/operational-guidance.md +56 -0
- package/src/skills/baseline/references/route-selection.md +5 -25
- package/src/skills/decision/SKILL.md +113 -306
- package/src/skills/decision/references/checkpoint-memory-template.md +47 -0
- package/src/skills/decision/references/operational-guidance.md +94 -0
- package/src/skills/decision/references/research-route-criteria.md +7 -8
- package/src/skills/decision/references/strategic-decision-template.md +13 -26
- package/src/skills/experiment/SKILL.md +132 -670
- package/src/skills/experiment/references/execution-playbook.md +374 -0
- package/src/skills/experiment/references/main-experiment-checklist-template.md +26 -2
- package/src/skills/experiment/references/main-experiment-plan-template.md +28 -17
- package/src/skills/experiment/references/operational-guidance.md +108 -0
- package/src/skills/finalize/SKILL.md +62 -0
- package/src/skills/finalize/references/checkpoint-memory-template.md +49 -0
- package/src/skills/finalize/references/resume-packet-template.md +7 -0
- package/src/skills/idea/SKILL.md +228 -15
- package/src/skills/idea/references/controlled-brainstorming-playbook.md +78 -0
- package/src/skills/idea/references/current-board-packet-template.md +61 -0
- package/src/skills/idea/references/high-value-idea-sourcing.md +119 -0
- package/src/skills/idea/references/idea-generation-playbook.md +21 -0
- package/src/skills/idea/references/idea-thinking-flow.md +6 -0
- package/src/skills/idea/references/literature-survey-template.md +3 -0
- package/src/skills/idea/references/objective-contract-template.md +54 -0
- package/src/skills/idea/references/outline-seeding-example.md +56 -0
- package/src/skills/idea/references/pre-idea-draft-template.md +105 -0
- package/src/skills/idea/references/related-work-playbook.md +75 -2
- package/src/skills/idea/references/research-history-playbook.md +114 -0
- package/src/skills/idea/references/selection-gate.md +58 -6
- package/src/skills/intake-audit/SKILL.md +43 -2
- package/src/skills/intake-audit/references/state-audit-template.md +10 -0
- package/src/skills/nature-data/SKILL.md +128 -0
- package/src/skills/nature-data/UPSTREAM_LICENSE.txt +21 -0
- package/src/skills/nature-data/agents/openai.yaml +4 -0
- package/src/skills/nature-data/references/chinese-author-alignment.md +84 -0
- package/src/skills/nature-data/references/fair-metadata-checklist.md +105 -0
- package/src/skills/nature-data/references/policy-principles.md +103 -0
- package/src/skills/nature-data/references/repository-and-identifiers.md +96 -0
- package/src/skills/nature-data/references/source-basis.md +54 -0
- package/src/skills/nature-data/references/statement-patterns.md +153 -0
- package/src/skills/nature-figure/SKILL.md +197 -0
- package/src/skills/nature-figure/UPSTREAM_LICENSE.txt +21 -0
- package/src/skills/nature-figure/agents/openai.yaml +4 -0
- package/src/skills/nature-figure/evals/evals.json +37 -0
- package/src/skills/nature-figure/references/api.md +428 -0
- package/src/skills/nature-figure/references/backend-selection.md +100 -0
- package/src/skills/nature-figure/references/chart-types.md +281 -0
- package/src/skills/nature-figure/references/common-patterns.md +349 -0
- package/src/skills/nature-figure/references/design-theory.md +436 -0
- package/src/skills/nature-figure/references/figure-contract.md +93 -0
- package/src/skills/nature-figure/references/nature-2026-observations.md +112 -0
- package/src/skills/nature-figure/references/qa-contract.md +119 -0
- package/src/skills/nature-figure/references/r-template-index.md +66 -0
- package/src/skills/nature-figure/references/r-workflow.md +161 -0
- package/src/skills/nature-figure/references/tutorials.md +250 -0
- package/src/skills/nature-paper2ppt/SKILL.md +507 -0
- package/src/skills/nature-paper2ppt/UPSTREAM_LICENSE.txt +21 -0
- package/src/skills/nature-paper2ppt/agents/openai.yaml +4 -0
- package/src/skills/nature-polishing/SKILL.md +385 -0
- package/src/skills/nature-polishing/UPSTREAM_LICENSE.txt +21 -0
- package/src/skills/nature-polishing/agents/openai.yaml +4 -0
- package/src/skills/nature-polishing/references/phrasebank-playbook.md +162 -0
- package/src/skills/nature-polishing/references/section-moves.md +240 -0
- package/src/skills/nature-polishing/references/style-guardrails.md +94 -0
- package/src/skills/nature-polishing/references/writing-strategy.md +148 -0
- package/src/skills/optimize/SKILL.md +177 -1568
- package/src/skills/optimize/references/brief-shaping-playbook.md +95 -0
- package/src/skills/optimize/references/candidate-board-template.md +13 -0
- package/src/skills/optimize/references/candidate-ranking-template.md +51 -0
- package/src/skills/optimize/references/codegen-route-playbook.md +50 -0
- package/src/skills/optimize/references/debug-response-template.md +29 -0
- package/src/skills/optimize/references/frontier-review-template.md +32 -0
- package/src/skills/optimize/references/fusion-playbook.md +36 -0
- package/src/skills/optimize/references/method-brief-template.md +73 -0
- package/src/skills/optimize/references/operational-guidance.md +621 -0
- package/src/skills/optimize/references/optimization-memory-template.md +30 -0
- package/src/skills/optimize/references/optimize-checklist-template.md +18 -0
- package/src/skills/optimize/references/plateau-response-playbook.md +28 -0
- package/src/skills/optimize/references/prompt-patterns.md +49 -0
- package/src/skills/paper-outline/SKILL.md +227 -0
- package/src/skills/paper-outline/references/outline-patterns.md +87 -0
- package/src/skills/paper-plot/SKILL.md +79 -0
- package/src/skills/paper-plot/agents/openai.yaml +4 -0
- package/src/skills/paper-plot/references/bar_grouped_hatch.md +96 -0
- package/src/skills/paper-plot/references/bar_paired_delta.md +72 -0
- package/src/skills/paper-plot/references/line_confidence_band.md +75 -0
- package/src/skills/paper-plot/references/line_loss_with_inset.md +65 -0
- package/src/skills/paper-plot/references/line_training_curve.md +44 -0
- package/src/skills/paper-plot/references/radar_dual_series.md +59 -0
- package/src/skills/paper-plot/references/scatter_broken_axis.md +59 -0
- package/src/skills/paper-plot/references/scatter_tsne_cluster.md +72 -0
- package/src/skills/paper-plot/scripts/bar_memevolve.py +109 -0
- package/src/skills/paper-plot/scripts/bar_spice.py +166 -0
- package/src/skills/paper-plot/scripts/line_aime.py +94 -0
- package/src/skills/paper-plot/scripts/line_loss_inset.py +157 -0
- package/src/skills/paper-plot/scripts/line_selfdistill.py +168 -0
- package/src/skills/paper-plot/scripts/radar_dora.py +151 -0
- package/src/skills/paper-plot/scripts/scatter_break.py +169 -0
- package/src/skills/paper-plot/scripts/scatter_tsne.py +133 -0
- package/src/skills/rebuttal/SKILL.md +9 -0
- package/src/skills/references/tool-usage-by-stage.md +438 -0
- package/src/skills/review/SKILL.md +105 -7
- package/src/skills/science/PROVENANCE.md +44 -0
- package/src/skills/science/SKILL.md +137 -0
- package/src/skills/science/references/artifact-science-tool.md +110 -0
- package/src/skills/science/references/claim-type-discipline.md +56 -0
- package/src/skills/science/references/domain-index.md +422 -0
- package/src/skills/science/references/hpc-via-bash-exec.md +42 -0
- package/src/skills/science/references/package-check-playbook.md +64 -0
- package/src/skills/science/references/package-index.min.json +3616 -0
- package/src/skills/science/references/packages/abinit.md +80 -0
- package/src/skills/science/references/packages/acts.md +73 -0
- package/src/skills/science/references/packages/aiida-core.md +80 -0
- package/src/skills/science/references/packages/alamode.md +80 -0
- package/src/skills/science/references/packages/amuse.md +88 -0
- package/src/skills/science/references/packages/anndata.md +88 -0
- package/src/skills/science/references/packages/arbor.md +80 -0
- package/src/skills/science/references/packages/arc.md +73 -0
- package/src/skills/science/references/packages/astropy.md +88 -0
- package/src/skills/science/references/packages/astroquery.md +88 -0
- package/src/skills/science/references/packages/atomate2.md +80 -0
- package/src/skills/science/references/packages/atomsmltr.md +73 -0
- package/src/skills/science/references/packages/awkward.md +73 -0
- package/src/skills/science/references/packages/batman.md +88 -0
- package/src/skills/science/references/packages/biopython.md +88 -0
- package/src/skills/science/references/packages/bloqade.md +73 -0
- package/src/skills/science/references/packages/brian2.md +73 -0
- package/src/skills/science/references/packages/bullet3.md +73 -0
- package/src/skills/science/references/packages/calculix.md +80 -0
- package/src/skills/science/references/packages/cantera.md +73 -0
- package/src/skills/science/references/packages/cavity-md-ipi.md +80 -0
- package/src/skills/science/references/packages/ccdproc.md +88 -0
- package/src/skills/science/references/packages/celerite2.md +88 -0
- package/src/skills/science/references/packages/cellrank.md +73 -0
- package/src/skills/science/references/packages/cesm.md +80 -0
- package/src/skills/science/references/packages/chemicals.md +73 -0
- package/src/skills/science/references/packages/chempy.md +73 -0
- package/src/skills/science/references/packages/cirq.md +73 -0
- package/src/skills/science/references/packages/coffea.md +73 -0
- package/src/skills/science/references/packages/cp2k.md +88 -0
- package/src/skills/science/references/packages/custodian.md +80 -0
- package/src/skills/science/references/packages/dart.md +73 -0
- package/src/skills/science/references/packages/datamol.md +88 -0
- package/src/skills/science/references/packages/dd4hep.md +73 -0
- package/src/skills/science/references/packages/dealii.md +80 -0
- package/src/skills/science/references/packages/deepchem.md +88 -0
- package/src/skills/science/references/packages/delphes.md +73 -0
- package/src/skills/science/references/packages/devito.md +80 -0
- package/src/skills/science/references/packages/dftb.md +88 -0
- package/src/skills/science/references/packages/dftd4.md +88 -0
- package/src/skills/science/references/packages/dftk-jl.md +80 -0
- package/src/skills/science/references/packages/dolfinx.md +80 -0
- package/src/skills/science/references/packages/drake.md +73 -0
- package/src/skills/science/references/packages/dumux.md +73 -0
- package/src/skills/science/references/packages/elk.md +80 -0
- package/src/skills/science/references/packages/elmerfem.md +80 -0
- package/src/skills/science/references/packages/enzo-e.md +88 -0
- package/src/skills/science/references/packages/espresso.md +80 -0
- package/src/skills/science/references/packages/exoplanet.md +88 -0
- package/src/skills/science/references/packages/fairroot.md +73 -0
- package/src/skills/science/references/packages/fbpic.md +80 -0
- package/src/skills/science/references/packages/fdtdbath-meep.md +80 -0
- package/src/skills/science/references/packages/geant4.md +73 -0
- package/src/skills/science/references/packages/geosx.md +80 -0
- package/src/skills/science/references/packages/gprmax.md +80 -0
- package/src/skills/science/references/packages/gromacs.md +80 -0
- package/src/skills/science/references/packages/gwaslab.md +73 -0
- package/src/skills/science/references/packages/gz-sim.md +73 -0
- package/src/skills/science/references/packages/hail.md +88 -0
- package/src/skills/science/references/packages/hiphive.md +80 -0
- package/src/skills/science/references/packages/hoomd-blue.md +80 -0
- package/src/skills/science/references/packages/itensor.md +73 -0
- package/src/skills/science/references/packages/itensors-jl.md +73 -0
- package/src/skills/science/references/packages/jdftx.md +73 -0
- package/src/skills/science/references/packages/jobflow.md +80 -0
- package/src/skills/science/references/packages/kadanoffbaym-jl.md +73 -0
- package/src/skills/science/references/packages/kite.md +80 -0
- package/src/skills/science/references/packages/kratos.md +80 -0
- package/src/skills/science/references/packages/kwant.md +73 -0
- package/src/skills/science/references/packages/lammps.md +80 -0
- package/src/skills/science/references/packages/lightkurve.md +88 -0
- package/src/skills/science/references/packages/limix.md +73 -0
- package/src/skills/science/references/packages/maxwelllink.md +80 -0
- package/src/skills/science/references/packages/mcdc.md +73 -0
- package/src/skills/science/references/packages/meep.md +80 -0
- package/src/skills/science/references/packages/mfem.md +80 -0
- package/src/skills/science/references/packages/mitgcm.md +73 -0
- package/src/skills/science/references/packages/modflow6.md +73 -0
- package/src/skills/science/references/packages/molecool.md +73 -0
- package/src/skills/science/references/packages/mom6.md +73 -0
- package/src/skills/science/references/packages/moose.md +80 -0
- package/src/skills/science/references/packages/mpas-model.md +73 -0
- package/src/skills/science/references/packages/mujoco.md +73 -0
- package/src/skills/science/references/packages/mumax3.md +73 -0
- package/src/skills/science/references/packages/nekrs.md +80 -0
- package/src/skills/science/references/packages/nessi.md +73 -0
- package/src/skills/science/references/packages/nest-simulator.md +73 -0
- package/src/skills/science/references/packages/netket.md +73 -0
- package/src/skills/science/references/packages/neuron.md +73 -0
- package/src/skills/science/references/packages/nextflow.md +88 -0
- package/src/skills/science/references/packages/nwchem.md +88 -0
- package/src/skills/science/references/packages/openbabel.md +88 -0
- package/src/skills/science/references/packages/openems.md +80 -0
- package/src/skills/science/references/packages/openff-toolkit.md +88 -0
- package/src/skills/science/references/packages/openfoam-dev.md +80 -0
- package/src/skills/science/references/packages/openmc.md +73 -0
- package/src/skills/science/references/packages/openmm.md +80 -0
- package/src/skills/science/references/packages/openmoc.md +73 -0
- package/src/skills/science/references/packages/openmx.md +80 -0
- package/src/skills/science/references/packages/opensees.md +80 -0
- package/src/skills/science/references/packages/opensn.md +80 -0
- package/src/skills/science/references/packages/opm-simulators.md +73 -0
- package/src/skills/science/references/packages/oqupy.md +73 -0
- package/src/skills/science/references/packages/packmol.md +80 -0
- package/src/skills/science/references/packages/palabos.md +80 -0
- package/src/skills/science/references/packages/parflow.md +80 -0
- package/src/skills/science/references/packages/pennylane.md +88 -0
- package/src/skills/science/references/packages/perceval.md +73 -0
- package/src/skills/science/references/packages/phono3py.md +73 -0
- package/src/skills/science/references/packages/phonopy.md +73 -0
- package/src/skills/science/references/packages/photutils.md +88 -0
- package/src/skills/science/references/packages/picongpu.md +80 -0
- package/src/skills/science/references/packages/plink-ng.md +88 -0
- package/src/skills/science/references/packages/precice.md +73 -0
- package/src/skills/science/references/packages/psc.md +80 -0
- package/src/skills/science/references/packages/psi4.md +88 -0
- package/src/skills/science/references/packages/pybinding.md +73 -0
- package/src/skills/science/references/packages/pyfr.md +80 -0
- package/src/skills/science/references/packages/pyhf.md +73 -0
- package/src/skills/science/references/packages/pyiron_base.md +80 -0
- package/src/skills/science/references/packages/pylcp.md +73 -0
- package/src/skills/science/references/packages/pylith.md +80 -0
- package/src/skills/science/references/packages/pynbody.md +88 -0
- package/src/skills/science/references/packages/pysam.md +88 -0
- package/src/skills/science/references/packages/pyscf.md +88 -0
- package/src/skills/science/references/packages/q-e.md +73 -0
- package/src/skills/science/references/packages/qibo.md +73 -0
- package/src/skills/science/references/packages/qiskit.md +73 -0
- package/src/skills/science/references/packages/quantica-jl.md +73 -0
- package/src/skills/science/references/packages/quantumoptics-jl.md +73 -0
- package/src/skills/science/references/packages/quimb.md +73 -0
- package/src/skills/science/references/packages/qulacs.md +73 -0
- package/src/skills/science/references/packages/qutip.md +73 -0
- package/src/skills/science/references/packages/rdkit.md +88 -0
- package/src/skills/science/references/packages/rmg-py.md +73 -0
- package/src/skills/science/references/packages/root.md +73 -0
- package/src/skills/science/references/packages/scanpy.md +88 -0
- package/src/skills/science/references/packages/scikit-allel.md +88 -0
- package/src/skills/science/references/packages/scikit-bio.md +88 -0
- package/src/skills/science/references/packages/scqubits.md +73 -0
- package/src/skills/science/references/packages/scuff-em.md +80 -0
- package/src/skills/science/references/packages/scvi-tools.md +73 -0
- package/src/skills/science/references/packages/seissol.md +73 -0
- package/src/skills/science/references/packages/sfepy.md +80 -0
- package/src/skills/science/references/packages/sisl.md +73 -0
- package/src/skills/science/references/packages/smilei.md +80 -0
- package/src/skills/science/references/packages/snakemake.md +88 -0
- package/src/skills/science/references/packages/specfem3d-globe.md +80 -0
- package/src/skills/science/references/packages/specutils.md +88 -0
- package/src/skills/science/references/packages/spglib.md +80 -0
- package/src/skills/science/references/packages/squidpy.md +88 -0
- package/src/skills/science/references/packages/starry.md +88 -0
- package/src/skills/science/references/packages/strawberryfields.md +73 -0
- package/src/skills/science/references/packages/su2.md +80 -0
- package/src/skills/science/references/packages/sunny-jl.md +73 -0
- package/src/skills/science/references/packages/sw4.md +73 -0
- package/src/skills/science/references/packages/swift.md +88 -0
- package/src/skills/science/references/packages/tdnegf.md +73 -0
- package/src/skills/science/references/packages/tenpy.md +73 -0
- package/src/skills/science/references/packages/thermo.md +73 -0
- package/src/skills/science/references/packages/tkwant.md +73 -0
- package/src/skills/science/references/packages/tvb-root.md +73 -0
- package/src/skills/science/references/packages/uproot5.md +73 -0
- package/src/skills/science/references/packages/vampire.md +80 -0
- package/src/skills/science/references/packages/wannier_tools.md +73 -0
- package/src/skills/science/references/packages/warpx.md +80 -0
- package/src/skills/science/references/packages/wrf.md +73 -0
- package/src/skills/science/references/packages/xtb.md +88 -0
- package/src/skills/science/references/packages/yt.md +73 -0
- package/src/skills/science/references/science-task-brief-template.md +71 -0
- package/src/skills/scout/SKILL.md +83 -425
- package/src/skills/scout/references/literature-scout-template.md +5 -24
- package/src/skills/scout/references/operational-guidance.md +191 -0
- package/src/skills/scout/references/paper-triage-playbook.md +11 -35
- package/src/skills/write/SKILL.md +744 -1246
- package/src/skills/write/references/experiments_analysis_patterns.md +129 -0
- package/src/skills/write/references/oral_package_patterns.md +252 -0
- package/src/skills/write/references/oral_writing_principles.md +291 -0
- package/src/skills/write/references/section_rewrite_checklist.md +234 -0
- package/src/tui/dist/app/AppContainer.js +1314 -27
- package/src/tui/dist/components/Composer.js +26 -1
- package/src/tui/dist/components/ConfigScreen.js +2 -1
- package/src/tui/dist/components/InputPrompt.js +25 -9
- package/src/tui/dist/components/MainContent.js +18 -3
- package/src/tui/dist/components/QuestScreen.js +3 -2
- package/src/tui/dist/components/UtilityScreen.js +37 -0
- package/src/tui/dist/hooks/useSafeInput.js +10 -0
- package/src/tui/dist/index.js +13 -1
- package/src/tui/dist/layouts/DefaultAppLayout.js +11 -8
- package/src/tui/dist/lib/api.js +89 -1
- package/src/tui/package.json +1 -1
- package/src/ui/dist/assets/{AnalysisPlugin-BCKAfjba.js → AnalysisPlugin-CA94NGmI.js} +1 -1
- package/src/ui/dist/assets/CliPlugin-DHBzphZU.js +79 -0
- package/src/ui/dist/assets/CodeEditorPlugin-BOFwD2rn.js +2 -0
- package/src/ui/dist/assets/{CodeViewerPlugin-CbaFRrUU.js → CodeViewerPlugin-CqDpgjik.js} +4 -4
- package/src/ui/dist/assets/{DocViewerPlugin-DAjLVeQD.js → DocViewerPlugin-UDBgt8-4.js} +3 -3
- package/src/ui/dist/assets/GitCommitViewerPlugin-BmHtZ0bZ.js +6 -0
- package/src/ui/dist/assets/{GitDiffViewerPlugin-CQACjoAA.js → GitDiffViewerPlugin-CAxjNorQ.js} +2 -2
- package/src/ui/dist/assets/{GitSnapshotViewer-0r4nLPke.js → GitSnapshotViewer-CweA6VON.js} +2 -2
- package/src/ui/dist/assets/{ImageViewerPlugin-nBOmI2v_.js → ImageViewerPlugin-C8wHGvGN.js} +5 -5
- package/src/ui/dist/assets/LabPlugin-COyyLUol.js +32 -0
- package/src/ui/dist/assets/{LatexPlugin-ZwtV8pIp.js → LatexPlugin-BQjAaA5J.js} +4 -4
- package/src/ui/dist/assets/{MarkdownViewerPlugin-DKqVfKyW.js → MarkdownViewerPlugin-Dy1NE2dI.js} +3 -3
- package/src/ui/dist/assets/{MarketplacePlugin-BwxStZ9D.js → MarketplacePlugin-DMIZtEJ2.js} +2 -2
- package/src/ui/dist/assets/NotebookEditor-CFHMq_Qt.js +91 -0
- package/src/ui/dist/assets/{NotebookEditor-DB9N_T9q.js → NotebookEditor-WFyd8Ybt.js} +3 -3
- package/src/ui/dist/assets/{PdfLoader-eWBONbQP.js → PdfLoader-CLE5u5TS.js} +3 -3
- package/src/ui/dist/assets/{PdfMarkdownPlugin-D22YOZL3.js → PdfMarkdownPlugin-_iNK_H83.js} +1 -1
- package/src/ui/dist/assets/PdfViewerPlugin-DgWsbInT.js +22 -0
- package/src/ui/dist/assets/SearchPlugin-DrZmn5iw.js +11 -0
- package/src/ui/dist/assets/{TextViewerPlugin-C5xqeeUH.js → TextViewerPlugin-D1-T3aC7.js} +4 -4
- package/src/ui/dist/assets/branding/runner-claude.svg +107 -0
- package/src/ui/dist/assets/branding/runner-codex.svg +10 -0
- package/src/ui/dist/assets/branding/runner-kimi.svg +14 -0
- package/src/ui/dist/assets/branding/runner-opencode.svg +7 -0
- package/src/ui/dist/assets/cli-store-CoZ-x5Ip.js +1 -0
- package/src/ui/dist/assets/{code-WlFHE7z_.js → code-DbsmSd3Y.js} +1 -1
- package/src/ui/dist/assets/file-diff-panel-DsvyRz47.js +1 -0
- package/src/ui/dist/assets/{wrap-text-BC-Hltpd.js → file-jump-queue-DeQBikaw.js} +3 -3
- package/src/ui/dist/assets/{file-socket-CfQPKQKj.js → file-socket-DA5XIx88.js} +1 -1
- package/src/ui/dist/assets/fonts/ds-fonts.css +50 -4
- package/src/ui/dist/assets/images/deepxiv/register-guide.png +0 -0
- package/src/ui/dist/assets/index-39vY9LmZ.js +1 -0
- package/src/ui/dist/assets/{index-CwNu1aH4.js → index-BsO46tJA.js} +1 -1
- package/src/ui/dist/assets/index-CHzJ2xtB.js +3530 -0
- package/src/ui/dist/assets/index-DH-zxoZ3.css +33 -0
- package/src/ui/dist/assets/{plugin-notebook-HbW2K-1c.js → plugin-notebook-JRhysCqj.js} +2 -2
- package/src/ui/dist/assets/{project-sync-C9IdzdZW.js → project-sync-DPmWKmKD.js} +1 -1
- package/src/ui/dist/assets/{zoom-out-E_gaeAxL.js → zoom-out-DAukFWen.js} +3 -3
- package/src/ui/dist/index.html +3 -3
- package/src/skills/analysis-campaign/references/artifact-orchestration.md +0 -58
- package/src/skills/baseline/references/memory-playbook.md +0 -40
- package/src/skills/baseline/references/publishable-baseline-package.md +0 -30
- package/src/skills/write/references/outline-evidence-contract-example.md +0 -107
- package/src/skills/write/references/paper-experiment-matrix-template.md +0 -131
- package/src/skills/write/references/paper-section-playbook.md +0 -64
- package/src/skills/write/references/reviewer-first-writing.md +0 -64
- package/src/skills/write/references/revision-checklist.md +0 -70
- package/src/skills/write/references/section-contracts.md +0 -82
- package/src/skills/write/references/sentence-level-proofing.md +0 -49
- package/src/ui/dist/assets/AiManusChatView-Bv-Z8YpU.js +0 -204
- package/src/ui/dist/assets/CliPlugin-BCKcpc35.js +0 -109
- package/src/ui/dist/assets/CodeEditorPlugin-DbOfSJ8K.js +0 -2
- package/src/ui/dist/assets/GitCommitViewerPlugin-CIUqbUDO.js +0 -1
- package/src/ui/dist/assets/LabCopilotPanel-BHxOxF4z.js +0 -14
- package/src/ui/dist/assets/LabPlugin-BKoZGs95.js +0 -22
- package/src/ui/dist/assets/NotebookEditor-BEQhaQbt.js +0 -81
- package/src/ui/dist/assets/PdfViewerPlugin-c-RK9DLM.js +0 -17
- package/src/ui/dist/assets/SearchPlugin-CxF9ytAx.js +0 -16
- package/src/ui/dist/assets/VNCViewer-BoLGLnHz.js +0 -11
- package/src/ui/dist/assets/bot-DREQOxzP.js +0 -6
- package/src/ui/dist/assets/chevron-up-C9Qpx4DE.js +0 -6
- package/src/ui/dist/assets/file-content-BZMz3RYp.js +0 -1
- package/src/ui/dist/assets/file-diff-panel-CQhw0jS2.js +0 -1
- package/src/ui/dist/assets/file-jump-queue-DA-SdG__.js +0 -1
- package/src/ui/dist/assets/git-commit-horizontal-DxZ8DCZh.js +0 -6
- package/src/ui/dist/assets/image-Bgl4VIyx.js +0 -6
- package/src/ui/dist/assets/index-BpV6lusQ.css +0 -33
- package/src/ui/dist/assets/index-CBNVuWcP.js +0 -2496
- package/src/ui/dist/assets/index-DrUnlf6K.js +0 -1
- package/src/ui/dist/assets/index-NW-h8VzN.js +0 -1
- package/src/ui/dist/assets/pdf-effect-queue-J8OnM0jE.js +0 -6
- package/src/ui/dist/assets/popover-CLc0pPP8.js +0 -1
- package/src/ui/dist/assets/select-Cs2PmzwL.js +0 -11
- package/src/ui/dist/assets/sigma-ClKcHAXm.js +0 -6
- package/src/ui/dist/assets/trash-DwpbFr3w.js +0 -11
- package/src/ui/dist/assets/useCliAccess-NQ8m0Let.js +0 -1
- package/src/ui/dist/assets/useFileDiffOverlay-FuhcnKiw.js +0 -1
|
@@ -0,0 +1,179 @@
|
|
|
1
|
+
schema_version: 1
|
|
2
|
+
id: aisb.t3.009_scoremissing
|
|
3
|
+
name: Score Matching with Missing Data
|
|
4
|
+
version: 0.1.0
|
|
5
|
+
one_line: 'Learn full score functions from partially observed tabular data using importance-weighting
|
|
6
|
+
and variational marginal score matching, evaluated on simulated GGM/ICA/normal-estimation
|
|
7
|
+
tasks and real-world S&P 100 and yeast graphical model recovery.
|
|
8
|
+
|
|
9
|
+
'
|
|
10
|
+
task_description: 'This benchmark packages the full experimental pipeline from the
|
|
11
|
+
ICML 2025 Spotlight paper "Score Matching with Missing Data". The core task is to
|
|
12
|
+
estimate the score (gradient of the log-density) of a multivariate distribution
|
|
13
|
+
when training samples have missing coordinates under a Missing Completely At Random
|
|
14
|
+
(MCAR) mechanism. Two complementary methods are provided: (1) marginal importance-weighting
|
|
15
|
+
(IW) score matching, which excels in lower-dimensional and small-sample settings,
|
|
16
|
+
and (2) marginal variational score matching, which is stronger in high-dimensional
|
|
17
|
+
problems such as Gaussian Graphical Model (GGM) estimation.
|
|
18
|
+
|
|
19
|
+
The experiment suite spans five sub-tasks run from Python scripts in HPC/: GGM structure
|
|
20
|
+
recovery (simulated), ICA parameter estimation, truncated/untruncated normal parameter
|
|
21
|
+
estimation, S&P 100 GGM estimation, and yeast GGM estimation. Each script accepts
|
|
22
|
+
a repeat count and a parameter-grid index as CLI arguments. Metrics (AUC, accuracy,
|
|
23
|
+
TPR, FPR) are computed inside the scripts via MSM/utils/data.py helper functions
|
|
24
|
+
and aggregated in Jupyter notebooks under plotcode/. Real-world data (yeast tensor,
|
|
25
|
+
S&P 100 CSV) is bundled in real_world_experiments/RealData/. No external evaluation
|
|
26
|
+
service or API credentials are needed; the benchmark is fully self-contained.
|
|
27
|
+
|
|
28
|
+
'
|
|
29
|
+
capability_tags:
|
|
30
|
+
- research_code_optimization
|
|
31
|
+
- missing_data
|
|
32
|
+
- probabilistic_modeling
|
|
33
|
+
- score_matching
|
|
34
|
+
- tabular_ml
|
|
35
|
+
- graphical_model_estimation
|
|
36
|
+
aisb_direction: T3
|
|
37
|
+
track_fit:
|
|
38
|
+
- paper_track
|
|
39
|
+
- benchmark_track
|
|
40
|
+
task_mode: experiment_driven
|
|
41
|
+
requires_execution: true
|
|
42
|
+
requires_paper: true
|
|
43
|
+
integrity_level: cas_plus_canary
|
|
44
|
+
snapshot_status: runnable
|
|
45
|
+
support_level: advanced
|
|
46
|
+
cost_band: medium
|
|
47
|
+
time_band: 6-24h
|
|
48
|
+
difficulty: hard
|
|
49
|
+
data_access: public
|
|
50
|
+
primary_outputs:
|
|
51
|
+
- auc
|
|
52
|
+
- accuracy
|
|
53
|
+
- tpr
|
|
54
|
+
- fpr
|
|
55
|
+
- structure_recovery_report
|
|
56
|
+
launch_profiles:
|
|
57
|
+
- id: quick_check
|
|
58
|
+
label: Quick Check
|
|
59
|
+
description: 'Run a single yeast or GGM experiment script with nrep=1 and one parameter-grid
|
|
60
|
+
index to verify the environment and produce a small set of AUC/accuracy values.
|
|
61
|
+
Expected wall-time ~10–30 minutes on CPU.
|
|
62
|
+
|
|
63
|
+
'
|
|
64
|
+
- id: full_experiments
|
|
65
|
+
label: Full Experiments
|
|
66
|
+
description: 'Sweep all HPC experiment scripts (GGM, ICA, NormalEstimation, snp100,
|
|
67
|
+
yeast) across the full parameter grids using the bash scripts in HPC/bashscripts/.
|
|
68
|
+
Each script runs 10 000 training epochs per configuration. Expect 6–24 hours on
|
|
69
|
+
a 16-core CPU, or on faster hardware with a single GPU.
|
|
70
|
+
|
|
71
|
+
'
|
|
72
|
+
- id: plot_reproduction
|
|
73
|
+
label: Plot Reproduction
|
|
74
|
+
description: 'After full experiments, run the Jupyter notebooks in plotcode/ to
|
|
75
|
+
regenerate all paper figures and metric summaries from saved results.
|
|
76
|
+
|
|
77
|
+
'
|
|
78
|
+
dataset_download:
|
|
79
|
+
primary_method: bundled
|
|
80
|
+
sources:
|
|
81
|
+
- kind: archive
|
|
82
|
+
url: https://deepscientist.cc/AISB/009_scoremissing
|
|
83
|
+
access: public
|
|
84
|
+
note: 'Full snapshot including real-world data tensors (yeast, S&P 100) in real_world_experiments/RealData/.
|
|
85
|
+
Simulated data is generated on-the-fly by the experiment scripts. Archive is
|
|
86
|
+
a zip file.
|
|
87
|
+
|
|
88
|
+
'
|
|
89
|
+
notes:
|
|
90
|
+
- All datasets are either bundled or generated programmatically; no additional downloads
|
|
91
|
+
required.
|
|
92
|
+
- Total disk footprint after extraction is modest (well under 10 GB including results).
|
|
93
|
+
credential_requirements:
|
|
94
|
+
mode: none
|
|
95
|
+
items: []
|
|
96
|
+
notes: []
|
|
97
|
+
resources:
|
|
98
|
+
minimum:
|
|
99
|
+
cpu_cores: 8
|
|
100
|
+
ram_gb: 32
|
|
101
|
+
disk_gb: 80
|
|
102
|
+
gpu_count: 0
|
|
103
|
+
gpu_vram_gb: 0
|
|
104
|
+
recommended:
|
|
105
|
+
cpu_cores: 16
|
|
106
|
+
ram_gb: 64
|
|
107
|
+
disk_gb: 150
|
|
108
|
+
gpu_count: 1
|
|
109
|
+
gpu_vram_gb: 16
|
|
110
|
+
environment:
|
|
111
|
+
python: '3.10'
|
|
112
|
+
cuda: null
|
|
113
|
+
pytorch: 1.12.1
|
|
114
|
+
flash_attn: null
|
|
115
|
+
key_packages:
|
|
116
|
+
- jax==0.3.25
|
|
117
|
+
- jaxlib==0.3.25
|
|
118
|
+
- tensorflow==2.11.0
|
|
119
|
+
- scikit-learn
|
|
120
|
+
- tqdm
|
|
121
|
+
notes:
|
|
122
|
+
- CPU-only execution is plausible for the minimum route; GPU accelerates PyTorch
|
|
123
|
+
training loops.
|
|
124
|
+
- The MSM package is PyTorch-based; JAX/TensorFlow dependencies support specific
|
|
125
|
+
experiment variants.
|
|
126
|
+
- An environment.yml is bundled in the snapshot for conda-based setup.
|
|
127
|
+
- See the bundled README and requirements for the full dependency set.
|
|
128
|
+
risk_flags:
|
|
129
|
+
- mixed_framework_dependencies
|
|
130
|
+
- long_sweep_walltime
|
|
131
|
+
risk_notes:
|
|
132
|
+
- 'The project mixes PyTorch, JAX, and TensorFlow dependencies. Resolving compatible
|
|
133
|
+
versions in one environment may require careful pinning or separate conda environments.
|
|
134
|
+
|
|
135
|
+
'
|
|
136
|
+
- 'Full experiment sweeps (all five sub-tasks × all parameter-grid indices × multiple
|
|
137
|
+
repeats) can take many hours. Plan for at least 6 hours wall-time on recommended
|
|
138
|
+
hardware.
|
|
139
|
+
|
|
140
|
+
'
|
|
141
|
+
- 'No benchmark execution was performed during the packaging pass; metrics are code-backed
|
|
142
|
+
but not yet runtime-verified.
|
|
143
|
+
|
|
144
|
+
'
|
|
145
|
+
recommended_when: 'Use this benchmark when you want a self-contained probabilistic
|
|
146
|
+
missing-data task combining theoretical score-matching methods with both synthetic
|
|
147
|
+
and real-world graphical model evaluation. Good fit for agents that can navigate
|
|
148
|
+
multi-script experiment pipelines, handle CLI-parameterised sweeps, and interpret
|
|
149
|
+
structure-recovery metrics (AUC, TPR, FPR).
|
|
150
|
+
|
|
151
|
+
'
|
|
152
|
+
not_recommended_when: 'Avoid if you need a lightweight single-script benchmark, a
|
|
153
|
+
task centred on large pretrained foundation models, or a benchmark with GPU-intensive
|
|
154
|
+
deep learning. Also not suitable if you cannot install mixed PyTorch/JAX/TensorFlow
|
|
155
|
+
environments.
|
|
156
|
+
|
|
157
|
+
'
|
|
158
|
+
paper:
|
|
159
|
+
title: Score Matching with Missing Data
|
|
160
|
+
venue: ICML 2025 Spotlight
|
|
161
|
+
year: 2025
|
|
162
|
+
url: https://arxiv.org/abs/2506.00557
|
|
163
|
+
download:
|
|
164
|
+
url: https://github.com/ResearAI/DeepScientist/releases/download/aisb-v0.0.1/aisb.t3.009_scoremissing.zip
|
|
165
|
+
archive_type: zip
|
|
166
|
+
local_dir_name: paper-9-ScoreMissing
|
|
167
|
+
provider: github_release
|
|
168
|
+
repo: ResearAI/DeepScientist
|
|
169
|
+
tag: aisb-v0.0.1
|
|
170
|
+
asset_name: aisb.t3.009_scoremissing.zip
|
|
171
|
+
sha256: bed639544fdb6e317d4a5b2c2663c8765aa508e39566551817d1586541cccb27
|
|
172
|
+
size_bytes: 27750054
|
|
173
|
+
commercial:
|
|
174
|
+
annual_fee: null
|
|
175
|
+
display:
|
|
176
|
+
palette_seed: moss-indigo-density
|
|
177
|
+
art_style: statistical-notebook
|
|
178
|
+
accent_priority: medium
|
|
179
|
+
image_path: ../image/009_aisb.t3.009_scoremissing.jpg
|
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
schema_version: 1
|
|
2
|
+
id: aisb.t3.009_scoremissing
|
|
3
|
+
name: 缺失数据的分数匹配
|
|
4
|
+
version: 0.1.0
|
|
5
|
+
one_line: '从部分观测的表格数据中学习完整分数函数,采用重要性加权和变分边际分数匹配方法,在模拟的GGM/ICA/正态估计任务以及真实的S&P 100和酵母图形模型恢复任务上进行评估。'
|
|
6
|
+
task_description: '该基准测试打包了ICML 2025 Spotlight论文"Score Matching with Missing Data"的完整实验流程。核心任务是在训练样本存在缺失坐标且缺失机制为完全随机缺失(MCAR)的条件下,估计多元分布的分数(对数密度梯度)。提供了两种互补方法:(1)边际重要性加权(IW)分数匹配,在低维和小样本场景表现优异;(2)边际变分分数匹配,在高维问题(如高斯图形模型GGM估计)上更强。
|
|
7
|
+
|
|
8
|
+
实验套件包含五个子任务,从HPC/目录下的Python脚本运行:GGM结构恢复(模拟)、ICA参数估计、截断/非截断正态参数估计、S&P 100 GGM估计和酵母GGM估计。每个脚本接受重复次数和参数网格索引作为命令行参数。指标(AUC、准确率、TPR、FPR)通过MSM/utils/data.py辅助函数在脚本内计算,并在plotcode/目录下的Jupyter笔记本中聚合。真实数据(酵母张量、S&P 100 CSV)打包在real_world_experiments/RealData/中。无需外部评估服务或API凭据;该基准测试完全自包含。'
|
|
9
|
+
capability_tags:
|
|
10
|
+
- research_code_optimization
|
|
11
|
+
- missing_data
|
|
12
|
+
- probabilistic_modeling
|
|
13
|
+
- score_matching
|
|
14
|
+
- tabular_ml
|
|
15
|
+
- graphical_model_estimation
|
|
16
|
+
aisb_direction: T3
|
|
17
|
+
track_fit:
|
|
18
|
+
- paper_track
|
|
19
|
+
- benchmark_track
|
|
20
|
+
task_mode: experiment_driven
|
|
21
|
+
requires_execution: true
|
|
22
|
+
requires_paper: true
|
|
23
|
+
integrity_level: cas_plus_canary
|
|
24
|
+
snapshot_status: runnable
|
|
25
|
+
support_level: advanced
|
|
26
|
+
cost_band: medium
|
|
27
|
+
time_band: 6-24h
|
|
28
|
+
difficulty: hard
|
|
29
|
+
data_access: public
|
|
30
|
+
primary_outputs:
|
|
31
|
+
- auc
|
|
32
|
+
- accuracy
|
|
33
|
+
- tpr
|
|
34
|
+
- fpr
|
|
35
|
+
- structure_recovery_report
|
|
36
|
+
launch_profiles:
|
|
37
|
+
- id: quick_check
|
|
38
|
+
label: 快速检查
|
|
39
|
+
description: '运行单个酵母或GGM实验脚本,nrep=1和一个参数网格索引,以验证环境并生成少量AUC/准确率值。预期CPU运行时间约10-30分钟。'
|
|
40
|
+
- id: full_experiments
|
|
41
|
+
label: 完整实验
|
|
42
|
+
description: '使用HPC/bashscripts/目录下的bash脚本,对所有HPC实验脚本(GGM、ICA、NormalEstimation、snp100、yeast)进行完整参数网格扫描。每个脚本每个配置运行10000个训练轮次。预期在16核CPU或配备单GPU的更快硬件上需要6-24小时。'
|
|
43
|
+
- id: plot_reproduction
|
|
44
|
+
label: 图表复现
|
|
45
|
+
description: '在完成完整实验后,运行plotcode/目录下的Jupyter笔记本,从保存的结果中重新生成所有论文图表和指标摘要。'
|
|
46
|
+
dataset_download:
|
|
47
|
+
primary_method: bundled
|
|
48
|
+
sources:
|
|
49
|
+
- kind: archive
|
|
50
|
+
url: https://deepscientist.cc/AISB/009_scoremissing
|
|
51
|
+
access: public
|
|
52
|
+
note: '完整快照,包含真实世界数据张量(酵母、S&P 100)位于real_world_experiments/RealData/。模拟数据由实验脚本动态生成。压缩包为zip格式。'
|
|
53
|
+
notes:
|
|
54
|
+
- 所有数据集均为内置或程序生成,无需额外下载。
|
|
55
|
+
- 解压后磁盘占用很小(包括结果文件在内远低于10 GB)。
|
|
56
|
+
credential_requirements:
|
|
57
|
+
mode: none
|
|
58
|
+
items: []
|
|
59
|
+
notes: []
|
|
60
|
+
resources:
|
|
61
|
+
minimum:
|
|
62
|
+
cpu_cores: 8
|
|
63
|
+
ram_gb: 32
|
|
64
|
+
disk_gb: 80
|
|
65
|
+
gpu_count: 0
|
|
66
|
+
gpu_vram_gb: 0
|
|
67
|
+
recommended:
|
|
68
|
+
cpu_cores: 16
|
|
69
|
+
ram_gb: 64
|
|
70
|
+
disk_gb: 150
|
|
71
|
+
gpu_count: 1
|
|
72
|
+
gpu_vram_gb: 16
|
|
73
|
+
environment:
|
|
74
|
+
python: '3.10'
|
|
75
|
+
cuda: null
|
|
76
|
+
pytorch: 1.12.1
|
|
77
|
+
flash_attn: null
|
|
78
|
+
key_packages:
|
|
79
|
+
- jax==0.3.25
|
|
80
|
+
- jaxlib==0.3.25
|
|
81
|
+
- tensorflow==2.11.0
|
|
82
|
+
- scikit-learn
|
|
83
|
+
- tqdm
|
|
84
|
+
notes:
|
|
85
|
+
- 最小配置下支持纯CPU执行;GPU可加速PyTorch训练循环。
|
|
86
|
+
- MSM包基于PyTorch;JAX/TensorFlow依赖支持特定的实验变体。
|
|
87
|
+
- 快照中捆绑了conda环境配置文件environment.yml。
|
|
88
|
+
- 详见捆绑的README和requirements文件以了解完整依赖集。
|
|
89
|
+
risk_flags:
|
|
90
|
+
- mixed_framework_dependencies
|
|
91
|
+
- long_sweep_walltime
|
|
92
|
+
risk_notes:
|
|
93
|
+
- '项目混合了PyTorch、JAX和TensorFlow依赖。在单一环境中解决兼容版本可能需要仔细固定版本或使用独立的conda环境。'
|
|
94
|
+
- '完整实验扫描(所有五个子任务×所有参数网格索引×多次重复)可能需要数小时。在推荐硬件上至少预留6小时运行时间。'
|
|
95
|
+
- '打包过程中未执行基准测试;指标有代码支持但尚未运行时验证。'
|
|
96
|
+
recommended_when: '当你需要一个自包含的概率缺失数据任务,结合理论分数匹配方法与合成和真实世界图形模型评估时使用。非常适合能够处理多脚本实验流程、处理CLI参数化扫描以及解读结构恢复指标(AUC、TPR、FPR)的智能体。'
|
|
97
|
+
not_recommended_when: '避免用于以下场景:需要轻量级单脚本基准测试、以大型预训练基础模型为中心的任务、或GPU密集型深度学习基准测试。如果无法安装混合PyTorch/JAX/TensorFlow环境,也不适用。'
|
|
98
|
+
paper:
|
|
99
|
+
title: Score Matching with Missing Data
|
|
100
|
+
venue: ICML 2025 Spotlight
|
|
101
|
+
year: 2025
|
|
102
|
+
url: https://arxiv.org/abs/2506.00557
|
|
103
|
+
download:
|
|
104
|
+
url: https://github.com/ResearAI/DeepScientist/releases/download/aisb-v0.0.1/aisb.t3.009_scoremissing.zip
|
|
105
|
+
archive_type: zip
|
|
106
|
+
local_dir_name: paper-9-ScoreMissing
|
|
107
|
+
provider: github_release
|
|
108
|
+
repo: ResearAI/DeepScientist
|
|
109
|
+
tag: aisb-v0.0.1
|
|
110
|
+
asset_name: aisb.t3.009_scoremissing.zip
|
|
111
|
+
sha256: bed639544fdb6e317d4a5b2c2663c8765aa508e39566551817d1586541cccb27
|
|
112
|
+
size_bytes: 27750054
|
|
113
|
+
commercial:
|
|
114
|
+
annual_fee: null
|
|
115
|
+
display:
|
|
116
|
+
palette_seed: moss-indigo-density
|
|
117
|
+
art_style: statistical-notebook
|
|
118
|
+
accent_priority: medium
|
|
119
|
+
image_path: ../image/009_aisb.t3.009_scoremissing.jpg
|
|
@@ -0,0 +1,221 @@
|
|
|
1
|
+
schema_version: 1
|
|
2
|
+
id: aisb.t3.010_suitabilityfilter
|
|
3
|
+
name: 'Suitability Filter: A Statistical Framework for Classifier Evaluation in Real-World
|
|
4
|
+
Deployment Settings'
|
|
5
|
+
version: 0.1.0
|
|
6
|
+
one_line: 'Optimize a deployment-time statistical filter that uses non-inferiority
|
|
7
|
+
hypothesis testing on model-derived suitability signals to decide whether a classifier''s
|
|
8
|
+
accuracy has degraded beyond an acceptable margin on unlabeled, distribution-shifted
|
|
9
|
+
user data (evaluated on WILDS FMoW/RxRx1/CivilComments).
|
|
10
|
+
|
|
11
|
+
'
|
|
12
|
+
task_description: 'This benchmark packages the Suitability Filter framework from ICML
|
|
13
|
+
2025. The core task is to improve a statistical decision system that determines
|
|
14
|
+
whether a pre-trained classifier remains safe to deploy under covariate shift, without
|
|
15
|
+
access to ground-truth labels on the target data.
|
|
16
|
+
|
|
17
|
+
The filter works by: (1) extracting suitability signals (max softmax, predictive
|
|
18
|
+
entropy, logit statistics, loss/energy features — 12 signals total) from both labeled
|
|
19
|
+
test data and unlabeled user data; (2) training a lightweight prediction-correctness
|
|
20
|
+
estimator (logistic regression by default, calibrated via CalibratedClassifierCV)
|
|
21
|
+
on a labeled holdout set to map signals to per-sample correctness probabilities
|
|
22
|
+
p_c; (3) comparing the p_c distributions between test and user data via a one-sided
|
|
23
|
+
non-inferiority test (Welch''s t-test or z-test) with configurable margin m and
|
|
24
|
+
significance level alpha; (4) outputting SUITABLE (reject H0) or INCONCLUSIVE.
|
|
25
|
+
|
|
26
|
+
The packaged evaluation route runs cached FMoW-WILDS features across multiple seeds
|
|
27
|
+
(0, 1, 2), fold configurations, feature subsets (confidence/logit/loss-energy),
|
|
28
|
+
and classifier hyperparameters. Primary metrics are the OOD suitability score (ROC-AUC
|
|
29
|
+
of the filter''s p-values against ground-truth suitability labels across ~29k experiments),
|
|
30
|
+
the raw p-value, and the binary suitability decision. Optimization targets include
|
|
31
|
+
the correctness estimator''s regularization (C sweep), signal subset selection,
|
|
32
|
+
fold count, calibration strategy, and Stouffer''s method for combining p-values
|
|
33
|
+
across feature subsets.
|
|
34
|
+
|
|
35
|
+
Pre-extracted feature caches and split indices are bundled under results/features/
|
|
36
|
+
and results/split_indices/. The WILDS datasets themselves (FMoW, RxRx1, CivilComments)
|
|
37
|
+
and pretrained model weights are needed only if regenerating features from scratch;
|
|
38
|
+
the default execution route uses cached features and does not require downloading
|
|
39
|
+
WILDS data.
|
|
40
|
+
|
|
41
|
+
'
|
|
42
|
+
capability_tags:
|
|
43
|
+
- research_code_optimization
|
|
44
|
+
- classifier_evaluation
|
|
45
|
+
- distribution_shift
|
|
46
|
+
- statistical_testing
|
|
47
|
+
- reliability
|
|
48
|
+
- hypothesis_testing
|
|
49
|
+
- deployment_monitoring
|
|
50
|
+
aisb_direction: T3
|
|
51
|
+
track_fit:
|
|
52
|
+
- paper_track
|
|
53
|
+
- benchmark_track
|
|
54
|
+
task_mode: experiment_driven
|
|
55
|
+
requires_execution: true
|
|
56
|
+
requires_paper: true
|
|
57
|
+
integrity_level: cas_plus_canary
|
|
58
|
+
snapshot_status: runnable
|
|
59
|
+
support_level: turnkey
|
|
60
|
+
cost_band: medium
|
|
61
|
+
time_band: 6-24h
|
|
62
|
+
difficulty: hard
|
|
63
|
+
data_access: public
|
|
64
|
+
primary_outputs:
|
|
65
|
+
- ood_suitability_score
|
|
66
|
+
- p_value
|
|
67
|
+
- suitability_decision
|
|
68
|
+
launch_profiles:
|
|
69
|
+
- id: quick_check
|
|
70
|
+
label: Quick Check
|
|
71
|
+
description: 'Run the packaged suitability filter logic on cached FMoW features
|
|
72
|
+
for a single seed to verify statistical test outputs (p-value, suitability decision)
|
|
73
|
+
and basic ROC-AUC.
|
|
74
|
+
|
|
75
|
+
'
|
|
76
|
+
- id: shift_eval
|
|
77
|
+
label: Full Shift Evaluation
|
|
78
|
+
description: 'Run the complete FMoW-WILDS deployment-shift evaluation across all
|
|
79
|
+
seeds (0, 1, 2), feature subsets, fold configurations, and classifier settings
|
|
80
|
+
using run_fmow.py. Reports OOD suitability ROC-AUC across ~29k experiment configurations.
|
|
81
|
+
|
|
82
|
+
'
|
|
83
|
+
- id: lr_tune
|
|
84
|
+
label: LR Hyperparameter Sweep
|
|
85
|
+
description: 'Run run_fmow_lr_tune.py to sweep logistic regression regularization
|
|
86
|
+
C values (0.01, 0.1, 1.0, 5.0, 10.0) on OOD data for seed=0 to find the optimal correctness
|
|
87
|
+
estimator configuration.
|
|
88
|
+
|
|
89
|
+
'
|
|
90
|
+
- id: seed_search
|
|
91
|
+
label: Seed Search
|
|
92
|
+
description: 'Run run_fmow_seed_search.py variants to explore reproducibility and
|
|
93
|
+
seed sensitivity of the suitability filter decisions.
|
|
94
|
+
|
|
95
|
+
'
|
|
96
|
+
dataset_download:
|
|
97
|
+
primary_method: bundled_cache
|
|
98
|
+
sources:
|
|
99
|
+
- kind: bundled
|
|
100
|
+
url: null
|
|
101
|
+
access: public
|
|
102
|
+
note: 'Pre-extracted feature caches (results/features/*.pkl) and split indices
|
|
103
|
+
(results/split_indices/*.pkl) are included in the snapshot. No download needed
|
|
104
|
+
for the default execution route.
|
|
105
|
+
|
|
106
|
+
'
|
|
107
|
+
- kind: pip_package
|
|
108
|
+
url: https://pypi.org/project/wilds/
|
|
109
|
+
access: public
|
|
110
|
+
note: 'WILDS 2.0.0 package needed for feature re-extraction from raw data. FMoW-WILDS,
|
|
111
|
+
RxRx1-WILDS, and CivilComments-WILDS datasets are downloaded on first use via
|
|
112
|
+
the wilds library. FMoW alone is ~50 GB; all three together can exceed 100 GB.
|
|
113
|
+
|
|
114
|
+
'
|
|
115
|
+
notes:
|
|
116
|
+
- Default route uses bundled cached features; WILDS download only needed for feature
|
|
117
|
+
regeneration.
|
|
118
|
+
- Pretrained model weights (ERM) are required for feature regeneration but not for
|
|
119
|
+
cached route.
|
|
120
|
+
credential_requirements:
|
|
121
|
+
mode: none
|
|
122
|
+
items: []
|
|
123
|
+
notes:
|
|
124
|
+
- No API keys or credentials required for any execution route.
|
|
125
|
+
resources:
|
|
126
|
+
minimum:
|
|
127
|
+
cpu_cores: 8
|
|
128
|
+
ram_gb: 32
|
|
129
|
+
disk_gb: 100
|
|
130
|
+
gpu_count: 1
|
|
131
|
+
gpu_vram_gb: 16
|
|
132
|
+
recommended:
|
|
133
|
+
cpu_cores: 16
|
|
134
|
+
ram_gb: 64
|
|
135
|
+
disk_gb: 200
|
|
136
|
+
gpu_count: 1
|
|
137
|
+
gpu_vram_gb: 24
|
|
138
|
+
environment:
|
|
139
|
+
python: 3.11.9
|
|
140
|
+
cuda: '11.7'
|
|
141
|
+
pytorch: 2.0.0
|
|
142
|
+
flash_attn: null
|
|
143
|
+
key_packages:
|
|
144
|
+
- transformers==4.41.2
|
|
145
|
+
- torch-geometric==2.5.3
|
|
146
|
+
- wilds==2.0.0
|
|
147
|
+
- scikit-learn
|
|
148
|
+
- scipy
|
|
149
|
+
- statsmodels
|
|
150
|
+
- pandas
|
|
151
|
+
- numpy
|
|
152
|
+
notes:
|
|
153
|
+
- See the bundled requirements.txt for the full conda environment specification.
|
|
154
|
+
- GPU is used for feature extraction (torch.device("cuda" if available)); the cached-feature
|
|
155
|
+
route runs statistical tests on CPU only (scikit-learn, scipy).
|
|
156
|
+
- The run_fmow.py scripts set sys.path to '/repo' — adjust if running outside the
|
|
157
|
+
expected container.
|
|
158
|
+
risk_flags:
|
|
159
|
+
- large_experiment_matrix
|
|
160
|
+
- external_dataset_for_regeneration
|
|
161
|
+
risk_notes:
|
|
162
|
+
- 'The full evaluation route runs ~29k experiment configurations across seeds, folds,
|
|
163
|
+
feature subsets, and margins. Wall time can reach several hours on CPU.
|
|
164
|
+
|
|
165
|
+
'
|
|
166
|
+
- 'Feature regeneration from raw WILDS data requires downloading FMoW-WILDS (~50 GB)
|
|
167
|
+
and pretrained ERM model weights, but this is not needed for the default cached
|
|
168
|
+
route.
|
|
169
|
+
|
|
170
|
+
'
|
|
171
|
+
- 'The run scripts hardcode sys.path to ''/repo'' and os.chdir(''/repo''); execution
|
|
172
|
+
outside the expected container environment may require path adjustments.
|
|
173
|
+
|
|
174
|
+
'
|
|
175
|
+
- 'No runtime execution was performed during the packaging pass; metric values should
|
|
176
|
+
be verified by running the benchmark.
|
|
177
|
+
|
|
178
|
+
'
|
|
179
|
+
recommended_when: 'Use this benchmark when you want a model-monitoring optimization
|
|
180
|
+
task that combines statistical hypothesis testing (non-inferiority tests) with realistic
|
|
181
|
+
WILDS deployment shifts. Well-suited for research on deployment-time safety, distribution
|
|
182
|
+
shift detection, unsupervised accuracy estimation, and selective prediction. The
|
|
183
|
+
cached-feature route makes iteration fast without needing to re-run model inference.
|
|
184
|
+
|
|
185
|
+
'
|
|
186
|
+
not_recommended_when: 'Not suitable if you need a tiny or quick-to-iterate toy benchmark,
|
|
187
|
+
cannot allocate 32+ GB RAM for the experiment matrices, or want to evaluate non-classification
|
|
188
|
+
tasks. Also not ideal if you need end-to-end training of the base classifier — this
|
|
189
|
+
benchmark focuses on the filter/monitoring layer, not the upstream model training.
|
|
190
|
+
|
|
191
|
+
'
|
|
192
|
+
paper:
|
|
193
|
+
title: 'Suitability Filter: A Statistical Framework for Classifier Evaluation in
|
|
194
|
+
Real-World Deployment Settings'
|
|
195
|
+
authors:
|
|
196
|
+
- Angéline Pouget
|
|
197
|
+
- Mohammad Yaghini
|
|
198
|
+
- Stephan Rabanser
|
|
199
|
+
- Nicolas Papernot
|
|
200
|
+
venue: ICML 2025
|
|
201
|
+
year: 2025
|
|
202
|
+
url: https://arxiv.org/abs/2505.22356
|
|
203
|
+
volume: 267
|
|
204
|
+
publisher: PMLR
|
|
205
|
+
download:
|
|
206
|
+
url: https://github.com/ResearAI/DeepScientist/releases/download/aisb-v0.0.1/aisb.t3.010_suitabilityfilter.zip
|
|
207
|
+
archive_type: zip
|
|
208
|
+
local_dir_name: paper-10-SuitabilityFilter
|
|
209
|
+
provider: github_release
|
|
210
|
+
repo: ResearAI/DeepScientist
|
|
211
|
+
tag: aisb-v0.0.1
|
|
212
|
+
asset_name: aisb.t3.010_suitabilityfilter.zip
|
|
213
|
+
sha256: 146f3477fb91d43974f51dac9013768be094e9ad355d2196c820915f062645f1
|
|
214
|
+
size_bytes: 54606221
|
|
215
|
+
commercial:
|
|
216
|
+
annual_fee: null
|
|
217
|
+
display:
|
|
218
|
+
palette_seed: sand-graphite-verify
|
|
219
|
+
art_style: clean-statistical
|
|
220
|
+
accent_priority: high
|
|
221
|
+
image_path: ../image/010_aisb.t3.010_suitabilityfilter.jpg
|
|
@@ -0,0 +1,141 @@
|
|
|
1
|
+
schema_version: 1
|
|
2
|
+
id: aisb.t3.010_suitabilityfilter
|
|
3
|
+
name: "适用性过滤器:真实世界部署场景下分类器评估的统计框架"
|
|
4
|
+
version: 0.1.0
|
|
5
|
+
one_line: "优化一个部署时的统计过滤器,使用非劣性假设检验对模型衍生的适用性信号进行测试,以判断分类器在无标签、分布偏移的用户数据上的准确率退化是否已超出可接受的边际(基于WILDS FMoW/RxRx1/CivilComments评估)。"
|
|
6
|
+
task_description: |
|
|
7
|
+
本基准测试打包了ICML 2025的适用性过滤器框架。核心任务是改进一个统计决策系统,在无法获得目标数据真实标签的情况下,判断预训练分类器在协变量偏移下是否仍然安全可部署。
|
|
8
|
+
|
|
9
|
+
过滤器的工作流程为:(1) 从带标签的测试数据和无标签的用户数据中提取适用性信号(最大softmax值、预测熵、logit统计量、损失/能量特征——共12个信号);(2) 在带标签的保留集上训练一个轻量级的预测正确性估计器(默认使用逻辑回归,通过CalibratedClassifierCV进行校准),将信号映射到每个样本的正确概率p_c;(3) 通过单侧非劣性检验(Welch's t检验或z检验,可配置边际m和显著性水平alpha)比较测试数据和用户数据之间的p_c分布;(4) 输出SUITABLE(拒绝H0)或INCONCLUSIVE。
|
|
10
|
+
|
|
11
|
+
打包的评估流程跨多个随机种子(0、1、2)、折数配置、特征子集(置信度/logit/损失-能量)和分类器超参数运行缓存的FMoW-WILDS特征。主要指标包括:OOD适用性得分(过滤器p值相对于真实适用性标签的ROC-AUC,跨约29k个实验)、原始p值和二元适用性决策。优化目标包括正确性估计器的正则化(C扫描)、信号子集选择、折数、校准策略,以及用于组合特征子集p值的Stouffer方法。
|
|
12
|
+
|
|
13
|
+
预提取的特征缓存和分割索引分别打包在results/features/和results/split_indices/下。仅在从头重新生成特征时才需要WILDS数据集本身(FMoW、RxRx1、CivilComments)和预训练模型权重;默认执行流程使用缓存特征,无需下载WILDS数据。
|
|
14
|
+
capability_tags:
|
|
15
|
+
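- research_code_optimization
|
|
16
|
+
- classifier_evaluation
|
|
17
|
+
- distribution_shift
|
|
18
|
+
- statistical_testing
|
|
19
|
+
- reliability
|
|
20
|
+
- hypothesis_testing
|
|
21
|
+
- deployment_monitoring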
|
|
22
|
+
aisb_direction: T3
|
|
23
|
+
track_fit:
|
|
24
|
+
- paper_track
|
|
25
|
+
- benchmark_track
|
|
26
|
+
task_mode: experiment_driven
|
|
27
|
+
requires_execution: true
|
|
28
|
+
requires_paper: true
|
|
29
|
+
integrity_level: cas_plus_canary
|
|
30
|
+
snapshot_status: runnable
|
|
31
|
+
support_level: turnkey
|
|
32
|
+
cost_band: medium
|
|
33
|
+
time_band: 6-24h
|
|
34
|
+
difficulty: hard
|
|
35
|
+
data_access: public
|
|
36
|
+
primary_outputs:
|
|
37
|
+
- ood_suitability_score
|
|
38
|
+
- p_value
|
|
39
|
+
- suitability_decision
|
|
40
|
+
launch_profiles:
|
|
41
|
+
- id: quick_check
|
|
42
|
+
label: 快速检查
|
|
43
|
+
description: "在缓存的FMoW特征上运行打包的适用性过滤器逻辑,使用单个种子验证统计检验输出(p值、适用性决策)和基本ROC-AUC。"
|
|
44
|
+
- id: shift_eval
|
|
45
|
+
label: 完整偏移评估
|
|
46
|
+
description: "跨所有种子(0、1、2)、特征子集、折数配置和分类器设置运行完整的FMoW-WILDS部署偏移评估。使用run_fmow.py,报告约29k个实验配置下的OOD适用性ROC-AUC。"
|
|
47
|
+
- id: lr_tune
|
|
48
|
+
label: LR超参数扫描
|
|
49
|
+
description: "运行run_fmow_lr_tune.py,在OOD数据上扫描逻辑回归正则化C值(0.01、0.1、1.0、5.0、10.0),种子=0,以找到最优的正确性估计器配置。"
|
|
50
|
+
- id: seed_search
|
|
51
|
+
label: 种子搜索
|
|
52
|
+
description: "运行run_fmow_seed_search.py变体,探索适用性过滤器决策的可重复性和种子敏感性。"
|
|
53
|
+
dataset_download:
|
|
54
|
+
primary_method: bundled_cache
|
|
55
|
+
sources:
|
|
56
|
+
- kind: bundled
|
|
57
|
+
url: null
|
|
58
|
+
access: public
|
|
59
|
+
note: "预提取的特征缓存(results/features/*.pkl)和分割索引(results/split_indices/*.pkl)已包含在快照中。默认执行流程无需下载。"
|
|
60
|
+
- kind: pip_package
|
|
61
|
+
url: https://pypi.org/project/wilds/
|
|
62
|
+
access: public
|
|
63
|
+
note: "WILDS 2.0.0包用于从原始数据重新提取特征。FMoW-WILDS、RxRx1-WILDS和CivilComments-WILDS数据集通过wilds库首次使用时下载。仅FMoW约50GB;三个数据集合计可超过100GB。"
|
|
64
|
+
notes:
|
|
65
|
+
- 默认流程使用打包的缓存特征;仅在需要重新生成特征时才需要下载WILDS。
|
|
66
|
+
- 预训练模型权重(ERM)用于特征重新生成,缓存流程无需此权重。
|
|
67
|
+
credential_requirements:
|
|
68
|
+
mode: none
|
|
69
|
+
items: []
|
|
70
|
+
notes:
|
|
71
|
+
- 任何执行流程都无需API密钥或凭据。
|
|
72
|
+
resources:
|
|
73
|
+
minimum:
|
|
74
|
+
cpu_cores: 8
|
|
75
|
+
ram_gb: 32
|
|
76
|
+
disk_gb: 100
|
|
77
|
+
gpu_count: 1
|
|
78
|
+
gpu_vram_gb: 16
|
|
79
|
+
recommended:
|
|
80
|
+
cpu_cores: 16
|
|
81
|
+
ram_gb: 64
|
|
82
|
+
disk_gb: 200
|
|
83
|
+
gpu_count: 1
|
|
84
|
+
gpu_vram_gb: 24
|
|
85
|
+
environment:
|
|
86
|
+
python: 3.11.9
|
|
87
|
+
cuda: "11.7"
|
|
88
|
+
pytorch: 2.0.0
|
|
89
|
+
flash_attn: null
|
|
90
|
+
key_packages:
|
|
91
|
+
- transformers==4.41.2
|
|
92
|
+
- torch-geometric==2.5.3
|
|
93
|
+
- wilds==2.0.0
|
|
94
|
+
- scikit-learn
|
|
95
|
+
- scipy
|
|
96
|
+
- statsmodels
|
|
97
|
+
- pandas
|
|
98
|
+
- numpy
|
|
99
|
+
notes:
|
|
100
|
+
- 完整的conda环境规范请参见打包的requirements.txt。
|
|
101
|
+
- GPU用于特征提取(torch.device("cuda" if available));缓存特征流程仅在CPU上运行统计检验(scikit-learn、scipy)。
|
|
102
|
+
- run_fmow.py脚本设置sys.path为'/repo'——如在预期容器外运行需调整路径。
|
|
103
|
+
risk_flags:
|
|
104
|
+
- large_experiment_matrix
|
|
105
|
+
- external_dataset_for_regeneration
|
|
106
|
+
risk_notes:
|
|
107
|
+
- "完整评估流程跨种子、折数、特征子集和边际运行约29k个实验配置。CPU上运行时间可达数小时。"
|
|
108
|
+
- "从原始WILDS数据重新生成特征需要下载FMoW-WILDS(约50GB)和预训练ERM模型权重,但默认缓存流程无需此操作。"
|
|
109
|
+
- "运行脚本硬编码sys.path为'/repo'和os.chdir('/repo');在预期容器环境外执行可能需要调整路径。"
|
|
110
|
+
- "打包过程中未执行运行时测试;指标值应通过运行基准测试进行验证。"
|
|
111
|
+
recommended_when: "当您需要一个结合统计假设检验(非劣性检验)与真实WILDS部署偏移的模型监控优化任务时使用此基准测试。非常适合研究部署时安全性、分布偏移检测、无监督准确率估计和选择性预测。缓存特征流程使迭代快速,无需重新运行模型推理。"
|
|
112
|
+
not_recommended_when: "如果需要一个小型的或快速迭代的玩具基准测试、无法分配32GB以上内存用于实验矩阵,或需要评估非分类任务,则不适合使用。此外,如果您需要端到端训练基础分类器——本基准测试专注于过滤器/监控层,而非上游模型训练——也不太理想。"
|
|
113
|
+
paper:
|
|
114
|
+
title: "Suitability Filter: A Statistical Framework for Classifier Evaluation in Real-World Deployment Settings"
|
|
115
|
+
authors:
|
|
116
|
+
- Angéline Pouget
|
|
117
|
+
- Mohammad Yaghini
|
|
118
|
+
- Stephan Rabanser
|
|
119
|
+
- Nicolas Papernot
|
|
120
|
+
venue: ICML 2025
|
|
121
|
+
year: 2025
|
|
122
|
+
url: https://arxiv.org/abs/2505.22356
|
|
123
|
+
volume: 267
|
|
124
|
+
publisher: PMLR
|
|
125
|
+
download:
|
|
126
|
+
url: https://github.com/ResearAI/DeepScientist/releases/download/aisb-v0.0.1/aisb.t3.010_suitabilityfilter.zip
|
|
127
|
+
archive_type: zip
|
|
128
|
+
local_dir_name: paper-10-SuitabilityFilter
|
|
129
|
+
provider: github_release
|
|
130
|
+
repo: ResearAI/DeepScientist
|
|
131
|
+
tag: aisb-v0.0.1
|
|
132
|
+
asset_name: aisb.t3.010_suitabilityfilter.zip
|
|
133
|
+
sha256: 146f3477fb91d43974f51dac9013768be094e9ad355d2196c820915f062645f1
|
|
134
|
+
size_bytes: 54606221
|
|
135
|
+
commercial:
|
|
136
|
+
annual_fee: null
|
|
137
|
+
display:
|
|
138
|
+
palette_seed: sand-graphite-verify
|
|
139
|
+
art_style: clean-statistical
|
|
140
|
+
accent_priority: high
|
|
141
|
+
image_path: ../image/010_aisb.t3.010_suitabilityfilter.jpg
|