@researai/deepscientist 1.5.17 → 1.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +309 -130
- package/AISB/catalog/aisb.b1.agentic_coding.yaml +244 -0
- package/AISB/catalog/aisb.b10.climate_earth.yaml +235 -0
- package/AISB/catalog/aisb.b11.model_efficiency.yaml +231 -0
- package/AISB/catalog/aisb.b12.embodied_ai.yaml +238 -0
- package/AISB/catalog/aisb.b2.agent_systems.yaml +229 -0
- package/AISB/catalog/aisb.b3.self_evolving_rl.yaml +237 -0
- package/AISB/catalog/aisb.b4.lm_reasoning.yaml +240 -0
- package/AISB/catalog/aisb.b5.math_proof.yaml +235 -0
- package/AISB/catalog/aisb.b6.research_process.yaml +243 -0
- package/AISB/catalog/aisb.b7.multimodal_fusion.yaml +232 -0
- package/AISB/catalog/aisb.b8.lifesci_drug.yaml +275 -0
- package/AISB/catalog/aisb.b9.material_science.yaml +237 -0
- package/AISB/catalog/aisb.t3.001_savvy.yaml +159 -0
- package/AISB/catalog/aisb.t3.001_savvy.zh.yaml +121 -0
- package/AISB/catalog/aisb.t3.002_pinet.yaml +189 -0
- package/AISB/catalog/aisb.t3.002_pinet.zh.yaml +130 -0
- package/AISB/catalog/aisb.t3.004_decentralattn.yaml +184 -0
- package/AISB/catalog/aisb.t3.004_decentralattn.zh.yaml +153 -0
- package/AISB/catalog/aisb.t3.005_tsae.yaml +193 -0
- package/AISB/catalog/aisb.t3.005_tsae.zh.yaml +139 -0
- package/AISB/catalog/aisb.t3.006_physense.yaml +194 -0
- package/AISB/catalog/aisb.t3.006_physense.zh.yaml +118 -0
- package/AISB/catalog/aisb.t3.007_reasoningiqa.yaml +169 -0
- package/AISB/catalog/aisb.t3.007_reasoningiqa.zh.yaml +133 -0
- package/AISB/catalog/aisb.t3.008_meanflows.yaml +188 -0
- package/AISB/catalog/aisb.t3.008_meanflows.zh.yaml +140 -0
- package/AISB/catalog/aisb.t3.009_scoremissing.yaml +179 -0
- package/AISB/catalog/aisb.t3.009_scoremissing.zh.yaml +119 -0
- package/AISB/catalog/aisb.t3.010_suitabilityfilter.yaml +221 -0
- package/AISB/catalog/aisb.t3.010_suitabilityfilter.zh.yaml +141 -0
- package/AISB/catalog/aisb.t3.011_osd.yaml +206 -0
- package/AISB/catalog/aisb.t3.011_osd.zh.yaml +163 -0
- package/AISB/catalog/aisb.t3.012_efficientqat.yaml +206 -0
- package/AISB/catalog/aisb.t3.012_efficientqat.zh.yaml +159 -0
- package/AISB/catalog/aisb.t3.013_appl.yaml +152 -0
- package/AISB/catalog/aisb.t3.013_appl.zh.yaml +126 -0
- package/AISB/catalog/aisb.t3.014_piguard.yaml +207 -0
- package/AISB/catalog/aisb.t3.014_piguard.zh.yaml +164 -0
- package/AISB/catalog/aisb.t3.015_frspec.yaml +209 -0
- package/AISB/catalog/aisb.t3.015_frspec.zh.yaml +163 -0
- package/AISB/catalog/aisb.t3.016_mathfusion.yaml +166 -0
- package/AISB/catalog/aisb.t3.016_mathfusion.zh.yaml +145 -0
- package/AISB/catalog/aisb.t3.017_multimodalglp.yaml +171 -0
- package/AISB/catalog/aisb.t3.017_multimodalglp.zh.yaml +122 -0
- package/AISB/catalog/aisb.t3.018_cotsynth.yaml +206 -0
- package/AISB/catalog/aisb.t3.018_cotsynth.zh.yaml +162 -0
- package/AISB/catalog/aisb.t3.019_dyscaleut.yaml +211 -0
- package/AISB/catalog/aisb.t3.019_dyscaleut.zh.yaml +148 -0
- package/AISB/catalog/aisb.t3.020_aristotle.yaml +173 -0
- package/AISB/catalog/aisb.t3.020_aristotle.zh.yaml +119 -0
- package/AISB/catalog/aisb.t3.021_tokenrecycling.yaml +160 -0
- package/AISB/catalog/aisb.t3.021_tokenrecycling.zh.yaml +129 -0
- package/AISB/catalog/aisb.t3.022_chainofreasoning.yaml +204 -0
- package/AISB/catalog/aisb.t3.022_chainofreasoning.zh.yaml +161 -0
- package/AISB/catalog/aisb.t3.023_guidedembed.yaml +211 -0
- package/AISB/catalog/aisb.t3.023_guidedembed.zh.yaml +189 -0
- package/AISB/catalog/aisb.t3.024_outputcentric.yaml +148 -0
- package/AISB/catalog/aisb.t3.024_outputcentric.zh.yaml +131 -0
- package/AISB/catalog/aisb.t3.025_deeper.yaml +143 -0
- package/AISB/catalog/aisb.t3.025_deeper.zh.yaml +116 -0
- package/AISB/catalog/aisb.t3.026_gartkg.yaml +195 -0
- package/AISB/catalog/aisb.t3.026_gartkg.zh.yaml +127 -0
- package/AISB/catalog/aisb.t3.027_citeeval.yaml +182 -0
- package/AISB/catalog/aisb.t3.027_citeeval.zh.yaml +135 -0
- package/AISB/catalog/aisb.t3.028_sbam.yaml +206 -0
- package/AISB/catalog/aisb.t3.028_sbam.zh.yaml +166 -0
- package/AISB/catalog/aisb.t3.029_cdqgeoembed.yaml +224 -0
- package/AISB/catalog/aisb.t3.029_cdqgeoembed.zh.yaml +142 -0
- package/AISB/catalog/aisb.t3.030_processrm.yaml +211 -0
- package/AISB/catalog/aisb.t3.030_processrm.zh.yaml +166 -0
- package/AISB/catalog/aisb.t3.031_circuitstability.yaml +172 -0
- package/AISB/catalog/aisb.t3.031_circuitstability.zh.yaml +134 -0
- package/AISB/catalog/aisb.t3.032_ptsolver.yaml +169 -0
- package/AISB/catalog/aisb.t3.032_ptsolver.zh.yaml +135 -0
- package/AISB/catalog/aisb.t3.033_gcse.yaml +144 -0
- package/AISB/catalog/aisb.t3.033_gcse.zh.yaml +126 -0
- package/AISB/catalog/aisb.t3.034_ensemblewm.yaml +183 -0
- package/AISB/catalog/aisb.t3.034_ensemblewm.zh.yaml +146 -0
- package/AISB/catalog/aisb.t3.035_moralvalueswa.yaml +207 -0
- package/AISB/catalog/aisb.t3.035_moralvalueswa.zh.yaml +165 -0
- package/AISB/catalog/aisb.t3.036_weakstrongpref.yaml +210 -0
- package/AISB/catalog/aisb.t3.036_weakstrongpref.zh.yaml +194 -0
- package/AISB/catalog/aisb.t3.037_dementiamask.yaml +172 -0
- package/AISB/catalog/aisb.t3.037_dementiamask.zh.yaml +132 -0
- package/AISB/catalog/aisb.t3.038_tinysam.yaml +284 -0
- package/AISB/catalog/aisb.t3.038_tinysam.zh.yaml +240 -0
- package/AISB/catalog/aisb.t3.039_calf.yaml +224 -0
- package/AISB/catalog/aisb.t3.039_calf.zh.yaml +194 -0
- package/AISB/catalog/aisb.t3.040_graniteguardian.yaml +199 -0
- package/AISB/catalog/aisb.t3.040_graniteguardian.zh.yaml +174 -0
- package/AISB/catalog/aisb.t3.041_amdm.yaml +149 -0
- package/AISB/catalog/aisb.t3.041_amdm.zh.yaml +137 -0
- package/AISB/catalog/aisb.t3.042_xpatch.yaml +216 -0
- package/AISB/catalog/aisb.t3.042_xpatch.zh.yaml +182 -0
- package/AISB/catalog/aisb.t3.043_vhm.yaml +268 -0
- package/AISB/catalog/aisb.t3.043_vhm.zh.yaml +193 -0
- package/AISB/catalog/aisb.t3.044_rgvi.yaml +224 -0
- package/AISB/catalog/aisb.t3.044_rgvi.zh.yaml +176 -0
- package/AISB/catalog/aisb.t3.045_pslstm.yaml +203 -0
- package/AISB/catalog/aisb.t3.045_pslstm.zh.yaml +179 -0
- package/AISB/catalog/aisb.t3.046_nonstatts.yaml +208 -0
- package/AISB/catalog/aisb.t3.046_nonstatts.zh.yaml +194 -0
- package/AISB/catalog/aisb.t3.047_timepfn.yaml +156 -0
- package/AISB/catalog/aisb.t3.047_timepfn.zh.yaml +124 -0
- package/AISB/catalog/aisb.t3.048_proxyspex.yaml +148 -0
- package/AISB/catalog/aisb.t3.048_proxyspex.zh.yaml +125 -0
- package/AISB/catalog/aisb.t3.049_hogwildinference.yaml +183 -0
- package/AISB/catalog/aisb.t3.049_hogwildinference.zh.yaml +138 -0
- package/AISB/catalog/aisb.t3.050_causalpfn.yaml +214 -0
- package/AISB/catalog/aisb.t3.050_causalpfn.zh.yaml +190 -0
- package/AISB/catalog/aisb.t3.051_flashtp.yaml +169 -0
- package/AISB/catalog/aisb.t3.051_flashtp.zh.yaml +124 -0
- package/AISB/catalog/aisb.t3.052_nsdiff.yaml +155 -0
- package/AISB/catalog/aisb.t3.052_nsdiff.zh.yaml +138 -0
- package/AISB/catalog/aisb.t3.053_k2vae.yaml +158 -0
- package/AISB/catalog/aisb.t3.053_k2vae.zh.yaml +132 -0
- package/AISB/catalog/aisb.t3.054_timebase.yaml +178 -0
- package/AISB/catalog/aisb.t3.054_timebase.zh.yaml +158 -0
- package/AISB/catalog/aisb.t3.055_csbrain.yaml +238 -0
- package/AISB/catalog/aisb.t3.055_csbrain.zh.yaml +184 -0
- package/AISB/catalog/aisb.t3.056_infosam.yaml +224 -0
- package/AISB/catalog/aisb.t3.056_infosam.zh.yaml +189 -0
- package/AISB/catalog/aisb.t3.057_mdreid.yaml +129 -0
- package/AISB/catalog/aisb.t3.057_mdreid.zh.yaml +117 -0
- package/AISB/catalog/aisb.t3.058_mindglitch.yaml +171 -0
- package/AISB/catalog/aisb.t3.058_mindglitch.zh.yaml +145 -0
- package/AISB/catalog/aisb.t3.059_selfsupervised.yaml +154 -0
- package/AISB/catalog/aisb.t3.059_selfsupervised.zh.yaml +125 -0
- package/AISB/catalog/aisb.t3.060_iaggad.yaml +121 -0
- package/AISB/catalog/aisb.t3.060_iaggad.zh.yaml +100 -0
- package/AISB/catalog/aisb.t3.061_hsgkn.yaml +136 -0
- package/AISB/catalog/aisb.t3.061_hsgkn.zh.yaml +113 -0
- package/AISB/catalog/aisb.t3.062_visionts.yaml +237 -0
- package/AISB/catalog/aisb.t3.062_visionts.zh.yaml +216 -0
- package/AISB/catalog/aisb.t3.063_tsrag.yaml +162 -0
- package/AISB/catalog/aisb.t3.063_tsrag.zh.yaml +138 -0
- package/AISB/catalog/aisb.t3.064_pir.yaml +221 -0
- package/AISB/catalog/aisb.t3.064_pir.zh.yaml +197 -0
- package/AISB/catalog/aisb.t3.065_proteinbinding.yaml +234 -0
- package/AISB/catalog/aisb.t3.065_proteinbinding.zh.yaml +167 -0
- package/AISB/catalog/aisb.t3.066_tropicalattention.yaml +267 -0
- package/AISB/catalog/aisb.t3.066_tropicalattention.zh.yaml +229 -0
- package/AISB/catalog/aisb.t3.067_kanad.yaml +193 -0
- package/AISB/catalog/aisb.t3.067_kanad.zh.yaml +167 -0
- package/AISB/catalog/aisb.t3.068_sempo.yaml +187 -0
- package/AISB/catalog/aisb.t3.068_sempo.zh.yaml +148 -0
- package/AISB/catalog/aisb.t3.069_treehfd.yaml +129 -0
- package/AISB/catalog/aisb.t3.069_treehfd.zh.yaml +111 -0
- package/AISB/catalog/aisb.t3.070_certifiedunlearning.yaml +224 -0
- package/AISB/catalog/aisb.t3.070_certifiedunlearning.zh.yaml +171 -0
- package/AISB/catalog/aisb.t3.071_neuralmjd.yaml +142 -0
- package/AISB/catalog/aisb.t3.071_neuralmjd.zh.yaml +120 -0
- package/AISB/catalog/aisb.t3.072_fedgmt.yaml +181 -0
- package/AISB/catalog/aisb.t3.072_fedgmt.zh.yaml +158 -0
- package/AISB/catalog/aisb.t3.073_rld.yaml +161 -0
- package/AISB/catalog/aisb.t3.073_rld.zh.yaml +129 -0
- package/AISB/catalog/aisb.t3.074_lsvi.yaml +163 -0
- package/AISB/catalog/aisb.t3.074_lsvi.zh.yaml +129 -0
- package/AISB/catalog/aisb.t3.075_treeslicedentropy.yaml +201 -0
- package/AISB/catalog/aisb.t3.075_treeslicedentropy.zh.yaml +148 -0
- package/AISB/catalog/aisb.t3.076_aanet.yaml +169 -0
- package/AISB/catalog/aisb.t3.076_aanet.zh.yaml +129 -0
- package/AISB/catalog/aisb.t3.077_cmnn.yaml +199 -0
- package/AISB/catalog/aisb.t3.077_cmnn.zh.yaml +165 -0
- package/AISB/catalog/aisb.t3.078_conformalanomaly.yaml +146 -0
- package/AISB/catalog/aisb.t3.078_conformalanomaly.zh.yaml +117 -0
- package/AISB/catalog/aisb.t3.079_dpfkmeans.yaml +131 -0
- package/AISB/catalog/aisb.t3.079_dpfkmeans.zh.yaml +104 -0
- package/AISB/catalog/aisb.t3.080_latentscorereweight.yaml +169 -0
- package/AISB/catalog/aisb.t3.080_latentscorereweight.zh.yaml +123 -0
- package/AISB/catalog/aisb.t3.081_qmamba.yaml +150 -0
- package/AISB/catalog/aisb.t3.081_qmamba.zh.yaml +117 -0
- package/AISB/catalog/aisb.t3.082_onlinellmrouting.yaml +160 -0
- package/AISB/catalog/aisb.t3.082_onlinellmrouting.zh.yaml +133 -0
- package/AISB/catalog/aisb.t3.083_starformer.yaml +178 -0
- package/AISB/catalog/aisb.t3.083_starformer.zh.yaml +140 -0
- package/AISB/catalog/aisb.t3.084_ift.yaml +139 -0
- package/AISB/catalog/aisb.t3.084_ift.zh.yaml +111 -0
- package/AISB/catalog/aisb.t3.085_neuralsurv.yaml +183 -0
- package/AISB/catalog/aisb.t3.085_neuralsurv.zh.yaml +143 -0
- package/AISB/catalog/aisb.t3.086_stella.yaml +197 -0
- package/AISB/catalog/aisb.t3.086_stella.zh.yaml +142 -0
- package/AISB/catalog/aisb.t3.087_moses.yaml +167 -0
- package/AISB/catalog/aisb.t3.087_moses.zh.yaml +132 -0
- package/AISB/catalog/aisb.t3.088_channelnorm.yaml +140 -0
- package/AISB/catalog/aisb.t3.088_channelnorm.zh.yaml +109 -0
- package/AISB/catalog/aisb.t3.089_causalvelocity.yaml +730 -0
- package/AISB/catalog/aisb.t3.089_causalvelocity.zh.yaml +668 -0
- package/AISB/catalog/aisb.t3.090_rstib.yaml +144 -0
- package/AISB/catalog/aisb.t3.090_rstib.zh.yaml +109 -0
- package/AISB/catalog/aisb.t3.091_timeawarecausal.yaml +132 -0
- package/AISB/catalog/aisb.t3.091_timeawarecausal.zh.yaml +107 -0
- package/AISB/catalog/aisb.t3.092_kmeanslocalopt.yaml +138 -0
- package/AISB/catalog/aisb.t3.092_kmeanslocalopt.zh.yaml +110 -0
- package/AISB/catalog/aisb.t3.093_fedwmsam.yaml +134 -0
- package/AISB/catalog/aisb.t3.093_fedwmsam.zh.yaml +106 -0
- package/AISB/catalog/aisb.t3.094_boundre.yaml +147 -0
- package/AISB/catalog/aisb.t3.094_boundre.zh.yaml +114 -0
- package/AISB/catalog/aisb.t3.095_fastfeaturecp.yaml +153 -0
- package/AISB/catalog/aisb.t3.095_fastfeaturecp.zh.yaml +118 -0
- package/AISB/catalog/aisb.t3.096_m3svm.yaml +189 -0
- package/AISB/catalog/aisb.t3.096_m3svm.zh.yaml +149 -0
- package/AISB/catalog/aisb.t3.097_wassersteintl.yaml +212 -0
- package/AISB/catalog/aisb.t3.097_wassersteintl.zh.yaml +169 -0
- package/AISB/catalog/aisb.t3.098_xmahalanobis.yaml +171 -0
- package/AISB/catalog/aisb.t3.098_xmahalanobis.zh.yaml +127 -0
- package/AISB/catalog/aisb.t3.099_ollalanding.yaml +248 -0
- package/AISB/catalog/aisb.t3.099_ollalanding.zh.yaml +182 -0
- package/AISB/catalog/aisb.t3.100_invmissingdata.yaml +179 -0
- package/AISB/catalog/aisb.t3.100_invmissingdata.zh.yaml +150 -0
- package/AISB/catalog/aisb.t3.101_acia.yaml +164 -0
- package/AISB/catalog/aisb.t3.101_acia.zh.yaml +109 -0
- package/AISB/catalog/aisb.t3.102_stochasticff.yaml +178 -0
- package/AISB/catalog/aisb.t3.102_stochasticff.zh.yaml +130 -0
- package/AISB/catalog/aisb.t3.103_qdcp.yaml +150 -0
- package/AISB/catalog/aisb.t3.103_qdcp.zh.yaml +116 -0
- package/AISB/catalog/aisb.t3.104_balancedactiveinf.yaml +137 -0
- package/AISB/catalog/aisb.t3.104_balancedactiveinf.zh.yaml +104 -0
- package/AISB/catalog/aisb.t3.105_binaryclasseval.yaml +161 -0
- package/AISB/catalog/aisb.t3.105_binaryclasseval.zh.yaml +130 -0
- package/AISB/image/001_aisb.t3.001_savvy.jpg +0 -0
- package/AISB/image/002_aisb.t3.002_pinet.jpg +0 -0
- package/AISB/image/003_aisb.t3.003_dmsqd.jpg +0 -0
- package/AISB/image/004_aisb.t3.004_decentralattn.jpg +0 -0
- package/AISB/image/005_aisb.t3.005_tsae.jpg +0 -0
- package/AISB/image/006_aisb.t3.006_physense.jpg +0 -0
- package/AISB/image/007_aisb.t3.007_reasoningiqa.jpg +0 -0
- package/AISB/image/008_aisb.t3.008_meanflows.jpg +0 -0
- package/AISB/image/009_aisb.t3.009_scoremissing.jpg +0 -0
- package/AISB/image/010_aisb.t3.010_suitabilityfilter.jpg +0 -0
- package/AISB/image/011_aisb.t3.011_osd.jpg +0 -0
- package/AISB/image/012_aisb.t3.012_efficientqat.jpg +0 -0
- package/AISB/image/013_aisb.t3.013_appl.jpg +0 -0
- package/AISB/image/014_aisb.t3.014_piguard.jpg +0 -0
- package/AISB/image/015_aisb.t3.015_frspec.jpg +0 -0
- package/AISB/image/016_aisb.t3.016_mathfusion.jpg +0 -0
- package/AISB/image/017_aisb.t3.017_multimodalglp.jpg +0 -0
- package/AISB/image/018_aisb.t3.018_cotsynth.jpg +0 -0
- package/AISB/image/019_aisb.t3.019_dyscaleut.jpg +0 -0
- package/AISB/image/020_aisb.t3.020_aristotle.jpg +0 -0
- package/AISB/image/021_aisb.t3.021_tokenrecycling.jpg +0 -0
- package/AISB/image/022_aisb.t3.022_chainofreasoning.jpg +0 -0
- package/AISB/image/023_aisb.t3.023_guidedembed.jpg +0 -0
- package/AISB/image/024_aisb.t3.024_outputcentric.jpg +0 -0
- package/AISB/image/025_aisb.t3.025_deeper.jpg +0 -0
- package/AISB/image/026_aisb.t3.026_gartkg.jpg +0 -0
- package/AISB/image/027_aisb.t3.027_citeeval.jpg +0 -0
- package/AISB/image/028_aisb.t3.028_sbam.jpg +0 -0
- package/AISB/image/029_aisb.t3.029_cdqgeoembed.jpg +0 -0
- package/AISB/image/030_aisb.t3.030_processrm.jpg +0 -0
- package/AISB/image/031_aisb.t3.031_circuitstability.jpg +0 -0
- package/AISB/image/032_aisb.t3.032_ptsolver.jpg +0 -0
- package/AISB/image/033_aisb.t3.033_gcse.jpg +0 -0
- package/AISB/image/034_aisb.t3.034_ensemblewm.jpg +0 -0
- package/AISB/image/035_aisb.t3.035_moralvalueswa.jpg +0 -0
- package/AISB/image/036_aisb.t3.036_weakstrongpref.jpg +0 -0
- package/AISB/image/037_aisb.t3.037_dementiamask.jpg +0 -0
- package/AISB/image/038_aisb.t3.038_tinysam.jpg +0 -0
- package/AISB/image/039_aisb.t3.039_calf.jpg +0 -0
- package/AISB/image/040_aisb.t3.040_graniteguardian.jpg +0 -0
- package/AISB/image/041_aisb.t3.041_amdm.jpg +0 -0
- package/AISB/image/042_aisb.t3.042_xpatch.jpg +0 -0
- package/AISB/image/043_aisb.t3.043_vhm.jpg +0 -0
- package/AISB/image/044_aisb.t3.044_rgvi.jpg +0 -0
- package/AISB/image/045_aisb.t3.045_pslstm.jpg +0 -0
- package/AISB/image/046_aisb.t3.046_nonstatts.jpg +0 -0
- package/AISB/image/047_aisb.t3.047_timepfn.jpg +0 -0
- package/AISB/image/048_aisb.t3.048_proxyspex.jpg +0 -0
- package/AISB/image/049_aisb.t3.049_hogwildinference.jpg +0 -0
- package/AISB/image/050_aisb.t3.050_causalpfn.jpg +0 -0
- package/AISB/image/051_aisb.t3.051_flashtp.jpg +0 -0
- package/AISB/image/052_aisb.t3.052_nsdiff.jpg +0 -0
- package/AISB/image/053_aisb.t3.053_k2vae.jpg +0 -0
- package/AISB/image/054_aisb.t3.054_timebase.jpg +0 -0
- package/AISB/image/055_aisb.t3.055_csbrain.jpg +0 -0
- package/AISB/image/056_aisb.t3.056_infosam.jpg +0 -0
- package/AISB/image/057_aisb.t3.057_mdreid.jpg +0 -0
- package/AISB/image/058_aisb.t3.058_mindglitch.jpg +0 -0
- package/AISB/image/059_aisb.t3.059_selfsupervised.jpg +0 -0
- package/AISB/image/060_aisb.t3.060_iaggad.jpg +0 -0
- package/AISB/image/061_aisb.t3.061_hsgkn.jpg +0 -0
- package/AISB/image/062_aisb.t3.062_visionts.jpg +0 -0
- package/AISB/image/063_aisb.t3.063_tsrag.jpg +0 -0
- package/AISB/image/064_aisb.t3.064_pir.jpg +0 -0
- package/AISB/image/065_aisb.t3.065_proteinbinding.jpg +0 -0
- package/AISB/image/066_aisb.t3.066_tropicalattention.jpg +0 -0
- package/AISB/image/067_aisb.t3.067_kanad.jpg +0 -0
- package/AISB/image/068_aisb.t3.068_sempo.jpg +0 -0
- package/AISB/image/069_aisb.t3.069_treehfd.jpg +0 -0
- package/AISB/image/070_aisb.t3.070_certifiedunlearning.jpg +0 -0
- package/AISB/image/071_aisb.t3.071_neuralmjd.jpg +0 -0
- package/AISB/image/072_aisb.t3.072_fedgmt.jpg +0 -0
- package/AISB/image/073_aisb.t3.073_rld.jpg +0 -0
- package/AISB/image/074_aisb.t3.074_lsvi.jpg +0 -0
- package/AISB/image/075_aisb.t3.075_treeslicedentropy.jpg +0 -0
- package/AISB/image/076_aisb.t3.076_aanet.jpg +0 -0
- package/AISB/image/077_aisb.t3.077_cmnn.jpg +0 -0
- package/AISB/image/078_aisb.t3.078_conformalanomaly.jpg +0 -0
- package/AISB/image/079_aisb.t3.079_dpfkmeans.jpg +0 -0
- package/AISB/image/080_aisb.t3.080_latentscorereweight.jpg +0 -0
- package/AISB/image/081_aisb.t3.081_qmamba.jpg +0 -0
- package/AISB/image/082_aisb.t3.082_onlinellmrouting.jpg +0 -0
- package/AISB/image/083_aisb.t3.083_starformer.jpg +0 -0
- package/AISB/image/084_aisb.t3.084_ift.jpg +0 -0
- package/AISB/image/085_aisb.t3.085_neuralsurv.jpg +0 -0
- package/AISB/image/086_aisb.t3.086_stella.jpg +0 -0
- package/AISB/image/087_aisb.t3.087_moses.jpg +0 -0
- package/AISB/image/088_aisb.t3.088_channelnorm.jpg +0 -0
- package/AISB/image/089_aisb.t3.089_causalvelocity.jpg +0 -0
- package/AISB/image/090_aisb.t3.090_rstib.jpg +0 -0
- package/AISB/image/091_aisb.t3.091_timeawarecausal.jpg +0 -0
- package/AISB/image/092_aisb.t3.092_kmeanslocalopt.jpg +0 -0
- package/AISB/image/093_aisb.t3.093_fedwmsam.jpg +0 -0
- package/AISB/image/094_aisb.t3.094_boundre.jpg +0 -0
- package/AISB/image/095_aisb.t3.095_fastfeaturecp.jpg +0 -0
- package/AISB/image/096_aisb.t3.096_m3svm.jpg +0 -0
- package/AISB/image/097_aisb.t3.097_wassersteintl.jpg +0 -0
- package/AISB/image/098_aisb.t3.098_xmahalanobis.jpg +0 -0
- package/AISB/image/099_aisb.t3.099_ollalanding.jpg +0 -0
- package/AISB/image/100_aisb.t3.100_invmissingdata.jpg +0 -0
- package/AISB/image/101_aisb.t3.101_acia.jpg +0 -0
- package/AISB/image/102_aisb.t3.102_stochasticff.jpg +0 -0
- package/AISB/image/103_aisb.t3.103_qdcp.jpg +0 -0
- package/AISB/image/104_aisb.t3.104_balancedactiveinf.jpg +0 -0
- package/AISB/image/105_aisb.t3.105_binaryclasseval.jpg +0 -0
- package/AISB/image/106_aisb.t1.reasoning_lite.jpg +0 -0
- package/AISB/image/107_aisb.t2.paper_audit.jpg +0 -0
- package/AISB/image/108_aisb.t3.multi_gpu_search.jpg +0 -0
- package/AISB/image/109_aisb.t3.tdc_admet.jpg +0 -0
- package/AISB/image/aisb.b1.agentic_coding.svg +16 -0
- package/AISB/image/aisb.b10.climate_earth.svg +16 -0
- package/AISB/image/aisb.b11.model_efficiency.svg +16 -0
- package/AISB/image/aisb.b12.embodied_ai.svg +16 -0
- package/AISB/image/aisb.b2.agent_systems.svg +16 -0
- package/AISB/image/aisb.b3.self_evolving_rl.svg +16 -0
- package/AISB/image/aisb.b4.lm_reasoning.svg +16 -0
- package/AISB/image/aisb.b5.math_proof.svg +16 -0
- package/AISB/image/aisb.b6.research_process.svg +16 -0
- package/AISB/image/aisb.b7.multimodal_fusion.svg +16 -0
- package/AISB/image/aisb.b8.lifesci_drug.svg +16 -0
- package/AISB/image/aisb.b9.material_science.svg +16 -0
- package/README.md +132 -11
- package/bin/ds.js +376 -49
- package/docs/en/00_QUICK_START.md +135 -18
- package/docs/en/01_SETTINGS_REFERENCE.md +468 -96
- package/docs/en/02_START_RESEARCH_GUIDE.md +26 -5
- package/docs/en/03_QQ_CONNECTOR_GUIDE.md +14 -3
- package/docs/en/04_LINGZHU_CONNECTOR_GUIDE.md +2 -0
- package/docs/en/05_TUI_GUIDE.md +171 -2
- package/docs/en/07_MEMORY_AND_MCP.md +38 -2
- package/docs/en/09_DOCTOR.md +64 -4
- package/docs/en/10_WEIXIN_CONNECTOR_GUIDE.md +38 -1
- package/docs/en/11_LICENSE_AND_RISK.md +4 -0
- package/docs/en/12_GUIDED_WORKFLOW_TOUR.md +15 -0
- package/docs/en/14_PROMPT_SKILLS_AND_MCP_GUIDE.md +9 -0
- package/docs/en/15_CODEX_PROVIDER_SETUP.md +622 -187
- package/docs/en/16_TELEGRAM_CONNECTOR_GUIDE.md +14 -0
- package/docs/en/17_WHATSAPP_CONNECTOR_GUIDE.md +14 -0
- package/docs/en/18_FEISHU_CONNECTOR_GUIDE.md +14 -0
- package/docs/en/21_LOCAL_MODEL_BACKENDS_GUIDE.md +105 -2
- package/docs/en/22_BENCHSTORE_YAML_REFERENCE.md +469 -0
- package/docs/en/23_BENCHSTORE_GITHUB_RELEASES_SPEC.md +316 -0
- package/docs/en/24_CLAUDE_CODE_PROVIDER_SETUP.md +469 -0
- package/docs/en/25_OPENCODE_PROVIDER_SETUP.md +653 -0
- package/docs/en/26_CITATION_AND_ATTRIBUTION.md +119 -0
- package/docs/en/27_KIMI_CODE_PROVIDER_SETUP.md +180 -0
- package/docs/en/28_DISCORD_CONNECTOR_GUIDE.md +61 -0
- package/docs/en/29_SLACK_CONNECTOR_GUIDE.md +60 -0
- package/docs/en/30_SETTINGS_CONTROL_CENTER_GUIDE.md +371 -0
- package/docs/en/{19_LOCAL_BROWSER_AUTH.md → 31_LOCAL_BROWSER_AUTH.md} +1 -1
- package/docs/en/32_WINDOWS_WSL2_DEPLOYMENT_GUIDE.md +273 -0
- package/docs/en/33_WORKSPACE_EXPLORER_QA.md +121 -0
- package/docs/en/91_DEVELOPMENT.md +29 -0
- package/docs/en/99_ACKNOWLEDGEMENTS.md +24 -19
- package/docs/en/README.md +44 -7
- package/docs/images/admin/admin-connectors-health-en.png +0 -0
- package/docs/images/admin/admin-controllers-en.png +0 -0
- package/docs/images/admin/admin-diagnostics-en.png +0 -0
- package/docs/images/admin/admin-errors-en.png +0 -0
- package/docs/images/admin/admin-issues-en.png +0 -0
- package/docs/images/admin/admin-logs-en.png +0 -0
- package/docs/images/admin/admin-quest-detail-en.png +0 -0
- package/docs/images/admin/admin-quests-en.png +0 -0
- package/docs/images/admin/admin-repairs-en.png +0 -0
- package/docs/images/admin/admin-runtime-en.png +0 -0
- package/docs/images/admin/admin-search-en.png +0 -0
- package/docs/images/admin/admin-stats-en.png +0 -0
- package/docs/images/admin/admin-summary-en.png +0 -0
- package/docs/images/connectors/connector-discord-en.png +0 -0
- package/docs/images/connectors/connector-feishu-en.png +0 -0
- package/docs/images/connectors/connector-lingzhu-en.png +0 -0
- package/docs/images/connectors/connector-qq-en.png +0 -0
- package/docs/images/connectors/connector-slack-en.png +0 -0
- package/docs/images/connectors/connector-telegram-en.png +0 -0
- package/docs/images/connectors/connector-weixin-en.png +0 -0
- package/docs/images/connectors/connector-whatsapp-en.png +0 -0
- package/docs/images/settings/settings-baselines-en.png +0 -0
- package/docs/images/settings/settings-config-en.png +0 -0
- package/docs/images/settings/settings-connectors-overview-en.png +0 -0
- package/docs/images/settings/settings-deepxiv-en.png +0 -0
- package/docs/images/settings/settings-mcp-servers-en.png +0 -0
- package/docs/images/settings/settings-plugins-en.png +0 -0
- package/docs/images/settings/settings-runners-en.png +0 -0
- package/docs/zh/00_QUICK_START.md +92 -17
- package/docs/zh/01_SETTINGS_REFERENCE.md +219 -98
- package/docs/zh/02_START_RESEARCH_GUIDE.md +26 -5
- package/docs/zh/05_TUI_GUIDE.md +171 -2
- package/docs/zh/07_MEMORY_AND_MCP.md +29 -2
- package/docs/zh/09_DOCTOR.md +39 -4
- package/docs/zh/10_WEIXIN_CONNECTOR_GUIDE.md +24 -1
- package/docs/zh/11_LICENSE_AND_RISK.md +4 -0
- package/docs/zh/12_GUIDED_WORKFLOW_TOUR.md +15 -0
- package/docs/zh/14_PROMPT_SKILLS_AND_MCP_GUIDE.md +9 -0
- package/docs/zh/15_CODEX_PROVIDER_SETUP.md +550 -188
- package/docs/zh/21_LOCAL_MODEL_BACKENDS_GUIDE.md +105 -2
- package/docs/zh/22_BENCHSTORE_YAML_REFERENCE.md +459 -0
- package/docs/zh/23_BENCHSTORE_GITHUB_RELEASES_SPEC.md +287 -0
- package/docs/zh/23_CLAUDE_RUNNER_GUIDE.md +103 -0
- package/docs/zh/24_CLAUDE_CODE_PROVIDER_SETUP.md +460 -0
- package/docs/zh/25_OPENCODE_PROVIDER_SETUP.md +660 -0
- package/docs/zh/26_CITATION_AND_ATTRIBUTION.md +102 -0
- package/docs/zh/27_KIMI_CODE_PROVIDER_SETUP.md +51 -0
- package/docs/zh/{19_LOCAL_BROWSER_AUTH.md → 31_LOCAL_BROWSER_AUTH.md} +1 -1
- package/docs/zh/32_WINDOWS_WSL2_DEPLOYMENT_GUIDE.md +264 -0
- package/docs/zh/33_WORKSPACE_EXPLORER_QA.md +127 -0
- package/docs/zh/99_ACKNOWLEDGEMENTS.md +23 -19
- package/docs/zh/README.md +29 -7
- package/install.sh +122 -16
- package/package.json +4 -1
- package/pyproject.toml +2 -1
- package/src/deepscientist/__init__.py +1 -1
- package/src/deepscientist/acp/envelope.py +13 -0
- package/src/deepscientist/admin/__init__.py +3 -0
- package/src/deepscientist/admin/charts.py +681 -0
- package/src/deepscientist/admin/logs.py +119 -0
- package/src/deepscientist/admin/repairs.py +217 -0
- package/src/deepscientist/admin/service.py +1310 -0
- package/src/deepscientist/admin/system_info.py +700 -0
- package/src/deepscientist/admin/tasks.py +465 -0
- package/src/deepscientist/admin/tool_metrics.py +600 -0
- package/src/deepscientist/artifact/guidance.py +8 -4
- package/src/deepscientist/artifact/schemas.py +115 -0
- package/src/deepscientist/artifact/service.py +4268 -260
- package/src/deepscientist/bash_exec/monitor.py +30 -3
- package/src/deepscientist/bash_exec/service.py +134 -1
- package/src/deepscientist/benchstore/__init__.py +4 -0
- package/src/deepscientist/benchstore/prompt_builder.py +224 -0
- package/src/deepscientist/benchstore/service.py +1716 -0
- package/src/deepscientist/channels/weixin_ilink.py +8 -1
- package/src/deepscientist/cli.py +92 -17
- package/src/deepscientist/codex_cli_compat.py +2 -2
- package/src/deepscientist/config/models.py +82 -11
- package/src/deepscientist/config/service.py +927 -91
- package/src/deepscientist/connector/weixin_support.py +48 -17
- package/src/deepscientist/daemon/api/handlers.py +697 -210
- package/src/deepscientist/daemon/api/router.py +76 -1
- package/src/deepscientist/daemon/app.py +1054 -51
- package/src/deepscientist/diagnostics/runner_failures.py +147 -0
- package/src/deepscientist/doctor.py +212 -65
- package/src/deepscientist/evidence_packets.py +590 -0
- package/src/deepscientist/home.py +52 -4
- package/src/deepscientist/kimi_cli_compat.py +50 -0
- package/src/deepscientist/latex_runtime.py +2 -2
- package/src/deepscientist/mcp/context.py +2 -0
- package/src/deepscientist/mcp/schemas.py +114 -0
- package/src/deepscientist/mcp/server.py +1566 -126
- package/src/deepscientist/memory/service.py +203 -16
- package/src/deepscientist/process_control.py +8 -1
- package/src/deepscientist/prompts/builder.py +836 -92
- package/src/deepscientist/quest/__init__.py +2 -2
- package/src/deepscientist/quest/layout.py +12 -1
- package/src/deepscientist/quest/node_traces.py +10 -0
- package/src/deepscientist/quest/service.py +1430 -139
- package/src/deepscientist/quest/stage_views.py +1 -1
- package/src/deepscientist/runners/__init__.py +18 -0
- package/src/deepscientist/runners/base.py +89 -1
- package/src/deepscientist/runners/builtins.py +13 -1
- package/src/deepscientist/runners/claude.py +391 -0
- package/src/deepscientist/runners/codex.py +421 -21
- package/src/deepscientist/runners/codex_telemetry.py +127 -0
- package/src/deepscientist/runners/kimi.py +334 -0
- package/src/deepscientist/runners/metadata.py +68 -0
- package/src/deepscientist/runners/opencode.py +414 -0
- package/src/deepscientist/runners/runtime_overrides.py +100 -0
- package/src/deepscientist/runners/simple_cli.py +538 -0
- package/src/deepscientist/runtime_storage.py +303 -0
- package/src/deepscientist/shared.py +61 -16
- package/src/deepscientist/skills/installer.py +37 -0
- package/src/deepscientist/skills/registry.py +2 -0
- package/src/deepscientist/tinytex.py +2 -2
- package/src/deepscientist/tui.py +10 -3
- package/src/prompts/benchstore/system.md +77 -0
- package/src/prompts/connectors/qq.md +33 -2
- package/src/prompts/connectors/weixin.md +208 -23
- package/src/prompts/contracts/admin_ops.md +74 -0
- package/src/prompts/contracts/admin_ops_knowledge.md +138 -0
- package/src/prompts/contracts/shared_interaction.md +5 -11
- package/src/prompts/start_setup/system.md +422 -0
- package/src/prompts/system.md +409 -315
- package/src/prompts/system_copilot.md +88 -12
- package/src/skills/analysis-campaign/SKILL.md +239 -578
- package/src/skills/analysis-campaign/references/artifact-flow-examples.md +102 -0
- package/src/skills/analysis-campaign/references/boundary-cases.md +98 -0
- package/src/skills/analysis-campaign/references/campaign-checklist-template.md +39 -24
- package/src/skills/analysis-campaign/references/campaign-design.md +26 -10
- package/src/skills/analysis-campaign/references/campaign-plan-template.md +53 -54
- package/src/skills/analysis-campaign/references/operational-guidance.md +97 -0
- package/src/skills/analysis-campaign/references/writing-facing-slice-examples.md +10 -20
- package/src/skills/baseline/SKILL.md +183 -461
- package/src/skills/baseline/references/artifact-flow-examples.md +106 -0
- package/src/skills/baseline/references/artifact-payload-examples.md +1 -1
- package/src/skills/baseline/references/baseline-checklist-template.md +27 -35
- package/src/skills/baseline/references/baseline-plan-template.md +37 -76
- package/src/skills/baseline/references/boundary-cases.md +86 -0
- package/src/skills/baseline/references/codebase-audit-checklist.md +2 -6
- package/src/skills/baseline/references/comparability-contract.md +7 -12
- package/src/skills/baseline/references/operational-guidance.md +56 -0
- package/src/skills/baseline/references/route-selection.md +5 -25
- package/src/skills/decision/SKILL.md +113 -306
- package/src/skills/decision/references/checkpoint-memory-template.md +47 -0
- package/src/skills/decision/references/operational-guidance.md +94 -0
- package/src/skills/decision/references/research-route-criteria.md +7 -8
- package/src/skills/decision/references/strategic-decision-template.md +13 -26
- package/src/skills/experiment/SKILL.md +132 -670
- package/src/skills/experiment/references/execution-playbook.md +374 -0
- package/src/skills/experiment/references/main-experiment-checklist-template.md +26 -2
- package/src/skills/experiment/references/main-experiment-plan-template.md +28 -17
- package/src/skills/experiment/references/operational-guidance.md +108 -0
- package/src/skills/finalize/SKILL.md +62 -0
- package/src/skills/finalize/references/checkpoint-memory-template.md +49 -0
- package/src/skills/finalize/references/resume-packet-template.md +7 -0
- package/src/skills/idea/SKILL.md +228 -15
- package/src/skills/idea/references/controlled-brainstorming-playbook.md +78 -0
- package/src/skills/idea/references/current-board-packet-template.md +61 -0
- package/src/skills/idea/references/high-value-idea-sourcing.md +119 -0
- package/src/skills/idea/references/idea-generation-playbook.md +21 -0
- package/src/skills/idea/references/idea-thinking-flow.md +6 -0
- package/src/skills/idea/references/literature-survey-template.md +3 -0
- package/src/skills/idea/references/objective-contract-template.md +54 -0
- package/src/skills/idea/references/outline-seeding-example.md +56 -0
- package/src/skills/idea/references/pre-idea-draft-template.md +105 -0
- package/src/skills/idea/references/related-work-playbook.md +75 -2
- package/src/skills/idea/references/research-history-playbook.md +114 -0
- package/src/skills/idea/references/selection-gate.md +58 -6
- package/src/skills/intake-audit/SKILL.md +43 -2
- package/src/skills/intake-audit/references/state-audit-template.md +10 -0
- package/src/skills/nature-data/SKILL.md +128 -0
- package/src/skills/nature-data/UPSTREAM_LICENSE.txt +21 -0
- package/src/skills/nature-data/agents/openai.yaml +4 -0
- package/src/skills/nature-data/references/chinese-author-alignment.md +84 -0
- package/src/skills/nature-data/references/fair-metadata-checklist.md +105 -0
- package/src/skills/nature-data/references/policy-principles.md +103 -0
- package/src/skills/nature-data/references/repository-and-identifiers.md +96 -0
- package/src/skills/nature-data/references/source-basis.md +54 -0
- package/src/skills/nature-data/references/statement-patterns.md +153 -0
- package/src/skills/nature-figure/SKILL.md +197 -0
- package/src/skills/nature-figure/UPSTREAM_LICENSE.txt +21 -0
- package/src/skills/nature-figure/agents/openai.yaml +4 -0
- package/src/skills/nature-figure/evals/evals.json +37 -0
- package/src/skills/nature-figure/references/api.md +428 -0
- package/src/skills/nature-figure/references/backend-selection.md +100 -0
- package/src/skills/nature-figure/references/chart-types.md +281 -0
- package/src/skills/nature-figure/references/common-patterns.md +349 -0
- package/src/skills/nature-figure/references/design-theory.md +436 -0
- package/src/skills/nature-figure/references/figure-contract.md +93 -0
- package/src/skills/nature-figure/references/nature-2026-observations.md +112 -0
- package/src/skills/nature-figure/references/qa-contract.md +119 -0
- package/src/skills/nature-figure/references/r-template-index.md +66 -0
- package/src/skills/nature-figure/references/r-workflow.md +161 -0
- package/src/skills/nature-figure/references/tutorials.md +250 -0
- package/src/skills/nature-paper2ppt/SKILL.md +507 -0
- package/src/skills/nature-paper2ppt/UPSTREAM_LICENSE.txt +21 -0
- package/src/skills/nature-paper2ppt/agents/openai.yaml +4 -0
- package/src/skills/nature-polishing/SKILL.md +385 -0
- package/src/skills/nature-polishing/UPSTREAM_LICENSE.txt +21 -0
- package/src/skills/nature-polishing/agents/openai.yaml +4 -0
- package/src/skills/nature-polishing/references/phrasebank-playbook.md +162 -0
- package/src/skills/nature-polishing/references/section-moves.md +240 -0
- package/src/skills/nature-polishing/references/style-guardrails.md +94 -0
- package/src/skills/nature-polishing/references/writing-strategy.md +148 -0
- package/src/skills/optimize/SKILL.md +177 -1568
- package/src/skills/optimize/references/brief-shaping-playbook.md +95 -0
- package/src/skills/optimize/references/candidate-board-template.md +13 -0
- package/src/skills/optimize/references/candidate-ranking-template.md +51 -0
- package/src/skills/optimize/references/codegen-route-playbook.md +50 -0
- package/src/skills/optimize/references/debug-response-template.md +29 -0
- package/src/skills/optimize/references/frontier-review-template.md +32 -0
- package/src/skills/optimize/references/fusion-playbook.md +36 -0
- package/src/skills/optimize/references/method-brief-template.md +73 -0
- package/src/skills/optimize/references/operational-guidance.md +621 -0
- package/src/skills/optimize/references/optimization-memory-template.md +30 -0
- package/src/skills/optimize/references/optimize-checklist-template.md +18 -0
- package/src/skills/optimize/references/plateau-response-playbook.md +28 -0
- package/src/skills/optimize/references/prompt-patterns.md +49 -0
- package/src/skills/paper-outline/SKILL.md +227 -0
- package/src/skills/paper-outline/references/outline-patterns.md +87 -0
- package/src/skills/paper-plot/SKILL.md +79 -0
- package/src/skills/paper-plot/agents/openai.yaml +4 -0
- package/src/skills/paper-plot/references/bar_grouped_hatch.md +96 -0
- package/src/skills/paper-plot/references/bar_paired_delta.md +72 -0
- package/src/skills/paper-plot/references/line_confidence_band.md +75 -0
- package/src/skills/paper-plot/references/line_loss_with_inset.md +65 -0
- package/src/skills/paper-plot/references/line_training_curve.md +44 -0
- package/src/skills/paper-plot/references/radar_dual_series.md +59 -0
- package/src/skills/paper-plot/references/scatter_broken_axis.md +59 -0
- package/src/skills/paper-plot/references/scatter_tsne_cluster.md +72 -0
- package/src/skills/paper-plot/scripts/bar_memevolve.py +109 -0
- package/src/skills/paper-plot/scripts/bar_spice.py +166 -0
- package/src/skills/paper-plot/scripts/line_aime.py +94 -0
- package/src/skills/paper-plot/scripts/line_loss_inset.py +157 -0
- package/src/skills/paper-plot/scripts/line_selfdistill.py +168 -0
- package/src/skills/paper-plot/scripts/radar_dora.py +151 -0
- package/src/skills/paper-plot/scripts/scatter_break.py +169 -0
- package/src/skills/paper-plot/scripts/scatter_tsne.py +133 -0
- package/src/skills/rebuttal/SKILL.md +9 -0
- package/src/skills/references/tool-usage-by-stage.md +438 -0
- package/src/skills/review/SKILL.md +105 -7
- package/src/skills/science/PROVENANCE.md +44 -0
- package/src/skills/science/SKILL.md +137 -0
- package/src/skills/science/references/artifact-science-tool.md +110 -0
- package/src/skills/science/references/claim-type-discipline.md +56 -0
- package/src/skills/science/references/domain-index.md +422 -0
- package/src/skills/science/references/hpc-via-bash-exec.md +42 -0
- package/src/skills/science/references/package-check-playbook.md +64 -0
- package/src/skills/science/references/package-index.min.json +3616 -0
- package/src/skills/science/references/packages/abinit.md +80 -0
- package/src/skills/science/references/packages/acts.md +73 -0
- package/src/skills/science/references/packages/aiida-core.md +80 -0
- package/src/skills/science/references/packages/alamode.md +80 -0
- package/src/skills/science/references/packages/amuse.md +88 -0
- package/src/skills/science/references/packages/anndata.md +88 -0
- package/src/skills/science/references/packages/arbor.md +80 -0
- package/src/skills/science/references/packages/arc.md +73 -0
- package/src/skills/science/references/packages/astropy.md +88 -0
- package/src/skills/science/references/packages/astroquery.md +88 -0
- package/src/skills/science/references/packages/atomate2.md +80 -0
- package/src/skills/science/references/packages/atomsmltr.md +73 -0
- package/src/skills/science/references/packages/awkward.md +73 -0
- package/src/skills/science/references/packages/batman.md +88 -0
- package/src/skills/science/references/packages/biopython.md +88 -0
- package/src/skills/science/references/packages/bloqade.md +73 -0
- package/src/skills/science/references/packages/brian2.md +73 -0
- package/src/skills/science/references/packages/bullet3.md +73 -0
- package/src/skills/science/references/packages/calculix.md +80 -0
- package/src/skills/science/references/packages/cantera.md +73 -0
- package/src/skills/science/references/packages/cavity-md-ipi.md +80 -0
- package/src/skills/science/references/packages/ccdproc.md +88 -0
- package/src/skills/science/references/packages/celerite2.md +88 -0
- package/src/skills/science/references/packages/cellrank.md +73 -0
- package/src/skills/science/references/packages/cesm.md +80 -0
- package/src/skills/science/references/packages/chemicals.md +73 -0
- package/src/skills/science/references/packages/chempy.md +73 -0
- package/src/skills/science/references/packages/cirq.md +73 -0
- package/src/skills/science/references/packages/coffea.md +73 -0
- package/src/skills/science/references/packages/cp2k.md +88 -0
- package/src/skills/science/references/packages/custodian.md +80 -0
- package/src/skills/science/references/packages/dart.md +73 -0
- package/src/skills/science/references/packages/datamol.md +88 -0
- package/src/skills/science/references/packages/dd4hep.md +73 -0
- package/src/skills/science/references/packages/dealii.md +80 -0
- package/src/skills/science/references/packages/deepchem.md +88 -0
- package/src/skills/science/references/packages/delphes.md +73 -0
- package/src/skills/science/references/packages/devito.md +80 -0
- package/src/skills/science/references/packages/dftb.md +88 -0
- package/src/skills/science/references/packages/dftd4.md +88 -0
- package/src/skills/science/references/packages/dftk-jl.md +80 -0
- package/src/skills/science/references/packages/dolfinx.md +80 -0
- package/src/skills/science/references/packages/drake.md +73 -0
- package/src/skills/science/references/packages/dumux.md +73 -0
- package/src/skills/science/references/packages/elk.md +80 -0
- package/src/skills/science/references/packages/elmerfem.md +80 -0
- package/src/skills/science/references/packages/enzo-e.md +88 -0
- package/src/skills/science/references/packages/espresso.md +80 -0
- package/src/skills/science/references/packages/exoplanet.md +88 -0
- package/src/skills/science/references/packages/fairroot.md +73 -0
- package/src/skills/science/references/packages/fbpic.md +80 -0
- package/src/skills/science/references/packages/fdtdbath-meep.md +80 -0
- package/src/skills/science/references/packages/geant4.md +73 -0
- package/src/skills/science/references/packages/geosx.md +80 -0
- package/src/skills/science/references/packages/gprmax.md +80 -0
- package/src/skills/science/references/packages/gromacs.md +80 -0
- package/src/skills/science/references/packages/gwaslab.md +73 -0
- package/src/skills/science/references/packages/gz-sim.md +73 -0
- package/src/skills/science/references/packages/hail.md +88 -0
- package/src/skills/science/references/packages/hiphive.md +80 -0
- package/src/skills/science/references/packages/hoomd-blue.md +80 -0
- package/src/skills/science/references/packages/itensor.md +73 -0
- package/src/skills/science/references/packages/itensors-jl.md +73 -0
- package/src/skills/science/references/packages/jdftx.md +73 -0
- package/src/skills/science/references/packages/jobflow.md +80 -0
- package/src/skills/science/references/packages/kadanoffbaym-jl.md +73 -0
- package/src/skills/science/references/packages/kite.md +80 -0
- package/src/skills/science/references/packages/kratos.md +80 -0
- package/src/skills/science/references/packages/kwant.md +73 -0
- package/src/skills/science/references/packages/lammps.md +80 -0
- package/src/skills/science/references/packages/lightkurve.md +88 -0
- package/src/skills/science/references/packages/limix.md +73 -0
- package/src/skills/science/references/packages/maxwelllink.md +80 -0
- package/src/skills/science/references/packages/mcdc.md +73 -0
- package/src/skills/science/references/packages/meep.md +80 -0
- package/src/skills/science/references/packages/mfem.md +80 -0
- package/src/skills/science/references/packages/mitgcm.md +73 -0
- package/src/skills/science/references/packages/modflow6.md +73 -0
- package/src/skills/science/references/packages/molecool.md +73 -0
- package/src/skills/science/references/packages/mom6.md +73 -0
- package/src/skills/science/references/packages/moose.md +80 -0
- package/src/skills/science/references/packages/mpas-model.md +73 -0
- package/src/skills/science/references/packages/mujoco.md +73 -0
- package/src/skills/science/references/packages/mumax3.md +73 -0
- package/src/skills/science/references/packages/nekrs.md +80 -0
- package/src/skills/science/references/packages/nessi.md +73 -0
- package/src/skills/science/references/packages/nest-simulator.md +73 -0
- package/src/skills/science/references/packages/netket.md +73 -0
- package/src/skills/science/references/packages/neuron.md +73 -0
- package/src/skills/science/references/packages/nextflow.md +88 -0
- package/src/skills/science/references/packages/nwchem.md +88 -0
- package/src/skills/science/references/packages/openbabel.md +88 -0
- package/src/skills/science/references/packages/openems.md +80 -0
- package/src/skills/science/references/packages/openff-toolkit.md +88 -0
- package/src/skills/science/references/packages/openfoam-dev.md +80 -0
- package/src/skills/science/references/packages/openmc.md +73 -0
- package/src/skills/science/references/packages/openmm.md +80 -0
- package/src/skills/science/references/packages/openmoc.md +73 -0
- package/src/skills/science/references/packages/openmx.md +80 -0
- package/src/skills/science/references/packages/opensees.md +80 -0
- package/src/skills/science/references/packages/opensn.md +80 -0
- package/src/skills/science/references/packages/opm-simulators.md +73 -0
- package/src/skills/science/references/packages/oqupy.md +73 -0
- package/src/skills/science/references/packages/packmol.md +80 -0
- package/src/skills/science/references/packages/palabos.md +80 -0
- package/src/skills/science/references/packages/parflow.md +80 -0
- package/src/skills/science/references/packages/pennylane.md +88 -0
- package/src/skills/science/references/packages/perceval.md +73 -0
- package/src/skills/science/references/packages/phono3py.md +73 -0
- package/src/skills/science/references/packages/phonopy.md +73 -0
- package/src/skills/science/references/packages/photutils.md +88 -0
- package/src/skills/science/references/packages/picongpu.md +80 -0
- package/src/skills/science/references/packages/plink-ng.md +88 -0
- package/src/skills/science/references/packages/precice.md +73 -0
- package/src/skills/science/references/packages/psc.md +80 -0
- package/src/skills/science/references/packages/psi4.md +88 -0
- package/src/skills/science/references/packages/pybinding.md +73 -0
- package/src/skills/science/references/packages/pyfr.md +80 -0
- package/src/skills/science/references/packages/pyhf.md +73 -0
- package/src/skills/science/references/packages/pyiron_base.md +80 -0
- package/src/skills/science/references/packages/pylcp.md +73 -0
- package/src/skills/science/references/packages/pylith.md +80 -0
- package/src/skills/science/references/packages/pynbody.md +88 -0
- package/src/skills/science/references/packages/pysam.md +88 -0
- package/src/skills/science/references/packages/pyscf.md +88 -0
- package/src/skills/science/references/packages/q-e.md +73 -0
- package/src/skills/science/references/packages/qibo.md +73 -0
- package/src/skills/science/references/packages/qiskit.md +73 -0
- package/src/skills/science/references/packages/quantica-jl.md +73 -0
- package/src/skills/science/references/packages/quantumoptics-jl.md +73 -0
- package/src/skills/science/references/packages/quimb.md +73 -0
- package/src/skills/science/references/packages/qulacs.md +73 -0
- package/src/skills/science/references/packages/qutip.md +73 -0
- package/src/skills/science/references/packages/rdkit.md +88 -0
- package/src/skills/science/references/packages/rmg-py.md +73 -0
- package/src/skills/science/references/packages/root.md +73 -0
- package/src/skills/science/references/packages/scanpy.md +88 -0
- package/src/skills/science/references/packages/scikit-allel.md +88 -0
- package/src/skills/science/references/packages/scikit-bio.md +88 -0
- package/src/skills/science/references/packages/scqubits.md +73 -0
- package/src/skills/science/references/packages/scuff-em.md +80 -0
- package/src/skills/science/references/packages/scvi-tools.md +73 -0
- package/src/skills/science/references/packages/seissol.md +73 -0
- package/src/skills/science/references/packages/sfepy.md +80 -0
- package/src/skills/science/references/packages/sisl.md +73 -0
- package/src/skills/science/references/packages/smilei.md +80 -0
- package/src/skills/science/references/packages/snakemake.md +88 -0
- package/src/skills/science/references/packages/specfem3d-globe.md +80 -0
- package/src/skills/science/references/packages/specutils.md +88 -0
- package/src/skills/science/references/packages/spglib.md +80 -0
- package/src/skills/science/references/packages/squidpy.md +88 -0
- package/src/skills/science/references/packages/starry.md +88 -0
- package/src/skills/science/references/packages/strawberryfields.md +73 -0
- package/src/skills/science/references/packages/su2.md +80 -0
- package/src/skills/science/references/packages/sunny-jl.md +73 -0
- package/src/skills/science/references/packages/sw4.md +73 -0
- package/src/skills/science/references/packages/swift.md +88 -0
- package/src/skills/science/references/packages/tdnegf.md +73 -0
- package/src/skills/science/references/packages/tenpy.md +73 -0
- package/src/skills/science/references/packages/thermo.md +73 -0
- package/src/skills/science/references/packages/tkwant.md +73 -0
- package/src/skills/science/references/packages/tvb-root.md +73 -0
- package/src/skills/science/references/packages/uproot5.md +73 -0
- package/src/skills/science/references/packages/vampire.md +80 -0
- package/src/skills/science/references/packages/wannier_tools.md +73 -0
- package/src/skills/science/references/packages/warpx.md +80 -0
- package/src/skills/science/references/packages/wrf.md +73 -0
- package/src/skills/science/references/packages/xtb.md +88 -0
- package/src/skills/science/references/packages/yt.md +73 -0
- package/src/skills/science/references/science-task-brief-template.md +71 -0
- package/src/skills/scout/SKILL.md +83 -425
- package/src/skills/scout/references/literature-scout-template.md +5 -24
- package/src/skills/scout/references/operational-guidance.md +191 -0
- package/src/skills/scout/references/paper-triage-playbook.md +11 -35
- package/src/skills/write/SKILL.md +744 -1246
- package/src/skills/write/references/experiments_analysis_patterns.md +129 -0
- package/src/skills/write/references/oral_package_patterns.md +252 -0
- package/src/skills/write/references/oral_writing_principles.md +291 -0
- package/src/skills/write/references/section_rewrite_checklist.md +234 -0
- package/src/tui/dist/app/AppContainer.js +1314 -27
- package/src/tui/dist/components/Composer.js +26 -1
- package/src/tui/dist/components/ConfigScreen.js +2 -1
- package/src/tui/dist/components/InputPrompt.js +25 -9
- package/src/tui/dist/components/MainContent.js +18 -3
- package/src/tui/dist/components/QuestScreen.js +3 -2
- package/src/tui/dist/components/UtilityScreen.js +37 -0
- package/src/tui/dist/hooks/useSafeInput.js +10 -0
- package/src/tui/dist/index.js +13 -1
- package/src/tui/dist/layouts/DefaultAppLayout.js +11 -8
- package/src/tui/dist/lib/api.js +89 -1
- package/src/tui/package.json +1 -1
- package/src/ui/dist/assets/{AnalysisPlugin-BCKAfjba.js → AnalysisPlugin-CA94NGmI.js} +1 -1
- package/src/ui/dist/assets/CliPlugin-DHBzphZU.js +79 -0
- package/src/ui/dist/assets/CodeEditorPlugin-BOFwD2rn.js +2 -0
- package/src/ui/dist/assets/{CodeViewerPlugin-CbaFRrUU.js → CodeViewerPlugin-CqDpgjik.js} +4 -4
- package/src/ui/dist/assets/{DocViewerPlugin-DAjLVeQD.js → DocViewerPlugin-UDBgt8-4.js} +3 -3
- package/src/ui/dist/assets/GitCommitViewerPlugin-BmHtZ0bZ.js +6 -0
- package/src/ui/dist/assets/{GitDiffViewerPlugin-CQACjoAA.js → GitDiffViewerPlugin-CAxjNorQ.js} +2 -2
- package/src/ui/dist/assets/{GitSnapshotViewer-0r4nLPke.js → GitSnapshotViewer-CweA6VON.js} +2 -2
- package/src/ui/dist/assets/{ImageViewerPlugin-nBOmI2v_.js → ImageViewerPlugin-C8wHGvGN.js} +5 -5
- package/src/ui/dist/assets/LabPlugin-COyyLUol.js +32 -0
- package/src/ui/dist/assets/{LatexPlugin-ZwtV8pIp.js → LatexPlugin-BQjAaA5J.js} +4 -4
- package/src/ui/dist/assets/{MarkdownViewerPlugin-DKqVfKyW.js → MarkdownViewerPlugin-Dy1NE2dI.js} +3 -3
- package/src/ui/dist/assets/{MarketplacePlugin-BwxStZ9D.js → MarketplacePlugin-DMIZtEJ2.js} +2 -2
- package/src/ui/dist/assets/NotebookEditor-CFHMq_Qt.js +91 -0
- package/src/ui/dist/assets/{NotebookEditor-DB9N_T9q.js → NotebookEditor-WFyd8Ybt.js} +3 -3
- package/src/ui/dist/assets/{PdfLoader-eWBONbQP.js → PdfLoader-CLE5u5TS.js} +3 -3
- package/src/ui/dist/assets/{PdfMarkdownPlugin-D22YOZL3.js → PdfMarkdownPlugin-_iNK_H83.js} +1 -1
- package/src/ui/dist/assets/PdfViewerPlugin-DgWsbInT.js +22 -0
- package/src/ui/dist/assets/SearchPlugin-DrZmn5iw.js +11 -0
- package/src/ui/dist/assets/{TextViewerPlugin-C5xqeeUH.js → TextViewerPlugin-D1-T3aC7.js} +4 -4
- package/src/ui/dist/assets/branding/runner-claude.svg +107 -0
- package/src/ui/dist/assets/branding/runner-codex.svg +10 -0
- package/src/ui/dist/assets/branding/runner-kimi.svg +14 -0
- package/src/ui/dist/assets/branding/runner-opencode.svg +7 -0
- package/src/ui/dist/assets/cli-store-CoZ-x5Ip.js +1 -0
- package/src/ui/dist/assets/{code-WlFHE7z_.js → code-DbsmSd3Y.js} +1 -1
- package/src/ui/dist/assets/file-diff-panel-DsvyRz47.js +1 -0
- package/src/ui/dist/assets/{wrap-text-BC-Hltpd.js → file-jump-queue-DeQBikaw.js} +3 -3
- package/src/ui/dist/assets/{file-socket-CfQPKQKj.js → file-socket-DA5XIx88.js} +1 -1
- package/src/ui/dist/assets/fonts/ds-fonts.css +50 -4
- package/src/ui/dist/assets/images/deepxiv/register-guide.png +0 -0
- package/src/ui/dist/assets/index-39vY9LmZ.js +1 -0
- package/src/ui/dist/assets/{index-CwNu1aH4.js → index-BsO46tJA.js} +1 -1
- package/src/ui/dist/assets/index-CHzJ2xtB.js +3530 -0
- package/src/ui/dist/assets/index-DH-zxoZ3.css +33 -0
- package/src/ui/dist/assets/{plugin-notebook-HbW2K-1c.js → plugin-notebook-JRhysCqj.js} +2 -2
- package/src/ui/dist/assets/{project-sync-C9IdzdZW.js → project-sync-DPmWKmKD.js} +1 -1
- package/src/ui/dist/assets/{zoom-out-E_gaeAxL.js → zoom-out-DAukFWen.js} +3 -3
- package/src/ui/dist/index.html +3 -3
- package/src/skills/analysis-campaign/references/artifact-orchestration.md +0 -58
- package/src/skills/baseline/references/memory-playbook.md +0 -40
- package/src/skills/baseline/references/publishable-baseline-package.md +0 -30
- package/src/skills/write/references/outline-evidence-contract-example.md +0 -107
- package/src/skills/write/references/paper-experiment-matrix-template.md +0 -131
- package/src/skills/write/references/paper-section-playbook.md +0 -64
- package/src/skills/write/references/reviewer-first-writing.md +0 -64
- package/src/skills/write/references/revision-checklist.md +0 -70
- package/src/skills/write/references/section-contracts.md +0 -82
- package/src/skills/write/references/sentence-level-proofing.md +0 -49
- package/src/ui/dist/assets/AiManusChatView-Bv-Z8YpU.js +0 -204
- package/src/ui/dist/assets/CliPlugin-BCKcpc35.js +0 -109
- package/src/ui/dist/assets/CodeEditorPlugin-DbOfSJ8K.js +0 -2
- package/src/ui/dist/assets/GitCommitViewerPlugin-CIUqbUDO.js +0 -1
- package/src/ui/dist/assets/LabCopilotPanel-BHxOxF4z.js +0 -14
- package/src/ui/dist/assets/LabPlugin-BKoZGs95.js +0 -22
- package/src/ui/dist/assets/NotebookEditor-BEQhaQbt.js +0 -81
- package/src/ui/dist/assets/PdfViewerPlugin-c-RK9DLM.js +0 -17
- package/src/ui/dist/assets/SearchPlugin-CxF9ytAx.js +0 -16
- package/src/ui/dist/assets/VNCViewer-BoLGLnHz.js +0 -11
- package/src/ui/dist/assets/bot-DREQOxzP.js +0 -6
- package/src/ui/dist/assets/chevron-up-C9Qpx4DE.js +0 -6
- package/src/ui/dist/assets/file-content-BZMz3RYp.js +0 -1
- package/src/ui/dist/assets/file-diff-panel-CQhw0jS2.js +0 -1
- package/src/ui/dist/assets/file-jump-queue-DA-SdG__.js +0 -1
- package/src/ui/dist/assets/git-commit-horizontal-DxZ8DCZh.js +0 -6
- package/src/ui/dist/assets/image-Bgl4VIyx.js +0 -6
- package/src/ui/dist/assets/index-BpV6lusQ.css +0 -33
- package/src/ui/dist/assets/index-CBNVuWcP.js +0 -2496
- package/src/ui/dist/assets/index-DrUnlf6K.js +0 -1
- package/src/ui/dist/assets/index-NW-h8VzN.js +0 -1
- package/src/ui/dist/assets/pdf-effect-queue-J8OnM0jE.js +0 -6
- package/src/ui/dist/assets/popover-CLc0pPP8.js +0 -1
- package/src/ui/dist/assets/select-Cs2PmzwL.js +0 -11
- package/src/ui/dist/assets/sigma-ClKcHAXm.js +0 -6
- package/src/ui/dist/assets/trash-DwpbFr3w.js +0 -11
- package/src/ui/dist/assets/useCliAccess-NQ8m0Let.js +0 -1
- package/src/ui/dist/assets/useFileDiffOverlay-FuhcnKiw.js +0 -1
|
@@ -0,0 +1,159 @@
|
|
|
1
|
+
schema_version: 1
|
|
2
|
+
id: aisb.t3.012_efficientqat
|
|
3
|
+
name: 'EfficientQAT:大型语言模型的高效量化感知训练'
|
|
4
|
+
version: 0.1.0
|
|
5
|
+
one_line: '针对 Llama-2/3 和 Mistral 系列(7B–70B)的两阶段低位宽量化感知训练(Block-AP → E2E-QP),通过 WikiText-2 困惑度、C4 困惑度及五个推理基准零样本准确率进行评估。
|
|
6
|
+
|
|
7
|
+
'
|
|
8
|
+
task_description: '本基准实现了 EfficientQAT,这是一种针对大型语言模型在 2/3/4 位仅权重量化精度下的两阶段量化感知训练流程。阶段一(Block-AP)使用 4096 个 RedPajama 样本(上下文长度 2048)的重构损失,对所有参数——权重、缩放因子和零点——进行逐块训练。阶段二(E2E-QP)冻结量化后的整数权重,仅在目标数据集(RedPajama 或 Alpaca,上下文长度 4096)上端到端微调量化缩放因子。主要可执行入口为 main_block_ap.py 和 main_e2e_qp.py,并配有辅助的缩放因子校准脚本(calibrate_scales.py、calibrate_scales_v2.py)。评估指标包括 WikiText-2 困惑度、C4 困惑度,以及在 WinoGrande、PIQA、HellaSwag、ARC-Easy 和 ARC-Challenge 上使用 lm-eval v0.4.2 测量的平均零样本准确率。多种模型/位宽配置的预量化检查点可在 HuggingFace 上获取,并可通过捆绑的转换脚本转换为 GPTQ 或 BitBLAS 格式。论文报告称,2 位 Llama-2-70B 的训练可在单张 A100-80GB 上于 41 小时内完成,且精度下降小于 3 个点。无需外部评估服务,所有指标均在本地计算。
|
|
9
|
+
|
|
10
|
+
'
|
|
11
|
+
capability_tags:
|
|
12
|
+
- research_code_optimization
|
|
13
|
+
- large_language_models
|
|
14
|
+
- quantization
|
|
15
|
+
- model_compression
|
|
16
|
+
- efficient_inference
|
|
17
|
+
- quantization_aware_training
|
|
18
|
+
aisb_direction: T3
|
|
19
|
+
track_fit:
|
|
20
|
+
- paper_track
|
|
21
|
+
- benchmark_track
|
|
22
|
+
task_mode: experiment_driven
|
|
23
|
+
requires_execution: true
|
|
24
|
+
requires_paper: true
|
|
25
|
+
integrity_level: cas_plus_canary
|
|
26
|
+
snapshot_status: runnable
|
|
27
|
+
support_level: advanced
|
|
28
|
+
time_band: 1d+
|
|
29
|
+
cost_band: high
|
|
30
|
+
difficulty: hard
|
|
31
|
+
data_access: public
|
|
32
|
+
primary_outputs:
|
|
33
|
+
- wikitext2_ppl
|
|
34
|
+
- c4_ppl
|
|
35
|
+
- quantized_checkpoint
|
|
36
|
+
- avg_accuracy
|
|
37
|
+
launch_profiles:
|
|
38
|
+
- id: calibration_only
|
|
39
|
+
label: 仅校准
|
|
40
|
+
description: '使用 WikiText-2 训练数据对预量化检查点运行缩放因子校准(calibrate_scales.py 或 calibrate_scales_v2.py)。生成更新后的缩放因子参数,无需完整的 Block-AP 或 E2E-QP 再训练。最快的路径;需要磁盘上有预量化模型。
|
|
41
|
+
|
|
42
|
+
'
|
|
43
|
+
- id: qat_eval
|
|
44
|
+
label: QAT + 评估
|
|
45
|
+
description: '运行完整的两阶段 EfficientQAT 流程:通过 main_block_ap.py 进行 Block-AP 逐块训练,然后通过 main_e2e_qp.py 进行 E2E-QP 端到端缩放因子训练,最后评估 WikiText-2/C4 困惑度及五个推理任务的零样本准确率。这是论文忠实路线,需要大量 GPU 时间(根据模型大小可能需要数小时到数天)。
|
|
46
|
+
|
|
47
|
+
'
|
|
48
|
+
- id: eval_pretrained
|
|
49
|
+
label: 评估预量化模型
|
|
50
|
+
description: '从 HuggingFace 下载预量化的 EfficientQAT 检查点,无需任何训练即可评估困惑度和零样本准确率。使用带 --resume_quant 参数的 main_block_ap.py。
|
|
51
|
+
|
|
52
|
+
'
|
|
53
|
+
dataset_download:
|
|
54
|
+
primary_method: mixed
|
|
55
|
+
sources:
|
|
56
|
+
- kind: huggingface
|
|
57
|
+
url: https://huggingface.co/datasets/togethercomputer/RedPajama-Data-1T-Sample
|
|
58
|
+
access: public
|
|
59
|
+
note: 'RedPajama 校准数据,用于 Block-AP(4096 样本,序列长度 2048)和 E2E-QP(4096 样本,序列长度 4096)。由 datautils_block.py 自动下载。
|
|
60
|
+
|
|
61
|
+
'
|
|
62
|
+
- kind: huggingface
|
|
63
|
+
url: https://huggingface.co/datasets/wikitext
|
|
64
|
+
access: public
|
|
65
|
+
note: 'WikiText-2(wikitext-2-raw-v1),用于困惑度评估和缩放因子校准。在评估期间自动下载。
|
|
66
|
+
|
|
67
|
+
'
|
|
68
|
+
- kind: huggingface
|
|
69
|
+
url: https://huggingface.co/ChenMnZ
|
|
70
|
+
access: public
|
|
71
|
+
note: 'Llama-2(7B/13B/70B)、Llama-3(8B/70B)、Llama-3-Instruct 和 Mistral-Large 的预量化模型检查点,支持 EQAT/GPTQ/BitBLAS 格式。各检查点大小从约 2 GB 到约 39 GB 不等。
|
|
72
|
+
|
|
73
|
+
'
|
|
74
|
+
- kind: huggingface
|
|
75
|
+
url: https://huggingface.co/datasets/tatsu-lab/alpaca
|
|
76
|
+
access: public
|
|
77
|
+
note: 'Alpaca 指令微调数据集,用作指令微调场景的替代 E2E-QP 训练数据。
|
|
78
|
+
|
|
79
|
+
'
|
|
80
|
+
notes:
|
|
81
|
+
- 全精度基模型(如约 131 GB 的 Llama-2-70B)如需从头运行 Block-AP,必须从其原始 HuggingFace 仓库单独下载。
|
|
82
|
+
- 预量化检查点要小得多(例如 w2g64 Llama-2-70B 约 20 GB)。
|
|
83
|
+
- 总磁盘使用量很大程度上取决于目标模型系列和位宽。
|
|
84
|
+
credential_requirements:
|
|
85
|
+
mode: none
|
|
86
|
+
items:
|
|
87
|
+
- 下载门控模型(Llama-2、Llama-3)可能需要 HuggingFace 账户,但访问 EfficientQAT 检查点本身无需账户。
|
|
88
|
+
notes:
|
|
89
|
+
- Meta Llama 模型权重需要在 HuggingFace 上接受许可协议后方可下载。
|
|
90
|
+
- ChenMnZ 命名空间下的预量化 EfficientQAT 检查点可公开访问。
|
|
91
|
+
resources:
|
|
92
|
+
minimum:
|
|
93
|
+
cpu_cores: 16
|
|
94
|
+
ram_gb: 64
|
|
95
|
+
disk_gb: 200
|
|
96
|
+
gpu_count: 1
|
|
97
|
+
gpu_vram_gb: 48
|
|
98
|
+
recommended:
|
|
99
|
+
cpu_cores: 32
|
|
100
|
+
ram_gb: 128
|
|
101
|
+
disk_gb: 500
|
|
102
|
+
gpu_count: 2
|
|
103
|
+
gpu_vram_gb: 80
|
|
104
|
+
environment:
|
|
105
|
+
python: '3.11'
|
|
106
|
+
cuda: null
|
|
107
|
+
pytorch: 2.2.2
|
|
108
|
+
flash_attn: null
|
|
109
|
+
key_packages:
|
|
110
|
+
- bitsandbytes==0.41.0
|
|
111
|
+
- transformers==4.40.1
|
|
112
|
+
- lm-eval==0.4.2
|
|
113
|
+
- accelerate
|
|
114
|
+
- datasets
|
|
115
|
+
notes:
|
|
116
|
+
- 完整的依赖项列表请参阅捆绑的 requirements.txt。
|
|
117
|
+
- CUDA 工具包版本在仓库中未固定;任何支持 PyTorch 2.2.2 的版本均可使用。
|
|
118
|
+
- GPTQModel(已测试 v0.9.8)仅在模型格式转换为 GPTQ/BitBLAS 时需要;核心 QAT 和评估不需要。
|
|
119
|
+
- bitsandbytes 用于 E2E-QP 训练中的 AdamW 优化器。
|
|
120
|
+
risk_flags:
|
|
121
|
+
- large_model_download
|
|
122
|
+
- high_gpu_memory
|
|
123
|
+
- long_training_time
|
|
124
|
+
- gated_model_access
|
|
125
|
+
risk_notes:
|
|
126
|
+
- 70B 模型路线需要单张 A100-80GB(论文报告 2 位 Llama-2-70B 的 Block-AP + E2E-QP 需 41 小时)或多 GPU 配置。
|
|
127
|
+
- 7B 模型路线可在单张 48GB GPU 上完成,但完整的 Block-AP + E2E-QP 仍需数小时。
|
|
128
|
+
- Llama-2/3 的全精度基模型权重在 HuggingFace 上受门控限制,需要接受许可协议。
|
|
129
|
+
- Block-AP 中的 --off_load_to_disk 标志可通过牺牲训练速度来降低 CPU 内存使用量。
|
|
130
|
+
- 打包过程中未执行基准测试运行;仍需运行时验证。
|
|
131
|
+
- 论文报告 2 位 Llama-2-70B 的 E2E-QP 内存需求为 34.2 GB。
|
|
132
|
+
recommended_when: '当您需要一个以激进位宽(2-4 位)量化感知训练为中心的 LLM 系统任务,且具备自包含的训练和评估流程,覆盖从 7B 到 70B 的多种模型规模时,可使用此基准。适用于评估低位宽 QAT 的优化策略、与 PTQ 和 Q-PEFT 基线对比,或生成可部署的 GPTQ/BitBLAS 格式量化检查点。
|
|
133
|
+
|
|
134
|
+
'
|
|
135
|
+
not_recommended_when: '如果您无法访问至少一块 ≥48 GB VRAM 的 GPU、需要亚小时级基准测试周转时间,或关注的是小型模型(<7B 参数),则不应使用此基准。此外,如果您需要的是权重-激活量化基准,此基准同样不适用(此基准为仅权重量化)。
|
|
136
|
+
|
|
137
|
+
'
|
|
138
|
+
paper:
|
|
139
|
+
title: 'EfficientQAT: Efficient Quantization-Aware Training for Large Language Models'
|
|
140
|
+
venue: ACL 2025 Main
|
|
141
|
+
year: 2025
|
|
142
|
+
url: https://arxiv.org/abs/2407.11062
|
|
143
|
+
download:
|
|
144
|
+
url: https://github.com/ResearAI/DeepScientist/releases/download/aisb-v0.0.1/aisb.t3.012_efficientqat.zip
|
|
145
|
+
archive_type: zip
|
|
146
|
+
local_dir_name: paper-12-EfficientQAT
|
|
147
|
+
provider: github_release
|
|
148
|
+
repo: ResearAI/DeepScientist
|
|
149
|
+
tag: aisb-v0.0.1
|
|
150
|
+
asset_name: aisb.t3.012_efficientqat.zip
|
|
151
|
+
sha256: 8f53850f12f1bdbc4e3212b21fb51a7479ebb8ab715f3138d96ef1da979b2977
|
|
152
|
+
size_bytes: 107235
|
|
153
|
+
commercial:
|
|
154
|
+
annual_fee: null
|
|
155
|
+
display:
|
|
156
|
+
palette_seed: amber-steel-llm
|
|
157
|
+
art_style: hardware-editorial
|
|
158
|
+
accent_priority: high
|
|
159
|
+
image_path: ../image/012_aisb.t3.012_efficientqat.jpg
|
|
@@ -0,0 +1,152 @@
|
|
|
1
|
+
schema_version: 1
|
|
2
|
+
id: aisb.t3.013_appl
|
|
3
|
+
name: 'APPL: A Prompt Programming Language for Harmonious Integration of Programs
|
|
4
|
+
and Large Language Model Prompts'
|
|
5
|
+
version: 0.1.0
|
|
6
|
+
one_line: Measure and optimize the AST compactness of APPL prompt-programming constructs
|
|
7
|
+
by running a code-backed evaluator on canonical code snippets from the paper.
|
|
8
|
+
task_description: 'This benchmark evaluates the structural compactness of APPL, a
|
|
9
|
+
Python-native prompt programming language that seamlessly integrates LLM calls into
|
|
10
|
+
conventional programs. The primary task is to optimize the APPL runtime and language
|
|
11
|
+
constructs so that canonical prompt-program patterns (e.g., Chain-of-Thought with
|
|
12
|
+
self-consistency from Figure 7a of the paper) yield smaller Python ASTs, measured
|
|
13
|
+
by counting AST nodes via the bundled eval_ast_size.py script. The evaluator parses
|
|
14
|
+
a fixed code snippet with Python''s ast module and reports the total node count.
|
|
15
|
+
No model training is involved; the benchmark is CPU-only and focuses on compiler-style
|
|
16
|
+
simplification and language design efficiency. An LLM API key (e.g., OpenAI) is
|
|
17
|
+
required only if you wish to exercise the APPL runtime end-to-end beyond the AST
|
|
18
|
+
metric; the core AST evaluation itself needs no external API.
|
|
19
|
+
|
|
20
|
+
'
|
|
21
|
+
capability_tags:
|
|
22
|
+
- research_code_optimization
|
|
23
|
+
- prompt_programming
|
|
24
|
+
- software_language_tools
|
|
25
|
+
- llm_tooling
|
|
26
|
+
- python
|
|
27
|
+
aisb_direction: T3
|
|
28
|
+
track_fit:
|
|
29
|
+
- paper_track
|
|
30
|
+
- benchmark_track
|
|
31
|
+
task_mode: evaluation_driven
|
|
32
|
+
requires_execution: true
|
|
33
|
+
requires_paper: true
|
|
34
|
+
integrity_level: cas_plus_canary
|
|
35
|
+
snapshot_status: runnable
|
|
36
|
+
support_level: turnkey
|
|
37
|
+
cost_band: low
|
|
38
|
+
time_band: 30-60m
|
|
39
|
+
difficulty: medium
|
|
40
|
+
data_access: public
|
|
41
|
+
primary_outputs:
|
|
42
|
+
- ast_size
|
|
43
|
+
- runtime_output
|
|
44
|
+
launch_profiles:
|
|
45
|
+
- id: quick_check
|
|
46
|
+
label: Quick Check
|
|
47
|
+
description: 'Run eval_ast_size.py to compute the AST node count of the canonical
|
|
48
|
+
CoT-SC snippet. No LLM API key needed. Completes in seconds.
|
|
49
|
+
|
|
50
|
+
'
|
|
51
|
+
- id: ast_eval
|
|
52
|
+
label: AST Eval
|
|
53
|
+
description: 'Run the full compactness-focused APPL evaluation workflow, including
|
|
54
|
+
any optimizations applied to the APPL language constructs, and report the resulting
|
|
55
|
+
ast_size metric.
|
|
56
|
+
|
|
57
|
+
'
|
|
58
|
+
- id: runtime_exercise
|
|
59
|
+
label: Runtime Exercise
|
|
60
|
+
description: 'Install applang, configure an LLM backend (e.g., OpenAI), and run
|
|
61
|
+
the bundled examples to verify end-to-end runtime behavior alongside the AST metric.
|
|
62
|
+
|
|
63
|
+
'
|
|
64
|
+
dataset_download:
|
|
65
|
+
primary_method: self_contained
|
|
66
|
+
sources: []
|
|
67
|
+
notes:
|
|
68
|
+
- No external dataset download is required. The evaluation code snippet is embedded
|
|
69
|
+
in eval_ast_size.py.
|
|
70
|
+
- The full APPL library source is included in the snapshot under src/appl/.
|
|
71
|
+
credential_requirements:
|
|
72
|
+
mode: optional
|
|
73
|
+
items:
|
|
74
|
+
- OpenAI API key (only for end-to-end runtime exercises, not for the core AST metric)
|
|
75
|
+
notes:
|
|
76
|
+
- The primary ast_size metric requires no credentials or network access.
|
|
77
|
+
- Set OPENAI_API_KEY in .env or as an environment variable if exercising runtime
|
|
78
|
+
examples.
|
|
79
|
+
- Other LLM backends supported via litellm may require their own API keys.
|
|
80
|
+
resources:
|
|
81
|
+
minimum:
|
|
82
|
+
cpu_cores: 4
|
|
83
|
+
ram_gb: 8
|
|
84
|
+
disk_gb: 10
|
|
85
|
+
gpu_count: 0
|
|
86
|
+
gpu_vram_gb: 0
|
|
87
|
+
recommended:
|
|
88
|
+
cpu_cores: 8
|
|
89
|
+
ram_gb: 16
|
|
90
|
+
disk_gb: 20
|
|
91
|
+
gpu_count: 0
|
|
92
|
+
gpu_vram_gb: 0
|
|
93
|
+
environment:
|
|
94
|
+
python: '3.9'
|
|
95
|
+
cuda: null
|
|
96
|
+
pytorch: null
|
|
97
|
+
flash_attn: null
|
|
98
|
+
key_packages:
|
|
99
|
+
- applang>=0.2.2
|
|
100
|
+
- litellm>=1.59.8
|
|
101
|
+
- openai>=1.13.3
|
|
102
|
+
- pydantic>=2.6.3
|
|
103
|
+
- libcst>=1.4.0
|
|
104
|
+
notes:
|
|
105
|
+
- CPU-only execution is sufficient for the core AST evaluation metric.
|
|
106
|
+
- Python 3.9+ required; tested through 3.13.
|
|
107
|
+
- Install via pip install -U applang or from the bundled pyproject.toml with pdm.
|
|
108
|
+
- See pyproject.toml for the full dependency set including optional extras (lunary,
|
|
109
|
+
instructor, langfuse).
|
|
110
|
+
risk_flags:
|
|
111
|
+
- optional_api_dependency
|
|
112
|
+
risk_notes:
|
|
113
|
+
- The core ast_size metric is fully self-contained and reproducible without any external
|
|
114
|
+
service.
|
|
115
|
+
- End-to-end runtime exercises require a working LLM API (OpenAI or other litellm-supported
|
|
116
|
+
backend), which incurs API costs and introduces non-determinism in generated text.
|
|
117
|
+
- No benchmark execution was performed during the packaging pass; metric values should
|
|
118
|
+
be verified by running eval_ast_size.py.
|
|
119
|
+
recommended_when: 'Use this benchmark when you want a software-oriented LLM task that
|
|
120
|
+
emphasizes runtime behavior, prompt-program structure, and code compactness rather
|
|
121
|
+
than GPU training. Suitable for evaluating compiler-style optimizations to prompt
|
|
122
|
+
programming language constructs.
|
|
123
|
+
|
|
124
|
+
'
|
|
125
|
+
not_recommended_when: 'Do not use this if you are looking for a heavy model-training
|
|
126
|
+
benchmark, a purely offline task with no LLM backend integration path, or a benchmark
|
|
127
|
+
that measures model accuracy on downstream NLP tasks.
|
|
128
|
+
|
|
129
|
+
'
|
|
130
|
+
paper:
|
|
131
|
+
title: 'APPL: A Prompt Programming Language for Harmonious Integration of Programs
|
|
132
|
+
and Large Language Model Prompts'
|
|
133
|
+
venue: arXiv preprint
|
|
134
|
+
year: 2024
|
|
135
|
+
url: https://arxiv.org/abs/2406.13161
|
|
136
|
+
download:
|
|
137
|
+
url: https://github.com/ResearAI/DeepScientist/releases/download/aisb-v0.0.1/aisb.t3.013_appl.zip
|
|
138
|
+
archive_type: zip
|
|
139
|
+
local_dir_name: paper-13-APPL
|
|
140
|
+
provider: github_release
|
|
141
|
+
repo: ResearAI/DeepScientist
|
|
142
|
+
tag: aisb-v0.0.1
|
|
143
|
+
asset_name: aisb.t3.013_appl.zip
|
|
144
|
+
sha256: 92f377259bd60724229ee6c61adda761177ee60e47813ebb71dc37f43c0e24f2
|
|
145
|
+
size_bytes: 2259394
|
|
146
|
+
commercial:
|
|
147
|
+
annual_fee: null
|
|
148
|
+
display:
|
|
149
|
+
palette_seed: apple-ink-notebook
|
|
150
|
+
art_style: language-design
|
|
151
|
+
accent_priority: medium
|
|
152
|
+
image_path: ../image/013_aisb.t3.013_appl.jpg
|
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
schema_version: 1
|
|
2
|
+
id: aisb.t3.013_appl
|
|
3
|
+
name: 'APPL: A Prompt Programming Language for Harmonious Integration of Programs
|
|
4
|
+
and Large Language Model Prompts'
|
|
5
|
+
version: 0.1.0
|
|
6
|
+
one_line: 通过对论文中的规范代码片段运行基于代码的评估器,测量并优化APPL提示编程结构的AST紧凑性。
|
|
7
|
+
task_description: '该基准测试用于评估APPL(一种原生Python提示编程语言,可将LLM调用无缝集成到传统程序中)的结构紧凑性。主要任务是优化APPL运行时和语言结构,使规范的提示程序模式(例如论文图7a中的带自洽的思维链)生成更小的Python AST,通过捆绑的eval_ast_size.py脚本统计AST节点数量来测量。评估器使用Python的ast模块解析固定代码片段并报告总节点数。不涉及模型训练;基准测试仅需CPU,专注于编译器风格的简化和语言设计效率。如果您希望在AST指标之外对APPL运行时进行端到端练习,则需要LLM API密钥(如OpenAI);核心AST评估本身无需外部API。
|
|
8
|
+
|
|
9
|
+
'
|
|
10
|
+
capability_tags:
|
|
11
|
+
- research_code_optimization
|
|
12
|
+
- prompt_programming
|
|
13
|
+
- software_language_tools
|
|
14
|
+
- llm_tooling
|
|
15
|
+
- python
|
|
16
|
+
aisb_direction: T3
|
|
17
|
+
track_fit:
|
|
18
|
+
- paper_track
|
|
19
|
+
- benchmark_track
|
|
20
|
+
task_mode: evaluation_driven
|
|
21
|
+
requires_execution: true
|
|
22
|
+
requires_paper: true
|
|
23
|
+
integrity_level: cas_plus_canary
|
|
24
|
+
snapshot_status: runnable
|
|
25
|
+
support_level: turnkey
|
|
26
|
+
cost_band: low
|
|
27
|
+
time_band: 30-60m
|
|
28
|
+
difficulty: medium
|
|
29
|
+
data_access: public
|
|
30
|
+
primary_outputs:
|
|
31
|
+
- ast_size
|
|
32
|
+
- runtime_output
|
|
33
|
+
launch_profiles:
|
|
34
|
+
- id: quick_check
|
|
35
|
+
label: 快速检查
|
|
36
|
+
description: '运行eval_ast_size.py计算规范CoT-SC代码片段的AST节点数。无需LLM API密钥。几秒内完成。
|
|
37
|
+
|
|
38
|
+
'
|
|
39
|
+
- id: ast_eval
|
|
40
|
+
label: AST评估
|
|
41
|
+
description: '运行完整的以紧凑性为重点的APPL评估工作流程,包括对APPL语言结构应用的任何优化,并报告最终的ast_size指标。
|
|
42
|
+
|
|
43
|
+
'
|
|
44
|
+
- id: runtime_exercise
|
|
45
|
+
label: 运行时练习
|
|
46
|
+
description: '安装applang,配置LLM后端(如OpenAI),并运行捆绑的示例以验证端到端运行时行为以及AST指标。
|
|
47
|
+
|
|
48
|
+
'
|
|
49
|
+
dataset_download:
|
|
50
|
+
primary_method: self_contained
|
|
51
|
+
sources: []
|
|
52
|
+
notes:
|
|
53
|
+
- 无需外部数据集下载。评估代码片段已嵌入eval_ast_size.py中。
|
|
54
|
+
- 完整的APPL库源代码包含在快照的src/appl/目录下。
|
|
55
|
+
credential_requirements:
|
|
56
|
+
mode: optional
|
|
57
|
+
items:
|
|
58
|
+
- OpenAI API密钥(仅用于端到端运行时练习,不用于核心AST指标)
|
|
59
|
+
notes:
|
|
60
|
+
- 主要的ast_size指标无需凭据或网络访问。
|
|
61
|
+
- 如果需要运行运行时示例,请在.env中设置OPENAI_API_KEY或作为环境变量。
|
|
62
|
+
- 通过litellm支持的其他LLM后端可能需要各自的API密钥。
|
|
63
|
+
resources:
|
|
64
|
+
minimum:
|
|
65
|
+
cpu_cores: 4
|
|
66
|
+
ram_gb: 8
|
|
67
|
+
disk_gb: 10
|
|
68
|
+
gpu_count: 0
|
|
69
|
+
gpu_vram_gb: 0
|
|
70
|
+
recommended:
|
|
71
|
+
cpu_cores: 8
|
|
72
|
+
ram_gb: 16
|
|
73
|
+
disk_gb: 20
|
|
74
|
+
gpu_count: 0
|
|
75
|
+
gpu_vram_gb: 0
|
|
76
|
+
environment:
|
|
77
|
+
python: '3.9'
|
|
78
|
+
cuda: null
|
|
79
|
+
pytorch: null
|
|
80
|
+
flash_attn: null
|
|
81
|
+
key_packages:
|
|
82
|
+
- applang>=0.2.2
|
|
83
|
+
- litellm>=1.59.8
|
|
84
|
+
- openai>=1.13.3
|
|
85
|
+
- pydantic>=2.6.3
|
|
86
|
+
- libcst>=1.4.0
|
|
87
|
+
notes:
|
|
88
|
+
- 纯CPU执行足以完成核心AST评估指标。
|
|
89
|
+
- 需要Python 3.9+;已测试至3.13版本。
|
|
90
|
+
- 可通过pip install -U applang安装,或使用pdm从捆绑的pyproject.toml安装。
|
|
91
|
+
- 完整的依赖项集合(包括可选的lunary、instructor、langfuse)请参见pyproject.toml。
|
|
92
|
+
risk_flags:
|
|
93
|
+
- optional_api_dependency
|
|
94
|
+
risk_notes:
|
|
95
|
+
- 核心ast_size指标完全自包含,无需任何外部服务即可复现。
|
|
96
|
+
- 端到端运行时练习需要可用的LLM API(OpenAI或其他litellm支持的后端),会产生API费用,并在生成的文本中引入非确定性。
|
|
97
|
+
- 打包过程中未执行基准测试;应通过运行eval_ast_size.py来验证指标值。
|
|
98
|
+
recommended_when: '当您需要一个强调运行时行为、提示程序结构和代码紧凑性而非GPU训练的面向软件的LLM任务时使用此基准测试。适用于评估对提示编程语言结构的编译器风格优化。
|
|
99
|
+
|
|
100
|
+
'
|
|
101
|
+
not_recommended_when: '如果您正在寻找重型模型训练基准测试、完全离线且无LLM后端集成路径的任务,或测量模型在下游NLP任务上准确性的基准测试,请勿使用此基准测试。
|
|
102
|
+
|
|
103
|
+
'
|
|
104
|
+
paper:
|
|
105
|
+
title: 'APPL: A Prompt Programming Language for Harmonious Integration of Programs
|
|
106
|
+
and Large Language Model Prompts'
|
|
107
|
+
venue: arXiv preprint
|
|
108
|
+
year: 2024
|
|
109
|
+
url: https://arxiv.org/abs/2406.13161
|
|
110
|
+
download:
|
|
111
|
+
url: https://github.com/ResearAI/DeepScientist/releases/download/aisb-v0.0.1/aisb.t3.013_appl.zip
|
|
112
|
+
archive_type: zip
|
|
113
|
+
local_dir_name: paper-13-APPL
|
|
114
|
+
provider: github_release
|
|
115
|
+
repo: ResearAI/DeepScientist
|
|
116
|
+
tag: aisb-v0.0.1
|
|
117
|
+
asset_name: aisb.t3.013_appl.zip
|
|
118
|
+
sha256: 92f377259bd60724229ee6c61adda761177ee60e47813ebb71dc37f43c0e24f2
|
|
119
|
+
size_bytes: 2259394
|
|
120
|
+
commercial:
|
|
121
|
+
annual_fee: null
|
|
122
|
+
display:
|
|
123
|
+
palette_seed: apple-ink-notebook
|
|
124
|
+
art_style: language-design
|
|
125
|
+
accent_priority: medium
|
|
126
|
+
image_path: ../image/013_aisb.t3.013_appl.jpg
|
|
@@ -0,0 +1,207 @@
|
|
|
1
|
+
schema_version: 1
|
|
2
|
+
id: aisb.t3.014_piguard
|
|
3
|
+
name: 'PIGuard: Prompt Injection Guardrail via Mitigating Overdefense for Free'
|
|
4
|
+
version: 0.1.0
|
|
5
|
+
one_line: 'Train or fine-tune a DeBERTa-based prompt-injection guard model (PIGuard)
|
|
6
|
+
and evaluate it on four benchmarks (NotInject, PINT, BIPIA, WildGuard-Benign) measuring
|
|
7
|
+
malicious detection, benign accuracy, and over-defense accuracy across trigger-word
|
|
8
|
+
difficulty levels.
|
|
9
|
+
|
|
10
|
+
'
|
|
11
|
+
task_description: 'This benchmark packages the full PIGuard pipeline: a lightweight
|
|
12
|
+
DeBERTa-based binary classifier that detects prompt-injection attacks while minimizing
|
|
13
|
+
over-defense (false positives on benign inputs containing trigger words like "ignore").
|
|
14
|
+
The optimization task is to improve injection recall without increasing false positives.
|
|
15
|
+
Training data from 20 open-source datasets plus LLM-augmented samples is bundled
|
|
16
|
+
in the snapshot. Evaluation runs across four test sets: NotInject (339 benign samples
|
|
17
|
+
with 1/2/3 trigger words, split into three difficulty subsets of 113 each), BIPIA
|
|
18
|
+
(malicious), WildGuard-Benign, and PINT. The PINT benchmark requires a separate
|
|
19
|
+
access request. Three evaluation scripts are provided: eval.py (loads local checkpoint
|
|
20
|
+
via PIGuard.py), eval_piguard.py (loads HuggingFace-format weights from /tmp/PIGuard_weights),
|
|
21
|
+
and run_official_eval.py (same but with a custom benign-score threshold of 0.10).
|
|
22
|
+
Primary metrics are over-defense accuracy on NotInject (overall and per-trigger-count),
|
|
23
|
+
malicious accuracy on PINT/BIPIA, and benign accuracy on PINT/WildGuard-Benign.
|
|
24
|
+
The model weights can be loaded from HuggingFace (leolee99/PIGuard) or from a Google
|
|
25
|
+
Drive checkpoint.
|
|
26
|
+
|
|
27
|
+
'
|
|
28
|
+
capability_tags:
|
|
29
|
+
- research_code_optimization
|
|
30
|
+
- llm_security
|
|
31
|
+
- classification
|
|
32
|
+
- prompt_injection
|
|
33
|
+
- robustness
|
|
34
|
+
aisb_direction: T3
|
|
35
|
+
track_fit:
|
|
36
|
+
- paper_track
|
|
37
|
+
- benchmark_track
|
|
38
|
+
task_mode: experiment_driven
|
|
39
|
+
requires_execution: true
|
|
40
|
+
requires_paper: true
|
|
41
|
+
integrity_level: cas_plus_canary
|
|
42
|
+
snapshot_status: runnable
|
|
43
|
+
support_level: turnkey
|
|
44
|
+
cost_band: medium
|
|
45
|
+
time_band: 6-24h
|
|
46
|
+
difficulty: medium
|
|
47
|
+
data_access: restricted
|
|
48
|
+
primary_outputs:
|
|
49
|
+
- over_defense_accuracy
|
|
50
|
+
- one_trigger_accuracy
|
|
51
|
+
- two_trigger_accuracy
|
|
52
|
+
- three_trigger_accuracy
|
|
53
|
+
- malicious_accuracy
|
|
54
|
+
- benign_accuracy
|
|
55
|
+
- trigger_accuracy_breakdown
|
|
56
|
+
- evaluation_report
|
|
57
|
+
launch_profiles:
|
|
58
|
+
- id: quick_check
|
|
59
|
+
label: Quick Check (HuggingFace weights)
|
|
60
|
+
description: 'Run eval_hf.py or eval_piguard.py to evaluate the pretrained PIGuard
|
|
61
|
+
model from HuggingFace on all bundled test sets (NotInject, BIPIA, WildGuard-Benign).
|
|
62
|
+
No training needed. PINT evaluation is skipped unless you have obtained access
|
|
63
|
+
separately.
|
|
64
|
+
|
|
65
|
+
'
|
|
66
|
+
- id: full_eval
|
|
67
|
+
label: Full Eval (all four benchmarks)
|
|
68
|
+
description: 'Run eval.py --resume <checkpoint> or run_official_eval.py across NotInject,
|
|
69
|
+
BIPIA, WildGuard-Benign, and PINT. Requires PINT access request and YAML-to-JSON
|
|
70
|
+
conversion via util.py.
|
|
71
|
+
|
|
72
|
+
'
|
|
73
|
+
- id: train_and_eval
|
|
74
|
+
label: Train + Evaluate
|
|
75
|
+
description: 'Run train.py to retrain PIGuard with the MOF strategy on the bundled
|
|
76
|
+
20-source training set, then evaluate the resulting checkpoint on all four benchmarks.
|
|
77
|
+
Expect 6-24h on a single GPU depending on VRAM and batch size.
|
|
78
|
+
|
|
79
|
+
'
|
|
80
|
+
dataset_download:
|
|
81
|
+
primary_method: mixed
|
|
82
|
+
sources:
|
|
83
|
+
- kind: huggingface
|
|
84
|
+
url: https://huggingface.co/datasets/leolee99/NotInject
|
|
85
|
+
access: public
|
|
86
|
+
note: 'NotInject over-defense evaluation dataset (339 samples, three subsets of
|
|
87
|
+
113 each). Also bundled in the snapshot under datasets/.
|
|
88
|
+
|
|
89
|
+
'
|
|
90
|
+
- kind: huggingface
|
|
91
|
+
url: https://huggingface.co/leolee99/PIGuard
|
|
92
|
+
access: public
|
|
93
|
+
note: Pretrained PIGuard model weights (DeBERTa-based).
|
|
94
|
+
- kind: google_drive
|
|
95
|
+
url: https://drive.google.com/file/d/1JpiVb_wtnbBLNEjIx1KS7PHuvmARQKTu/view?usp=sharing
|
|
96
|
+
access: public
|
|
97
|
+
note: Alternative checkpoint download.
|
|
98
|
+
- kind: external
|
|
99
|
+
url: https://share-eu1.hsforms.com/1TwiBEvLXRrCjJSdnbnHpLwfdfs3
|
|
100
|
+
access: request_required
|
|
101
|
+
note: 'PINT benchmark from Lakera AI. Not public; requires filling out an access
|
|
102
|
+
request form. Must convert from YAML to JSON via util.py after download.
|
|
103
|
+
|
|
104
|
+
'
|
|
105
|
+
- kind: bundled
|
|
106
|
+
url: null
|
|
107
|
+
access: public
|
|
108
|
+
note: 'Training data (20 open-source datasets + LLM augmentations), validation
|
|
109
|
+
set (144 samples), and test sets (NotInject, BIPIA, WildGuard-Benign) are all
|
|
110
|
+
bundled under PIGuard/datasets/.
|
|
111
|
+
|
|
112
|
+
'
|
|
113
|
+
notes:
|
|
114
|
+
- Total bundled data is modest (tens of MB). No large-scale download required for
|
|
115
|
+
most profiles.
|
|
116
|
+
- PINT is the only external dataset that requires a separate access request.
|
|
117
|
+
credential_requirements:
|
|
118
|
+
mode: none
|
|
119
|
+
items: []
|
|
120
|
+
notes:
|
|
121
|
+
- PINT benchmark access requires a form submission to Lakera AI, but no API key.
|
|
122
|
+
- HuggingFace model download is public; no token needed.
|
|
123
|
+
resources:
|
|
124
|
+
minimum:
|
|
125
|
+
cpu_cores: 8
|
|
126
|
+
ram_gb: 32
|
|
127
|
+
disk_gb: 50
|
|
128
|
+
gpu_count: 1
|
|
129
|
+
gpu_vram_gb: 12
|
|
130
|
+
recommended:
|
|
131
|
+
cpu_cores: 16
|
|
132
|
+
ram_gb: 64
|
|
133
|
+
disk_gb: 120
|
|
134
|
+
gpu_count: 1
|
|
135
|
+
gpu_vram_gb: 24
|
|
136
|
+
environment:
|
|
137
|
+
python: '3.10'
|
|
138
|
+
cuda: '11.8'
|
|
139
|
+
pytorch: 2.4.0
|
|
140
|
+
flash_attn: 2.6.1
|
|
141
|
+
key_packages:
|
|
142
|
+
- flash-attn==2.6.1
|
|
143
|
+
- vllm==0.5.4
|
|
144
|
+
- transformers==4.44.0
|
|
145
|
+
- ptflops
|
|
146
|
+
notes:
|
|
147
|
+
- See the bundled requirements.txt for the full dependency set.
|
|
148
|
+
- The model is DeBERTa-based (not an LLM); vllm is used only in certain evaluation
|
|
149
|
+
modes.
|
|
150
|
+
- flash-attn requires compatible CUDA and GPU architecture.
|
|
151
|
+
risk_flags:
|
|
152
|
+
- restricted_dataset_component
|
|
153
|
+
- external_model_weights
|
|
154
|
+
risk_notes:
|
|
155
|
+
- 'The PINT benchmark is not public and not bundled. Full four-benchmark evaluation
|
|
156
|
+
requires requesting access from Lakera AI via a web form. Without PINT, three of
|
|
157
|
+
four test sets are still available.
|
|
158
|
+
|
|
159
|
+
'
|
|
160
|
+
- 'Model weights must be downloaded from HuggingFace or Google Drive at runtime unless
|
|
161
|
+
pre-staged at /tmp/PIGuard_weights.
|
|
162
|
+
|
|
163
|
+
'
|
|
164
|
+
- 'eval_piguard.py and run_official_eval.py hardcode MODEL_PATH to /tmp/PIGuard_weights
|
|
165
|
+
and DATASET_ROOT to /repo/datasets. These paths may need adjustment.
|
|
166
|
+
|
|
167
|
+
'
|
|
168
|
+
- 'run_official_eval.py uses a custom BENIGN_THRESHOLD of 0.10 (lowered from 0.50),
|
|
169
|
+
which changes the classification boundary compared to the default argmax approach
|
|
170
|
+
in eval.py.
|
|
171
|
+
|
|
172
|
+
'
|
|
173
|
+
recommended_when: 'Use this benchmark when you want a security-oriented text classification
|
|
174
|
+
task with dual pressure: improving prompt-injection detection recall while controlling
|
|
175
|
+
over-defense (false positives on benign inputs containing attack-like trigger words).
|
|
176
|
+
Good fit for studying shortcut learning, DeBERTa fine-tuning strategies, and threshold
|
|
177
|
+
calibration on lightweight guard models.
|
|
178
|
+
|
|
179
|
+
'
|
|
180
|
+
not_recommended_when: 'Do not use this if you cannot provide a GPU (the model requires
|
|
181
|
+
CUDA for practical training and batched inference), if you need the full PINT evaluation
|
|
182
|
+
but cannot obtain access, or if you are looking for a generative LLM benchmark rather
|
|
183
|
+
than a binary classification task.
|
|
184
|
+
|
|
185
|
+
'
|
|
186
|
+
paper:
|
|
187
|
+
title: 'PIGuard: Prompt Injection Guardrail via Mitigating Overdefense for Free'
|
|
188
|
+
venue: ACL 2025
|
|
189
|
+
year: 2025
|
|
190
|
+
url: https://aclanthology.org/2025.acl-long.1468/
|
|
191
|
+
download:
|
|
192
|
+
url: https://github.com/ResearAI/DeepScientist/releases/download/aisb-v0.0.1/aisb.t3.014_piguard.zip
|
|
193
|
+
archive_type: zip
|
|
194
|
+
local_dir_name: paper-14-PIGuard
|
|
195
|
+
provider: github_release
|
|
196
|
+
repo: ResearAI/DeepScientist
|
|
197
|
+
tag: aisb-v0.0.1
|
|
198
|
+
asset_name: aisb.t3.014_piguard.zip
|
|
199
|
+
sha256: 9a7996fc3b40709caa357b763d7a51e1f1f2ab449480e403ec34a7c248eb4c81
|
|
200
|
+
size_bytes: 1621668
|
|
201
|
+
commercial:
|
|
202
|
+
annual_fee: null
|
|
203
|
+
display:
|
|
204
|
+
palette_seed: rust-sand-guard
|
|
205
|
+
art_style: safety-dashboard
|
|
206
|
+
accent_priority: high
|
|
207
|
+
image_path: ../image/014_aisb.t3.014_piguard.jpg
|