@researai/deepscientist 1.5.16 → 1.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +309 -130
- package/AISB/catalog/aisb.b1.agentic_coding.yaml +244 -0
- package/AISB/catalog/aisb.b10.climate_earth.yaml +235 -0
- package/AISB/catalog/aisb.b11.model_efficiency.yaml +231 -0
- package/AISB/catalog/aisb.b12.embodied_ai.yaml +238 -0
- package/AISB/catalog/aisb.b2.agent_systems.yaml +229 -0
- package/AISB/catalog/aisb.b3.self_evolving_rl.yaml +237 -0
- package/AISB/catalog/aisb.b4.lm_reasoning.yaml +240 -0
- package/AISB/catalog/aisb.b5.math_proof.yaml +235 -0
- package/AISB/catalog/aisb.b6.research_process.yaml +243 -0
- package/AISB/catalog/aisb.b7.multimodal_fusion.yaml +232 -0
- package/AISB/catalog/aisb.b8.lifesci_drug.yaml +275 -0
- package/AISB/catalog/aisb.b9.material_science.yaml +237 -0
- package/AISB/catalog/aisb.t3.001_savvy.yaml +159 -0
- package/AISB/catalog/aisb.t3.001_savvy.zh.yaml +121 -0
- package/AISB/catalog/aisb.t3.002_pinet.yaml +189 -0
- package/AISB/catalog/aisb.t3.002_pinet.zh.yaml +130 -0
- package/AISB/catalog/aisb.t3.004_decentralattn.yaml +184 -0
- package/AISB/catalog/aisb.t3.004_decentralattn.zh.yaml +153 -0
- package/AISB/catalog/aisb.t3.005_tsae.yaml +193 -0
- package/AISB/catalog/aisb.t3.005_tsae.zh.yaml +139 -0
- package/AISB/catalog/aisb.t3.006_physense.yaml +194 -0
- package/AISB/catalog/aisb.t3.006_physense.zh.yaml +118 -0
- package/AISB/catalog/aisb.t3.007_reasoningiqa.yaml +169 -0
- package/AISB/catalog/aisb.t3.007_reasoningiqa.zh.yaml +133 -0
- package/AISB/catalog/aisb.t3.008_meanflows.yaml +188 -0
- package/AISB/catalog/aisb.t3.008_meanflows.zh.yaml +140 -0
- package/AISB/catalog/aisb.t3.009_scoremissing.yaml +179 -0
- package/AISB/catalog/aisb.t3.009_scoremissing.zh.yaml +119 -0
- package/AISB/catalog/aisb.t3.010_suitabilityfilter.yaml +221 -0
- package/AISB/catalog/aisb.t3.010_suitabilityfilter.zh.yaml +141 -0
- package/AISB/catalog/aisb.t3.011_osd.yaml +206 -0
- package/AISB/catalog/aisb.t3.011_osd.zh.yaml +163 -0
- package/AISB/catalog/aisb.t3.012_efficientqat.yaml +206 -0
- package/AISB/catalog/aisb.t3.012_efficientqat.zh.yaml +159 -0
- package/AISB/catalog/aisb.t3.013_appl.yaml +152 -0
- package/AISB/catalog/aisb.t3.013_appl.zh.yaml +126 -0
- package/AISB/catalog/aisb.t3.014_piguard.yaml +207 -0
- package/AISB/catalog/aisb.t3.014_piguard.zh.yaml +164 -0
- package/AISB/catalog/aisb.t3.015_frspec.yaml +209 -0
- package/AISB/catalog/aisb.t3.015_frspec.zh.yaml +163 -0
- package/AISB/catalog/aisb.t3.016_mathfusion.yaml +166 -0
- package/AISB/catalog/aisb.t3.016_mathfusion.zh.yaml +145 -0
- package/AISB/catalog/aisb.t3.017_multimodalglp.yaml +171 -0
- package/AISB/catalog/aisb.t3.017_multimodalglp.zh.yaml +122 -0
- package/AISB/catalog/aisb.t3.018_cotsynth.yaml +206 -0
- package/AISB/catalog/aisb.t3.018_cotsynth.zh.yaml +162 -0
- package/AISB/catalog/aisb.t3.019_dyscaleut.yaml +211 -0
- package/AISB/catalog/aisb.t3.019_dyscaleut.zh.yaml +148 -0
- package/AISB/catalog/aisb.t3.020_aristotle.yaml +173 -0
- package/AISB/catalog/aisb.t3.020_aristotle.zh.yaml +119 -0
- package/AISB/catalog/aisb.t3.021_tokenrecycling.yaml +160 -0
- package/AISB/catalog/aisb.t3.021_tokenrecycling.zh.yaml +129 -0
- package/AISB/catalog/aisb.t3.022_chainofreasoning.yaml +204 -0
- package/AISB/catalog/aisb.t3.022_chainofreasoning.zh.yaml +161 -0
- package/AISB/catalog/aisb.t3.023_guidedembed.yaml +211 -0
- package/AISB/catalog/aisb.t3.023_guidedembed.zh.yaml +189 -0
- package/AISB/catalog/aisb.t3.024_outputcentric.yaml +148 -0
- package/AISB/catalog/aisb.t3.024_outputcentric.zh.yaml +131 -0
- package/AISB/catalog/aisb.t3.025_deeper.yaml +143 -0
- package/AISB/catalog/aisb.t3.025_deeper.zh.yaml +116 -0
- package/AISB/catalog/aisb.t3.026_gartkg.yaml +195 -0
- package/AISB/catalog/aisb.t3.026_gartkg.zh.yaml +127 -0
- package/AISB/catalog/aisb.t3.027_citeeval.yaml +182 -0
- package/AISB/catalog/aisb.t3.027_citeeval.zh.yaml +135 -0
- package/AISB/catalog/aisb.t3.028_sbam.yaml +206 -0
- package/AISB/catalog/aisb.t3.028_sbam.zh.yaml +166 -0
- package/AISB/catalog/aisb.t3.029_cdqgeoembed.yaml +224 -0
- package/AISB/catalog/aisb.t3.029_cdqgeoembed.zh.yaml +142 -0
- package/AISB/catalog/aisb.t3.030_processrm.yaml +211 -0
- package/AISB/catalog/aisb.t3.030_processrm.zh.yaml +166 -0
- package/AISB/catalog/aisb.t3.031_circuitstability.yaml +172 -0
- package/AISB/catalog/aisb.t3.031_circuitstability.zh.yaml +134 -0
- package/AISB/catalog/aisb.t3.032_ptsolver.yaml +169 -0
- package/AISB/catalog/aisb.t3.032_ptsolver.zh.yaml +135 -0
- package/AISB/catalog/aisb.t3.033_gcse.yaml +144 -0
- package/AISB/catalog/aisb.t3.033_gcse.zh.yaml +126 -0
- package/AISB/catalog/aisb.t3.034_ensemblewm.yaml +183 -0
- package/AISB/catalog/aisb.t3.034_ensemblewm.zh.yaml +146 -0
- package/AISB/catalog/aisb.t3.035_moralvalueswa.yaml +207 -0
- package/AISB/catalog/aisb.t3.035_moralvalueswa.zh.yaml +165 -0
- package/AISB/catalog/aisb.t3.036_weakstrongpref.yaml +210 -0
- package/AISB/catalog/aisb.t3.036_weakstrongpref.zh.yaml +194 -0
- package/AISB/catalog/aisb.t3.037_dementiamask.yaml +172 -0
- package/AISB/catalog/aisb.t3.037_dementiamask.zh.yaml +132 -0
- package/AISB/catalog/aisb.t3.038_tinysam.yaml +284 -0
- package/AISB/catalog/aisb.t3.038_tinysam.zh.yaml +240 -0
- package/AISB/catalog/aisb.t3.039_calf.yaml +224 -0
- package/AISB/catalog/aisb.t3.039_calf.zh.yaml +194 -0
- package/AISB/catalog/aisb.t3.040_graniteguardian.yaml +199 -0
- package/AISB/catalog/aisb.t3.040_graniteguardian.zh.yaml +174 -0
- package/AISB/catalog/aisb.t3.041_amdm.yaml +149 -0
- package/AISB/catalog/aisb.t3.041_amdm.zh.yaml +137 -0
- package/AISB/catalog/aisb.t3.042_xpatch.yaml +216 -0
- package/AISB/catalog/aisb.t3.042_xpatch.zh.yaml +182 -0
- package/AISB/catalog/aisb.t3.043_vhm.yaml +268 -0
- package/AISB/catalog/aisb.t3.043_vhm.zh.yaml +193 -0
- package/AISB/catalog/aisb.t3.044_rgvi.yaml +224 -0
- package/AISB/catalog/aisb.t3.044_rgvi.zh.yaml +176 -0
- package/AISB/catalog/aisb.t3.045_pslstm.yaml +203 -0
- package/AISB/catalog/aisb.t3.045_pslstm.zh.yaml +179 -0
- package/AISB/catalog/aisb.t3.046_nonstatts.yaml +208 -0
- package/AISB/catalog/aisb.t3.046_nonstatts.zh.yaml +194 -0
- package/AISB/catalog/aisb.t3.047_timepfn.yaml +156 -0
- package/AISB/catalog/aisb.t3.047_timepfn.zh.yaml +124 -0
- package/AISB/catalog/aisb.t3.048_proxyspex.yaml +148 -0
- package/AISB/catalog/aisb.t3.048_proxyspex.zh.yaml +125 -0
- package/AISB/catalog/aisb.t3.049_hogwildinference.yaml +183 -0
- package/AISB/catalog/aisb.t3.049_hogwildinference.zh.yaml +138 -0
- package/AISB/catalog/aisb.t3.050_causalpfn.yaml +214 -0
- package/AISB/catalog/aisb.t3.050_causalpfn.zh.yaml +190 -0
- package/AISB/catalog/aisb.t3.051_flashtp.yaml +169 -0
- package/AISB/catalog/aisb.t3.051_flashtp.zh.yaml +124 -0
- package/AISB/catalog/aisb.t3.052_nsdiff.yaml +155 -0
- package/AISB/catalog/aisb.t3.052_nsdiff.zh.yaml +138 -0
- package/AISB/catalog/aisb.t3.053_k2vae.yaml +158 -0
- package/AISB/catalog/aisb.t3.053_k2vae.zh.yaml +132 -0
- package/AISB/catalog/aisb.t3.054_timebase.yaml +178 -0
- package/AISB/catalog/aisb.t3.054_timebase.zh.yaml +158 -0
- package/AISB/catalog/aisb.t3.055_csbrain.yaml +238 -0
- package/AISB/catalog/aisb.t3.055_csbrain.zh.yaml +184 -0
- package/AISB/catalog/aisb.t3.056_infosam.yaml +224 -0
- package/AISB/catalog/aisb.t3.056_infosam.zh.yaml +189 -0
- package/AISB/catalog/aisb.t3.057_mdreid.yaml +129 -0
- package/AISB/catalog/aisb.t3.057_mdreid.zh.yaml +117 -0
- package/AISB/catalog/aisb.t3.058_mindglitch.yaml +171 -0
- package/AISB/catalog/aisb.t3.058_mindglitch.zh.yaml +145 -0
- package/AISB/catalog/aisb.t3.059_selfsupervised.yaml +154 -0
- package/AISB/catalog/aisb.t3.059_selfsupervised.zh.yaml +125 -0
- package/AISB/catalog/aisb.t3.060_iaggad.yaml +121 -0
- package/AISB/catalog/aisb.t3.060_iaggad.zh.yaml +100 -0
- package/AISB/catalog/aisb.t3.061_hsgkn.yaml +136 -0
- package/AISB/catalog/aisb.t3.061_hsgkn.zh.yaml +113 -0
- package/AISB/catalog/aisb.t3.062_visionts.yaml +237 -0
- package/AISB/catalog/aisb.t3.062_visionts.zh.yaml +216 -0
- package/AISB/catalog/aisb.t3.063_tsrag.yaml +162 -0
- package/AISB/catalog/aisb.t3.063_tsrag.zh.yaml +138 -0
- package/AISB/catalog/aisb.t3.064_pir.yaml +221 -0
- package/AISB/catalog/aisb.t3.064_pir.zh.yaml +197 -0
- package/AISB/catalog/aisb.t3.065_proteinbinding.yaml +234 -0
- package/AISB/catalog/aisb.t3.065_proteinbinding.zh.yaml +167 -0
- package/AISB/catalog/aisb.t3.066_tropicalattention.yaml +267 -0
- package/AISB/catalog/aisb.t3.066_tropicalattention.zh.yaml +229 -0
- package/AISB/catalog/aisb.t3.067_kanad.yaml +193 -0
- package/AISB/catalog/aisb.t3.067_kanad.zh.yaml +167 -0
- package/AISB/catalog/aisb.t3.068_sempo.yaml +187 -0
- package/AISB/catalog/aisb.t3.068_sempo.zh.yaml +148 -0
- package/AISB/catalog/aisb.t3.069_treehfd.yaml +129 -0
- package/AISB/catalog/aisb.t3.069_treehfd.zh.yaml +111 -0
- package/AISB/catalog/aisb.t3.070_certifiedunlearning.yaml +224 -0
- package/AISB/catalog/aisb.t3.070_certifiedunlearning.zh.yaml +171 -0
- package/AISB/catalog/aisb.t3.071_neuralmjd.yaml +142 -0
- package/AISB/catalog/aisb.t3.071_neuralmjd.zh.yaml +120 -0
- package/AISB/catalog/aisb.t3.072_fedgmt.yaml +181 -0
- package/AISB/catalog/aisb.t3.072_fedgmt.zh.yaml +158 -0
- package/AISB/catalog/aisb.t3.073_rld.yaml +161 -0
- package/AISB/catalog/aisb.t3.073_rld.zh.yaml +129 -0
- package/AISB/catalog/aisb.t3.074_lsvi.yaml +163 -0
- package/AISB/catalog/aisb.t3.074_lsvi.zh.yaml +129 -0
- package/AISB/catalog/aisb.t3.075_treeslicedentropy.yaml +201 -0
- package/AISB/catalog/aisb.t3.075_treeslicedentropy.zh.yaml +148 -0
- package/AISB/catalog/aisb.t3.076_aanet.yaml +169 -0
- package/AISB/catalog/aisb.t3.076_aanet.zh.yaml +129 -0
- package/AISB/catalog/aisb.t3.077_cmnn.yaml +199 -0
- package/AISB/catalog/aisb.t3.077_cmnn.zh.yaml +165 -0
- package/AISB/catalog/aisb.t3.078_conformalanomaly.yaml +146 -0
- package/AISB/catalog/aisb.t3.078_conformalanomaly.zh.yaml +117 -0
- package/AISB/catalog/aisb.t3.079_dpfkmeans.yaml +131 -0
- package/AISB/catalog/aisb.t3.079_dpfkmeans.zh.yaml +104 -0
- package/AISB/catalog/aisb.t3.080_latentscorereweight.yaml +169 -0
- package/AISB/catalog/aisb.t3.080_latentscorereweight.zh.yaml +123 -0
- package/AISB/catalog/aisb.t3.081_qmamba.yaml +150 -0
- package/AISB/catalog/aisb.t3.081_qmamba.zh.yaml +117 -0
- package/AISB/catalog/aisb.t3.082_onlinellmrouting.yaml +160 -0
- package/AISB/catalog/aisb.t3.082_onlinellmrouting.zh.yaml +133 -0
- package/AISB/catalog/aisb.t3.083_starformer.yaml +178 -0
- package/AISB/catalog/aisb.t3.083_starformer.zh.yaml +140 -0
- package/AISB/catalog/aisb.t3.084_ift.yaml +139 -0
- package/AISB/catalog/aisb.t3.084_ift.zh.yaml +111 -0
- package/AISB/catalog/aisb.t3.085_neuralsurv.yaml +183 -0
- package/AISB/catalog/aisb.t3.085_neuralsurv.zh.yaml +143 -0
- package/AISB/catalog/aisb.t3.086_stella.yaml +197 -0
- package/AISB/catalog/aisb.t3.086_stella.zh.yaml +142 -0
- package/AISB/catalog/aisb.t3.087_moses.yaml +167 -0
- package/AISB/catalog/aisb.t3.087_moses.zh.yaml +132 -0
- package/AISB/catalog/aisb.t3.088_channelnorm.yaml +140 -0
- package/AISB/catalog/aisb.t3.088_channelnorm.zh.yaml +109 -0
- package/AISB/catalog/aisb.t3.089_causalvelocity.yaml +730 -0
- package/AISB/catalog/aisb.t3.089_causalvelocity.zh.yaml +668 -0
- package/AISB/catalog/aisb.t3.090_rstib.yaml +144 -0
- package/AISB/catalog/aisb.t3.090_rstib.zh.yaml +109 -0
- package/AISB/catalog/aisb.t3.091_timeawarecausal.yaml +132 -0
- package/AISB/catalog/aisb.t3.091_timeawarecausal.zh.yaml +107 -0
- package/AISB/catalog/aisb.t3.092_kmeanslocalopt.yaml +138 -0
- package/AISB/catalog/aisb.t3.092_kmeanslocalopt.zh.yaml +110 -0
- package/AISB/catalog/aisb.t3.093_fedwmsam.yaml +134 -0
- package/AISB/catalog/aisb.t3.093_fedwmsam.zh.yaml +106 -0
- package/AISB/catalog/aisb.t3.094_boundre.yaml +147 -0
- package/AISB/catalog/aisb.t3.094_boundre.zh.yaml +114 -0
- package/AISB/catalog/aisb.t3.095_fastfeaturecp.yaml +153 -0
- package/AISB/catalog/aisb.t3.095_fastfeaturecp.zh.yaml +118 -0
- package/AISB/catalog/aisb.t3.096_m3svm.yaml +189 -0
- package/AISB/catalog/aisb.t3.096_m3svm.zh.yaml +149 -0
- package/AISB/catalog/aisb.t3.097_wassersteintl.yaml +212 -0
- package/AISB/catalog/aisb.t3.097_wassersteintl.zh.yaml +169 -0
- package/AISB/catalog/aisb.t3.098_xmahalanobis.yaml +171 -0
- package/AISB/catalog/aisb.t3.098_xmahalanobis.zh.yaml +127 -0
- package/AISB/catalog/aisb.t3.099_ollalanding.yaml +248 -0
- package/AISB/catalog/aisb.t3.099_ollalanding.zh.yaml +182 -0
- package/AISB/catalog/aisb.t3.100_invmissingdata.yaml +179 -0
- package/AISB/catalog/aisb.t3.100_invmissingdata.zh.yaml +150 -0
- package/AISB/catalog/aisb.t3.101_acia.yaml +164 -0
- package/AISB/catalog/aisb.t3.101_acia.zh.yaml +109 -0
- package/AISB/catalog/aisb.t3.102_stochasticff.yaml +178 -0
- package/AISB/catalog/aisb.t3.102_stochasticff.zh.yaml +130 -0
- package/AISB/catalog/aisb.t3.103_qdcp.yaml +150 -0
- package/AISB/catalog/aisb.t3.103_qdcp.zh.yaml +116 -0
- package/AISB/catalog/aisb.t3.104_balancedactiveinf.yaml +137 -0
- package/AISB/catalog/aisb.t3.104_balancedactiveinf.zh.yaml +104 -0
- package/AISB/catalog/aisb.t3.105_binaryclasseval.yaml +161 -0
- package/AISB/catalog/aisb.t3.105_binaryclasseval.zh.yaml +130 -0
- package/AISB/image/001_aisb.t3.001_savvy.jpg +0 -0
- package/AISB/image/002_aisb.t3.002_pinet.jpg +0 -0
- package/AISB/image/003_aisb.t3.003_dmsqd.jpg +0 -0
- package/AISB/image/004_aisb.t3.004_decentralattn.jpg +0 -0
- package/AISB/image/005_aisb.t3.005_tsae.jpg +0 -0
- package/AISB/image/006_aisb.t3.006_physense.jpg +0 -0
- package/AISB/image/007_aisb.t3.007_reasoningiqa.jpg +0 -0
- package/AISB/image/008_aisb.t3.008_meanflows.jpg +0 -0
- package/AISB/image/009_aisb.t3.009_scoremissing.jpg +0 -0
- package/AISB/image/010_aisb.t3.010_suitabilityfilter.jpg +0 -0
- package/AISB/image/011_aisb.t3.011_osd.jpg +0 -0
- package/AISB/image/012_aisb.t3.012_efficientqat.jpg +0 -0
- package/AISB/image/013_aisb.t3.013_appl.jpg +0 -0
- package/AISB/image/014_aisb.t3.014_piguard.jpg +0 -0
- package/AISB/image/015_aisb.t3.015_frspec.jpg +0 -0
- package/AISB/image/016_aisb.t3.016_mathfusion.jpg +0 -0
- package/AISB/image/017_aisb.t3.017_multimodalglp.jpg +0 -0
- package/AISB/image/018_aisb.t3.018_cotsynth.jpg +0 -0
- package/AISB/image/019_aisb.t3.019_dyscaleut.jpg +0 -0
- package/AISB/image/020_aisb.t3.020_aristotle.jpg +0 -0
- package/AISB/image/021_aisb.t3.021_tokenrecycling.jpg +0 -0
- package/AISB/image/022_aisb.t3.022_chainofreasoning.jpg +0 -0
- package/AISB/image/023_aisb.t3.023_guidedembed.jpg +0 -0
- package/AISB/image/024_aisb.t3.024_outputcentric.jpg +0 -0
- package/AISB/image/025_aisb.t3.025_deeper.jpg +0 -0
- package/AISB/image/026_aisb.t3.026_gartkg.jpg +0 -0
- package/AISB/image/027_aisb.t3.027_citeeval.jpg +0 -0
- package/AISB/image/028_aisb.t3.028_sbam.jpg +0 -0
- package/AISB/image/029_aisb.t3.029_cdqgeoembed.jpg +0 -0
- package/AISB/image/030_aisb.t3.030_processrm.jpg +0 -0
- package/AISB/image/031_aisb.t3.031_circuitstability.jpg +0 -0
- package/AISB/image/032_aisb.t3.032_ptsolver.jpg +0 -0
- package/AISB/image/033_aisb.t3.033_gcse.jpg +0 -0
- package/AISB/image/034_aisb.t3.034_ensemblewm.jpg +0 -0
- package/AISB/image/035_aisb.t3.035_moralvalueswa.jpg +0 -0
- package/AISB/image/036_aisb.t3.036_weakstrongpref.jpg +0 -0
- package/AISB/image/037_aisb.t3.037_dementiamask.jpg +0 -0
- package/AISB/image/038_aisb.t3.038_tinysam.jpg +0 -0
- package/AISB/image/039_aisb.t3.039_calf.jpg +0 -0
- package/AISB/image/040_aisb.t3.040_graniteguardian.jpg +0 -0
- package/AISB/image/041_aisb.t3.041_amdm.jpg +0 -0
- package/AISB/image/042_aisb.t3.042_xpatch.jpg +0 -0
- package/AISB/image/043_aisb.t3.043_vhm.jpg +0 -0
- package/AISB/image/044_aisb.t3.044_rgvi.jpg +0 -0
- package/AISB/image/045_aisb.t3.045_pslstm.jpg +0 -0
- package/AISB/image/046_aisb.t3.046_nonstatts.jpg +0 -0
- package/AISB/image/047_aisb.t3.047_timepfn.jpg +0 -0
- package/AISB/image/048_aisb.t3.048_proxyspex.jpg +0 -0
- package/AISB/image/049_aisb.t3.049_hogwildinference.jpg +0 -0
- package/AISB/image/050_aisb.t3.050_causalpfn.jpg +0 -0
- package/AISB/image/051_aisb.t3.051_flashtp.jpg +0 -0
- package/AISB/image/052_aisb.t3.052_nsdiff.jpg +0 -0
- package/AISB/image/053_aisb.t3.053_k2vae.jpg +0 -0
- package/AISB/image/054_aisb.t3.054_timebase.jpg +0 -0
- package/AISB/image/055_aisb.t3.055_csbrain.jpg +0 -0
- package/AISB/image/056_aisb.t3.056_infosam.jpg +0 -0
- package/AISB/image/057_aisb.t3.057_mdreid.jpg +0 -0
- package/AISB/image/058_aisb.t3.058_mindglitch.jpg +0 -0
- package/AISB/image/059_aisb.t3.059_selfsupervised.jpg +0 -0
- package/AISB/image/060_aisb.t3.060_iaggad.jpg +0 -0
- package/AISB/image/061_aisb.t3.061_hsgkn.jpg +0 -0
- package/AISB/image/062_aisb.t3.062_visionts.jpg +0 -0
- package/AISB/image/063_aisb.t3.063_tsrag.jpg +0 -0
- package/AISB/image/064_aisb.t3.064_pir.jpg +0 -0
- package/AISB/image/065_aisb.t3.065_proteinbinding.jpg +0 -0
- package/AISB/image/066_aisb.t3.066_tropicalattention.jpg +0 -0
- package/AISB/image/067_aisb.t3.067_kanad.jpg +0 -0
- package/AISB/image/068_aisb.t3.068_sempo.jpg +0 -0
- package/AISB/image/069_aisb.t3.069_treehfd.jpg +0 -0
- package/AISB/image/070_aisb.t3.070_certifiedunlearning.jpg +0 -0
- package/AISB/image/071_aisb.t3.071_neuralmjd.jpg +0 -0
- package/AISB/image/072_aisb.t3.072_fedgmt.jpg +0 -0
- package/AISB/image/073_aisb.t3.073_rld.jpg +0 -0
- package/AISB/image/074_aisb.t3.074_lsvi.jpg +0 -0
- package/AISB/image/075_aisb.t3.075_treeslicedentropy.jpg +0 -0
- package/AISB/image/076_aisb.t3.076_aanet.jpg +0 -0
- package/AISB/image/077_aisb.t3.077_cmnn.jpg +0 -0
- package/AISB/image/078_aisb.t3.078_conformalanomaly.jpg +0 -0
- package/AISB/image/079_aisb.t3.079_dpfkmeans.jpg +0 -0
- package/AISB/image/080_aisb.t3.080_latentscorereweight.jpg +0 -0
- package/AISB/image/081_aisb.t3.081_qmamba.jpg +0 -0
- package/AISB/image/082_aisb.t3.082_onlinellmrouting.jpg +0 -0
- package/AISB/image/083_aisb.t3.083_starformer.jpg +0 -0
- package/AISB/image/084_aisb.t3.084_ift.jpg +0 -0
- package/AISB/image/085_aisb.t3.085_neuralsurv.jpg +0 -0
- package/AISB/image/086_aisb.t3.086_stella.jpg +0 -0
- package/AISB/image/087_aisb.t3.087_moses.jpg +0 -0
- package/AISB/image/088_aisb.t3.088_channelnorm.jpg +0 -0
- package/AISB/image/089_aisb.t3.089_causalvelocity.jpg +0 -0
- package/AISB/image/090_aisb.t3.090_rstib.jpg +0 -0
- package/AISB/image/091_aisb.t3.091_timeawarecausal.jpg +0 -0
- package/AISB/image/092_aisb.t3.092_kmeanslocalopt.jpg +0 -0
- package/AISB/image/093_aisb.t3.093_fedwmsam.jpg +0 -0
- package/AISB/image/094_aisb.t3.094_boundre.jpg +0 -0
- package/AISB/image/095_aisb.t3.095_fastfeaturecp.jpg +0 -0
- package/AISB/image/096_aisb.t3.096_m3svm.jpg +0 -0
- package/AISB/image/097_aisb.t3.097_wassersteintl.jpg +0 -0
- package/AISB/image/098_aisb.t3.098_xmahalanobis.jpg +0 -0
- package/AISB/image/099_aisb.t3.099_ollalanding.jpg +0 -0
- package/AISB/image/100_aisb.t3.100_invmissingdata.jpg +0 -0
- package/AISB/image/101_aisb.t3.101_acia.jpg +0 -0
- package/AISB/image/102_aisb.t3.102_stochasticff.jpg +0 -0
- package/AISB/image/103_aisb.t3.103_qdcp.jpg +0 -0
- package/AISB/image/104_aisb.t3.104_balancedactiveinf.jpg +0 -0
- package/AISB/image/105_aisb.t3.105_binaryclasseval.jpg +0 -0
- package/AISB/image/106_aisb.t1.reasoning_lite.jpg +0 -0
- package/AISB/image/107_aisb.t2.paper_audit.jpg +0 -0
- package/AISB/image/108_aisb.t3.multi_gpu_search.jpg +0 -0
- package/AISB/image/109_aisb.t3.tdc_admet.jpg +0 -0
- package/AISB/image/aisb.b1.agentic_coding.svg +16 -0
- package/AISB/image/aisb.b10.climate_earth.svg +16 -0
- package/AISB/image/aisb.b11.model_efficiency.svg +16 -0
- package/AISB/image/aisb.b12.embodied_ai.svg +16 -0
- package/AISB/image/aisb.b2.agent_systems.svg +16 -0
- package/AISB/image/aisb.b3.self_evolving_rl.svg +16 -0
- package/AISB/image/aisb.b4.lm_reasoning.svg +16 -0
- package/AISB/image/aisb.b5.math_proof.svg +16 -0
- package/AISB/image/aisb.b6.research_process.svg +16 -0
- package/AISB/image/aisb.b7.multimodal_fusion.svg +16 -0
- package/AISB/image/aisb.b8.lifesci_drug.svg +16 -0
- package/AISB/image/aisb.b9.material_science.svg +16 -0
- package/README.md +196 -32
- package/bin/ds.js +924 -66
- package/docs/en/00_QUICK_START.md +195 -18
- package/docs/en/01_SETTINGS_REFERENCE.md +468 -96
- package/docs/en/02_START_RESEARCH_GUIDE.md +26 -5
- package/docs/en/03_QQ_CONNECTOR_GUIDE.md +14 -3
- package/docs/en/04_LINGZHU_CONNECTOR_GUIDE.md +2 -0
- package/docs/en/05_TUI_GUIDE.md +171 -2
- package/docs/en/07_MEMORY_AND_MCP.md +38 -2
- package/docs/en/09_DOCTOR.md +78 -7
- package/docs/en/10_WEIXIN_CONNECTOR_GUIDE.md +38 -1
- package/docs/en/11_LICENSE_AND_RISK.md +4 -0
- package/docs/en/12_GUIDED_WORKFLOW_TOUR.md +15 -0
- package/docs/en/14_PROMPT_SKILLS_AND_MCP_GUIDE.md +9 -0
- package/docs/en/15_CODEX_PROVIDER_SETUP.md +624 -180
- package/docs/en/16_TELEGRAM_CONNECTOR_GUIDE.md +14 -0
- package/docs/en/17_WHATSAPP_CONNECTOR_GUIDE.md +14 -0
- package/docs/en/18_FEISHU_CONNECTOR_GUIDE.md +14 -0
- package/docs/en/21_LOCAL_MODEL_BACKENDS_GUIDE.md +386 -0
- package/docs/en/22_BENCHSTORE_YAML_REFERENCE.md +469 -0
- package/docs/en/23_BENCHSTORE_GITHUB_RELEASES_SPEC.md +316 -0
- package/docs/en/24_CLAUDE_CODE_PROVIDER_SETUP.md +469 -0
- package/docs/en/25_OPENCODE_PROVIDER_SETUP.md +653 -0
- package/docs/en/26_CITATION_AND_ATTRIBUTION.md +119 -0
- package/docs/en/27_KIMI_CODE_PROVIDER_SETUP.md +180 -0
- package/docs/en/28_DISCORD_CONNECTOR_GUIDE.md +61 -0
- package/docs/en/29_SLACK_CONNECTOR_GUIDE.md +60 -0
- package/docs/en/30_SETTINGS_CONTROL_CENTER_GUIDE.md +371 -0
- package/docs/en/{19_LOCAL_BROWSER_AUTH.md → 31_LOCAL_BROWSER_AUTH.md} +1 -1
- package/docs/en/32_WINDOWS_WSL2_DEPLOYMENT_GUIDE.md +273 -0
- package/docs/en/33_WORKSPACE_EXPLORER_QA.md +121 -0
- package/docs/en/91_DEVELOPMENT.md +266 -0
- package/docs/en/99_ACKNOWLEDGEMENTS.md +24 -19
- package/docs/en/README.md +48 -7
- package/docs/images/admin/admin-connectors-health-en.png +0 -0
- package/docs/images/admin/admin-controllers-en.png +0 -0
- package/docs/images/admin/admin-diagnostics-en.png +0 -0
- package/docs/images/admin/admin-errors-en.png +0 -0
- package/docs/images/admin/admin-issues-en.png +0 -0
- package/docs/images/admin/admin-logs-en.png +0 -0
- package/docs/images/admin/admin-quest-detail-en.png +0 -0
- package/docs/images/admin/admin-quests-en.png +0 -0
- package/docs/images/admin/admin-repairs-en.png +0 -0
- package/docs/images/admin/admin-runtime-en.png +0 -0
- package/docs/images/admin/admin-search-en.png +0 -0
- package/docs/images/admin/admin-stats-en.png +0 -0
- package/docs/images/admin/admin-summary-en.png +0 -0
- package/docs/images/connectors/connector-discord-en.png +0 -0
- package/docs/images/connectors/connector-feishu-en.png +0 -0
- package/docs/images/connectors/connector-lingzhu-en.png +0 -0
- package/docs/images/connectors/connector-qq-en.png +0 -0
- package/docs/images/connectors/connector-slack-en.png +0 -0
- package/docs/images/connectors/connector-telegram-en.png +0 -0
- package/docs/images/connectors/connector-weixin-en.png +0 -0
- package/docs/images/connectors/connector-whatsapp-en.png +0 -0
- package/docs/images/settings/settings-baselines-en.png +0 -0
- package/docs/images/settings/settings-config-en.png +0 -0
- package/docs/images/settings/settings-connectors-overview-en.png +0 -0
- package/docs/images/settings/settings-deepxiv-en.png +0 -0
- package/docs/images/settings/settings-mcp-servers-en.png +0 -0
- package/docs/images/settings/settings-plugins-en.png +0 -0
- package/docs/images/settings/settings-runners-en.png +0 -0
- package/docs/zh/00_QUICK_START.md +142 -18
- package/docs/zh/01_SETTINGS_REFERENCE.md +219 -98
- package/docs/zh/02_START_RESEARCH_GUIDE.md +26 -5
- package/docs/zh/05_TUI_GUIDE.md +171 -2
- package/docs/zh/07_MEMORY_AND_MCP.md +29 -2
- package/docs/zh/09_DOCTOR.md +54 -8
- package/docs/zh/10_WEIXIN_CONNECTOR_GUIDE.md +24 -1
- package/docs/zh/11_LICENSE_AND_RISK.md +4 -0
- package/docs/zh/12_GUIDED_WORKFLOW_TOUR.md +15 -0
- package/docs/zh/14_PROMPT_SKILLS_AND_MCP_GUIDE.md +9 -0
- package/docs/zh/15_CODEX_PROVIDER_SETUP.md +552 -181
- package/docs/zh/21_LOCAL_MODEL_BACKENDS_GUIDE.md +384 -0
- package/docs/zh/22_BENCHSTORE_YAML_REFERENCE.md +459 -0
- package/docs/zh/23_BENCHSTORE_GITHUB_RELEASES_SPEC.md +287 -0
- package/docs/zh/23_CLAUDE_RUNNER_GUIDE.md +103 -0
- package/docs/zh/24_CLAUDE_CODE_PROVIDER_SETUP.md +460 -0
- package/docs/zh/25_OPENCODE_PROVIDER_SETUP.md +660 -0
- package/docs/zh/26_CITATION_AND_ATTRIBUTION.md +102 -0
- package/docs/zh/27_KIMI_CODE_PROVIDER_SETUP.md +51 -0
- package/docs/zh/{19_LOCAL_BROWSER_AUTH.md → 31_LOCAL_BROWSER_AUTH.md} +1 -1
- package/docs/zh/32_WINDOWS_WSL2_DEPLOYMENT_GUIDE.md +264 -0
- package/docs/zh/33_WORKSPACE_EXPLORER_QA.md +127 -0
- package/docs/zh/99_ACKNOWLEDGEMENTS.md +23 -19
- package/docs/zh/README.md +33 -7
- package/install.sh +168 -20
- package/package.json +5 -1
- package/pyproject.toml +2 -1
- package/src/deepscientist/__init__.py +1 -1
- package/src/deepscientist/acp/envelope.py +13 -0
- package/src/deepscientist/admin/__init__.py +3 -0
- package/src/deepscientist/admin/charts.py +681 -0
- package/src/deepscientist/admin/logs.py +119 -0
- package/src/deepscientist/admin/repairs.py +217 -0
- package/src/deepscientist/admin/service.py +1310 -0
- package/src/deepscientist/admin/system_info.py +700 -0
- package/src/deepscientist/admin/tasks.py +465 -0
- package/src/deepscientist/admin/tool_metrics.py +600 -0
- package/src/deepscientist/artifact/guidance.py +8 -4
- package/src/deepscientist/artifact/schemas.py +115 -0
- package/src/deepscientist/artifact/service.py +4268 -260
- package/src/deepscientist/bash_exec/monitor.py +30 -3
- package/src/deepscientist/bash_exec/service.py +134 -1
- package/src/deepscientist/benchstore/__init__.py +4 -0
- package/src/deepscientist/benchstore/prompt_builder.py +224 -0
- package/src/deepscientist/benchstore/service.py +1716 -0
- package/src/deepscientist/bridges/connectors.py +8 -2
- package/src/deepscientist/channels/weixin_ilink.py +8 -1
- package/src/deepscientist/cli.py +92 -17
- package/src/deepscientist/codex_cli_compat.py +187 -74
- package/src/deepscientist/config/models.py +82 -11
- package/src/deepscientist/config/service.py +1077 -93
- package/src/deepscientist/connector/weixin_support.py +48 -17
- package/src/deepscientist/daemon/api/handlers.py +827 -235
- package/src/deepscientist/daemon/api/router.py +81 -1
- package/src/deepscientist/daemon/app.py +1512 -85
- package/src/deepscientist/diagnostics/__init__.py +6 -0
- package/src/deepscientist/diagnostics/runner_failures.py +277 -0
- package/src/deepscientist/doctor.py +407 -56
- package/src/deepscientist/evidence_packets.py +590 -0
- package/src/deepscientist/home.py +52 -4
- package/src/deepscientist/kimi_cli_compat.py +50 -0
- package/src/deepscientist/latex_runtime.py +2 -2
- package/src/deepscientist/mcp/context.py +2 -0
- package/src/deepscientist/mcp/schemas.py +114 -0
- package/src/deepscientist/mcp/server.py +1566 -126
- package/src/deepscientist/memory/service.py +203 -16
- package/src/deepscientist/process_control.py +8 -1
- package/src/deepscientist/prompts/builder.py +850 -88
- package/src/deepscientist/quest/__init__.py +2 -2
- package/src/deepscientist/quest/layout.py +12 -1
- package/src/deepscientist/quest/node_traces.py +10 -0
- package/src/deepscientist/quest/service.py +1852 -161
- package/src/deepscientist/quest/stage_views.py +1 -1
- package/src/deepscientist/runners/__init__.py +18 -0
- package/src/deepscientist/runners/base.py +89 -1
- package/src/deepscientist/runners/builtins.py +13 -1
- package/src/deepscientist/runners/claude.py +391 -0
- package/src/deepscientist/runners/codex.py +480 -35
- package/src/deepscientist/runners/codex_telemetry.py +127 -0
- package/src/deepscientist/runners/kimi.py +334 -0
- package/src/deepscientist/runners/metadata.py +68 -0
- package/src/deepscientist/runners/opencode.py +414 -0
- package/src/deepscientist/runners/runtime_overrides.py +100 -0
- package/src/deepscientist/runners/simple_cli.py +538 -0
- package/src/deepscientist/runtime_storage.py +303 -0
- package/src/deepscientist/shared.py +80 -16
- package/src/deepscientist/skills/installer.py +37 -0
- package/src/deepscientist/skills/registry.py +2 -0
- package/src/deepscientist/tinytex.py +2 -2
- package/src/deepscientist/tui.py +10 -3
- package/src/prompts/benchstore/system.md +77 -0
- package/src/prompts/connectors/qq.md +33 -2
- package/src/prompts/connectors/weixin.md +208 -23
- package/src/prompts/contracts/admin_ops.md +74 -0
- package/src/prompts/contracts/admin_ops_knowledge.md +138 -0
- package/src/prompts/contracts/shared_interaction.md +5 -10
- package/src/prompts/start_setup/system.md +422 -0
- package/src/prompts/system.md +411 -304
- package/src/prompts/system_copilot.md +89 -0
- package/src/skills/analysis-campaign/SKILL.md +239 -578
- package/src/skills/analysis-campaign/references/artifact-flow-examples.md +102 -0
- package/src/skills/analysis-campaign/references/boundary-cases.md +98 -0
- package/src/skills/analysis-campaign/references/campaign-checklist-template.md +39 -24
- package/src/skills/analysis-campaign/references/campaign-design.md +26 -10
- package/src/skills/analysis-campaign/references/campaign-plan-template.md +53 -54
- package/src/skills/analysis-campaign/references/operational-guidance.md +97 -0
- package/src/skills/analysis-campaign/references/writing-facing-slice-examples.md +10 -20
- package/src/skills/baseline/SKILL.md +183 -461
- package/src/skills/baseline/references/artifact-flow-examples.md +106 -0
- package/src/skills/baseline/references/artifact-payload-examples.md +1 -1
- package/src/skills/baseline/references/baseline-checklist-template.md +27 -35
- package/src/skills/baseline/references/baseline-plan-template.md +37 -76
- package/src/skills/baseline/references/boundary-cases.md +86 -0
- package/src/skills/baseline/references/codebase-audit-checklist.md +2 -6
- package/src/skills/baseline/references/comparability-contract.md +7 -12
- package/src/skills/baseline/references/operational-guidance.md +56 -0
- package/src/skills/baseline/references/route-selection.md +5 -25
- package/src/skills/decision/SKILL.md +113 -306
- package/src/skills/decision/references/checkpoint-memory-template.md +47 -0
- package/src/skills/decision/references/operational-guidance.md +94 -0
- package/src/skills/decision/references/research-route-criteria.md +7 -8
- package/src/skills/decision/references/strategic-decision-template.md +13 -26
- package/src/skills/experiment/SKILL.md +132 -670
- package/src/skills/experiment/references/execution-playbook.md +374 -0
- package/src/skills/experiment/references/main-experiment-checklist-template.md +26 -2
- package/src/skills/experiment/references/main-experiment-plan-template.md +28 -17
- package/src/skills/experiment/references/operational-guidance.md +108 -0
- package/src/skills/finalize/SKILL.md +62 -0
- package/src/skills/finalize/references/checkpoint-memory-template.md +49 -0
- package/src/skills/finalize/references/resume-packet-template.md +7 -0
- package/src/skills/idea/SKILL.md +228 -15
- package/src/skills/idea/references/controlled-brainstorming-playbook.md +78 -0
- package/src/skills/idea/references/current-board-packet-template.md +61 -0
- package/src/skills/idea/references/high-value-idea-sourcing.md +119 -0
- package/src/skills/idea/references/idea-generation-playbook.md +21 -0
- package/src/skills/idea/references/idea-thinking-flow.md +6 -0
- package/src/skills/idea/references/literature-survey-template.md +3 -0
- package/src/skills/idea/references/objective-contract-template.md +54 -0
- package/src/skills/idea/references/outline-seeding-example.md +56 -0
- package/src/skills/idea/references/pre-idea-draft-template.md +105 -0
- package/src/skills/idea/references/related-work-playbook.md +75 -2
- package/src/skills/idea/references/research-history-playbook.md +114 -0
- package/src/skills/idea/references/selection-gate.md +58 -6
- package/src/skills/intake-audit/SKILL.md +43 -2
- package/src/skills/intake-audit/references/state-audit-template.md +10 -0
- package/src/skills/nature-data/SKILL.md +128 -0
- package/src/skills/nature-data/UPSTREAM_LICENSE.txt +21 -0
- package/src/skills/nature-data/agents/openai.yaml +4 -0
- package/src/skills/nature-data/references/chinese-author-alignment.md +84 -0
- package/src/skills/nature-data/references/fair-metadata-checklist.md +105 -0
- package/src/skills/nature-data/references/policy-principles.md +103 -0
- package/src/skills/nature-data/references/repository-and-identifiers.md +96 -0
- package/src/skills/nature-data/references/source-basis.md +54 -0
- package/src/skills/nature-data/references/statement-patterns.md +153 -0
- package/src/skills/nature-figure/SKILL.md +197 -0
- package/src/skills/nature-figure/UPSTREAM_LICENSE.txt +21 -0
- package/src/skills/nature-figure/agents/openai.yaml +4 -0
- package/src/skills/nature-figure/evals/evals.json +37 -0
- package/src/skills/nature-figure/references/api.md +428 -0
- package/src/skills/nature-figure/references/backend-selection.md +100 -0
- package/src/skills/nature-figure/references/chart-types.md +281 -0
- package/src/skills/nature-figure/references/common-patterns.md +349 -0
- package/src/skills/nature-figure/references/design-theory.md +436 -0
- package/src/skills/nature-figure/references/figure-contract.md +93 -0
- package/src/skills/nature-figure/references/nature-2026-observations.md +112 -0
- package/src/skills/nature-figure/references/qa-contract.md +119 -0
- package/src/skills/nature-figure/references/r-template-index.md +66 -0
- package/src/skills/nature-figure/references/r-workflow.md +161 -0
- package/src/skills/nature-figure/references/tutorials.md +250 -0
- package/src/skills/nature-paper2ppt/SKILL.md +507 -0
- package/src/skills/nature-paper2ppt/UPSTREAM_LICENSE.txt +21 -0
- package/src/skills/nature-paper2ppt/agents/openai.yaml +4 -0
- package/src/skills/nature-polishing/SKILL.md +385 -0
- package/src/skills/nature-polishing/UPSTREAM_LICENSE.txt +21 -0
- package/src/skills/nature-polishing/agents/openai.yaml +4 -0
- package/src/skills/nature-polishing/references/phrasebank-playbook.md +162 -0
- package/src/skills/nature-polishing/references/section-moves.md +240 -0
- package/src/skills/nature-polishing/references/style-guardrails.md +94 -0
- package/src/skills/nature-polishing/references/writing-strategy.md +148 -0
- package/src/skills/optimize/SKILL.md +177 -1568
- package/src/skills/optimize/references/brief-shaping-playbook.md +95 -0
- package/src/skills/optimize/references/candidate-board-template.md +13 -0
- package/src/skills/optimize/references/candidate-ranking-template.md +51 -0
- package/src/skills/optimize/references/codegen-route-playbook.md +50 -0
- package/src/skills/optimize/references/debug-response-template.md +29 -0
- package/src/skills/optimize/references/frontier-review-template.md +32 -0
- package/src/skills/optimize/references/fusion-playbook.md +36 -0
- package/src/skills/optimize/references/method-brief-template.md +73 -0
- package/src/skills/optimize/references/operational-guidance.md +621 -0
- package/src/skills/optimize/references/optimization-memory-template.md +30 -0
- package/src/skills/optimize/references/optimize-checklist-template.md +18 -0
- package/src/skills/optimize/references/plateau-response-playbook.md +28 -0
- package/src/skills/optimize/references/prompt-patterns.md +49 -0
- package/src/skills/paper-outline/SKILL.md +227 -0
- package/src/skills/paper-outline/references/outline-patterns.md +87 -0
- package/src/skills/paper-plot/SKILL.md +79 -0
- package/src/skills/paper-plot/agents/openai.yaml +4 -0
- package/src/skills/paper-plot/references/bar_grouped_hatch.md +96 -0
- package/src/skills/paper-plot/references/bar_paired_delta.md +72 -0
- package/src/skills/paper-plot/references/line_confidence_band.md +75 -0
- package/src/skills/paper-plot/references/line_loss_with_inset.md +65 -0
- package/src/skills/paper-plot/references/line_training_curve.md +44 -0
- package/src/skills/paper-plot/references/radar_dual_series.md +59 -0
- package/src/skills/paper-plot/references/scatter_broken_axis.md +59 -0
- package/src/skills/paper-plot/references/scatter_tsne_cluster.md +72 -0
- package/src/skills/paper-plot/scripts/bar_memevolve.py +109 -0
- package/src/skills/paper-plot/scripts/bar_spice.py +166 -0
- package/src/skills/paper-plot/scripts/line_aime.py +94 -0
- package/src/skills/paper-plot/scripts/line_loss_inset.py +157 -0
- package/src/skills/paper-plot/scripts/line_selfdistill.py +168 -0
- package/src/skills/paper-plot/scripts/radar_dora.py +151 -0
- package/src/skills/paper-plot/scripts/scatter_break.py +169 -0
- package/src/skills/paper-plot/scripts/scatter_tsne.py +133 -0
- package/src/skills/rebuttal/SKILL.md +9 -0
- package/src/skills/references/tool-usage-by-stage.md +438 -0
- package/src/skills/review/SKILL.md +105 -7
- package/src/skills/science/PROVENANCE.md +44 -0
- package/src/skills/science/SKILL.md +137 -0
- package/src/skills/science/references/artifact-science-tool.md +110 -0
- package/src/skills/science/references/claim-type-discipline.md +56 -0
- package/src/skills/science/references/domain-index.md +422 -0
- package/src/skills/science/references/hpc-via-bash-exec.md +42 -0
- package/src/skills/science/references/package-check-playbook.md +64 -0
- package/src/skills/science/references/package-index.min.json +3616 -0
- package/src/skills/science/references/packages/abinit.md +80 -0
- package/src/skills/science/references/packages/acts.md +73 -0
- package/src/skills/science/references/packages/aiida-core.md +80 -0
- package/src/skills/science/references/packages/alamode.md +80 -0
- package/src/skills/science/references/packages/amuse.md +88 -0
- package/src/skills/science/references/packages/anndata.md +88 -0
- package/src/skills/science/references/packages/arbor.md +80 -0
- package/src/skills/science/references/packages/arc.md +73 -0
- package/src/skills/science/references/packages/astropy.md +88 -0
- package/src/skills/science/references/packages/astroquery.md +88 -0
- package/src/skills/science/references/packages/atomate2.md +80 -0
- package/src/skills/science/references/packages/atomsmltr.md +73 -0
- package/src/skills/science/references/packages/awkward.md +73 -0
- package/src/skills/science/references/packages/batman.md +88 -0
- package/src/skills/science/references/packages/biopython.md +88 -0
- package/src/skills/science/references/packages/bloqade.md +73 -0
- package/src/skills/science/references/packages/brian2.md +73 -0
- package/src/skills/science/references/packages/bullet3.md +73 -0
- package/src/skills/science/references/packages/calculix.md +80 -0
- package/src/skills/science/references/packages/cantera.md +73 -0
- package/src/skills/science/references/packages/cavity-md-ipi.md +80 -0
- package/src/skills/science/references/packages/ccdproc.md +88 -0
- package/src/skills/science/references/packages/celerite2.md +88 -0
- package/src/skills/science/references/packages/cellrank.md +73 -0
- package/src/skills/science/references/packages/cesm.md +80 -0
- package/src/skills/science/references/packages/chemicals.md +73 -0
- package/src/skills/science/references/packages/chempy.md +73 -0
- package/src/skills/science/references/packages/cirq.md +73 -0
- package/src/skills/science/references/packages/coffea.md +73 -0
- package/src/skills/science/references/packages/cp2k.md +88 -0
- package/src/skills/science/references/packages/custodian.md +80 -0
- package/src/skills/science/references/packages/dart.md +73 -0
- package/src/skills/science/references/packages/datamol.md +88 -0
- package/src/skills/science/references/packages/dd4hep.md +73 -0
- package/src/skills/science/references/packages/dealii.md +80 -0
- package/src/skills/science/references/packages/deepchem.md +88 -0
- package/src/skills/science/references/packages/delphes.md +73 -0
- package/src/skills/science/references/packages/devito.md +80 -0
- package/src/skills/science/references/packages/dftb.md +88 -0
- package/src/skills/science/references/packages/dftd4.md +88 -0
- package/src/skills/science/references/packages/dftk-jl.md +80 -0
- package/src/skills/science/references/packages/dolfinx.md +80 -0
- package/src/skills/science/references/packages/drake.md +73 -0
- package/src/skills/science/references/packages/dumux.md +73 -0
- package/src/skills/science/references/packages/elk.md +80 -0
- package/src/skills/science/references/packages/elmerfem.md +80 -0
- package/src/skills/science/references/packages/enzo-e.md +88 -0
- package/src/skills/science/references/packages/espresso.md +80 -0
- package/src/skills/science/references/packages/exoplanet.md +88 -0
- package/src/skills/science/references/packages/fairroot.md +73 -0
- package/src/skills/science/references/packages/fbpic.md +80 -0
- package/src/skills/science/references/packages/fdtdbath-meep.md +80 -0
- package/src/skills/science/references/packages/geant4.md +73 -0
- package/src/skills/science/references/packages/geosx.md +80 -0
- package/src/skills/science/references/packages/gprmax.md +80 -0
- package/src/skills/science/references/packages/gromacs.md +80 -0
- package/src/skills/science/references/packages/gwaslab.md +73 -0
- package/src/skills/science/references/packages/gz-sim.md +73 -0
- package/src/skills/science/references/packages/hail.md +88 -0
- package/src/skills/science/references/packages/hiphive.md +80 -0
- package/src/skills/science/references/packages/hoomd-blue.md +80 -0
- package/src/skills/science/references/packages/itensor.md +73 -0
- package/src/skills/science/references/packages/itensors-jl.md +73 -0
- package/src/skills/science/references/packages/jdftx.md +73 -0
- package/src/skills/science/references/packages/jobflow.md +80 -0
- package/src/skills/science/references/packages/kadanoffbaym-jl.md +73 -0
- package/src/skills/science/references/packages/kite.md +80 -0
- package/src/skills/science/references/packages/kratos.md +80 -0
- package/src/skills/science/references/packages/kwant.md +73 -0
- package/src/skills/science/references/packages/lammps.md +80 -0
- package/src/skills/science/references/packages/lightkurve.md +88 -0
- package/src/skills/science/references/packages/limix.md +73 -0
- package/src/skills/science/references/packages/maxwelllink.md +80 -0
- package/src/skills/science/references/packages/mcdc.md +73 -0
- package/src/skills/science/references/packages/meep.md +80 -0
- package/src/skills/science/references/packages/mfem.md +80 -0
- package/src/skills/science/references/packages/mitgcm.md +73 -0
- package/src/skills/science/references/packages/modflow6.md +73 -0
- package/src/skills/science/references/packages/molecool.md +73 -0
- package/src/skills/science/references/packages/mom6.md +73 -0
- package/src/skills/science/references/packages/moose.md +80 -0
- package/src/skills/science/references/packages/mpas-model.md +73 -0
- package/src/skills/science/references/packages/mujoco.md +73 -0
- package/src/skills/science/references/packages/mumax3.md +73 -0
- package/src/skills/science/references/packages/nekrs.md +80 -0
- package/src/skills/science/references/packages/nessi.md +73 -0
- package/src/skills/science/references/packages/nest-simulator.md +73 -0
- package/src/skills/science/references/packages/netket.md +73 -0
- package/src/skills/science/references/packages/neuron.md +73 -0
- package/src/skills/science/references/packages/nextflow.md +88 -0
- package/src/skills/science/references/packages/nwchem.md +88 -0
- package/src/skills/science/references/packages/openbabel.md +88 -0
- package/src/skills/science/references/packages/openems.md +80 -0
- package/src/skills/science/references/packages/openff-toolkit.md +88 -0
- package/src/skills/science/references/packages/openfoam-dev.md +80 -0
- package/src/skills/science/references/packages/openmc.md +73 -0
- package/src/skills/science/references/packages/openmm.md +80 -0
- package/src/skills/science/references/packages/openmoc.md +73 -0
- package/src/skills/science/references/packages/openmx.md +80 -0
- package/src/skills/science/references/packages/opensees.md +80 -0
- package/src/skills/science/references/packages/opensn.md +80 -0
- package/src/skills/science/references/packages/opm-simulators.md +73 -0
- package/src/skills/science/references/packages/oqupy.md +73 -0
- package/src/skills/science/references/packages/packmol.md +80 -0
- package/src/skills/science/references/packages/palabos.md +80 -0
- package/src/skills/science/references/packages/parflow.md +80 -0
- package/src/skills/science/references/packages/pennylane.md +88 -0
- package/src/skills/science/references/packages/perceval.md +73 -0
- package/src/skills/science/references/packages/phono3py.md +73 -0
- package/src/skills/science/references/packages/phonopy.md +73 -0
- package/src/skills/science/references/packages/photutils.md +88 -0
- package/src/skills/science/references/packages/picongpu.md +80 -0
- package/src/skills/science/references/packages/plink-ng.md +88 -0
- package/src/skills/science/references/packages/precice.md +73 -0
- package/src/skills/science/references/packages/psc.md +80 -0
- package/src/skills/science/references/packages/psi4.md +88 -0
- package/src/skills/science/references/packages/pybinding.md +73 -0
- package/src/skills/science/references/packages/pyfr.md +80 -0
- package/src/skills/science/references/packages/pyhf.md +73 -0
- package/src/skills/science/references/packages/pyiron_base.md +80 -0
- package/src/skills/science/references/packages/pylcp.md +73 -0
- package/src/skills/science/references/packages/pylith.md +80 -0
- package/src/skills/science/references/packages/pynbody.md +88 -0
- package/src/skills/science/references/packages/pysam.md +88 -0
- package/src/skills/science/references/packages/pyscf.md +88 -0
- package/src/skills/science/references/packages/q-e.md +73 -0
- package/src/skills/science/references/packages/qibo.md +73 -0
- package/src/skills/science/references/packages/qiskit.md +73 -0
- package/src/skills/science/references/packages/quantica-jl.md +73 -0
- package/src/skills/science/references/packages/quantumoptics-jl.md +73 -0
- package/src/skills/science/references/packages/quimb.md +73 -0
- package/src/skills/science/references/packages/qulacs.md +73 -0
- package/src/skills/science/references/packages/qutip.md +73 -0
- package/src/skills/science/references/packages/rdkit.md +88 -0
- package/src/skills/science/references/packages/rmg-py.md +73 -0
- package/src/skills/science/references/packages/root.md +73 -0
- package/src/skills/science/references/packages/scanpy.md +88 -0
- package/src/skills/science/references/packages/scikit-allel.md +88 -0
- package/src/skills/science/references/packages/scikit-bio.md +88 -0
- package/src/skills/science/references/packages/scqubits.md +73 -0
- package/src/skills/science/references/packages/scuff-em.md +80 -0
- package/src/skills/science/references/packages/scvi-tools.md +73 -0
- package/src/skills/science/references/packages/seissol.md +73 -0
- package/src/skills/science/references/packages/sfepy.md +80 -0
- package/src/skills/science/references/packages/sisl.md +73 -0
- package/src/skills/science/references/packages/smilei.md +80 -0
- package/src/skills/science/references/packages/snakemake.md +88 -0
- package/src/skills/science/references/packages/specfem3d-globe.md +80 -0
- package/src/skills/science/references/packages/specutils.md +88 -0
- package/src/skills/science/references/packages/spglib.md +80 -0
- package/src/skills/science/references/packages/squidpy.md +88 -0
- package/src/skills/science/references/packages/starry.md +88 -0
- package/src/skills/science/references/packages/strawberryfields.md +73 -0
- package/src/skills/science/references/packages/su2.md +80 -0
- package/src/skills/science/references/packages/sunny-jl.md +73 -0
- package/src/skills/science/references/packages/sw4.md +73 -0
- package/src/skills/science/references/packages/swift.md +88 -0
- package/src/skills/science/references/packages/tdnegf.md +73 -0
- package/src/skills/science/references/packages/tenpy.md +73 -0
- package/src/skills/science/references/packages/thermo.md +73 -0
- package/src/skills/science/references/packages/tkwant.md +73 -0
- package/src/skills/science/references/packages/tvb-root.md +73 -0
- package/src/skills/science/references/packages/uproot5.md +73 -0
- package/src/skills/science/references/packages/vampire.md +80 -0
- package/src/skills/science/references/packages/wannier_tools.md +73 -0
- package/src/skills/science/references/packages/warpx.md +80 -0
- package/src/skills/science/references/packages/wrf.md +73 -0
- package/src/skills/science/references/packages/xtb.md +88 -0
- package/src/skills/science/references/packages/yt.md +73 -0
- package/src/skills/science/references/science-task-brief-template.md +71 -0
- package/src/skills/scout/SKILL.md +83 -425
- package/src/skills/scout/references/literature-scout-template.md +5 -24
- package/src/skills/scout/references/operational-guidance.md +191 -0
- package/src/skills/scout/references/paper-triage-playbook.md +11 -35
- package/src/skills/write/SKILL.md +744 -1246
- package/src/skills/write/references/experiments_analysis_patterns.md +129 -0
- package/src/skills/write/references/oral_package_patterns.md +252 -0
- package/src/skills/write/references/oral_writing_principles.md +291 -0
- package/src/skills/write/references/section_rewrite_checklist.md +234 -0
- package/src/tui/dist/app/AppContainer.js +1314 -27
- package/src/tui/dist/components/Composer.js +26 -1
- package/src/tui/dist/components/ConfigScreen.js +2 -1
- package/src/tui/dist/components/InputPrompt.js +25 -9
- package/src/tui/dist/components/MainContent.js +18 -3
- package/src/tui/dist/components/QuestScreen.js +3 -2
- package/src/tui/dist/components/UtilityScreen.js +37 -0
- package/src/tui/dist/hooks/useSafeInput.js +10 -0
- package/src/tui/dist/index.js +13 -1
- package/src/tui/dist/layouts/DefaultAppLayout.js +11 -8
- package/src/tui/dist/lib/api.js +89 -1
- package/src/tui/package.json +1 -1
- package/src/ui/dist/assets/{AnalysisPlugin-DnSm0GZn.js → AnalysisPlugin-CA94NGmI.js} +1 -1
- package/src/ui/dist/assets/CliPlugin-DHBzphZU.js +79 -0
- package/src/ui/dist/assets/CodeEditorPlugin-BOFwD2rn.js +2 -0
- package/src/ui/dist/assets/{CodeViewerPlugin-itb0tltR.js → CodeViewerPlugin-CqDpgjik.js} +4 -4
- package/src/ui/dist/assets/{DocViewerPlugin-DqKkiCI6.js → DocViewerPlugin-UDBgt8-4.js} +3 -3
- package/src/ui/dist/assets/GitCommitViewerPlugin-BmHtZ0bZ.js +6 -0
- package/src/ui/dist/assets/{GitDiffViewerPlugin-DxL2ezFG.js → GitDiffViewerPlugin-CAxjNorQ.js} +2 -2
- package/src/ui/dist/assets/{GitSnapshotViewer-B_RQm1YZ.js → GitSnapshotViewer-CweA6VON.js} +2 -2
- package/src/ui/dist/assets/{ImageViewerPlugin-tHqlXY3n.js → ImageViewerPlugin-C8wHGvGN.js} +5 -5
- package/src/ui/dist/assets/LabPlugin-COyyLUol.js +32 -0
- package/src/ui/dist/assets/{LatexPlugin-B495DTXC.js → LatexPlugin-BQjAaA5J.js} +4 -4
- package/src/ui/dist/assets/{MarkdownViewerPlugin-DG28-61B.js → MarkdownViewerPlugin-Dy1NE2dI.js} +3 -3
- package/src/ui/dist/assets/{MarketplacePlugin-BiOGT-Kj.js → MarketplacePlugin-DMIZtEJ2.js} +2 -2
- package/src/ui/dist/assets/NotebookEditor-CFHMq_Qt.js +91 -0
- package/src/ui/dist/assets/{NotebookEditor-CVsj8h_T.js → NotebookEditor-WFyd8Ybt.js} +23 -23
- package/src/ui/dist/assets/{PdfLoader-CASDQmxJ.js → PdfLoader-CLE5u5TS.js} +3 -3
- package/src/ui/dist/assets/{PdfMarkdownPlugin-BFhwoKsY.js → PdfMarkdownPlugin-_iNK_H83.js} +1 -1
- package/src/ui/dist/assets/PdfViewerPlugin-DgWsbInT.js +22 -0
- package/src/ui/dist/assets/SearchPlugin-DrZmn5iw.js +11 -0
- package/src/ui/dist/assets/{TextViewerPlugin-CB4DYfWO.js → TextViewerPlugin-D1-T3aC7.js} +4 -4
- package/src/ui/dist/assets/branding/runner-claude.svg +107 -0
- package/src/ui/dist/assets/branding/runner-codex.svg +10 -0
- package/src/ui/dist/assets/branding/runner-kimi.svg +14 -0
- package/src/ui/dist/assets/branding/runner-opencode.svg +7 -0
- package/src/ui/dist/assets/cli-store-CoZ-x5Ip.js +1 -0
- package/src/ui/dist/assets/{code-DLC6G24T.js → code-DbsmSd3Y.js} +1 -1
- package/src/ui/dist/assets/file-diff-panel-DsvyRz47.js +1 -0
- package/src/ui/dist/assets/{wrap-text-CwMn-iqb.js → file-jump-queue-DeQBikaw.js} +3 -3
- package/src/ui/dist/assets/{file-socket-Cu4Qln7Y.js → file-socket-DA5XIx88.js} +1 -1
- package/src/ui/dist/assets/fonts/ds-fonts.css +50 -4
- package/src/ui/dist/assets/images/deepxiv/register-guide.png +0 -0
- package/src/ui/dist/assets/index-39vY9LmZ.js +1 -0
- package/src/ui/dist/assets/{index-wQ7RIIRd.js → index-BsO46tJA.js} +1 -1
- package/src/ui/dist/assets/index-CHzJ2xtB.js +3530 -0
- package/src/ui/dist/assets/index-DH-zxoZ3.css +33 -0
- package/src/ui/dist/assets/{plugin-notebook-HbW2K-1c.js → plugin-notebook-JRhysCqj.js} +2 -2
- package/src/ui/dist/assets/{project-sync-CsX08Qno.js → project-sync-DPmWKmKD.js} +1 -1
- package/src/ui/dist/assets/{zoom-out-R-GWEhzS.js → zoom-out-DAukFWen.js} +3 -3
- package/src/ui/dist/index.html +3 -3
- package/src/skills/analysis-campaign/references/artifact-orchestration.md +0 -58
- package/src/skills/baseline/references/memory-playbook.md +0 -40
- package/src/skills/baseline/references/publishable-baseline-package.md +0 -30
- package/src/skills/write/references/outline-evidence-contract-example.md +0 -107
- package/src/skills/write/references/paper-experiment-matrix-template.md +0 -131
- package/src/skills/write/references/paper-section-playbook.md +0 -64
- package/src/skills/write/references/reviewer-first-writing.md +0 -64
- package/src/skills/write/references/revision-checklist.md +0 -70
- package/src/skills/write/references/section-contracts.md +0 -82
- package/src/skills/write/references/sentence-level-proofing.md +0 -49
- package/src/ui/dist/assets/AiManusChatView-COFACy7V.js +0 -204
- package/src/ui/dist/assets/CliPlugin-CvwCmDQ5.js +0 -109
- package/src/ui/dist/assets/CodeEditorPlugin-cOqSa0xq.js +0 -2
- package/src/ui/dist/assets/GitCommitViewerPlugin-DVgNHBCS.js +0 -1
- package/src/ui/dist/assets/LabCopilotPanel-ClMbq5Yu.js +0 -14
- package/src/ui/dist/assets/LabPlugin-L_SuE8ow.js +0 -22
- package/src/ui/dist/assets/NotebookEditor-C-4Kt1p9.js +0 -81
- package/src/ui/dist/assets/PdfViewerPlugin-DcOzU9vd.js +0 -17
- package/src/ui/dist/assets/SearchPlugin-CHj7M58O.js +0 -16
- package/src/ui/dist/assets/VNCViewer-CjlbyCB3.js +0 -11
- package/src/ui/dist/assets/bot-CFkZY-JP.js +0 -6
- package/src/ui/dist/assets/chevron-up-Dq5ofbht.js +0 -6
- package/src/ui/dist/assets/file-content-Dv4LoZec.js +0 -1
- package/src/ui/dist/assets/file-diff-panel-Denq-lC3.js +0 -1
- package/src/ui/dist/assets/file-jump-queue-DA-SdG__.js +0 -1
- package/src/ui/dist/assets/git-commit-horizontal-BUh6G52n.js +0 -6
- package/src/ui/dist/assets/image-B9HUUddG.js +0 -6
- package/src/ui/dist/assets/index-B2B1sg-M.js +0 -1
- package/src/ui/dist/assets/index-Cgla8biy.css +0 -33
- package/src/ui/dist/assets/index-DRyx7vAc.js +0 -1
- package/src/ui/dist/assets/index-Gbl53BNp.js +0 -2496
- package/src/ui/dist/assets/pdf-effect-queue-ZtnHFCAi.js +0 -6
- package/src/ui/dist/assets/popover-DL6h35vr.js +0 -1
- package/src/ui/dist/assets/select-DvmXt1yY.js +0 -11
- package/src/ui/dist/assets/sigma-7jpXazui.js +0 -6
- package/src/ui/dist/assets/trash-xA7kFt8i.js +0 -11
- package/src/ui/dist/assets/useCliAccess-DsMwDjOp.js +0 -1
- package/src/ui/dist/assets/useFileDiffOverlay-FuhcnKiw.js +0 -1
|
@@ -0,0 +1,164 @@
|
|
|
1
|
+
schema_version: 1
|
|
2
|
+
id: aisb.t3.014_piguard
|
|
3
|
+
name: 'PIGuard: 通过缓解过度防御实现提示词注入防护栏'
|
|
4
|
+
version: 0.1.0
|
|
5
|
+
one_line: '训练或微调基于DeBERTa的提示词注入防护模型(PIGuard),并在四个基准数据集(NotInject、PINT、BIPIA、WildGuard-Benign)上进行评估,通过测量恶意检测准确率、良性准确率和过度防御准确率来评估不同触发词难度级别下的表现。
|
|
6
|
+
|
|
7
|
+
'
|
|
8
|
+
task_description: '本基准测试整合了完整的PIGuard流程:一个轻量级的基于DeBERTa的二分类器,用于检测提示词注入攻击,同时最大限度减少过度防御(对包含触发词(如"ignore")的良性输入的误报)。优化任务是提高注入召回率而不增加误报率。训练数据来自20个开源数据集及LLM增强样本,已打包在快照中。评估涵盖四个测试集:NotInject(339个良性样本,含1/2/3个触发词,分为三个难度子集,各113个)、BIPIA(恶意样本)、WildGuard-Benign和PINT。PINT基准需要单独的访问申请。提供了三个评估脚本:eval.py(通过PIGuard.py加载本地检查点)、eval_piguard.py(从/tmp/PIGuard_weights加载HuggingFace格式权重)和run_official_eval.py(与eval_piguard.py相同但使用自定义良性分数阈值0.10)。主要指标包括NotInject上的过度防御准确率(整体及各触发词数量)、PINT/BIPIA上的恶意准确率以及PINT/WildGuard-Benign上的良性准确率。模型权重可从HuggingFace(leolee99/PIGuard)或Google Drive检查点加载。
|
|
9
|
+
|
|
10
|
+
'
|
|
11
|
+
capability_tags:
|
|
12
|
+
- research_code_optimization
|
|
13
|
+
- llm_security
|
|
14
|
+
- classification
|
|
15
|
+
- prompt_injection
|
|
16
|
+
- robustness
|
|
17
|
+
aisb_direction: T3
|
|
18
|
+
track_fit:
|
|
19
|
+
- paper_track
|
|
20
|
+
- benchmark_track
|
|
21
|
+
task_mode: experiment_driven
|
|
22
|
+
requires_execution: true
|
|
23
|
+
requires_paper: true
|
|
24
|
+
integrity_level: cas_plus_canary
|
|
25
|
+
snapshot_status: runnable
|
|
26
|
+
support_level: turnkey
|
|
27
|
+
cost_band: medium
|
|
28
|
+
time_band: 6-24h
|
|
29
|
+
difficulty: medium
|
|
30
|
+
data_access: restricted
|
|
31
|
+
primary_outputs:
|
|
32
|
+
- over_defense_accuracy
|
|
33
|
+
- one_trigger_accuracy
|
|
34
|
+
- two_trigger_accuracy
|
|
35
|
+
- three_trigger_accuracy
|
|
36
|
+
- malicious_accuracy
|
|
37
|
+
- benign_accuracy
|
|
38
|
+
- trigger_accuracy_breakdown
|
|
39
|
+
- evaluation_report
|
|
40
|
+
launch_profiles:
|
|
41
|
+
- id: quick_check
|
|
42
|
+
label: 快速检查(HuggingFace权重)
|
|
43
|
+
description: '运行eval_hf.py或eval_piguard.py,在所有打包的测试集(NotInject、BIPIA、WildGuard-Benign)上评估来自HuggingFace的预训练PIGuard模型。无需训练。除非单独获取访问权限,否则跳过PINT评估。
|
|
44
|
+
|
|
45
|
+
'
|
|
46
|
+
- id: full_eval
|
|
47
|
+
label: 完整评估(全部四个基准)
|
|
48
|
+
description: '在NotInject、BIPIA、WildGuard-Benign和PINT上运行eval.py --resume <checkpoint>或run_official_eval.py。需要PINT访问权限并通过util.py进行YAML到JSON的转换。
|
|
49
|
+
|
|
50
|
+
'
|
|
51
|
+
- id: train_and_eval
|
|
52
|
+
label: 训练 + 评估
|
|
53
|
+
description: '运行train.py,使用MOF策略在打包的20源训练集上重新训练PIGuard,然后在所有四个基准上评估生成的检查点。根据VRAM和批大小,在单GPU上预计需要6-24小时。
|
|
54
|
+
|
|
55
|
+
'
|
|
56
|
+
dataset_download:
|
|
57
|
+
primary_method: mixed
|
|
58
|
+
sources:
|
|
59
|
+
- kind: huggingface
|
|
60
|
+
url: https://huggingface.co/datasets/leolee99/NotInject
|
|
61
|
+
access: public
|
|
62
|
+
note: 'NotInject过度防御评估数据集(339个样本,三个子集各113个)。也已打包在快照的datasets/目录下。
|
|
63
|
+
|
|
64
|
+
'
|
|
65
|
+
- kind: huggingface
|
|
66
|
+
url: https://huggingface.co/leolee99/PIGuard
|
|
67
|
+
access: public
|
|
68
|
+
note: 预训练PIGuard模型权重(基于DeBERTa)。
|
|
69
|
+
- kind: google_drive
|
|
70
|
+
url: https://drive.google.com/file/d/1JpiVb_wtnbBLNEjIx1KS7PHuvmARQKTu/view?usp=sharing
|
|
71
|
+
access: public
|
|
72
|
+
note: 备用检查点下载。
|
|
73
|
+
- kind: external
|
|
74
|
+
url: https://share-eu1.hsforms.com/1TwiBEvLXRrCjJSdnbnHpLwfdfs3
|
|
75
|
+
access: request_required
|
|
76
|
+
note: '来自Lakera AI的PINT基准。非公开,需要填写访问申请表。下载后必须通过util.py从YAML转换为JSON。
|
|
77
|
+
|
|
78
|
+
'
|
|
79
|
+
- kind: bundled
|
|
80
|
+
url: null
|
|
81
|
+
access: public
|
|
82
|
+
note: '训练数据(20个开源数据集+LLM增强)、验证集(144个样本)和测试集(NotInject、BIPIA、WildGuard-Benign)全部打包在PIGuard/datasets/目录下。
|
|
83
|
+
|
|
84
|
+
'
|
|
85
|
+
notes:
|
|
86
|
+
- 打包的数据总量较小(几十MB)。大多数配置不需要大规模下载。
|
|
87
|
+
- PINT是唯一需要单独申请访问权限的外部数据集。
|
|
88
|
+
credential_requirements:
|
|
89
|
+
mode: none
|
|
90
|
+
items: []
|
|
91
|
+
notes:
|
|
92
|
+
- PINT基准访问需要向Lakera AI提交表单,但不需要API密钥。
|
|
93
|
+
- HuggingFace模型下载是公开的,无需令牌。
|
|
94
|
+
resources:
|
|
95
|
+
minimum:
|
|
96
|
+
cpu_cores: 8
|
|
97
|
+
ram_gb: 32
|
|
98
|
+
disk_gb: 50
|
|
99
|
+
gpu_count: 1
|
|
100
|
+
gpu_vram_gb: 12
|
|
101
|
+
recommended:
|
|
102
|
+
cpu_cores: 16
|
|
103
|
+
ram_gb: 64
|
|
104
|
+
disk_gb: 120
|
|
105
|
+
gpu_count: 1
|
|
106
|
+
gpu_vram_gb: 24
|
|
107
|
+
environment:
|
|
108
|
+
python: '3.10'
|
|
109
|
+
cuda: '11.8'
|
|
110
|
+
pytorch: 2.4.0
|
|
111
|
+
flash_attn: 2.6.1
|
|
112
|
+
key_packages:
|
|
113
|
+
- flash-attn==2.6.1
|
|
114
|
+
- vllm==0.5.4
|
|
115
|
+
- transformers==4.44.0
|
|
116
|
+
- ptflops
|
|
117
|
+
notes:
|
|
118
|
+
- 完整的依赖项列表请参见打包的requirements.txt。
|
|
119
|
+
- 模型基于DeBERTa(非LLM);vllm仅在某些评估模式下使用。
|
|
120
|
+
- flash-attn需要兼容的CUDA和GPU架构。
|
|
121
|
+
risk_flags:
|
|
122
|
+
- restricted_dataset_component
|
|
123
|
+
- external_model_weights
|
|
124
|
+
risk_notes:
|
|
125
|
+
- 'PINT基准非公开且未打包。完整的四基准评估需要通过网页表单向Lakera AI申请访问权限。没有PINT的情况下,仍可使用四个测试集中的三个。
|
|
126
|
+
|
|
127
|
+
'
|
|
128
|
+
- '模型权重必须在运行时从HuggingFace或Google Drive下载,除非已预置在/tmp/PIGuard_weights。
|
|
129
|
+
|
|
130
|
+
'
|
|
131
|
+
- 'eval_piguard.py和run_official_eval.py硬编码MODEL_PATH为/tmp/PIGuard_weights,DATASET_ROOT为/repo/datasets。这些路径可能需要调整。
|
|
132
|
+
|
|
133
|
+
'
|
|
134
|
+
- 'run_official_eval.py使用自定义的BENIGN_THRESHOLD为0.10(从0.50降低),这与eval.py中的默认argmax方法相比改变了分类边界。
|
|
135
|
+
|
|
136
|
+
'
|
|
137
|
+
recommended_when: '当您需要一个安全导向的文本分类任务,具有双重压力:在提高提示词注入检测召回率的同时控制过度防御(对包含类攻击触发词的良性输入的误报)。非常适合研究捷径学习、DeBERTa微调策略以及轻量级防护模型的阈值校准。
|
|
138
|
+
|
|
139
|
+
'
|
|
140
|
+
not_recommended_when: '如果无法提供GPU(模型需要CUDA来进行实际训练和批量推理)、无法获取PINT完整评估访问权限,或者您寻找的是生成式LLM基准而非二分类任务,则不应使用此基准。
|
|
141
|
+
|
|
142
|
+
'
|
|
143
|
+
paper:
|
|
144
|
+
title: 'PIGuard: Prompt Injection Guardrail via Mitigating Overdefense for Free'
|
|
145
|
+
venue: ACL 2025
|
|
146
|
+
year: 2025
|
|
147
|
+
url: https://aclanthology.org/2025.acl-long.1468/
|
|
148
|
+
download:
|
|
149
|
+
url: https://github.com/ResearAI/DeepScientist/releases/download/aisb-v0.0.1/aisb.t3.014_piguard.zip
|
|
150
|
+
archive_type: zip
|
|
151
|
+
local_dir_name: paper-14-PIGuard
|
|
152
|
+
provider: github_release
|
|
153
|
+
repo: ResearAI/DeepScientist
|
|
154
|
+
tag: aisb-v0.0.1
|
|
155
|
+
asset_name: aisb.t3.014_piguard.zip
|
|
156
|
+
sha256: 9a7996fc3b40709caa357b763d7a51e1f1f2ab449480e403ec34a7c248eb4c81
|
|
157
|
+
size_bytes: 1621668
|
|
158
|
+
commercial:
|
|
159
|
+
annual_fee: null
|
|
160
|
+
display:
|
|
161
|
+
palette_seed: rust-sand-guard
|
|
162
|
+
art_style: safety-dashboard
|
|
163
|
+
accent_priority: high
|
|
164
|
+
image_path: ../image/014_aisb.t3.014_piguard.jpg
|
|
@@ -0,0 +1,209 @@
|
|
|
1
|
+
schema_version: 1
|
|
2
|
+
id: aisb.t3.015_frspec
|
|
3
|
+
name: 'FR-Spec: Accelerating Large-Vocabulary Language Models via Frequency-Ranked
|
|
4
|
+
Speculative Sampling'
|
|
5
|
+
version: 0.1.0
|
|
6
|
+
one_line: Benchmark frequency-ranked speculative decoding for large-vocabulary LLMs
|
|
7
|
+
by building custom C/CUDA kernels and measuring tokens/s speedup over EAGLE-2 baselines
|
|
8
|
+
on Spec-Bench, HumanEval, and GSM8K.
|
|
9
|
+
task_description: 'This benchmark evaluates FR-Spec, a frequency-ranked speculative
|
|
10
|
+
sampling framework that accelerates large-vocabulary LLM inference by restricting
|
|
11
|
+
the draft model''s LM Head to a high-frequency token subset (e.g., top 25% of vocabulary),
|
|
12
|
+
reducing LM Head computation by ~75% while preserving output-distribution equivalence
|
|
13
|
+
during verification. The task requires: (1) compiling a custom C/CUDA extension
|
|
14
|
+
(llamacu) with nvcc targeting the correct GPU compute capability, (2) downloading
|
|
15
|
+
target LLM weights (e.g., Llama-3-8B-Instruct, Qwen2-7B-Instruct) and corresponding
|
|
16
|
+
EAGLE-2 draft model weights, (3) obtaining or generating token-frequency statistics
|
|
17
|
+
from SlimPajama-627B via fr/fr.py, (4) running baseline (autoregressive), EAGLE-2,
|
|
18
|
+
and FR-Spec inference on three benchmark suites (spec_bench, human_eval, gsm8k)
|
|
19
|
+
using bundled shell scripts, and (5) measuring tokens-per-second throughput and
|
|
20
|
+
optionally checking correctness on human_eval and gsm8k. The primary metrics are
|
|
21
|
+
throughput (tokens/s) across seven Spec-Bench subtasks (machine translation, conversation, RAG, math,
|
|
22
|
+
QA, summarization, code) plus HumanEval and GSM8K speed. The code includes a modified
|
|
23
|
+
FlashAttention with tree-attention bitmask support and preallocated memory management.
|
|
24
|
+
No external evaluation service is needed; all scoring is local.
|
|
25
|
+
|
|
26
|
+
'
|
|
27
|
+
capability_tags:
|
|
28
|
+
- research_code_optimization
|
|
29
|
+
- large_language_models
|
|
30
|
+
- speculative_decoding
|
|
31
|
+
- cuda_kernels
|
|
32
|
+
- inference_acceleration
|
|
33
|
+
aisb_direction: T3
|
|
34
|
+
track_fit:
|
|
35
|
+
- paper_track
|
|
36
|
+
- benchmark_track
|
|
37
|
+
task_mode: evaluation_driven
|
|
38
|
+
requires_execution: true
|
|
39
|
+
requires_paper: true
|
|
40
|
+
integrity_level: cas_plus_canary
|
|
41
|
+
snapshot_status: runnable
|
|
42
|
+
support_level: advanced
|
|
43
|
+
cost_band: high
|
|
44
|
+
time_band: 6-24h
|
|
45
|
+
difficulty: hard
|
|
46
|
+
data_access: public
|
|
47
|
+
primary_outputs:
|
|
48
|
+
- mt_bench_tokens_per_second
|
|
49
|
+
- translation_tokens_per_second
|
|
50
|
+
- summarization_tokens_per_second
|
|
51
|
+
- qa_tokens_per_second
|
|
52
|
+
- math_reasoning_tokens_per_second
|
|
53
|
+
- rag_tokens_per_second
|
|
54
|
+
- task_throughput_report
|
|
55
|
+
- speed_benchmark
|
|
56
|
+
launch_profiles:
|
|
57
|
+
- id: quick_check
|
|
58
|
+
label: Quick Check
|
|
59
|
+
description: 'Run a single packaged speed benchmark (e.g., spec_bench with Llama-3-8B-Instruct)
|
|
60
|
+
to verify FR-Spec throughput gains versus EAGLE-2 baseline. Requires model weights
|
|
61
|
+
already downloaded and CUDA extension compiled.
|
|
62
|
+
|
|
63
|
+
'
|
|
64
|
+
- id: full_speed_suite
|
|
65
|
+
label: Full Speed Suite
|
|
66
|
+
description: 'Run all three benchmark suites (spec_bench, human_eval, gsm8k) for
|
|
67
|
+
baseline, EAGLE-2, and FR-Spec configurations. Includes speed measurement and
|
|
68
|
+
optional correctness checks for human_eval and gsm8k. Covers all seven Spec-Bench
|
|
69
|
+
subtasks.
|
|
70
|
+
|
|
71
|
+
'
|
|
72
|
+
- id: freq_stats_generation
|
|
73
|
+
label: Frequency Statistics Generation
|
|
74
|
+
description: 'Generate custom token-frequency statistics from SlimPajama-627B using
|
|
75
|
+
fr/fr.py before running evaluation. Useful if pre-computed stats are unavailable
|
|
76
|
+
or a different model is targeted.
|
|
77
|
+
|
|
78
|
+
'
|
|
79
|
+
dataset_download:
|
|
80
|
+
primary_method: mixed
|
|
81
|
+
sources:
|
|
82
|
+
- kind: huggingface
|
|
83
|
+
url: https://huggingface.co/thunlp/LLaMA3-Instruct-8B-FR-Spec
|
|
84
|
+
access: public
|
|
85
|
+
note: Pre-computed token-frequency statistics for Llama-3-8B-Instruct.
|
|
86
|
+
- kind: huggingface
|
|
87
|
+
url: https://huggingface.co/thunlp/LLaMA3.2-Instruct-1B-FR-Spec
|
|
88
|
+
access: public
|
|
89
|
+
note: Pre-computed token-frequency statistics for Llama-3.2-1B-Instruct.
|
|
90
|
+
- kind: huggingface
|
|
91
|
+
url: https://huggingface.co/thunlp/Qwen2-7B-Instruct-FR-Spec
|
|
92
|
+
access: public
|
|
93
|
+
note: Pre-computed token-frequency statistics for Qwen2-7B-Instruct.
|
|
94
|
+
- kind: huggingface
|
|
95
|
+
url: https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct
|
|
96
|
+
access: gated
|
|
97
|
+
note: Target LLM weights. Requires acceptance of Meta license on HuggingFace.
|
|
98
|
+
- kind: huggingface
|
|
99
|
+
url: https://huggingface.co/cerebras/SlimPajama-627B
|
|
100
|
+
access: public
|
|
101
|
+
note: Streaming corpus used by fr/fr.py for token-frequency stat generation. Not
|
|
102
|
+
needed if using pre-computed stats.
|
|
103
|
+
notes:
|
|
104
|
+
- EAGLE-2 draft model weights must also be downloaded; see EAGLE repository for
|
|
105
|
+
URLs.
|
|
106
|
+
- Target LLM weights are 15-30 GB depending on model; plan 50-100 GB total for models
|
|
107
|
+
+ draft weights + stats.
|
|
108
|
+
- Spec-Bench evaluation datasets are bundled or fetched by the evaluation scripts.
|
|
109
|
+
credential_requirements:
|
|
110
|
+
mode: optional
|
|
111
|
+
items:
|
|
112
|
+
- HuggingFace token (required only for gated model weights like Llama-3)
|
|
113
|
+
notes:
|
|
114
|
+
- If using only Qwen2 or pre-downloaded weights, no credentials are needed.
|
|
115
|
+
- The fschat, openai, and anthropic packages listed in setup.py install_requires
|
|
116
|
+
are for MT-Bench judging; the corresponding OpenAI/Anthropic API keys may not be
|
|
117
|
+
needed in throughput-only runs.
|
|
118
|
+
resources:
|
|
119
|
+
minimum:
|
|
120
|
+
cpu_cores: 8
|
|
121
|
+
ram_gb: 32
|
|
122
|
+
disk_gb: 100
|
|
123
|
+
gpu_count: 1
|
|
124
|
+
gpu_vram_gb: 24
|
|
125
|
+
recommended:
|
|
126
|
+
cpu_cores: 16
|
|
127
|
+
ram_gb: 64
|
|
128
|
+
disk_gb: 200
|
|
129
|
+
gpu_count: 1
|
|
130
|
+
gpu_vram_gb: 48
|
|
131
|
+
notes:
|
|
132
|
+
- CUDA compilation (nvcc) requires ~8-9 GB RAM per parallel job; setup.py auto-limits
|
|
133
|
+
MAX_JOBS based on free memory.
|
|
134
|
+
- 24 GB VRAM is sufficient for Llama-3-8B in FP16; 48 GB recommended for comfortable
|
|
135
|
+
headroom and larger batch experiments.
|
|
136
|
+
- Compilation itself can take 10-30 minutes depending on core count.
|
|
137
|
+
environment:
|
|
138
|
+
python: '3.11'
|
|
139
|
+
cuda: 11.8+
|
|
140
|
+
pytorch: 2.0+
|
|
141
|
+
key_packages:
|
|
142
|
+
- transformers==4.46.2
|
|
143
|
+
- accelerate==0.26.0
|
|
144
|
+
- datasets
|
|
145
|
+
- fschat
|
|
146
|
+
- human_eval
|
|
147
|
+
- pybind11
|
|
148
|
+
- psutil
|
|
149
|
+
- ninja
|
|
150
|
+
notes:
|
|
151
|
+
- Building the CUDA extension requires nvcc and the correct GPU compute capability set
|
|
152
|
+
in setup.py (default arch="80" for A100; change for other GPUs).
|
|
153
|
+
- The package bundles a modified FlashAttention v2.4.2 csrc with tree-attention
|
|
154
|
+
bitmask support; external flash-attn installation is not needed.
|
|
155
|
+
- Compilation links against cublas.
|
|
156
|
+
- See setup.py install_requires for the complete dependency set.
|
|
157
|
+
risk_flags:
|
|
158
|
+
- custom_cuda_build
|
|
159
|
+
- large_model_download
|
|
160
|
+
- gated_model_access
|
|
161
|
+
- compilation_complexity
|
|
162
|
+
risk_notes:
|
|
163
|
+
- CUDA compilation requires matching nvcc version and correct compute capability in
|
|
164
|
+
setup.py; misconfiguration can cause build failures or silently wrong results.
|
|
165
|
+
- Gated Llama-3 weights require HuggingFace license acceptance; this may block fully
|
|
166
|
+
automated runs.
|
|
167
|
+
- EAGLE-2 draft model weights are a separate download not fully documented in the
|
|
168
|
+
snapshot; recovery may require consulting the EAGLE repository.
|
|
169
|
+
- MT-Bench quality judging (as opposed to throughput measurement) may require OpenAI
|
|
170
|
+
API keys, but throughput-only evaluation does not.
|
|
171
|
+
- The default arch="80" in setup.py targets A100; users on other GPUs must manually
|
|
172
|
+
edit this value.
|
|
173
|
+
recommended_when: 'Use this benchmark when you want an LLM-systems optimization task
|
|
174
|
+
focused on custom C/CUDA kernel development, speculative decoding acceleration,
|
|
175
|
+
and inference throughput measurement for large-vocabulary models (128k+ tokens).
|
|
176
|
+
Well-suited for evaluating GPU kernel optimization skills and understanding the
|
|
177
|
+
interplay between vocabulary size, LM Head computation, and speculative sampling
|
|
178
|
+
acceptance rates.
|
|
179
|
+
|
|
180
|
+
'
|
|
181
|
+
not_recommended_when: 'Do not use this if you lack a modern NVIDIA GPU with ≥24 GB
|
|
182
|
+
VRAM, cannot compile custom CUDA extensions, or need a task focused on model training
|
|
183
|
+
rather than inference optimization. Not appropriate if you need a pure-Python benchmark
|
|
184
|
+
without native code compilation.
|
|
185
|
+
|
|
186
|
+
'
|
|
187
|
+
paper:
|
|
188
|
+
title: 'FR-Spec: Accelerating Large-Vocabulary Language Models via Frequency-Ranked
|
|
189
|
+
Speculative Sampling'
|
|
190
|
+
venue: ACL 2025 Main
|
|
191
|
+
year: 2025
|
|
192
|
+
url: https://arxiv.org/abs/2502.14856
|
|
193
|
+
download:
|
|
194
|
+
url: https://github.com/ResearAI/DeepScientist/releases/download/aisb-v0.0.1/aisb.t3.015_frspec.zip
|
|
195
|
+
archive_type: zip
|
|
196
|
+
local_dir_name: paper-15-FRSpec
|
|
197
|
+
provider: github_release
|
|
198
|
+
repo: ResearAI/DeepScientist
|
|
199
|
+
tag: aisb-v0.0.1
|
|
200
|
+
asset_name: aisb.t3.015_frspec.zip
|
|
201
|
+
sha256: bf4aaa74d5faae067f125c2df48fa0eb7d8e120988bec5fbd748666fc6e1a9e9
|
|
202
|
+
size_bytes: 974875
|
|
203
|
+
commercial:
|
|
204
|
+
annual_fee: null
|
|
205
|
+
display:
|
|
206
|
+
palette_seed: neon-slate-kernel
|
|
207
|
+
art_style: systems-benchmark
|
|
208
|
+
accent_priority: high
|
|
209
|
+
image_path: ../image/015_aisb.t3.015_frspec.jpg
|
|
@@ -0,0 +1,163 @@
|
|
|
1
|
+
schema_version: 1
|
|
2
|
+
id: aisb.t3.015_frspec
|
|
3
|
+
name: 'FR-Spec: Accelerating Large-Vocabulary Language Models via Frequency-Ranked
|
|
4
|
+
Speculative Sampling'
|
|
5
|
+
version: 0.1.0
|
|
6
|
+
one_line: 通过构建自定义C/CUDA内核并测量Spec-Bench、HumanEval和GSM8K上相较于EAGLE-2基线的tokens/s加速比,来对大词表LLM的频率排序投机解码进行基准测试。
|
|
7
|
+
task_description: '本基准测试评估FR-Spec,这是一种频率排序投机采样框架,通过将draft模型的LM Head限制在高频token子集(例如词汇表前25%),减少约75%的LM Head计算量,同时在验证过程中保持输出分布等价性,从而加速大词表LLM推理。该任务要求:(1)使用nvcc编译面向正确GPU计算能力目标的自定义C/CUDA扩展(llamacu),(2)下载目标LLM权重(如Llama-3-8B-Instruct、Qwen2-7B-Instruct)及相应的EAGLE-2 draft模型权重,(3)通过fr/fr.py从SlimPajama-627B获取或生成token频率统计数据,(4)使用捆绑的shell脚本在三个基准测试套件(spec_bench、human_eval、gsm8k)上运行基线(自回归)、EAGLE-2和FR-Spec推理,(5)测量tokens-per-second吞吐量,并可选地在human_eval和gsm8k上检查正确性。主要指标是七个Spec-Bench子任务(机器翻译、对话、RAG、数学、QA、摘要、代码)以及HumanEval和GSM8K速度的吞吐量(tokens/s)。代码包含支持树注意力位掩码和预分配内存管理的改进版FlashAttention。无需外部评估服务;所有评分均在本地完成。
|
|
8
|
+
|
|
9
|
+
'
|
|
10
|
+
capability_tags:
|
|
11
|
+
- research_code_optimization
|
|
12
|
+
- large_language_models
|
|
13
|
+
- speculative_decoding
|
|
14
|
+
- cuda_kernels
|
|
15
|
+
- inference_acceleration
|
|
16
|
+
aisb_direction: T3
|
|
17
|
+
track_fit:
|
|
18
|
+
- paper_track
|
|
19
|
+
- benchmark_track
|
|
20
|
+
task_mode: evaluation_driven
|
|
21
|
+
requires_execution: true
|
|
22
|
+
requires_paper: true
|
|
23
|
+
integrity_level: cas_plus_canary
|
|
24
|
+
snapshot_status: runnable
|
|
25
|
+
support_level: advanced
|
|
26
|
+
cost_band: high
|
|
27
|
+
time_band: 6-24h
|
|
28
|
+
difficulty: hard
|
|
29
|
+
data_access: public
|
|
30
|
+
primary_outputs:
|
|
31
|
+
- mt_bench_tokens_per_second
|
|
32
|
+
- translation_tokens_per_second
|
|
33
|
+
- summarization_tokens_per_second
|
|
34
|
+
- qa_tokens_per_second
|
|
35
|
+
- math_reasoning_tokens_per_second
|
|
36
|
+
- rag_tokens_per_second
|
|
37
|
+
- task_throughput_report
|
|
38
|
+
- speed_benchmark
|
|
39
|
+
launch_profiles:
|
|
40
|
+
- id: quick_check
|
|
41
|
+
label: 快速检查
|
|
42
|
+
description: '运行单个打包的速度基准测试(例如使用Llama-3-8B-Instruct的spec_bench),以验证FR-Spec相较于EAGLE-2基线的吞吐量提升。需要已下载模型权重并编译好CUDA扩展。
|
|
43
|
+
|
|
44
|
+
'
|
|
45
|
+
- id: full_speed_suite
|
|
46
|
+
label: 完整速度套件
|
|
47
|
+
description: '为基线、EAGLE-2和FR-Spec配置运行所有三个基准测试套件(spec_bench、human_eval、gsm8k)。包括速度测量和可选的human_eval与gsm8k正确性检查。覆盖所有七个Spec-Bench子任务。
|
|
48
|
+
|
|
49
|
+
'
|
|
50
|
+
- id: freq_stats_generation
|
|
51
|
+
label: 频率统计生成
|
|
52
|
+
description: '使用fr/fr.py从SlimPajama-627B生成自定义token频率统计数据,然后再运行评估。当预计算统计数据不可用或针对不同模型时,此选项非常有用。
|
|
53
|
+
|
|
54
|
+
'
|
|
55
|
+
dataset_download:
|
|
56
|
+
primary_method: mixed
|
|
57
|
+
sources:
|
|
58
|
+
- kind: huggingface
|
|
59
|
+
url: https://huggingface.co/thunlp/LLaMA3-Instruct-8B-FR-Spec
|
|
60
|
+
access: public
|
|
61
|
+
note: Llama-3-8B-Instruct的预计算token频率统计数据。
|
|
62
|
+
- kind: huggingface
|
|
63
|
+
url: https://huggingface.co/thunlp/LLaMA3.2-Instruct-1B-FR-Spec
|
|
64
|
+
access: public
|
|
65
|
+
note: Llama-3.2-1B-Instruct的预计算token频率统计数据。
|
|
66
|
+
- kind: huggingface
|
|
67
|
+
url: https://huggingface.co/thunlp/Qwen2-7B-Instruct-FR-Spec
|
|
68
|
+
access: public
|
|
69
|
+
note: Qwen2-7B-Instruct的预计算token频率统计数据。
|
|
70
|
+
- kind: huggingface
|
|
71
|
+
url: https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct
|
|
72
|
+
access: gated
|
|
73
|
+
note: 目标LLM权重。需要接受HuggingFace上的Meta许可协议。
|
|
74
|
+
- kind: huggingface
|
|
75
|
+
url: https://huggingface.co/cerebras/SlimPajama-627B
|
|
76
|
+
access: public
|
|
77
|
+
note: fr/fr.py用于生成token频率统计的流式语料库。如果使用预计算统计数据,则不需要。
|
|
78
|
+
notes:
|
|
79
|
+
- EAGLE-2 draft模型权重也必须下载;请参阅EAGLE仓库获取URL。
|
|
80
|
+
- 目标LLM权重根据模型不同为15-30 GB;请预留50-100 GB总空间用于模型 + draft权重 + 统计数据。
|
|
81
|
+
- Spec-Bench评估数据集由评估脚本捆绑或获取。
|
|
82
|
+
credential_requirements:
|
|
83
|
+
mode: optional
|
|
84
|
+
items:
|
|
85
|
+
- HuggingFace token(仅在访问门控模型权重如Llama-3时需要)
|
|
86
|
+
notes:
|
|
87
|
+
- 如果仅使用Qwen2或已下载的权重,则不需要凭据。
|
|
88
|
+
- setup.py install_requires中列出的fschat、openai、anthropic包用于MT-Bench评判;对应的OpenAI/Anthropic API密钥在仅吞吐量运行中可能不需要。
|
|
89
|
+
resources:
|
|
90
|
+
minimum:
|
|
91
|
+
cpu_cores: 8
|
|
92
|
+
ram_gb: 32
|
|
93
|
+
disk_gb: 100
|
|
94
|
+
gpu_count: 1
|
|
95
|
+
gpu_vram_gb: 24
|
|
96
|
+
recommended:
|
|
97
|
+
cpu_cores: 16
|
|
98
|
+
ram_gb: 64
|
|
99
|
+
disk_gb: 200
|
|
100
|
+
gpu_count: 1
|
|
101
|
+
gpu_vram_gb: 48
|
|
102
|
+
notes:
|
|
103
|
+
- CUDA编译(nvcc)每个并行作业需要约8-9 GB RAM;setup.py会根据可用内存自动限制MAX_JOBS。
|
|
104
|
+
- 24 GB显存足以支持FP16精度下的Llama-3-8B;建议使用48 GB以获得充足余量和更大的批处理实验空间。
|
|
105
|
+
- 编译本身可能需要10-30分钟,具体取决于核心数量。
|
|
106
|
+
environment:
|
|
107
|
+
python: '3.11'
|
|
108
|
+
cuda: 11.8+
|
|
109
|
+
pytorch: 2.0+
|
|
110
|
+
key_packages:
|
|
111
|
+
- transformers==4.46.2
|
|
112
|
+
- accelerate==0.26.0
|
|
113
|
+
- datasets
|
|
114
|
+
- fschat
|
|
115
|
+
- human_eval
|
|
116
|
+
- pybind11
|
|
117
|
+
- psutil
|
|
118
|
+
- ninja
|
|
119
|
+
notes:
|
|
120
|
+
- 构建CUDA扩展需要nvcc,并需在setup.py中设置正确的GPU计算能力(默认arch="80"适用于A100;其他GPU请更改此值)。
|
|
121
|
+
- 该包捆绑了支持树注意力位掩码的改进版FlashAttention v2.4.2 csrc;无需外部flash-attn安装。
|
|
122
|
+
- 编译链接cublas。
|
|
123
|
+
- 请参阅setup.py install_requires获取完整的依赖集。
|
|
124
|
+
risk_flags:
|
|
125
|
+
- custom_cuda_build
|
|
126
|
+
- large_model_download
|
|
127
|
+
- gated_model_access
|
|
128
|
+
- compilation_complexity
|
|
129
|
+
risk_notes:
|
|
130
|
+
- CUDA编译需要匹配的nvcc版本和setup.py中正确的计算能力设置;配置错误会导致静默的错误结果或构建失败。
|
|
131
|
+
- 门控的Llama-3权重需要接受HuggingFace许可协议;这可能阻止完全自动化运行。
|
|
132
|
+
- EAGLE-2 draft模型权重是本快照中未完全记录的单独下载;恢复可能需要查阅EAGLE仓库。
|
|
133
|
+
- MT-Bench质量评判(而非吞吐量测量)可能需要OpenAI API密钥,但仅吞吐量评估不需要。
|
|
134
|
+
- setup.py中的默认arch="80"面向A100;其他GPU用户必须手动编辑此值。
|
|
135
|
+
recommended_when: '当您需要一个专注于自定义C/CUDA内核开发、投机解码加速和大词表模型(128k+ tokens)推理吞吐量测量的LLM系统优化任务时,请使用此基准测试。非常适合评估GPU内核优化技能并理解词汇表大小、LM Head计算与投机采样接受率之间的相互作用。
|
|
136
|
+
|
|
137
|
+
'
|
|
138
|
+
not_recommended_when: '如果您没有配备≥24 GB显存的现代NVIDIA GPU、无法编译自定义CUDA扩展,或需要一个专注于模型训练而非推理优化的任务,请勿使用此基准测试。如果需要一个没有原生代码编译的纯Python基准测试,也不适用。
|
|
139
|
+
|
|
140
|
+
'
|
|
141
|
+
paper:
|
|
142
|
+
title: 'FR-Spec: Accelerating Large-Vocabulary Language Models via Frequency-Ranked
|
|
143
|
+
Speculative Sampling'
|
|
144
|
+
venue: ACL 2025 Main
|
|
145
|
+
year: 2025
|
|
146
|
+
url: https://arxiv.org/abs/2502.14856
|
|
147
|
+
download:
|
|
148
|
+
url: https://github.com/ResearAI/DeepScientist/releases/download/aisb-v0.0.1/aisb.t3.015_frspec.zip
|
|
149
|
+
archive_type: zip
|
|
150
|
+
local_dir_name: paper-15-FRSpec
|
|
151
|
+
provider: github_release
|
|
152
|
+
repo: ResearAI/DeepScientist
|
|
153
|
+
tag: aisb-v0.0.1
|
|
154
|
+
asset_name: aisb.t3.015_frspec.zip
|
|
155
|
+
sha256: bf4aaa74d5faae067f125c2df48fa0eb7d8e120988bec5fbd748666fc6e1a9e9
|
|
156
|
+
size_bytes: 974875
|
|
157
|
+
commercial:
|
|
158
|
+
annual_fee: null
|
|
159
|
+
display:
|
|
160
|
+
palette_seed: neon-slate-kernel
|
|
161
|
+
art_style: systems-benchmark
|
|
162
|
+
accent_priority: high
|
|
163
|
+
image_path: ../image/015_aisb.t3.015_frspec.jpg
|