@researai/deepscientist 1.5.16 → 1.6.0
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +309 -130
- package/AISB/catalog/aisb.b1.agentic_coding.yaml +244 -0
- package/AISB/catalog/aisb.b10.climate_earth.yaml +235 -0
- package/AISB/catalog/aisb.b11.model_efficiency.yaml +231 -0
- package/AISB/catalog/aisb.b12.embodied_ai.yaml +238 -0
- package/AISB/catalog/aisb.b2.agent_systems.yaml +229 -0
- package/AISB/catalog/aisb.b3.self_evolving_rl.yaml +237 -0
- package/AISB/catalog/aisb.b4.lm_reasoning.yaml +240 -0
- package/AISB/catalog/aisb.b5.math_proof.yaml +235 -0
- package/AISB/catalog/aisb.b6.research_process.yaml +243 -0
- package/AISB/catalog/aisb.b7.multimodal_fusion.yaml +232 -0
- package/AISB/catalog/aisb.b8.lifesci_drug.yaml +275 -0
- package/AISB/catalog/aisb.b9.material_science.yaml +237 -0
- package/AISB/catalog/aisb.t3.001_savvy.yaml +159 -0
- package/AISB/catalog/aisb.t3.001_savvy.zh.yaml +121 -0
- package/AISB/catalog/aisb.t3.002_pinet.yaml +189 -0
- package/AISB/catalog/aisb.t3.002_pinet.zh.yaml +130 -0
- package/AISB/catalog/aisb.t3.004_decentralattn.yaml +184 -0
- package/AISB/catalog/aisb.t3.004_decentralattn.zh.yaml +153 -0
- package/AISB/catalog/aisb.t3.005_tsae.yaml +193 -0
- package/AISB/catalog/aisb.t3.005_tsae.zh.yaml +139 -0
- package/AISB/catalog/aisb.t3.006_physense.yaml +194 -0
- package/AISB/catalog/aisb.t3.006_physense.zh.yaml +118 -0
- package/AISB/catalog/aisb.t3.007_reasoningiqa.yaml +169 -0
- package/AISB/catalog/aisb.t3.007_reasoningiqa.zh.yaml +133 -0
- package/AISB/catalog/aisb.t3.008_meanflows.yaml +188 -0
- package/AISB/catalog/aisb.t3.008_meanflows.zh.yaml +140 -0
- package/AISB/catalog/aisb.t3.009_scoremissing.yaml +179 -0
- package/AISB/catalog/aisb.t3.009_scoremissing.zh.yaml +119 -0
- package/AISB/catalog/aisb.t3.010_suitabilityfilter.yaml +221 -0
- package/AISB/catalog/aisb.t3.010_suitabilityfilter.zh.yaml +141 -0
- package/AISB/catalog/aisb.t3.011_osd.yaml +206 -0
- package/AISB/catalog/aisb.t3.011_osd.zh.yaml +163 -0
- package/AISB/catalog/aisb.t3.012_efficientqat.yaml +206 -0
- package/AISB/catalog/aisb.t3.012_efficientqat.zh.yaml +159 -0
- package/AISB/catalog/aisb.t3.013_appl.yaml +152 -0
- package/AISB/catalog/aisb.t3.013_appl.zh.yaml +126 -0
- package/AISB/catalog/aisb.t3.014_piguard.yaml +207 -0
- package/AISB/catalog/aisb.t3.014_piguard.zh.yaml +164 -0
- package/AISB/catalog/aisb.t3.015_frspec.yaml +209 -0
- package/AISB/catalog/aisb.t3.015_frspec.zh.yaml +163 -0
- package/AISB/catalog/aisb.t3.016_mathfusion.yaml +166 -0
- package/AISB/catalog/aisb.t3.016_mathfusion.zh.yaml +145 -0
- package/AISB/catalog/aisb.t3.017_multimodalglp.yaml +171 -0
- package/AISB/catalog/aisb.t3.017_multimodalglp.zh.yaml +122 -0
- package/AISB/catalog/aisb.t3.018_cotsynth.yaml +206 -0
- package/AISB/catalog/aisb.t3.018_cotsynth.zh.yaml +162 -0
- package/AISB/catalog/aisb.t3.019_dyscaleut.yaml +211 -0
- package/AISB/catalog/aisb.t3.019_dyscaleut.zh.yaml +148 -0
- package/AISB/catalog/aisb.t3.020_aristotle.yaml +173 -0
- package/AISB/catalog/aisb.t3.020_aristotle.zh.yaml +119 -0
- package/AISB/catalog/aisb.t3.021_tokenrecycling.yaml +160 -0
- package/AISB/catalog/aisb.t3.021_tokenrecycling.zh.yaml +129 -0
- package/AISB/catalog/aisb.t3.022_chainofreasoning.yaml +204 -0
- package/AISB/catalog/aisb.t3.022_chainofreasoning.zh.yaml +161 -0
- package/AISB/catalog/aisb.t3.023_guidedembed.yaml +211 -0
- package/AISB/catalog/aisb.t3.023_guidedembed.zh.yaml +189 -0
- package/AISB/catalog/aisb.t3.024_outputcentric.yaml +148 -0
- package/AISB/catalog/aisb.t3.024_outputcentric.zh.yaml +131 -0
- package/AISB/catalog/aisb.t3.025_deeper.yaml +143 -0
- package/AISB/catalog/aisb.t3.025_deeper.zh.yaml +116 -0
- package/AISB/catalog/aisb.t3.026_gartkg.yaml +195 -0
- package/AISB/catalog/aisb.t3.026_gartkg.zh.yaml +127 -0
- package/AISB/catalog/aisb.t3.027_citeeval.yaml +182 -0
- package/AISB/catalog/aisb.t3.027_citeeval.zh.yaml +135 -0
- package/AISB/catalog/aisb.t3.028_sbam.yaml +206 -0
- package/AISB/catalog/aisb.t3.028_sbam.zh.yaml +166 -0
- package/AISB/catalog/aisb.t3.029_cdqgeoembed.yaml +224 -0
- package/AISB/catalog/aisb.t3.029_cdqgeoembed.zh.yaml +142 -0
- package/AISB/catalog/aisb.t3.030_processrm.yaml +211 -0
- package/AISB/catalog/aisb.t3.030_processrm.zh.yaml +166 -0
- package/AISB/catalog/aisb.t3.031_circuitstability.yaml +172 -0
- package/AISB/catalog/aisb.t3.031_circuitstability.zh.yaml +134 -0
- package/AISB/catalog/aisb.t3.032_ptsolver.yaml +169 -0
- package/AISB/catalog/aisb.t3.032_ptsolver.zh.yaml +135 -0
- package/AISB/catalog/aisb.t3.033_gcse.yaml +144 -0
- package/AISB/catalog/aisb.t3.033_gcse.zh.yaml +126 -0
- package/AISB/catalog/aisb.t3.034_ensemblewm.yaml +183 -0
- package/AISB/catalog/aisb.t3.034_ensemblewm.zh.yaml +146 -0
- package/AISB/catalog/aisb.t3.035_moralvalueswa.yaml +207 -0
- package/AISB/catalog/aisb.t3.035_moralvalueswa.zh.yaml +165 -0
- package/AISB/catalog/aisb.t3.036_weakstrongpref.yaml +210 -0
- package/AISB/catalog/aisb.t3.036_weakstrongpref.zh.yaml +194 -0
- package/AISB/catalog/aisb.t3.037_dementiamask.yaml +172 -0
- package/AISB/catalog/aisb.t3.037_dementiamask.zh.yaml +132 -0
- package/AISB/catalog/aisb.t3.038_tinysam.yaml +284 -0
- package/AISB/catalog/aisb.t3.038_tinysam.zh.yaml +240 -0
- package/AISB/catalog/aisb.t3.039_calf.yaml +224 -0
- package/AISB/catalog/aisb.t3.039_calf.zh.yaml +194 -0
- package/AISB/catalog/aisb.t3.040_graniteguardian.yaml +199 -0
- package/AISB/catalog/aisb.t3.040_graniteguardian.zh.yaml +174 -0
- package/AISB/catalog/aisb.t3.041_amdm.yaml +149 -0
- package/AISB/catalog/aisb.t3.041_amdm.zh.yaml +137 -0
- package/AISB/catalog/aisb.t3.042_xpatch.yaml +216 -0
- package/AISB/catalog/aisb.t3.042_xpatch.zh.yaml +182 -0
- package/AISB/catalog/aisb.t3.043_vhm.yaml +268 -0
- package/AISB/catalog/aisb.t3.043_vhm.zh.yaml +193 -0
- package/AISB/catalog/aisb.t3.044_rgvi.yaml +224 -0
- package/AISB/catalog/aisb.t3.044_rgvi.zh.yaml +176 -0
- package/AISB/catalog/aisb.t3.045_pslstm.yaml +203 -0
- package/AISB/catalog/aisb.t3.045_pslstm.zh.yaml +179 -0
- package/AISB/catalog/aisb.t3.046_nonstatts.yaml +208 -0
- package/AISB/catalog/aisb.t3.046_nonstatts.zh.yaml +194 -0
- package/AISB/catalog/aisb.t3.047_timepfn.yaml +156 -0
- package/AISB/catalog/aisb.t3.047_timepfn.zh.yaml +124 -0
- package/AISB/catalog/aisb.t3.048_proxyspex.yaml +148 -0
- package/AISB/catalog/aisb.t3.048_proxyspex.zh.yaml +125 -0
- package/AISB/catalog/aisb.t3.049_hogwildinference.yaml +183 -0
- package/AISB/catalog/aisb.t3.049_hogwildinference.zh.yaml +138 -0
- package/AISB/catalog/aisb.t3.050_causalpfn.yaml +214 -0
- package/AISB/catalog/aisb.t3.050_causalpfn.zh.yaml +190 -0
- package/AISB/catalog/aisb.t3.051_flashtp.yaml +169 -0
- package/AISB/catalog/aisb.t3.051_flashtp.zh.yaml +124 -0
- package/AISB/catalog/aisb.t3.052_nsdiff.yaml +155 -0
- package/AISB/catalog/aisb.t3.052_nsdiff.zh.yaml +138 -0
- package/AISB/catalog/aisb.t3.053_k2vae.yaml +158 -0
- package/AISB/catalog/aisb.t3.053_k2vae.zh.yaml +132 -0
- package/AISB/catalog/aisb.t3.054_timebase.yaml +178 -0
- package/AISB/catalog/aisb.t3.054_timebase.zh.yaml +158 -0
- package/AISB/catalog/aisb.t3.055_csbrain.yaml +238 -0
- package/AISB/catalog/aisb.t3.055_csbrain.zh.yaml +184 -0
- package/AISB/catalog/aisb.t3.056_infosam.yaml +224 -0
- package/AISB/catalog/aisb.t3.056_infosam.zh.yaml +189 -0
- package/AISB/catalog/aisb.t3.057_mdreid.yaml +129 -0
- package/AISB/catalog/aisb.t3.057_mdreid.zh.yaml +117 -0
- package/AISB/catalog/aisb.t3.058_mindglitch.yaml +171 -0
- package/AISB/catalog/aisb.t3.058_mindglitch.zh.yaml +145 -0
- package/AISB/catalog/aisb.t3.059_selfsupervised.yaml +154 -0
- package/AISB/catalog/aisb.t3.059_selfsupervised.zh.yaml +125 -0
- package/AISB/catalog/aisb.t3.060_iaggad.yaml +121 -0
- package/AISB/catalog/aisb.t3.060_iaggad.zh.yaml +100 -0
- package/AISB/catalog/aisb.t3.061_hsgkn.yaml +136 -0
- package/AISB/catalog/aisb.t3.061_hsgkn.zh.yaml +113 -0
- package/AISB/catalog/aisb.t3.062_visionts.yaml +237 -0
- package/AISB/catalog/aisb.t3.062_visionts.zh.yaml +216 -0
- package/AISB/catalog/aisb.t3.063_tsrag.yaml +162 -0
- package/AISB/catalog/aisb.t3.063_tsrag.zh.yaml +138 -0
- package/AISB/catalog/aisb.t3.064_pir.yaml +221 -0
- package/AISB/catalog/aisb.t3.064_pir.zh.yaml +197 -0
- package/AISB/catalog/aisb.t3.065_proteinbinding.yaml +234 -0
- package/AISB/catalog/aisb.t3.065_proteinbinding.zh.yaml +167 -0
- package/AISB/catalog/aisb.t3.066_tropicalattention.yaml +267 -0
- package/AISB/catalog/aisb.t3.066_tropicalattention.zh.yaml +229 -0
- package/AISB/catalog/aisb.t3.067_kanad.yaml +193 -0
- package/AISB/catalog/aisb.t3.067_kanad.zh.yaml +167 -0
- package/AISB/catalog/aisb.t3.068_sempo.yaml +187 -0
- package/AISB/catalog/aisb.t3.068_sempo.zh.yaml +148 -0
- package/AISB/catalog/aisb.t3.069_treehfd.yaml +129 -0
- package/AISB/catalog/aisb.t3.069_treehfd.zh.yaml +111 -0
- package/AISB/catalog/aisb.t3.070_certifiedunlearning.yaml +224 -0
- package/AISB/catalog/aisb.t3.070_certifiedunlearning.zh.yaml +171 -0
- package/AISB/catalog/aisb.t3.071_neuralmjd.yaml +142 -0
- package/AISB/catalog/aisb.t3.071_neuralmjd.zh.yaml +120 -0
- package/AISB/catalog/aisb.t3.072_fedgmt.yaml +181 -0
- package/AISB/catalog/aisb.t3.072_fedgmt.zh.yaml +158 -0
- package/AISB/catalog/aisb.t3.073_rld.yaml +161 -0
- package/AISB/catalog/aisb.t3.073_rld.zh.yaml +129 -0
- package/AISB/catalog/aisb.t3.074_lsvi.yaml +163 -0
- package/AISB/catalog/aisb.t3.074_lsvi.zh.yaml +129 -0
- package/AISB/catalog/aisb.t3.075_treeslicedentropy.yaml +201 -0
- package/AISB/catalog/aisb.t3.075_treeslicedentropy.zh.yaml +148 -0
- package/AISB/catalog/aisb.t3.076_aanet.yaml +169 -0
- package/AISB/catalog/aisb.t3.076_aanet.zh.yaml +129 -0
- package/AISB/catalog/aisb.t3.077_cmnn.yaml +199 -0
- package/AISB/catalog/aisb.t3.077_cmnn.zh.yaml +165 -0
- package/AISB/catalog/aisb.t3.078_conformalanomaly.yaml +146 -0
- package/AISB/catalog/aisb.t3.078_conformalanomaly.zh.yaml +117 -0
- package/AISB/catalog/aisb.t3.079_dpfkmeans.yaml +131 -0
- package/AISB/catalog/aisb.t3.079_dpfkmeans.zh.yaml +104 -0
- package/AISB/catalog/aisb.t3.080_latentscorereweight.yaml +169 -0
- package/AISB/catalog/aisb.t3.080_latentscorereweight.zh.yaml +123 -0
- package/AISB/catalog/aisb.t3.081_qmamba.yaml +150 -0
- package/AISB/catalog/aisb.t3.081_qmamba.zh.yaml +117 -0
- package/AISB/catalog/aisb.t3.082_onlinellmrouting.yaml +160 -0
- package/AISB/catalog/aisb.t3.082_onlinellmrouting.zh.yaml +133 -0
- package/AISB/catalog/aisb.t3.083_starformer.yaml +178 -0
- package/AISB/catalog/aisb.t3.083_starformer.zh.yaml +140 -0
- package/AISB/catalog/aisb.t3.084_ift.yaml +139 -0
- package/AISB/catalog/aisb.t3.084_ift.zh.yaml +111 -0
- package/AISB/catalog/aisb.t3.085_neuralsurv.yaml +183 -0
- package/AISB/catalog/aisb.t3.085_neuralsurv.zh.yaml +143 -0
- package/AISB/catalog/aisb.t3.086_stella.yaml +197 -0
- package/AISB/catalog/aisb.t3.086_stella.zh.yaml +142 -0
- package/AISB/catalog/aisb.t3.087_moses.yaml +167 -0
- package/AISB/catalog/aisb.t3.087_moses.zh.yaml +132 -0
- package/AISB/catalog/aisb.t3.088_channelnorm.yaml +140 -0
- package/AISB/catalog/aisb.t3.088_channelnorm.zh.yaml +109 -0
- package/AISB/catalog/aisb.t3.089_causalvelocity.yaml +730 -0
- package/AISB/catalog/aisb.t3.089_causalvelocity.zh.yaml +668 -0
- package/AISB/catalog/aisb.t3.090_rstib.yaml +144 -0
- package/AISB/catalog/aisb.t3.090_rstib.zh.yaml +109 -0
- package/AISB/catalog/aisb.t3.091_timeawarecausal.yaml +132 -0
- package/AISB/catalog/aisb.t3.091_timeawarecausal.zh.yaml +107 -0
- package/AISB/catalog/aisb.t3.092_kmeanslocalopt.yaml +138 -0
- package/AISB/catalog/aisb.t3.092_kmeanslocalopt.zh.yaml +110 -0
- package/AISB/catalog/aisb.t3.093_fedwmsam.yaml +134 -0
- package/AISB/catalog/aisb.t3.093_fedwmsam.zh.yaml +106 -0
- package/AISB/catalog/aisb.t3.094_boundre.yaml +147 -0
- package/AISB/catalog/aisb.t3.094_boundre.zh.yaml +114 -0
- package/AISB/catalog/aisb.t3.095_fastfeaturecp.yaml +153 -0
- package/AISB/catalog/aisb.t3.095_fastfeaturecp.zh.yaml +118 -0
- package/AISB/catalog/aisb.t3.096_m3svm.yaml +189 -0
- package/AISB/catalog/aisb.t3.096_m3svm.zh.yaml +149 -0
- package/AISB/catalog/aisb.t3.097_wassersteintl.yaml +212 -0
- package/AISB/catalog/aisb.t3.097_wassersteintl.zh.yaml +169 -0
- package/AISB/catalog/aisb.t3.098_xmahalanobis.yaml +171 -0
- package/AISB/catalog/aisb.t3.098_xmahalanobis.zh.yaml +127 -0
- package/AISB/catalog/aisb.t3.099_ollalanding.yaml +248 -0
- package/AISB/catalog/aisb.t3.099_ollalanding.zh.yaml +182 -0
- package/AISB/catalog/aisb.t3.100_invmissingdata.yaml +179 -0
- package/AISB/catalog/aisb.t3.100_invmissingdata.zh.yaml +150 -0
- package/AISB/catalog/aisb.t3.101_acia.yaml +164 -0
- package/AISB/catalog/aisb.t3.101_acia.zh.yaml +109 -0
- package/AISB/catalog/aisb.t3.102_stochasticff.yaml +178 -0
- package/AISB/catalog/aisb.t3.102_stochasticff.zh.yaml +130 -0
- package/AISB/catalog/aisb.t3.103_qdcp.yaml +150 -0
- package/AISB/catalog/aisb.t3.103_qdcp.zh.yaml +116 -0
- package/AISB/catalog/aisb.t3.104_balancedactiveinf.yaml +137 -0
- package/AISB/catalog/aisb.t3.104_balancedactiveinf.zh.yaml +104 -0
- package/AISB/catalog/aisb.t3.105_binaryclasseval.yaml +161 -0
- package/AISB/catalog/aisb.t3.105_binaryclasseval.zh.yaml +130 -0
- package/AISB/image/001_aisb.t3.001_savvy.jpg +0 -0
- package/AISB/image/002_aisb.t3.002_pinet.jpg +0 -0
- package/AISB/image/003_aisb.t3.003_dmsqd.jpg +0 -0
- package/AISB/image/004_aisb.t3.004_decentralattn.jpg +0 -0
- package/AISB/image/005_aisb.t3.005_tsae.jpg +0 -0
- package/AISB/image/006_aisb.t3.006_physense.jpg +0 -0
- package/AISB/image/007_aisb.t3.007_reasoningiqa.jpg +0 -0
- package/AISB/image/008_aisb.t3.008_meanflows.jpg +0 -0
- package/AISB/image/009_aisb.t3.009_scoremissing.jpg +0 -0
- package/AISB/image/010_aisb.t3.010_suitabilityfilter.jpg +0 -0
- package/AISB/image/011_aisb.t3.011_osd.jpg +0 -0
- package/AISB/image/012_aisb.t3.012_efficientqat.jpg +0 -0
- package/AISB/image/013_aisb.t3.013_appl.jpg +0 -0
- package/AISB/image/014_aisb.t3.014_piguard.jpg +0 -0
- package/AISB/image/015_aisb.t3.015_frspec.jpg +0 -0
- package/AISB/image/016_aisb.t3.016_mathfusion.jpg +0 -0
- package/AISB/image/017_aisb.t3.017_multimodalglp.jpg +0 -0
- package/AISB/image/018_aisb.t3.018_cotsynth.jpg +0 -0
- package/AISB/image/019_aisb.t3.019_dyscaleut.jpg +0 -0
- package/AISB/image/020_aisb.t3.020_aristotle.jpg +0 -0
- package/AISB/image/021_aisb.t3.021_tokenrecycling.jpg +0 -0
- package/AISB/image/022_aisb.t3.022_chainofreasoning.jpg +0 -0
- package/AISB/image/023_aisb.t3.023_guidedembed.jpg +0 -0
- package/AISB/image/024_aisb.t3.024_outputcentric.jpg +0 -0
- package/AISB/image/025_aisb.t3.025_deeper.jpg +0 -0
- package/AISB/image/026_aisb.t3.026_gartkg.jpg +0 -0
- package/AISB/image/027_aisb.t3.027_citeeval.jpg +0 -0
- package/AISB/image/028_aisb.t3.028_sbam.jpg +0 -0
- package/AISB/image/029_aisb.t3.029_cdqgeoembed.jpg +0 -0
- package/AISB/image/030_aisb.t3.030_processrm.jpg +0 -0
- package/AISB/image/031_aisb.t3.031_circuitstability.jpg +0 -0
- package/AISB/image/032_aisb.t3.032_ptsolver.jpg +0 -0
- package/AISB/image/033_aisb.t3.033_gcse.jpg +0 -0
- package/AISB/image/034_aisb.t3.034_ensemblewm.jpg +0 -0
- package/AISB/image/035_aisb.t3.035_moralvalueswa.jpg +0 -0
- package/AISB/image/036_aisb.t3.036_weakstrongpref.jpg +0 -0
- package/AISB/image/037_aisb.t3.037_dementiamask.jpg +0 -0
- package/AISB/image/038_aisb.t3.038_tinysam.jpg +0 -0
- package/AISB/image/039_aisb.t3.039_calf.jpg +0 -0
- package/AISB/image/040_aisb.t3.040_graniteguardian.jpg +0 -0
- package/AISB/image/041_aisb.t3.041_amdm.jpg +0 -0
- package/AISB/image/042_aisb.t3.042_xpatch.jpg +0 -0
- package/AISB/image/043_aisb.t3.043_vhm.jpg +0 -0
- package/AISB/image/044_aisb.t3.044_rgvi.jpg +0 -0
- package/AISB/image/045_aisb.t3.045_pslstm.jpg +0 -0
- package/AISB/image/046_aisb.t3.046_nonstatts.jpg +0 -0
- package/AISB/image/047_aisb.t3.047_timepfn.jpg +0 -0
- package/AISB/image/048_aisb.t3.048_proxyspex.jpg +0 -0
- package/AISB/image/049_aisb.t3.049_hogwildinference.jpg +0 -0
- package/AISB/image/050_aisb.t3.050_causalpfn.jpg +0 -0
- package/AISB/image/051_aisb.t3.051_flashtp.jpg +0 -0
- package/AISB/image/052_aisb.t3.052_nsdiff.jpg +0 -0
- package/AISB/image/053_aisb.t3.053_k2vae.jpg +0 -0
- package/AISB/image/054_aisb.t3.054_timebase.jpg +0 -0
- package/AISB/image/055_aisb.t3.055_csbrain.jpg +0 -0
- package/AISB/image/056_aisb.t3.056_infosam.jpg +0 -0
- package/AISB/image/057_aisb.t3.057_mdreid.jpg +0 -0
- package/AISB/image/058_aisb.t3.058_mindglitch.jpg +0 -0
- package/AISB/image/059_aisb.t3.059_selfsupervised.jpg +0 -0
- package/AISB/image/060_aisb.t3.060_iaggad.jpg +0 -0
- package/AISB/image/061_aisb.t3.061_hsgkn.jpg +0 -0
- package/AISB/image/062_aisb.t3.062_visionts.jpg +0 -0
- package/AISB/image/063_aisb.t3.063_tsrag.jpg +0 -0
- package/AISB/image/064_aisb.t3.064_pir.jpg +0 -0
- package/AISB/image/065_aisb.t3.065_proteinbinding.jpg +0 -0
- package/AISB/image/066_aisb.t3.066_tropicalattention.jpg +0 -0
- package/AISB/image/067_aisb.t3.067_kanad.jpg +0 -0
- package/AISB/image/068_aisb.t3.068_sempo.jpg +0 -0
- package/AISB/image/069_aisb.t3.069_treehfd.jpg +0 -0
- package/AISB/image/070_aisb.t3.070_certifiedunlearning.jpg +0 -0
- package/AISB/image/071_aisb.t3.071_neuralmjd.jpg +0 -0
- package/AISB/image/072_aisb.t3.072_fedgmt.jpg +0 -0
- package/AISB/image/073_aisb.t3.073_rld.jpg +0 -0
- package/AISB/image/074_aisb.t3.074_lsvi.jpg +0 -0
- package/AISB/image/075_aisb.t3.075_treeslicedentropy.jpg +0 -0
- package/AISB/image/076_aisb.t3.076_aanet.jpg +0 -0
- package/AISB/image/077_aisb.t3.077_cmnn.jpg +0 -0
- package/AISB/image/078_aisb.t3.078_conformalanomaly.jpg +0 -0
- package/AISB/image/079_aisb.t3.079_dpfkmeans.jpg +0 -0
- package/AISB/image/080_aisb.t3.080_latentscorereweight.jpg +0 -0
- package/AISB/image/081_aisb.t3.081_qmamba.jpg +0 -0
- package/AISB/image/082_aisb.t3.082_onlinellmrouting.jpg +0 -0
- package/AISB/image/083_aisb.t3.083_starformer.jpg +0 -0
- package/AISB/image/084_aisb.t3.084_ift.jpg +0 -0
- package/AISB/image/085_aisb.t3.085_neuralsurv.jpg +0 -0
- package/AISB/image/086_aisb.t3.086_stella.jpg +0 -0
- package/AISB/image/087_aisb.t3.087_moses.jpg +0 -0
- package/AISB/image/088_aisb.t3.088_channelnorm.jpg +0 -0
- package/AISB/image/089_aisb.t3.089_causalvelocity.jpg +0 -0
- package/AISB/image/090_aisb.t3.090_rstib.jpg +0 -0
- package/AISB/image/091_aisb.t3.091_timeawarecausal.jpg +0 -0
- package/AISB/image/092_aisb.t3.092_kmeanslocalopt.jpg +0 -0
- package/AISB/image/093_aisb.t3.093_fedwmsam.jpg +0 -0
- package/AISB/image/094_aisb.t3.094_boundre.jpg +0 -0
- package/AISB/image/095_aisb.t3.095_fastfeaturecp.jpg +0 -0
- package/AISB/image/096_aisb.t3.096_m3svm.jpg +0 -0
- package/AISB/image/097_aisb.t3.097_wassersteintl.jpg +0 -0
- package/AISB/image/098_aisb.t3.098_xmahalanobis.jpg +0 -0
- package/AISB/image/099_aisb.t3.099_ollalanding.jpg +0 -0
- package/AISB/image/100_aisb.t3.100_invmissingdata.jpg +0 -0
- package/AISB/image/101_aisb.t3.101_acia.jpg +0 -0
- package/AISB/image/102_aisb.t3.102_stochasticff.jpg +0 -0
- package/AISB/image/103_aisb.t3.103_qdcp.jpg +0 -0
- package/AISB/image/104_aisb.t3.104_balancedactiveinf.jpg +0 -0
- package/AISB/image/105_aisb.t3.105_binaryclasseval.jpg +0 -0
- package/AISB/image/106_aisb.t1.reasoning_lite.jpg +0 -0
- package/AISB/image/107_aisb.t2.paper_audit.jpg +0 -0
- package/AISB/image/108_aisb.t3.multi_gpu_search.jpg +0 -0
- package/AISB/image/109_aisb.t3.tdc_admet.jpg +0 -0
- package/AISB/image/aisb.b1.agentic_coding.svg +16 -0
- package/AISB/image/aisb.b10.climate_earth.svg +16 -0
- package/AISB/image/aisb.b11.model_efficiency.svg +16 -0
- package/AISB/image/aisb.b12.embodied_ai.svg +16 -0
- package/AISB/image/aisb.b2.agent_systems.svg +16 -0
- package/AISB/image/aisb.b3.self_evolving_rl.svg +16 -0
- package/AISB/image/aisb.b4.lm_reasoning.svg +16 -0
- package/AISB/image/aisb.b5.math_proof.svg +16 -0
- package/AISB/image/aisb.b6.research_process.svg +16 -0
- package/AISB/image/aisb.b7.multimodal_fusion.svg +16 -0
- package/AISB/image/aisb.b8.lifesci_drug.svg +16 -0
- package/AISB/image/aisb.b9.material_science.svg +16 -0
- package/README.md +196 -32
- package/bin/ds.js +924 -66
- package/docs/en/00_QUICK_START.md +195 -18
- package/docs/en/01_SETTINGS_REFERENCE.md +468 -96
- package/docs/en/02_START_RESEARCH_GUIDE.md +26 -5
- package/docs/en/03_QQ_CONNECTOR_GUIDE.md +14 -3
- package/docs/en/04_LINGZHU_CONNECTOR_GUIDE.md +2 -0
- package/docs/en/05_TUI_GUIDE.md +171 -2
- package/docs/en/07_MEMORY_AND_MCP.md +38 -2
- package/docs/en/09_DOCTOR.md +78 -7
- package/docs/en/10_WEIXIN_CONNECTOR_GUIDE.md +38 -1
- package/docs/en/11_LICENSE_AND_RISK.md +4 -0
- package/docs/en/12_GUIDED_WORKFLOW_TOUR.md +15 -0
- package/docs/en/14_PROMPT_SKILLS_AND_MCP_GUIDE.md +9 -0
- package/docs/en/15_CODEX_PROVIDER_SETUP.md +624 -180
- package/docs/en/16_TELEGRAM_CONNECTOR_GUIDE.md +14 -0
- package/docs/en/17_WHATSAPP_CONNECTOR_GUIDE.md +14 -0
- package/docs/en/18_FEISHU_CONNECTOR_GUIDE.md +14 -0
- package/docs/en/21_LOCAL_MODEL_BACKENDS_GUIDE.md +386 -0
- package/docs/en/22_BENCHSTORE_YAML_REFERENCE.md +469 -0
- package/docs/en/23_BENCHSTORE_GITHUB_RELEASES_SPEC.md +316 -0
- package/docs/en/24_CLAUDE_CODE_PROVIDER_SETUP.md +469 -0
- package/docs/en/25_OPENCODE_PROVIDER_SETUP.md +653 -0
- package/docs/en/26_CITATION_AND_ATTRIBUTION.md +119 -0
- package/docs/en/27_KIMI_CODE_PROVIDER_SETUP.md +180 -0
- package/docs/en/28_DISCORD_CONNECTOR_GUIDE.md +61 -0
- package/docs/en/29_SLACK_CONNECTOR_GUIDE.md +60 -0
- package/docs/en/30_SETTINGS_CONTROL_CENTER_GUIDE.md +371 -0
- package/docs/en/{19_LOCAL_BROWSER_AUTH.md → 31_LOCAL_BROWSER_AUTH.md} +1 -1
- package/docs/en/32_WINDOWS_WSL2_DEPLOYMENT_GUIDE.md +273 -0
- package/docs/en/33_WORKSPACE_EXPLORER_QA.md +121 -0
- package/docs/en/91_DEVELOPMENT.md +266 -0
- package/docs/en/99_ACKNOWLEDGEMENTS.md +24 -19
- package/docs/en/README.md +48 -7
- package/docs/images/admin/admin-connectors-health-en.png +0 -0
- package/docs/images/admin/admin-controllers-en.png +0 -0
- package/docs/images/admin/admin-diagnostics-en.png +0 -0
- package/docs/images/admin/admin-errors-en.png +0 -0
- package/docs/images/admin/admin-issues-en.png +0 -0
- package/docs/images/admin/admin-logs-en.png +0 -0
- package/docs/images/admin/admin-quest-detail-en.png +0 -0
- package/docs/images/admin/admin-quests-en.png +0 -0
- package/docs/images/admin/admin-repairs-en.png +0 -0
- package/docs/images/admin/admin-runtime-en.png +0 -0
- package/docs/images/admin/admin-search-en.png +0 -0
- package/docs/images/admin/admin-stats-en.png +0 -0
- package/docs/images/admin/admin-summary-en.png +0 -0
- package/docs/images/connectors/connector-discord-en.png +0 -0
- package/docs/images/connectors/connector-feishu-en.png +0 -0
- package/docs/images/connectors/connector-lingzhu-en.png +0 -0
- package/docs/images/connectors/connector-qq-en.png +0 -0
- package/docs/images/connectors/connector-slack-en.png +0 -0
- package/docs/images/connectors/connector-telegram-en.png +0 -0
- package/docs/images/connectors/connector-weixin-en.png +0 -0
- package/docs/images/connectors/connector-whatsapp-en.png +0 -0
- package/docs/images/settings/settings-baselines-en.png +0 -0
- package/docs/images/settings/settings-config-en.png +0 -0
- package/docs/images/settings/settings-connectors-overview-en.png +0 -0
- package/docs/images/settings/settings-deepxiv-en.png +0 -0
- package/docs/images/settings/settings-mcp-servers-en.png +0 -0
- package/docs/images/settings/settings-plugins-en.png +0 -0
- package/docs/images/settings/settings-runners-en.png +0 -0
- package/docs/zh/00_QUICK_START.md +142 -18
- package/docs/zh/01_SETTINGS_REFERENCE.md +219 -98
- package/docs/zh/02_START_RESEARCH_GUIDE.md +26 -5
- package/docs/zh/05_TUI_GUIDE.md +171 -2
- package/docs/zh/07_MEMORY_AND_MCP.md +29 -2
- package/docs/zh/09_DOCTOR.md +54 -8
- package/docs/zh/10_WEIXIN_CONNECTOR_GUIDE.md +24 -1
- package/docs/zh/11_LICENSE_AND_RISK.md +4 -0
- package/docs/zh/12_GUIDED_WORKFLOW_TOUR.md +15 -0
- package/docs/zh/14_PROMPT_SKILLS_AND_MCP_GUIDE.md +9 -0
- package/docs/zh/15_CODEX_PROVIDER_SETUP.md +552 -181
- package/docs/zh/21_LOCAL_MODEL_BACKENDS_GUIDE.md +384 -0
- package/docs/zh/22_BENCHSTORE_YAML_REFERENCE.md +459 -0
- package/docs/zh/23_BENCHSTORE_GITHUB_RELEASES_SPEC.md +287 -0
- package/docs/zh/23_CLAUDE_RUNNER_GUIDE.md +103 -0
- package/docs/zh/24_CLAUDE_CODE_PROVIDER_SETUP.md +460 -0
- package/docs/zh/25_OPENCODE_PROVIDER_SETUP.md +660 -0
- package/docs/zh/26_CITATION_AND_ATTRIBUTION.md +102 -0
- package/docs/zh/27_KIMI_CODE_PROVIDER_SETUP.md +51 -0
- package/docs/zh/{19_LOCAL_BROWSER_AUTH.md → 31_LOCAL_BROWSER_AUTH.md} +1 -1
- package/docs/zh/32_WINDOWS_WSL2_DEPLOYMENT_GUIDE.md +264 -0
- package/docs/zh/33_WORKSPACE_EXPLORER_QA.md +127 -0
- package/docs/zh/99_ACKNOWLEDGEMENTS.md +23 -19
- package/docs/zh/README.md +33 -7
- package/install.sh +168 -20
- package/package.json +5 -1
- package/pyproject.toml +2 -1
- package/src/deepscientist/__init__.py +1 -1
- package/src/deepscientist/acp/envelope.py +13 -0
- package/src/deepscientist/admin/__init__.py +3 -0
- package/src/deepscientist/admin/charts.py +681 -0
- package/src/deepscientist/admin/logs.py +119 -0
- package/src/deepscientist/admin/repairs.py +217 -0
- package/src/deepscientist/admin/service.py +1310 -0
- package/src/deepscientist/admin/system_info.py +700 -0
- package/src/deepscientist/admin/tasks.py +465 -0
- package/src/deepscientist/admin/tool_metrics.py +600 -0
- package/src/deepscientist/artifact/guidance.py +8 -4
- package/src/deepscientist/artifact/schemas.py +115 -0
- package/src/deepscientist/artifact/service.py +4268 -260
- package/src/deepscientist/bash_exec/monitor.py +30 -3
- package/src/deepscientist/bash_exec/service.py +134 -1
- package/src/deepscientist/benchstore/__init__.py +4 -0
- package/src/deepscientist/benchstore/prompt_builder.py +224 -0
- package/src/deepscientist/benchstore/service.py +1716 -0
- package/src/deepscientist/bridges/connectors.py +8 -2
- package/src/deepscientist/channels/weixin_ilink.py +8 -1
- package/src/deepscientist/cli.py +92 -17
- package/src/deepscientist/codex_cli_compat.py +187 -74
- package/src/deepscientist/config/models.py +82 -11
- package/src/deepscientist/config/service.py +1077 -93
- package/src/deepscientist/connector/weixin_support.py +48 -17
- package/src/deepscientist/daemon/api/handlers.py +827 -235
- package/src/deepscientist/daemon/api/router.py +81 -1
- package/src/deepscientist/daemon/app.py +1512 -85
- package/src/deepscientist/diagnostics/__init__.py +6 -0
- package/src/deepscientist/diagnostics/runner_failures.py +277 -0
- package/src/deepscientist/doctor.py +407 -56
- package/src/deepscientist/evidence_packets.py +590 -0
- package/src/deepscientist/home.py +52 -4
- package/src/deepscientist/kimi_cli_compat.py +50 -0
- package/src/deepscientist/latex_runtime.py +2 -2
- package/src/deepscientist/mcp/context.py +2 -0
- package/src/deepscientist/mcp/schemas.py +114 -0
- package/src/deepscientist/mcp/server.py +1566 -126
- package/src/deepscientist/memory/service.py +203 -16
- package/src/deepscientist/process_control.py +8 -1
- package/src/deepscientist/prompts/builder.py +850 -88
- package/src/deepscientist/quest/__init__.py +2 -2
- package/src/deepscientist/quest/layout.py +12 -1
- package/src/deepscientist/quest/node_traces.py +10 -0
- package/src/deepscientist/quest/service.py +1852 -161
- package/src/deepscientist/quest/stage_views.py +1 -1
- package/src/deepscientist/runners/__init__.py +18 -0
- package/src/deepscientist/runners/base.py +89 -1
- package/src/deepscientist/runners/builtins.py +13 -1
- package/src/deepscientist/runners/claude.py +391 -0
- package/src/deepscientist/runners/codex.py +480 -35
- package/src/deepscientist/runners/codex_telemetry.py +127 -0
- package/src/deepscientist/runners/kimi.py +334 -0
- package/src/deepscientist/runners/metadata.py +68 -0
- package/src/deepscientist/runners/opencode.py +414 -0
- package/src/deepscientist/runners/runtime_overrides.py +100 -0
- package/src/deepscientist/runners/simple_cli.py +538 -0
- package/src/deepscientist/runtime_storage.py +303 -0
- package/src/deepscientist/shared.py +80 -16
- package/src/deepscientist/skills/installer.py +37 -0
- package/src/deepscientist/skills/registry.py +2 -0
- package/src/deepscientist/tinytex.py +2 -2
- package/src/deepscientist/tui.py +10 -3
- package/src/prompts/benchstore/system.md +77 -0
- package/src/prompts/connectors/qq.md +33 -2
- package/src/prompts/connectors/weixin.md +208 -23
- package/src/prompts/contracts/admin_ops.md +74 -0
- package/src/prompts/contracts/admin_ops_knowledge.md +138 -0
- package/src/prompts/contracts/shared_interaction.md +5 -10
- package/src/prompts/start_setup/system.md +422 -0
- package/src/prompts/system.md +411 -304
- package/src/prompts/system_copilot.md +89 -0
- package/src/skills/analysis-campaign/SKILL.md +239 -578
- package/src/skills/analysis-campaign/references/artifact-flow-examples.md +102 -0
- package/src/skills/analysis-campaign/references/boundary-cases.md +98 -0
- package/src/skills/analysis-campaign/references/campaign-checklist-template.md +39 -24
- package/src/skills/analysis-campaign/references/campaign-design.md +26 -10
- package/src/skills/analysis-campaign/references/campaign-plan-template.md +53 -54
- package/src/skills/analysis-campaign/references/operational-guidance.md +97 -0
- package/src/skills/analysis-campaign/references/writing-facing-slice-examples.md +10 -20
- package/src/skills/baseline/SKILL.md +183 -461
- package/src/skills/baseline/references/artifact-flow-examples.md +106 -0
- package/src/skills/baseline/references/artifact-payload-examples.md +1 -1
- package/src/skills/baseline/references/baseline-checklist-template.md +27 -35
- package/src/skills/baseline/references/baseline-plan-template.md +37 -76
- package/src/skills/baseline/references/boundary-cases.md +86 -0
- package/src/skills/baseline/references/codebase-audit-checklist.md +2 -6
- package/src/skills/baseline/references/comparability-contract.md +7 -12
- package/src/skills/baseline/references/operational-guidance.md +56 -0
- package/src/skills/baseline/references/route-selection.md +5 -25
- package/src/skills/decision/SKILL.md +113 -306
- package/src/skills/decision/references/checkpoint-memory-template.md +47 -0
- package/src/skills/decision/references/operational-guidance.md +94 -0
- package/src/skills/decision/references/research-route-criteria.md +7 -8
- package/src/skills/decision/references/strategic-decision-template.md +13 -26
- package/src/skills/experiment/SKILL.md +132 -670
- package/src/skills/experiment/references/execution-playbook.md +374 -0
- package/src/skills/experiment/references/main-experiment-checklist-template.md +26 -2
- package/src/skills/experiment/references/main-experiment-plan-template.md +28 -17
- package/src/skills/experiment/references/operational-guidance.md +108 -0
- package/src/skills/finalize/SKILL.md +62 -0
- package/src/skills/finalize/references/checkpoint-memory-template.md +49 -0
- package/src/skills/finalize/references/resume-packet-template.md +7 -0
- package/src/skills/idea/SKILL.md +228 -15
- package/src/skills/idea/references/controlled-brainstorming-playbook.md +78 -0
- package/src/skills/idea/references/current-board-packet-template.md +61 -0
- package/src/skills/idea/references/high-value-idea-sourcing.md +119 -0
- package/src/skills/idea/references/idea-generation-playbook.md +21 -0
- package/src/skills/idea/references/idea-thinking-flow.md +6 -0
- package/src/skills/idea/references/literature-survey-template.md +3 -0
- package/src/skills/idea/references/objective-contract-template.md +54 -0
- package/src/skills/idea/references/outline-seeding-example.md +56 -0
- package/src/skills/idea/references/pre-idea-draft-template.md +105 -0
- package/src/skills/idea/references/related-work-playbook.md +75 -2
- package/src/skills/idea/references/research-history-playbook.md +114 -0
- package/src/skills/idea/references/selection-gate.md +58 -6
- package/src/skills/intake-audit/SKILL.md +43 -2
- package/src/skills/intake-audit/references/state-audit-template.md +10 -0
- package/src/skills/nature-data/SKILL.md +128 -0
- package/src/skills/nature-data/UPSTREAM_LICENSE.txt +21 -0
- package/src/skills/nature-data/agents/openai.yaml +4 -0
- package/src/skills/nature-data/references/chinese-author-alignment.md +84 -0
- package/src/skills/nature-data/references/fair-metadata-checklist.md +105 -0
- package/src/skills/nature-data/references/policy-principles.md +103 -0
- package/src/skills/nature-data/references/repository-and-identifiers.md +96 -0
- package/src/skills/nature-data/references/source-basis.md +54 -0
- package/src/skills/nature-data/references/statement-patterns.md +153 -0
- package/src/skills/nature-figure/SKILL.md +197 -0
- package/src/skills/nature-figure/UPSTREAM_LICENSE.txt +21 -0
- package/src/skills/nature-figure/agents/openai.yaml +4 -0
- package/src/skills/nature-figure/evals/evals.json +37 -0
- package/src/skills/nature-figure/references/api.md +428 -0
- package/src/skills/nature-figure/references/backend-selection.md +100 -0
- package/src/skills/nature-figure/references/chart-types.md +281 -0
- package/src/skills/nature-figure/references/common-patterns.md +349 -0
- package/src/skills/nature-figure/references/design-theory.md +436 -0
- package/src/skills/nature-figure/references/figure-contract.md +93 -0
- package/src/skills/nature-figure/references/nature-2026-observations.md +112 -0
- package/src/skills/nature-figure/references/qa-contract.md +119 -0
- package/src/skills/nature-figure/references/r-template-index.md +66 -0
- package/src/skills/nature-figure/references/r-workflow.md +161 -0
- package/src/skills/nature-figure/references/tutorials.md +250 -0
- package/src/skills/nature-paper2ppt/SKILL.md +507 -0
- package/src/skills/nature-paper2ppt/UPSTREAM_LICENSE.txt +21 -0
- package/src/skills/nature-paper2ppt/agents/openai.yaml +4 -0
- package/src/skills/nature-polishing/SKILL.md +385 -0
- package/src/skills/nature-polishing/UPSTREAM_LICENSE.txt +21 -0
- package/src/skills/nature-polishing/agents/openai.yaml +4 -0
- package/src/skills/nature-polishing/references/phrasebank-playbook.md +162 -0
- package/src/skills/nature-polishing/references/section-moves.md +240 -0
- package/src/skills/nature-polishing/references/style-guardrails.md +94 -0
- package/src/skills/nature-polishing/references/writing-strategy.md +148 -0
- package/src/skills/optimize/SKILL.md +177 -1568
- package/src/skills/optimize/references/brief-shaping-playbook.md +95 -0
- package/src/skills/optimize/references/candidate-board-template.md +13 -0
- package/src/skills/optimize/references/candidate-ranking-template.md +51 -0
- package/src/skills/optimize/references/codegen-route-playbook.md +50 -0
- package/src/skills/optimize/references/debug-response-template.md +29 -0
- package/src/skills/optimize/references/frontier-review-template.md +32 -0
- package/src/skills/optimize/references/fusion-playbook.md +36 -0
- package/src/skills/optimize/references/method-brief-template.md +73 -0
- package/src/skills/optimize/references/operational-guidance.md +621 -0
- package/src/skills/optimize/references/optimization-memory-template.md +30 -0
- package/src/skills/optimize/references/optimize-checklist-template.md +18 -0
- package/src/skills/optimize/references/plateau-response-playbook.md +28 -0
- package/src/skills/optimize/references/prompt-patterns.md +49 -0
- package/src/skills/paper-outline/SKILL.md +227 -0
- package/src/skills/paper-outline/references/outline-patterns.md +87 -0
- package/src/skills/paper-plot/SKILL.md +79 -0
- package/src/skills/paper-plot/agents/openai.yaml +4 -0
- package/src/skills/paper-plot/references/bar_grouped_hatch.md +96 -0
- package/src/skills/paper-plot/references/bar_paired_delta.md +72 -0
- package/src/skills/paper-plot/references/line_confidence_band.md +75 -0
- package/src/skills/paper-plot/references/line_loss_with_inset.md +65 -0
- package/src/skills/paper-plot/references/line_training_curve.md +44 -0
- package/src/skills/paper-plot/references/radar_dual_series.md +59 -0
- package/src/skills/paper-plot/references/scatter_broken_axis.md +59 -0
- package/src/skills/paper-plot/references/scatter_tsne_cluster.md +72 -0
- package/src/skills/paper-plot/scripts/bar_memevolve.py +109 -0
- package/src/skills/paper-plot/scripts/bar_spice.py +166 -0
- package/src/skills/paper-plot/scripts/line_aime.py +94 -0
- package/src/skills/paper-plot/scripts/line_loss_inset.py +157 -0
- package/src/skills/paper-plot/scripts/line_selfdistill.py +168 -0
- package/src/skills/paper-plot/scripts/radar_dora.py +151 -0
- package/src/skills/paper-plot/scripts/scatter_break.py +169 -0
- package/src/skills/paper-plot/scripts/scatter_tsne.py +133 -0
- package/src/skills/rebuttal/SKILL.md +9 -0
- package/src/skills/references/tool-usage-by-stage.md +438 -0
- package/src/skills/review/SKILL.md +105 -7
- package/src/skills/science/PROVENANCE.md +44 -0
- package/src/skills/science/SKILL.md +137 -0
- package/src/skills/science/references/artifact-science-tool.md +110 -0
- package/src/skills/science/references/claim-type-discipline.md +56 -0
- package/src/skills/science/references/domain-index.md +422 -0
- package/src/skills/science/references/hpc-via-bash-exec.md +42 -0
- package/src/skills/science/references/package-check-playbook.md +64 -0
- package/src/skills/science/references/package-index.min.json +3616 -0
- package/src/skills/science/references/packages/abinit.md +80 -0
- package/src/skills/science/references/packages/acts.md +73 -0
- package/src/skills/science/references/packages/aiida-core.md +80 -0
- package/src/skills/science/references/packages/alamode.md +80 -0
- package/src/skills/science/references/packages/amuse.md +88 -0
- package/src/skills/science/references/packages/anndata.md +88 -0
- package/src/skills/science/references/packages/arbor.md +80 -0
- package/src/skills/science/references/packages/arc.md +73 -0
- package/src/skills/science/references/packages/astropy.md +88 -0
- package/src/skills/science/references/packages/astroquery.md +88 -0
- package/src/skills/science/references/packages/atomate2.md +80 -0
- package/src/skills/science/references/packages/atomsmltr.md +73 -0
- package/src/skills/science/references/packages/awkward.md +73 -0
- package/src/skills/science/references/packages/batman.md +88 -0
- package/src/skills/science/references/packages/biopython.md +88 -0
- package/src/skills/science/references/packages/bloqade.md +73 -0
- package/src/skills/science/references/packages/brian2.md +73 -0
- package/src/skills/science/references/packages/bullet3.md +73 -0
- package/src/skills/science/references/packages/calculix.md +80 -0
- package/src/skills/science/references/packages/cantera.md +73 -0
- package/src/skills/science/references/packages/cavity-md-ipi.md +80 -0
- package/src/skills/science/references/packages/ccdproc.md +88 -0
- package/src/skills/science/references/packages/celerite2.md +88 -0
- package/src/skills/science/references/packages/cellrank.md +73 -0
- package/src/skills/science/references/packages/cesm.md +80 -0
- package/src/skills/science/references/packages/chemicals.md +73 -0
- package/src/skills/science/references/packages/chempy.md +73 -0
- package/src/skills/science/references/packages/cirq.md +73 -0
- package/src/skills/science/references/packages/coffea.md +73 -0
- package/src/skills/science/references/packages/cp2k.md +88 -0
- package/src/skills/science/references/packages/custodian.md +80 -0
- package/src/skills/science/references/packages/dart.md +73 -0
- package/src/skills/science/references/packages/datamol.md +88 -0
- package/src/skills/science/references/packages/dd4hep.md +73 -0
- package/src/skills/science/references/packages/dealii.md +80 -0
- package/src/skills/science/references/packages/deepchem.md +88 -0
- package/src/skills/science/references/packages/delphes.md +73 -0
- package/src/skills/science/references/packages/devito.md +80 -0
- package/src/skills/science/references/packages/dftb.md +88 -0
- package/src/skills/science/references/packages/dftd4.md +88 -0
- package/src/skills/science/references/packages/dftk-jl.md +80 -0
- package/src/skills/science/references/packages/dolfinx.md +80 -0
- package/src/skills/science/references/packages/drake.md +73 -0
- package/src/skills/science/references/packages/dumux.md +73 -0
- package/src/skills/science/references/packages/elk.md +80 -0
- package/src/skills/science/references/packages/elmerfem.md +80 -0
- package/src/skills/science/references/packages/enzo-e.md +88 -0
- package/src/skills/science/references/packages/espresso.md +80 -0
- package/src/skills/science/references/packages/exoplanet.md +88 -0
- package/src/skills/science/references/packages/fairroot.md +73 -0
- package/src/skills/science/references/packages/fbpic.md +80 -0
- package/src/skills/science/references/packages/fdtdbath-meep.md +80 -0
- package/src/skills/science/references/packages/geant4.md +73 -0
- package/src/skills/science/references/packages/geosx.md +80 -0
- package/src/skills/science/references/packages/gprmax.md +80 -0
- package/src/skills/science/references/packages/gromacs.md +80 -0
- package/src/skills/science/references/packages/gwaslab.md +73 -0
- package/src/skills/science/references/packages/gz-sim.md +73 -0
- package/src/skills/science/references/packages/hail.md +88 -0
- package/src/skills/science/references/packages/hiphive.md +80 -0
- package/src/skills/science/references/packages/hoomd-blue.md +80 -0
- package/src/skills/science/references/packages/itensor.md +73 -0
- package/src/skills/science/references/packages/itensors-jl.md +73 -0
- package/src/skills/science/references/packages/jdftx.md +73 -0
- package/src/skills/science/references/packages/jobflow.md +80 -0
- package/src/skills/science/references/packages/kadanoffbaym-jl.md +73 -0
- package/src/skills/science/references/packages/kite.md +80 -0
- package/src/skills/science/references/packages/kratos.md +80 -0
- package/src/skills/science/references/packages/kwant.md +73 -0
- package/src/skills/science/references/packages/lammps.md +80 -0
- package/src/skills/science/references/packages/lightkurve.md +88 -0
- package/src/skills/science/references/packages/limix.md +73 -0
- package/src/skills/science/references/packages/maxwelllink.md +80 -0
- package/src/skills/science/references/packages/mcdc.md +73 -0
- package/src/skills/science/references/packages/meep.md +80 -0
- package/src/skills/science/references/packages/mfem.md +80 -0
- package/src/skills/science/references/packages/mitgcm.md +73 -0
- package/src/skills/science/references/packages/modflow6.md +73 -0
- package/src/skills/science/references/packages/molecool.md +73 -0
- package/src/skills/science/references/packages/mom6.md +73 -0
- package/src/skills/science/references/packages/moose.md +80 -0
- package/src/skills/science/references/packages/mpas-model.md +73 -0
- package/src/skills/science/references/packages/mujoco.md +73 -0
- package/src/skills/science/references/packages/mumax3.md +73 -0
- package/src/skills/science/references/packages/nekrs.md +80 -0
- package/src/skills/science/references/packages/nessi.md +73 -0
- package/src/skills/science/references/packages/nest-simulator.md +73 -0
- package/src/skills/science/references/packages/netket.md +73 -0
- package/src/skills/science/references/packages/neuron.md +73 -0
- package/src/skills/science/references/packages/nextflow.md +88 -0
- package/src/skills/science/references/packages/nwchem.md +88 -0
- package/src/skills/science/references/packages/openbabel.md +88 -0
- package/src/skills/science/references/packages/openems.md +80 -0
- package/src/skills/science/references/packages/openff-toolkit.md +88 -0
- package/src/skills/science/references/packages/openfoam-dev.md +80 -0
- package/src/skills/science/references/packages/openmc.md +73 -0
- package/src/skills/science/references/packages/openmm.md +80 -0
- package/src/skills/science/references/packages/openmoc.md +73 -0
- package/src/skills/science/references/packages/openmx.md +80 -0
- package/src/skills/science/references/packages/opensees.md +80 -0
- package/src/skills/science/references/packages/opensn.md +80 -0
- package/src/skills/science/references/packages/opm-simulators.md +73 -0
- package/src/skills/science/references/packages/oqupy.md +73 -0
- package/src/skills/science/references/packages/packmol.md +80 -0
- package/src/skills/science/references/packages/palabos.md +80 -0
- package/src/skills/science/references/packages/parflow.md +80 -0
- package/src/skills/science/references/packages/pennylane.md +88 -0
- package/src/skills/science/references/packages/perceval.md +73 -0
- package/src/skills/science/references/packages/phono3py.md +73 -0
- package/src/skills/science/references/packages/phonopy.md +73 -0
- package/src/skills/science/references/packages/photutils.md +88 -0
- package/src/skills/science/references/packages/picongpu.md +80 -0
- package/src/skills/science/references/packages/plink-ng.md +88 -0
- package/src/skills/science/references/packages/precice.md +73 -0
- package/src/skills/science/references/packages/psc.md +80 -0
- package/src/skills/science/references/packages/psi4.md +88 -0
- package/src/skills/science/references/packages/pybinding.md +73 -0
- package/src/skills/science/references/packages/pyfr.md +80 -0
- package/src/skills/science/references/packages/pyhf.md +73 -0
- package/src/skills/science/references/packages/pyiron_base.md +80 -0
- package/src/skills/science/references/packages/pylcp.md +73 -0
- package/src/skills/science/references/packages/pylith.md +80 -0
- package/src/skills/science/references/packages/pynbody.md +88 -0
- package/src/skills/science/references/packages/pysam.md +88 -0
- package/src/skills/science/references/packages/pyscf.md +88 -0
- package/src/skills/science/references/packages/q-e.md +73 -0
- package/src/skills/science/references/packages/qibo.md +73 -0
- package/src/skills/science/references/packages/qiskit.md +73 -0
- package/src/skills/science/references/packages/quantica-jl.md +73 -0
- package/src/skills/science/references/packages/quantumoptics-jl.md +73 -0
- package/src/skills/science/references/packages/quimb.md +73 -0
- package/src/skills/science/references/packages/qulacs.md +73 -0
- package/src/skills/science/references/packages/qutip.md +73 -0
- package/src/skills/science/references/packages/rdkit.md +88 -0
- package/src/skills/science/references/packages/rmg-py.md +73 -0
- package/src/skills/science/references/packages/root.md +73 -0
- package/src/skills/science/references/packages/scanpy.md +88 -0
- package/src/skills/science/references/packages/scikit-allel.md +88 -0
- package/src/skills/science/references/packages/scikit-bio.md +88 -0
- package/src/skills/science/references/packages/scqubits.md +73 -0
- package/src/skills/science/references/packages/scuff-em.md +80 -0
- package/src/skills/science/references/packages/scvi-tools.md +73 -0
- package/src/skills/science/references/packages/seissol.md +73 -0
- package/src/skills/science/references/packages/sfepy.md +80 -0
- package/src/skills/science/references/packages/sisl.md +73 -0
- package/src/skills/science/references/packages/smilei.md +80 -0
- package/src/skills/science/references/packages/snakemake.md +88 -0
- package/src/skills/science/references/packages/specfem3d-globe.md +80 -0
- package/src/skills/science/references/packages/specutils.md +88 -0
- package/src/skills/science/references/packages/spglib.md +80 -0
- package/src/skills/science/references/packages/squidpy.md +88 -0
- package/src/skills/science/references/packages/starry.md +88 -0
- package/src/skills/science/references/packages/strawberryfields.md +73 -0
- package/src/skills/science/references/packages/su2.md +80 -0
- package/src/skills/science/references/packages/sunny-jl.md +73 -0
- package/src/skills/science/references/packages/sw4.md +73 -0
- package/src/skills/science/references/packages/swift.md +88 -0
- package/src/skills/science/references/packages/tdnegf.md +73 -0
- package/src/skills/science/references/packages/tenpy.md +73 -0
- package/src/skills/science/references/packages/thermo.md +73 -0
- package/src/skills/science/references/packages/tkwant.md +73 -0
- package/src/skills/science/references/packages/tvb-root.md +73 -0
- package/src/skills/science/references/packages/uproot5.md +73 -0
- package/src/skills/science/references/packages/vampire.md +80 -0
- package/src/skills/science/references/packages/wannier_tools.md +73 -0
- package/src/skills/science/references/packages/warpx.md +80 -0
- package/src/skills/science/references/packages/wrf.md +73 -0
- package/src/skills/science/references/packages/xtb.md +88 -0
- package/src/skills/science/references/packages/yt.md +73 -0
- package/src/skills/science/references/science-task-brief-template.md +71 -0
- package/src/skills/scout/SKILL.md +83 -425
- package/src/skills/scout/references/literature-scout-template.md +5 -24
- package/src/skills/scout/references/operational-guidance.md +191 -0
- package/src/skills/scout/references/paper-triage-playbook.md +11 -35
- package/src/skills/write/SKILL.md +744 -1246
- package/src/skills/write/references/experiments_analysis_patterns.md +129 -0
- package/src/skills/write/references/oral_package_patterns.md +252 -0
- package/src/skills/write/references/oral_writing_principles.md +291 -0
- package/src/skills/write/references/section_rewrite_checklist.md +234 -0
- package/src/tui/dist/app/AppContainer.js +1314 -27
- package/src/tui/dist/components/Composer.js +26 -1
- package/src/tui/dist/components/ConfigScreen.js +2 -1
- package/src/tui/dist/components/InputPrompt.js +25 -9
- package/src/tui/dist/components/MainContent.js +18 -3
- package/src/tui/dist/components/QuestScreen.js +3 -2
- package/src/tui/dist/components/UtilityScreen.js +37 -0
- package/src/tui/dist/hooks/useSafeInput.js +10 -0
- package/src/tui/dist/index.js +13 -1
- package/src/tui/dist/layouts/DefaultAppLayout.js +11 -8
- package/src/tui/dist/lib/api.js +89 -1
- package/src/tui/package.json +1 -1
- package/src/ui/dist/assets/{AnalysisPlugin-DnSm0GZn.js → AnalysisPlugin-CA94NGmI.js} +1 -1
- package/src/ui/dist/assets/CliPlugin-DHBzphZU.js +79 -0
- package/src/ui/dist/assets/CodeEditorPlugin-BOFwD2rn.js +2 -0
- package/src/ui/dist/assets/{CodeViewerPlugin-itb0tltR.js → CodeViewerPlugin-CqDpgjik.js} +4 -4
- package/src/ui/dist/assets/{DocViewerPlugin-DqKkiCI6.js → DocViewerPlugin-UDBgt8-4.js} +3 -3
- package/src/ui/dist/assets/GitCommitViewerPlugin-BmHtZ0bZ.js +6 -0
- package/src/ui/dist/assets/{GitDiffViewerPlugin-DxL2ezFG.js → GitDiffViewerPlugin-CAxjNorQ.js} +2 -2
- package/src/ui/dist/assets/{GitSnapshotViewer-B_RQm1YZ.js → GitSnapshotViewer-CweA6VON.js} +2 -2
- package/src/ui/dist/assets/{ImageViewerPlugin-tHqlXY3n.js → ImageViewerPlugin-C8wHGvGN.js} +5 -5
- package/src/ui/dist/assets/LabPlugin-COyyLUol.js +32 -0
- package/src/ui/dist/assets/{LatexPlugin-B495DTXC.js → LatexPlugin-BQjAaA5J.js} +4 -4
- package/src/ui/dist/assets/{MarkdownViewerPlugin-DG28-61B.js → MarkdownViewerPlugin-Dy1NE2dI.js} +3 -3
- package/src/ui/dist/assets/{MarketplacePlugin-BiOGT-Kj.js → MarketplacePlugin-DMIZtEJ2.js} +2 -2
- package/src/ui/dist/assets/NotebookEditor-CFHMq_Qt.js +91 -0
- package/src/ui/dist/assets/{NotebookEditor-CVsj8h_T.js → NotebookEditor-WFyd8Ybt.js} +23 -23
- package/src/ui/dist/assets/{PdfLoader-CASDQmxJ.js → PdfLoader-CLE5u5TS.js} +3 -3
- package/src/ui/dist/assets/{PdfMarkdownPlugin-BFhwoKsY.js → PdfMarkdownPlugin-_iNK_H83.js} +1 -1
- package/src/ui/dist/assets/PdfViewerPlugin-DgWsbInT.js +22 -0
- package/src/ui/dist/assets/SearchPlugin-DrZmn5iw.js +11 -0
- package/src/ui/dist/assets/{TextViewerPlugin-CB4DYfWO.js → TextViewerPlugin-D1-T3aC7.js} +4 -4
- package/src/ui/dist/assets/branding/runner-claude.svg +107 -0
- package/src/ui/dist/assets/branding/runner-codex.svg +10 -0
- package/src/ui/dist/assets/branding/runner-kimi.svg +14 -0
- package/src/ui/dist/assets/branding/runner-opencode.svg +7 -0
- package/src/ui/dist/assets/cli-store-CoZ-x5Ip.js +1 -0
- package/src/ui/dist/assets/{code-DLC6G24T.js → code-DbsmSd3Y.js} +1 -1
- package/src/ui/dist/assets/file-diff-panel-DsvyRz47.js +1 -0
- package/src/ui/dist/assets/{wrap-text-CwMn-iqb.js → file-jump-queue-DeQBikaw.js} +3 -3
- package/src/ui/dist/assets/{file-socket-Cu4Qln7Y.js → file-socket-DA5XIx88.js} +1 -1
- package/src/ui/dist/assets/fonts/ds-fonts.css +50 -4
- package/src/ui/dist/assets/images/deepxiv/register-guide.png +0 -0
- package/src/ui/dist/assets/index-39vY9LmZ.js +1 -0
- package/src/ui/dist/assets/{index-wQ7RIIRd.js → index-BsO46tJA.js} +1 -1
- package/src/ui/dist/assets/index-CHzJ2xtB.js +3530 -0
- package/src/ui/dist/assets/index-DH-zxoZ3.css +33 -0
- package/src/ui/dist/assets/{plugin-notebook-HbW2K-1c.js → plugin-notebook-JRhysCqj.js} +2 -2
- package/src/ui/dist/assets/{project-sync-CsX08Qno.js → project-sync-DPmWKmKD.js} +1 -1
- package/src/ui/dist/assets/{zoom-out-R-GWEhzS.js → zoom-out-DAukFWen.js} +3 -3
- package/src/ui/dist/index.html +3 -3
- package/src/skills/analysis-campaign/references/artifact-orchestration.md +0 -58
- package/src/skills/baseline/references/memory-playbook.md +0 -40
- package/src/skills/baseline/references/publishable-baseline-package.md +0 -30
- package/src/skills/write/references/outline-evidence-contract-example.md +0 -107
- package/src/skills/write/references/paper-experiment-matrix-template.md +0 -131
- package/src/skills/write/references/paper-section-playbook.md +0 -64
- package/src/skills/write/references/reviewer-first-writing.md +0 -64
- package/src/skills/write/references/revision-checklist.md +0 -70
- package/src/skills/write/references/section-contracts.md +0 -82
- package/src/skills/write/references/sentence-level-proofing.md +0 -49
- package/src/ui/dist/assets/AiManusChatView-COFACy7V.js +0 -204
- package/src/ui/dist/assets/CliPlugin-CvwCmDQ5.js +0 -109
- package/src/ui/dist/assets/CodeEditorPlugin-cOqSa0xq.js +0 -2
- package/src/ui/dist/assets/GitCommitViewerPlugin-DVgNHBCS.js +0 -1
- package/src/ui/dist/assets/LabCopilotPanel-ClMbq5Yu.js +0 -14
- package/src/ui/dist/assets/LabPlugin-L_SuE8ow.js +0 -22
- package/src/ui/dist/assets/NotebookEditor-C-4Kt1p9.js +0 -81
- package/src/ui/dist/assets/PdfViewerPlugin-DcOzU9vd.js +0 -17
- package/src/ui/dist/assets/SearchPlugin-CHj7M58O.js +0 -16
- package/src/ui/dist/assets/VNCViewer-CjlbyCB3.js +0 -11
- package/src/ui/dist/assets/bot-CFkZY-JP.js +0 -6
- package/src/ui/dist/assets/chevron-up-Dq5ofbht.js +0 -6
- package/src/ui/dist/assets/file-content-Dv4LoZec.js +0 -1
- package/src/ui/dist/assets/file-diff-panel-Denq-lC3.js +0 -1
- package/src/ui/dist/assets/file-jump-queue-DA-SdG__.js +0 -1
- package/src/ui/dist/assets/git-commit-horizontal-BUh6G52n.js +0 -6
- package/src/ui/dist/assets/image-B9HUUddG.js +0 -6
- package/src/ui/dist/assets/index-B2B1sg-M.js +0 -1
- package/src/ui/dist/assets/index-Cgla8biy.css +0 -33
- package/src/ui/dist/assets/index-DRyx7vAc.js +0 -1
- package/src/ui/dist/assets/index-Gbl53BNp.js +0 -2496
- package/src/ui/dist/assets/pdf-effect-queue-ZtnHFCAi.js +0 -6
- package/src/ui/dist/assets/popover-DL6h35vr.js +0 -1
- package/src/ui/dist/assets/select-DvmXt1yY.js +0 -11
- package/src/ui/dist/assets/sigma-7jpXazui.js +0 -6
- package/src/ui/dist/assets/trash-xA7kFt8i.js +0 -11
- package/src/ui/dist/assets/useCliAccess-DsMwDjOp.js +0 -1
- package/src/ui/dist/assets/useFileDiffOverlay-FuhcnKiw.js +0 -1
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
schema_version: 1
|
|
2
|
+
id: aisb.t3.019_dyscaleut
|
|
3
|
+
name: 面向代码奖励建模的单元测试动态缩放
|
|
4
|
+
version: 0.1.0
|
|
5
|
+
one_line: '将LLM生成的单元测试作为奖励信号进行动态缩放,用于最佳-of-N代码解决方案选择,在HumanEval Plus、MBPP Plus和LiveCodeBench上通过pass@1进行评估。
|
|
6
|
+
|
|
7
|
+
'
|
|
8
|
+
task_description: '本基准测试复现了CodeRM流程,通过缩放单元测试奖励建模来改进代码生成。核心工作流程为:(1) 策略LLM为每个编程问题生成N个候选代码解决方案,(2) 奖励LLM(或微调后的CodeRM-8B单元测试生成器)为每个问题生成M个单元测试,(3) 在Docker沙箱内对解决方案执行单元测试,(4) 通过执行结果的方差加权多数投票选择最佳解决方案。动态缩放机制使用训练好的难度分类器为更难的问题分配更多单元测试。主要指标为pass@1(最佳-of-N准确率),通过evaluation/calculate_result.py计算。本地快照包含评估/执行代码、打包的基准数据、预计算的推理结果,以及用于沙箱代码执行的Docker镜像规格。推理和预处理步骤(从头开始使用策略/奖励LLM生成新的解决方案/单元测试)需要外部模型访问,已由打包脚本部分覆盖,但可能需要适配。可用Google Drive上的预计算output.tar.gz替代推理+执行步骤。
|
|
9
|
+
|
|
10
|
+
'
|
|
11
|
+
capability_tags:
|
|
12
|
+
- research_code_optimization
|
|
13
|
+
- code_generation
|
|
14
|
+
- reward_modeling
|
|
15
|
+
- unit_test_generation
|
|
16
|
+
- evaluation
|
|
17
|
+
aisb_direction: T3
|
|
18
|
+
track_fit:
|
|
19
|
+
- paper_track
|
|
20
|
+
- benchmark_track
|
|
21
|
+
task_mode: experiment_driven
|
|
22
|
+
requires_execution: true
|
|
23
|
+
requires_paper: true
|
|
24
|
+
integrity_level: cas_plus_canary
|
|
25
|
+
snapshot_status: partial
|
|
26
|
+
support_level: advanced
|
|
27
|
+
cost_band: high
|
|
28
|
+
time_band: 1d+
|
|
29
|
+
difficulty: hard
|
|
30
|
+
data_access: public
|
|
31
|
+
primary_outputs:
|
|
32
|
+
- pass_at_1
|
|
33
|
+
- scaled_unit_tests
|
|
34
|
+
- reward_scores
|
|
35
|
+
launch_profiles:
|
|
36
|
+
- id: quick_check
|
|
37
|
+
label: 快速检查
|
|
38
|
+
description: '在打包或下载的预计算执行结果上运行evaluation/calculate_result.py,验证单一基准/模型组合的pass@1。无需GPU或Docker。
|
|
39
|
+
|
|
40
|
+
'
|
|
41
|
+
- id: code_reward_eval
|
|
42
|
+
label: 代码奖励评估
|
|
43
|
+
description: '拉取Docker沙箱镜像,使用evaluation/evaluate.py对候选解决方案执行单元测试,然后通过evaluation/calculate_result.py计算pass@1。需要Docker和中等算力。使用打包的基准数据和预生成的解决方案/单元测试,无需LLM推理。
|
|
44
|
+
|
|
45
|
+
'
|
|
46
|
+
- id: full_pipeline
|
|
47
|
+
label: 全流程(推理+评估)
|
|
48
|
+
description: '端到端运行:多进程LLM推理生成解决方案和单元测试、预处理/合并、基于Docker的执行,以及最终pass@1计算。需要GPU访问(用于LLM推理,CodeRM-8B或更大模型)和Docker执行环境。
|
|
49
|
+
|
|
50
|
+
'
|
|
51
|
+
dataset_download:
|
|
52
|
+
primary_method: mixed
|
|
53
|
+
sources:
|
|
54
|
+
- kind: huggingface
|
|
55
|
+
url: https://huggingface.co/datasets/KAKA22/CodeRM-UnitTest
|
|
56
|
+
access: public
|
|
57
|
+
note: 用于训练CodeRM-8B的60k个合成Python单元测试。
|
|
58
|
+
- kind: huggingface
|
|
59
|
+
url: https://huggingface.co/KAKA22/CodeRM-8B
|
|
60
|
+
access: public
|
|
61
|
+
note: 微调后的8B单元测试生成器模型权重。
|
|
62
|
+
- kind: google_drive
|
|
63
|
+
url: https://drive.google.com/drive/folders/1-wUvy9Ox49V5CY38TMjCr5RlLysapyyj?usp=sharing
|
|
64
|
+
access: public
|
|
65
|
+
note: 预计算的执行输出(output.tar.gz),可替代管道的第1-3步。
|
|
66
|
+
|
|
67
|
+
- kind: bundled
|
|
68
|
+
url: null
|
|
69
|
+
access: local
|
|
70
|
+
note: 基准数据(HumanEval Plus、MBPP Plus、LiveCodeBench)和预生成的推理结果包含在快照的data/benchmark/和data/result/目录下。
|
|
71
|
+
notes:
|
|
72
|
+
- 训练数据集约60k样本;模型权重约16GB;预计算输出的大小因基准不同而异。
|
|
73
|
+
- 基准测试本身(HumanEval Plus、MBPP Plus、LiveCodeBench)是公开的。
|
|
74
|
+
credential_requirements:
|
|
75
|
+
mode: none
|
|
76
|
+
items: []
|
|
77
|
+
notes:
|
|
78
|
+
- 使用打包数据和CodeRM-8B进行推理无需API密钥。
|
|
79
|
+
- 若需复现GPT-4o-mini或GPT-3.5的策略/奖励实验,则需要OpenAI API密钥。
|
|
80
|
+
resources:
|
|
81
|
+
minimum:
|
|
82
|
+
cpu_cores: 16
|
|
83
|
+
ram_gb: 64
|
|
84
|
+
disk_gb: 150
|
|
85
|
+
gpu_count: 1
|
|
86
|
+
gpu_vram_gb: 24
|
|
87
|
+
recommended:
|
|
88
|
+
cpu_cores: 32
|
|
89
|
+
ram_gb: 128
|
|
90
|
+
disk_gb: 300
|
|
91
|
+
gpu_count: 2
|
|
92
|
+
gpu_vram_gb: 48
|
|
93
|
+
environment:
|
|
94
|
+
python: null
|
|
95
|
+
cuda: null
|
|
96
|
+
pytorch: null
|
|
97
|
+
flash_attn: null
|
|
98
|
+
key_packages:
|
|
99
|
+
- vllm
|
|
100
|
+
- transformers
|
|
101
|
+
- docker
|
|
102
|
+
notes:
|
|
103
|
+
- 快照包含专用Docker执行环境(kaka0605/exec_unit_test:24.12.30),用于沙箱化大规模代码执行生成的单元测试。
|
|
104
|
+
- 沙箱依赖见docker_source/Dockerfile和docker_source/requirements.txt。
|
|
105
|
+
- 主机端依赖集见打包的README和requirements。
|
|
106
|
+
- 推理使用多进程Python(inference/inference_mp.py),可能需要vLLM或类似工具实现高效服务。
|
|
107
|
+
risk_flags:
|
|
108
|
+
- docker_required
|
|
109
|
+
- partial_snapshot
|
|
110
|
+
- external_model_for_full_replication
|
|
111
|
+
- code_execution_sandbox
|
|
112
|
+
risk_notes:
|
|
113
|
+
- 在运行evaluation/evaluate.py之前必须本地拉取或构建Docker沙箱镜像(kaka0605/exec_unit_test:24.12.30)。没有Docker时,只能在预计算输出上运行最后的calculate_result.py步骤。
|
|
114
|
+
- 预处理脚本(preprocess/)存在,但完整推理管道需要服务策略LLM和奖励LLM,快照中未完全自动化。
|
|
115
|
+
- exec_main.py通过signal.SIGALRM超时机制执行任意生成的Python代码;应仅在提供的Docker沙箱或等效隔离环境中运行。
|
|
116
|
+
- 打包过程中未执行基准测试;指标值尚未验证。
|
|
117
|
+
recommended_when: 当您想研究缩放LLM生成的单元测试如何改进代码奖励信号质量和最佳-of-N代码选择时,或当您需要一个在评估循环中包含真实单元测试执行的代码生成任务时使用此基准。也可用于评估轻量级单元测试生成器与更大教师模型的性能。
|
|
118
|
+
not_recommended_when: 当您无法提供基于Docker的容器化代码执行时不要使用;当您需要一个没有代码执行的纯文本奖励模型基准时不要使用;当您缺乏LLM推理所需的GPU资源时不要运行完整流程(若仅需快速指标检查,请改用基于预计算输出的quick_check启动配置)。
|
|
119
|
+
paper:
|
|
120
|
+
title: Dynamic Scaling of Unit Tests for Code Reward Modeling
|
|
121
|
+
authors:
|
|
122
|
+
- Zeyao Ma
|
|
123
|
+
- Xiaokang Zhang
|
|
124
|
+
- Jing Zhang
|
|
125
|
+
- Jifan Yu
|
|
126
|
+
- Sijia Luo
|
|
127
|
+
- Jie Tang
|
|
128
|
+
venue: ACL 2025
|
|
129
|
+
year: 2025
|
|
130
|
+
url: https://arxiv.org/abs/2501.01054
|
|
131
|
+
homepage: https://code-reward-model.github.io/
|
|
132
|
+
download:
|
|
133
|
+
url: https://github.com/ResearAI/DeepScientist/releases/download/aisb-v0.0.1/aisb.t3.019_dyscaleut.zip
|
|
134
|
+
archive_type: zip
|
|
135
|
+
local_dir_name: paper-19-DyScaleUT
|
|
136
|
+
provider: github_release
|
|
137
|
+
repo: ResearAI/DeepScientist
|
|
138
|
+
tag: aisb-v0.0.1
|
|
139
|
+
asset_name: aisb.t3.019_dyscaleut.zip
|
|
140
|
+
sha256: 2eee5573353ade5e13c254f7372a3294b71459ee7c668205f27f2852347c141f
|
|
141
|
+
size_bytes: 60766
|
|
142
|
+
commercial:
|
|
143
|
+
annual_fee: null
|
|
144
|
+
display:
|
|
145
|
+
palette_seed: olive-ink-runtime
|
|
146
|
+
art_style: code-lab
|
|
147
|
+
accent_priority: high
|
|
148
|
+
image_path: ../image/019_aisb.t3.019_dyscaleut.jpg
|
|
@@ -0,0 +1,173 @@
|
|
|
1
|
+
schema_version: 1
|
|
2
|
+
id: aisb.t3.020_aristotle
|
|
3
|
+
name: 'Aristotle: Mastering Logical Reasoning with A Logic-Complete Decompose-Search-Resolve
|
|
4
|
+
Framework'
|
|
5
|
+
version: 0.1.0
|
|
6
|
+
one_line: 'Multi-stage LLM-driven logical reasoning benchmark using symbolic decomposition,
|
|
7
|
+
dual-path proof-by-contradiction search, and resolution over ProntoQA, ProofWriter,
|
|
8
|
+
and LogicNLI datasets.
|
|
9
|
+
|
|
10
|
+
'
|
|
11
|
+
task_description: 'This benchmark evaluates and optimizes the Aristotle framework,
|
|
12
|
+
a logic-complete reasoning pipeline for LLMs. The workflow has four stages: (1)
|
|
13
|
+
translate_decompose.py converts natural-language premises into Prolog-style symbolic
|
|
14
|
+
expressions and decomposes them into Conjunctive Normal Form; (2) negate.py initialises
|
|
15
|
+
dual search paths by negating the target conjecture; (3) search_resolve.py runs
|
|
16
|
+
proof-by-contradiction search and clause resolution twice (once with negation=True,
|
|
17
|
+
once with negation=False), producing two result files per dataset; (4) evaluate.py
|
|
18
|
+
aggregates dual-path answers using the paper''s Eq. (1) and computes accuracy against
|
|
19
|
+
gold labels. All LLM calls go through an OpenAI-compatible API (GPT-4, GPT-4o, Claude,
|
|
20
|
+
or LLaMA via base_url). Three bundled datasets of increasing difficulty are supported:
|
|
21
|
+
ProntoQA (basic deductive), ProofWriter (and/or structures), and LogicNLI (either/or,
|
|
22
|
+
biconditional). The primary metric is accuracy, code-backed via evaluate.py. No
|
|
23
|
+
GPU is required; the main cost driver is API token usage.
|
|
24
|
+
|
|
25
|
+
'
|
|
26
|
+
capability_tags:
|
|
27
|
+
- research_code_optimization
|
|
28
|
+
- logical_reasoning
|
|
29
|
+
- llm_tooling
|
|
30
|
+
- evaluation
|
|
31
|
+
- symbolic_reasoning
|
|
32
|
+
aisb_direction: T3
|
|
33
|
+
track_fit:
|
|
34
|
+
- paper_track
|
|
35
|
+
- benchmark_track
|
|
36
|
+
task_mode: evaluation_driven
|
|
37
|
+
requires_execution: true
|
|
38
|
+
requires_paper: true
|
|
39
|
+
integrity_level: cas_plus_canary
|
|
40
|
+
cost_band: low
|
|
41
|
+
time_band: 1-2h
|
|
42
|
+
difficulty: medium
|
|
43
|
+
data_access: public
|
|
44
|
+
snapshot_status: runnable
|
|
45
|
+
support_level: turnkey
|
|
46
|
+
primary_outputs:
|
|
47
|
+
- accuracy
|
|
48
|
+
- reasoning_traces
|
|
49
|
+
- evaluation_report
|
|
50
|
+
launch_profiles:
|
|
51
|
+
- id: quick_check
|
|
52
|
+
label: Quick Check
|
|
53
|
+
description: 'Run the full four-stage pipeline (translate_decompose → negate → search_resolve
|
|
54
|
+
× 2 → evaluate) on a single dataset (e.g., ProntoQA dev split) to verify end-to-end
|
|
55
|
+
connectivity and API access.
|
|
56
|
+
|
|
57
|
+
'
|
|
58
|
+
- id: benchmark_eval
|
|
59
|
+
label: Full Benchmark Evaluation
|
|
60
|
+
description: 'Run the Aristotle pipeline on all three datasets (ProntoQA, ProofWriter,
|
|
61
|
+
LogicNLI) with the target model (GPT-4 or GPT-4o) and collect accuracy scores
|
|
62
|
+
for comparison with paper baselines.
|
|
63
|
+
|
|
64
|
+
'
|
|
65
|
+
- id: single_dataset
|
|
66
|
+
label: Single Dataset Run
|
|
67
|
+
description: 'Run on one selected dataset to iterate on framework improvements before
|
|
68
|
+
committing to the full suite.
|
|
69
|
+
|
|
70
|
+
'
|
|
71
|
+
dataset_download:
|
|
72
|
+
primary_method: bundled
|
|
73
|
+
sources:
|
|
74
|
+
- kind: local
|
|
75
|
+
url: ./data
|
|
76
|
+
access: public
|
|
77
|
+
note: 'Data for ProntoQA, ProofWriter, and LogicNLI is expected under ./data/{dataset_name}/.
|
|
78
|
+
Prompt templates are under ./prompts/{dataset_name}/. Some data-error corrections
|
|
79
|
+
are stored in ./data_errors/LogicNLI/.
|
|
80
|
+
|
|
81
|
+
'
|
|
82
|
+
notes:
|
|
83
|
+
- Dataset files appear to be bundled in the snapshot under ./data; verify completeness
|
|
84
|
+
after download.
|
|
85
|
+
- Total data footprint is small (well under 1 GB).
|
|
86
|
+
credential_requirements:
|
|
87
|
+
mode: api_key
|
|
88
|
+
items:
|
|
89
|
+
- OpenAI API key (or compatible endpoint key for GPT-4/GPT-4o/Claude/LLaMA serving)
|
|
90
|
+
notes:
|
|
91
|
+
- The --api_key flag is required by translate_decompose.py and search_resolve.py.
|
|
92
|
+
- An optional --base_url flag allows pointing to non-OpenAI endpoints.
|
|
93
|
+
- API costs depend on dataset size and model choice; GPT-4 is significantly more
|
|
94
|
+
expensive per token than GPT-4o.
|
|
95
|
+
resources:
|
|
96
|
+
minimum:
|
|
97
|
+
cpu_cores: 4
|
|
98
|
+
ram_gb: 8
|
|
99
|
+
disk_gb: 10
|
|
100
|
+
gpu_count: 0
|
|
101
|
+
gpu_vram_gb: 0
|
|
102
|
+
recommended:
|
|
103
|
+
cpu_cores: 8
|
|
104
|
+
ram_gb: 16
|
|
105
|
+
disk_gb: 20
|
|
106
|
+
gpu_count: 0
|
|
107
|
+
gpu_vram_gb: 0
|
|
108
|
+
environment:
|
|
109
|
+
python: '3.10'
|
|
110
|
+
cuda: null
|
|
111
|
+
pytorch: null
|
|
112
|
+
flash_attn: null
|
|
113
|
+
key_packages:
|
|
114
|
+
- openai==0.27.9
|
|
115
|
+
notes:
|
|
116
|
+
- CPU-only execution; no GPU required.
|
|
117
|
+
- All heavy computation is offloaded to the LLM API; local compute handles I/O and
|
|
118
|
+
symbolic search routing.
|
|
119
|
+
- negate.py is pure Python with no API calls.
|
|
120
|
+
- See requirements.txt for the full dependency set.
|
|
121
|
+
- The openai package version (0.27.9) uses the legacy API interface; verify compatibility
|
|
122
|
+
if using a newer SDK.
|
|
123
|
+
risk_flags:
|
|
124
|
+
- api_dependency
|
|
125
|
+
- api_cost_variable
|
|
126
|
+
- legacy_sdk_version
|
|
127
|
+
risk_notes:
|
|
128
|
+
- All translation, decomposition, search, and resolution stages except negate.py require
|
|
129
|
+
live API access; the benchmark cannot run offline.
|
|
130
|
+
- API costs scale with dataset size and number of search iterations (controlled by
|
|
131
|
+
--search_round and --batch_num).
|
|
132
|
+
- openai==0.27.9 is a legacy SDK; newer OpenAI endpoints may require migration to
|
|
133
|
+
openai>=1.0.
|
|
134
|
+
- No benchmark execution was performed during the packaging pass; metric values are
|
|
135
|
+
not yet validated.
|
|
136
|
+
- Concurrent batch execution via threading may hit API rate limits depending on the
|
|
137
|
+
provider.
|
|
138
|
+
recommended_when: 'Use this benchmark when you want a structured logical-reasoning
|
|
139
|
+
evaluation driven by API-backed decomposition, symbolic search routing, and resolution
|
|
140
|
+
stages across datasets of increasing logical complexity. Good fit for studying how
|
|
141
|
+
LLMs handle formal symbolic reasoning, proof by contradiction, and CNF-based clause
|
|
142
|
+
resolution without requiring local GPU resources.
|
|
143
|
+
|
|
144
|
+
'
|
|
145
|
+
not_recommended_when: 'Do not use this if you need a fully offline or GPU-training-centric
|
|
146
|
+
benchmark. Not suitable if you cannot provision an OpenAI-compatible API key or
|
|
147
|
+
if you need to avoid per-query API costs. Also not ideal if you require deterministic
|
|
148
|
+
reproducibility, as LLM API outputs are stochastic.
|
|
149
|
+
|
|
150
|
+
'
|
|
151
|
+
paper:
|
|
152
|
+
title: 'Aristotle: Mastering Logical Reasoning with A Logic-Complete Decompose-Search-Resolve
|
|
153
|
+
Framework'
|
|
154
|
+
venue: ACL 2025
|
|
155
|
+
year: 2025
|
|
156
|
+
url: https://arxiv.org/abs/2412.16953
|
|
157
|
+
download:
|
|
158
|
+
url: https://github.com/ResearAI/DeepScientist/releases/download/aisb-v0.0.1/aisb.t3.020_aristotle.zip
|
|
159
|
+
archive_type: zip
|
|
160
|
+
local_dir_name: paper-20-Aristotle
|
|
161
|
+
provider: github_release
|
|
162
|
+
repo: ResearAI/DeepScientist
|
|
163
|
+
tag: aisb-v0.0.1
|
|
164
|
+
asset_name: aisb.t3.020_aristotle.zip
|
|
165
|
+
sha256: f6c04512a751c42a8cd33286a62c46b34019cf91eac8d1245cead4de475b84e2
|
|
166
|
+
size_bytes: 695245
|
|
167
|
+
commercial:
|
|
168
|
+
annual_fee: null
|
|
169
|
+
display:
|
|
170
|
+
palette_seed: parchment-navy-logic
|
|
171
|
+
art_style: structured-proof
|
|
172
|
+
accent_priority: medium
|
|
173
|
+
image_path: ../image/020_aisb.t3.020_aristotle.jpg
|
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
schema_version: 1
|
|
2
|
+
id: aisb.t3.020_aristotle
|
|
3
|
+
name: 'Aristotle:使用逻辑完备的分解-搜索-归结框架掌握逻辑推理'
|
|
4
|
+
version: 0.1.0
|
|
5
|
+
one_line: '多阶段LLM驱动的逻辑推理基准测试,采用符号分解、双路径反证法搜索和归结技术,覆盖ProntoQA、ProofWriter及LogicNLI数据集。'
|
|
6
|
+
task_description: '本基准测试用于评估和优化Aristotle框架——一个面向LLM的逻辑完备推理流程。该工作流包含四个阶段:(1) translate_decompose.py将自然语言前提转换为Prolog风格的符号表达式,并将其分解为合取范式;(2) negate.py通过对目标猜想取反来初始化双搜索路径;(3) search_resolve.py执行反证法搜索和子句归结,运行两次(一次negation=True,一次negation=False),为每个数据集生成两个结果文件;(4) evaluate.py使用论文中的公式(1)聚合双路径答案,并计算相对于黄金标签的准确率。所有LLM调用都通过OpenAI兼容API进行(GPT-4、GPT-4o、Claude或LLaMA,通过base_url配置)。支持三个难度递增的捆绑数据集:ProntoQA(基础演绎推理)、ProofWriter(与或结构)和LogicNLI(异或、双条件)。主要评估指标是准确率,通过evaluate.py计算。无需GPU,主要成本驱动因素是API token使用量。'
|
|
7
|
+
capability_tags:
|
|
8
|
+
- research_code_optimization
|
|
9
|
+
- logical_reasoning
|
|
10
|
+
- llm_tooling
|
|
11
|
+
- evaluation
|
|
12
|
+
- symbolic_reasoning
|
|
13
|
+
aisb_direction: T3
|
|
14
|
+
track_fit:
|
|
15
|
+
- paper_track
|
|
16
|
+
- benchmark_track
|
|
17
|
+
task_mode: evaluation_driven
|
|
18
|
+
requires_execution: true
|
|
19
|
+
requires_paper: true
|
|
20
|
+
integrity_level: cas_plus_canary
|
|
21
|
+
cost_band: low
|
|
22
|
+
time_band: 1-2h
|
|
23
|
+
difficulty: medium
|
|
24
|
+
data_access: public
|
|
25
|
+
snapshot_status: runnable
|
|
26
|
+
support_level: turnkey
|
|
27
|
+
primary_outputs:
|
|
28
|
+
- accuracy
|
|
29
|
+
- reasoning_traces
|
|
30
|
+
- evaluation_report
|
|
31
|
+
launch_profiles:
|
|
32
|
+
- id: quick_check
|
|
33
|
+
label: 快速检查
|
|
34
|
+
description: '在单个数据集(如ProntoQA开发集)上运行完整的四阶段流程(translate_decompose → negate → search_resolve × 2 → evaluate),以验证端到端连通性和API访问。'
|
|
35
|
+
- id: benchmark_eval
|
|
36
|
+
label: 完整基准测试评估
|
|
37
|
+
description: '使用目标模型(GPT-4或GPT-4o)在所有三个数据集(ProntoQA、ProofWriter、LogicNLI)上运行Aristotle流程,并收集准确率分数以与论文中的基线结果进行对比。'
|
|
38
|
+
- id: single_dataset
|
|
39
|
+
label: 单数据集运行
|
|
40
|
+
description: '在选定的单个数据集上运行,以便在投入完整测试套件的运行之前迭代框架改进。'
|
|
41
|
+
dataset_download:
|
|
42
|
+
primary_method: bundled
|
|
43
|
+
sources:
|
|
44
|
+
- kind: local
|
|
45
|
+
url: ./data
|
|
46
|
+
access: public
|
|
47
|
+
note: 'ProntoQA、ProofWriter和LogicNLI的数据应位于./data/{dataset_name}/目录下。提示模板位于./prompts/{dataset_name}/目录下。部分数据错误修正存储在./data_errors/LogicNLI/中。'
|
|
48
|
+
notes:
|
|
49
|
+
- 数据集文件似乎以捆绑形式包含在快照的./data目录下;下载后请验证完整性。
|
|
50
|
+
- 总体数据占用很小(远低于1 GB)。
|
|
51
|
+
credential_requirements:
|
|
52
|
+
mode: api_key
|
|
53
|
+
items:
|
|
54
|
+
- OpenAI API密钥(或用于GPT-4/GPT-4o/Claude/LLaMA服务的兼容端点密钥)
|
|
55
|
+
notes:
|
|
56
|
+
- translate_decompose.py和search_resolve.py需要--api_key参数。
|
|
57
|
+
- 可选的--base_url参数允许指向非OpenAI端点。
|
|
58
|
+
- API成本取决于数据集大小和模型选择;GPT-4每个token的成本显著高于GPT-4o。
|
|
59
|
+
resources:
|
|
60
|
+
minimum:
|
|
61
|
+
cpu_cores: 4
|
|
62
|
+
ram_gb: 8
|
|
63
|
+
disk_gb: 10
|
|
64
|
+
gpu_count: 0
|
|
65
|
+
gpu_vram_gb: 0
|
|
66
|
+
recommended:
|
|
67
|
+
cpu_cores: 8
|
|
68
|
+
ram_gb: 16
|
|
69
|
+
disk_gb: 20
|
|
70
|
+
gpu_count: 0
|
|
71
|
+
gpu_vram_gb: 0
|
|
72
|
+
environment:
|
|
73
|
+
python: '3.10'
|
|
74
|
+
cuda: null
|
|
75
|
+
pytorch: null
|
|
76
|
+
flash_attn: null
|
|
77
|
+
key_packages:
|
|
78
|
+
- openai==0.27.9
|
|
79
|
+
notes:
|
|
80
|
+
- 仅CPU执行;无需GPU。
|
|
81
|
+
- 所有重型计算都卸载到LLM API;本地计算负责I/O和符号搜索路由。
|
|
82
|
+
- negate.py是纯Python,无需API调用。
|
|
83
|
+
- 完整依赖项请参见requirements.txt。
|
|
84
|
+
- openai包版本(0.27.9)使用旧版API接口;如使用新版SDK请验证兼容性。
|
|
85
|
+
risk_flags:
|
|
86
|
+
- api_dependency
|
|
87
|
+
- api_cost_variable
|
|
88
|
+
- legacy_sdk_version
|
|
89
|
+
risk_notes:
|
|
90
|
+
- 除negate.py外,所有翻译、分解、搜索和归结阶段都需要实时API访问;基准测试无法离线运行。
|
|
91
|
+
- API成本随数据集大小和搜索迭代次数(由--search_round和--batch_num控制)而增加。
|
|
92
|
+
- openai==0.27.9是旧版SDK;较新的OpenAI端点可能需要迁移到openai>=1.0。
|
|
93
|
+
- 打包过程中未执行基准测试;尚未验证指标值。
|
|
94
|
+
- 通过线程进行的并发批处理执行可能会触发API速率限制,具体取决于提供商。
|
|
95
|
+
recommended_when: '当您需要进行由API支持的分解、符号搜索路由和归结阶段驱动的结构化逻辑推理评估,且覆盖逻辑复杂度递增的数据集时使用此基准测试。非常适合研究LLM如何处理形式符号推理、反证法证明和基于CNF的子句归结,同时无需本地GPU资源。'
|
|
96
|
+
not_recommended_when: '如果需要完全离线或以GPU训练为中心的基准测试,请勿使用。如果无法配置OpenAI兼容的API密钥或需要避免按查询计费的API成本,也不宜使用。此外,如果需要确定性的可复现性,此基准测试也不理想,因为LLM API输出具有随机性。'
|
|
97
|
+
paper:
|
|
98
|
+
title: 'Aristotle: Mastering Logical Reasoning with A Logic-Complete Decompose-Search-Resolve
|
|
99
|
+
Framework'
|
|
100
|
+
venue: ACL 2025
|
|
101
|
+
year: 2025
|
|
102
|
+
url: https://arxiv.org/abs/2412.16953
|
|
103
|
+
download:
|
|
104
|
+
url: https://github.com/ResearAI/DeepScientist/releases/download/aisb-v0.0.1/aisb.t3.020_aristotle.zip
|
|
105
|
+
archive_type: zip
|
|
106
|
+
local_dir_name: paper-20-Aristotle
|
|
107
|
+
provider: github_release
|
|
108
|
+
repo: ResearAI/DeepScientist
|
|
109
|
+
tag: aisb-v0.0.1
|
|
110
|
+
asset_name: aisb.t3.020_aristotle.zip
|
|
111
|
+
sha256: f6c04512a751c42a8cd33286a62c46b34019cf91eac8d1245cead4de475b84e2
|
|
112
|
+
size_bytes: 695245
|
|
113
|
+
commercial:
|
|
114
|
+
annual_fee: null
|
|
115
|
+
display:
|
|
116
|
+
palette_seed: parchment-navy-logic
|
|
117
|
+
art_style: structured-proof
|
|
118
|
+
accent_priority: medium
|
|
119
|
+
image_path: ../image/020_aisb.t3.020_aristotle.jpg
|
|
@@ -0,0 +1,160 @@
|
|
|
1
|
+
schema_version: 1
|
|
2
|
+
id: aisb.t3.021_tokenrecycling
|
|
3
|
+
name: 'Turning Trash into Treasure: Accelerating Inference of Large Language Models
|
|
4
|
+
with Token Recycling'
|
|
5
|
+
version: 0.1.0
|
|
6
|
+
one_line: Evaluate Token Recycling, a training-free speculative decoding method that
|
|
7
|
+
reuses candidate tokens via a <2 MB adjacency matrix to achieve ~2× LLM inference
|
|
8
|
+
speedup on SpecBench and MBPP.
|
|
9
|
+
task_description: 'This packaged benchmark evaluates Token Recycling (TR), a plug-and-play,
|
|
10
|
+
training-free speculative decoding method for accelerating LLM inference. TR stores
|
|
11
|
+
top-k candidate tokens from each decoding step in a vocabulary-sized adjacency matrix
|
|
12
|
+
(<2 MB), then constructs draft trees via BFS-like retrieval and verifies them with
|
|
13
|
+
tree attention. The evaluation route runs eval.sh, which iterates over Vicuna 7B/13B/33B
|
|
14
|
+
(for SpecBench) and Code Llama 7B/13B/34B (for MBPP), measuring Mean Accepted Tokens
|
|
15
|
+
(MAT), tokens/second, and speedup ratio relative to HuggingFace autoregressive decoding.
|
|
16
|
+
Models must be pre-downloaded to ../models/ relative to the project root. The codebase
|
|
17
|
+
is built on the SpecBench evaluation harness and uses greedy decoding (temperature=0,
|
|
18
|
+
batch size=1). Paper reports ~2× speedup on 7B models, 30%+ over prior train-free
|
|
19
|
+
methods, and 25% over Medusa (which requires training). All original experiments
|
|
20
|
+
used a single A100-80GB GPU with PyTorch 2.3 and CUDA 12.2.
|
|
21
|
+
|
|
22
|
+
'
|
|
23
|
+
capability_tags:
|
|
24
|
+
- research_code_optimization
|
|
25
|
+
- large_language_models
|
|
26
|
+
- speculative_decoding
|
|
27
|
+
- inference_acceleration
|
|
28
|
+
- systems_efficiency
|
|
29
|
+
aisb_direction: T3
|
|
30
|
+
track_fit:
|
|
31
|
+
- paper_track
|
|
32
|
+
- benchmark_track
|
|
33
|
+
task_mode: evaluation_driven
|
|
34
|
+
requires_execution: true
|
|
35
|
+
requires_paper: true
|
|
36
|
+
integrity_level: cas_plus_canary
|
|
37
|
+
snapshot_status: runnable
|
|
38
|
+
support_level: advanced
|
|
39
|
+
cost_band: high
|
|
40
|
+
time_band: 6-24h
|
|
41
|
+
difficulty: hard
|
|
42
|
+
data_access: public
|
|
43
|
+
primary_outputs:
|
|
44
|
+
- mean_accepted_tokens
|
|
45
|
+
- spec_bench_speedup
|
|
46
|
+
- throughput_report
|
|
47
|
+
launch_profiles:
|
|
48
|
+
- id: quick_check
|
|
49
|
+
label: Quick Check
|
|
50
|
+
description: 'Run eval.sh with a single model (e.g., vicuna-7b-v1.3) on spec_bench
|
|
51
|
+
only. Requires model weights in ../models/. Expect 1-3 hours on an A100-80GB.
|
|
52
|
+
|
|
53
|
+
'
|
|
54
|
+
- id: specbench_eval
|
|
55
|
+
label: Full SpecBench + MBPP Eval
|
|
56
|
+
description: 'Run the complete eval.sh loop over all three Vicuna sizes on SpecBench
|
|
57
|
+
and all three Code Llama sizes on MBPP. Expect 6-24 hours on a single A100-80GB
|
|
58
|
+
depending on model sizes evaluated.
|
|
59
|
+
|
|
60
|
+
'
|
|
61
|
+
dataset_download:
|
|
62
|
+
primary_method: bundled_plus_models
|
|
63
|
+
sources:
|
|
64
|
+
- kind: bundled
|
|
65
|
+
url: null
|
|
66
|
+
access: public
|
|
67
|
+
note: SpecBench question files and MBPP data are bundled in the data/ directory.
|
|
68
|
+
- kind: huggingface
|
|
69
|
+
url: https://huggingface.co/lmsys/vicuna-7b-v1.3
|
|
70
|
+
access: public
|
|
71
|
+
note: Vicuna 7B/13B/33B weights must be downloaded separately to ../models/.
|
|
72
|
+
- kind: huggingface
|
|
73
|
+
url: https://huggingface.co/codellama/CodeLlama-7b-hf
|
|
74
|
+
access: public
|
|
75
|
+
note: Code Llama 7B/13B/34B weights must be downloaded separately to ../models/.
|
|
76
|
+
notes:
|
|
77
|
+
- Model weights total 60-130 GB depending on which sizes are used.
|
|
78
|
+
- eval.sh expects models at ../models/vicuna-7b-v1.3, ../models/vicuna-13b-v1.3,
|
|
79
|
+
etc.
|
|
80
|
+
credential_requirements:
|
|
81
|
+
mode: none
|
|
82
|
+
items: []
|
|
83
|
+
notes:
|
|
84
|
+
- HuggingFace login may be needed for some gated model variants, but Vicuna and
|
|
85
|
+
Code Llama weights are publicly available.
|
|
86
|
+
resources:
|
|
87
|
+
minimum:
|
|
88
|
+
cpu_cores: 8
|
|
89
|
+
ram_gb: 32
|
|
90
|
+
disk_gb: 100
|
|
91
|
+
gpu_count: 1
|
|
92
|
+
gpu_vram_gb: 24
|
|
93
|
+
recommended:
|
|
94
|
+
cpu_cores: 16
|
|
95
|
+
ram_gb: 64
|
|
96
|
+
disk_gb: 200
|
|
97
|
+
gpu_count: 1
|
|
98
|
+
gpu_vram_gb: 80
|
|
99
|
+
environment:
|
|
100
|
+
python: '3.10'
|
|
101
|
+
cuda: '12.2'
|
|
102
|
+
pytorch: '2.3'
|
|
103
|
+
key_packages:
|
|
104
|
+
- transformers==4.37.1
|
|
105
|
+
- fschat
|
|
106
|
+
- shortuuid
|
|
107
|
+
notes:
|
|
108
|
+
- Paper experiments used PyTorch 2.3, CUDA 12.2, single A100-80GB, 128 CPUs.
|
|
109
|
+
- The bundled requirements.txt inherits from SpecBench; install via pip install
|
|
110
|
+
-r requirements.txt.
|
|
111
|
+
- Earlier catalog metadata listed pytorch 2.1.1, but the paper and code reference PyTorch
|
|
112
|
+
2.3.
|
|
113
|
+
risk_flags:
|
|
114
|
+
- model_download_required
|
|
115
|
+
- gpu_intensive
|
|
116
|
+
- currently_vicuna_only
|
|
117
|
+
risk_notes:
|
|
118
|
+
- Models (Vicuna, Code Llama) must be downloaded externally before evaluation; ~60-130
|
|
119
|
+
GB total.
|
|
120
|
+
- The minimum 24 GB VRAM can only run the 7B model in float16; 33B/34B models require
|
|
121
|
+
roughly 66-68 GB for the float16 weights alone, i.e. an 80 GB-class GPU.
|
|
122
|
+
- The codebase is currently adapted to LLaMA/Vicuna architectures only (modeling_llama_kv.py);
|
|
123
|
+
adapting to newer models is listed as a TODO.
|
|
124
|
+
- No benchmark execution was performed during the packaging pass; metric values are
|
|
125
|
+
not yet validated.
|
|
126
|
+
recommended_when: 'Use this benchmark when you want an inference-acceleration evaluation
|
|
127
|
+
task for 7B+ LLMs that requires no draft model training, no large retrieval datastore,
|
|
128
|
+
and minimal additional memory (<2 MB). Ideal for comparing train-free speculative
|
|
129
|
+
decoding methods under controlled greedy-decoding, batch-size-one conditions.
|
|
130
|
+
|
|
131
|
+
'
|
|
132
|
+
not_recommended_when: 'Do not use this if you lack a GPU with ≥24 GB VRAM for serving
|
|
133
|
+
open-weight LLMs, if you need a training-centric benchmark, or if you need to evaluate
|
|
134
|
+
models outside the LLaMA/Vicuna family (the code currently only supports LLaMA-based
|
|
135
|
+
architectures).
|
|
136
|
+
|
|
137
|
+
'
|
|
138
|
+
paper:
|
|
139
|
+
title: 'Turning Trash into Treasure: Accelerating Inference of Large Language Models
|
|
140
|
+
with Token Recycling'
|
|
141
|
+
venue: ACL 2025 Oral
|
|
142
|
+
year: 2025
|
|
143
|
+
url: https://arxiv.org/abs/2408.08696
|
|
144
|
+
download:
|
|
145
|
+
url: https://github.com/ResearAI/DeepScientist/releases/download/aisb-v0.0.1/aisb.t3.021_tokenrecycling.zip
|
|
146
|
+
archive_type: zip
|
|
147
|
+
local_dir_name: paper-21-TokenRecycling
|
|
148
|
+
provider: github_release
|
|
149
|
+
repo: ResearAI/DeepScientist
|
|
150
|
+
tag: aisb-v0.0.1
|
|
151
|
+
asset_name: aisb.t3.021_tokenrecycling.zip
|
|
152
|
+
sha256: d38519836e52e4c5c5e1fccd9a4befa5b9a3f20a5c8fa787941a3b2773bd1ebd
|
|
153
|
+
size_bytes: 55377
|
|
154
|
+
commercial:
|
|
155
|
+
annual_fee: null
|
|
156
|
+
display:
|
|
157
|
+
palette_seed: lime-slate-recycle
|
|
158
|
+
art_style: systems-diagram
|
|
159
|
+
accent_priority: high
|
|
160
|
+
image_path: ../image/021_aisb.t3.021_tokenrecycling.jpg
|