@researai/deepscientist 1.5.17 → 1.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +309 -130
- package/AISB/catalog/aisb.b1.agentic_coding.yaml +244 -0
- package/AISB/catalog/aisb.b10.climate_earth.yaml +235 -0
- package/AISB/catalog/aisb.b11.model_efficiency.yaml +231 -0
- package/AISB/catalog/aisb.b12.embodied_ai.yaml +238 -0
- package/AISB/catalog/aisb.b2.agent_systems.yaml +229 -0
- package/AISB/catalog/aisb.b3.self_evolving_rl.yaml +237 -0
- package/AISB/catalog/aisb.b4.lm_reasoning.yaml +240 -0
- package/AISB/catalog/aisb.b5.math_proof.yaml +235 -0
- package/AISB/catalog/aisb.b6.research_process.yaml +243 -0
- package/AISB/catalog/aisb.b7.multimodal_fusion.yaml +232 -0
- package/AISB/catalog/aisb.b8.lifesci_drug.yaml +275 -0
- package/AISB/catalog/aisb.b9.material_science.yaml +237 -0
- package/AISB/catalog/aisb.t3.001_savvy.yaml +159 -0
- package/AISB/catalog/aisb.t3.001_savvy.zh.yaml +121 -0
- package/AISB/catalog/aisb.t3.002_pinet.yaml +189 -0
- package/AISB/catalog/aisb.t3.002_pinet.zh.yaml +130 -0
- package/AISB/catalog/aisb.t3.004_decentralattn.yaml +184 -0
- package/AISB/catalog/aisb.t3.004_decentralattn.zh.yaml +153 -0
- package/AISB/catalog/aisb.t3.005_tsae.yaml +193 -0
- package/AISB/catalog/aisb.t3.005_tsae.zh.yaml +139 -0
- package/AISB/catalog/aisb.t3.006_physense.yaml +194 -0
- package/AISB/catalog/aisb.t3.006_physense.zh.yaml +118 -0
- package/AISB/catalog/aisb.t3.007_reasoningiqa.yaml +169 -0
- package/AISB/catalog/aisb.t3.007_reasoningiqa.zh.yaml +133 -0
- package/AISB/catalog/aisb.t3.008_meanflows.yaml +188 -0
- package/AISB/catalog/aisb.t3.008_meanflows.zh.yaml +140 -0
- package/AISB/catalog/aisb.t3.009_scoremissing.yaml +179 -0
- package/AISB/catalog/aisb.t3.009_scoremissing.zh.yaml +119 -0
- package/AISB/catalog/aisb.t3.010_suitabilityfilter.yaml +221 -0
- package/AISB/catalog/aisb.t3.010_suitabilityfilter.zh.yaml +141 -0
- package/AISB/catalog/aisb.t3.011_osd.yaml +206 -0
- package/AISB/catalog/aisb.t3.011_osd.zh.yaml +163 -0
- package/AISB/catalog/aisb.t3.012_efficientqat.yaml +206 -0
- package/AISB/catalog/aisb.t3.012_efficientqat.zh.yaml +159 -0
- package/AISB/catalog/aisb.t3.013_appl.yaml +152 -0
- package/AISB/catalog/aisb.t3.013_appl.zh.yaml +126 -0
- package/AISB/catalog/aisb.t3.014_piguard.yaml +207 -0
- package/AISB/catalog/aisb.t3.014_piguard.zh.yaml +164 -0
- package/AISB/catalog/aisb.t3.015_frspec.yaml +209 -0
- package/AISB/catalog/aisb.t3.015_frspec.zh.yaml +163 -0
- package/AISB/catalog/aisb.t3.016_mathfusion.yaml +166 -0
- package/AISB/catalog/aisb.t3.016_mathfusion.zh.yaml +145 -0
- package/AISB/catalog/aisb.t3.017_multimodalglp.yaml +171 -0
- package/AISB/catalog/aisb.t3.017_multimodalglp.zh.yaml +122 -0
- package/AISB/catalog/aisb.t3.018_cotsynth.yaml +206 -0
- package/AISB/catalog/aisb.t3.018_cotsynth.zh.yaml +162 -0
- package/AISB/catalog/aisb.t3.019_dyscaleut.yaml +211 -0
- package/AISB/catalog/aisb.t3.019_dyscaleut.zh.yaml +148 -0
- package/AISB/catalog/aisb.t3.020_aristotle.yaml +173 -0
- package/AISB/catalog/aisb.t3.020_aristotle.zh.yaml +119 -0
- package/AISB/catalog/aisb.t3.021_tokenrecycling.yaml +160 -0
- package/AISB/catalog/aisb.t3.021_tokenrecycling.zh.yaml +129 -0
- package/AISB/catalog/aisb.t3.022_chainofreasoning.yaml +204 -0
- package/AISB/catalog/aisb.t3.022_chainofreasoning.zh.yaml +161 -0
- package/AISB/catalog/aisb.t3.023_guidedembed.yaml +211 -0
- package/AISB/catalog/aisb.t3.023_guidedembed.zh.yaml +189 -0
- package/AISB/catalog/aisb.t3.024_outputcentric.yaml +148 -0
- package/AISB/catalog/aisb.t3.024_outputcentric.zh.yaml +131 -0
- package/AISB/catalog/aisb.t3.025_deeper.yaml +143 -0
- package/AISB/catalog/aisb.t3.025_deeper.zh.yaml +116 -0
- package/AISB/catalog/aisb.t3.026_gartkg.yaml +195 -0
- package/AISB/catalog/aisb.t3.026_gartkg.zh.yaml +127 -0
- package/AISB/catalog/aisb.t3.027_citeeval.yaml +182 -0
- package/AISB/catalog/aisb.t3.027_citeeval.zh.yaml +135 -0
- package/AISB/catalog/aisb.t3.028_sbam.yaml +206 -0
- package/AISB/catalog/aisb.t3.028_sbam.zh.yaml +166 -0
- package/AISB/catalog/aisb.t3.029_cdqgeoembed.yaml +224 -0
- package/AISB/catalog/aisb.t3.029_cdqgeoembed.zh.yaml +142 -0
- package/AISB/catalog/aisb.t3.030_processrm.yaml +211 -0
- package/AISB/catalog/aisb.t3.030_processrm.zh.yaml +166 -0
- package/AISB/catalog/aisb.t3.031_circuitstability.yaml +172 -0
- package/AISB/catalog/aisb.t3.031_circuitstability.zh.yaml +134 -0
- package/AISB/catalog/aisb.t3.032_ptsolver.yaml +169 -0
- package/AISB/catalog/aisb.t3.032_ptsolver.zh.yaml +135 -0
- package/AISB/catalog/aisb.t3.033_gcse.yaml +144 -0
- package/AISB/catalog/aisb.t3.033_gcse.zh.yaml +126 -0
- package/AISB/catalog/aisb.t3.034_ensemblewm.yaml +183 -0
- package/AISB/catalog/aisb.t3.034_ensemblewm.zh.yaml +146 -0
- package/AISB/catalog/aisb.t3.035_moralvalueswa.yaml +207 -0
- package/AISB/catalog/aisb.t3.035_moralvalueswa.zh.yaml +165 -0
- package/AISB/catalog/aisb.t3.036_weakstrongpref.yaml +210 -0
- package/AISB/catalog/aisb.t3.036_weakstrongpref.zh.yaml +194 -0
- package/AISB/catalog/aisb.t3.037_dementiamask.yaml +172 -0
- package/AISB/catalog/aisb.t3.037_dementiamask.zh.yaml +132 -0
- package/AISB/catalog/aisb.t3.038_tinysam.yaml +284 -0
- package/AISB/catalog/aisb.t3.038_tinysam.zh.yaml +240 -0
- package/AISB/catalog/aisb.t3.039_calf.yaml +224 -0
- package/AISB/catalog/aisb.t3.039_calf.zh.yaml +194 -0
- package/AISB/catalog/aisb.t3.040_graniteguardian.yaml +199 -0
- package/AISB/catalog/aisb.t3.040_graniteguardian.zh.yaml +174 -0
- package/AISB/catalog/aisb.t3.041_amdm.yaml +149 -0
- package/AISB/catalog/aisb.t3.041_amdm.zh.yaml +137 -0
- package/AISB/catalog/aisb.t3.042_xpatch.yaml +216 -0
- package/AISB/catalog/aisb.t3.042_xpatch.zh.yaml +182 -0
- package/AISB/catalog/aisb.t3.043_vhm.yaml +268 -0
- package/AISB/catalog/aisb.t3.043_vhm.zh.yaml +193 -0
- package/AISB/catalog/aisb.t3.044_rgvi.yaml +224 -0
- package/AISB/catalog/aisb.t3.044_rgvi.zh.yaml +176 -0
- package/AISB/catalog/aisb.t3.045_pslstm.yaml +203 -0
- package/AISB/catalog/aisb.t3.045_pslstm.zh.yaml +179 -0
- package/AISB/catalog/aisb.t3.046_nonstatts.yaml +208 -0
- package/AISB/catalog/aisb.t3.046_nonstatts.zh.yaml +194 -0
- package/AISB/catalog/aisb.t3.047_timepfn.yaml +156 -0
- package/AISB/catalog/aisb.t3.047_timepfn.zh.yaml +124 -0
- package/AISB/catalog/aisb.t3.048_proxyspex.yaml +148 -0
- package/AISB/catalog/aisb.t3.048_proxyspex.zh.yaml +125 -0
- package/AISB/catalog/aisb.t3.049_hogwildinference.yaml +183 -0
- package/AISB/catalog/aisb.t3.049_hogwildinference.zh.yaml +138 -0
- package/AISB/catalog/aisb.t3.050_causalpfn.yaml +214 -0
- package/AISB/catalog/aisb.t3.050_causalpfn.zh.yaml +190 -0
- package/AISB/catalog/aisb.t3.051_flashtp.yaml +169 -0
- package/AISB/catalog/aisb.t3.051_flashtp.zh.yaml +124 -0
- package/AISB/catalog/aisb.t3.052_nsdiff.yaml +155 -0
- package/AISB/catalog/aisb.t3.052_nsdiff.zh.yaml +138 -0
- package/AISB/catalog/aisb.t3.053_k2vae.yaml +158 -0
- package/AISB/catalog/aisb.t3.053_k2vae.zh.yaml +132 -0
- package/AISB/catalog/aisb.t3.054_timebase.yaml +178 -0
- package/AISB/catalog/aisb.t3.054_timebase.zh.yaml +158 -0
- package/AISB/catalog/aisb.t3.055_csbrain.yaml +238 -0
- package/AISB/catalog/aisb.t3.055_csbrain.zh.yaml +184 -0
- package/AISB/catalog/aisb.t3.056_infosam.yaml +224 -0
- package/AISB/catalog/aisb.t3.056_infosam.zh.yaml +189 -0
- package/AISB/catalog/aisb.t3.057_mdreid.yaml +129 -0
- package/AISB/catalog/aisb.t3.057_mdreid.zh.yaml +117 -0
- package/AISB/catalog/aisb.t3.058_mindglitch.yaml +171 -0
- package/AISB/catalog/aisb.t3.058_mindglitch.zh.yaml +145 -0
- package/AISB/catalog/aisb.t3.059_selfsupervised.yaml +154 -0
- package/AISB/catalog/aisb.t3.059_selfsupervised.zh.yaml +125 -0
- package/AISB/catalog/aisb.t3.060_iaggad.yaml +121 -0
- package/AISB/catalog/aisb.t3.060_iaggad.zh.yaml +100 -0
- package/AISB/catalog/aisb.t3.061_hsgkn.yaml +136 -0
- package/AISB/catalog/aisb.t3.061_hsgkn.zh.yaml +113 -0
- package/AISB/catalog/aisb.t3.062_visionts.yaml +237 -0
- package/AISB/catalog/aisb.t3.062_visionts.zh.yaml +216 -0
- package/AISB/catalog/aisb.t3.063_tsrag.yaml +162 -0
- package/AISB/catalog/aisb.t3.063_tsrag.zh.yaml +138 -0
- package/AISB/catalog/aisb.t3.064_pir.yaml +221 -0
- package/AISB/catalog/aisb.t3.064_pir.zh.yaml +197 -0
- package/AISB/catalog/aisb.t3.065_proteinbinding.yaml +234 -0
- package/AISB/catalog/aisb.t3.065_proteinbinding.zh.yaml +167 -0
- package/AISB/catalog/aisb.t3.066_tropicalattention.yaml +267 -0
- package/AISB/catalog/aisb.t3.066_tropicalattention.zh.yaml +229 -0
- package/AISB/catalog/aisb.t3.067_kanad.yaml +193 -0
- package/AISB/catalog/aisb.t3.067_kanad.zh.yaml +167 -0
- package/AISB/catalog/aisb.t3.068_sempo.yaml +187 -0
- package/AISB/catalog/aisb.t3.068_sempo.zh.yaml +148 -0
- package/AISB/catalog/aisb.t3.069_treehfd.yaml +129 -0
- package/AISB/catalog/aisb.t3.069_treehfd.zh.yaml +111 -0
- package/AISB/catalog/aisb.t3.070_certifiedunlearning.yaml +224 -0
- package/AISB/catalog/aisb.t3.070_certifiedunlearning.zh.yaml +171 -0
- package/AISB/catalog/aisb.t3.071_neuralmjd.yaml +142 -0
- package/AISB/catalog/aisb.t3.071_neuralmjd.zh.yaml +120 -0
- package/AISB/catalog/aisb.t3.072_fedgmt.yaml +181 -0
- package/AISB/catalog/aisb.t3.072_fedgmt.zh.yaml +158 -0
- package/AISB/catalog/aisb.t3.073_rld.yaml +161 -0
- package/AISB/catalog/aisb.t3.073_rld.zh.yaml +129 -0
- package/AISB/catalog/aisb.t3.074_lsvi.yaml +163 -0
- package/AISB/catalog/aisb.t3.074_lsvi.zh.yaml +129 -0
- package/AISB/catalog/aisb.t3.075_treeslicedentropy.yaml +201 -0
- package/AISB/catalog/aisb.t3.075_treeslicedentropy.zh.yaml +148 -0
- package/AISB/catalog/aisb.t3.076_aanet.yaml +169 -0
- package/AISB/catalog/aisb.t3.076_aanet.zh.yaml +129 -0
- package/AISB/catalog/aisb.t3.077_cmnn.yaml +199 -0
- package/AISB/catalog/aisb.t3.077_cmnn.zh.yaml +165 -0
- package/AISB/catalog/aisb.t3.078_conformalanomaly.yaml +146 -0
- package/AISB/catalog/aisb.t3.078_conformalanomaly.zh.yaml +117 -0
- package/AISB/catalog/aisb.t3.079_dpfkmeans.yaml +131 -0
- package/AISB/catalog/aisb.t3.079_dpfkmeans.zh.yaml +104 -0
- package/AISB/catalog/aisb.t3.080_latentscorereweight.yaml +169 -0
- package/AISB/catalog/aisb.t3.080_latentscorereweight.zh.yaml +123 -0
- package/AISB/catalog/aisb.t3.081_qmamba.yaml +150 -0
- package/AISB/catalog/aisb.t3.081_qmamba.zh.yaml +117 -0
- package/AISB/catalog/aisb.t3.082_onlinellmrouting.yaml +160 -0
- package/AISB/catalog/aisb.t3.082_onlinellmrouting.zh.yaml +133 -0
- package/AISB/catalog/aisb.t3.083_starformer.yaml +178 -0
- package/AISB/catalog/aisb.t3.083_starformer.zh.yaml +140 -0
- package/AISB/catalog/aisb.t3.084_ift.yaml +139 -0
- package/AISB/catalog/aisb.t3.084_ift.zh.yaml +111 -0
- package/AISB/catalog/aisb.t3.085_neuralsurv.yaml +183 -0
- package/AISB/catalog/aisb.t3.085_neuralsurv.zh.yaml +143 -0
- package/AISB/catalog/aisb.t3.086_stella.yaml +197 -0
- package/AISB/catalog/aisb.t3.086_stella.zh.yaml +142 -0
- package/AISB/catalog/aisb.t3.087_moses.yaml +167 -0
- package/AISB/catalog/aisb.t3.087_moses.zh.yaml +132 -0
- package/AISB/catalog/aisb.t3.088_channelnorm.yaml +140 -0
- package/AISB/catalog/aisb.t3.088_channelnorm.zh.yaml +109 -0
- package/AISB/catalog/aisb.t3.089_causalvelocity.yaml +730 -0
- package/AISB/catalog/aisb.t3.089_causalvelocity.zh.yaml +668 -0
- package/AISB/catalog/aisb.t3.090_rstib.yaml +144 -0
- package/AISB/catalog/aisb.t3.090_rstib.zh.yaml +109 -0
- package/AISB/catalog/aisb.t3.091_timeawarecausal.yaml +132 -0
- package/AISB/catalog/aisb.t3.091_timeawarecausal.zh.yaml +107 -0
- package/AISB/catalog/aisb.t3.092_kmeanslocalopt.yaml +138 -0
- package/AISB/catalog/aisb.t3.092_kmeanslocalopt.zh.yaml +110 -0
- package/AISB/catalog/aisb.t3.093_fedwmsam.yaml +134 -0
- package/AISB/catalog/aisb.t3.093_fedwmsam.zh.yaml +106 -0
- package/AISB/catalog/aisb.t3.094_boundre.yaml +147 -0
- package/AISB/catalog/aisb.t3.094_boundre.zh.yaml +114 -0
- package/AISB/catalog/aisb.t3.095_fastfeaturecp.yaml +153 -0
- package/AISB/catalog/aisb.t3.095_fastfeaturecp.zh.yaml +118 -0
- package/AISB/catalog/aisb.t3.096_m3svm.yaml +189 -0
- package/AISB/catalog/aisb.t3.096_m3svm.zh.yaml +149 -0
- package/AISB/catalog/aisb.t3.097_wassersteintl.yaml +212 -0
- package/AISB/catalog/aisb.t3.097_wassersteintl.zh.yaml +169 -0
- package/AISB/catalog/aisb.t3.098_xmahalanobis.yaml +171 -0
- package/AISB/catalog/aisb.t3.098_xmahalanobis.zh.yaml +127 -0
- package/AISB/catalog/aisb.t3.099_ollalanding.yaml +248 -0
- package/AISB/catalog/aisb.t3.099_ollalanding.zh.yaml +182 -0
- package/AISB/catalog/aisb.t3.100_invmissingdata.yaml +179 -0
- package/AISB/catalog/aisb.t3.100_invmissingdata.zh.yaml +150 -0
- package/AISB/catalog/aisb.t3.101_acia.yaml +164 -0
- package/AISB/catalog/aisb.t3.101_acia.zh.yaml +109 -0
- package/AISB/catalog/aisb.t3.102_stochasticff.yaml +178 -0
- package/AISB/catalog/aisb.t3.102_stochasticff.zh.yaml +130 -0
- package/AISB/catalog/aisb.t3.103_qdcp.yaml +150 -0
- package/AISB/catalog/aisb.t3.103_qdcp.zh.yaml +116 -0
- package/AISB/catalog/aisb.t3.104_balancedactiveinf.yaml +137 -0
- package/AISB/catalog/aisb.t3.104_balancedactiveinf.zh.yaml +104 -0
- package/AISB/catalog/aisb.t3.105_binaryclasseval.yaml +161 -0
- package/AISB/catalog/aisb.t3.105_binaryclasseval.zh.yaml +130 -0
- package/AISB/image/001_aisb.t3.001_savvy.jpg +0 -0
- package/AISB/image/002_aisb.t3.002_pinet.jpg +0 -0
- package/AISB/image/003_aisb.t3.003_dmsqd.jpg +0 -0
- package/AISB/image/004_aisb.t3.004_decentralattn.jpg +0 -0
- package/AISB/image/005_aisb.t3.005_tsae.jpg +0 -0
- package/AISB/image/006_aisb.t3.006_physense.jpg +0 -0
- package/AISB/image/007_aisb.t3.007_reasoningiqa.jpg +0 -0
- package/AISB/image/008_aisb.t3.008_meanflows.jpg +0 -0
- package/AISB/image/009_aisb.t3.009_scoremissing.jpg +0 -0
- package/AISB/image/010_aisb.t3.010_suitabilityfilter.jpg +0 -0
- package/AISB/image/011_aisb.t3.011_osd.jpg +0 -0
- package/AISB/image/012_aisb.t3.012_efficientqat.jpg +0 -0
- package/AISB/image/013_aisb.t3.013_appl.jpg +0 -0
- package/AISB/image/014_aisb.t3.014_piguard.jpg +0 -0
- package/AISB/image/015_aisb.t3.015_frspec.jpg +0 -0
- package/AISB/image/016_aisb.t3.016_mathfusion.jpg +0 -0
- package/AISB/image/017_aisb.t3.017_multimodalglp.jpg +0 -0
- package/AISB/image/018_aisb.t3.018_cotsynth.jpg +0 -0
- package/AISB/image/019_aisb.t3.019_dyscaleut.jpg +0 -0
- package/AISB/image/020_aisb.t3.020_aristotle.jpg +0 -0
- package/AISB/image/021_aisb.t3.021_tokenrecycling.jpg +0 -0
- package/AISB/image/022_aisb.t3.022_chainofreasoning.jpg +0 -0
- package/AISB/image/023_aisb.t3.023_guidedembed.jpg +0 -0
- package/AISB/image/024_aisb.t3.024_outputcentric.jpg +0 -0
- package/AISB/image/025_aisb.t3.025_deeper.jpg +0 -0
- package/AISB/image/026_aisb.t3.026_gartkg.jpg +0 -0
- package/AISB/image/027_aisb.t3.027_citeeval.jpg +0 -0
- package/AISB/image/028_aisb.t3.028_sbam.jpg +0 -0
- package/AISB/image/029_aisb.t3.029_cdqgeoembed.jpg +0 -0
- package/AISB/image/030_aisb.t3.030_processrm.jpg +0 -0
- package/AISB/image/031_aisb.t3.031_circuitstability.jpg +0 -0
- package/AISB/image/032_aisb.t3.032_ptsolver.jpg +0 -0
- package/AISB/image/033_aisb.t3.033_gcse.jpg +0 -0
- package/AISB/image/034_aisb.t3.034_ensemblewm.jpg +0 -0
- package/AISB/image/035_aisb.t3.035_moralvalueswa.jpg +0 -0
- package/AISB/image/036_aisb.t3.036_weakstrongpref.jpg +0 -0
- package/AISB/image/037_aisb.t3.037_dementiamask.jpg +0 -0
- package/AISB/image/038_aisb.t3.038_tinysam.jpg +0 -0
- package/AISB/image/039_aisb.t3.039_calf.jpg +0 -0
- package/AISB/image/040_aisb.t3.040_graniteguardian.jpg +0 -0
- package/AISB/image/041_aisb.t3.041_amdm.jpg +0 -0
- package/AISB/image/042_aisb.t3.042_xpatch.jpg +0 -0
- package/AISB/image/043_aisb.t3.043_vhm.jpg +0 -0
- package/AISB/image/044_aisb.t3.044_rgvi.jpg +0 -0
- package/AISB/image/045_aisb.t3.045_pslstm.jpg +0 -0
- package/AISB/image/046_aisb.t3.046_nonstatts.jpg +0 -0
- package/AISB/image/047_aisb.t3.047_timepfn.jpg +0 -0
- package/AISB/image/048_aisb.t3.048_proxyspex.jpg +0 -0
- package/AISB/image/049_aisb.t3.049_hogwildinference.jpg +0 -0
- package/AISB/image/050_aisb.t3.050_causalpfn.jpg +0 -0
- package/AISB/image/051_aisb.t3.051_flashtp.jpg +0 -0
- package/AISB/image/052_aisb.t3.052_nsdiff.jpg +0 -0
- package/AISB/image/053_aisb.t3.053_k2vae.jpg +0 -0
- package/AISB/image/054_aisb.t3.054_timebase.jpg +0 -0
- package/AISB/image/055_aisb.t3.055_csbrain.jpg +0 -0
- package/AISB/image/056_aisb.t3.056_infosam.jpg +0 -0
- package/AISB/image/057_aisb.t3.057_mdreid.jpg +0 -0
- package/AISB/image/058_aisb.t3.058_mindglitch.jpg +0 -0
- package/AISB/image/059_aisb.t3.059_selfsupervised.jpg +0 -0
- package/AISB/image/060_aisb.t3.060_iaggad.jpg +0 -0
- package/AISB/image/061_aisb.t3.061_hsgkn.jpg +0 -0
- package/AISB/image/062_aisb.t3.062_visionts.jpg +0 -0
- package/AISB/image/063_aisb.t3.063_tsrag.jpg +0 -0
- package/AISB/image/064_aisb.t3.064_pir.jpg +0 -0
- package/AISB/image/065_aisb.t3.065_proteinbinding.jpg +0 -0
- package/AISB/image/066_aisb.t3.066_tropicalattention.jpg +0 -0
- package/AISB/image/067_aisb.t3.067_kanad.jpg +0 -0
- package/AISB/image/068_aisb.t3.068_sempo.jpg +0 -0
- package/AISB/image/069_aisb.t3.069_treehfd.jpg +0 -0
- package/AISB/image/070_aisb.t3.070_certifiedunlearning.jpg +0 -0
- package/AISB/image/071_aisb.t3.071_neuralmjd.jpg +0 -0
- package/AISB/image/072_aisb.t3.072_fedgmt.jpg +0 -0
- package/AISB/image/073_aisb.t3.073_rld.jpg +0 -0
- package/AISB/image/074_aisb.t3.074_lsvi.jpg +0 -0
- package/AISB/image/075_aisb.t3.075_treeslicedentropy.jpg +0 -0
- package/AISB/image/076_aisb.t3.076_aanet.jpg +0 -0
- package/AISB/image/077_aisb.t3.077_cmnn.jpg +0 -0
- package/AISB/image/078_aisb.t3.078_conformalanomaly.jpg +0 -0
- package/AISB/image/079_aisb.t3.079_dpfkmeans.jpg +0 -0
- package/AISB/image/080_aisb.t3.080_latentscorereweight.jpg +0 -0
- package/AISB/image/081_aisb.t3.081_qmamba.jpg +0 -0
- package/AISB/image/082_aisb.t3.082_onlinellmrouting.jpg +0 -0
- package/AISB/image/083_aisb.t3.083_starformer.jpg +0 -0
- package/AISB/image/084_aisb.t3.084_ift.jpg +0 -0
- package/AISB/image/085_aisb.t3.085_neuralsurv.jpg +0 -0
- package/AISB/image/086_aisb.t3.086_stella.jpg +0 -0
- package/AISB/image/087_aisb.t3.087_moses.jpg +0 -0
- package/AISB/image/088_aisb.t3.088_channelnorm.jpg +0 -0
- package/AISB/image/089_aisb.t3.089_causalvelocity.jpg +0 -0
- package/AISB/image/090_aisb.t3.090_rstib.jpg +0 -0
- package/AISB/image/091_aisb.t3.091_timeawarecausal.jpg +0 -0
- package/AISB/image/092_aisb.t3.092_kmeanslocalopt.jpg +0 -0
- package/AISB/image/093_aisb.t3.093_fedwmsam.jpg +0 -0
- package/AISB/image/094_aisb.t3.094_boundre.jpg +0 -0
- package/AISB/image/095_aisb.t3.095_fastfeaturecp.jpg +0 -0
- package/AISB/image/096_aisb.t3.096_m3svm.jpg +0 -0
- package/AISB/image/097_aisb.t3.097_wassersteintl.jpg +0 -0
- package/AISB/image/098_aisb.t3.098_xmahalanobis.jpg +0 -0
- package/AISB/image/099_aisb.t3.099_ollalanding.jpg +0 -0
- package/AISB/image/100_aisb.t3.100_invmissingdata.jpg +0 -0
- package/AISB/image/101_aisb.t3.101_acia.jpg +0 -0
- package/AISB/image/102_aisb.t3.102_stochasticff.jpg +0 -0
- package/AISB/image/103_aisb.t3.103_qdcp.jpg +0 -0
- package/AISB/image/104_aisb.t3.104_balancedactiveinf.jpg +0 -0
- package/AISB/image/105_aisb.t3.105_binaryclasseval.jpg +0 -0
- package/AISB/image/106_aisb.t1.reasoning_lite.jpg +0 -0
- package/AISB/image/107_aisb.t2.paper_audit.jpg +0 -0
- package/AISB/image/108_aisb.t3.multi_gpu_search.jpg +0 -0
- package/AISB/image/109_aisb.t3.tdc_admet.jpg +0 -0
- package/AISB/image/aisb.b1.agentic_coding.svg +16 -0
- package/AISB/image/aisb.b10.climate_earth.svg +16 -0
- package/AISB/image/aisb.b11.model_efficiency.svg +16 -0
- package/AISB/image/aisb.b12.embodied_ai.svg +16 -0
- package/AISB/image/aisb.b2.agent_systems.svg +16 -0
- package/AISB/image/aisb.b3.self_evolving_rl.svg +16 -0
- package/AISB/image/aisb.b4.lm_reasoning.svg +16 -0
- package/AISB/image/aisb.b5.math_proof.svg +16 -0
- package/AISB/image/aisb.b6.research_process.svg +16 -0
- package/AISB/image/aisb.b7.multimodal_fusion.svg +16 -0
- package/AISB/image/aisb.b8.lifesci_drug.svg +16 -0
- package/AISB/image/aisb.b9.material_science.svg +16 -0
- package/README.md +132 -11
- package/bin/ds.js +376 -49
- package/docs/en/00_QUICK_START.md +135 -18
- package/docs/en/01_SETTINGS_REFERENCE.md +468 -96
- package/docs/en/02_START_RESEARCH_GUIDE.md +26 -5
- package/docs/en/03_QQ_CONNECTOR_GUIDE.md +14 -3
- package/docs/en/04_LINGZHU_CONNECTOR_GUIDE.md +2 -0
- package/docs/en/05_TUI_GUIDE.md +171 -2
- package/docs/en/07_MEMORY_AND_MCP.md +38 -2
- package/docs/en/09_DOCTOR.md +64 -4
- package/docs/en/10_WEIXIN_CONNECTOR_GUIDE.md +38 -1
- package/docs/en/11_LICENSE_AND_RISK.md +4 -0
- package/docs/en/12_GUIDED_WORKFLOW_TOUR.md +15 -0
- package/docs/en/14_PROMPT_SKILLS_AND_MCP_GUIDE.md +9 -0
- package/docs/en/15_CODEX_PROVIDER_SETUP.md +622 -187
- package/docs/en/16_TELEGRAM_CONNECTOR_GUIDE.md +14 -0
- package/docs/en/17_WHATSAPP_CONNECTOR_GUIDE.md +14 -0
- package/docs/en/18_FEISHU_CONNECTOR_GUIDE.md +14 -0
- package/docs/en/21_LOCAL_MODEL_BACKENDS_GUIDE.md +105 -2
- package/docs/en/22_BENCHSTORE_YAML_REFERENCE.md +469 -0
- package/docs/en/23_BENCHSTORE_GITHUB_RELEASES_SPEC.md +316 -0
- package/docs/en/24_CLAUDE_CODE_PROVIDER_SETUP.md +469 -0
- package/docs/en/25_OPENCODE_PROVIDER_SETUP.md +653 -0
- package/docs/en/26_CITATION_AND_ATTRIBUTION.md +119 -0
- package/docs/en/27_KIMI_CODE_PROVIDER_SETUP.md +180 -0
- package/docs/en/28_DISCORD_CONNECTOR_GUIDE.md +61 -0
- package/docs/en/29_SLACK_CONNECTOR_GUIDE.md +60 -0
- package/docs/en/30_SETTINGS_CONTROL_CENTER_GUIDE.md +371 -0
- package/docs/en/{19_LOCAL_BROWSER_AUTH.md → 31_LOCAL_BROWSER_AUTH.md} +1 -1
- package/docs/en/32_WINDOWS_WSL2_DEPLOYMENT_GUIDE.md +273 -0
- package/docs/en/33_WORKSPACE_EXPLORER_QA.md +121 -0
- package/docs/en/91_DEVELOPMENT.md +29 -0
- package/docs/en/99_ACKNOWLEDGEMENTS.md +24 -19
- package/docs/en/README.md +44 -7
- package/docs/images/admin/admin-connectors-health-en.png +0 -0
- package/docs/images/admin/admin-controllers-en.png +0 -0
- package/docs/images/admin/admin-diagnostics-en.png +0 -0
- package/docs/images/admin/admin-errors-en.png +0 -0
- package/docs/images/admin/admin-issues-en.png +0 -0
- package/docs/images/admin/admin-logs-en.png +0 -0
- package/docs/images/admin/admin-quest-detail-en.png +0 -0
- package/docs/images/admin/admin-quests-en.png +0 -0
- package/docs/images/admin/admin-repairs-en.png +0 -0
- package/docs/images/admin/admin-runtime-en.png +0 -0
- package/docs/images/admin/admin-search-en.png +0 -0
- package/docs/images/admin/admin-stats-en.png +0 -0
- package/docs/images/admin/admin-summary-en.png +0 -0
- package/docs/images/connectors/connector-discord-en.png +0 -0
- package/docs/images/connectors/connector-feishu-en.png +0 -0
- package/docs/images/connectors/connector-lingzhu-en.png +0 -0
- package/docs/images/connectors/connector-qq-en.png +0 -0
- package/docs/images/connectors/connector-slack-en.png +0 -0
- package/docs/images/connectors/connector-telegram-en.png +0 -0
- package/docs/images/connectors/connector-weixin-en.png +0 -0
- package/docs/images/connectors/connector-whatsapp-en.png +0 -0
- package/docs/images/settings/settings-baselines-en.png +0 -0
- package/docs/images/settings/settings-config-en.png +0 -0
- package/docs/images/settings/settings-connectors-overview-en.png +0 -0
- package/docs/images/settings/settings-deepxiv-en.png +0 -0
- package/docs/images/settings/settings-mcp-servers-en.png +0 -0
- package/docs/images/settings/settings-plugins-en.png +0 -0
- package/docs/images/settings/settings-runners-en.png +0 -0
- package/docs/zh/00_QUICK_START.md +92 -17
- package/docs/zh/01_SETTINGS_REFERENCE.md +219 -98
- package/docs/zh/02_START_RESEARCH_GUIDE.md +26 -5
- package/docs/zh/05_TUI_GUIDE.md +171 -2
- package/docs/zh/07_MEMORY_AND_MCP.md +29 -2
- package/docs/zh/09_DOCTOR.md +39 -4
- package/docs/zh/10_WEIXIN_CONNECTOR_GUIDE.md +24 -1
- package/docs/zh/11_LICENSE_AND_RISK.md +4 -0
- package/docs/zh/12_GUIDED_WORKFLOW_TOUR.md +15 -0
- package/docs/zh/14_PROMPT_SKILLS_AND_MCP_GUIDE.md +9 -0
- package/docs/zh/15_CODEX_PROVIDER_SETUP.md +550 -188
- package/docs/zh/21_LOCAL_MODEL_BACKENDS_GUIDE.md +105 -2
- package/docs/zh/22_BENCHSTORE_YAML_REFERENCE.md +459 -0
- package/docs/zh/23_BENCHSTORE_GITHUB_RELEASES_SPEC.md +287 -0
- package/docs/zh/23_CLAUDE_RUNNER_GUIDE.md +103 -0
- package/docs/zh/24_CLAUDE_CODE_PROVIDER_SETUP.md +460 -0
- package/docs/zh/25_OPENCODE_PROVIDER_SETUP.md +660 -0
- package/docs/zh/26_CITATION_AND_ATTRIBUTION.md +102 -0
- package/docs/zh/27_KIMI_CODE_PROVIDER_SETUP.md +51 -0
- package/docs/zh/{19_LOCAL_BROWSER_AUTH.md → 31_LOCAL_BROWSER_AUTH.md} +1 -1
- package/docs/zh/32_WINDOWS_WSL2_DEPLOYMENT_GUIDE.md +264 -0
- package/docs/zh/33_WORKSPACE_EXPLORER_QA.md +127 -0
- package/docs/zh/99_ACKNOWLEDGEMENTS.md +23 -19
- package/docs/zh/README.md +29 -7
- package/install.sh +122 -16
- package/package.json +4 -1
- package/pyproject.toml +2 -1
- package/src/deepscientist/__init__.py +1 -1
- package/src/deepscientist/acp/envelope.py +13 -0
- package/src/deepscientist/admin/__init__.py +3 -0
- package/src/deepscientist/admin/charts.py +681 -0
- package/src/deepscientist/admin/logs.py +119 -0
- package/src/deepscientist/admin/repairs.py +217 -0
- package/src/deepscientist/admin/service.py +1310 -0
- package/src/deepscientist/admin/system_info.py +700 -0
- package/src/deepscientist/admin/tasks.py +465 -0
- package/src/deepscientist/admin/tool_metrics.py +600 -0
- package/src/deepscientist/artifact/guidance.py +8 -4
- package/src/deepscientist/artifact/schemas.py +115 -0
- package/src/deepscientist/artifact/service.py +4268 -260
- package/src/deepscientist/bash_exec/monitor.py +30 -3
- package/src/deepscientist/bash_exec/service.py +134 -1
- package/src/deepscientist/benchstore/__init__.py +4 -0
- package/src/deepscientist/benchstore/prompt_builder.py +224 -0
- package/src/deepscientist/benchstore/service.py +1716 -0
- package/src/deepscientist/channels/weixin_ilink.py +8 -1
- package/src/deepscientist/cli.py +92 -17
- package/src/deepscientist/codex_cli_compat.py +2 -2
- package/src/deepscientist/config/models.py +82 -11
- package/src/deepscientist/config/service.py +927 -91
- package/src/deepscientist/connector/weixin_support.py +48 -17
- package/src/deepscientist/daemon/api/handlers.py +697 -210
- package/src/deepscientist/daemon/api/router.py +76 -1
- package/src/deepscientist/daemon/app.py +1054 -51
- package/src/deepscientist/diagnostics/runner_failures.py +147 -0
- package/src/deepscientist/doctor.py +212 -65
- package/src/deepscientist/evidence_packets.py +590 -0
- package/src/deepscientist/home.py +52 -4
- package/src/deepscientist/kimi_cli_compat.py +50 -0
- package/src/deepscientist/latex_runtime.py +2 -2
- package/src/deepscientist/mcp/context.py +2 -0
- package/src/deepscientist/mcp/schemas.py +114 -0
- package/src/deepscientist/mcp/server.py +1566 -126
- package/src/deepscientist/memory/service.py +203 -16
- package/src/deepscientist/process_control.py +8 -1
- package/src/deepscientist/prompts/builder.py +836 -92
- package/src/deepscientist/quest/__init__.py +2 -2
- package/src/deepscientist/quest/layout.py +12 -1
- package/src/deepscientist/quest/node_traces.py +10 -0
- package/src/deepscientist/quest/service.py +1430 -139
- package/src/deepscientist/quest/stage_views.py +1 -1
- package/src/deepscientist/runners/__init__.py +18 -0
- package/src/deepscientist/runners/base.py +89 -1
- package/src/deepscientist/runners/builtins.py +13 -1
- package/src/deepscientist/runners/claude.py +391 -0
- package/src/deepscientist/runners/codex.py +421 -21
- package/src/deepscientist/runners/codex_telemetry.py +127 -0
- package/src/deepscientist/runners/kimi.py +334 -0
- package/src/deepscientist/runners/metadata.py +68 -0
- package/src/deepscientist/runners/opencode.py +414 -0
- package/src/deepscientist/runners/runtime_overrides.py +100 -0
- package/src/deepscientist/runners/simple_cli.py +538 -0
- package/src/deepscientist/runtime_storage.py +303 -0
- package/src/deepscientist/shared.py +61 -16
- package/src/deepscientist/skills/installer.py +37 -0
- package/src/deepscientist/skills/registry.py +2 -0
- package/src/deepscientist/tinytex.py +2 -2
- package/src/deepscientist/tui.py +10 -3
- package/src/prompts/benchstore/system.md +77 -0
- package/src/prompts/connectors/qq.md +33 -2
- package/src/prompts/connectors/weixin.md +208 -23
- package/src/prompts/contracts/admin_ops.md +74 -0
- package/src/prompts/contracts/admin_ops_knowledge.md +138 -0
- package/src/prompts/contracts/shared_interaction.md +5 -11
- package/src/prompts/start_setup/system.md +422 -0
- package/src/prompts/system.md +409 -315
- package/src/prompts/system_copilot.md +88 -12
- package/src/skills/analysis-campaign/SKILL.md +239 -578
- package/src/skills/analysis-campaign/references/artifact-flow-examples.md +102 -0
- package/src/skills/analysis-campaign/references/boundary-cases.md +98 -0
- package/src/skills/analysis-campaign/references/campaign-checklist-template.md +39 -24
- package/src/skills/analysis-campaign/references/campaign-design.md +26 -10
- package/src/skills/analysis-campaign/references/campaign-plan-template.md +53 -54
- package/src/skills/analysis-campaign/references/operational-guidance.md +97 -0
- package/src/skills/analysis-campaign/references/writing-facing-slice-examples.md +10 -20
- package/src/skills/baseline/SKILL.md +183 -461
- package/src/skills/baseline/references/artifact-flow-examples.md +106 -0
- package/src/skills/baseline/references/artifact-payload-examples.md +1 -1
- package/src/skills/baseline/references/baseline-checklist-template.md +27 -35
- package/src/skills/baseline/references/baseline-plan-template.md +37 -76
- package/src/skills/baseline/references/boundary-cases.md +86 -0
- package/src/skills/baseline/references/codebase-audit-checklist.md +2 -6
- package/src/skills/baseline/references/comparability-contract.md +7 -12
- package/src/skills/baseline/references/operational-guidance.md +56 -0
- package/src/skills/baseline/references/route-selection.md +5 -25
- package/src/skills/decision/SKILL.md +113 -306
- package/src/skills/decision/references/checkpoint-memory-template.md +47 -0
- package/src/skills/decision/references/operational-guidance.md +94 -0
- package/src/skills/decision/references/research-route-criteria.md +7 -8
- package/src/skills/decision/references/strategic-decision-template.md +13 -26
- package/src/skills/experiment/SKILL.md +132 -670
- package/src/skills/experiment/references/execution-playbook.md +374 -0
- package/src/skills/experiment/references/main-experiment-checklist-template.md +26 -2
- package/src/skills/experiment/references/main-experiment-plan-template.md +28 -17
- package/src/skills/experiment/references/operational-guidance.md +108 -0
- package/src/skills/finalize/SKILL.md +62 -0
- package/src/skills/finalize/references/checkpoint-memory-template.md +49 -0
- package/src/skills/finalize/references/resume-packet-template.md +7 -0
- package/src/skills/idea/SKILL.md +228 -15
- package/src/skills/idea/references/controlled-brainstorming-playbook.md +78 -0
- package/src/skills/idea/references/current-board-packet-template.md +61 -0
- package/src/skills/idea/references/high-value-idea-sourcing.md +119 -0
- package/src/skills/idea/references/idea-generation-playbook.md +21 -0
- package/src/skills/idea/references/idea-thinking-flow.md +6 -0
- package/src/skills/idea/references/literature-survey-template.md +3 -0
- package/src/skills/idea/references/objective-contract-template.md +54 -0
- package/src/skills/idea/references/outline-seeding-example.md +56 -0
- package/src/skills/idea/references/pre-idea-draft-template.md +105 -0
- package/src/skills/idea/references/related-work-playbook.md +75 -2
- package/src/skills/idea/references/research-history-playbook.md +114 -0
- package/src/skills/idea/references/selection-gate.md +58 -6
- package/src/skills/intake-audit/SKILL.md +43 -2
- package/src/skills/intake-audit/references/state-audit-template.md +10 -0
- package/src/skills/nature-data/SKILL.md +128 -0
- package/src/skills/nature-data/UPSTREAM_LICENSE.txt +21 -0
- package/src/skills/nature-data/agents/openai.yaml +4 -0
- package/src/skills/nature-data/references/chinese-author-alignment.md +84 -0
- package/src/skills/nature-data/references/fair-metadata-checklist.md +105 -0
- package/src/skills/nature-data/references/policy-principles.md +103 -0
- package/src/skills/nature-data/references/repository-and-identifiers.md +96 -0
- package/src/skills/nature-data/references/source-basis.md +54 -0
- package/src/skills/nature-data/references/statement-patterns.md +153 -0
- package/src/skills/nature-figure/SKILL.md +197 -0
- package/src/skills/nature-figure/UPSTREAM_LICENSE.txt +21 -0
- package/src/skills/nature-figure/agents/openai.yaml +4 -0
- package/src/skills/nature-figure/evals/evals.json +37 -0
- package/src/skills/nature-figure/references/api.md +428 -0
- package/src/skills/nature-figure/references/backend-selection.md +100 -0
- package/src/skills/nature-figure/references/chart-types.md +281 -0
- package/src/skills/nature-figure/references/common-patterns.md +349 -0
- package/src/skills/nature-figure/references/design-theory.md +436 -0
- package/src/skills/nature-figure/references/figure-contract.md +93 -0
- package/src/skills/nature-figure/references/nature-2026-observations.md +112 -0
- package/src/skills/nature-figure/references/qa-contract.md +119 -0
- package/src/skills/nature-figure/references/r-template-index.md +66 -0
- package/src/skills/nature-figure/references/r-workflow.md +161 -0
- package/src/skills/nature-figure/references/tutorials.md +250 -0
- package/src/skills/nature-paper2ppt/SKILL.md +507 -0
- package/src/skills/nature-paper2ppt/UPSTREAM_LICENSE.txt +21 -0
- package/src/skills/nature-paper2ppt/agents/openai.yaml +4 -0
- package/src/skills/nature-polishing/SKILL.md +385 -0
- package/src/skills/nature-polishing/UPSTREAM_LICENSE.txt +21 -0
- package/src/skills/nature-polishing/agents/openai.yaml +4 -0
- package/src/skills/nature-polishing/references/phrasebank-playbook.md +162 -0
- package/src/skills/nature-polishing/references/section-moves.md +240 -0
- package/src/skills/nature-polishing/references/style-guardrails.md +94 -0
- package/src/skills/nature-polishing/references/writing-strategy.md +148 -0
- package/src/skills/optimize/SKILL.md +177 -1568
- package/src/skills/optimize/references/brief-shaping-playbook.md +95 -0
- package/src/skills/optimize/references/candidate-board-template.md +13 -0
- package/src/skills/optimize/references/candidate-ranking-template.md +51 -0
- package/src/skills/optimize/references/codegen-route-playbook.md +50 -0
- package/src/skills/optimize/references/debug-response-template.md +29 -0
- package/src/skills/optimize/references/frontier-review-template.md +32 -0
- package/src/skills/optimize/references/fusion-playbook.md +36 -0
- package/src/skills/optimize/references/method-brief-template.md +73 -0
- package/src/skills/optimize/references/operational-guidance.md +621 -0
- package/src/skills/optimize/references/optimization-memory-template.md +30 -0
- package/src/skills/optimize/references/optimize-checklist-template.md +18 -0
- package/src/skills/optimize/references/plateau-response-playbook.md +28 -0
- package/src/skills/optimize/references/prompt-patterns.md +49 -0
- package/src/skills/paper-outline/SKILL.md +227 -0
- package/src/skills/paper-outline/references/outline-patterns.md +87 -0
- package/src/skills/paper-plot/SKILL.md +79 -0
- package/src/skills/paper-plot/agents/openai.yaml +4 -0
- package/src/skills/paper-plot/references/bar_grouped_hatch.md +96 -0
- package/src/skills/paper-plot/references/bar_paired_delta.md +72 -0
- package/src/skills/paper-plot/references/line_confidence_band.md +75 -0
- package/src/skills/paper-plot/references/line_loss_with_inset.md +65 -0
- package/src/skills/paper-plot/references/line_training_curve.md +44 -0
- package/src/skills/paper-plot/references/radar_dual_series.md +59 -0
- package/src/skills/paper-plot/references/scatter_broken_axis.md +59 -0
- package/src/skills/paper-plot/references/scatter_tsne_cluster.md +72 -0
- package/src/skills/paper-plot/scripts/bar_memevolve.py +109 -0
- package/src/skills/paper-plot/scripts/bar_spice.py +166 -0
- package/src/skills/paper-plot/scripts/line_aime.py +94 -0
- package/src/skills/paper-plot/scripts/line_loss_inset.py +157 -0
- package/src/skills/paper-plot/scripts/line_selfdistill.py +168 -0
- package/src/skills/paper-plot/scripts/radar_dora.py +151 -0
- package/src/skills/paper-plot/scripts/scatter_break.py +169 -0
- package/src/skills/paper-plot/scripts/scatter_tsne.py +133 -0
- package/src/skills/rebuttal/SKILL.md +9 -0
- package/src/skills/references/tool-usage-by-stage.md +438 -0
- package/src/skills/review/SKILL.md +105 -7
- package/src/skills/science/PROVENANCE.md +44 -0
- package/src/skills/science/SKILL.md +137 -0
- package/src/skills/science/references/artifact-science-tool.md +110 -0
- package/src/skills/science/references/claim-type-discipline.md +56 -0
- package/src/skills/science/references/domain-index.md +422 -0
- package/src/skills/science/references/hpc-via-bash-exec.md +42 -0
- package/src/skills/science/references/package-check-playbook.md +64 -0
- package/src/skills/science/references/package-index.min.json +3616 -0
- package/src/skills/science/references/packages/abinit.md +80 -0
- package/src/skills/science/references/packages/acts.md +73 -0
- package/src/skills/science/references/packages/aiida-core.md +80 -0
- package/src/skills/science/references/packages/alamode.md +80 -0
- package/src/skills/science/references/packages/amuse.md +88 -0
- package/src/skills/science/references/packages/anndata.md +88 -0
- package/src/skills/science/references/packages/arbor.md +80 -0
- package/src/skills/science/references/packages/arc.md +73 -0
- package/src/skills/science/references/packages/astropy.md +88 -0
- package/src/skills/science/references/packages/astroquery.md +88 -0
- package/src/skills/science/references/packages/atomate2.md +80 -0
- package/src/skills/science/references/packages/atomsmltr.md +73 -0
- package/src/skills/science/references/packages/awkward.md +73 -0
- package/src/skills/science/references/packages/batman.md +88 -0
- package/src/skills/science/references/packages/biopython.md +88 -0
- package/src/skills/science/references/packages/bloqade.md +73 -0
- package/src/skills/science/references/packages/brian2.md +73 -0
- package/src/skills/science/references/packages/bullet3.md +73 -0
- package/src/skills/science/references/packages/calculix.md +80 -0
- package/src/skills/science/references/packages/cantera.md +73 -0
- package/src/skills/science/references/packages/cavity-md-ipi.md +80 -0
- package/src/skills/science/references/packages/ccdproc.md +88 -0
- package/src/skills/science/references/packages/celerite2.md +88 -0
- package/src/skills/science/references/packages/cellrank.md +73 -0
- package/src/skills/science/references/packages/cesm.md +80 -0
- package/src/skills/science/references/packages/chemicals.md +73 -0
- package/src/skills/science/references/packages/chempy.md +73 -0
- package/src/skills/science/references/packages/cirq.md +73 -0
- package/src/skills/science/references/packages/coffea.md +73 -0
- package/src/skills/science/references/packages/cp2k.md +88 -0
- package/src/skills/science/references/packages/custodian.md +80 -0
- package/src/skills/science/references/packages/dart.md +73 -0
- package/src/skills/science/references/packages/datamol.md +88 -0
- package/src/skills/science/references/packages/dd4hep.md +73 -0
- package/src/skills/science/references/packages/dealii.md +80 -0
- package/src/skills/science/references/packages/deepchem.md +88 -0
- package/src/skills/science/references/packages/delphes.md +73 -0
- package/src/skills/science/references/packages/devito.md +80 -0
- package/src/skills/science/references/packages/dftb.md +88 -0
- package/src/skills/science/references/packages/dftd4.md +88 -0
- package/src/skills/science/references/packages/dftk-jl.md +80 -0
- package/src/skills/science/references/packages/dolfinx.md +80 -0
- package/src/skills/science/references/packages/drake.md +73 -0
- package/src/skills/science/references/packages/dumux.md +73 -0
- package/src/skills/science/references/packages/elk.md +80 -0
- package/src/skills/science/references/packages/elmerfem.md +80 -0
- package/src/skills/science/references/packages/enzo-e.md +88 -0
- package/src/skills/science/references/packages/espresso.md +80 -0
- package/src/skills/science/references/packages/exoplanet.md +88 -0
- package/src/skills/science/references/packages/fairroot.md +73 -0
- package/src/skills/science/references/packages/fbpic.md +80 -0
- package/src/skills/science/references/packages/fdtdbath-meep.md +80 -0
- package/src/skills/science/references/packages/geant4.md +73 -0
- package/src/skills/science/references/packages/geosx.md +80 -0
- package/src/skills/science/references/packages/gprmax.md +80 -0
- package/src/skills/science/references/packages/gromacs.md +80 -0
- package/src/skills/science/references/packages/gwaslab.md +73 -0
- package/src/skills/science/references/packages/gz-sim.md +73 -0
- package/src/skills/science/references/packages/hail.md +88 -0
- package/src/skills/science/references/packages/hiphive.md +80 -0
- package/src/skills/science/references/packages/hoomd-blue.md +80 -0
- package/src/skills/science/references/packages/itensor.md +73 -0
- package/src/skills/science/references/packages/itensors-jl.md +73 -0
- package/src/skills/science/references/packages/jdftx.md +73 -0
- package/src/skills/science/references/packages/jobflow.md +80 -0
- package/src/skills/science/references/packages/kadanoffbaym-jl.md +73 -0
- package/src/skills/science/references/packages/kite.md +80 -0
- package/src/skills/science/references/packages/kratos.md +80 -0
- package/src/skills/science/references/packages/kwant.md +73 -0
- package/src/skills/science/references/packages/lammps.md +80 -0
- package/src/skills/science/references/packages/lightkurve.md +88 -0
- package/src/skills/science/references/packages/limix.md +73 -0
- package/src/skills/science/references/packages/maxwelllink.md +80 -0
- package/src/skills/science/references/packages/mcdc.md +73 -0
- package/src/skills/science/references/packages/meep.md +80 -0
- package/src/skills/science/references/packages/mfem.md +80 -0
- package/src/skills/science/references/packages/mitgcm.md +73 -0
- package/src/skills/science/references/packages/modflow6.md +73 -0
- package/src/skills/science/references/packages/molecool.md +73 -0
- package/src/skills/science/references/packages/mom6.md +73 -0
- package/src/skills/science/references/packages/moose.md +80 -0
- package/src/skills/science/references/packages/mpas-model.md +73 -0
- package/src/skills/science/references/packages/mujoco.md +73 -0
- package/src/skills/science/references/packages/mumax3.md +73 -0
- package/src/skills/science/references/packages/nekrs.md +80 -0
- package/src/skills/science/references/packages/nessi.md +73 -0
- package/src/skills/science/references/packages/nest-simulator.md +73 -0
- package/src/skills/science/references/packages/netket.md +73 -0
- package/src/skills/science/references/packages/neuron.md +73 -0
- package/src/skills/science/references/packages/nextflow.md +88 -0
- package/src/skills/science/references/packages/nwchem.md +88 -0
- package/src/skills/science/references/packages/openbabel.md +88 -0
- package/src/skills/science/references/packages/openems.md +80 -0
- package/src/skills/science/references/packages/openff-toolkit.md +88 -0
- package/src/skills/science/references/packages/openfoam-dev.md +80 -0
- package/src/skills/science/references/packages/openmc.md +73 -0
- package/src/skills/science/references/packages/openmm.md +80 -0
- package/src/skills/science/references/packages/openmoc.md +73 -0
- package/src/skills/science/references/packages/openmx.md +80 -0
- package/src/skills/science/references/packages/opensees.md +80 -0
- package/src/skills/science/references/packages/opensn.md +80 -0
- package/src/skills/science/references/packages/opm-simulators.md +73 -0
- package/src/skills/science/references/packages/oqupy.md +73 -0
- package/src/skills/science/references/packages/packmol.md +80 -0
- package/src/skills/science/references/packages/palabos.md +80 -0
- package/src/skills/science/references/packages/parflow.md +80 -0
- package/src/skills/science/references/packages/pennylane.md +88 -0
- package/src/skills/science/references/packages/perceval.md +73 -0
- package/src/skills/science/references/packages/phono3py.md +73 -0
- package/src/skills/science/references/packages/phonopy.md +73 -0
- package/src/skills/science/references/packages/photutils.md +88 -0
- package/src/skills/science/references/packages/picongpu.md +80 -0
- package/src/skills/science/references/packages/plink-ng.md +88 -0
- package/src/skills/science/references/packages/precice.md +73 -0
- package/src/skills/science/references/packages/psc.md +80 -0
- package/src/skills/science/references/packages/psi4.md +88 -0
- package/src/skills/science/references/packages/pybinding.md +73 -0
- package/src/skills/science/references/packages/pyfr.md +80 -0
- package/src/skills/science/references/packages/pyhf.md +73 -0
- package/src/skills/science/references/packages/pyiron_base.md +80 -0
- package/src/skills/science/references/packages/pylcp.md +73 -0
- package/src/skills/science/references/packages/pylith.md +80 -0
- package/src/skills/science/references/packages/pynbody.md +88 -0
- package/src/skills/science/references/packages/pysam.md +88 -0
- package/src/skills/science/references/packages/pyscf.md +88 -0
- package/src/skills/science/references/packages/q-e.md +73 -0
- package/src/skills/science/references/packages/qibo.md +73 -0
- package/src/skills/science/references/packages/qiskit.md +73 -0
- package/src/skills/science/references/packages/quantica-jl.md +73 -0
- package/src/skills/science/references/packages/quantumoptics-jl.md +73 -0
- package/src/skills/science/references/packages/quimb.md +73 -0
- package/src/skills/science/references/packages/qulacs.md +73 -0
- package/src/skills/science/references/packages/qutip.md +73 -0
- package/src/skills/science/references/packages/rdkit.md +88 -0
- package/src/skills/science/references/packages/rmg-py.md +73 -0
- package/src/skills/science/references/packages/root.md +73 -0
- package/src/skills/science/references/packages/scanpy.md +88 -0
- package/src/skills/science/references/packages/scikit-allel.md +88 -0
- package/src/skills/science/references/packages/scikit-bio.md +88 -0
- package/src/skills/science/references/packages/scqubits.md +73 -0
- package/src/skills/science/references/packages/scuff-em.md +80 -0
- package/src/skills/science/references/packages/scvi-tools.md +73 -0
- package/src/skills/science/references/packages/seissol.md +73 -0
- package/src/skills/science/references/packages/sfepy.md +80 -0
- package/src/skills/science/references/packages/sisl.md +73 -0
- package/src/skills/science/references/packages/smilei.md +80 -0
- package/src/skills/science/references/packages/snakemake.md +88 -0
- package/src/skills/science/references/packages/specfem3d-globe.md +80 -0
- package/src/skills/science/references/packages/specutils.md +88 -0
- package/src/skills/science/references/packages/spglib.md +80 -0
- package/src/skills/science/references/packages/squidpy.md +88 -0
- package/src/skills/science/references/packages/starry.md +88 -0
- package/src/skills/science/references/packages/strawberryfields.md +73 -0
- package/src/skills/science/references/packages/su2.md +80 -0
- package/src/skills/science/references/packages/sunny-jl.md +73 -0
- package/src/skills/science/references/packages/sw4.md +73 -0
- package/src/skills/science/references/packages/swift.md +88 -0
- package/src/skills/science/references/packages/tdnegf.md +73 -0
- package/src/skills/science/references/packages/tenpy.md +73 -0
- package/src/skills/science/references/packages/thermo.md +73 -0
- package/src/skills/science/references/packages/tkwant.md +73 -0
- package/src/skills/science/references/packages/tvb-root.md +73 -0
- package/src/skills/science/references/packages/uproot5.md +73 -0
- package/src/skills/science/references/packages/vampire.md +80 -0
- package/src/skills/science/references/packages/wannier_tools.md +73 -0
- package/src/skills/science/references/packages/warpx.md +80 -0
- package/src/skills/science/references/packages/wrf.md +73 -0
- package/src/skills/science/references/packages/xtb.md +88 -0
- package/src/skills/science/references/packages/yt.md +73 -0
- package/src/skills/science/references/science-task-brief-template.md +71 -0
- package/src/skills/scout/SKILL.md +83 -425
- package/src/skills/scout/references/literature-scout-template.md +5 -24
- package/src/skills/scout/references/operational-guidance.md +191 -0
- package/src/skills/scout/references/paper-triage-playbook.md +11 -35
- package/src/skills/write/SKILL.md +744 -1246
- package/src/skills/write/references/experiments_analysis_patterns.md +129 -0
- package/src/skills/write/references/oral_package_patterns.md +252 -0
- package/src/skills/write/references/oral_writing_principles.md +291 -0
- package/src/skills/write/references/section_rewrite_checklist.md +234 -0
- package/src/tui/dist/app/AppContainer.js +1314 -27
- package/src/tui/dist/components/Composer.js +26 -1
- package/src/tui/dist/components/ConfigScreen.js +2 -1
- package/src/tui/dist/components/InputPrompt.js +25 -9
- package/src/tui/dist/components/MainContent.js +18 -3
- package/src/tui/dist/components/QuestScreen.js +3 -2
- package/src/tui/dist/components/UtilityScreen.js +37 -0
- package/src/tui/dist/hooks/useSafeInput.js +10 -0
- package/src/tui/dist/index.js +13 -1
- package/src/tui/dist/layouts/DefaultAppLayout.js +11 -8
- package/src/tui/dist/lib/api.js +89 -1
- package/src/tui/package.json +1 -1
- package/src/ui/dist/assets/{AnalysisPlugin-BCKAfjba.js → AnalysisPlugin-CA94NGmI.js} +1 -1
- package/src/ui/dist/assets/CliPlugin-DHBzphZU.js +79 -0
- package/src/ui/dist/assets/CodeEditorPlugin-BOFwD2rn.js +2 -0
- package/src/ui/dist/assets/{CodeViewerPlugin-CbaFRrUU.js → CodeViewerPlugin-CqDpgjik.js} +4 -4
- package/src/ui/dist/assets/{DocViewerPlugin-DAjLVeQD.js → DocViewerPlugin-UDBgt8-4.js} +3 -3
- package/src/ui/dist/assets/GitCommitViewerPlugin-BmHtZ0bZ.js +6 -0
- package/src/ui/dist/assets/{GitDiffViewerPlugin-CQACjoAA.js → GitDiffViewerPlugin-CAxjNorQ.js} +2 -2
- package/src/ui/dist/assets/{GitSnapshotViewer-0r4nLPke.js → GitSnapshotViewer-CweA6VON.js} +2 -2
- package/src/ui/dist/assets/{ImageViewerPlugin-nBOmI2v_.js → ImageViewerPlugin-C8wHGvGN.js} +5 -5
- package/src/ui/dist/assets/LabPlugin-COyyLUol.js +32 -0
- package/src/ui/dist/assets/{LatexPlugin-ZwtV8pIp.js → LatexPlugin-BQjAaA5J.js} +4 -4
- package/src/ui/dist/assets/{MarkdownViewerPlugin-DKqVfKyW.js → MarkdownViewerPlugin-Dy1NE2dI.js} +3 -3
- package/src/ui/dist/assets/{MarketplacePlugin-BwxStZ9D.js → MarketplacePlugin-DMIZtEJ2.js} +2 -2
- package/src/ui/dist/assets/NotebookEditor-CFHMq_Qt.js +91 -0
- package/src/ui/dist/assets/{NotebookEditor-DB9N_T9q.js → NotebookEditor-WFyd8Ybt.js} +3 -3
- package/src/ui/dist/assets/{PdfLoader-eWBONbQP.js → PdfLoader-CLE5u5TS.js} +3 -3
- package/src/ui/dist/assets/{PdfMarkdownPlugin-D22YOZL3.js → PdfMarkdownPlugin-_iNK_H83.js} +1 -1
- package/src/ui/dist/assets/PdfViewerPlugin-DgWsbInT.js +22 -0
- package/src/ui/dist/assets/SearchPlugin-DrZmn5iw.js +11 -0
- package/src/ui/dist/assets/{TextViewerPlugin-C5xqeeUH.js → TextViewerPlugin-D1-T3aC7.js} +4 -4
- package/src/ui/dist/assets/branding/runner-claude.svg +107 -0
- package/src/ui/dist/assets/branding/runner-codex.svg +10 -0
- package/src/ui/dist/assets/branding/runner-kimi.svg +14 -0
- package/src/ui/dist/assets/branding/runner-opencode.svg +7 -0
- package/src/ui/dist/assets/cli-store-CoZ-x5Ip.js +1 -0
- package/src/ui/dist/assets/{code-WlFHE7z_.js → code-DbsmSd3Y.js} +1 -1
- package/src/ui/dist/assets/file-diff-panel-DsvyRz47.js +1 -0
- package/src/ui/dist/assets/{wrap-text-BC-Hltpd.js → file-jump-queue-DeQBikaw.js} +3 -3
- package/src/ui/dist/assets/{file-socket-CfQPKQKj.js → file-socket-DA5XIx88.js} +1 -1
- package/src/ui/dist/assets/fonts/ds-fonts.css +50 -4
- package/src/ui/dist/assets/images/deepxiv/register-guide.png +0 -0
- package/src/ui/dist/assets/index-39vY9LmZ.js +1 -0
- package/src/ui/dist/assets/{index-CwNu1aH4.js → index-BsO46tJA.js} +1 -1
- package/src/ui/dist/assets/index-CHzJ2xtB.js +3530 -0
- package/src/ui/dist/assets/index-DH-zxoZ3.css +33 -0
- package/src/ui/dist/assets/{plugin-notebook-HbW2K-1c.js → plugin-notebook-JRhysCqj.js} +2 -2
- package/src/ui/dist/assets/{project-sync-C9IdzdZW.js → project-sync-DPmWKmKD.js} +1 -1
- package/src/ui/dist/assets/{zoom-out-E_gaeAxL.js → zoom-out-DAukFWen.js} +3 -3
- package/src/ui/dist/index.html +3 -3
- package/src/skills/analysis-campaign/references/artifact-orchestration.md +0 -58
- package/src/skills/baseline/references/memory-playbook.md +0 -40
- package/src/skills/baseline/references/publishable-baseline-package.md +0 -30
- package/src/skills/write/references/outline-evidence-contract-example.md +0 -107
- package/src/skills/write/references/paper-experiment-matrix-template.md +0 -131
- package/src/skills/write/references/paper-section-playbook.md +0 -64
- package/src/skills/write/references/reviewer-first-writing.md +0 -64
- package/src/skills/write/references/revision-checklist.md +0 -70
- package/src/skills/write/references/section-contracts.md +0 -82
- package/src/skills/write/references/sentence-level-proofing.md +0 -49
- package/src/ui/dist/assets/AiManusChatView-Bv-Z8YpU.js +0 -204
- package/src/ui/dist/assets/CliPlugin-BCKcpc35.js +0 -109
- package/src/ui/dist/assets/CodeEditorPlugin-DbOfSJ8K.js +0 -2
- package/src/ui/dist/assets/GitCommitViewerPlugin-CIUqbUDO.js +0 -1
- package/src/ui/dist/assets/LabCopilotPanel-BHxOxF4z.js +0 -14
- package/src/ui/dist/assets/LabPlugin-BKoZGs95.js +0 -22
- package/src/ui/dist/assets/NotebookEditor-BEQhaQbt.js +0 -81
- package/src/ui/dist/assets/PdfViewerPlugin-c-RK9DLM.js +0 -17
- package/src/ui/dist/assets/SearchPlugin-CxF9ytAx.js +0 -16
- package/src/ui/dist/assets/VNCViewer-BoLGLnHz.js +0 -11
- package/src/ui/dist/assets/bot-DREQOxzP.js +0 -6
- package/src/ui/dist/assets/chevron-up-C9Qpx4DE.js +0 -6
- package/src/ui/dist/assets/file-content-BZMz3RYp.js +0 -1
- package/src/ui/dist/assets/file-diff-panel-CQhw0jS2.js +0 -1
- package/src/ui/dist/assets/file-jump-queue-DA-SdG__.js +0 -1
- package/src/ui/dist/assets/git-commit-horizontal-DxZ8DCZh.js +0 -6
- package/src/ui/dist/assets/image-Bgl4VIyx.js +0 -6
- package/src/ui/dist/assets/index-BpV6lusQ.css +0 -33
- package/src/ui/dist/assets/index-CBNVuWcP.js +0 -2496
- package/src/ui/dist/assets/index-DrUnlf6K.js +0 -1
- package/src/ui/dist/assets/index-NW-h8VzN.js +0 -1
- package/src/ui/dist/assets/pdf-effect-queue-J8OnM0jE.js +0 -6
- package/src/ui/dist/assets/popover-CLc0pPP8.js +0 -1
- package/src/ui/dist/assets/select-Cs2PmzwL.js +0 -11
- package/src/ui/dist/assets/sigma-ClKcHAXm.js +0 -6
- package/src/ui/dist/assets/trash-DwpbFr3w.js +0 -11
- package/src/ui/dist/assets/useCliAccess-NQ8m0Let.js +0 -1
- package/src/ui/dist/assets/useFileDiffOverlay-FuhcnKiw.js +0 -1
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
schema_version: 1
|
|
2
|
+
id: aisb.t3.021_tokenrecycling
|
|
3
|
+
name: '变废为宝:利用 Token 循环加速大语言模型推理'
|
|
4
|
+
version: 0.1.0
|
|
5
|
+
one_line: 评估 Token 循环,这是一种无需训练的推测解码方法,通过一个 <2 MB 的邻接矩阵重用候选 token,在 SpecBench 和 MBPP 上实现约 2 倍的 LLM 推理加速。
|
|
6
|
+
task_description: '此打包基准测试用于评估 Token 循环(TR),这是一种即插即用、无需训练的推测解码方法,用于加速 LLM 推理。TR 将每个解码步骤中的 top-k 候选 token 存储在一个词汇表大小的邻接矩阵中(<2 MB),然后通过类 BFS 检索构建草稿树,并使用树注意力机制进行验证。评估流程运行 eval.sh 脚本,对 Vicuna 7B/13B/33B(在 SpecBench 上)和 Code Llama 7B/13B/34B(在 MBPP 上)进行迭代测试,测量平均接受 token 数(MAT)、token/秒以及相对于 HuggingFace 自回归解码的加速比。模型需要预先下载到项目根目录的 ../models/ 目录下。该代码库基于 SpecBench 评估框架,使用贪心解码(temperature=0,batch size=1)。论文报告在 7B 模型上实现约 2 倍加速,比之前的无训练方法提升 30% 以上,比 Medusa(需要训练)提升 25%。所有原始实验均使用单块 A100-80GB GPU,搭配 PyTorch 2.3 和 CUDA 12.2。
|
|
7
|
+
|
|
8
|
+
'
|
|
9
|
+
capability_tags:
|
|
10
|
+
- research_code_optimization
|
|
11
|
+
- large_language_models
|
|
12
|
+
- speculative_decoding
|
|
13
|
+
- inference_acceleration
|
|
14
|
+
- systems_efficiency
|
|
15
|
+
aisb_direction: T3
|
|
16
|
+
track_fit:
|
|
17
|
+
- paper_track
|
|
18
|
+
- benchmark_track
|
|
19
|
+
task_mode: evaluation_driven
|
|
20
|
+
requires_execution: true
|
|
21
|
+
requires_paper: true
|
|
22
|
+
integrity_level: cas_plus_canary
|
|
23
|
+
snapshot_status: runnable
|
|
24
|
+
support_level: advanced
|
|
25
|
+
cost_band: high
|
|
26
|
+
time_band: 6-24h
|
|
27
|
+
difficulty: hard
|
|
28
|
+
data_access: public
|
|
29
|
+
primary_outputs:
|
|
30
|
+
- mean_accepted_tokens
|
|
31
|
+
- spec_bench_speedup
|
|
32
|
+
- throughput_report
|
|
33
|
+
launch_profiles:
|
|
34
|
+
- id: quick_check
|
|
35
|
+
label: 快速检查
|
|
36
|
+
description: '使用单个模型(例如 vicuna-7b-v1.3)在 spec_bench 上运行 eval.sh。需要在 ../models/ 中放置模型权重。在 A100-80GB 上预计需要 1-3 小时。
|
|
37
|
+
|
|
38
|
+
'
|
|
39
|
+
- id: specbench_eval
|
|
40
|
+
label: 完整 SpecBench + MBPP 评估
|
|
41
|
+
description: '在 SpecBench 上对所有三种 Vicuna 规模,在 MBPP 上对所有三种 Code Llama 规模,运行完整的 eval.sh 循环。根据评估的模型规模,在单块 A100-80GB 上预计需要 6-24 小时。
|
|
42
|
+
|
|
43
|
+
'
|
|
44
|
+
dataset_download:
|
|
45
|
+
primary_method: bundled_plus_models
|
|
46
|
+
sources:
|
|
47
|
+
- kind: bundled
|
|
48
|
+
url: null
|
|
49
|
+
access: public
|
|
50
|
+
note: SpecBench 问题文件和 MBPP 数据已打包在 data/ 目录中。
|
|
51
|
+
- kind: huggingface
|
|
52
|
+
url: https://huggingface.co/lmsys/vicuna-7b-v1.3
|
|
53
|
+
access: public
|
|
54
|
+
note: Vicuna 7B/13B/33B 权重需单独下载到 ../models/。
|
|
55
|
+
- kind: huggingface
|
|
56
|
+
url: https://huggingface.co/codellama/CodeLlama-7b-hf
|
|
57
|
+
access: public
|
|
58
|
+
note: Code Llama 7B/13B/34B 权重需单独下载到 ../models/。
|
|
59
|
+
notes:
|
|
60
|
+
- 模型权重总计 60-130 GB,取决于使用的规模。
|
|
61
|
+
- eval.sh 期望模型位于 ../models/vicuna-7b-v1.3、../models/vicuna-13b-v1.3 等位置。
|
|
62
|
+
credential_requirements:
|
|
63
|
+
mode: none
|
|
64
|
+
items: []
|
|
65
|
+
notes:
|
|
66
|
+
- 某些门控模型变体可能需要 HuggingFace 登录,但 Vicuna 和 Code Llama 权重是公开可用的。
|
|
67
|
+
resources:
|
|
68
|
+
minimum:
|
|
69
|
+
cpu_cores: 8
|
|
70
|
+
ram_gb: 32
|
|
71
|
+
disk_gb: 100
|
|
72
|
+
gpu_count: 1
|
|
73
|
+
gpu_vram_gb: 24
|
|
74
|
+
recommended:
|
|
75
|
+
cpu_cores: 16
|
|
76
|
+
ram_gb: 64
|
|
77
|
+
disk_gb: 200
|
|
78
|
+
gpu_count: 1
|
|
79
|
+
gpu_vram_gb: 80
|
|
80
|
+
environment:
|
|
81
|
+
python: '3.10'
|
|
82
|
+
cuda: '12.2'
|
|
83
|
+
pytorch: '2.3'
|
|
84
|
+
key_packages:
|
|
85
|
+
- transformers==4.37.1
|
|
86
|
+
- fschat
|
|
87
|
+
- shortuuid
|
|
88
|
+
notes:
|
|
89
|
+
- 论文实验使用 PyTorch 2.3、CUDA 12.2、单块 A100-80GB、128 核 CPU。
|
|
90
|
+
- 打包的 requirements.txt 继承自 SpecBench;通过 pip install -r requirements.txt 安装。
|
|
91
|
+
- 现有的 YAML 列出了 PyTorch 2.1.1,但论文和代码参考的是 PyTorch 2.3。
|
|
92
|
+
risk_flags:
|
|
93
|
+
- model_download_required
|
|
94
|
+
- gpu_intensive
|
|
95
|
+
- currently_vicuna_only
|
|
96
|
+
risk_notes:
|
|
97
|
+
- 必须提前从外部下载模型(Vicuna、Code Llama)才能进行评估;总计约 60-130 GB。
|
|
98
|
+
- 最小的 24 GB 显存只能以 float16 运行 7B 模型;33B/34B 模型需要 ≥48 GB 显存。
|
|
99
|
+
- 代码库目前仅适配 LLaMA/Vicuna 架构(modeling_llama_kv.py);适配更新模型已列为待办事项。
|
|
100
|
+
- 打包过程中未执行基准测试;指标值尚未验证。
|
|
101
|
+
recommended_when: '当您需要对 7B+ LLM 进行推理加速评估任务,且无需草稿模型训练、无需大型检索数据存储、额外内存占用极小(<2 MB)时,可使用此基准测试。非常适合在受控的贪心解码、batch size 为 1 的条件下比较无训练的推测解码方法。
|
|
102
|
+
|
|
103
|
+
'
|
|
104
|
+
not_recommended_when: '如果您没有 ≥24 GB 显存的 GPU 来服务开放权重 LLM,或者需要以训练为中心的基准测试,或者需要评估 LLaMA/Vicuna 系列之外的模型(代码目前仅支持基于 LLaMA 的架构),则不建议使用此基准测试。
|
|
105
|
+
|
|
106
|
+
'
|
|
107
|
+
paper:
|
|
108
|
+
title: 'Turning Trash into Treasure: Accelerating Inference of Large Language Models
|
|
109
|
+
with Token Recycling'
|
|
110
|
+
venue: ACL 2025 Oral
|
|
111
|
+
year: 2025
|
|
112
|
+
url: https://arxiv.org/abs/2408.08696
|
|
113
|
+
download:
|
|
114
|
+
url: https://github.com/ResearAI/DeepScientist/releases/download/aisb-v0.0.1/aisb.t3.021_tokenrecycling.zip
|
|
115
|
+
archive_type: zip
|
|
116
|
+
local_dir_name: paper-21-TokenRecycling
|
|
117
|
+
provider: github_release
|
|
118
|
+
repo: ResearAI/DeepScientist
|
|
119
|
+
tag: aisb-v0.0.1
|
|
120
|
+
asset_name: aisb.t3.021_tokenrecycling.zip
|
|
121
|
+
sha256: d38519836e52e4c5c5e1fccd9a4befa5b9a3f20a5c8fa787941a3b2773bd1ebd
|
|
122
|
+
size_bytes: 55377
|
|
123
|
+
commercial:
|
|
124
|
+
annual_fee: null
|
|
125
|
+
display:
|
|
126
|
+
palette_seed: lime-slate-recycle
|
|
127
|
+
art_style: systems-diagram
|
|
128
|
+
accent_priority: high
|
|
129
|
+
image_path: ../image/021_aisb.t3.021_tokenrecycling.jpg
|
|
@@ -0,0 +1,204 @@
|
|
|
1
|
+
schema_version: 1
|
|
2
|
+
id: aisb.t3.022_chainofreasoning
|
|
3
|
+
name: Chain-of-Reasoning (CoR)
|
|
4
|
+
version: 0.1.0
|
|
5
|
+
one_line: 'Train and evaluate CoR-Math-7B, a multi-paradigm mathematical reasoning
|
|
6
|
+
model that chains Natural Language, Algorithmic (Python), and Symbolic (Lean 4)
|
|
7
|
+
reasoning to solve arithmetic and theorem-proving benchmarks (GSM8K, MATH, miniF2F,
|
|
8
|
+
AMC 2023, AIME 2024).
|
|
9
|
+
|
|
10
|
+
'
|
|
11
|
+
task_description: 'This benchmark covers the full Chain-of-Reasoning (CoR) pipeline:
|
|
12
|
+
(1) constructing the Multi-Paradigm Mathematical (MPM) dataset (~167k reasoning
|
|
13
|
+
paths) by extending Lean-Workbook and NuminaMath-TIR with GPT-4o-generated cross-paradigm
|
|
14
|
+
solutions and Lean 4 proof verification; (2) Progressive Paradigm Training (PPT)
|
|
15
|
+
over three stages—NLR-only, NLR+AR, NLR+AR+SR—using DeepSpeed ZeRO Stage 3 SFT on
|
|
16
|
+
a 7B base model; and (3) evaluating the resulting CoR-Math-7B on five math benchmarks
|
|
17
|
+
(GSM8K, MATH, miniF2F, AMC 2023, AIME 2024) via vLLM-based inference with Sequential
|
|
18
|
+
Multi-Paradigm Sampling (SMPS). The evaluation script (evaluation/eval_CoR.py) computes
|
|
19
|
+
accuracy, collects reasoning traces, and optionally verifies Lean 4 proofs via a
|
|
20
|
+
bundled Lean server scheduler. The dataset construction step requires an OpenAI
|
|
21
|
+
API key and calls GPT-4o; the training and eval steps are self-contained once data
|
|
22
|
+
is prepared.
|
|
23
|
+
|
|
24
|
+
'
|
|
25
|
+
capability_tags:
|
|
26
|
+
- research_code_optimization
|
|
27
|
+
- mathematical_reasoning
|
|
28
|
+
- formal_methods
|
|
29
|
+
- large_language_models
|
|
30
|
+
- theorem_proving
|
|
31
|
+
aisb_direction: T3
|
|
32
|
+
track_fit:
|
|
33
|
+
- paper_track
|
|
34
|
+
- benchmark_track
|
|
35
|
+
task_mode: experiment_driven
|
|
36
|
+
requires_execution: true
|
|
37
|
+
requires_paper: true
|
|
38
|
+
integrity_level: cas_plus_canary
|
|
39
|
+
snapshot_status: runnable
|
|
40
|
+
support_level: advanced
|
|
41
|
+
cost_band: high
|
|
42
|
+
time_band: 1d+
|
|
43
|
+
difficulty: hard
|
|
44
|
+
data_access: public
|
|
45
|
+
primary_outputs:
|
|
46
|
+
- accuracy
|
|
47
|
+
- reasoning_traces
|
|
48
|
+
- lean_artifacts
|
|
49
|
+
launch_profiles:
|
|
50
|
+
- id: quick_eval
|
|
51
|
+
label: Quick Eval
|
|
52
|
+
description: 'Run evaluation only on one benchmark (e.g. MATH or GSM8K) using a
|
|
53
|
+
pre-trained checkpoint and the bundled eval_CoR.py script with vLLM inference.
|
|
54
|
+
Skips dataset construction and training entirely. Requires a model checkpoint
|
|
55
|
+
and 1–8 GPUs.
|
|
56
|
+
|
|
57
|
+
'
|
|
58
|
+
- id: full_train_eval
|
|
59
|
+
label: Full Train + Eval
|
|
60
|
+
description: 'Build the MPM dataset from Lean-Workbook and NuminaMath-TIR (requires
|
|
61
|
+
OpenAI API key for GPT-4o augmentation), run 3-stage Progressive Paradigm Training
|
|
62
|
+
with DeepSpeed ZeRO-3, then evaluate on all five benchmarks (GSM8K, MATH, miniF2F,
|
|
63
|
+
AMC 2023, AIME 2024). This is the paper-faithful route and requires multi-GPU
|
|
64
|
+
infrastructure and Lean 4/Mathlib build.
|
|
65
|
+
|
|
66
|
+
'
|
|
67
|
+
dataset_download:
|
|
68
|
+
primary_method: mixed
|
|
69
|
+
sources:
|
|
70
|
+
- kind: huggingface
|
|
71
|
+
url: https://huggingface.co/datasets/internlm/Lean-Workbook
|
|
72
|
+
access: public
|
|
73
|
+
note: 'lean_workbook.json — seed data for symbolic reasoning paradigm. Used in
|
|
74
|
+
utils/generate_informalproof.py to create informal proofs.
|
|
75
|
+
|
|
76
|
+
'
|
|
77
|
+
- kind: huggingface
|
|
78
|
+
url: https://huggingface.co/datasets/AI-MO/NuminaMath-TIR
|
|
79
|
+
access: public
|
|
80
|
+
note: 'train-00000-of-00001.parquet — seed data for algorithmic reasoning paradigm.
|
|
81
|
+
Must be converted to JSON for utils/generate_lean4.py.
|
|
82
|
+
|
|
83
|
+
'
|
|
84
|
+
notes:
|
|
85
|
+
- 'The MPM dataset (~167k paths from ~83k problems) is constructed by the user at
|
|
86
|
+
runtime via GPT-4o augmentation scripts; it is not directly downloadable.
|
|
87
|
+
|
|
88
|
+
'
|
|
89
|
+
- 'Raw seed datasets are modest in size (tens of GB); the synthesized MPM dataset
|
|
90
|
+
and model checkpoints will require additional storage.
|
|
91
|
+
|
|
92
|
+
'
|
|
93
|
+
credential_requirements:
|
|
94
|
+
mode: optional
|
|
95
|
+
items:
|
|
96
|
+
- OpenAI API key (required only for MPM dataset construction via GPT-4o)
|
|
97
|
+
- Weights & Biases API key (optional, for training logging)
|
|
98
|
+
notes:
|
|
99
|
+
- 'The OpenAI API key is used in script/leanwb_informalproof.sh and script/numinatir_lean4.sh
|
|
100
|
+
to call GPT-4o for generating cross-paradigm reasoning paths. If you use a pre-built
|
|
101
|
+
MPM dataset or a pre-trained checkpoint, no API key is needed.
|
|
102
|
+
|
|
103
|
+
'
|
|
104
|
+
- 'WandB credentials are set in script/train_CoR.sh; alternatively, WandB can be run in offline mode.
|
|
105
|
+
|
|
106
|
+
'
|
|
107
|
+
resources:
|
|
108
|
+
minimum:
|
|
109
|
+
cpu_cores: 16
|
|
110
|
+
ram_gb: 64
|
|
111
|
+
disk_gb: 250
|
|
112
|
+
gpu_count: 1
|
|
113
|
+
gpu_vram_gb: 48
|
|
114
|
+
recommended:
|
|
115
|
+
cpu_cores: 32
|
|
116
|
+
ram_gb: 128
|
|
117
|
+
disk_gb: 500
|
|
118
|
+
gpu_count: 8
|
|
119
|
+
gpu_vram_gb: 80
|
|
120
|
+
environment:
|
|
121
|
+
python: '3.10'
|
|
122
|
+
cuda: '12.1'
|
|
123
|
+
pytorch: 2.5.1
|
|
124
|
+
flash_attn: 2.7.0.post2
|
|
125
|
+
key_packages:
|
|
126
|
+
- flash-attn==2.7.0.post2
|
|
127
|
+
- deepspeed==0.15.4
|
|
128
|
+
- vllm==0.6.4.post1
|
|
129
|
+
- transformers==4.46.3
|
|
130
|
+
- accelerate
|
|
131
|
+
- datasets
|
|
132
|
+
notes:
|
|
133
|
+
- See requirements.txt for the full dependency set.
|
|
134
|
+
- 'Lean 4 must be installed separately via elan (leanprover-community.github.io).
|
|
135
|
+
Mathlib4 must be built locally with `lake build` before running theorem proving
|
|
136
|
+
evaluation or MPM dataset construction.
|
|
137
|
+
|
|
138
|
+
'
|
|
139
|
+
- 'CUDA version is not explicitly pinned in the repo; 12.1 is recommended based
|
|
140
|
+
on vLLM and flash-attn compatibility. 11.8 may also work.
|
|
141
|
+
|
|
142
|
+
'
|
|
143
|
+
risk_flags:
|
|
144
|
+
- external_api_dependency
|
|
145
|
+
- lean_toolchain_setup
|
|
146
|
+
- large_training_pipeline
|
|
147
|
+
- openai_cost
|
|
148
|
+
risk_notes:
|
|
149
|
+
- 'MPM dataset construction calls the OpenAI API (GPT-4o) extensively across ~285k
|
|
150
|
+
samples, which can incur significant API costs. Budget several hundred dollars for
|
|
151
|
+
full construction.
|
|
152
|
+
|
|
153
|
+
'
|
|
154
|
+
- 'Lean 4 and Mathlib4 must be compiled locally. The Mathlib build alone can take
|
|
155
|
+
1–2 hours and requires substantial RAM. Build failures are common on misconfigured
|
|
156
|
+
systems.
|
|
157
|
+
|
|
158
|
+
'
|
|
159
|
+
- 'Training uses DeepSpeed ZeRO Stage 3 across up to 8 GPUs with 4 epochs. The training
|
|
160
|
+
script has placeholder paths that must be edited before launch.
|
|
161
|
+
|
|
162
|
+
'
|
|
163
|
+
- 'The evaluation script uses vLLM with gpu_memory_utilization=0.95, which may OOM
|
|
164
|
+
on GPUs with less than 48 GB VRAM depending on batch size.
|
|
165
|
+
|
|
166
|
+
'
|
|
167
|
+
recommended_when: 'Use this benchmark when you want a heavyweight, multi-stage math-reasoning
|
|
168
|
+
task that combines LLM fine-tuning, Lean 4 formal verification, Python code execution,
|
|
169
|
+
and natural language chain-of-thought. Ideal for exploring how multi-paradigm cooperation
|
|
170
|
+
improves mathematical reasoning across both arithmetic computation (GSM8K, MATH,
|
|
171
|
+
AIME) and theorem proving (miniF2F).
|
|
172
|
+
|
|
173
|
+
'
|
|
174
|
+
not_recommended_when: 'Do not use this if you cannot provision multi-GPU infrastructure
|
|
175
|
+
(≥48 GB VRAM), cannot install and build Lean 4 + Mathlib4, or want to avoid OpenAI
|
|
176
|
+
API costs for dataset construction. Also not suitable if you need a quick, self-contained
|
|
177
|
+
eval-only benchmark—the full pipeline requires significant setup and compute time.
|
|
178
|
+
|
|
179
|
+
'
|
|
180
|
+
paper:
|
|
181
|
+
title: 'Chain-of-Reasoning: Towards Unified Mathematical Reasoning in Large Language
|
|
182
|
+
Models via a Multi-Paradigm Perspective
|
|
183
|
+
|
|
184
|
+
'
|
|
185
|
+
venue: arXiv preprint
|
|
186
|
+
year: 2025
|
|
187
|
+
url: https://arxiv.org/abs/2501.11110
|
|
188
|
+
download:
|
|
189
|
+
url: https://github.com/ResearAI/DeepScientist/releases/download/aisb-v0.0.1/aisb.t3.022_chainofreasoning.zip
|
|
190
|
+
archive_type: zip
|
|
191
|
+
local_dir_name: paper-22-ChainOfReasoning
|
|
192
|
+
provider: github_release
|
|
193
|
+
repo: ResearAI/DeepScientist
|
|
194
|
+
tag: aisb-v0.0.1
|
|
195
|
+
asset_name: aisb.t3.022_chainofreasoning.zip
|
|
196
|
+
sha256: 10a4cef8d810a0806780f11b2059b970a3ac4152b614f97a1e9e47294629764b
|
|
197
|
+
size_bytes: 986391
|
|
198
|
+
commercial:
|
|
199
|
+
annual_fee: null
|
|
200
|
+
display:
|
|
201
|
+
palette_seed: cobalt-ivory-proof
|
|
202
|
+
art_style: formal-reasoning
|
|
203
|
+
accent_priority: high
|
|
204
|
+
image_path: ../image/022_aisb.t3.022_chainofreasoning.jpg
|
|
@@ -0,0 +1,161 @@
|
|
|
1
|
+
schema_version: 1
|
|
2
|
+
id: aisb.t3.022_chainofreasoning
|
|
3
|
+
name: Chain-of-Reasoning (CoR)
|
|
4
|
+
version: 0.1.0
|
|
5
|
+
one_line: '训练并评估 CoR-Math-7B,一个多范式数学推理模型,通过链式整合自然语言、算法(Python)和符号(Lean 4)推理来求解算术和定理证明基准测试(GSM8K、MATH、miniF2F、AMC 2023、AIME 2024)。
|
|
6
|
+
|
|
7
|
+
'
|
|
8
|
+
task_description: '本基准测试涵盖完整的链式推理(CoR)流程:(1) 通过扩展 Lean-Workbook 和 NuminaMath-TIR,利用 GPT-4o 生成跨范式解答和 Lean 4 证明验证,构建多范式数学(MPM)数据集(约 167k 条推理路径);(2) 在 7B 基座模型上通过 DeepSpeed ZeRO Stage 3 SFT 进行三个阶段的渐进范式训练(PPT)——仅 NLR、NLR+AR、NLR+AR+SR;(3) 使用 vLLM 推理和顺序多范式采样(SMPS)在五个数学基准测试(GSM8K、MATH、miniF2F、AMC 2023、AIME 2024)上评估最终的 CoR-Math-7B 模型。评估脚本(evaluation/eval_CoR.py)计算准确率、收集推理轨迹,并可选地通过捆绑的 Lean 服务器调度器验证 Lean 4 证明。数据集构建步骤需要 OpenAI API 密钥并调用 GPT-4o;训练和评估步骤在数据准备完成后可独立运行。
|
|
9
|
+
|
|
10
|
+
'
|
|
11
|
+
capability_tags:
|
|
12
|
+
- research_code_optimization
|
|
13
|
+
- mathematical_reasoning
|
|
14
|
+
- formal_methods
|
|
15
|
+
- large_language_models
|
|
16
|
+
- theorem_proving
|
|
17
|
+
aisb_direction: T3
|
|
18
|
+
track_fit:
|
|
19
|
+
- paper_track
|
|
20
|
+
- benchmark_track
|
|
21
|
+
task_mode: experiment_driven
|
|
22
|
+
requires_execution: true
|
|
23
|
+
requires_paper: true
|
|
24
|
+
integrity_level: cas_plus_canary
|
|
25
|
+
snapshot_status: runnable
|
|
26
|
+
support_level: advanced
|
|
27
|
+
cost_band: high
|
|
28
|
+
time_band: 1d+
|
|
29
|
+
difficulty: hard
|
|
30
|
+
data_access: public
|
|
31
|
+
primary_outputs:
|
|
32
|
+
- accuracy
|
|
33
|
+
- reasoning_traces
|
|
34
|
+
- lean_artifacts
|
|
35
|
+
launch_profiles:
|
|
36
|
+
- id: quick_eval
|
|
37
|
+
label: 快速评估
|
|
38
|
+
description: '仅在单个基准测试(如 MATH 或 GSM8K)上使用预训练检查点和捆绑的 eval_CoR.py 脚本进行 vLLM 推理评估。完全跳过数据集构建和训练步骤。需要模型检查点和 1-8 块 GPU。
|
|
39
|
+
|
|
40
|
+
'
|
|
41
|
+
- id: full_train_eval
|
|
42
|
+
label: 完整训练+评估
|
|
43
|
+
description: '从 Lean-Workbook 和 NuminaMath-TIR 构建 MPM 数据集(需要 OpenAI API 密钥用于 GPT-4o 增强),使用 DeepSpeed ZeRO-3 运行三阶段渐进范式训练,然后在所有五个基准测试(GSM8K、MATH、miniF2F、AMC 2023、AIME 2024)上进行评估。这是忠实于论文的路线,需要多 GPU 基础设施和 Lean 4/Mathlib 构建。
|
|
44
|
+
|
|
45
|
+
'
|
|
46
|
+
dataset_download:
|
|
47
|
+
primary_method: mixed
|
|
48
|
+
sources:
|
|
49
|
+
- kind: huggingface
|
|
50
|
+
url: https://huggingface.co/datasets/internlm/Lean-Workbook
|
|
51
|
+
access: public
|
|
52
|
+
note: 'lean_workbook.json — 符号推理范式的种子数据。在 utils/generate_informalproof.py 中用于创建非形式证明。
|
|
53
|
+
|
|
54
|
+
'
|
|
55
|
+
- kind: huggingface
|
|
56
|
+
url: https://huggingface.co/datasets/AI-MO/NuminaMath-TIR
|
|
57
|
+
access: public
|
|
58
|
+
note: 'train-00000-of-00001.parquet — 算法推理范式的种子数据。需转换为 JSON 供 utils/generate_lean4.py 使用。
|
|
59
|
+
|
|
60
|
+
'
|
|
61
|
+
notes:
|
|
62
|
+
- 'MPM 数据集(约 167k 条路径来自约 83k 道题目)由用户在运行时通过 GPT-4o 增强脚本构建,并非直接可下载。
|
|
63
|
+
|
|
64
|
+
'
|
|
65
|
+
- '原始种子数据集规模适中(数十 GB);合成后的 MPM 数据集和模型检查点将需要额外的存储空间。
|
|
66
|
+
|
|
67
|
+
'
|
|
68
|
+
credential_requirements:
|
|
69
|
+
mode: optional
|
|
70
|
+
items:
|
|
71
|
+
- OpenAI API 密钥(仅在通过 GPT-4o 构建 MPM 数据集时需要)
|
|
72
|
+
- Weights & Biases API 密钥(可选,用于训练日志记录)
|
|
73
|
+
notes:
|
|
74
|
+
- 'OpenAI API 密钥用于 script/leanwb_informalproof.sh 和 script/numinatir_lean4.sh 调用 GPT-4o 生成跨范式推理路径。如果您使用预构建的 MPM 数据集或预训练检查点,则不需要 API 密钥。
|
|
75
|
+
|
|
76
|
+
'
|
|
77
|
+
- 'WandB 凭据在 script/train_CoR.sh 中设置;也可将 WandB 设为离线模式运行。
|
|
78
|
+
|
|
79
|
+
'
|
|
80
|
+
resources:
|
|
81
|
+
minimum:
|
|
82
|
+
cpu_cores: 16
|
|
83
|
+
ram_gb: 64
|
|
84
|
+
disk_gb: 250
|
|
85
|
+
gpu_count: 1
|
|
86
|
+
gpu_vram_gb: 48
|
|
87
|
+
recommended:
|
|
88
|
+
cpu_cores: 32
|
|
89
|
+
ram_gb: 128
|
|
90
|
+
disk_gb: 500
|
|
91
|
+
gpu_count: 8
|
|
92
|
+
gpu_vram_gb: 80
|
|
93
|
+
environment:
|
|
94
|
+
python: '3.10'
|
|
95
|
+
cuda: '12.1'
|
|
96
|
+
pytorch: 2.5.1
|
|
97
|
+
flash_attn: 2.7.0.post2
|
|
98
|
+
key_packages:
|
|
99
|
+
- flash-attn==2.7.0.post2
|
|
100
|
+
- deepspeed==0.15.4
|
|
101
|
+
- vllm==0.6.4.post1
|
|
102
|
+
- transformers==4.46.3
|
|
103
|
+
- accelerate
|
|
104
|
+
- datasets
|
|
105
|
+
notes:
|
|
106
|
+
- 完整的依赖项请参见 requirements.txt。
|
|
107
|
+
- 'Lean 4 必须通过 elan(leanprover-community.github.io)单独安装。在运行定理证明评估或 MPM 数据集构建之前,必须使用 `lake build` 在本地构建 Mathlib4。
|
|
108
|
+
|
|
109
|
+
'
|
|
110
|
+
- '仓库中未明确固定 CUDA 版本;根据 vLLM 和 flash-attn 的兼容性,推荐 12.1 版本。11.8 也可能可用。
|
|
111
|
+
|
|
112
|
+
'
|
|
113
|
+
risk_flags:
|
|
114
|
+
- external_api_dependency
|
|
115
|
+
- lean_toolchain_setup
|
|
116
|
+
- large_training_pipeline
|
|
117
|
+
- openai_cost
|
|
118
|
+
risk_notes:
|
|
119
|
+
- 'MPM 数据集构建需要在约 285k 样本上大量调用 OpenAI API(GPT-4o),可能产生可观的 API 费用。完整构建需预算数百美元。
|
|
120
|
+
|
|
121
|
+
'
|
|
122
|
+
- 'Lean 4 和 Mathlib4 必须在本地编译。Mathlib 构建本身可能需要 1-2 小时,并需要大量 RAM。在配置不当的系统上构建失败很常见。
|
|
123
|
+
|
|
124
|
+
'
|
|
125
|
+
- '训练在最多 8 块 GPU 上使用 DeepSpeed ZeRO Stage 3 运行 4 个 epoch。训练脚本有占位符路径,启动前必须编辑。
|
|
126
|
+
|
|
127
|
+
'
|
|
128
|
+
- '评估脚本使用 vLLM 且 gpu_memory_utilization=0.95,根据批次大小不同可能在显存小于 48 GB 的 GPU 上导致 OOM。
|
|
129
|
+
|
|
130
|
+
'
|
|
131
|
+
recommended_when: '当您需要一个重量级、多阶段的数学推理任务,将 LLM 微调、Lean 4 形式验证、Python 代码执行和自然语言思维链相结合时使用此基准测试。非常适合探索多范式协作如何提升数学推理能力,涵盖算术计算(GSM8K、MATH、AIME)和定理证明(miniF2F)。
|
|
132
|
+
|
|
133
|
+
'
|
|
134
|
+
not_recommended_when: '如果无法配置多 GPU 基础设施(≥48 GB 显存)、无法安装和构建 Lean 4 + Mathlib4,或希望避免数据集构建产生的 OpenAI API 费用,请勿使用此基准测试。如果您需要的是快速、独立运行的纯评估基准测试,本基准同样不适合——完整流程需要大量设置和计算时间。
|
|
135
|
+
|
|
136
|
+
'
|
|
137
|
+
paper:
|
|
138
|
+
title: 'Chain-of-Reasoning: Towards Unified Mathematical Reasoning in Large Language
|
|
139
|
+
Models via a Multi-Paradigm Perspective
|
|
140
|
+
|
|
141
|
+
'
|
|
142
|
+
venue: arXiv preprint
|
|
143
|
+
year: 2025
|
|
144
|
+
url: https://arxiv.org/abs/2501.11110
|
|
145
|
+
download:
|
|
146
|
+
url: https://github.com/ResearAI/DeepScientist/releases/download/aisb-v0.0.1/aisb.t3.022_chainofreasoning.zip
|
|
147
|
+
archive_type: zip
|
|
148
|
+
local_dir_name: paper-22-ChainOfReasoning
|
|
149
|
+
provider: github_release
|
|
150
|
+
repo: ResearAI/DeepScientist
|
|
151
|
+
tag: aisb-v0.0.1
|
|
152
|
+
asset_name: aisb.t3.022_chainofreasoning.zip
|
|
153
|
+
sha256: 10a4cef8d810a0806780f11b2059b970a3ac4152b614f97a1e9e47294629764b
|
|
154
|
+
size_bytes: 986391
|
|
155
|
+
commercial:
|
|
156
|
+
annual_fee: null
|
|
157
|
+
display:
|
|
158
|
+
palette_seed: cobalt-ivory-proof
|
|
159
|
+
art_style: formal-reasoning
|
|
160
|
+
accent_priority: high
|
|
161
|
+
image_path: ../image/022_aisb.t3.022_chainofreasoning.jpg
|