@researai/deepscientist 1.5.16 → 1.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +309 -130
- package/AISB/catalog/aisb.b1.agentic_coding.yaml +244 -0
- package/AISB/catalog/aisb.b10.climate_earth.yaml +235 -0
- package/AISB/catalog/aisb.b11.model_efficiency.yaml +231 -0
- package/AISB/catalog/aisb.b12.embodied_ai.yaml +238 -0
- package/AISB/catalog/aisb.b2.agent_systems.yaml +229 -0
- package/AISB/catalog/aisb.b3.self_evolving_rl.yaml +237 -0
- package/AISB/catalog/aisb.b4.lm_reasoning.yaml +240 -0
- package/AISB/catalog/aisb.b5.math_proof.yaml +235 -0
- package/AISB/catalog/aisb.b6.research_process.yaml +243 -0
- package/AISB/catalog/aisb.b7.multimodal_fusion.yaml +232 -0
- package/AISB/catalog/aisb.b8.lifesci_drug.yaml +275 -0
- package/AISB/catalog/aisb.b9.material_science.yaml +237 -0
- package/AISB/catalog/aisb.t3.001_savvy.yaml +159 -0
- package/AISB/catalog/aisb.t3.001_savvy.zh.yaml +121 -0
- package/AISB/catalog/aisb.t3.002_pinet.yaml +189 -0
- package/AISB/catalog/aisb.t3.002_pinet.zh.yaml +130 -0
- package/AISB/catalog/aisb.t3.004_decentralattn.yaml +184 -0
- package/AISB/catalog/aisb.t3.004_decentralattn.zh.yaml +153 -0
- package/AISB/catalog/aisb.t3.005_tsae.yaml +193 -0
- package/AISB/catalog/aisb.t3.005_tsae.zh.yaml +139 -0
- package/AISB/catalog/aisb.t3.006_physense.yaml +194 -0
- package/AISB/catalog/aisb.t3.006_physense.zh.yaml +118 -0
- package/AISB/catalog/aisb.t3.007_reasoningiqa.yaml +169 -0
- package/AISB/catalog/aisb.t3.007_reasoningiqa.zh.yaml +133 -0
- package/AISB/catalog/aisb.t3.008_meanflows.yaml +188 -0
- package/AISB/catalog/aisb.t3.008_meanflows.zh.yaml +140 -0
- package/AISB/catalog/aisb.t3.009_scoremissing.yaml +179 -0
- package/AISB/catalog/aisb.t3.009_scoremissing.zh.yaml +119 -0
- package/AISB/catalog/aisb.t3.010_suitabilityfilter.yaml +221 -0
- package/AISB/catalog/aisb.t3.010_suitabilityfilter.zh.yaml +141 -0
- package/AISB/catalog/aisb.t3.011_osd.yaml +206 -0
- package/AISB/catalog/aisb.t3.011_osd.zh.yaml +163 -0
- package/AISB/catalog/aisb.t3.012_efficientqat.yaml +206 -0
- package/AISB/catalog/aisb.t3.012_efficientqat.zh.yaml +159 -0
- package/AISB/catalog/aisb.t3.013_appl.yaml +152 -0
- package/AISB/catalog/aisb.t3.013_appl.zh.yaml +126 -0
- package/AISB/catalog/aisb.t3.014_piguard.yaml +207 -0
- package/AISB/catalog/aisb.t3.014_piguard.zh.yaml +164 -0
- package/AISB/catalog/aisb.t3.015_frspec.yaml +209 -0
- package/AISB/catalog/aisb.t3.015_frspec.zh.yaml +163 -0
- package/AISB/catalog/aisb.t3.016_mathfusion.yaml +166 -0
- package/AISB/catalog/aisb.t3.016_mathfusion.zh.yaml +145 -0
- package/AISB/catalog/aisb.t3.017_multimodalglp.yaml +171 -0
- package/AISB/catalog/aisb.t3.017_multimodalglp.zh.yaml +122 -0
- package/AISB/catalog/aisb.t3.018_cotsynth.yaml +206 -0
- package/AISB/catalog/aisb.t3.018_cotsynth.zh.yaml +162 -0
- package/AISB/catalog/aisb.t3.019_dyscaleut.yaml +211 -0
- package/AISB/catalog/aisb.t3.019_dyscaleut.zh.yaml +148 -0
- package/AISB/catalog/aisb.t3.020_aristotle.yaml +173 -0
- package/AISB/catalog/aisb.t3.020_aristotle.zh.yaml +119 -0
- package/AISB/catalog/aisb.t3.021_tokenrecycling.yaml +160 -0
- package/AISB/catalog/aisb.t3.021_tokenrecycling.zh.yaml +129 -0
- package/AISB/catalog/aisb.t3.022_chainofreasoning.yaml +204 -0
- package/AISB/catalog/aisb.t3.022_chainofreasoning.zh.yaml +161 -0
- package/AISB/catalog/aisb.t3.023_guidedembed.yaml +211 -0
- package/AISB/catalog/aisb.t3.023_guidedembed.zh.yaml +189 -0
- package/AISB/catalog/aisb.t3.024_outputcentric.yaml +148 -0
- package/AISB/catalog/aisb.t3.024_outputcentric.zh.yaml +131 -0
- package/AISB/catalog/aisb.t3.025_deeper.yaml +143 -0
- package/AISB/catalog/aisb.t3.025_deeper.zh.yaml +116 -0
- package/AISB/catalog/aisb.t3.026_gartkg.yaml +195 -0
- package/AISB/catalog/aisb.t3.026_gartkg.zh.yaml +127 -0
- package/AISB/catalog/aisb.t3.027_citeeval.yaml +182 -0
- package/AISB/catalog/aisb.t3.027_citeeval.zh.yaml +135 -0
- package/AISB/catalog/aisb.t3.028_sbam.yaml +206 -0
- package/AISB/catalog/aisb.t3.028_sbam.zh.yaml +166 -0
- package/AISB/catalog/aisb.t3.029_cdqgeoembed.yaml +224 -0
- package/AISB/catalog/aisb.t3.029_cdqgeoembed.zh.yaml +142 -0
- package/AISB/catalog/aisb.t3.030_processrm.yaml +211 -0
- package/AISB/catalog/aisb.t3.030_processrm.zh.yaml +166 -0
- package/AISB/catalog/aisb.t3.031_circuitstability.yaml +172 -0
- package/AISB/catalog/aisb.t3.031_circuitstability.zh.yaml +134 -0
- package/AISB/catalog/aisb.t3.032_ptsolver.yaml +169 -0
- package/AISB/catalog/aisb.t3.032_ptsolver.zh.yaml +135 -0
- package/AISB/catalog/aisb.t3.033_gcse.yaml +144 -0
- package/AISB/catalog/aisb.t3.033_gcse.zh.yaml +126 -0
- package/AISB/catalog/aisb.t3.034_ensemblewm.yaml +183 -0
- package/AISB/catalog/aisb.t3.034_ensemblewm.zh.yaml +146 -0
- package/AISB/catalog/aisb.t3.035_moralvalueswa.yaml +207 -0
- package/AISB/catalog/aisb.t3.035_moralvalueswa.zh.yaml +165 -0
- package/AISB/catalog/aisb.t3.036_weakstrongpref.yaml +210 -0
- package/AISB/catalog/aisb.t3.036_weakstrongpref.zh.yaml +194 -0
- package/AISB/catalog/aisb.t3.037_dementiamask.yaml +172 -0
- package/AISB/catalog/aisb.t3.037_dementiamask.zh.yaml +132 -0
- package/AISB/catalog/aisb.t3.038_tinysam.yaml +284 -0
- package/AISB/catalog/aisb.t3.038_tinysam.zh.yaml +240 -0
- package/AISB/catalog/aisb.t3.039_calf.yaml +224 -0
- package/AISB/catalog/aisb.t3.039_calf.zh.yaml +194 -0
- package/AISB/catalog/aisb.t3.040_graniteguardian.yaml +199 -0
- package/AISB/catalog/aisb.t3.040_graniteguardian.zh.yaml +174 -0
- package/AISB/catalog/aisb.t3.041_amdm.yaml +149 -0
- package/AISB/catalog/aisb.t3.041_amdm.zh.yaml +137 -0
- package/AISB/catalog/aisb.t3.042_xpatch.yaml +216 -0
- package/AISB/catalog/aisb.t3.042_xpatch.zh.yaml +182 -0
- package/AISB/catalog/aisb.t3.043_vhm.yaml +268 -0
- package/AISB/catalog/aisb.t3.043_vhm.zh.yaml +193 -0
- package/AISB/catalog/aisb.t3.044_rgvi.yaml +224 -0
- package/AISB/catalog/aisb.t3.044_rgvi.zh.yaml +176 -0
- package/AISB/catalog/aisb.t3.045_pslstm.yaml +203 -0
- package/AISB/catalog/aisb.t3.045_pslstm.zh.yaml +179 -0
- package/AISB/catalog/aisb.t3.046_nonstatts.yaml +208 -0
- package/AISB/catalog/aisb.t3.046_nonstatts.zh.yaml +194 -0
- package/AISB/catalog/aisb.t3.047_timepfn.yaml +156 -0
- package/AISB/catalog/aisb.t3.047_timepfn.zh.yaml +124 -0
- package/AISB/catalog/aisb.t3.048_proxyspex.yaml +148 -0
- package/AISB/catalog/aisb.t3.048_proxyspex.zh.yaml +125 -0
- package/AISB/catalog/aisb.t3.049_hogwildinference.yaml +183 -0
- package/AISB/catalog/aisb.t3.049_hogwildinference.zh.yaml +138 -0
- package/AISB/catalog/aisb.t3.050_causalpfn.yaml +214 -0
- package/AISB/catalog/aisb.t3.050_causalpfn.zh.yaml +190 -0
- package/AISB/catalog/aisb.t3.051_flashtp.yaml +169 -0
- package/AISB/catalog/aisb.t3.051_flashtp.zh.yaml +124 -0
- package/AISB/catalog/aisb.t3.052_nsdiff.yaml +155 -0
- package/AISB/catalog/aisb.t3.052_nsdiff.zh.yaml +138 -0
- package/AISB/catalog/aisb.t3.053_k2vae.yaml +158 -0
- package/AISB/catalog/aisb.t3.053_k2vae.zh.yaml +132 -0
- package/AISB/catalog/aisb.t3.054_timebase.yaml +178 -0
- package/AISB/catalog/aisb.t3.054_timebase.zh.yaml +158 -0
- package/AISB/catalog/aisb.t3.055_csbrain.yaml +238 -0
- package/AISB/catalog/aisb.t3.055_csbrain.zh.yaml +184 -0
- package/AISB/catalog/aisb.t3.056_infosam.yaml +224 -0
- package/AISB/catalog/aisb.t3.056_infosam.zh.yaml +189 -0
- package/AISB/catalog/aisb.t3.057_mdreid.yaml +129 -0
- package/AISB/catalog/aisb.t3.057_mdreid.zh.yaml +117 -0
- package/AISB/catalog/aisb.t3.058_mindglitch.yaml +171 -0
- package/AISB/catalog/aisb.t3.058_mindglitch.zh.yaml +145 -0
- package/AISB/catalog/aisb.t3.059_selfsupervised.yaml +154 -0
- package/AISB/catalog/aisb.t3.059_selfsupervised.zh.yaml +125 -0
- package/AISB/catalog/aisb.t3.060_iaggad.yaml +121 -0
- package/AISB/catalog/aisb.t3.060_iaggad.zh.yaml +100 -0
- package/AISB/catalog/aisb.t3.061_hsgkn.yaml +136 -0
- package/AISB/catalog/aisb.t3.061_hsgkn.zh.yaml +113 -0
- package/AISB/catalog/aisb.t3.062_visionts.yaml +237 -0
- package/AISB/catalog/aisb.t3.062_visionts.zh.yaml +216 -0
- package/AISB/catalog/aisb.t3.063_tsrag.yaml +162 -0
- package/AISB/catalog/aisb.t3.063_tsrag.zh.yaml +138 -0
- package/AISB/catalog/aisb.t3.064_pir.yaml +221 -0
- package/AISB/catalog/aisb.t3.064_pir.zh.yaml +197 -0
- package/AISB/catalog/aisb.t3.065_proteinbinding.yaml +234 -0
- package/AISB/catalog/aisb.t3.065_proteinbinding.zh.yaml +167 -0
- package/AISB/catalog/aisb.t3.066_tropicalattention.yaml +267 -0
- package/AISB/catalog/aisb.t3.066_tropicalattention.zh.yaml +229 -0
- package/AISB/catalog/aisb.t3.067_kanad.yaml +193 -0
- package/AISB/catalog/aisb.t3.067_kanad.zh.yaml +167 -0
- package/AISB/catalog/aisb.t3.068_sempo.yaml +187 -0
- package/AISB/catalog/aisb.t3.068_sempo.zh.yaml +148 -0
- package/AISB/catalog/aisb.t3.069_treehfd.yaml +129 -0
- package/AISB/catalog/aisb.t3.069_treehfd.zh.yaml +111 -0
- package/AISB/catalog/aisb.t3.070_certifiedunlearning.yaml +224 -0
- package/AISB/catalog/aisb.t3.070_certifiedunlearning.zh.yaml +171 -0
- package/AISB/catalog/aisb.t3.071_neuralmjd.yaml +142 -0
- package/AISB/catalog/aisb.t3.071_neuralmjd.zh.yaml +120 -0
- package/AISB/catalog/aisb.t3.072_fedgmt.yaml +181 -0
- package/AISB/catalog/aisb.t3.072_fedgmt.zh.yaml +158 -0
- package/AISB/catalog/aisb.t3.073_rld.yaml +161 -0
- package/AISB/catalog/aisb.t3.073_rld.zh.yaml +129 -0
- package/AISB/catalog/aisb.t3.074_lsvi.yaml +163 -0
- package/AISB/catalog/aisb.t3.074_lsvi.zh.yaml +129 -0
- package/AISB/catalog/aisb.t3.075_treeslicedentropy.yaml +201 -0
- package/AISB/catalog/aisb.t3.075_treeslicedentropy.zh.yaml +148 -0
- package/AISB/catalog/aisb.t3.076_aanet.yaml +169 -0
- package/AISB/catalog/aisb.t3.076_aanet.zh.yaml +129 -0
- package/AISB/catalog/aisb.t3.077_cmnn.yaml +199 -0
- package/AISB/catalog/aisb.t3.077_cmnn.zh.yaml +165 -0
- package/AISB/catalog/aisb.t3.078_conformalanomaly.yaml +146 -0
- package/AISB/catalog/aisb.t3.078_conformalanomaly.zh.yaml +117 -0
- package/AISB/catalog/aisb.t3.079_dpfkmeans.yaml +131 -0
- package/AISB/catalog/aisb.t3.079_dpfkmeans.zh.yaml +104 -0
- package/AISB/catalog/aisb.t3.080_latentscorereweight.yaml +169 -0
- package/AISB/catalog/aisb.t3.080_latentscorereweight.zh.yaml +123 -0
- package/AISB/catalog/aisb.t3.081_qmamba.yaml +150 -0
- package/AISB/catalog/aisb.t3.081_qmamba.zh.yaml +117 -0
- package/AISB/catalog/aisb.t3.082_onlinellmrouting.yaml +160 -0
- package/AISB/catalog/aisb.t3.082_onlinellmrouting.zh.yaml +133 -0
- package/AISB/catalog/aisb.t3.083_starformer.yaml +178 -0
- package/AISB/catalog/aisb.t3.083_starformer.zh.yaml +140 -0
- package/AISB/catalog/aisb.t3.084_ift.yaml +139 -0
- package/AISB/catalog/aisb.t3.084_ift.zh.yaml +111 -0
- package/AISB/catalog/aisb.t3.085_neuralsurv.yaml +183 -0
- package/AISB/catalog/aisb.t3.085_neuralsurv.zh.yaml +143 -0
- package/AISB/catalog/aisb.t3.086_stella.yaml +197 -0
- package/AISB/catalog/aisb.t3.086_stella.zh.yaml +142 -0
- package/AISB/catalog/aisb.t3.087_moses.yaml +167 -0
- package/AISB/catalog/aisb.t3.087_moses.zh.yaml +132 -0
- package/AISB/catalog/aisb.t3.088_channelnorm.yaml +140 -0
- package/AISB/catalog/aisb.t3.088_channelnorm.zh.yaml +109 -0
- package/AISB/catalog/aisb.t3.089_causalvelocity.yaml +730 -0
- package/AISB/catalog/aisb.t3.089_causalvelocity.zh.yaml +668 -0
- package/AISB/catalog/aisb.t3.090_rstib.yaml +144 -0
- package/AISB/catalog/aisb.t3.090_rstib.zh.yaml +109 -0
- package/AISB/catalog/aisb.t3.091_timeawarecausal.yaml +132 -0
- package/AISB/catalog/aisb.t3.091_timeawarecausal.zh.yaml +107 -0
- package/AISB/catalog/aisb.t3.092_kmeanslocalopt.yaml +138 -0
- package/AISB/catalog/aisb.t3.092_kmeanslocalopt.zh.yaml +110 -0
- package/AISB/catalog/aisb.t3.093_fedwmsam.yaml +134 -0
- package/AISB/catalog/aisb.t3.093_fedwmsam.zh.yaml +106 -0
- package/AISB/catalog/aisb.t3.094_boundre.yaml +147 -0
- package/AISB/catalog/aisb.t3.094_boundre.zh.yaml +114 -0
- package/AISB/catalog/aisb.t3.095_fastfeaturecp.yaml +153 -0
- package/AISB/catalog/aisb.t3.095_fastfeaturecp.zh.yaml +118 -0
- package/AISB/catalog/aisb.t3.096_m3svm.yaml +189 -0
- package/AISB/catalog/aisb.t3.096_m3svm.zh.yaml +149 -0
- package/AISB/catalog/aisb.t3.097_wassersteintl.yaml +212 -0
- package/AISB/catalog/aisb.t3.097_wassersteintl.zh.yaml +169 -0
- package/AISB/catalog/aisb.t3.098_xmahalanobis.yaml +171 -0
- package/AISB/catalog/aisb.t3.098_xmahalanobis.zh.yaml +127 -0
- package/AISB/catalog/aisb.t3.099_ollalanding.yaml +248 -0
- package/AISB/catalog/aisb.t3.099_ollalanding.zh.yaml +182 -0
- package/AISB/catalog/aisb.t3.100_invmissingdata.yaml +179 -0
- package/AISB/catalog/aisb.t3.100_invmissingdata.zh.yaml +150 -0
- package/AISB/catalog/aisb.t3.101_acia.yaml +164 -0
- package/AISB/catalog/aisb.t3.101_acia.zh.yaml +109 -0
- package/AISB/catalog/aisb.t3.102_stochasticff.yaml +178 -0
- package/AISB/catalog/aisb.t3.102_stochasticff.zh.yaml +130 -0
- package/AISB/catalog/aisb.t3.103_qdcp.yaml +150 -0
- package/AISB/catalog/aisb.t3.103_qdcp.zh.yaml +116 -0
- package/AISB/catalog/aisb.t3.104_balancedactiveinf.yaml +137 -0
- package/AISB/catalog/aisb.t3.104_balancedactiveinf.zh.yaml +104 -0
- package/AISB/catalog/aisb.t3.105_binaryclasseval.yaml +161 -0
- package/AISB/catalog/aisb.t3.105_binaryclasseval.zh.yaml +130 -0
- package/AISB/image/001_aisb.t3.001_savvy.jpg +0 -0
- package/AISB/image/002_aisb.t3.002_pinet.jpg +0 -0
- package/AISB/image/003_aisb.t3.003_dmsqd.jpg +0 -0
- package/AISB/image/004_aisb.t3.004_decentralattn.jpg +0 -0
- package/AISB/image/005_aisb.t3.005_tsae.jpg +0 -0
- package/AISB/image/006_aisb.t3.006_physense.jpg +0 -0
- package/AISB/image/007_aisb.t3.007_reasoningiqa.jpg +0 -0
- package/AISB/image/008_aisb.t3.008_meanflows.jpg +0 -0
- package/AISB/image/009_aisb.t3.009_scoremissing.jpg +0 -0
- package/AISB/image/010_aisb.t3.010_suitabilityfilter.jpg +0 -0
- package/AISB/image/011_aisb.t3.011_osd.jpg +0 -0
- package/AISB/image/012_aisb.t3.012_efficientqat.jpg +0 -0
- package/AISB/image/013_aisb.t3.013_appl.jpg +0 -0
- package/AISB/image/014_aisb.t3.014_piguard.jpg +0 -0
- package/AISB/image/015_aisb.t3.015_frspec.jpg +0 -0
- package/AISB/image/016_aisb.t3.016_mathfusion.jpg +0 -0
- package/AISB/image/017_aisb.t3.017_multimodalglp.jpg +0 -0
- package/AISB/image/018_aisb.t3.018_cotsynth.jpg +0 -0
- package/AISB/image/019_aisb.t3.019_dyscaleut.jpg +0 -0
- package/AISB/image/020_aisb.t3.020_aristotle.jpg +0 -0
- package/AISB/image/021_aisb.t3.021_tokenrecycling.jpg +0 -0
- package/AISB/image/022_aisb.t3.022_chainofreasoning.jpg +0 -0
- package/AISB/image/023_aisb.t3.023_guidedembed.jpg +0 -0
- package/AISB/image/024_aisb.t3.024_outputcentric.jpg +0 -0
- package/AISB/image/025_aisb.t3.025_deeper.jpg +0 -0
- package/AISB/image/026_aisb.t3.026_gartkg.jpg +0 -0
- package/AISB/image/027_aisb.t3.027_citeeval.jpg +0 -0
- package/AISB/image/028_aisb.t3.028_sbam.jpg +0 -0
- package/AISB/image/029_aisb.t3.029_cdqgeoembed.jpg +0 -0
- package/AISB/image/030_aisb.t3.030_processrm.jpg +0 -0
- package/AISB/image/031_aisb.t3.031_circuitstability.jpg +0 -0
- package/AISB/image/032_aisb.t3.032_ptsolver.jpg +0 -0
- package/AISB/image/033_aisb.t3.033_gcse.jpg +0 -0
- package/AISB/image/034_aisb.t3.034_ensemblewm.jpg +0 -0
- package/AISB/image/035_aisb.t3.035_moralvalueswa.jpg +0 -0
- package/AISB/image/036_aisb.t3.036_weakstrongpref.jpg +0 -0
- package/AISB/image/037_aisb.t3.037_dementiamask.jpg +0 -0
- package/AISB/image/038_aisb.t3.038_tinysam.jpg +0 -0
- package/AISB/image/039_aisb.t3.039_calf.jpg +0 -0
- package/AISB/image/040_aisb.t3.040_graniteguardian.jpg +0 -0
- package/AISB/image/041_aisb.t3.041_amdm.jpg +0 -0
- package/AISB/image/042_aisb.t3.042_xpatch.jpg +0 -0
- package/AISB/image/043_aisb.t3.043_vhm.jpg +0 -0
- package/AISB/image/044_aisb.t3.044_rgvi.jpg +0 -0
- package/AISB/image/045_aisb.t3.045_pslstm.jpg +0 -0
- package/AISB/image/046_aisb.t3.046_nonstatts.jpg +0 -0
- package/AISB/image/047_aisb.t3.047_timepfn.jpg +0 -0
- package/AISB/image/048_aisb.t3.048_proxyspex.jpg +0 -0
- package/AISB/image/049_aisb.t3.049_hogwildinference.jpg +0 -0
- package/AISB/image/050_aisb.t3.050_causalpfn.jpg +0 -0
- package/AISB/image/051_aisb.t3.051_flashtp.jpg +0 -0
- package/AISB/image/052_aisb.t3.052_nsdiff.jpg +0 -0
- package/AISB/image/053_aisb.t3.053_k2vae.jpg +0 -0
- package/AISB/image/054_aisb.t3.054_timebase.jpg +0 -0
- package/AISB/image/055_aisb.t3.055_csbrain.jpg +0 -0
- package/AISB/image/056_aisb.t3.056_infosam.jpg +0 -0
- package/AISB/image/057_aisb.t3.057_mdreid.jpg +0 -0
- package/AISB/image/058_aisb.t3.058_mindglitch.jpg +0 -0
- package/AISB/image/059_aisb.t3.059_selfsupervised.jpg +0 -0
- package/AISB/image/060_aisb.t3.060_iaggad.jpg +0 -0
- package/AISB/image/061_aisb.t3.061_hsgkn.jpg +0 -0
- package/AISB/image/062_aisb.t3.062_visionts.jpg +0 -0
- package/AISB/image/063_aisb.t3.063_tsrag.jpg +0 -0
- package/AISB/image/064_aisb.t3.064_pir.jpg +0 -0
- package/AISB/image/065_aisb.t3.065_proteinbinding.jpg +0 -0
- package/AISB/image/066_aisb.t3.066_tropicalattention.jpg +0 -0
- package/AISB/image/067_aisb.t3.067_kanad.jpg +0 -0
- package/AISB/image/068_aisb.t3.068_sempo.jpg +0 -0
- package/AISB/image/069_aisb.t3.069_treehfd.jpg +0 -0
- package/AISB/image/070_aisb.t3.070_certifiedunlearning.jpg +0 -0
- package/AISB/image/071_aisb.t3.071_neuralmjd.jpg +0 -0
- package/AISB/image/072_aisb.t3.072_fedgmt.jpg +0 -0
- package/AISB/image/073_aisb.t3.073_rld.jpg +0 -0
- package/AISB/image/074_aisb.t3.074_lsvi.jpg +0 -0
- package/AISB/image/075_aisb.t3.075_treeslicedentropy.jpg +0 -0
- package/AISB/image/076_aisb.t3.076_aanet.jpg +0 -0
- package/AISB/image/077_aisb.t3.077_cmnn.jpg +0 -0
- package/AISB/image/078_aisb.t3.078_conformalanomaly.jpg +0 -0
- package/AISB/image/079_aisb.t3.079_dpfkmeans.jpg +0 -0
- package/AISB/image/080_aisb.t3.080_latentscorereweight.jpg +0 -0
- package/AISB/image/081_aisb.t3.081_qmamba.jpg +0 -0
- package/AISB/image/082_aisb.t3.082_onlinellmrouting.jpg +0 -0
- package/AISB/image/083_aisb.t3.083_starformer.jpg +0 -0
- package/AISB/image/084_aisb.t3.084_ift.jpg +0 -0
- package/AISB/image/085_aisb.t3.085_neuralsurv.jpg +0 -0
- package/AISB/image/086_aisb.t3.086_stella.jpg +0 -0
- package/AISB/image/087_aisb.t3.087_moses.jpg +0 -0
- package/AISB/image/088_aisb.t3.088_channelnorm.jpg +0 -0
- package/AISB/image/089_aisb.t3.089_causalvelocity.jpg +0 -0
- package/AISB/image/090_aisb.t3.090_rstib.jpg +0 -0
- package/AISB/image/091_aisb.t3.091_timeawarecausal.jpg +0 -0
- package/AISB/image/092_aisb.t3.092_kmeanslocalopt.jpg +0 -0
- package/AISB/image/093_aisb.t3.093_fedwmsam.jpg +0 -0
- package/AISB/image/094_aisb.t3.094_boundre.jpg +0 -0
- package/AISB/image/095_aisb.t3.095_fastfeaturecp.jpg +0 -0
- package/AISB/image/096_aisb.t3.096_m3svm.jpg +0 -0
- package/AISB/image/097_aisb.t3.097_wassersteintl.jpg +0 -0
- package/AISB/image/098_aisb.t3.098_xmahalanobis.jpg +0 -0
- package/AISB/image/099_aisb.t3.099_ollalanding.jpg +0 -0
- package/AISB/image/100_aisb.t3.100_invmissingdata.jpg +0 -0
- package/AISB/image/101_aisb.t3.101_acia.jpg +0 -0
- package/AISB/image/102_aisb.t3.102_stochasticff.jpg +0 -0
- package/AISB/image/103_aisb.t3.103_qdcp.jpg +0 -0
- package/AISB/image/104_aisb.t3.104_balancedactiveinf.jpg +0 -0
- package/AISB/image/105_aisb.t3.105_binaryclasseval.jpg +0 -0
- package/AISB/image/106_aisb.t1.reasoning_lite.jpg +0 -0
- package/AISB/image/107_aisb.t2.paper_audit.jpg +0 -0
- package/AISB/image/108_aisb.t3.multi_gpu_search.jpg +0 -0
- package/AISB/image/109_aisb.t3.tdc_admet.jpg +0 -0
- package/AISB/image/aisb.b1.agentic_coding.svg +16 -0
- package/AISB/image/aisb.b10.climate_earth.svg +16 -0
- package/AISB/image/aisb.b11.model_efficiency.svg +16 -0
- package/AISB/image/aisb.b12.embodied_ai.svg +16 -0
- package/AISB/image/aisb.b2.agent_systems.svg +16 -0
- package/AISB/image/aisb.b3.self_evolving_rl.svg +16 -0
- package/AISB/image/aisb.b4.lm_reasoning.svg +16 -0
- package/AISB/image/aisb.b5.math_proof.svg +16 -0
- package/AISB/image/aisb.b6.research_process.svg +16 -0
- package/AISB/image/aisb.b7.multimodal_fusion.svg +16 -0
- package/AISB/image/aisb.b8.lifesci_drug.svg +16 -0
- package/AISB/image/aisb.b9.material_science.svg +16 -0
- package/README.md +196 -32
- package/bin/ds.js +924 -66
- package/docs/en/00_QUICK_START.md +195 -18
- package/docs/en/01_SETTINGS_REFERENCE.md +468 -96
- package/docs/en/02_START_RESEARCH_GUIDE.md +26 -5
- package/docs/en/03_QQ_CONNECTOR_GUIDE.md +14 -3
- package/docs/en/04_LINGZHU_CONNECTOR_GUIDE.md +2 -0
- package/docs/en/05_TUI_GUIDE.md +171 -2
- package/docs/en/07_MEMORY_AND_MCP.md +38 -2
- package/docs/en/09_DOCTOR.md +78 -7
- package/docs/en/10_WEIXIN_CONNECTOR_GUIDE.md +38 -1
- package/docs/en/11_LICENSE_AND_RISK.md +4 -0
- package/docs/en/12_GUIDED_WORKFLOW_TOUR.md +15 -0
- package/docs/en/14_PROMPT_SKILLS_AND_MCP_GUIDE.md +9 -0
- package/docs/en/15_CODEX_PROVIDER_SETUP.md +624 -180
- package/docs/en/16_TELEGRAM_CONNECTOR_GUIDE.md +14 -0
- package/docs/en/17_WHATSAPP_CONNECTOR_GUIDE.md +14 -0
- package/docs/en/18_FEISHU_CONNECTOR_GUIDE.md +14 -0
- package/docs/en/21_LOCAL_MODEL_BACKENDS_GUIDE.md +386 -0
- package/docs/en/22_BENCHSTORE_YAML_REFERENCE.md +469 -0
- package/docs/en/23_BENCHSTORE_GITHUB_RELEASES_SPEC.md +316 -0
- package/docs/en/24_CLAUDE_CODE_PROVIDER_SETUP.md +469 -0
- package/docs/en/25_OPENCODE_PROVIDER_SETUP.md +653 -0
- package/docs/en/26_CITATION_AND_ATTRIBUTION.md +119 -0
- package/docs/en/27_KIMI_CODE_PROVIDER_SETUP.md +180 -0
- package/docs/en/28_DISCORD_CONNECTOR_GUIDE.md +61 -0
- package/docs/en/29_SLACK_CONNECTOR_GUIDE.md +60 -0
- package/docs/en/30_SETTINGS_CONTROL_CENTER_GUIDE.md +371 -0
- package/docs/en/{19_LOCAL_BROWSER_AUTH.md → 31_LOCAL_BROWSER_AUTH.md} +1 -1
- package/docs/en/32_WINDOWS_WSL2_DEPLOYMENT_GUIDE.md +273 -0
- package/docs/en/33_WORKSPACE_EXPLORER_QA.md +121 -0
- package/docs/en/91_DEVELOPMENT.md +266 -0
- package/docs/en/99_ACKNOWLEDGEMENTS.md +24 -19
- package/docs/en/README.md +48 -7
- package/docs/images/admin/admin-connectors-health-en.png +0 -0
- package/docs/images/admin/admin-controllers-en.png +0 -0
- package/docs/images/admin/admin-diagnostics-en.png +0 -0
- package/docs/images/admin/admin-errors-en.png +0 -0
- package/docs/images/admin/admin-issues-en.png +0 -0
- package/docs/images/admin/admin-logs-en.png +0 -0
- package/docs/images/admin/admin-quest-detail-en.png +0 -0
- package/docs/images/admin/admin-quests-en.png +0 -0
- package/docs/images/admin/admin-repairs-en.png +0 -0
- package/docs/images/admin/admin-runtime-en.png +0 -0
- package/docs/images/admin/admin-search-en.png +0 -0
- package/docs/images/admin/admin-stats-en.png +0 -0
- package/docs/images/admin/admin-summary-en.png +0 -0
- package/docs/images/connectors/connector-discord-en.png +0 -0
- package/docs/images/connectors/connector-feishu-en.png +0 -0
- package/docs/images/connectors/connector-lingzhu-en.png +0 -0
- package/docs/images/connectors/connector-qq-en.png +0 -0
- package/docs/images/connectors/connector-slack-en.png +0 -0
- package/docs/images/connectors/connector-telegram-en.png +0 -0
- package/docs/images/connectors/connector-weixin-en.png +0 -0
- package/docs/images/connectors/connector-whatsapp-en.png +0 -0
- package/docs/images/settings/settings-baselines-en.png +0 -0
- package/docs/images/settings/settings-config-en.png +0 -0
- package/docs/images/settings/settings-connectors-overview-en.png +0 -0
- package/docs/images/settings/settings-deepxiv-en.png +0 -0
- package/docs/images/settings/settings-mcp-servers-en.png +0 -0
- package/docs/images/settings/settings-plugins-en.png +0 -0
- package/docs/images/settings/settings-runners-en.png +0 -0
- package/docs/zh/00_QUICK_START.md +142 -18
- package/docs/zh/01_SETTINGS_REFERENCE.md +219 -98
- package/docs/zh/02_START_RESEARCH_GUIDE.md +26 -5
- package/docs/zh/05_TUI_GUIDE.md +171 -2
- package/docs/zh/07_MEMORY_AND_MCP.md +29 -2
- package/docs/zh/09_DOCTOR.md +54 -8
- package/docs/zh/10_WEIXIN_CONNECTOR_GUIDE.md +24 -1
- package/docs/zh/11_LICENSE_AND_RISK.md +4 -0
- package/docs/zh/12_GUIDED_WORKFLOW_TOUR.md +15 -0
- package/docs/zh/14_PROMPT_SKILLS_AND_MCP_GUIDE.md +9 -0
- package/docs/zh/15_CODEX_PROVIDER_SETUP.md +552 -181
- package/docs/zh/21_LOCAL_MODEL_BACKENDS_GUIDE.md +384 -0
- package/docs/zh/22_BENCHSTORE_YAML_REFERENCE.md +459 -0
- package/docs/zh/23_BENCHSTORE_GITHUB_RELEASES_SPEC.md +287 -0
- package/docs/zh/23_CLAUDE_RUNNER_GUIDE.md +103 -0
- package/docs/zh/24_CLAUDE_CODE_PROVIDER_SETUP.md +460 -0
- package/docs/zh/25_OPENCODE_PROVIDER_SETUP.md +660 -0
- package/docs/zh/26_CITATION_AND_ATTRIBUTION.md +102 -0
- package/docs/zh/27_KIMI_CODE_PROVIDER_SETUP.md +51 -0
- package/docs/zh/{19_LOCAL_BROWSER_AUTH.md → 31_LOCAL_BROWSER_AUTH.md} +1 -1
- package/docs/zh/32_WINDOWS_WSL2_DEPLOYMENT_GUIDE.md +264 -0
- package/docs/zh/33_WORKSPACE_EXPLORER_QA.md +127 -0
- package/docs/zh/99_ACKNOWLEDGEMENTS.md +23 -19
- package/docs/zh/README.md +33 -7
- package/install.sh +168 -20
- package/package.json +5 -1
- package/pyproject.toml +2 -1
- package/src/deepscientist/__init__.py +1 -1
- package/src/deepscientist/acp/envelope.py +13 -0
- package/src/deepscientist/admin/__init__.py +3 -0
- package/src/deepscientist/admin/charts.py +681 -0
- package/src/deepscientist/admin/logs.py +119 -0
- package/src/deepscientist/admin/repairs.py +217 -0
- package/src/deepscientist/admin/service.py +1310 -0
- package/src/deepscientist/admin/system_info.py +700 -0
- package/src/deepscientist/admin/tasks.py +465 -0
- package/src/deepscientist/admin/tool_metrics.py +600 -0
- package/src/deepscientist/artifact/guidance.py +8 -4
- package/src/deepscientist/artifact/schemas.py +115 -0
- package/src/deepscientist/artifact/service.py +4268 -260
- package/src/deepscientist/bash_exec/monitor.py +30 -3
- package/src/deepscientist/bash_exec/service.py +134 -1
- package/src/deepscientist/benchstore/__init__.py +4 -0
- package/src/deepscientist/benchstore/prompt_builder.py +224 -0
- package/src/deepscientist/benchstore/service.py +1716 -0
- package/src/deepscientist/bridges/connectors.py +8 -2
- package/src/deepscientist/channels/weixin_ilink.py +8 -1
- package/src/deepscientist/cli.py +92 -17
- package/src/deepscientist/codex_cli_compat.py +187 -74
- package/src/deepscientist/config/models.py +82 -11
- package/src/deepscientist/config/service.py +1077 -93
- package/src/deepscientist/connector/weixin_support.py +48 -17
- package/src/deepscientist/daemon/api/handlers.py +827 -235
- package/src/deepscientist/daemon/api/router.py +81 -1
- package/src/deepscientist/daemon/app.py +1512 -85
- package/src/deepscientist/diagnostics/__init__.py +6 -0
- package/src/deepscientist/diagnostics/runner_failures.py +277 -0
- package/src/deepscientist/doctor.py +407 -56
- package/src/deepscientist/evidence_packets.py +590 -0
- package/src/deepscientist/home.py +52 -4
- package/src/deepscientist/kimi_cli_compat.py +50 -0
- package/src/deepscientist/latex_runtime.py +2 -2
- package/src/deepscientist/mcp/context.py +2 -0
- package/src/deepscientist/mcp/schemas.py +114 -0
- package/src/deepscientist/mcp/server.py +1566 -126
- package/src/deepscientist/memory/service.py +203 -16
- package/src/deepscientist/process_control.py +8 -1
- package/src/deepscientist/prompts/builder.py +850 -88
- package/src/deepscientist/quest/__init__.py +2 -2
- package/src/deepscientist/quest/layout.py +12 -1
- package/src/deepscientist/quest/node_traces.py +10 -0
- package/src/deepscientist/quest/service.py +1852 -161
- package/src/deepscientist/quest/stage_views.py +1 -1
- package/src/deepscientist/runners/__init__.py +18 -0
- package/src/deepscientist/runners/base.py +89 -1
- package/src/deepscientist/runners/builtins.py +13 -1
- package/src/deepscientist/runners/claude.py +391 -0
- package/src/deepscientist/runners/codex.py +480 -35
- package/src/deepscientist/runners/codex_telemetry.py +127 -0
- package/src/deepscientist/runners/kimi.py +334 -0
- package/src/deepscientist/runners/metadata.py +68 -0
- package/src/deepscientist/runners/opencode.py +414 -0
- package/src/deepscientist/runners/runtime_overrides.py +100 -0
- package/src/deepscientist/runners/simple_cli.py +538 -0
- package/src/deepscientist/runtime_storage.py +303 -0
- package/src/deepscientist/shared.py +80 -16
- package/src/deepscientist/skills/installer.py +37 -0
- package/src/deepscientist/skills/registry.py +2 -0
- package/src/deepscientist/tinytex.py +2 -2
- package/src/deepscientist/tui.py +10 -3
- package/src/prompts/benchstore/system.md +77 -0
- package/src/prompts/connectors/qq.md +33 -2
- package/src/prompts/connectors/weixin.md +208 -23
- package/src/prompts/contracts/admin_ops.md +74 -0
- package/src/prompts/contracts/admin_ops_knowledge.md +138 -0
- package/src/prompts/contracts/shared_interaction.md +5 -10
- package/src/prompts/start_setup/system.md +422 -0
- package/src/prompts/system.md +411 -304
- package/src/prompts/system_copilot.md +89 -0
- package/src/skills/analysis-campaign/SKILL.md +239 -578
- package/src/skills/analysis-campaign/references/artifact-flow-examples.md +102 -0
- package/src/skills/analysis-campaign/references/boundary-cases.md +98 -0
- package/src/skills/analysis-campaign/references/campaign-checklist-template.md +39 -24
- package/src/skills/analysis-campaign/references/campaign-design.md +26 -10
- package/src/skills/analysis-campaign/references/campaign-plan-template.md +53 -54
- package/src/skills/analysis-campaign/references/operational-guidance.md +97 -0
- package/src/skills/analysis-campaign/references/writing-facing-slice-examples.md +10 -20
- package/src/skills/baseline/SKILL.md +183 -461
- package/src/skills/baseline/references/artifact-flow-examples.md +106 -0
- package/src/skills/baseline/references/artifact-payload-examples.md +1 -1
- package/src/skills/baseline/references/baseline-checklist-template.md +27 -35
- package/src/skills/baseline/references/baseline-plan-template.md +37 -76
- package/src/skills/baseline/references/boundary-cases.md +86 -0
- package/src/skills/baseline/references/codebase-audit-checklist.md +2 -6
- package/src/skills/baseline/references/comparability-contract.md +7 -12
- package/src/skills/baseline/references/operational-guidance.md +56 -0
- package/src/skills/baseline/references/route-selection.md +5 -25
- package/src/skills/decision/SKILL.md +113 -306
- package/src/skills/decision/references/checkpoint-memory-template.md +47 -0
- package/src/skills/decision/references/operational-guidance.md +94 -0
- package/src/skills/decision/references/research-route-criteria.md +7 -8
- package/src/skills/decision/references/strategic-decision-template.md +13 -26
- package/src/skills/experiment/SKILL.md +132 -670
- package/src/skills/experiment/references/execution-playbook.md +374 -0
- package/src/skills/experiment/references/main-experiment-checklist-template.md +26 -2
- package/src/skills/experiment/references/main-experiment-plan-template.md +28 -17
- package/src/skills/experiment/references/operational-guidance.md +108 -0
- package/src/skills/finalize/SKILL.md +62 -0
- package/src/skills/finalize/references/checkpoint-memory-template.md +49 -0
- package/src/skills/finalize/references/resume-packet-template.md +7 -0
- package/src/skills/idea/SKILL.md +228 -15
- package/src/skills/idea/references/controlled-brainstorming-playbook.md +78 -0
- package/src/skills/idea/references/current-board-packet-template.md +61 -0
- package/src/skills/idea/references/high-value-idea-sourcing.md +119 -0
- package/src/skills/idea/references/idea-generation-playbook.md +21 -0
- package/src/skills/idea/references/idea-thinking-flow.md +6 -0
- package/src/skills/idea/references/literature-survey-template.md +3 -0
- package/src/skills/idea/references/objective-contract-template.md +54 -0
- package/src/skills/idea/references/outline-seeding-example.md +56 -0
- package/src/skills/idea/references/pre-idea-draft-template.md +105 -0
- package/src/skills/idea/references/related-work-playbook.md +75 -2
- package/src/skills/idea/references/research-history-playbook.md +114 -0
- package/src/skills/idea/references/selection-gate.md +58 -6
- package/src/skills/intake-audit/SKILL.md +43 -2
- package/src/skills/intake-audit/references/state-audit-template.md +10 -0
- package/src/skills/nature-data/SKILL.md +128 -0
- package/src/skills/nature-data/UPSTREAM_LICENSE.txt +21 -0
- package/src/skills/nature-data/agents/openai.yaml +4 -0
- package/src/skills/nature-data/references/chinese-author-alignment.md +84 -0
- package/src/skills/nature-data/references/fair-metadata-checklist.md +105 -0
- package/src/skills/nature-data/references/policy-principles.md +103 -0
- package/src/skills/nature-data/references/repository-and-identifiers.md +96 -0
- package/src/skills/nature-data/references/source-basis.md +54 -0
- package/src/skills/nature-data/references/statement-patterns.md +153 -0
- package/src/skills/nature-figure/SKILL.md +197 -0
- package/src/skills/nature-figure/UPSTREAM_LICENSE.txt +21 -0
- package/src/skills/nature-figure/agents/openai.yaml +4 -0
- package/src/skills/nature-figure/evals/evals.json +37 -0
- package/src/skills/nature-figure/references/api.md +428 -0
- package/src/skills/nature-figure/references/backend-selection.md +100 -0
- package/src/skills/nature-figure/references/chart-types.md +281 -0
- package/src/skills/nature-figure/references/common-patterns.md +349 -0
- package/src/skills/nature-figure/references/design-theory.md +436 -0
- package/src/skills/nature-figure/references/figure-contract.md +93 -0
- package/src/skills/nature-figure/references/nature-2026-observations.md +112 -0
- package/src/skills/nature-figure/references/qa-contract.md +119 -0
- package/src/skills/nature-figure/references/r-template-index.md +66 -0
- package/src/skills/nature-figure/references/r-workflow.md +161 -0
- package/src/skills/nature-figure/references/tutorials.md +250 -0
- package/src/skills/nature-paper2ppt/SKILL.md +507 -0
- package/src/skills/nature-paper2ppt/UPSTREAM_LICENSE.txt +21 -0
- package/src/skills/nature-paper2ppt/agents/openai.yaml +4 -0
- package/src/skills/nature-polishing/SKILL.md +385 -0
- package/src/skills/nature-polishing/UPSTREAM_LICENSE.txt +21 -0
- package/src/skills/nature-polishing/agents/openai.yaml +4 -0
- package/src/skills/nature-polishing/references/phrasebank-playbook.md +162 -0
- package/src/skills/nature-polishing/references/section-moves.md +240 -0
- package/src/skills/nature-polishing/references/style-guardrails.md +94 -0
- package/src/skills/nature-polishing/references/writing-strategy.md +148 -0
- package/src/skills/optimize/SKILL.md +177 -1568
- package/src/skills/optimize/references/brief-shaping-playbook.md +95 -0
- package/src/skills/optimize/references/candidate-board-template.md +13 -0
- package/src/skills/optimize/references/candidate-ranking-template.md +51 -0
- package/src/skills/optimize/references/codegen-route-playbook.md +50 -0
- package/src/skills/optimize/references/debug-response-template.md +29 -0
- package/src/skills/optimize/references/frontier-review-template.md +32 -0
- package/src/skills/optimize/references/fusion-playbook.md +36 -0
- package/src/skills/optimize/references/method-brief-template.md +73 -0
- package/src/skills/optimize/references/operational-guidance.md +621 -0
- package/src/skills/optimize/references/optimization-memory-template.md +30 -0
- package/src/skills/optimize/references/optimize-checklist-template.md +18 -0
- package/src/skills/optimize/references/plateau-response-playbook.md +28 -0
- package/src/skills/optimize/references/prompt-patterns.md +49 -0
- package/src/skills/paper-outline/SKILL.md +227 -0
- package/src/skills/paper-outline/references/outline-patterns.md +87 -0
- package/src/skills/paper-plot/SKILL.md +79 -0
- package/src/skills/paper-plot/agents/openai.yaml +4 -0
- package/src/skills/paper-plot/references/bar_grouped_hatch.md +96 -0
- package/src/skills/paper-plot/references/bar_paired_delta.md +72 -0
- package/src/skills/paper-plot/references/line_confidence_band.md +75 -0
- package/src/skills/paper-plot/references/line_loss_with_inset.md +65 -0
- package/src/skills/paper-plot/references/line_training_curve.md +44 -0
- package/src/skills/paper-plot/references/radar_dual_series.md +59 -0
- package/src/skills/paper-plot/references/scatter_broken_axis.md +59 -0
- package/src/skills/paper-plot/references/scatter_tsne_cluster.md +72 -0
- package/src/skills/paper-plot/scripts/bar_memevolve.py +109 -0
- package/src/skills/paper-plot/scripts/bar_spice.py +166 -0
- package/src/skills/paper-plot/scripts/line_aime.py +94 -0
- package/src/skills/paper-plot/scripts/line_loss_inset.py +157 -0
- package/src/skills/paper-plot/scripts/line_selfdistill.py +168 -0
- package/src/skills/paper-plot/scripts/radar_dora.py +151 -0
- package/src/skills/paper-plot/scripts/scatter_break.py +169 -0
- package/src/skills/paper-plot/scripts/scatter_tsne.py +133 -0
- package/src/skills/rebuttal/SKILL.md +9 -0
- package/src/skills/references/tool-usage-by-stage.md +438 -0
- package/src/skills/review/SKILL.md +105 -7
- package/src/skills/science/PROVENANCE.md +44 -0
- package/src/skills/science/SKILL.md +137 -0
- package/src/skills/science/references/artifact-science-tool.md +110 -0
- package/src/skills/science/references/claim-type-discipline.md +56 -0
- package/src/skills/science/references/domain-index.md +422 -0
- package/src/skills/science/references/hpc-via-bash-exec.md +42 -0
- package/src/skills/science/references/package-check-playbook.md +64 -0
- package/src/skills/science/references/package-index.min.json +3616 -0
- package/src/skills/science/references/packages/abinit.md +80 -0
- package/src/skills/science/references/packages/acts.md +73 -0
- package/src/skills/science/references/packages/aiida-core.md +80 -0
- package/src/skills/science/references/packages/alamode.md +80 -0
- package/src/skills/science/references/packages/amuse.md +88 -0
- package/src/skills/science/references/packages/anndata.md +88 -0
- package/src/skills/science/references/packages/arbor.md +80 -0
- package/src/skills/science/references/packages/arc.md +73 -0
- package/src/skills/science/references/packages/astropy.md +88 -0
- package/src/skills/science/references/packages/astroquery.md +88 -0
- package/src/skills/science/references/packages/atomate2.md +80 -0
- package/src/skills/science/references/packages/atomsmltr.md +73 -0
- package/src/skills/science/references/packages/awkward.md +73 -0
- package/src/skills/science/references/packages/batman.md +88 -0
- package/src/skills/science/references/packages/biopython.md +88 -0
- package/src/skills/science/references/packages/bloqade.md +73 -0
- package/src/skills/science/references/packages/brian2.md +73 -0
- package/src/skills/science/references/packages/bullet3.md +73 -0
- package/src/skills/science/references/packages/calculix.md +80 -0
- package/src/skills/science/references/packages/cantera.md +73 -0
- package/src/skills/science/references/packages/cavity-md-ipi.md +80 -0
- package/src/skills/science/references/packages/ccdproc.md +88 -0
- package/src/skills/science/references/packages/celerite2.md +88 -0
- package/src/skills/science/references/packages/cellrank.md +73 -0
- package/src/skills/science/references/packages/cesm.md +80 -0
- package/src/skills/science/references/packages/chemicals.md +73 -0
- package/src/skills/science/references/packages/chempy.md +73 -0
- package/src/skills/science/references/packages/cirq.md +73 -0
- package/src/skills/science/references/packages/coffea.md +73 -0
- package/src/skills/science/references/packages/cp2k.md +88 -0
- package/src/skills/science/references/packages/custodian.md +80 -0
- package/src/skills/science/references/packages/dart.md +73 -0
- package/src/skills/science/references/packages/datamol.md +88 -0
- package/src/skills/science/references/packages/dd4hep.md +73 -0
- package/src/skills/science/references/packages/dealii.md +80 -0
- package/src/skills/science/references/packages/deepchem.md +88 -0
- package/src/skills/science/references/packages/delphes.md +73 -0
- package/src/skills/science/references/packages/devito.md +80 -0
- package/src/skills/science/references/packages/dftb.md +88 -0
- package/src/skills/science/references/packages/dftd4.md +88 -0
- package/src/skills/science/references/packages/dftk-jl.md +80 -0
- package/src/skills/science/references/packages/dolfinx.md +80 -0
- package/src/skills/science/references/packages/drake.md +73 -0
- package/src/skills/science/references/packages/dumux.md +73 -0
- package/src/skills/science/references/packages/elk.md +80 -0
- package/src/skills/science/references/packages/elmerfem.md +80 -0
- package/src/skills/science/references/packages/enzo-e.md +88 -0
- package/src/skills/science/references/packages/espresso.md +80 -0
- package/src/skills/science/references/packages/exoplanet.md +88 -0
- package/src/skills/science/references/packages/fairroot.md +73 -0
- package/src/skills/science/references/packages/fbpic.md +80 -0
- package/src/skills/science/references/packages/fdtdbath-meep.md +80 -0
- package/src/skills/science/references/packages/geant4.md +73 -0
- package/src/skills/science/references/packages/geosx.md +80 -0
- package/src/skills/science/references/packages/gprmax.md +80 -0
- package/src/skills/science/references/packages/gromacs.md +80 -0
- package/src/skills/science/references/packages/gwaslab.md +73 -0
- package/src/skills/science/references/packages/gz-sim.md +73 -0
- package/src/skills/science/references/packages/hail.md +88 -0
- package/src/skills/science/references/packages/hiphive.md +80 -0
- package/src/skills/science/references/packages/hoomd-blue.md +80 -0
- package/src/skills/science/references/packages/itensor.md +73 -0
- package/src/skills/science/references/packages/itensors-jl.md +73 -0
- package/src/skills/science/references/packages/jdftx.md +73 -0
- package/src/skills/science/references/packages/jobflow.md +80 -0
- package/src/skills/science/references/packages/kadanoffbaym-jl.md +73 -0
- package/src/skills/science/references/packages/kite.md +80 -0
- package/src/skills/science/references/packages/kratos.md +80 -0
- package/src/skills/science/references/packages/kwant.md +73 -0
- package/src/skills/science/references/packages/lammps.md +80 -0
- package/src/skills/science/references/packages/lightkurve.md +88 -0
- package/src/skills/science/references/packages/limix.md +73 -0
- package/src/skills/science/references/packages/maxwelllink.md +80 -0
- package/src/skills/science/references/packages/mcdc.md +73 -0
- package/src/skills/science/references/packages/meep.md +80 -0
- package/src/skills/science/references/packages/mfem.md +80 -0
- package/src/skills/science/references/packages/mitgcm.md +73 -0
- package/src/skills/science/references/packages/modflow6.md +73 -0
- package/src/skills/science/references/packages/molecool.md +73 -0
- package/src/skills/science/references/packages/mom6.md +73 -0
- package/src/skills/science/references/packages/moose.md +80 -0
- package/src/skills/science/references/packages/mpas-model.md +73 -0
- package/src/skills/science/references/packages/mujoco.md +73 -0
- package/src/skills/science/references/packages/mumax3.md +73 -0
- package/src/skills/science/references/packages/nekrs.md +80 -0
- package/src/skills/science/references/packages/nessi.md +73 -0
- package/src/skills/science/references/packages/nest-simulator.md +73 -0
- package/src/skills/science/references/packages/netket.md +73 -0
- package/src/skills/science/references/packages/neuron.md +73 -0
- package/src/skills/science/references/packages/nextflow.md +88 -0
- package/src/skills/science/references/packages/nwchem.md +88 -0
- package/src/skills/science/references/packages/openbabel.md +88 -0
- package/src/skills/science/references/packages/openems.md +80 -0
- package/src/skills/science/references/packages/openff-toolkit.md +88 -0
- package/src/skills/science/references/packages/openfoam-dev.md +80 -0
- package/src/skills/science/references/packages/openmc.md +73 -0
- package/src/skills/science/references/packages/openmm.md +80 -0
- package/src/skills/science/references/packages/openmoc.md +73 -0
- package/src/skills/science/references/packages/openmx.md +80 -0
- package/src/skills/science/references/packages/opensees.md +80 -0
- package/src/skills/science/references/packages/opensn.md +80 -0
- package/src/skills/science/references/packages/opm-simulators.md +73 -0
- package/src/skills/science/references/packages/oqupy.md +73 -0
- package/src/skills/science/references/packages/packmol.md +80 -0
- package/src/skills/science/references/packages/palabos.md +80 -0
- package/src/skills/science/references/packages/parflow.md +80 -0
- package/src/skills/science/references/packages/pennylane.md +88 -0
- package/src/skills/science/references/packages/perceval.md +73 -0
- package/src/skills/science/references/packages/phono3py.md +73 -0
- package/src/skills/science/references/packages/phonopy.md +73 -0
- package/src/skills/science/references/packages/photutils.md +88 -0
- package/src/skills/science/references/packages/picongpu.md +80 -0
- package/src/skills/science/references/packages/plink-ng.md +88 -0
- package/src/skills/science/references/packages/precice.md +73 -0
- package/src/skills/science/references/packages/psc.md +80 -0
- package/src/skills/science/references/packages/psi4.md +88 -0
- package/src/skills/science/references/packages/pybinding.md +73 -0
- package/src/skills/science/references/packages/pyfr.md +80 -0
- package/src/skills/science/references/packages/pyhf.md +73 -0
- package/src/skills/science/references/packages/pyiron_base.md +80 -0
- package/src/skills/science/references/packages/pylcp.md +73 -0
- package/src/skills/science/references/packages/pylith.md +80 -0
- package/src/skills/science/references/packages/pynbody.md +88 -0
- package/src/skills/science/references/packages/pysam.md +88 -0
- package/src/skills/science/references/packages/pyscf.md +88 -0
- package/src/skills/science/references/packages/q-e.md +73 -0
- package/src/skills/science/references/packages/qibo.md +73 -0
- package/src/skills/science/references/packages/qiskit.md +73 -0
- package/src/skills/science/references/packages/quantica-jl.md +73 -0
- package/src/skills/science/references/packages/quantumoptics-jl.md +73 -0
- package/src/skills/science/references/packages/quimb.md +73 -0
- package/src/skills/science/references/packages/qulacs.md +73 -0
- package/src/skills/science/references/packages/qutip.md +73 -0
- package/src/skills/science/references/packages/rdkit.md +88 -0
- package/src/skills/science/references/packages/rmg-py.md +73 -0
- package/src/skills/science/references/packages/root.md +73 -0
- package/src/skills/science/references/packages/scanpy.md +88 -0
- package/src/skills/science/references/packages/scikit-allel.md +88 -0
- package/src/skills/science/references/packages/scikit-bio.md +88 -0
- package/src/skills/science/references/packages/scqubits.md +73 -0
- package/src/skills/science/references/packages/scuff-em.md +80 -0
- package/src/skills/science/references/packages/scvi-tools.md +73 -0
- package/src/skills/science/references/packages/seissol.md +73 -0
- package/src/skills/science/references/packages/sfepy.md +80 -0
- package/src/skills/science/references/packages/sisl.md +73 -0
- package/src/skills/science/references/packages/smilei.md +80 -0
- package/src/skills/science/references/packages/snakemake.md +88 -0
- package/src/skills/science/references/packages/specfem3d-globe.md +80 -0
- package/src/skills/science/references/packages/specutils.md +88 -0
- package/src/skills/science/references/packages/spglib.md +80 -0
- package/src/skills/science/references/packages/squidpy.md +88 -0
- package/src/skills/science/references/packages/starry.md +88 -0
- package/src/skills/science/references/packages/strawberryfields.md +73 -0
- package/src/skills/science/references/packages/su2.md +80 -0
- package/src/skills/science/references/packages/sunny-jl.md +73 -0
- package/src/skills/science/references/packages/sw4.md +73 -0
- package/src/skills/science/references/packages/swift.md +88 -0
- package/src/skills/science/references/packages/tdnegf.md +73 -0
- package/src/skills/science/references/packages/tenpy.md +73 -0
- package/src/skills/science/references/packages/thermo.md +73 -0
- package/src/skills/science/references/packages/tkwant.md +73 -0
- package/src/skills/science/references/packages/tvb-root.md +73 -0
- package/src/skills/science/references/packages/uproot5.md +73 -0
- package/src/skills/science/references/packages/vampire.md +80 -0
- package/src/skills/science/references/packages/wannier_tools.md +73 -0
- package/src/skills/science/references/packages/warpx.md +80 -0
- package/src/skills/science/references/packages/wrf.md +73 -0
- package/src/skills/science/references/packages/xtb.md +88 -0
- package/src/skills/science/references/packages/yt.md +73 -0
- package/src/skills/science/references/science-task-brief-template.md +71 -0
- package/src/skills/scout/SKILL.md +83 -425
- package/src/skills/scout/references/literature-scout-template.md +5 -24
- package/src/skills/scout/references/operational-guidance.md +191 -0
- package/src/skills/scout/references/paper-triage-playbook.md +11 -35
- package/src/skills/write/SKILL.md +744 -1246
- package/src/skills/write/references/experiments_analysis_patterns.md +129 -0
- package/src/skills/write/references/oral_package_patterns.md +252 -0
- package/src/skills/write/references/oral_writing_principles.md +291 -0
- package/src/skills/write/references/section_rewrite_checklist.md +234 -0
- package/src/tui/dist/app/AppContainer.js +1314 -27
- package/src/tui/dist/components/Composer.js +26 -1
- package/src/tui/dist/components/ConfigScreen.js +2 -1
- package/src/tui/dist/components/InputPrompt.js +25 -9
- package/src/tui/dist/components/MainContent.js +18 -3
- package/src/tui/dist/components/QuestScreen.js +3 -2
- package/src/tui/dist/components/UtilityScreen.js +37 -0
- package/src/tui/dist/hooks/useSafeInput.js +10 -0
- package/src/tui/dist/index.js +13 -1
- package/src/tui/dist/layouts/DefaultAppLayout.js +11 -8
- package/src/tui/dist/lib/api.js +89 -1
- package/src/tui/package.json +1 -1
- package/src/ui/dist/assets/{AnalysisPlugin-DnSm0GZn.js → AnalysisPlugin-CA94NGmI.js} +1 -1
- package/src/ui/dist/assets/CliPlugin-DHBzphZU.js +79 -0
- package/src/ui/dist/assets/CodeEditorPlugin-BOFwD2rn.js +2 -0
- package/src/ui/dist/assets/{CodeViewerPlugin-itb0tltR.js → CodeViewerPlugin-CqDpgjik.js} +4 -4
- package/src/ui/dist/assets/{DocViewerPlugin-DqKkiCI6.js → DocViewerPlugin-UDBgt8-4.js} +3 -3
- package/src/ui/dist/assets/GitCommitViewerPlugin-BmHtZ0bZ.js +6 -0
- package/src/ui/dist/assets/{GitDiffViewerPlugin-DxL2ezFG.js → GitDiffViewerPlugin-CAxjNorQ.js} +2 -2
- package/src/ui/dist/assets/{GitSnapshotViewer-B_RQm1YZ.js → GitSnapshotViewer-CweA6VON.js} +2 -2
- package/src/ui/dist/assets/{ImageViewerPlugin-tHqlXY3n.js → ImageViewerPlugin-C8wHGvGN.js} +5 -5
- package/src/ui/dist/assets/LabPlugin-COyyLUol.js +32 -0
- package/src/ui/dist/assets/{LatexPlugin-B495DTXC.js → LatexPlugin-BQjAaA5J.js} +4 -4
- package/src/ui/dist/assets/{MarkdownViewerPlugin-DG28-61B.js → MarkdownViewerPlugin-Dy1NE2dI.js} +3 -3
- package/src/ui/dist/assets/{MarketplacePlugin-BiOGT-Kj.js → MarketplacePlugin-DMIZtEJ2.js} +2 -2
- package/src/ui/dist/assets/NotebookEditor-CFHMq_Qt.js +91 -0
- package/src/ui/dist/assets/{NotebookEditor-CVsj8h_T.js → NotebookEditor-WFyd8Ybt.js} +23 -23
- package/src/ui/dist/assets/{PdfLoader-CASDQmxJ.js → PdfLoader-CLE5u5TS.js} +3 -3
- package/src/ui/dist/assets/{PdfMarkdownPlugin-BFhwoKsY.js → PdfMarkdownPlugin-_iNK_H83.js} +1 -1
- package/src/ui/dist/assets/PdfViewerPlugin-DgWsbInT.js +22 -0
- package/src/ui/dist/assets/SearchPlugin-DrZmn5iw.js +11 -0
- package/src/ui/dist/assets/{TextViewerPlugin-CB4DYfWO.js → TextViewerPlugin-D1-T3aC7.js} +4 -4
- package/src/ui/dist/assets/branding/runner-claude.svg +107 -0
- package/src/ui/dist/assets/branding/runner-codex.svg +10 -0
- package/src/ui/dist/assets/branding/runner-kimi.svg +14 -0
- package/src/ui/dist/assets/branding/runner-opencode.svg +7 -0
- package/src/ui/dist/assets/cli-store-CoZ-x5Ip.js +1 -0
- package/src/ui/dist/assets/{code-DLC6G24T.js → code-DbsmSd3Y.js} +1 -1
- package/src/ui/dist/assets/file-diff-panel-DsvyRz47.js +1 -0
- package/src/ui/dist/assets/{wrap-text-CwMn-iqb.js → file-jump-queue-DeQBikaw.js} +3 -3
- package/src/ui/dist/assets/{file-socket-Cu4Qln7Y.js → file-socket-DA5XIx88.js} +1 -1
- package/src/ui/dist/assets/fonts/ds-fonts.css +50 -4
- package/src/ui/dist/assets/images/deepxiv/register-guide.png +0 -0
- package/src/ui/dist/assets/index-39vY9LmZ.js +1 -0
- package/src/ui/dist/assets/{index-wQ7RIIRd.js → index-BsO46tJA.js} +1 -1
- package/src/ui/dist/assets/index-CHzJ2xtB.js +3530 -0
- package/src/ui/dist/assets/index-DH-zxoZ3.css +33 -0
- package/src/ui/dist/assets/{plugin-notebook-HbW2K-1c.js → plugin-notebook-JRhysCqj.js} +2 -2
- package/src/ui/dist/assets/{project-sync-CsX08Qno.js → project-sync-DPmWKmKD.js} +1 -1
- package/src/ui/dist/assets/{zoom-out-R-GWEhzS.js → zoom-out-DAukFWen.js} +3 -3
- package/src/ui/dist/index.html +3 -3
- package/src/skills/analysis-campaign/references/artifact-orchestration.md +0 -58
- package/src/skills/baseline/references/memory-playbook.md +0 -40
- package/src/skills/baseline/references/publishable-baseline-package.md +0 -30
- package/src/skills/write/references/outline-evidence-contract-example.md +0 -107
- package/src/skills/write/references/paper-experiment-matrix-template.md +0 -131
- package/src/skills/write/references/paper-section-playbook.md +0 -64
- package/src/skills/write/references/reviewer-first-writing.md +0 -64
- package/src/skills/write/references/revision-checklist.md +0 -70
- package/src/skills/write/references/section-contracts.md +0 -82
- package/src/skills/write/references/sentence-level-proofing.md +0 -49
- package/src/ui/dist/assets/AiManusChatView-COFACy7V.js +0 -204
- package/src/ui/dist/assets/CliPlugin-CvwCmDQ5.js +0 -109
- package/src/ui/dist/assets/CodeEditorPlugin-cOqSa0xq.js +0 -2
- package/src/ui/dist/assets/GitCommitViewerPlugin-DVgNHBCS.js +0 -1
- package/src/ui/dist/assets/LabCopilotPanel-ClMbq5Yu.js +0 -14
- package/src/ui/dist/assets/LabPlugin-L_SuE8ow.js +0 -22
- package/src/ui/dist/assets/NotebookEditor-C-4Kt1p9.js +0 -81
- package/src/ui/dist/assets/PdfViewerPlugin-DcOzU9vd.js +0 -17
- package/src/ui/dist/assets/SearchPlugin-CHj7M58O.js +0 -16
- package/src/ui/dist/assets/VNCViewer-CjlbyCB3.js +0 -11
- package/src/ui/dist/assets/bot-CFkZY-JP.js +0 -6
- package/src/ui/dist/assets/chevron-up-Dq5ofbht.js +0 -6
- package/src/ui/dist/assets/file-content-Dv4LoZec.js +0 -1
- package/src/ui/dist/assets/file-diff-panel-Denq-lC3.js +0 -1
- package/src/ui/dist/assets/file-jump-queue-DA-SdG__.js +0 -1
- package/src/ui/dist/assets/git-commit-horizontal-BUh6G52n.js +0 -6
- package/src/ui/dist/assets/image-B9HUUddG.js +0 -6
- package/src/ui/dist/assets/index-B2B1sg-M.js +0 -1
- package/src/ui/dist/assets/index-Cgla8biy.css +0 -33
- package/src/ui/dist/assets/index-DRyx7vAc.js +0 -1
- package/src/ui/dist/assets/index-Gbl53BNp.js +0 -2496
- package/src/ui/dist/assets/pdf-effect-queue-ZtnHFCAi.js +0 -6
- package/src/ui/dist/assets/popover-DL6h35vr.js +0 -1
- package/src/ui/dist/assets/select-DvmXt1yY.js +0 -11
- package/src/ui/dist/assets/sigma-7jpXazui.js +0 -6
- package/src/ui/dist/assets/trash-xA7kFt8i.js +0 -11
- package/src/ui/dist/assets/useCliAccess-DsMwDjOp.js +0 -1
- package/src/ui/dist/assets/useFileDiffOverlay-FuhcnKiw.js +0 -1
|
@@ -0,0 +1,231 @@
|
|
|
1
|
+
schema_version: 1
|
|
2
|
+
id: aisb.b11.model_efficiency
|
|
3
|
+
name: AISB B11 Model Efficiency
|
|
4
|
+
version: 0.2.0
|
|
5
|
+
catalog_version: 0.1.0
|
|
6
|
+
one_line: LongBench-v2 mini request set for quality-latency-cost metric validation.
|
|
7
|
+
task_description: AISB B11 Model Efficiency is an AISB BenchStore catalog pilot entry. The package is
|
|
8
|
+
intended for public-dev workspace initialization, public evaluator checks, and paper/benchmark-track
|
|
9
|
+
submission scaffolding. Official leaderboard scores require organizer-side hidden replay.
|
|
10
|
+
homepage: https://aisb.deepscientist.cc
|
|
11
|
+
localized:
|
|
12
|
+
zh:
|
|
13
|
+
name: AISB B11 模型效率
|
|
14
|
+
one_line: 用于质量、延迟和成本指标验证的 LongBench-v2 mini 请求集。
|
|
15
|
+
task_description: AISB B11 模型效率 是 AISB BenchStore catalog pilot 条目。公开包只用于 public-dev 工作区初始化、公开 evaluator
|
|
16
|
+
检查和论文/benchmark track 提交脚手架;正式 leaderboard 分数必须由组织方 hidden replay 产生。
|
|
17
|
+
capability_tags:
|
|
18
|
+
- model_efficiency
|
|
19
|
+
- long_context
|
|
20
|
+
- latency
|
|
21
|
+
aisb_direction: B11
|
|
22
|
+
track_fit:
|
|
23
|
+
- paper_track
|
|
24
|
+
- benchmark_track
|
|
25
|
+
task_mode: benchmark
|
|
26
|
+
benchmark_mode: quality_latency_cost_logged_inference
|
|
27
|
+
requires_execution: true
|
|
28
|
+
requires_paper: true
|
|
29
|
+
integrity_level: hidden_reference
|
|
30
|
+
snapshot_status: partial
|
|
31
|
+
support_level: preview
|
|
32
|
+
discovery:
|
|
33
|
+
collection: AISB
|
|
34
|
+
collection_priority: 100
|
|
35
|
+
recommendation_weight: 500
|
|
36
|
+
featured: false
|
|
37
|
+
featured_reason: null
|
|
38
|
+
display:
|
|
39
|
+
placement: grid
|
|
40
|
+
card_size: m
|
|
41
|
+
badge: AISB
|
|
42
|
+
accent_priority: normal
|
|
43
|
+
official_links:
|
|
44
|
+
homepage: https://github.com/ResearAI/NLPCC-2026-Task9-AISB
|
|
45
|
+
github: https://github.com/ResearAI/NLPCC-2026-Task9-AISB
|
|
46
|
+
benchmark_overview: https://github.com/ResearAI/NLPCC-2026-Task9-AISB/tree/main/benchmarks/aisb/B11_ModelEfficiency
|
|
47
|
+
release_gate:
|
|
48
|
+
release_ready: false
|
|
49
|
+
status: registry_pending
|
|
50
|
+
full_real_replay: passed
|
|
51
|
+
reason: Public-dev and scorer-private replay paths exist locally, but final registry promotion, release
|
|
52
|
+
assets, and license review remain pending.
|
|
53
|
+
next_required_step: Validate the published BenchStore release asset on a clean host, then complete
|
|
54
|
+
registry and license review.
|
|
55
|
+
primary_outputs:
|
|
56
|
+
- results.json
|
|
57
|
+
- paper/paper.pdf
|
|
58
|
+
- paper/claims.json
|
|
59
|
+
- logs/experiment_log.jsonl
|
|
60
|
+
- logs/iterations.jsonl
|
|
61
|
+
cost_band: medium
|
|
62
|
+
time_band: 1-4h
|
|
63
|
+
difficulty: hard
|
|
64
|
+
data_access: public
|
|
65
|
+
resources:
|
|
66
|
+
minimum:
|
|
67
|
+
cpu_cores: 2
|
|
68
|
+
ram_gb: 4
|
|
69
|
+
disk_gb: 4
|
|
70
|
+
gpu_count: 0
|
|
71
|
+
recommended:
|
|
72
|
+
cpu_cores: 8
|
|
73
|
+
ram_gb: 16
|
|
74
|
+
disk_gb: 20
|
|
75
|
+
gpu_count: 0
|
|
76
|
+
source:
|
|
77
|
+
benchmark_dir: benchmarks/aisb/B11_ModelEfficiency
|
|
78
|
+
bench_yaml: benchmarks/aisb/B11_ModelEfficiency/bench.yaml
|
|
79
|
+
source_repo: ResearAI/NLPCC-2026-Task9-AISB
|
|
80
|
+
source_commit: b6b7527a226ca7328697156608571f2cb7dcda3a
|
|
81
|
+
download:
|
|
82
|
+
provider: github_release
|
|
83
|
+
repo: ResearAI/NLPCC-2026-Task9-AISB
|
|
84
|
+
tag: aisb-v0.0.1
|
|
85
|
+
asset_name: aisb.b11.model_efficiency-v0.2.0.zip
|
|
86
|
+
url: https://github.com/giao-123-sun/DeepScientist/releases/download/aisb-v0.0.1/aisb.b11.model_efficiency-v0.2.0.zip
|
|
87
|
+
source_url: https://github.com/ResearAI/NLPCC-2026-Task9-AISB/tree/main/benchmarks/aisb/B11_ModelEfficiency
|
|
88
|
+
archive_type: zip
|
|
89
|
+
local_dir_name: aisb.b11.model_efficiency
|
|
90
|
+
sha256: 890c432a9beed8912271a6fa72dde4c08fd58b89d74df5674e741e4a04c904db
|
|
91
|
+
size_bytes: 974447
|
|
92
|
+
published_at: '2026-04-30T00:00:00Z'
|
|
93
|
+
asset_status: published
|
|
94
|
+
dataset_download:
|
|
95
|
+
primary_method: packaged_public_dev
|
|
96
|
+
sources:
|
|
97
|
+
- kind: public_dev
|
|
98
|
+
access: public
|
|
99
|
+
url: null
|
|
100
|
+
note: Public-dev files are included in the published BenchStore package.
|
|
101
|
+
- kind: hidden_reference
|
|
102
|
+
access: private
|
|
103
|
+
url: null
|
|
104
|
+
note: Organizer-only hidden references are excluded from catalog packages.
|
|
105
|
+
credential_requirements:
|
|
106
|
+
mode: none
|
|
107
|
+
notes:
|
|
108
|
+
- No credential is required for packaged public-dev evaluation.
|
|
109
|
+
- External model APIs are optional participant choices and not bundled.
|
|
110
|
+
environment:
|
|
111
|
+
python: '>=3.11'
|
|
112
|
+
docker: false
|
|
113
|
+
key_packages:
|
|
114
|
+
- pytest
|
|
115
|
+
- PyYAML
|
|
116
|
+
notes:
|
|
117
|
+
- Use docker/Dockerfile when available for official replay parity.
|
|
118
|
+
- Do not copy organizer-private hidden references into participant workspaces.
|
|
119
|
+
launch_profiles:
|
|
120
|
+
- id: b11_public_dev_smoke
|
|
121
|
+
label: B11 public-dev smoke test
|
|
122
|
+
description: Initialize an AISB public-dev workspace, validate submission artifacts, and run the public
|
|
123
|
+
evaluator.
|
|
124
|
+
startup_contract:
|
|
125
|
+
- Use only files inside the installed benchmark workspace and the generated submission directory.
|
|
126
|
+
- Treat hidden references as organizer-only and unavailable to the agent.
|
|
127
|
+
- Produce benchmark outputs, paper artifacts, claims, and logs before packaging.
|
|
128
|
+
commands:
|
|
129
|
+
- python scripts/agent_tools.py workspace init B11 --dest .work/benchstore/B11
|
|
130
|
+
- python scripts/agent_tools.py evaluate B11 --bench-dir .work/benchstore/B11 --submission .work/benchstore/B11/submission
|
|
131
|
+
- python scripts/agent_tools.py submission validate .work/benchstore/B11/submission
|
|
132
|
+
- python scripts/agent_tools.py submission package .work/benchstore/B11/submission --output .work/benchstore/B11/submission.zip
|
|
133
|
+
- python scripts/agent_tools.py submission replay .work/benchstore/B11/submission --track B11
|
|
134
|
+
one_click_prompt: Start AISB B11 from the installed BenchStore package. Initialize the public-dev workspace,
|
|
135
|
+
inspect AGENT.md and README.md, run the baseline or your method, evaluate locally, validate/package
|
|
136
|
+
the submission, and record any release-gate risks in logs/experiment_log.jsonl.
|
|
137
|
+
paper:
|
|
138
|
+
venue: AISB
|
|
139
|
+
year: 2026
|
|
140
|
+
track_fit:
|
|
141
|
+
- paper_track
|
|
142
|
+
- benchmark_track
|
|
143
|
+
requires_claims_json: true
|
|
144
|
+
url: null
|
|
145
|
+
benchmark_track:
|
|
146
|
+
metric: quality_latency_cost_score
|
|
147
|
+
benchmark_source: LongBench-v2 mini request set
|
|
148
|
+
packaged_data_status: real LongBench-v2 public-dev request set plus scorer-private hidden rows
|
|
149
|
+
official_score_requires_hidden_replay: true
|
|
150
|
+
benchmarks:
|
|
151
|
+
primary:
|
|
152
|
+
name: LongBench-v2 mini
|
|
153
|
+
source: THUDM/LongBench-v2 (HF, Apache-2.0)
|
|
154
|
+
tasks: 3
|
|
155
|
+
hidden: 3
|
|
156
|
+
metric: quality_latency_cost_score (quality 0.55, latency 0.20, memory 0.10, tokens 0.10, cost 0.05)
|
|
157
|
+
sota: 63.3% (Gemini-2.5-Pro, w/ CoT)
|
|
158
|
+
our_baseline: 0.3529
|
|
159
|
+
alternatives:
|
|
160
|
+
- name: RULER
|
|
161
|
+
source: github.com/hsiehjackson/RULER
|
|
162
|
+
note: long-context efficiency; alternative metric
|
|
163
|
+
- name: MLPerf Inference
|
|
164
|
+
source: mlcommons.org
|
|
165
|
+
note: hardware-normalized; future candidate
|
|
166
|
+
papers:
|
|
167
|
+
highlights:
|
|
168
|
+
- title: 'LongBench v2: Towards Deeper Understanding on Realistic Long-Context Multitasks'
|
|
169
|
+
source: THUDM/LongBench-v2 (HF, Apache-2.0)
|
|
170
|
+
why: Primary benchmark; SOTA 63.3% (Gemini-2.5-Pro)
|
|
171
|
+
- title: 'Youtu-LLM: Unleashing Native Agent Potential for Lightweight LLMs'
|
|
172
|
+
arxiv: '2512.24618'
|
|
173
|
+
url: https://arxiv.org/abs/2512.24618
|
|
174
|
+
venue: Dec 2025
|
|
175
|
+
why: 1.96B params; sub-2B SOTA; quality-latency-cost balance
|
|
176
|
+
- title: 'MARS: Efficient Adaptive Co-Scheduling for Heterogeneous Agent Systems'
|
|
177
|
+
arxiv: '2604.26963'
|
|
178
|
+
url: https://arxiv.org/abs/2604.26963
|
|
179
|
+
venue: April 2026
|
|
180
|
+
why: 5.94x end-to-end latency reduction; GPU-CPU co-scheduling
|
|
181
|
+
full_references_url: https://github.com/ResearAI/NLPCC-2026-Task9-AISB/tree/main/benchmarks/aisb/B11_*/references/papers.json
|
|
182
|
+
nlpcc_tracks:
|
|
183
|
+
- track: T1
|
|
184
|
+
name: AI/CS Reasoning & Engineering
|
|
185
|
+
url: https://github.com/ResearAI/NLPCC-2026-Task9-AISB/tree/main/benchmarks/nlpcc/T1
|
|
186
|
+
private_hidden_reference_strategy:
|
|
187
|
+
policy: organizer_only
|
|
188
|
+
public_package_includes_hidden_reference: false
|
|
189
|
+
public_catalog_discloses_private_paths: false
|
|
190
|
+
score_release: aggregate_only
|
|
191
|
+
notes:
|
|
192
|
+
- Hidden answers, labels, canaries, and private evaluator internals are never packaged.
|
|
193
|
+
- Official scores must record hidden_reference_version without disclosing hidden content.
|
|
194
|
+
score_card:
|
|
195
|
+
required_fields:
|
|
196
|
+
- team_id
|
|
197
|
+
- system_name
|
|
198
|
+
- direction
|
|
199
|
+
- track
|
|
200
|
+
- benchmark_package_version
|
|
201
|
+
- evaluator_version
|
|
202
|
+
- hidden_reference_version
|
|
203
|
+
- docker_image_digest
|
|
204
|
+
- data_split_version
|
|
205
|
+
- benchmark_score
|
|
206
|
+
- paper_score
|
|
207
|
+
- cas_score
|
|
208
|
+
- final_score
|
|
209
|
+
- security_verdict
|
|
210
|
+
- license_verdict
|
|
211
|
+
- used_reference_scope
|
|
212
|
+
- official_score
|
|
213
|
+
- created_at
|
|
214
|
+
track_scoring:
|
|
215
|
+
paper_track: Final_A = 0.0 * S_benchmark + 1.0 * S_paper
|
|
216
|
+
benchmark_track: Final_B = 0.7 * S_benchmark + 0.3 * S_paper
|
|
217
|
+
cas: integrity gate, not bonus
|
|
218
|
+
risk_flags:
|
|
219
|
+
- hidden_reference_not_public
|
|
220
|
+
- not_release_ready
|
|
221
|
+
- registry_promotion_pending
|
|
222
|
+
- hardware_normalization_pending
|
|
223
|
+
risk_notes:
|
|
224
|
+
- This catalog entry is not release_ready.
|
|
225
|
+
- Download URL, checksum, and size point to the published GitHub release asset.
|
|
226
|
+
- Official scores require organizer-side replay against private hidden references.
|
|
227
|
+
recommended_when: Use this entry for AISB public-dev integration, local agent workflow checks, and catalog
|
|
228
|
+
UI validation.
|
|
229
|
+
not_recommended_when: Do not treat this entry as an official release or public leaderboard claim until
|
|
230
|
+
release gates pass.
|
|
231
|
+
image_path: ../image/aisb.b11.model_efficiency.svg
|
|
@@ -0,0 +1,238 @@
|
|
|
1
|
+
schema_version: 1
|
|
2
|
+
id: aisb.b12.embodied_ai
|
|
3
|
+
name: AISB B12 Embodied AI
|
|
4
|
+
version: 0.4.0
|
|
5
|
+
catalog_version: 0.1.0
|
|
6
|
+
one_line: CALVIN task-spec public-dev mini package for headless embodied rollout validation.
|
|
7
|
+
task_description: AISB B12 Embodied AI is an AISB BenchStore catalog pilot entry. The package is intended
|
|
8
|
+
for public-dev workspace initialization, public evaluator checks, and paper/benchmark-track submission
|
|
9
|
+
scaffolding. Official leaderboard scores require organizer-side hidden replay.
|
|
10
|
+
homepage: https://aisb.deepscientist.cc
|
|
11
|
+
localized:
|
|
12
|
+
zh:
|
|
13
|
+
name: AISB B12 具身智能
|
|
14
|
+
one_line: 用于无头具身 rollout 验证的 CALVIN 任务规格 public-dev mini 包。
|
|
15
|
+
task_description: AISB B12 具身智能 是 AISB BenchStore catalog pilot 条目。公开包只用于 public-dev 工作区初始化、公开 evaluator
|
|
16
|
+
检查和论文/benchmark track 提交脚手架;正式 leaderboard 分数必须由组织方 hidden replay 产生。
|
|
17
|
+
capability_tags:
|
|
18
|
+
- embodied_ai
|
|
19
|
+
- simulator
|
|
20
|
+
- robotics
|
|
21
|
+
aisb_direction: B12
|
|
22
|
+
track_fit:
|
|
23
|
+
- paper_track
|
|
24
|
+
- benchmark_track
|
|
25
|
+
task_mode: benchmark
|
|
26
|
+
benchmark_mode: embodied_episode_success_headless_rollout
|
|
27
|
+
requires_execution: true
|
|
28
|
+
requires_paper: true
|
|
29
|
+
integrity_level: hidden_reference
|
|
30
|
+
snapshot_status: partial
|
|
31
|
+
support_level: preview
|
|
32
|
+
discovery:
|
|
33
|
+
collection: AISB
|
|
34
|
+
collection_priority: 100
|
|
35
|
+
recommendation_weight: 500
|
|
36
|
+
featured: false
|
|
37
|
+
featured_reason: null
|
|
38
|
+
display:
|
|
39
|
+
placement: grid
|
|
40
|
+
card_size: m
|
|
41
|
+
badge: AISB
|
|
42
|
+
accent_priority: normal
|
|
43
|
+
official_links:
|
|
44
|
+
homepage: https://github.com/ResearAI/NLPCC-2026-Task9-AISB
|
|
45
|
+
github: https://github.com/ResearAI/NLPCC-2026-Task9-AISB
|
|
46
|
+
benchmark_overview: https://github.com/ResearAI/NLPCC-2026-Task9-AISB/tree/main/benchmarks/aisb/B12_EmbodiedAI
|
|
47
|
+
release_gate:
|
|
48
|
+
release_ready: false
|
|
49
|
+
status: registry_pending
|
|
50
|
+
full_real_replay: passed
|
|
51
|
+
reason: Public-dev and scorer-private replay paths exist locally, but final registry promotion, release
|
|
52
|
+
assets, and license review remain pending.
|
|
53
|
+
next_required_step: Validate the published BenchStore release asset on a clean host, then complete
|
|
54
|
+
registry and license review.
|
|
55
|
+
primary_outputs:
|
|
56
|
+
- results.json
|
|
57
|
+
- paper/paper.pdf
|
|
58
|
+
- paper/claims.json
|
|
59
|
+
- logs/experiment_log.jsonl
|
|
60
|
+
- logs/iterations.jsonl
|
|
61
|
+
cost_band: high
|
|
62
|
+
time_band: 2-8h
|
|
63
|
+
difficulty: hard
|
|
64
|
+
data_access: public
|
|
65
|
+
resources:
|
|
66
|
+
minimum:
|
|
67
|
+
cpu_cores: 4
|
|
68
|
+
ram_gb: 16
|
|
69
|
+
disk_gb: 20
|
|
70
|
+
gpu_count: 0
|
|
71
|
+
recommended:
|
|
72
|
+
cpu_cores: 8
|
|
73
|
+
ram_gb: 32
|
|
74
|
+
disk_gb: 80
|
|
75
|
+
gpu_count: 0
|
|
76
|
+
source:
|
|
77
|
+
benchmark_dir: benchmarks/aisb/B12_EmbodiedAI
|
|
78
|
+
bench_yaml: benchmarks/aisb/B12_EmbodiedAI/bench.yaml
|
|
79
|
+
source_repo: ResearAI/NLPCC-2026-Task9-AISB
|
|
80
|
+
source_commit: b6b7527a226ca7328697156608571f2cb7dcda3a
|
|
81
|
+
download:
|
|
82
|
+
provider: github_release
|
|
83
|
+
repo: ResearAI/NLPCC-2026-Task9-AISB
|
|
84
|
+
tag: aisb-v0.0.1
|
|
85
|
+
asset_name: aisb.b12.embodied_ai-v0.4.0.zip
|
|
86
|
+
url: https://github.com/giao-123-sun/DeepScientist/releases/download/aisb-v0.0.1/aisb.b12.embodied_ai-v0.4.0.zip
|
|
87
|
+
source_url: https://github.com/ResearAI/NLPCC-2026-Task9-AISB/tree/main/benchmarks/aisb/B12_EmbodiedAI
|
|
88
|
+
archive_type: zip
|
|
89
|
+
local_dir_name: aisb.b12.embodied_ai
|
|
90
|
+
sha256: cd221f86fddd6123f7487e0ec3e27dfc881aaf005a07c712a9d1fe6bab44e156
|
|
91
|
+
size_bytes: 25349
|
|
92
|
+
published_at: '2026-04-30T00:00:00Z'
|
|
93
|
+
asset_status: published
|
|
94
|
+
dataset_download:
|
|
95
|
+
primary_method: packaged_public_dev
|
|
96
|
+
sources:
|
|
97
|
+
- kind: public_dev
|
|
98
|
+
access: public
|
|
99
|
+
url: null
|
|
100
|
+
note: Public-dev files are included in the published BenchStore package.
|
|
101
|
+
- kind: hidden_reference
|
|
102
|
+
access: private
|
|
103
|
+
url: null
|
|
104
|
+
note: Organizer-only hidden references are excluded from catalog packages.
|
|
105
|
+
credential_requirements:
|
|
106
|
+
mode: none
|
|
107
|
+
notes:
|
|
108
|
+
- No credential is required for packaged public-dev evaluation.
|
|
109
|
+
- External model APIs are optional participant choices and not bundled.
|
|
110
|
+
environment:
|
|
111
|
+
python: '>=3.11'
|
|
112
|
+
docker: true
|
|
113
|
+
key_packages:
|
|
114
|
+
- pytest
|
|
115
|
+
- PyYAML
|
|
116
|
+
notes:
|
|
117
|
+
- Use docker/Dockerfile when available for official replay parity.
|
|
118
|
+
- Do not copy organizer-private hidden references into participant workspaces.
|
|
119
|
+
launch_profiles:
|
|
120
|
+
- id: b12_public_dev_smoke
|
|
121
|
+
label: B12 public-dev smoke test
|
|
122
|
+
description: Initialize an AISB public-dev workspace, validate submission artifacts, and run the public
|
|
123
|
+
evaluator.
|
|
124
|
+
startup_contract:
|
|
125
|
+
- Use only files inside the installed benchmark workspace and the generated submission directory.
|
|
126
|
+
- Treat hidden references as organizer-only and unavailable to the agent.
|
|
127
|
+
- Produce benchmark outputs, paper artifacts, claims, and logs before packaging.
|
|
128
|
+
commands:
|
|
129
|
+
- python scripts/agent_tools.py workspace init B12 --dest .work/benchstore/B12
|
|
130
|
+
- python scripts/agent_tools.py evaluate B12 --bench-dir .work/benchstore/B12 --submission .work/benchstore/B12/submission
|
|
131
|
+
- python scripts/agent_tools.py submission validate .work/benchstore/B12/submission
|
|
132
|
+
- python scripts/agent_tools.py submission package .work/benchstore/B12/submission --output .work/benchstore/B12/submission.zip
|
|
133
|
+
- python scripts/agent_tools.py submission replay .work/benchstore/B12/submission --track B12
|
|
134
|
+
one_click_prompt: Start AISB B12 from the installed BenchStore package. Initialize the public-dev workspace,
|
|
135
|
+
inspect AGENT.md and README.md, run the baseline or your method, evaluate locally, validate/package
|
|
136
|
+
the submission, and record any release-gate risks in logs/experiment_log.jsonl.
|
|
137
|
+
paper:
|
|
138
|
+
venue: AISB
|
|
139
|
+
year: 2026
|
|
140
|
+
track_fit:
|
|
141
|
+
- paper_track
|
|
142
|
+
- benchmark_track
|
|
143
|
+
requires_claims_json: true
|
|
144
|
+
url: null
|
|
145
|
+
benchmark_track:
|
|
146
|
+
metric: episode_success_rate
|
|
147
|
+
benchmark_source: CALVIN headless subset
|
|
148
|
+
packaged_data_status: real CALVIN task-spec mini split with scorer-private debug-dataset headless rollout
|
|
149
|
+
official_score_requires_hidden_replay: true
|
|
150
|
+
benchmarks:
|
|
151
|
+
primary:
|
|
152
|
+
name: CALVIN debug
|
|
153
|
+
source: github.com/mees/calvin (MIT)
|
|
154
|
+
tasks: 2
|
|
155
|
+
hidden: 2
|
|
156
|
+
metric: episode_success_rate
|
|
157
|
+
sota: FLOWER 4.53/5 (ABC, 2025)
|
|
158
|
+
public_score: 0.0
|
|
159
|
+
alternatives:
|
|
160
|
+
- name: SimplerEnv
|
|
161
|
+
source: github.com/simpler-env/SimplerEnv
|
|
162
|
+
note: lighter simulator; future candidate
|
|
163
|
+
- name: Behavior-1K
|
|
164
|
+
source: behavior.stanford.edu
|
|
165
|
+
note: 1000 everyday tasks; heavy
|
|
166
|
+
- name: BuilderBench
|
|
167
|
+
source: arXiv:2510.06288
|
|
168
|
+
note: 42 target structures; open exploration
|
|
169
|
+
papers:
|
|
170
|
+
highlights:
|
|
171
|
+
- title: 'CALVIN: A Benchmark for Language-Conditioned Policy Learning'
|
|
172
|
+
arxiv: '2112.03227'
|
|
173
|
+
url: https://arxiv.org/abs/2112.03227
|
|
174
|
+
why: Primary benchmark; debug dataset used for public-dev (MIT)
|
|
175
|
+
- title: 'EmboCoach-Bench: Benchmarking AI Agents in Developing Embodied Robots'
|
|
176
|
+
arxiv: '2601.21570'
|
|
177
|
+
url: https://arxiv.org/abs/2601.21570
|
|
178
|
+
venue: Jan 2026
|
|
179
|
+
why: 32 RL/IL tasks; agents beat human baseline by 26.5%
|
|
180
|
+
- title: 'BuilderBench: Building Blocks for Agents'
|
|
181
|
+
arxiv: '2510.06288'
|
|
182
|
+
url: https://arxiv.org/abs/2510.06288
|
|
183
|
+
venue: Oct 2025
|
|
184
|
+
why: 42 target structures; open exploration; no external supervision
|
|
185
|
+
- title: FLOWER
|
|
186
|
+
source: arXiv:2509.04996
|
|
187
|
+
why: 'Current CALVIN SOTA: 4.53/5 on ABC tasks'
|
|
188
|
+
full_references_url: https://github.com/ResearAI/NLPCC-2026-Task9-AISB/tree/main/benchmarks/aisb/B12_EmbodiedAI/references/papers.json
|
|
189
|
+
nlpcc_tracks:
|
|
190
|
+
- track: T1
|
|
191
|
+
name: AI/CS Reasoning & Engineering
|
|
192
|
+
url: https://github.com/ResearAI/NLPCC-2026-Task9-AISB/tree/main/benchmarks/nlpcc/T1
|
|
193
|
+
private_hidden_reference_strategy:
|
|
194
|
+
policy: organizer_only
|
|
195
|
+
public_package_includes_hidden_reference: false
|
|
196
|
+
public_catalog_discloses_private_paths: false
|
|
197
|
+
score_release: aggregate_only
|
|
198
|
+
notes:
|
|
199
|
+
- Hidden answers, labels, canaries, and private evaluator internals are never packaged.
|
|
200
|
+
- Official scores must record hidden_reference_version without disclosing hidden content.
|
|
201
|
+
score_card:
|
|
202
|
+
required_fields:
|
|
203
|
+
- team_id
|
|
204
|
+
- system_name
|
|
205
|
+
- direction
|
|
206
|
+
- track
|
|
207
|
+
- benchmark_package_version
|
|
208
|
+
- evaluator_version
|
|
209
|
+
- hidden_reference_version
|
|
210
|
+
- docker_image_digest
|
|
211
|
+
- data_split_version
|
|
212
|
+
- benchmark_score
|
|
213
|
+
- paper_score
|
|
214
|
+
- cas_score
|
|
215
|
+
- final_score
|
|
216
|
+
- security_verdict
|
|
217
|
+
- license_verdict
|
|
218
|
+
- used_reference_scope
|
|
219
|
+
- official_score
|
|
220
|
+
- created_at
|
|
221
|
+
track_scoring:
|
|
222
|
+
paper_track: Final_A = 0.0 * S_benchmark + 1.0 * S_paper
|
|
223
|
+
benchmark_track: Final_B = 0.7 * S_benchmark + 0.3 * S_paper
|
|
224
|
+
cas: integrity gate, not bonus
|
|
225
|
+
risk_flags:
|
|
226
|
+
- hidden_reference_not_public
|
|
227
|
+
- not_release_ready
|
|
228
|
+
- registry_promotion_pending
|
|
229
|
+
- simulator_dependency_pending
|
|
230
|
+
risk_notes:
|
|
231
|
+
- This catalog entry is not release_ready.
|
|
232
|
+
- Download URL, checksum, and size point to the published GitHub release asset.
|
|
233
|
+
- Official scores require organizer-side replay against private hidden references.
|
|
234
|
+
recommended_when: Use this entry for AISB public-dev integration, local agent workflow checks, and catalog
|
|
235
|
+
UI validation.
|
|
236
|
+
not_recommended_when: Do not treat this entry as an official release or public leaderboard claim until
|
|
237
|
+
release gates pass.
|
|
238
|
+
image_path: ../image/aisb.b12.embodied_ai.svg
|
|
@@ -0,0 +1,229 @@
|
|
|
1
|
+
schema_version: 1
|
|
2
|
+
id: aisb.b2.agent_systems
|
|
3
|
+
name: AISB B2 Agent Systems
|
|
4
|
+
version: 0.3.0
|
|
5
|
+
catalog_version: 0.1.0
|
|
6
|
+
one_line: tau2-bench airline public-dev mini package for tool-use task success validation.
|
|
7
|
+
task_description: AISB B2 Agent Systems is an AISB BenchStore catalog pilot entry. The package is intended
|
|
8
|
+
for public-dev workspace initialization, public evaluator checks, and paper/benchmark-track submission
|
|
9
|
+
scaffolding. Official leaderboard scores require organizer-side hidden replay.
|
|
10
|
+
homepage: https://aisb.deepscientist.cc
|
|
11
|
+
localized:
|
|
12
|
+
zh:
|
|
13
|
+
name: AISB B2 智能体系统
|
|
14
|
+
one_line: 用于工具调用任务成功率验证的 tau2-bench airline public-dev mini 包。
|
|
15
|
+
task_description: AISB B2 智能体系统 是 AISB BenchStore catalog pilot 条目。公开包只用于 public-dev 工作区初始化、公开 evaluator
|
|
16
|
+
检查和论文/benchmark track 提交脚手架;正式 leaderboard 分数必须由组织方 hidden replay 产生。
|
|
17
|
+
capability_tags:
|
|
18
|
+
- agent_systems
|
|
19
|
+
- tool_use
|
|
20
|
+
- task_success
|
|
21
|
+
aisb_direction: B2
|
|
22
|
+
track_fit:
|
|
23
|
+
- paper_track
|
|
24
|
+
- benchmark_track
|
|
25
|
+
task_mode: benchmark
|
|
26
|
+
benchmark_mode: agent_task_success
|
|
27
|
+
requires_execution: true
|
|
28
|
+
requires_paper: true
|
|
29
|
+
integrity_level: hidden_reference
|
|
30
|
+
snapshot_status: partial
|
|
31
|
+
support_level: preview
|
|
32
|
+
discovery:
|
|
33
|
+
collection: AISB
|
|
34
|
+
collection_priority: 100
|
|
35
|
+
recommendation_weight: 500
|
|
36
|
+
featured: false
|
|
37
|
+
featured_reason: null
|
|
38
|
+
display:
|
|
39
|
+
placement: grid
|
|
40
|
+
card_size: m
|
|
41
|
+
badge: AISB
|
|
42
|
+
accent_priority: normal
|
|
43
|
+
official_links:
|
|
44
|
+
homepage: https://github.com/ResearAI/NLPCC-2026-Task9-AISB
|
|
45
|
+
github: https://github.com/ResearAI/NLPCC-2026-Task9-AISB
|
|
46
|
+
benchmark_overview: https://github.com/ResearAI/NLPCC-2026-Task9-AISB/tree/main/benchmarks/aisb/B2_AgentSystems
|
|
47
|
+
release_gate:
|
|
48
|
+
release_ready: false
|
|
49
|
+
status: registry_pending
|
|
50
|
+
full_real_replay: passed
|
|
51
|
+
reason: Public-dev and scorer-private replay paths exist locally, but final registry promotion, release
|
|
52
|
+
assets, and license review remain pending.
|
|
53
|
+
next_required_step: Validate the published BenchStore release asset on a clean host, then complete
|
|
54
|
+
registry and license review.
|
|
55
|
+
primary_outputs:
|
|
56
|
+
- results.json
|
|
57
|
+
- paper/paper.pdf
|
|
58
|
+
- paper/claims.json
|
|
59
|
+
- logs/experiment_log.jsonl
|
|
60
|
+
- logs/iterations.jsonl
|
|
61
|
+
cost_band: medium
|
|
62
|
+
time_band: 1-4h
|
|
63
|
+
difficulty: hard
|
|
64
|
+
data_access: public
|
|
65
|
+
resources:
|
|
66
|
+
minimum:
|
|
67
|
+
cpu_cores: 4
|
|
68
|
+
ram_gb: 8
|
|
69
|
+
disk_gb: 5
|
|
70
|
+
gpu_count: 0
|
|
71
|
+
recommended:
|
|
72
|
+
cpu_cores: 8
|
|
73
|
+
ram_gb: 16
|
|
74
|
+
disk_gb: 20
|
|
75
|
+
gpu_count: 0
|
|
76
|
+
source:
|
|
77
|
+
benchmark_dir: benchmarks/aisb/B2_AgentSystems
|
|
78
|
+
bench_yaml: benchmarks/aisb/B2_AgentSystems/bench.yaml
|
|
79
|
+
source_repo: ResearAI/NLPCC-2026-Task9-AISB
|
|
80
|
+
source_commit: b6b7527a226ca7328697156608571f2cb7dcda3a
|
|
81
|
+
download:
|
|
82
|
+
provider: github_release
|
|
83
|
+
repo: ResearAI/NLPCC-2026-Task9-AISB
|
|
84
|
+
tag: aisb-v0.0.1
|
|
85
|
+
asset_name: aisb.b2.agent_systems-v0.3.0.zip
|
|
86
|
+
url: https://github.com/giao-123-sun/DeepScientist/releases/download/aisb-v0.0.1/aisb.b2.agent_systems-v0.3.0.zip
|
|
87
|
+
source_url: https://github.com/ResearAI/NLPCC-2026-Task9-AISB/tree/main/benchmarks/aisb/B2_AgentSystems
|
|
88
|
+
archive_type: zip
|
|
89
|
+
local_dir_name: aisb.b2.agent_systems
|
|
90
|
+
sha256: 640fe257640743a51c7e65bede73c9ea19be97b4d85b232689c23d477d051216
|
|
91
|
+
size_bytes: 25925
|
|
92
|
+
published_at: '2026-04-30T00:00:00Z'
|
|
93
|
+
asset_status: published
|
|
94
|
+
dataset_download:
|
|
95
|
+
primary_method: packaged_public_dev
|
|
96
|
+
sources:
|
|
97
|
+
- kind: public_dev
|
|
98
|
+
access: public
|
|
99
|
+
url: null
|
|
100
|
+
note: Public-dev files are included in the published BenchStore package.
|
|
101
|
+
- kind: hidden_reference
|
|
102
|
+
access: private
|
|
103
|
+
url: null
|
|
104
|
+
note: Organizer-only hidden references are excluded from catalog packages.
|
|
105
|
+
credential_requirements:
|
|
106
|
+
mode: none
|
|
107
|
+
notes:
|
|
108
|
+
- No credential is required for packaged public-dev evaluation.
|
|
109
|
+
- External model APIs are optional participant choices and not bundled.
|
|
110
|
+
environment:
|
|
111
|
+
python: '>=3.11'
|
|
112
|
+
docker: true
|
|
113
|
+
key_packages:
|
|
114
|
+
- pytest
|
|
115
|
+
- PyYAML
|
|
116
|
+
notes:
|
|
117
|
+
- Use docker/Dockerfile when available for official replay parity.
|
|
118
|
+
- Do not copy organizer-private hidden references into participant workspaces.
|
|
119
|
+
launch_profiles:
|
|
120
|
+
- id: b2_public_dev_smoke
|
|
121
|
+
label: B2 public-dev smoke test
|
|
122
|
+
description: Initialize an AISB public-dev workspace, validate submission artifacts, and run the public
|
|
123
|
+
evaluator.
|
|
124
|
+
startup_contract:
|
|
125
|
+
- Use only files inside the installed benchmark workspace and the generated submission directory.
|
|
126
|
+
- Treat hidden references as organizer-only and unavailable to the agent.
|
|
127
|
+
- Produce benchmark outputs, paper artifacts, claims, and logs before packaging.
|
|
128
|
+
commands:
|
|
129
|
+
- python scripts/agent_tools.py workspace init B2 --dest .work/benchstore/B2
|
|
130
|
+
- python scripts/agent_tools.py evaluate B2 --bench-dir .work/benchstore/B2 --submission .work/benchstore/B2/submission
|
|
131
|
+
- python scripts/agent_tools.py submission validate .work/benchstore/B2/submission
|
|
132
|
+
- python scripts/agent_tools.py submission package .work/benchstore/B2/submission --output .work/benchstore/B2/submission.zip
|
|
133
|
+
- python scripts/agent_tools.py submission replay .work/benchstore/B2/submission --track B2
|
|
134
|
+
one_click_prompt: Start AISB B2 from the installed BenchStore package. Initialize the public-dev workspace,
|
|
135
|
+
inspect AGENT.md and README.md, run the baseline or your method, evaluate locally, validate/package
|
|
136
|
+
the submission, and record any release-gate risks in logs/experiment_log.jsonl.
|
|
137
|
+
paper:
|
|
138
|
+
venue: AISB
|
|
139
|
+
year: 2026
|
|
140
|
+
track_fit:
|
|
141
|
+
- paper_track
|
|
142
|
+
- benchmark_track
|
|
143
|
+
requires_claims_json: true
|
|
144
|
+
url: null
|
|
145
|
+
benchmark_track:
|
|
146
|
+
metric: task_success_rate
|
|
147
|
+
benchmark_source: tau2-bench airline mini
|
|
148
|
+
packaged_data_status: real tau2-bench airline mini split
|
|
149
|
+
official_score_requires_hidden_replay: true
|
|
150
|
+
benchmarks:
|
|
151
|
+
primary:
|
|
152
|
+
name: tau2-bench airline
|
|
153
|
+
source: github.com/sierra-research/tau2-bench
|
|
154
|
+
tasks: 2
|
|
155
|
+
metric: task_success_rate
|
|
156
|
+
sota: no public absolute scores
|
|
157
|
+
public_score: 0.0
|
|
158
|
+
alternatives:
|
|
159
|
+
- name: GAIA
|
|
160
|
+
source: huggingface.co/datasets/gaia-benchmark/GAIA
|
|
161
|
+
note: gated; secondary candidate
|
|
162
|
+
- name: Terminal-Bench
|
|
163
|
+
source: github.com/terminal-bench
|
|
164
|
+
note: CLI agent benchmark; future candidate
|
|
165
|
+
papers:
|
|
166
|
+
highlights:
|
|
167
|
+
- title: 'tau2-bench: A Benchmark for Tool-Agent-User Interaction'
|
|
168
|
+
source: github.com/sierra-research/tau2-bench
|
|
169
|
+
why: Primary benchmark; airline domain used for public-dev
|
|
170
|
+
- title: Recursive Multi-Agent Systems
|
|
171
|
+
arxiv: '2604.25917'
|
|
172
|
+
url: https://arxiv.org/abs/2604.25917
|
|
173
|
+
venue: April 2026
|
|
174
|
+
why: +8.3% avg accuracy; 1.2x-2.4x inference speedup
|
|
175
|
+
- title: 'DR3-Eval: Towards Realistic and Reproducible Deep Research Evaluation'
|
|
176
|
+
arxiv: '2604.14683'
|
|
177
|
+
url: https://arxiv.org/abs/2604.14683
|
|
178
|
+
venue: April 2026
|
|
179
|
+
why: 5-dimension deep research evaluation framework
|
|
180
|
+
full_references_url: https://github.com/ResearAI/NLPCC-2026-Task9-AISB/tree/main/benchmarks/aisb/B2_AgentSystems/references/papers.json
|
|
181
|
+
nlpcc_tracks:
|
|
182
|
+
- track: T1
|
|
183
|
+
name: AI/CS Reasoning & Engineering
|
|
184
|
+
url: https://github.com/ResearAI/NLPCC-2026-Task9-AISB/tree/main/benchmarks/nlpcc/T1
|
|
185
|
+
private_hidden_reference_strategy:
|
|
186
|
+
policy: organizer_only
|
|
187
|
+
public_package_includes_hidden_reference: false
|
|
188
|
+
public_catalog_discloses_private_paths: false
|
|
189
|
+
score_release: aggregate_only
|
|
190
|
+
notes:
|
|
191
|
+
- Hidden answers, labels, canaries, and private evaluator internals are never packaged.
|
|
192
|
+
- Official scores must record hidden_reference_version without disclosing hidden content.
|
|
193
|
+
score_card:
|
|
194
|
+
required_fields:
|
|
195
|
+
- team_id
|
|
196
|
+
- system_name
|
|
197
|
+
- direction
|
|
198
|
+
- track
|
|
199
|
+
- benchmark_package_version
|
|
200
|
+
- evaluator_version
|
|
201
|
+
- hidden_reference_version
|
|
202
|
+
- docker_image_digest
|
|
203
|
+
- data_split_version
|
|
204
|
+
- benchmark_score
|
|
205
|
+
- paper_score
|
|
206
|
+
- cas_score
|
|
207
|
+
- final_score
|
|
208
|
+
- security_verdict
|
|
209
|
+
- license_verdict
|
|
210
|
+
- used_reference_scope
|
|
211
|
+
- official_score
|
|
212
|
+
- created_at
|
|
213
|
+
track_scoring:
|
|
214
|
+
paper_track: Final_A = 0.0 * S_benchmark + 1.0 * S_paper
|
|
215
|
+
benchmark_track: Final_B = 0.7 * S_benchmark + 0.3 * S_paper
|
|
216
|
+
cas: integrity gate, not bonus
|
|
217
|
+
risk_flags:
|
|
218
|
+
- hidden_reference_not_public
|
|
219
|
+
- not_release_ready
|
|
220
|
+
- registry_promotion_pending
|
|
221
|
+
risk_notes:
|
|
222
|
+
- This catalog entry is not release_ready.
|
|
223
|
+
- Download URL, checksum, and size point to the published GitHub release asset.
|
|
224
|
+
- Official scores require organizer-side replay against private hidden references.
|
|
225
|
+
recommended_when: Use this entry for AISB public-dev integration, local agent workflow checks, and catalog
|
|
226
|
+
UI validation.
|
|
227
|
+
not_recommended_when: Do not treat this entry as an official release or public leaderboard claim until
|
|
228
|
+
release gates pass.
|
|
229
|
+
image_path: ../image/aisb.b2.agent_systems.svg
|