@researai/deepscientist 1.5.16 → 1.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +309 -130
- package/AISB/catalog/aisb.b1.agentic_coding.yaml +244 -0
- package/AISB/catalog/aisb.b10.climate_earth.yaml +235 -0
- package/AISB/catalog/aisb.b11.model_efficiency.yaml +231 -0
- package/AISB/catalog/aisb.b12.embodied_ai.yaml +238 -0
- package/AISB/catalog/aisb.b2.agent_systems.yaml +229 -0
- package/AISB/catalog/aisb.b3.self_evolving_rl.yaml +237 -0
- package/AISB/catalog/aisb.b4.lm_reasoning.yaml +240 -0
- package/AISB/catalog/aisb.b5.math_proof.yaml +235 -0
- package/AISB/catalog/aisb.b6.research_process.yaml +243 -0
- package/AISB/catalog/aisb.b7.multimodal_fusion.yaml +232 -0
- package/AISB/catalog/aisb.b8.lifesci_drug.yaml +275 -0
- package/AISB/catalog/aisb.b9.material_science.yaml +237 -0
- package/AISB/catalog/aisb.t3.001_savvy.yaml +159 -0
- package/AISB/catalog/aisb.t3.001_savvy.zh.yaml +121 -0
- package/AISB/catalog/aisb.t3.002_pinet.yaml +189 -0
- package/AISB/catalog/aisb.t3.002_pinet.zh.yaml +130 -0
- package/AISB/catalog/aisb.t3.004_decentralattn.yaml +184 -0
- package/AISB/catalog/aisb.t3.004_decentralattn.zh.yaml +153 -0
- package/AISB/catalog/aisb.t3.005_tsae.yaml +193 -0
- package/AISB/catalog/aisb.t3.005_tsae.zh.yaml +139 -0
- package/AISB/catalog/aisb.t3.006_physense.yaml +194 -0
- package/AISB/catalog/aisb.t3.006_physense.zh.yaml +118 -0
- package/AISB/catalog/aisb.t3.007_reasoningiqa.yaml +169 -0
- package/AISB/catalog/aisb.t3.007_reasoningiqa.zh.yaml +133 -0
- package/AISB/catalog/aisb.t3.008_meanflows.yaml +188 -0
- package/AISB/catalog/aisb.t3.008_meanflows.zh.yaml +140 -0
- package/AISB/catalog/aisb.t3.009_scoremissing.yaml +179 -0
- package/AISB/catalog/aisb.t3.009_scoremissing.zh.yaml +119 -0
- package/AISB/catalog/aisb.t3.010_suitabilityfilter.yaml +221 -0
- package/AISB/catalog/aisb.t3.010_suitabilityfilter.zh.yaml +141 -0
- package/AISB/catalog/aisb.t3.011_osd.yaml +206 -0
- package/AISB/catalog/aisb.t3.011_osd.zh.yaml +163 -0
- package/AISB/catalog/aisb.t3.012_efficientqat.yaml +206 -0
- package/AISB/catalog/aisb.t3.012_efficientqat.zh.yaml +159 -0
- package/AISB/catalog/aisb.t3.013_appl.yaml +152 -0
- package/AISB/catalog/aisb.t3.013_appl.zh.yaml +126 -0
- package/AISB/catalog/aisb.t3.014_piguard.yaml +207 -0
- package/AISB/catalog/aisb.t3.014_piguard.zh.yaml +164 -0
- package/AISB/catalog/aisb.t3.015_frspec.yaml +209 -0
- package/AISB/catalog/aisb.t3.015_frspec.zh.yaml +163 -0
- package/AISB/catalog/aisb.t3.016_mathfusion.yaml +166 -0
- package/AISB/catalog/aisb.t3.016_mathfusion.zh.yaml +145 -0
- package/AISB/catalog/aisb.t3.017_multimodalglp.yaml +171 -0
- package/AISB/catalog/aisb.t3.017_multimodalglp.zh.yaml +122 -0
- package/AISB/catalog/aisb.t3.018_cotsynth.yaml +206 -0
- package/AISB/catalog/aisb.t3.018_cotsynth.zh.yaml +162 -0
- package/AISB/catalog/aisb.t3.019_dyscaleut.yaml +211 -0
- package/AISB/catalog/aisb.t3.019_dyscaleut.zh.yaml +148 -0
- package/AISB/catalog/aisb.t3.020_aristotle.yaml +173 -0
- package/AISB/catalog/aisb.t3.020_aristotle.zh.yaml +119 -0
- package/AISB/catalog/aisb.t3.021_tokenrecycling.yaml +160 -0
- package/AISB/catalog/aisb.t3.021_tokenrecycling.zh.yaml +129 -0
- package/AISB/catalog/aisb.t3.022_chainofreasoning.yaml +204 -0
- package/AISB/catalog/aisb.t3.022_chainofreasoning.zh.yaml +161 -0
- package/AISB/catalog/aisb.t3.023_guidedembed.yaml +211 -0
- package/AISB/catalog/aisb.t3.023_guidedembed.zh.yaml +189 -0
- package/AISB/catalog/aisb.t3.024_outputcentric.yaml +148 -0
- package/AISB/catalog/aisb.t3.024_outputcentric.zh.yaml +131 -0
- package/AISB/catalog/aisb.t3.025_deeper.yaml +143 -0
- package/AISB/catalog/aisb.t3.025_deeper.zh.yaml +116 -0
- package/AISB/catalog/aisb.t3.026_gartkg.yaml +195 -0
- package/AISB/catalog/aisb.t3.026_gartkg.zh.yaml +127 -0
- package/AISB/catalog/aisb.t3.027_citeeval.yaml +182 -0
- package/AISB/catalog/aisb.t3.027_citeeval.zh.yaml +135 -0
- package/AISB/catalog/aisb.t3.028_sbam.yaml +206 -0
- package/AISB/catalog/aisb.t3.028_sbam.zh.yaml +166 -0
- package/AISB/catalog/aisb.t3.029_cdqgeoembed.yaml +224 -0
- package/AISB/catalog/aisb.t3.029_cdqgeoembed.zh.yaml +142 -0
- package/AISB/catalog/aisb.t3.030_processrm.yaml +211 -0
- package/AISB/catalog/aisb.t3.030_processrm.zh.yaml +166 -0
- package/AISB/catalog/aisb.t3.031_circuitstability.yaml +172 -0
- package/AISB/catalog/aisb.t3.031_circuitstability.zh.yaml +134 -0
- package/AISB/catalog/aisb.t3.032_ptsolver.yaml +169 -0
- package/AISB/catalog/aisb.t3.032_ptsolver.zh.yaml +135 -0
- package/AISB/catalog/aisb.t3.033_gcse.yaml +144 -0
- package/AISB/catalog/aisb.t3.033_gcse.zh.yaml +126 -0
- package/AISB/catalog/aisb.t3.034_ensemblewm.yaml +183 -0
- package/AISB/catalog/aisb.t3.034_ensemblewm.zh.yaml +146 -0
- package/AISB/catalog/aisb.t3.035_moralvalueswa.yaml +207 -0
- package/AISB/catalog/aisb.t3.035_moralvalueswa.zh.yaml +165 -0
- package/AISB/catalog/aisb.t3.036_weakstrongpref.yaml +210 -0
- package/AISB/catalog/aisb.t3.036_weakstrongpref.zh.yaml +194 -0
- package/AISB/catalog/aisb.t3.037_dementiamask.yaml +172 -0
- package/AISB/catalog/aisb.t3.037_dementiamask.zh.yaml +132 -0
- package/AISB/catalog/aisb.t3.038_tinysam.yaml +284 -0
- package/AISB/catalog/aisb.t3.038_tinysam.zh.yaml +240 -0
- package/AISB/catalog/aisb.t3.039_calf.yaml +224 -0
- package/AISB/catalog/aisb.t3.039_calf.zh.yaml +194 -0
- package/AISB/catalog/aisb.t3.040_graniteguardian.yaml +199 -0
- package/AISB/catalog/aisb.t3.040_graniteguardian.zh.yaml +174 -0
- package/AISB/catalog/aisb.t3.041_amdm.yaml +149 -0
- package/AISB/catalog/aisb.t3.041_amdm.zh.yaml +137 -0
- package/AISB/catalog/aisb.t3.042_xpatch.yaml +216 -0
- package/AISB/catalog/aisb.t3.042_xpatch.zh.yaml +182 -0
- package/AISB/catalog/aisb.t3.043_vhm.yaml +268 -0
- package/AISB/catalog/aisb.t3.043_vhm.zh.yaml +193 -0
- package/AISB/catalog/aisb.t3.044_rgvi.yaml +224 -0
- package/AISB/catalog/aisb.t3.044_rgvi.zh.yaml +176 -0
- package/AISB/catalog/aisb.t3.045_pslstm.yaml +203 -0
- package/AISB/catalog/aisb.t3.045_pslstm.zh.yaml +179 -0
- package/AISB/catalog/aisb.t3.046_nonstatts.yaml +208 -0
- package/AISB/catalog/aisb.t3.046_nonstatts.zh.yaml +194 -0
- package/AISB/catalog/aisb.t3.047_timepfn.yaml +156 -0
- package/AISB/catalog/aisb.t3.047_timepfn.zh.yaml +124 -0
- package/AISB/catalog/aisb.t3.048_proxyspex.yaml +148 -0
- package/AISB/catalog/aisb.t3.048_proxyspex.zh.yaml +125 -0
- package/AISB/catalog/aisb.t3.049_hogwildinference.yaml +183 -0
- package/AISB/catalog/aisb.t3.049_hogwildinference.zh.yaml +138 -0
- package/AISB/catalog/aisb.t3.050_causalpfn.yaml +214 -0
- package/AISB/catalog/aisb.t3.050_causalpfn.zh.yaml +190 -0
- package/AISB/catalog/aisb.t3.051_flashtp.yaml +169 -0
- package/AISB/catalog/aisb.t3.051_flashtp.zh.yaml +124 -0
- package/AISB/catalog/aisb.t3.052_nsdiff.yaml +155 -0
- package/AISB/catalog/aisb.t3.052_nsdiff.zh.yaml +138 -0
- package/AISB/catalog/aisb.t3.053_k2vae.yaml +158 -0
- package/AISB/catalog/aisb.t3.053_k2vae.zh.yaml +132 -0
- package/AISB/catalog/aisb.t3.054_timebase.yaml +178 -0
- package/AISB/catalog/aisb.t3.054_timebase.zh.yaml +158 -0
- package/AISB/catalog/aisb.t3.055_csbrain.yaml +238 -0
- package/AISB/catalog/aisb.t3.055_csbrain.zh.yaml +184 -0
- package/AISB/catalog/aisb.t3.056_infosam.yaml +224 -0
- package/AISB/catalog/aisb.t3.056_infosam.zh.yaml +189 -0
- package/AISB/catalog/aisb.t3.057_mdreid.yaml +129 -0
- package/AISB/catalog/aisb.t3.057_mdreid.zh.yaml +117 -0
- package/AISB/catalog/aisb.t3.058_mindglitch.yaml +171 -0
- package/AISB/catalog/aisb.t3.058_mindglitch.zh.yaml +145 -0
- package/AISB/catalog/aisb.t3.059_selfsupervised.yaml +154 -0
- package/AISB/catalog/aisb.t3.059_selfsupervised.zh.yaml +125 -0
- package/AISB/catalog/aisb.t3.060_iaggad.yaml +121 -0
- package/AISB/catalog/aisb.t3.060_iaggad.zh.yaml +100 -0
- package/AISB/catalog/aisb.t3.061_hsgkn.yaml +136 -0
- package/AISB/catalog/aisb.t3.061_hsgkn.zh.yaml +113 -0
- package/AISB/catalog/aisb.t3.062_visionts.yaml +237 -0
- package/AISB/catalog/aisb.t3.062_visionts.zh.yaml +216 -0
- package/AISB/catalog/aisb.t3.063_tsrag.yaml +162 -0
- package/AISB/catalog/aisb.t3.063_tsrag.zh.yaml +138 -0
- package/AISB/catalog/aisb.t3.064_pir.yaml +221 -0
- package/AISB/catalog/aisb.t3.064_pir.zh.yaml +197 -0
- package/AISB/catalog/aisb.t3.065_proteinbinding.yaml +234 -0
- package/AISB/catalog/aisb.t3.065_proteinbinding.zh.yaml +167 -0
- package/AISB/catalog/aisb.t3.066_tropicalattention.yaml +267 -0
- package/AISB/catalog/aisb.t3.066_tropicalattention.zh.yaml +229 -0
- package/AISB/catalog/aisb.t3.067_kanad.yaml +193 -0
- package/AISB/catalog/aisb.t3.067_kanad.zh.yaml +167 -0
- package/AISB/catalog/aisb.t3.068_sempo.yaml +187 -0
- package/AISB/catalog/aisb.t3.068_sempo.zh.yaml +148 -0
- package/AISB/catalog/aisb.t3.069_treehfd.yaml +129 -0
- package/AISB/catalog/aisb.t3.069_treehfd.zh.yaml +111 -0
- package/AISB/catalog/aisb.t3.070_certifiedunlearning.yaml +224 -0
- package/AISB/catalog/aisb.t3.070_certifiedunlearning.zh.yaml +171 -0
- package/AISB/catalog/aisb.t3.071_neuralmjd.yaml +142 -0
- package/AISB/catalog/aisb.t3.071_neuralmjd.zh.yaml +120 -0
- package/AISB/catalog/aisb.t3.072_fedgmt.yaml +181 -0
- package/AISB/catalog/aisb.t3.072_fedgmt.zh.yaml +158 -0
- package/AISB/catalog/aisb.t3.073_rld.yaml +161 -0
- package/AISB/catalog/aisb.t3.073_rld.zh.yaml +129 -0
- package/AISB/catalog/aisb.t3.074_lsvi.yaml +163 -0
- package/AISB/catalog/aisb.t3.074_lsvi.zh.yaml +129 -0
- package/AISB/catalog/aisb.t3.075_treeslicedentropy.yaml +201 -0
- package/AISB/catalog/aisb.t3.075_treeslicedentropy.zh.yaml +148 -0
- package/AISB/catalog/aisb.t3.076_aanet.yaml +169 -0
- package/AISB/catalog/aisb.t3.076_aanet.zh.yaml +129 -0
- package/AISB/catalog/aisb.t3.077_cmnn.yaml +199 -0
- package/AISB/catalog/aisb.t3.077_cmnn.zh.yaml +165 -0
- package/AISB/catalog/aisb.t3.078_conformalanomaly.yaml +146 -0
- package/AISB/catalog/aisb.t3.078_conformalanomaly.zh.yaml +117 -0
- package/AISB/catalog/aisb.t3.079_dpfkmeans.yaml +131 -0
- package/AISB/catalog/aisb.t3.079_dpfkmeans.zh.yaml +104 -0
- package/AISB/catalog/aisb.t3.080_latentscorereweight.yaml +169 -0
- package/AISB/catalog/aisb.t3.080_latentscorereweight.zh.yaml +123 -0
- package/AISB/catalog/aisb.t3.081_qmamba.yaml +150 -0
- package/AISB/catalog/aisb.t3.081_qmamba.zh.yaml +117 -0
- package/AISB/catalog/aisb.t3.082_onlinellmrouting.yaml +160 -0
- package/AISB/catalog/aisb.t3.082_onlinellmrouting.zh.yaml +133 -0
- package/AISB/catalog/aisb.t3.083_starformer.yaml +178 -0
- package/AISB/catalog/aisb.t3.083_starformer.zh.yaml +140 -0
- package/AISB/catalog/aisb.t3.084_ift.yaml +139 -0
- package/AISB/catalog/aisb.t3.084_ift.zh.yaml +111 -0
- package/AISB/catalog/aisb.t3.085_neuralsurv.yaml +183 -0
- package/AISB/catalog/aisb.t3.085_neuralsurv.zh.yaml +143 -0
- package/AISB/catalog/aisb.t3.086_stella.yaml +197 -0
- package/AISB/catalog/aisb.t3.086_stella.zh.yaml +142 -0
- package/AISB/catalog/aisb.t3.087_moses.yaml +167 -0
- package/AISB/catalog/aisb.t3.087_moses.zh.yaml +132 -0
- package/AISB/catalog/aisb.t3.088_channelnorm.yaml +140 -0
- package/AISB/catalog/aisb.t3.088_channelnorm.zh.yaml +109 -0
- package/AISB/catalog/aisb.t3.089_causalvelocity.yaml +730 -0
- package/AISB/catalog/aisb.t3.089_causalvelocity.zh.yaml +668 -0
- package/AISB/catalog/aisb.t3.090_rstib.yaml +144 -0
- package/AISB/catalog/aisb.t3.090_rstib.zh.yaml +109 -0
- package/AISB/catalog/aisb.t3.091_timeawarecausal.yaml +132 -0
- package/AISB/catalog/aisb.t3.091_timeawarecausal.zh.yaml +107 -0
- package/AISB/catalog/aisb.t3.092_kmeanslocalopt.yaml +138 -0
- package/AISB/catalog/aisb.t3.092_kmeanslocalopt.zh.yaml +110 -0
- package/AISB/catalog/aisb.t3.093_fedwmsam.yaml +134 -0
- package/AISB/catalog/aisb.t3.093_fedwmsam.zh.yaml +106 -0
- package/AISB/catalog/aisb.t3.094_boundre.yaml +147 -0
- package/AISB/catalog/aisb.t3.094_boundre.zh.yaml +114 -0
- package/AISB/catalog/aisb.t3.095_fastfeaturecp.yaml +153 -0
- package/AISB/catalog/aisb.t3.095_fastfeaturecp.zh.yaml +118 -0
- package/AISB/catalog/aisb.t3.096_m3svm.yaml +189 -0
- package/AISB/catalog/aisb.t3.096_m3svm.zh.yaml +149 -0
- package/AISB/catalog/aisb.t3.097_wassersteintl.yaml +212 -0
- package/AISB/catalog/aisb.t3.097_wassersteintl.zh.yaml +169 -0
- package/AISB/catalog/aisb.t3.098_xmahalanobis.yaml +171 -0
- package/AISB/catalog/aisb.t3.098_xmahalanobis.zh.yaml +127 -0
- package/AISB/catalog/aisb.t3.099_ollalanding.yaml +248 -0
- package/AISB/catalog/aisb.t3.099_ollalanding.zh.yaml +182 -0
- package/AISB/catalog/aisb.t3.100_invmissingdata.yaml +179 -0
- package/AISB/catalog/aisb.t3.100_invmissingdata.zh.yaml +150 -0
- package/AISB/catalog/aisb.t3.101_acia.yaml +164 -0
- package/AISB/catalog/aisb.t3.101_acia.zh.yaml +109 -0
- package/AISB/catalog/aisb.t3.102_stochasticff.yaml +178 -0
- package/AISB/catalog/aisb.t3.102_stochasticff.zh.yaml +130 -0
- package/AISB/catalog/aisb.t3.103_qdcp.yaml +150 -0
- package/AISB/catalog/aisb.t3.103_qdcp.zh.yaml +116 -0
- package/AISB/catalog/aisb.t3.104_balancedactiveinf.yaml +137 -0
- package/AISB/catalog/aisb.t3.104_balancedactiveinf.zh.yaml +104 -0
- package/AISB/catalog/aisb.t3.105_binaryclasseval.yaml +161 -0
- package/AISB/catalog/aisb.t3.105_binaryclasseval.zh.yaml +130 -0
- package/AISB/image/001_aisb.t3.001_savvy.jpg +0 -0
- package/AISB/image/002_aisb.t3.002_pinet.jpg +0 -0
- package/AISB/image/003_aisb.t3.003_dmsqd.jpg +0 -0
- package/AISB/image/004_aisb.t3.004_decentralattn.jpg +0 -0
- package/AISB/image/005_aisb.t3.005_tsae.jpg +0 -0
- package/AISB/image/006_aisb.t3.006_physense.jpg +0 -0
- package/AISB/image/007_aisb.t3.007_reasoningiqa.jpg +0 -0
- package/AISB/image/008_aisb.t3.008_meanflows.jpg +0 -0
- package/AISB/image/009_aisb.t3.009_scoremissing.jpg +0 -0
- package/AISB/image/010_aisb.t3.010_suitabilityfilter.jpg +0 -0
- package/AISB/image/011_aisb.t3.011_osd.jpg +0 -0
- package/AISB/image/012_aisb.t3.012_efficientqat.jpg +0 -0
- package/AISB/image/013_aisb.t3.013_appl.jpg +0 -0
- package/AISB/image/014_aisb.t3.014_piguard.jpg +0 -0
- package/AISB/image/015_aisb.t3.015_frspec.jpg +0 -0
- package/AISB/image/016_aisb.t3.016_mathfusion.jpg +0 -0
- package/AISB/image/017_aisb.t3.017_multimodalglp.jpg +0 -0
- package/AISB/image/018_aisb.t3.018_cotsynth.jpg +0 -0
- package/AISB/image/019_aisb.t3.019_dyscaleut.jpg +0 -0
- package/AISB/image/020_aisb.t3.020_aristotle.jpg +0 -0
- package/AISB/image/021_aisb.t3.021_tokenrecycling.jpg +0 -0
- package/AISB/image/022_aisb.t3.022_chainofreasoning.jpg +0 -0
- package/AISB/image/023_aisb.t3.023_guidedembed.jpg +0 -0
- package/AISB/image/024_aisb.t3.024_outputcentric.jpg +0 -0
- package/AISB/image/025_aisb.t3.025_deeper.jpg +0 -0
- package/AISB/image/026_aisb.t3.026_gartkg.jpg +0 -0
- package/AISB/image/027_aisb.t3.027_citeeval.jpg +0 -0
- package/AISB/image/028_aisb.t3.028_sbam.jpg +0 -0
- package/AISB/image/029_aisb.t3.029_cdqgeoembed.jpg +0 -0
- package/AISB/image/030_aisb.t3.030_processrm.jpg +0 -0
- package/AISB/image/031_aisb.t3.031_circuitstability.jpg +0 -0
- package/AISB/image/032_aisb.t3.032_ptsolver.jpg +0 -0
- package/AISB/image/033_aisb.t3.033_gcse.jpg +0 -0
- package/AISB/image/034_aisb.t3.034_ensemblewm.jpg +0 -0
- package/AISB/image/035_aisb.t3.035_moralvalueswa.jpg +0 -0
- package/AISB/image/036_aisb.t3.036_weakstrongpref.jpg +0 -0
- package/AISB/image/037_aisb.t3.037_dementiamask.jpg +0 -0
- package/AISB/image/038_aisb.t3.038_tinysam.jpg +0 -0
- package/AISB/image/039_aisb.t3.039_calf.jpg +0 -0
- package/AISB/image/040_aisb.t3.040_graniteguardian.jpg +0 -0
- package/AISB/image/041_aisb.t3.041_amdm.jpg +0 -0
- package/AISB/image/042_aisb.t3.042_xpatch.jpg +0 -0
- package/AISB/image/043_aisb.t3.043_vhm.jpg +0 -0
- package/AISB/image/044_aisb.t3.044_rgvi.jpg +0 -0
- package/AISB/image/045_aisb.t3.045_pslstm.jpg +0 -0
- package/AISB/image/046_aisb.t3.046_nonstatts.jpg +0 -0
- package/AISB/image/047_aisb.t3.047_timepfn.jpg +0 -0
- package/AISB/image/048_aisb.t3.048_proxyspex.jpg +0 -0
- package/AISB/image/049_aisb.t3.049_hogwildinference.jpg +0 -0
- package/AISB/image/050_aisb.t3.050_causalpfn.jpg +0 -0
- package/AISB/image/051_aisb.t3.051_flashtp.jpg +0 -0
- package/AISB/image/052_aisb.t3.052_nsdiff.jpg +0 -0
- package/AISB/image/053_aisb.t3.053_k2vae.jpg +0 -0
- package/AISB/image/054_aisb.t3.054_timebase.jpg +0 -0
- package/AISB/image/055_aisb.t3.055_csbrain.jpg +0 -0
- package/AISB/image/056_aisb.t3.056_infosam.jpg +0 -0
- package/AISB/image/057_aisb.t3.057_mdreid.jpg +0 -0
- package/AISB/image/058_aisb.t3.058_mindglitch.jpg +0 -0
- package/AISB/image/059_aisb.t3.059_selfsupervised.jpg +0 -0
- package/AISB/image/060_aisb.t3.060_iaggad.jpg +0 -0
- package/AISB/image/061_aisb.t3.061_hsgkn.jpg +0 -0
- package/AISB/image/062_aisb.t3.062_visionts.jpg +0 -0
- package/AISB/image/063_aisb.t3.063_tsrag.jpg +0 -0
- package/AISB/image/064_aisb.t3.064_pir.jpg +0 -0
- package/AISB/image/065_aisb.t3.065_proteinbinding.jpg +0 -0
- package/AISB/image/066_aisb.t3.066_tropicalattention.jpg +0 -0
- package/AISB/image/067_aisb.t3.067_kanad.jpg +0 -0
- package/AISB/image/068_aisb.t3.068_sempo.jpg +0 -0
- package/AISB/image/069_aisb.t3.069_treehfd.jpg +0 -0
- package/AISB/image/070_aisb.t3.070_certifiedunlearning.jpg +0 -0
- package/AISB/image/071_aisb.t3.071_neuralmjd.jpg +0 -0
- package/AISB/image/072_aisb.t3.072_fedgmt.jpg +0 -0
- package/AISB/image/073_aisb.t3.073_rld.jpg +0 -0
- package/AISB/image/074_aisb.t3.074_lsvi.jpg +0 -0
- package/AISB/image/075_aisb.t3.075_treeslicedentropy.jpg +0 -0
- package/AISB/image/076_aisb.t3.076_aanet.jpg +0 -0
- package/AISB/image/077_aisb.t3.077_cmnn.jpg +0 -0
- package/AISB/image/078_aisb.t3.078_conformalanomaly.jpg +0 -0
- package/AISB/image/079_aisb.t3.079_dpfkmeans.jpg +0 -0
- package/AISB/image/080_aisb.t3.080_latentscorereweight.jpg +0 -0
- package/AISB/image/081_aisb.t3.081_qmamba.jpg +0 -0
- package/AISB/image/082_aisb.t3.082_onlinellmrouting.jpg +0 -0
- package/AISB/image/083_aisb.t3.083_starformer.jpg +0 -0
- package/AISB/image/084_aisb.t3.084_ift.jpg +0 -0
- package/AISB/image/085_aisb.t3.085_neuralsurv.jpg +0 -0
- package/AISB/image/086_aisb.t3.086_stella.jpg +0 -0
- package/AISB/image/087_aisb.t3.087_moses.jpg +0 -0
- package/AISB/image/088_aisb.t3.088_channelnorm.jpg +0 -0
- package/AISB/image/089_aisb.t3.089_causalvelocity.jpg +0 -0
- package/AISB/image/090_aisb.t3.090_rstib.jpg +0 -0
- package/AISB/image/091_aisb.t3.091_timeawarecausal.jpg +0 -0
- package/AISB/image/092_aisb.t3.092_kmeanslocalopt.jpg +0 -0
- package/AISB/image/093_aisb.t3.093_fedwmsam.jpg +0 -0
- package/AISB/image/094_aisb.t3.094_boundre.jpg +0 -0
- package/AISB/image/095_aisb.t3.095_fastfeaturecp.jpg +0 -0
- package/AISB/image/096_aisb.t3.096_m3svm.jpg +0 -0
- package/AISB/image/097_aisb.t3.097_wassersteintl.jpg +0 -0
- package/AISB/image/098_aisb.t3.098_xmahalanobis.jpg +0 -0
- package/AISB/image/099_aisb.t3.099_ollalanding.jpg +0 -0
- package/AISB/image/100_aisb.t3.100_invmissingdata.jpg +0 -0
- package/AISB/image/101_aisb.t3.101_acia.jpg +0 -0
- package/AISB/image/102_aisb.t3.102_stochasticff.jpg +0 -0
- package/AISB/image/103_aisb.t3.103_qdcp.jpg +0 -0
- package/AISB/image/104_aisb.t3.104_balancedactiveinf.jpg +0 -0
- package/AISB/image/105_aisb.t3.105_binaryclasseval.jpg +0 -0
- package/AISB/image/106_aisb.t1.reasoning_lite.jpg +0 -0
- package/AISB/image/107_aisb.t2.paper_audit.jpg +0 -0
- package/AISB/image/108_aisb.t3.multi_gpu_search.jpg +0 -0
- package/AISB/image/109_aisb.t3.tdc_admet.jpg +0 -0
- package/AISB/image/aisb.b1.agentic_coding.svg +16 -0
- package/AISB/image/aisb.b10.climate_earth.svg +16 -0
- package/AISB/image/aisb.b11.model_efficiency.svg +16 -0
- package/AISB/image/aisb.b12.embodied_ai.svg +16 -0
- package/AISB/image/aisb.b2.agent_systems.svg +16 -0
- package/AISB/image/aisb.b3.self_evolving_rl.svg +16 -0
- package/AISB/image/aisb.b4.lm_reasoning.svg +16 -0
- package/AISB/image/aisb.b5.math_proof.svg +16 -0
- package/AISB/image/aisb.b6.research_process.svg +16 -0
- package/AISB/image/aisb.b7.multimodal_fusion.svg +16 -0
- package/AISB/image/aisb.b8.lifesci_drug.svg +16 -0
- package/AISB/image/aisb.b9.material_science.svg +16 -0
- package/README.md +196 -32
- package/bin/ds.js +924 -66
- package/docs/en/00_QUICK_START.md +195 -18
- package/docs/en/01_SETTINGS_REFERENCE.md +468 -96
- package/docs/en/02_START_RESEARCH_GUIDE.md +26 -5
- package/docs/en/03_QQ_CONNECTOR_GUIDE.md +14 -3
- package/docs/en/04_LINGZHU_CONNECTOR_GUIDE.md +2 -0
- package/docs/en/05_TUI_GUIDE.md +171 -2
- package/docs/en/07_MEMORY_AND_MCP.md +38 -2
- package/docs/en/09_DOCTOR.md +78 -7
- package/docs/en/10_WEIXIN_CONNECTOR_GUIDE.md +38 -1
- package/docs/en/11_LICENSE_AND_RISK.md +4 -0
- package/docs/en/12_GUIDED_WORKFLOW_TOUR.md +15 -0
- package/docs/en/14_PROMPT_SKILLS_AND_MCP_GUIDE.md +9 -0
- package/docs/en/15_CODEX_PROVIDER_SETUP.md +624 -180
- package/docs/en/16_TELEGRAM_CONNECTOR_GUIDE.md +14 -0
- package/docs/en/17_WHATSAPP_CONNECTOR_GUIDE.md +14 -0
- package/docs/en/18_FEISHU_CONNECTOR_GUIDE.md +14 -0
- package/docs/en/21_LOCAL_MODEL_BACKENDS_GUIDE.md +386 -0
- package/docs/en/22_BENCHSTORE_YAML_REFERENCE.md +469 -0
- package/docs/en/23_BENCHSTORE_GITHUB_RELEASES_SPEC.md +316 -0
- package/docs/en/24_CLAUDE_CODE_PROVIDER_SETUP.md +469 -0
- package/docs/en/25_OPENCODE_PROVIDER_SETUP.md +653 -0
- package/docs/en/26_CITATION_AND_ATTRIBUTION.md +119 -0
- package/docs/en/27_KIMI_CODE_PROVIDER_SETUP.md +180 -0
- package/docs/en/28_DISCORD_CONNECTOR_GUIDE.md +61 -0
- package/docs/en/29_SLACK_CONNECTOR_GUIDE.md +60 -0
- package/docs/en/30_SETTINGS_CONTROL_CENTER_GUIDE.md +371 -0
- package/docs/en/{19_LOCAL_BROWSER_AUTH.md → 31_LOCAL_BROWSER_AUTH.md} +1 -1
- package/docs/en/32_WINDOWS_WSL2_DEPLOYMENT_GUIDE.md +273 -0
- package/docs/en/33_WORKSPACE_EXPLORER_QA.md +121 -0
- package/docs/en/91_DEVELOPMENT.md +266 -0
- package/docs/en/99_ACKNOWLEDGEMENTS.md +24 -19
- package/docs/en/README.md +48 -7
- package/docs/images/admin/admin-connectors-health-en.png +0 -0
- package/docs/images/admin/admin-controllers-en.png +0 -0
- package/docs/images/admin/admin-diagnostics-en.png +0 -0
- package/docs/images/admin/admin-errors-en.png +0 -0
- package/docs/images/admin/admin-issues-en.png +0 -0
- package/docs/images/admin/admin-logs-en.png +0 -0
- package/docs/images/admin/admin-quest-detail-en.png +0 -0
- package/docs/images/admin/admin-quests-en.png +0 -0
- package/docs/images/admin/admin-repairs-en.png +0 -0
- package/docs/images/admin/admin-runtime-en.png +0 -0
- package/docs/images/admin/admin-search-en.png +0 -0
- package/docs/images/admin/admin-stats-en.png +0 -0
- package/docs/images/admin/admin-summary-en.png +0 -0
- package/docs/images/connectors/connector-discord-en.png +0 -0
- package/docs/images/connectors/connector-feishu-en.png +0 -0
- package/docs/images/connectors/connector-lingzhu-en.png +0 -0
- package/docs/images/connectors/connector-qq-en.png +0 -0
- package/docs/images/connectors/connector-slack-en.png +0 -0
- package/docs/images/connectors/connector-telegram-en.png +0 -0
- package/docs/images/connectors/connector-weixin-en.png +0 -0
- package/docs/images/connectors/connector-whatsapp-en.png +0 -0
- package/docs/images/settings/settings-baselines-en.png +0 -0
- package/docs/images/settings/settings-config-en.png +0 -0
- package/docs/images/settings/settings-connectors-overview-en.png +0 -0
- package/docs/images/settings/settings-deepxiv-en.png +0 -0
- package/docs/images/settings/settings-mcp-servers-en.png +0 -0
- package/docs/images/settings/settings-plugins-en.png +0 -0
- package/docs/images/settings/settings-runners-en.png +0 -0
- package/docs/zh/00_QUICK_START.md +142 -18
- package/docs/zh/01_SETTINGS_REFERENCE.md +219 -98
- package/docs/zh/02_START_RESEARCH_GUIDE.md +26 -5
- package/docs/zh/05_TUI_GUIDE.md +171 -2
- package/docs/zh/07_MEMORY_AND_MCP.md +29 -2
- package/docs/zh/09_DOCTOR.md +54 -8
- package/docs/zh/10_WEIXIN_CONNECTOR_GUIDE.md +24 -1
- package/docs/zh/11_LICENSE_AND_RISK.md +4 -0
- package/docs/zh/12_GUIDED_WORKFLOW_TOUR.md +15 -0
- package/docs/zh/14_PROMPT_SKILLS_AND_MCP_GUIDE.md +9 -0
- package/docs/zh/15_CODEX_PROVIDER_SETUP.md +552 -181
- package/docs/zh/21_LOCAL_MODEL_BACKENDS_GUIDE.md +384 -0
- package/docs/zh/22_BENCHSTORE_YAML_REFERENCE.md +459 -0
- package/docs/zh/23_BENCHSTORE_GITHUB_RELEASES_SPEC.md +287 -0
- package/docs/zh/23_CLAUDE_RUNNER_GUIDE.md +103 -0
- package/docs/zh/24_CLAUDE_CODE_PROVIDER_SETUP.md +460 -0
- package/docs/zh/25_OPENCODE_PROVIDER_SETUP.md +660 -0
- package/docs/zh/26_CITATION_AND_ATTRIBUTION.md +102 -0
- package/docs/zh/27_KIMI_CODE_PROVIDER_SETUP.md +51 -0
- package/docs/zh/{19_LOCAL_BROWSER_AUTH.md → 31_LOCAL_BROWSER_AUTH.md} +1 -1
- package/docs/zh/32_WINDOWS_WSL2_DEPLOYMENT_GUIDE.md +264 -0
- package/docs/zh/33_WORKSPACE_EXPLORER_QA.md +127 -0
- package/docs/zh/99_ACKNOWLEDGEMENTS.md +23 -19
- package/docs/zh/README.md +33 -7
- package/install.sh +168 -20
- package/package.json +5 -1
- package/pyproject.toml +2 -1
- package/src/deepscientist/__init__.py +1 -1
- package/src/deepscientist/acp/envelope.py +13 -0
- package/src/deepscientist/admin/__init__.py +3 -0
- package/src/deepscientist/admin/charts.py +681 -0
- package/src/deepscientist/admin/logs.py +119 -0
- package/src/deepscientist/admin/repairs.py +217 -0
- package/src/deepscientist/admin/service.py +1310 -0
- package/src/deepscientist/admin/system_info.py +700 -0
- package/src/deepscientist/admin/tasks.py +465 -0
- package/src/deepscientist/admin/tool_metrics.py +600 -0
- package/src/deepscientist/artifact/guidance.py +8 -4
- package/src/deepscientist/artifact/schemas.py +115 -0
- package/src/deepscientist/artifact/service.py +4268 -260
- package/src/deepscientist/bash_exec/monitor.py +30 -3
- package/src/deepscientist/bash_exec/service.py +134 -1
- package/src/deepscientist/benchstore/__init__.py +4 -0
- package/src/deepscientist/benchstore/prompt_builder.py +224 -0
- package/src/deepscientist/benchstore/service.py +1716 -0
- package/src/deepscientist/bridges/connectors.py +8 -2
- package/src/deepscientist/channels/weixin_ilink.py +8 -1
- package/src/deepscientist/cli.py +92 -17
- package/src/deepscientist/codex_cli_compat.py +187 -74
- package/src/deepscientist/config/models.py +82 -11
- package/src/deepscientist/config/service.py +1077 -93
- package/src/deepscientist/connector/weixin_support.py +48 -17
- package/src/deepscientist/daemon/api/handlers.py +827 -235
- package/src/deepscientist/daemon/api/router.py +81 -1
- package/src/deepscientist/daemon/app.py +1512 -85
- package/src/deepscientist/diagnostics/__init__.py +6 -0
- package/src/deepscientist/diagnostics/runner_failures.py +277 -0
- package/src/deepscientist/doctor.py +407 -56
- package/src/deepscientist/evidence_packets.py +590 -0
- package/src/deepscientist/home.py +52 -4
- package/src/deepscientist/kimi_cli_compat.py +50 -0
- package/src/deepscientist/latex_runtime.py +2 -2
- package/src/deepscientist/mcp/context.py +2 -0
- package/src/deepscientist/mcp/schemas.py +114 -0
- package/src/deepscientist/mcp/server.py +1566 -126
- package/src/deepscientist/memory/service.py +203 -16
- package/src/deepscientist/process_control.py +8 -1
- package/src/deepscientist/prompts/builder.py +850 -88
- package/src/deepscientist/quest/__init__.py +2 -2
- package/src/deepscientist/quest/layout.py +12 -1
- package/src/deepscientist/quest/node_traces.py +10 -0
- package/src/deepscientist/quest/service.py +1852 -161
- package/src/deepscientist/quest/stage_views.py +1 -1
- package/src/deepscientist/runners/__init__.py +18 -0
- package/src/deepscientist/runners/base.py +89 -1
- package/src/deepscientist/runners/builtins.py +13 -1
- package/src/deepscientist/runners/claude.py +391 -0
- package/src/deepscientist/runners/codex.py +480 -35
- package/src/deepscientist/runners/codex_telemetry.py +127 -0
- package/src/deepscientist/runners/kimi.py +334 -0
- package/src/deepscientist/runners/metadata.py +68 -0
- package/src/deepscientist/runners/opencode.py +414 -0
- package/src/deepscientist/runners/runtime_overrides.py +100 -0
- package/src/deepscientist/runners/simple_cli.py +538 -0
- package/src/deepscientist/runtime_storage.py +303 -0
- package/src/deepscientist/shared.py +80 -16
- package/src/deepscientist/skills/installer.py +37 -0
- package/src/deepscientist/skills/registry.py +2 -0
- package/src/deepscientist/tinytex.py +2 -2
- package/src/deepscientist/tui.py +10 -3
- package/src/prompts/benchstore/system.md +77 -0
- package/src/prompts/connectors/qq.md +33 -2
- package/src/prompts/connectors/weixin.md +208 -23
- package/src/prompts/contracts/admin_ops.md +74 -0
- package/src/prompts/contracts/admin_ops_knowledge.md +138 -0
- package/src/prompts/contracts/shared_interaction.md +5 -10
- package/src/prompts/start_setup/system.md +422 -0
- package/src/prompts/system.md +411 -304
- package/src/prompts/system_copilot.md +89 -0
- package/src/skills/analysis-campaign/SKILL.md +239 -578
- package/src/skills/analysis-campaign/references/artifact-flow-examples.md +102 -0
- package/src/skills/analysis-campaign/references/boundary-cases.md +98 -0
- package/src/skills/analysis-campaign/references/campaign-checklist-template.md +39 -24
- package/src/skills/analysis-campaign/references/campaign-design.md +26 -10
- package/src/skills/analysis-campaign/references/campaign-plan-template.md +53 -54
- package/src/skills/analysis-campaign/references/operational-guidance.md +97 -0
- package/src/skills/analysis-campaign/references/writing-facing-slice-examples.md +10 -20
- package/src/skills/baseline/SKILL.md +183 -461
- package/src/skills/baseline/references/artifact-flow-examples.md +106 -0
- package/src/skills/baseline/references/artifact-payload-examples.md +1 -1
- package/src/skills/baseline/references/baseline-checklist-template.md +27 -35
- package/src/skills/baseline/references/baseline-plan-template.md +37 -76
- package/src/skills/baseline/references/boundary-cases.md +86 -0
- package/src/skills/baseline/references/codebase-audit-checklist.md +2 -6
- package/src/skills/baseline/references/comparability-contract.md +7 -12
- package/src/skills/baseline/references/operational-guidance.md +56 -0
- package/src/skills/baseline/references/route-selection.md +5 -25
- package/src/skills/decision/SKILL.md +113 -306
- package/src/skills/decision/references/checkpoint-memory-template.md +47 -0
- package/src/skills/decision/references/operational-guidance.md +94 -0
- package/src/skills/decision/references/research-route-criteria.md +7 -8
- package/src/skills/decision/references/strategic-decision-template.md +13 -26
- package/src/skills/experiment/SKILL.md +132 -670
- package/src/skills/experiment/references/execution-playbook.md +374 -0
- package/src/skills/experiment/references/main-experiment-checklist-template.md +26 -2
- package/src/skills/experiment/references/main-experiment-plan-template.md +28 -17
- package/src/skills/experiment/references/operational-guidance.md +108 -0
- package/src/skills/finalize/SKILL.md +62 -0
- package/src/skills/finalize/references/checkpoint-memory-template.md +49 -0
- package/src/skills/finalize/references/resume-packet-template.md +7 -0
- package/src/skills/idea/SKILL.md +228 -15
- package/src/skills/idea/references/controlled-brainstorming-playbook.md +78 -0
- package/src/skills/idea/references/current-board-packet-template.md +61 -0
- package/src/skills/idea/references/high-value-idea-sourcing.md +119 -0
- package/src/skills/idea/references/idea-generation-playbook.md +21 -0
- package/src/skills/idea/references/idea-thinking-flow.md +6 -0
- package/src/skills/idea/references/literature-survey-template.md +3 -0
- package/src/skills/idea/references/objective-contract-template.md +54 -0
- package/src/skills/idea/references/outline-seeding-example.md +56 -0
- package/src/skills/idea/references/pre-idea-draft-template.md +105 -0
- package/src/skills/idea/references/related-work-playbook.md +75 -2
- package/src/skills/idea/references/research-history-playbook.md +114 -0
- package/src/skills/idea/references/selection-gate.md +58 -6
- package/src/skills/intake-audit/SKILL.md +43 -2
- package/src/skills/intake-audit/references/state-audit-template.md +10 -0
- package/src/skills/nature-data/SKILL.md +128 -0
- package/src/skills/nature-data/UPSTREAM_LICENSE.txt +21 -0
- package/src/skills/nature-data/agents/openai.yaml +4 -0
- package/src/skills/nature-data/references/chinese-author-alignment.md +84 -0
- package/src/skills/nature-data/references/fair-metadata-checklist.md +105 -0
- package/src/skills/nature-data/references/policy-principles.md +103 -0
- package/src/skills/nature-data/references/repository-and-identifiers.md +96 -0
- package/src/skills/nature-data/references/source-basis.md +54 -0
- package/src/skills/nature-data/references/statement-patterns.md +153 -0
- package/src/skills/nature-figure/SKILL.md +197 -0
- package/src/skills/nature-figure/UPSTREAM_LICENSE.txt +21 -0
- package/src/skills/nature-figure/agents/openai.yaml +4 -0
- package/src/skills/nature-figure/evals/evals.json +37 -0
- package/src/skills/nature-figure/references/api.md +428 -0
- package/src/skills/nature-figure/references/backend-selection.md +100 -0
- package/src/skills/nature-figure/references/chart-types.md +281 -0
- package/src/skills/nature-figure/references/common-patterns.md +349 -0
- package/src/skills/nature-figure/references/design-theory.md +436 -0
- package/src/skills/nature-figure/references/figure-contract.md +93 -0
- package/src/skills/nature-figure/references/nature-2026-observations.md +112 -0
- package/src/skills/nature-figure/references/qa-contract.md +119 -0
- package/src/skills/nature-figure/references/r-template-index.md +66 -0
- package/src/skills/nature-figure/references/r-workflow.md +161 -0
- package/src/skills/nature-figure/references/tutorials.md +250 -0
- package/src/skills/nature-paper2ppt/SKILL.md +507 -0
- package/src/skills/nature-paper2ppt/UPSTREAM_LICENSE.txt +21 -0
- package/src/skills/nature-paper2ppt/agents/openai.yaml +4 -0
- package/src/skills/nature-polishing/SKILL.md +385 -0
- package/src/skills/nature-polishing/UPSTREAM_LICENSE.txt +21 -0
- package/src/skills/nature-polishing/agents/openai.yaml +4 -0
- package/src/skills/nature-polishing/references/phrasebank-playbook.md +162 -0
- package/src/skills/nature-polishing/references/section-moves.md +240 -0
- package/src/skills/nature-polishing/references/style-guardrails.md +94 -0
- package/src/skills/nature-polishing/references/writing-strategy.md +148 -0
- package/src/skills/optimize/SKILL.md +177 -1568
- package/src/skills/optimize/references/brief-shaping-playbook.md +95 -0
- package/src/skills/optimize/references/candidate-board-template.md +13 -0
- package/src/skills/optimize/references/candidate-ranking-template.md +51 -0
- package/src/skills/optimize/references/codegen-route-playbook.md +50 -0
- package/src/skills/optimize/references/debug-response-template.md +29 -0
- package/src/skills/optimize/references/frontier-review-template.md +32 -0
- package/src/skills/optimize/references/fusion-playbook.md +36 -0
- package/src/skills/optimize/references/method-brief-template.md +73 -0
- package/src/skills/optimize/references/operational-guidance.md +621 -0
- package/src/skills/optimize/references/optimization-memory-template.md +30 -0
- package/src/skills/optimize/references/optimize-checklist-template.md +18 -0
- package/src/skills/optimize/references/plateau-response-playbook.md +28 -0
- package/src/skills/optimize/references/prompt-patterns.md +49 -0
- package/src/skills/paper-outline/SKILL.md +227 -0
- package/src/skills/paper-outline/references/outline-patterns.md +87 -0
- package/src/skills/paper-plot/SKILL.md +79 -0
- package/src/skills/paper-plot/agents/openai.yaml +4 -0
- package/src/skills/paper-plot/references/bar_grouped_hatch.md +96 -0
- package/src/skills/paper-plot/references/bar_paired_delta.md +72 -0
- package/src/skills/paper-plot/references/line_confidence_band.md +75 -0
- package/src/skills/paper-plot/references/line_loss_with_inset.md +65 -0
- package/src/skills/paper-plot/references/line_training_curve.md +44 -0
- package/src/skills/paper-plot/references/radar_dual_series.md +59 -0
- package/src/skills/paper-plot/references/scatter_broken_axis.md +59 -0
- package/src/skills/paper-plot/references/scatter_tsne_cluster.md +72 -0
- package/src/skills/paper-plot/scripts/bar_memevolve.py +109 -0
- package/src/skills/paper-plot/scripts/bar_spice.py +166 -0
- package/src/skills/paper-plot/scripts/line_aime.py +94 -0
- package/src/skills/paper-plot/scripts/line_loss_inset.py +157 -0
- package/src/skills/paper-plot/scripts/line_selfdistill.py +168 -0
- package/src/skills/paper-plot/scripts/radar_dora.py +151 -0
- package/src/skills/paper-plot/scripts/scatter_break.py +169 -0
- package/src/skills/paper-plot/scripts/scatter_tsne.py +133 -0
- package/src/skills/rebuttal/SKILL.md +9 -0
- package/src/skills/references/tool-usage-by-stage.md +438 -0
- package/src/skills/review/SKILL.md +105 -7
- package/src/skills/science/PROVENANCE.md +44 -0
- package/src/skills/science/SKILL.md +137 -0
- package/src/skills/science/references/artifact-science-tool.md +110 -0
- package/src/skills/science/references/claim-type-discipline.md +56 -0
- package/src/skills/science/references/domain-index.md +422 -0
- package/src/skills/science/references/hpc-via-bash-exec.md +42 -0
- package/src/skills/science/references/package-check-playbook.md +64 -0
- package/src/skills/science/references/package-index.min.json +3616 -0
- package/src/skills/science/references/packages/abinit.md +80 -0
- package/src/skills/science/references/packages/acts.md +73 -0
- package/src/skills/science/references/packages/aiida-core.md +80 -0
- package/src/skills/science/references/packages/alamode.md +80 -0
- package/src/skills/science/references/packages/amuse.md +88 -0
- package/src/skills/science/references/packages/anndata.md +88 -0
- package/src/skills/science/references/packages/arbor.md +80 -0
- package/src/skills/science/references/packages/arc.md +73 -0
- package/src/skills/science/references/packages/astropy.md +88 -0
- package/src/skills/science/references/packages/astroquery.md +88 -0
- package/src/skills/science/references/packages/atomate2.md +80 -0
- package/src/skills/science/references/packages/atomsmltr.md +73 -0
- package/src/skills/science/references/packages/awkward.md +73 -0
- package/src/skills/science/references/packages/batman.md +88 -0
- package/src/skills/science/references/packages/biopython.md +88 -0
- package/src/skills/science/references/packages/bloqade.md +73 -0
- package/src/skills/science/references/packages/brian2.md +73 -0
- package/src/skills/science/references/packages/bullet3.md +73 -0
- package/src/skills/science/references/packages/calculix.md +80 -0
- package/src/skills/science/references/packages/cantera.md +73 -0
- package/src/skills/science/references/packages/cavity-md-ipi.md +80 -0
- package/src/skills/science/references/packages/ccdproc.md +88 -0
- package/src/skills/science/references/packages/celerite2.md +88 -0
- package/src/skills/science/references/packages/cellrank.md +73 -0
- package/src/skills/science/references/packages/cesm.md +80 -0
- package/src/skills/science/references/packages/chemicals.md +73 -0
- package/src/skills/science/references/packages/chempy.md +73 -0
- package/src/skills/science/references/packages/cirq.md +73 -0
- package/src/skills/science/references/packages/coffea.md +73 -0
- package/src/skills/science/references/packages/cp2k.md +88 -0
- package/src/skills/science/references/packages/custodian.md +80 -0
- package/src/skills/science/references/packages/dart.md +73 -0
- package/src/skills/science/references/packages/datamol.md +88 -0
- package/src/skills/science/references/packages/dd4hep.md +73 -0
- package/src/skills/science/references/packages/dealii.md +80 -0
- package/src/skills/science/references/packages/deepchem.md +88 -0
- package/src/skills/science/references/packages/delphes.md +73 -0
- package/src/skills/science/references/packages/devito.md +80 -0
- package/src/skills/science/references/packages/dftb.md +88 -0
- package/src/skills/science/references/packages/dftd4.md +88 -0
- package/src/skills/science/references/packages/dftk-jl.md +80 -0
- package/src/skills/science/references/packages/dolfinx.md +80 -0
- package/src/skills/science/references/packages/drake.md +73 -0
- package/src/skills/science/references/packages/dumux.md +73 -0
- package/src/skills/science/references/packages/elk.md +80 -0
- package/src/skills/science/references/packages/elmerfem.md +80 -0
- package/src/skills/science/references/packages/enzo-e.md +88 -0
- package/src/skills/science/references/packages/espresso.md +80 -0
- package/src/skills/science/references/packages/exoplanet.md +88 -0
- package/src/skills/science/references/packages/fairroot.md +73 -0
- package/src/skills/science/references/packages/fbpic.md +80 -0
- package/src/skills/science/references/packages/fdtdbath-meep.md +80 -0
- package/src/skills/science/references/packages/geant4.md +73 -0
- package/src/skills/science/references/packages/geosx.md +80 -0
- package/src/skills/science/references/packages/gprmax.md +80 -0
- package/src/skills/science/references/packages/gromacs.md +80 -0
- package/src/skills/science/references/packages/gwaslab.md +73 -0
- package/src/skills/science/references/packages/gz-sim.md +73 -0
- package/src/skills/science/references/packages/hail.md +88 -0
- package/src/skills/science/references/packages/hiphive.md +80 -0
- package/src/skills/science/references/packages/hoomd-blue.md +80 -0
- package/src/skills/science/references/packages/itensor.md +73 -0
- package/src/skills/science/references/packages/itensors-jl.md +73 -0
- package/src/skills/science/references/packages/jdftx.md +73 -0
- package/src/skills/science/references/packages/jobflow.md +80 -0
- package/src/skills/science/references/packages/kadanoffbaym-jl.md +73 -0
- package/src/skills/science/references/packages/kite.md +80 -0
- package/src/skills/science/references/packages/kratos.md +80 -0
- package/src/skills/science/references/packages/kwant.md +73 -0
- package/src/skills/science/references/packages/lammps.md +80 -0
- package/src/skills/science/references/packages/lightkurve.md +88 -0
- package/src/skills/science/references/packages/limix.md +73 -0
- package/src/skills/science/references/packages/maxwelllink.md +80 -0
- package/src/skills/science/references/packages/mcdc.md +73 -0
- package/src/skills/science/references/packages/meep.md +80 -0
- package/src/skills/science/references/packages/mfem.md +80 -0
- package/src/skills/science/references/packages/mitgcm.md +73 -0
- package/src/skills/science/references/packages/modflow6.md +73 -0
- package/src/skills/science/references/packages/molecool.md +73 -0
- package/src/skills/science/references/packages/mom6.md +73 -0
- package/src/skills/science/references/packages/moose.md +80 -0
- package/src/skills/science/references/packages/mpas-model.md +73 -0
- package/src/skills/science/references/packages/mujoco.md +73 -0
- package/src/skills/science/references/packages/mumax3.md +73 -0
- package/src/skills/science/references/packages/nekrs.md +80 -0
- package/src/skills/science/references/packages/nessi.md +73 -0
- package/src/skills/science/references/packages/nest-simulator.md +73 -0
- package/src/skills/science/references/packages/netket.md +73 -0
- package/src/skills/science/references/packages/neuron.md +73 -0
- package/src/skills/science/references/packages/nextflow.md +88 -0
- package/src/skills/science/references/packages/nwchem.md +88 -0
- package/src/skills/science/references/packages/openbabel.md +88 -0
- package/src/skills/science/references/packages/openems.md +80 -0
- package/src/skills/science/references/packages/openff-toolkit.md +88 -0
- package/src/skills/science/references/packages/openfoam-dev.md +80 -0
- package/src/skills/science/references/packages/openmc.md +73 -0
- package/src/skills/science/references/packages/openmm.md +80 -0
- package/src/skills/science/references/packages/openmoc.md +73 -0
- package/src/skills/science/references/packages/openmx.md +80 -0
- package/src/skills/science/references/packages/opensees.md +80 -0
- package/src/skills/science/references/packages/opensn.md +80 -0
- package/src/skills/science/references/packages/opm-simulators.md +73 -0
- package/src/skills/science/references/packages/oqupy.md +73 -0
- package/src/skills/science/references/packages/packmol.md +80 -0
- package/src/skills/science/references/packages/palabos.md +80 -0
- package/src/skills/science/references/packages/parflow.md +80 -0
- package/src/skills/science/references/packages/pennylane.md +88 -0
- package/src/skills/science/references/packages/perceval.md +73 -0
- package/src/skills/science/references/packages/phono3py.md +73 -0
- package/src/skills/science/references/packages/phonopy.md +73 -0
- package/src/skills/science/references/packages/photutils.md +88 -0
- package/src/skills/science/references/packages/picongpu.md +80 -0
- package/src/skills/science/references/packages/plink-ng.md +88 -0
- package/src/skills/science/references/packages/precice.md +73 -0
- package/src/skills/science/references/packages/psc.md +80 -0
- package/src/skills/science/references/packages/psi4.md +88 -0
- package/src/skills/science/references/packages/pybinding.md +73 -0
- package/src/skills/science/references/packages/pyfr.md +80 -0
- package/src/skills/science/references/packages/pyhf.md +73 -0
- package/src/skills/science/references/packages/pyiron_base.md +80 -0
- package/src/skills/science/references/packages/pylcp.md +73 -0
- package/src/skills/science/references/packages/pylith.md +80 -0
- package/src/skills/science/references/packages/pynbody.md +88 -0
- package/src/skills/science/references/packages/pysam.md +88 -0
- package/src/skills/science/references/packages/pyscf.md +88 -0
- package/src/skills/science/references/packages/q-e.md +73 -0
- package/src/skills/science/references/packages/qibo.md +73 -0
- package/src/skills/science/references/packages/qiskit.md +73 -0
- package/src/skills/science/references/packages/quantica-jl.md +73 -0
- package/src/skills/science/references/packages/quantumoptics-jl.md +73 -0
- package/src/skills/science/references/packages/quimb.md +73 -0
- package/src/skills/science/references/packages/qulacs.md +73 -0
- package/src/skills/science/references/packages/qutip.md +73 -0
- package/src/skills/science/references/packages/rdkit.md +88 -0
- package/src/skills/science/references/packages/rmg-py.md +73 -0
- package/src/skills/science/references/packages/root.md +73 -0
- package/src/skills/science/references/packages/scanpy.md +88 -0
- package/src/skills/science/references/packages/scikit-allel.md +88 -0
- package/src/skills/science/references/packages/scikit-bio.md +88 -0
- package/src/skills/science/references/packages/scqubits.md +73 -0
- package/src/skills/science/references/packages/scuff-em.md +80 -0
- package/src/skills/science/references/packages/scvi-tools.md +73 -0
- package/src/skills/science/references/packages/seissol.md +73 -0
- package/src/skills/science/references/packages/sfepy.md +80 -0
- package/src/skills/science/references/packages/sisl.md +73 -0
- package/src/skills/science/references/packages/smilei.md +80 -0
- package/src/skills/science/references/packages/snakemake.md +88 -0
- package/src/skills/science/references/packages/specfem3d-globe.md +80 -0
- package/src/skills/science/references/packages/specutils.md +88 -0
- package/src/skills/science/references/packages/spglib.md +80 -0
- package/src/skills/science/references/packages/squidpy.md +88 -0
- package/src/skills/science/references/packages/starry.md +88 -0
- package/src/skills/science/references/packages/strawberryfields.md +73 -0
- package/src/skills/science/references/packages/su2.md +80 -0
- package/src/skills/science/references/packages/sunny-jl.md +73 -0
- package/src/skills/science/references/packages/sw4.md +73 -0
- package/src/skills/science/references/packages/swift.md +88 -0
- package/src/skills/science/references/packages/tdnegf.md +73 -0
- package/src/skills/science/references/packages/tenpy.md +73 -0
- package/src/skills/science/references/packages/thermo.md +73 -0
- package/src/skills/science/references/packages/tkwant.md +73 -0
- package/src/skills/science/references/packages/tvb-root.md +73 -0
- package/src/skills/science/references/packages/uproot5.md +73 -0
- package/src/skills/science/references/packages/vampire.md +80 -0
- package/src/skills/science/references/packages/wannier_tools.md +73 -0
- package/src/skills/science/references/packages/warpx.md +80 -0
- package/src/skills/science/references/packages/wrf.md +73 -0
- package/src/skills/science/references/packages/xtb.md +88 -0
- package/src/skills/science/references/packages/yt.md +73 -0
- package/src/skills/science/references/science-task-brief-template.md +71 -0
- package/src/skills/scout/SKILL.md +83 -425
- package/src/skills/scout/references/literature-scout-template.md +5 -24
- package/src/skills/scout/references/operational-guidance.md +191 -0
- package/src/skills/scout/references/paper-triage-playbook.md +11 -35
- package/src/skills/write/SKILL.md +744 -1246
- package/src/skills/write/references/experiments_analysis_patterns.md +129 -0
- package/src/skills/write/references/oral_package_patterns.md +252 -0
- package/src/skills/write/references/oral_writing_principles.md +291 -0
- package/src/skills/write/references/section_rewrite_checklist.md +234 -0
- package/src/tui/dist/app/AppContainer.js +1314 -27
- package/src/tui/dist/components/Composer.js +26 -1
- package/src/tui/dist/components/ConfigScreen.js +2 -1
- package/src/tui/dist/components/InputPrompt.js +25 -9
- package/src/tui/dist/components/MainContent.js +18 -3
- package/src/tui/dist/components/QuestScreen.js +3 -2
- package/src/tui/dist/components/UtilityScreen.js +37 -0
- package/src/tui/dist/hooks/useSafeInput.js +10 -0
- package/src/tui/dist/index.js +13 -1
- package/src/tui/dist/layouts/DefaultAppLayout.js +11 -8
- package/src/tui/dist/lib/api.js +89 -1
- package/src/tui/package.json +1 -1
- package/src/ui/dist/assets/{AnalysisPlugin-DnSm0GZn.js → AnalysisPlugin-CA94NGmI.js} +1 -1
- package/src/ui/dist/assets/CliPlugin-DHBzphZU.js +79 -0
- package/src/ui/dist/assets/CodeEditorPlugin-BOFwD2rn.js +2 -0
- package/src/ui/dist/assets/{CodeViewerPlugin-itb0tltR.js → CodeViewerPlugin-CqDpgjik.js} +4 -4
- package/src/ui/dist/assets/{DocViewerPlugin-DqKkiCI6.js → DocViewerPlugin-UDBgt8-4.js} +3 -3
- package/src/ui/dist/assets/GitCommitViewerPlugin-BmHtZ0bZ.js +6 -0
- package/src/ui/dist/assets/{GitDiffViewerPlugin-DxL2ezFG.js → GitDiffViewerPlugin-CAxjNorQ.js} +2 -2
- package/src/ui/dist/assets/{GitSnapshotViewer-B_RQm1YZ.js → GitSnapshotViewer-CweA6VON.js} +2 -2
- package/src/ui/dist/assets/{ImageViewerPlugin-tHqlXY3n.js → ImageViewerPlugin-C8wHGvGN.js} +5 -5
- package/src/ui/dist/assets/LabPlugin-COyyLUol.js +32 -0
- package/src/ui/dist/assets/{LatexPlugin-B495DTXC.js → LatexPlugin-BQjAaA5J.js} +4 -4
- package/src/ui/dist/assets/{MarkdownViewerPlugin-DG28-61B.js → MarkdownViewerPlugin-Dy1NE2dI.js} +3 -3
- package/src/ui/dist/assets/{MarketplacePlugin-BiOGT-Kj.js → MarketplacePlugin-DMIZtEJ2.js} +2 -2
- package/src/ui/dist/assets/NotebookEditor-CFHMq_Qt.js +91 -0
- package/src/ui/dist/assets/{NotebookEditor-CVsj8h_T.js → NotebookEditor-WFyd8Ybt.js} +23 -23
- package/src/ui/dist/assets/{PdfLoader-CASDQmxJ.js → PdfLoader-CLE5u5TS.js} +3 -3
- package/src/ui/dist/assets/{PdfMarkdownPlugin-BFhwoKsY.js → PdfMarkdownPlugin-_iNK_H83.js} +1 -1
- package/src/ui/dist/assets/PdfViewerPlugin-DgWsbInT.js +22 -0
- package/src/ui/dist/assets/SearchPlugin-DrZmn5iw.js +11 -0
- package/src/ui/dist/assets/{TextViewerPlugin-CB4DYfWO.js → TextViewerPlugin-D1-T3aC7.js} +4 -4
- package/src/ui/dist/assets/branding/runner-claude.svg +107 -0
- package/src/ui/dist/assets/branding/runner-codex.svg +10 -0
- package/src/ui/dist/assets/branding/runner-kimi.svg +14 -0
- package/src/ui/dist/assets/branding/runner-opencode.svg +7 -0
- package/src/ui/dist/assets/cli-store-CoZ-x5Ip.js +1 -0
- package/src/ui/dist/assets/{code-DLC6G24T.js → code-DbsmSd3Y.js} +1 -1
- package/src/ui/dist/assets/file-diff-panel-DsvyRz47.js +1 -0
- package/src/ui/dist/assets/{wrap-text-CwMn-iqb.js → file-jump-queue-DeQBikaw.js} +3 -3
- package/src/ui/dist/assets/{file-socket-Cu4Qln7Y.js → file-socket-DA5XIx88.js} +1 -1
- package/src/ui/dist/assets/fonts/ds-fonts.css +50 -4
- package/src/ui/dist/assets/images/deepxiv/register-guide.png +0 -0
- package/src/ui/dist/assets/index-39vY9LmZ.js +1 -0
- package/src/ui/dist/assets/{index-wQ7RIIRd.js → index-BsO46tJA.js} +1 -1
- package/src/ui/dist/assets/index-CHzJ2xtB.js +3530 -0
- package/src/ui/dist/assets/index-DH-zxoZ3.css +33 -0
- package/src/ui/dist/assets/{plugin-notebook-HbW2K-1c.js → plugin-notebook-JRhysCqj.js} +2 -2
- package/src/ui/dist/assets/{project-sync-CsX08Qno.js → project-sync-DPmWKmKD.js} +1 -1
- package/src/ui/dist/assets/{zoom-out-R-GWEhzS.js → zoom-out-DAukFWen.js} +3 -3
- package/src/ui/dist/index.html +3 -3
- package/src/skills/analysis-campaign/references/artifact-orchestration.md +0 -58
- package/src/skills/baseline/references/memory-playbook.md +0 -40
- package/src/skills/baseline/references/publishable-baseline-package.md +0 -30
- package/src/skills/write/references/outline-evidence-contract-example.md +0 -107
- package/src/skills/write/references/paper-experiment-matrix-template.md +0 -131
- package/src/skills/write/references/paper-section-playbook.md +0 -64
- package/src/skills/write/references/reviewer-first-writing.md +0 -64
- package/src/skills/write/references/revision-checklist.md +0 -70
- package/src/skills/write/references/section-contracts.md +0 -82
- package/src/skills/write/references/sentence-level-proofing.md +0 -49
- package/src/ui/dist/assets/AiManusChatView-COFACy7V.js +0 -204
- package/src/ui/dist/assets/CliPlugin-CvwCmDQ5.js +0 -109
- package/src/ui/dist/assets/CodeEditorPlugin-cOqSa0xq.js +0 -2
- package/src/ui/dist/assets/GitCommitViewerPlugin-DVgNHBCS.js +0 -1
- package/src/ui/dist/assets/LabCopilotPanel-ClMbq5Yu.js +0 -14
- package/src/ui/dist/assets/LabPlugin-L_SuE8ow.js +0 -22
- package/src/ui/dist/assets/NotebookEditor-C-4Kt1p9.js +0 -81
- package/src/ui/dist/assets/PdfViewerPlugin-DcOzU9vd.js +0 -17
- package/src/ui/dist/assets/SearchPlugin-CHj7M58O.js +0 -16
- package/src/ui/dist/assets/VNCViewer-CjlbyCB3.js +0 -11
- package/src/ui/dist/assets/bot-CFkZY-JP.js +0 -6
- package/src/ui/dist/assets/chevron-up-Dq5ofbht.js +0 -6
- package/src/ui/dist/assets/file-content-Dv4LoZec.js +0 -1
- package/src/ui/dist/assets/file-diff-panel-Denq-lC3.js +0 -1
- package/src/ui/dist/assets/file-jump-queue-DA-SdG__.js +0 -1
- package/src/ui/dist/assets/git-commit-horizontal-BUh6G52n.js +0 -6
- package/src/ui/dist/assets/image-B9HUUddG.js +0 -6
- package/src/ui/dist/assets/index-B2B1sg-M.js +0 -1
- package/src/ui/dist/assets/index-Cgla8biy.css +0 -33
- package/src/ui/dist/assets/index-DRyx7vAc.js +0 -1
- package/src/ui/dist/assets/index-Gbl53BNp.js +0 -2496
- package/src/ui/dist/assets/pdf-effect-queue-ZtnHFCAi.js +0 -6
- package/src/ui/dist/assets/popover-DL6h35vr.js +0 -1
- package/src/ui/dist/assets/select-DvmXt1yY.js +0 -11
- package/src/ui/dist/assets/sigma-7jpXazui.js +0 -6
- package/src/ui/dist/assets/trash-xA7kFt8i.js +0 -11
- package/src/ui/dist/assets/useCliAccess-DsMwDjOp.js +0 -1
- package/src/ui/dist/assets/useFileDiffOverlay-FuhcnKiw.js +0 -1
|
@@ -0,0 +1,166 @@
|
|
|
1
|
+
id: aisb.t3.016_mathfusion
|
|
2
|
+
name: MathFusion
|
|
3
|
+
version: 0.1.0
|
|
4
|
+
one_line: Instruction fusion framework for enhancing mathematical reasoning in 7B-8B
|
|
5
|
+
LLMs through cross-problem synthesis.
|
|
6
|
+
task_description: 'MathFusion is a novel instruction fusion framework that enhances
|
|
7
|
+
mathematical reasoning by synthesizing new training problems from pairs of existing
|
|
8
|
+
problems. It implements three fusion strategies: (1) Sequential Fusion chains related
|
|
9
|
+
problems through shared variables to model solution dependencies; (2) Parallel Fusion
|
|
10
|
+
combines analogous problems to reinforce conceptual understanding; and (3) Conditional
|
|
11
|
+
Fusion creates context-aware selective problems for reasoning flexibility.
|
|
12
|
+
|
|
13
|
+
This benchmark covers the full pipeline: training on MathFusionQA using LLaMA-Factory
|
|
14
|
+
with full-parameter SFT (DeepSpeed ZeRO-2, flash-attention FA2, bf16, cutoff 4096),
|
|
15
|
+
followed by evaluation across six mathematical reasoning benchmarks including GSM8K,
|
|
16
|
+
MATH, CollegeMath, DeepMind-Mathematics, OlympiadBench, and TheoremQA. Reported
|
|
17
|
+
training configuration uses 3 epochs with batch size 128 on 8x NVIDIA A100 GPUs.
|
|
18
|
+
|
|
19
|
+
'
|
|
20
|
+
task_mode: experiment_driven
|
|
21
|
+
requires_execution: true
|
|
22
|
+
requires_paper: true
|
|
23
|
+
integrity_level: cas_plus_canary
|
|
24
|
+
snapshot_status: external_eval_required
|
|
25
|
+
support_level: recovery
|
|
26
|
+
time_band: 2-4d
|
|
27
|
+
cost_band: very_high
|
|
28
|
+
difficulty: hard
|
|
29
|
+
data_access: public
|
|
30
|
+
primary_outputs:
|
|
31
|
+
- accuracy
|
|
32
|
+
- finetuned_checkpoint
|
|
33
|
+
- benchmark_report
|
|
34
|
+
launch_profiles:
|
|
35
|
+
- id: quick_check
|
|
36
|
+
label: Quick Check
|
|
37
|
+
description: 'Lightweight sanity run with minimal data subsets and reduced epochs
|
|
38
|
+
to verify pipeline integrity without full resource commitment.
|
|
39
|
+
|
|
40
|
+
'
|
|
41
|
+
time_estimate: 1-2h
|
|
42
|
+
resource_estimate: 1x A100
|
|
43
|
+
- id: full_train_eval
|
|
44
|
+
label: Full Train + Eval
|
|
45
|
+
description: 'Complete training run on MathFusionQA with full parameter SFT across
|
|
46
|
+
3 epochs, followed by evaluation on all six benchmark datasets. Requires 8x A100
|
|
47
|
+
GPUs for the standard configuration.
|
|
48
|
+
|
|
49
|
+
'
|
|
50
|
+
time_estimate: 2-4d
|
|
51
|
+
resource_estimate: 8x A100
|
|
52
|
+
dataset_download:
|
|
53
|
+
primary_method: mixed
|
|
54
|
+
sources:
|
|
55
|
+
- kind: huggingface
|
|
56
|
+
url: https://huggingface.co/datasets/QizhiPei/MathFusionQA
|
|
57
|
+
access: public
|
|
58
|
+
note: Primary instruction fusion dataset containing original, sequential, parallel,
|
|
59
|
+
and conditional splits
|
|
60
|
+
- kind: github_repo
|
|
61
|
+
url: https://github.com/hiyouga/LLaMA-Factory
|
|
62
|
+
access: public
|
|
63
|
+
note: Training framework (v0.9.1) - converts MathFusionQA to JSON format for training
|
|
64
|
+
- kind: github_repo
|
|
65
|
+
url: https://github.com/QwenLM/Qwen2.5-Math
|
|
66
|
+
access: public
|
|
67
|
+
note: In-domain evaluation harness (gsm8k, math, mwpbench, deepmind-mathematics,
|
|
68
|
+
olympiadbench)
|
|
69
|
+
- kind: github_repo
|
|
70
|
+
url: https://github.com/hkust-nlp/dart-math
|
|
71
|
+
access: public
|
|
72
|
+
note: Out-of-domain evaluation harness (theoremqa and others)
|
|
73
|
+
notes:
|
|
74
|
+
- MathFusionQA must be converted to LLaMA-Factory JSON format before training
|
|
75
|
+
- Evaluation requires both Qwen2.5-Math and DART-Math harnesses
|
|
76
|
+
- 'Six evaluation datasets: gsm8k, math, mwpbench/college-math, deepmind-mathematics,
|
|
77
|
+
olympiadbench, theoremqa'
|
|
78
|
+
credential_requirements:
|
|
79
|
+
mode: none
|
|
80
|
+
items: []
|
|
81
|
+
notes:
|
|
82
|
+
- No explicit API credentials required for public datasets and GitHub repositories
|
|
83
|
+
resources:
|
|
84
|
+
minimum:
|
|
85
|
+
cpu_cores: 16
|
|
86
|
+
ram_gb: 128
|
|
87
|
+
disk_gb: 300
|
|
88
|
+
gpu_count: 2
|
|
89
|
+
gpu_vram_gb: 80
|
|
90
|
+
recommended:
|
|
91
|
+
cpu_cores: 64
|
|
92
|
+
ram_gb: 256
|
|
93
|
+
disk_gb: 500
|
|
94
|
+
gpu_count: 8
|
|
95
|
+
gpu_vram_gb: 80
|
|
96
|
+
notes:
|
|
97
|
+
- Reported ACL 2025 training used 8x NVIDIA A100 80GB for 3 epochs, batch size 128
|
|
98
|
+
- Full-parameter SFT with bf16 and DeepSpeed ZeRO-2 requires substantial VRAM
|
|
99
|
+
- vLLM inference during evaluation has similar memory footprint to training
|
|
100
|
+
environment:
|
|
101
|
+
python: '3.10'
|
|
102
|
+
cuda: '12.1'
|
|
103
|
+
pytorch: 2.3.1
|
|
104
|
+
flash_attn: fa2
|
|
105
|
+
key_packages:
|
|
106
|
+
- transformers==4.46.1
|
|
107
|
+
- accelerate==0.34.2
|
|
108
|
+
- deepspeed==0.15.4
|
|
109
|
+
- vllm==0.5.3.post1
|
|
110
|
+
- sympy==1.12.1
|
|
111
|
+
- antlr4-python3-runtime==4.11.1
|
|
112
|
+
- pebble
|
|
113
|
+
- word2number
|
|
114
|
+
- boto3
|
|
115
|
+
- triton==2.3.1
|
|
116
|
+
- ipython
|
|
117
|
+
notes:
|
|
118
|
+
- 'Install flash-attn separately: pip install flash-attn --no-build-isolation'
|
|
119
|
+
- Training uses LLaMA-Factory v0.9.1 with full-parameter SFT
|
|
120
|
+
- Evaluation requires dart-math and latex2sympy packages
|
|
121
|
+
risk_flags:
|
|
122
|
+
- long_runtime
|
|
123
|
+
- high_compute_cost
|
|
124
|
+
- multi_harness_dependency
|
|
125
|
+
risk_notes:
|
|
126
|
+
- Full training runs take 2-4 days on 8x A100
|
|
127
|
+
- Multi-stage pipeline with external evaluation dependencies
|
|
128
|
+
- Requires understanding of LLaMA-Factory configuration for custom runs
|
|
129
|
+
recommended_when: 'Use this benchmark for full-parameter math reasoning SFT experiments
|
|
130
|
+
with instruction fusion. Ideal for researchers exploring cross-problem synthesis,
|
|
131
|
+
data augmentation via problem fusion, or evaluating math reasoning improvements
|
|
132
|
+
across in-domain and out-of-domain benchmarks.
|
|
133
|
+
|
|
134
|
+
'
|
|
135
|
+
not_recommended_when: 'Do not use if only needing inference-only prompting, PEFT/LoRA
|
|
136
|
+
methods, or if lacking infrastructure for full-parameter SFT with 80GB+ VRAM per
|
|
137
|
+
GPU.
|
|
138
|
+
|
|
139
|
+
'
|
|
140
|
+
paper:
|
|
141
|
+
title: 'MathFusion: Enhancing Mathematical Problem-solving of LLM through Instruction
|
|
142
|
+
Fusion'
|
|
143
|
+
authors: Qizhi Pei, Lijun Wu, Zhuoshi Pan, Yu Li, Honglin Lin, Chenlin Ming, Xin
|
|
144
|
+
Gao, Conghui He, Rui Yan
|
|
145
|
+
venue: ACL 2025 Main
|
|
146
|
+
year: 2025
|
|
147
|
+
url: https://arxiv.org/abs/2503.16212
|
|
148
|
+
models_released:
|
|
149
|
+
- DeepSeekMath-7B-MathFusion
|
|
150
|
+
- Mistral-7B-MathFusion
|
|
151
|
+
- Llama3-8B-MathFusion
|
|
152
|
+
download:
|
|
153
|
+
url: https://github.com/ResearAI/DeepScientist/releases/download/aisb-v0.0.1/aisb.t3.016_mathfusion.zip
|
|
154
|
+
archive_type: zip
|
|
155
|
+
local_dir_name: paper-16-MathFusion
|
|
156
|
+
provider: github_release
|
|
157
|
+
repo: ResearAI/DeepScientist
|
|
158
|
+
tag: aisb-v0.0.1
|
|
159
|
+
asset_name: aisb.t3.016_mathfusion.zip
|
|
160
|
+
sha256: 105b308e4f4831e9b98e67b1c8ab5af3635c01d856307bd6adede1807856d544
|
|
161
|
+
size_bytes: 3883862
|
|
162
|
+
display:
|
|
163
|
+
palette_seed: chalk-indigo-proof
|
|
164
|
+
art_style: academic-modern
|
|
165
|
+
accent_priority: high
|
|
166
|
+
image_path: ../../../AISB/image/016_aisb.t3.016_mathfusion.jpg
|
|
@@ -0,0 +1,145 @@
|
|
|
1
|
+
id: aisb.t3.016_mathfusion
|
|
2
|
+
name: 数学融合
|
|
3
|
+
version: 0.1.0
|
|
4
|
+
one_line: 用于通过跨问题综合增强 7B-8B LLM 数学推理能力的指令融合框架。
|
|
5
|
+
task_description: 'MathFusion 是一个创新的指令融合框架,通过从现有问题对中综合新的训练问题来增强数学推理能力。它实现了三种融合策略:(1) 顺序融合通过共享变量链接相关问题,以建模解题依赖关系;(2) 并行融合结合类似问题以强化概念理解;(3) 条件融合创建情境感知的可选问题以增强推理灵活性。
|
|
6
|
+
|
|
7
|
+
本基准测试涵盖完整流程:使用 LLaMA-Factory 进行全参数 SFT 训练(DeepSpeed ZeRO-2、flash-attention FA2、bf16、cutoff 4096),随后在六个数学推理基准上评估,包括 GSM8K、MATH、CollegeMath、DeepMind-Mathematics、OlympiadBench 和 TheoremQA。报告的训练配置使用 3 个 epoch、batch size 128,在 8x NVIDIA A100 GPU 上运行。
|
|
8
|
+
|
|
9
|
+
'
|
|
10
|
+
task_mode: experiment_driven
|
|
11
|
+
requires_execution: true
|
|
12
|
+
requires_paper: true
|
|
13
|
+
integrity_level: cas_plus_canary
|
|
14
|
+
snapshot_status: external_eval_required
|
|
15
|
+
support_level: recovery
|
|
16
|
+
time_band: 2-4d
|
|
17
|
+
cost_band: very_high
|
|
18
|
+
difficulty: hard
|
|
19
|
+
data_access: public
|
|
20
|
+
primary_outputs:
|
|
21
|
+
- accuracy
|
|
22
|
+
- finetuned_checkpoint
|
|
23
|
+
- benchmark_report
|
|
24
|
+
launch_profiles:
|
|
25
|
+
- id: quick_check
|
|
26
|
+
label: 快速检查
|
|
27
|
+
description: '使用最小数据子集和减少的 epoch 数进行轻量级完整性检查,以验证流程完整性而不需要完整资源投入。
|
|
28
|
+
|
|
29
|
+
'
|
|
30
|
+
time_estimate: 1-2h
|
|
31
|
+
resource_estimate: 1x A100
|
|
32
|
+
- id: full_train_eval
|
|
33
|
+
label: 完整训练 + 评估
|
|
34
|
+
description: '在 MathFusionQA 上进行完整训练(全参数 SFT,共 3 个 epoch),随后在所有六个基准数据集上进行评估。标准配置需要 8x A100 GPU。
|
|
35
|
+
|
|
36
|
+
'
|
|
37
|
+
time_estimate: 2-4d
|
|
38
|
+
resource_estimate: 8x A100
|
|
39
|
+
dataset_download:
|
|
40
|
+
primary_method: mixed
|
|
41
|
+
sources:
|
|
42
|
+
- kind: huggingface
|
|
43
|
+
url: https://huggingface.co/datasets/QizhiPei/MathFusionQA
|
|
44
|
+
access: public
|
|
45
|
+
note: 主要指令融合数据集,包含原始、顺序、并行和条件分割
|
|
46
|
+
- kind: github_repo
|
|
47
|
+
url: https://github.com/hiyouga/LLaMA-Factory
|
|
48
|
+
access: public
|
|
49
|
+
note: 训练框架(v0.9.1)— 将 MathFusionQA 转换为 JSON 格式用于训练
|
|
50
|
+
- kind: github_repo
|
|
51
|
+
url: https://github.com/QwenLM/Qwen2.5-Math
|
|
52
|
+
access: public
|
|
53
|
+
note: 域内评估工具(gsm8k、math、mwpbench、deepmind-mathematics、olympiadbench)
|
|
54
|
+
- kind: github_repo
|
|
55
|
+
url: https://github.com/hkust-nlp/dart-math
|
|
56
|
+
access: public
|
|
57
|
+
note: 域外评估工具(theoremqa 等)
|
|
58
|
+
notes:
|
|
59
|
+
- MathFusionQA 必须在训练前转换为 LLaMA-Factory JSON 格式
|
|
60
|
+
- 评估需要 Qwen2.5-Math 和 DART-Math 两个工具
|
|
61
|
+
- 六个评估数据集:gsm8k、math、mwpbench/college-math、deepmind-mathematics、olympiadbench、theoremqa
|
|
62
|
+
credential_requirements:
|
|
63
|
+
mode: none
|
|
64
|
+
items: []
|
|
65
|
+
notes:
|
|
66
|
+
- 公共数据集和 GitHub 仓库不需要明确的 API 凭证
|
|
67
|
+
resources:
|
|
68
|
+
minimum:
|
|
69
|
+
cpu_cores: 16
|
|
70
|
+
ram_gb: 128
|
|
71
|
+
disk_gb: 300
|
|
72
|
+
gpu_count: 2
|
|
73
|
+
gpu_vram_gb: 80
|
|
74
|
+
recommended:
|
|
75
|
+
cpu_cores: 64
|
|
76
|
+
ram_gb: 256
|
|
77
|
+
disk_gb: 500
|
|
78
|
+
gpu_count: 8
|
|
79
|
+
gpu_vram_gb: 80
|
|
80
|
+
notes:
|
|
81
|
+
- 报告的 ACL 2025 训练使用 8x NVIDIA A100 80GB,3 个 epoch,batch size 128
|
|
82
|
+
- 使用 bf16 和 DeepSpeed ZeRO-2 的全参数 SFT 需要大量显存
|
|
83
|
+
- 评估期间 vLLM 推理的内存占用与训练相似
|
|
84
|
+
environment:
|
|
85
|
+
python: '3.10'
|
|
86
|
+
cuda: '12.1'
|
|
87
|
+
pytorch: 2.3.1
|
|
88
|
+
flash_attn: fa2
|
|
89
|
+
key_packages:
|
|
90
|
+
- transformers==4.46.1
|
|
91
|
+
- accelerate==0.34.2
|
|
92
|
+
- deepspeed==0.15.4
|
|
93
|
+
- vllm==0.5.3.post1
|
|
94
|
+
- sympy==1.12.1
|
|
95
|
+
- antlr4-python3-runtime==4.11.1
|
|
96
|
+
- pebble
|
|
97
|
+
- word2number
|
|
98
|
+
- boto3
|
|
99
|
+
- triton==2.3.1
|
|
100
|
+
- ipython
|
|
101
|
+
notes:
|
|
102
|
+
- 需要单独安装 flash-attn:pip install flash-attn --no-build-isolation
|
|
103
|
+
- 训练使用 LLaMA-Factory v0.9.1 和全参数 SFT
|
|
104
|
+
- 评估需要 dart-math 和 latex2sympy 包
|
|
105
|
+
risk_flags:
|
|
106
|
+
- long_runtime
|
|
107
|
+
- high_compute_cost
|
|
108
|
+
- multi_harness_dependency
|
|
109
|
+
risk_notes:
|
|
110
|
+
- 完整训练在 8x A100 上需要 2-4 天
|
|
111
|
+
- 具有外部评估依赖关系的多阶段流程
|
|
112
|
+
- 自定义运行需要了解 LLaMA-Factory 配置
|
|
113
|
+
recommended_when: '在指令融合下进行全参数数学推理 SFT 实验时使用此基准。非常适合探索跨问题综合、通过问题融合进行数据增强,或在域内和域外基准上评估数学推理改进的研究人员。
|
|
114
|
+
|
|
115
|
+
'
|
|
116
|
+
not_recommended_when: '如果只需要纯推理提示(不进行训练)或 PEFT/LoRA 方法,或者缺乏每个 GPU 80GB+ 显存、无法进行全参数 SFT 的基础设施,则不要使用。
|
|
117
|
+
|
|
118
|
+
'
|
|
119
|
+
paper:
|
|
120
|
+
title: 'MathFusion: Enhancing Mathematical Problem-solving of LLM through Instruction
|
|
121
|
+
Fusion'
|
|
122
|
+
authors: Qizhi Pei, Lijun Wu, Zhuoshi Pan, Yu Li, Honglin Lin, Chenlin Ming, Xin
|
|
123
|
+
Gao, Conghui He, Rui Yan
|
|
124
|
+
venue: ACL 2025 Main
|
|
125
|
+
year: 2025
|
|
126
|
+
url: https://arxiv.org/abs/2503.16212
|
|
127
|
+
models_released:
|
|
128
|
+
- DeepSeekMath-7B-MathFusion
|
|
129
|
+
- Mistral-7B-MathFusion
|
|
130
|
+
- Llama3-8B-MathFusion
|
|
131
|
+
download:
|
|
132
|
+
url: https://github.com/ResearAI/DeepScientist/releases/download/aisb-v0.0.1/aisb.t3.016_mathfusion.zip
|
|
133
|
+
archive_type: zip
|
|
134
|
+
local_dir_name: paper-16-MathFusion
|
|
135
|
+
provider: github_release
|
|
136
|
+
repo: ResearAI/DeepScientist
|
|
137
|
+
tag: aisb-v0.0.1
|
|
138
|
+
asset_name: aisb.t3.016_mathfusion.zip
|
|
139
|
+
sha256: 105b308e4f4831e9b98e67b1c8ab5af3635c01d856307bd6adede1807856d544
|
|
140
|
+
size_bytes: 3883862
|
|
141
|
+
display:
|
|
142
|
+
palette_seed: chalk-indigo-proof
|
|
143
|
+
art_style: academic-modern
|
|
144
|
+
accent_priority: high
|
|
145
|
+
image_path: ../../../AISB/image/016_aisb.t3.016_mathfusion.jpg
|
|
@@ -0,0 +1,171 @@
|
|
|
1
|
+
schema_version: 1
|
|
2
|
+
id: aisb.t3.017_multimodalglp
|
|
3
|
+
name: GLPN-LLM (Multimodal Fake News Detection)
|
|
4
|
+
version: 0.1.0
|
|
5
|
+
one_line: 'Train and evaluate a GCN-based multimodal fake-news classifier that integrates
|
|
6
|
+
GPT-4o pseudo labels via masked global label propagation on cross-modal similarity
|
|
7
|
+
graphs built from CLIP embeddings, across Twitter, PHEME, and Weibo datasets.
|
|
8
|
+
|
|
9
|
+
'
|
|
10
|
+
task_description: 'This benchmark implements the GLPN-LLM framework (ACL 2025) for
|
|
11
|
+
multimodal fake-news detection. The pipeline has two stages: (1) graph construction
|
|
12
|
+
(`construction.py`) builds a cross-modal similarity graph from CLIP text+image embeddings
|
|
13
|
+
of social-media posts, and (2) training/evaluation (`main.py`) runs a GCN (or UniMP
|
|
14
|
+
TransformerConv variant) with a Global Random Mask mechanism that integrates ground-truth
|
|
15
|
+
labels and GPT-4o-generated pseudo labels into node features while preventing label
|
|
16
|
+
leakage. The task is to optimize classification accuracy and macro-F1 on three benchmark
|
|
17
|
+
datasets—Twitter (17k tweets), PHEME (2k tweets), and Weibo (5.3k posts)—under the
|
|
18
|
+
paper''s label-propagation framework. Pre-computed pseudo labels from GPT-4o are
|
|
19
|
+
bundled as CSV files; no LLM API calls are required at runtime. The agent should
|
|
20
|
+
improve upon the baseline accuracy reported in the paper while preserving the core
|
|
21
|
+
GLPN-LLM architecture.
|
|
22
|
+
|
|
23
|
+
'
|
|
24
|
+
capability_tags:
|
|
25
|
+
- research_code_optimization
|
|
26
|
+
- multimodal_learning
|
|
27
|
+
- fake_news_detection
|
|
28
|
+
- graph_learning
|
|
29
|
+
- classification
|
|
30
|
+
- label_propagation
|
|
31
|
+
- pseudo_labeling
|
|
32
|
+
aisb_direction: T3
|
|
33
|
+
track_fit:
|
|
34
|
+
- paper_track
|
|
35
|
+
- benchmark_track
|
|
36
|
+
task_mode: experiment_driven
|
|
37
|
+
requires_execution: true
|
|
38
|
+
requires_paper: true
|
|
39
|
+
integrity_level: cas_plus_canary
|
|
40
|
+
cost_band: medium
|
|
41
|
+
time_band: 6-24h
|
|
42
|
+
difficulty: hard
|
|
43
|
+
data_access: public
|
|
44
|
+
snapshot_status: runnable
|
|
45
|
+
support_level: advanced
|
|
46
|
+
primary_outputs:
|
|
47
|
+
- accuracy
|
|
48
|
+
- f1_score
|
|
49
|
+
- pseudo_label_artifacts
|
|
50
|
+
- benchmark_report
|
|
51
|
+
launch_profiles:
|
|
52
|
+
- id: quick_check
|
|
53
|
+
label: Quick Check (single dataset)
|
|
54
|
+
description: 'Run construction.py then main.py on the Twitter dataset only to verify
|
|
55
|
+
the code path and produce accuracy/F1 on the 2k-tweet test split. Completes in
|
|
56
|
+
minutes on a single GPU.
|
|
57
|
+
|
|
58
|
+
'
|
|
59
|
+
- id: full_train_eval
|
|
60
|
+
label: Full Train + Eval (all three datasets)
|
|
61
|
+
description: 'Run the full graph construction and GCN training/evaluation pipeline
|
|
62
|
+
across Twitter, PHEME, and Weibo datasets end-to-end, reproducing Table 1 from
|
|
63
|
+
the paper. Requires changing the dataset_name variable in main.py or parameterising
|
|
64
|
+
via run.sh.
|
|
65
|
+
|
|
66
|
+
'
|
|
67
|
+
dataset_download:
|
|
68
|
+
primary_method: google_drive
|
|
69
|
+
sources:
|
|
70
|
+
- kind: google_drive
|
|
71
|
+
url: https://drive.google.com/file/d/1gPX-tAC1Vo6C8j8PV9IbAk8hbDhd1XMG/view?usp=drive_link
|
|
72
|
+
access: public
|
|
73
|
+
note: 'ZIP archive containing weibo/, twitter/, and pheme/ subdirectories with
|
|
74
|
+
CSV files, pre-computed CLIP embeddings (.pt), graph structures (.pt), and GPT-4o
|
|
75
|
+
pseudo-label CSVs. Extract into script/dataset/.
|
|
76
|
+
|
|
77
|
+
'
|
|
78
|
+
notes:
|
|
79
|
+
- Exact archive size is not documented; expect several GB after extraction given
|
|
80
|
+
17k+ tweet embeddings.
|
|
81
|
+
- Pre-computed GPT-4o pseudo labels are included as *_analysis_results.csv files;
|
|
82
|
+
no OpenAI API key needed at runtime.
|
|
83
|
+
credential_requirements:
|
|
84
|
+
mode: none
|
|
85
|
+
items: []
|
|
86
|
+
notes:
|
|
87
|
+
- GPT-4o pseudo labels are pre-computed and bundled in the dataset download.
|
|
88
|
+
- No API keys or external service credentials are required for training or evaluation.
|
|
89
|
+
resources:
|
|
90
|
+
minimum:
|
|
91
|
+
cpu_cores: 8
|
|
92
|
+
ram_gb: 32
|
|
93
|
+
disk_gb: 100
|
|
94
|
+
gpu_count: 1
|
|
95
|
+
gpu_vram_gb: 16
|
|
96
|
+
recommended:
|
|
97
|
+
cpu_cores: 16
|
|
98
|
+
ram_gb: 64
|
|
99
|
+
disk_gb: 200
|
|
100
|
+
gpu_count: 1
|
|
101
|
+
gpu_vram_gb: 24
|
|
102
|
+
environment:
|
|
103
|
+
python: '3.10'
|
|
104
|
+
cuda: '11.8'
|
|
105
|
+
pytorch: 2.7.1
|
|
106
|
+
flash_attn: null
|
|
107
|
+
key_packages:
|
|
108
|
+
- torch-geometric==2.6.1
|
|
109
|
+
- dgl==2.1.0
|
|
110
|
+
- openai-clip
|
|
111
|
+
- scikit-learn
|
|
112
|
+
- pandas
|
|
113
|
+
notes:
|
|
114
|
+
- The paper recommends installing CLIP directly from https://github.com/openai/CLIP.git.
|
|
115
|
+
- See script/requirements.txt for the full dependency list.
|
|
116
|
+
- Python and CUDA versions are estimated from PyTorch 2.7.1 compatibility; verify
|
|
117
|
+
against requirements.txt.
|
|
118
|
+
risk_flags:
|
|
119
|
+
- external_dataset_download
|
|
120
|
+
- dataset_name_hardcoded
|
|
121
|
+
- no_prior_execution
|
|
122
|
+
risk_notes:
|
|
123
|
+
- Dataset must be downloaded from Google Drive and extracted to script/dataset/ before
|
|
124
|
+
running.
|
|
125
|
+
- main.py has dataset_name hardcoded to 'twitter'; must be changed manually for PHEME/Weibo
|
|
126
|
+
evaluation.
|
|
127
|
+
- No benchmark execution was performed during the packaging pass; metric values are
|
|
128
|
+
unverified.
|
|
129
|
+
- The pseudo-label CSV filename in main.py is hardcoded as 'twitter_analysis_results.csv'
|
|
130
|
+
even when dataset_name changes, which may need fixing for weibo/pheme datasets.
|
|
131
|
+
- Graph construction (construction.py) may be slow on larger datasets due to pairwise
|
|
132
|
+
similarity computation.
|
|
133
|
+
recommended_when: 'Use this benchmark when you want a multimodal graph-learning task
|
|
134
|
+
combining text, image, and graph-structured social-media data with LLM-derived pseudo-label
|
|
135
|
+
refinement. Good for evaluating agents on research code optimization involving GNNs,
|
|
136
|
+
label propagation, and multimodal feature fusion on moderately-sized social-media
|
|
137
|
+
datasets.
|
|
138
|
+
|
|
139
|
+
'
|
|
140
|
+
not_recommended_when: 'Do not use this if you need a text-only benchmark, cannot download
|
|
141
|
+
multi-GB datasets from Google Drive, or want a benchmark with verified baseline
|
|
142
|
+
execution outputs. Also not suitable if you need end-to-end LLM inference as part
|
|
143
|
+
of the pipeline (pseudo labels are pre-computed).
|
|
144
|
+
|
|
145
|
+
'
|
|
146
|
+
paper:
|
|
147
|
+
title: Synergizing LLMs with Global Label Propagation for Multimodal Fake News Detection
|
|
148
|
+
authors:
|
|
149
|
+
- Shuguo Hu
|
|
150
|
+
- Jun Hu
|
|
151
|
+
- Huaiwen Zhang
|
|
152
|
+
venue: ACL 2025
|
|
153
|
+
year: 2025
|
|
154
|
+
url: https://arxiv.org/abs/2506.00488
|
|
155
|
+
commercial:
|
|
156
|
+
annual_fee: null
|
|
157
|
+
download:
|
|
158
|
+
url: https://github.com/ResearAI/DeepScientist/releases/download/aisb-v0.0.1/aisb.t3.017_multimodalglp.zip
|
|
159
|
+
archive_type: zip
|
|
160
|
+
local_dir_name: paper-17-MultimodalGLP
|
|
161
|
+
provider: github_release
|
|
162
|
+
repo: ResearAI/DeepScientist
|
|
163
|
+
tag: aisb-v0.0.1
|
|
164
|
+
asset_name: aisb.t3.017_multimodalglp.zip
|
|
165
|
+
sha256: 0f2ab3bc3b08fdb577994ecd08fbca2b2796342cebdf9c5f797bd6b327107350
|
|
166
|
+
size_bytes: 34225
|
|
167
|
+
display:
|
|
168
|
+
palette_seed: ruby-graph-signal
|
|
169
|
+
art_style: social-graph
|
|
170
|
+
accent_priority: high
|
|
171
|
+
image_path: ../image/017_aisb.t3.017_multimodalglp.jpg
|
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
schema_version: 1
|
|
2
|
+
id: aisb.t3.017_multimodalglp
|
|
3
|
+
name: GLPN-LLM(多模态虚假新闻检测)
|
|
4
|
+
version: 0.1.0
|
|
5
|
+
one_line: '训练并评估一个基于GCN的多模态虚假新闻分类器,该分类器通过掩码全局标签传播整合GPT-4o伪标签,利用CLIP嵌入构建跨模态相似图,在Twitter、PHEME和Weibo数据集上进行测试。'
|
|
6
|
+
task_description: '本基准实现了用于多模态虚假新闻检测的GLPN-LLM框架(ACL 2025)。该流程分为两个阶段:(1)图构建(`construction.py`)从社交媒体帖子的CLIP文本+图像嵌入构建跨模态相似图;(2)训练/评估(`main.py`)运行GCN(或UniMP TransformerConv变体),并使用全局随机掩码机制将真实标签和GPT-4o生成的伪标签整合到节点特征中,同时防止标签泄漏。任务是在论文的标签传播框架下,优化三个基准数据集——Twitter(17k条推文)、PHEME(2k条推文)和Weibo(5.3k条帖子)——的分类准确率和宏F1分数。GPT-4o预计算的伪标签以CSV文件形式打包;运行时无需调用LLM API。智能体应在保留核心GLPN-LLM架构的同时,提升论文中报告的基线准确率。'
|
|
7
|
+
capability_tags:
|
|
8
|
+
- research_code_optimization
|
|
9
|
+
- multimodal_learning
|
|
10
|
+
- fake_news_detection
|
|
11
|
+
- graph_learning
|
|
12
|
+
- classification
|
|
13
|
+
- label_propagation
|
|
14
|
+
- pseudo_labeling
|
|
15
|
+
aisb_direction: T3
|
|
16
|
+
track_fit:
|
|
17
|
+
- paper_track
|
|
18
|
+
- benchmark_track
|
|
19
|
+
task_mode: experiment_driven
|
|
20
|
+
requires_execution: true
|
|
21
|
+
requires_paper: true
|
|
22
|
+
integrity_level: cas_plus_canary
|
|
23
|
+
cost_band: medium
|
|
24
|
+
time_band: 6-24h
|
|
25
|
+
difficulty: hard
|
|
26
|
+
data_access: public
|
|
27
|
+
snapshot_status: runnable
|
|
28
|
+
support_level: advanced
|
|
29
|
+
primary_outputs:
|
|
30
|
+
- accuracy
|
|
31
|
+
- f1_score
|
|
32
|
+
- pseudo_label_artifacts
|
|
33
|
+
- benchmark_report
|
|
34
|
+
launch_profiles:
|
|
35
|
+
- id: quick_check
|
|
36
|
+
label: 快速检查(单数据集)
|
|
37
|
+
description: '仅在Twitter数据集上运行construction.py,然后运行main.py,以验证代码路径并在2k条推文测试集上产生准确率/F1分数。在单个GPU上几分钟内完成。'
|
|
38
|
+
- id: full_train_eval
|
|
39
|
+
label: 完整训练+评估(全部三个数据集)
|
|
40
|
+
description: '在Twitter、PHEME和Weibo数据集上端到端运行完整的图构建和GCN训练/评估流程,复现论文中的表1。需要更改main.py中的dataset_name变量或通过run.sh参数化。'
|
|
41
|
+
dataset_download:
|
|
42
|
+
primary_method: google_drive
|
|
43
|
+
sources:
|
|
44
|
+
- kind: google_drive
|
|
45
|
+
url: https://drive.google.com/file/d/1gPX-tAC1Vo6C8j8PV9IbAk8hbDhd1XMG/view?usp=drive_link
|
|
46
|
+
access: public
|
|
47
|
+
note: 'ZIP压缩包,包含weibo/、twitter/和pheme/子目录,其中包含CSV文件、预计算的CLIP嵌入(.pt)、图结构(.pt)和GPT-4o伪标签CSV文件。解压到script/dataset/目录。'
|
|
48
|
+
notes:
|
|
49
|
+
- 确切压缩包大小未记录;考虑到17k+条推文嵌入,解压后预计需要数GB空间。
|
|
50
|
+
- 预计算的GPT-4o伪标签以*_analysis_results.csv文件形式包含;运行时无需OpenAI API密钥。
|
|
51
|
+
credential_requirements:
|
|
52
|
+
mode: none
|
|
53
|
+
items: []
|
|
54
|
+
notes:
|
|
55
|
+
- GPT-4o伪标签已预计算并打包在数据集下载文件中。
|
|
56
|
+
- 训练和评估无需API密钥或外部服务凭证。
|
|
57
|
+
resources:
|
|
58
|
+
minimum:
|
|
59
|
+
cpu_cores: 8
|
|
60
|
+
ram_gb: 32
|
|
61
|
+
disk_gb: 100
|
|
62
|
+
gpu_count: 1
|
|
63
|
+
gpu_vram_gb: 16
|
|
64
|
+
recommended:
|
|
65
|
+
cpu_cores: 16
|
|
66
|
+
ram_gb: 64
|
|
67
|
+
disk_gb: 200
|
|
68
|
+
gpu_count: 1
|
|
69
|
+
gpu_vram_gb: 24
|
|
70
|
+
environment:
|
|
71
|
+
python: '3.10'
|
|
72
|
+
cuda: '11.8'
|
|
73
|
+
pytorch: 2.7.1
|
|
74
|
+
flash_attn: null
|
|
75
|
+
key_packages:
|
|
76
|
+
- torch-geometric==2.6.1
|
|
77
|
+
- dgl==2.1.0
|
|
78
|
+
- openai-clip
|
|
79
|
+
- scikit-learn
|
|
80
|
+
- pandas
|
|
81
|
+
notes:
|
|
82
|
+
- 论文推荐直接从https://github.com/openai/CLIP.git安装CLIP。
|
|
83
|
+
- 完整依赖列表请参见script/requirements.txt。
|
|
84
|
+
- Python和CUDA版本是根据PyTorch 2.7.1兼容性估算的;请根据requirements.txt进行验证。
|
|
85
|
+
risk_flags:
|
|
86
|
+
- external_dataset_download
|
|
87
|
+
- dataset_name_hardcoded
|
|
88
|
+
- no_prior_execution
|
|
89
|
+
risk_notes:
|
|
90
|
+
- 必须在运行前从Google Drive下载数据集并解压到script/dataset/目录。
|
|
91
|
+
- main.py中的dataset_name硬编码为'twitter';评估PHEME/Weibo时必须手动更改。
|
|
92
|
+
- 打包过程中未执行基准测试;指标值未经验证。
|
|
93
|
+
- main.py中的伪标签CSV文件名硬编码为'twitter_analysis_results.csv',即使dataset_name更改时也是如此,weibo/pheme数据集可能需要修复。
|
|
94
|
+
- 图构建(construction.py)在较大数据集上可能因成对相似度计算而较慢。
|
|
95
|
+
recommended_when: '当您需要结合文本、图像和图结构社交媒体数据的多模态图学习任务,并使用LLM衍生的伪标签进行优化时,可以使用此基准。适合评估智能体在涉及GNN、标签传播和多模态特征融合的中等规模社交媒体数据集上的研究代码优化能力。'
|
|
96
|
+
not_recommended_when: '如果需要纯文本基准、无法从Google Drive下载数GB的数据集,或想要有已验证基线执行输出的基准,请勿使用。如果需要将端到端LLM推理作为流程的一部分(伪标签是预计算的),也不适用。'
|
|
97
|
+
paper:
|
|
98
|
+
title: Synergizing LLMs with Global Label Propagation for Multimodal Fake News Detection
|
|
99
|
+
authors:
|
|
100
|
+
- Shuguo Hu
|
|
101
|
+
- Jun Hu
|
|
102
|
+
- Huaiwen Zhang
|
|
103
|
+
venue: ACL 2025
|
|
104
|
+
year: 2025
|
|
105
|
+
url: https://arxiv.org/abs/2506.00488
|
|
106
|
+
commercial:
|
|
107
|
+
annual_fee: null
|
|
108
|
+
download:
|
|
109
|
+
url: https://github.com/ResearAI/DeepScientist/releases/download/aisb-v0.0.1/aisb.t3.017_multimodalglp.zip
|
|
110
|
+
archive_type: zip
|
|
111
|
+
local_dir_name: paper-17-MultimodalGLP
|
|
112
|
+
provider: github_release
|
|
113
|
+
repo: ResearAI/DeepScientist
|
|
114
|
+
tag: aisb-v0.0.1
|
|
115
|
+
asset_name: aisb.t3.017_multimodalglp.zip
|
|
116
|
+
sha256: 0f2ab3bc3b08fdb577994ecd08fbca2b2796342cebdf9c5f797bd6b327107350
|
|
117
|
+
size_bytes: 34225
|
|
118
|
+
display:
|
|
119
|
+
palette_seed: ruby-graph-signal
|
|
120
|
+
art_style: social-graph
|
|
121
|
+
accent_priority: high
|
|
122
|
+
image_path: ../image/017_aisb.t3.017_multimodalglp.jpg
|