@researai/deepscientist 1.5.16 → 1.6.0
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between those package versions as they appear in their respective public registries.
- package/AGENTS.md +309 -130
- package/AISB/catalog/aisb.b1.agentic_coding.yaml +244 -0
- package/AISB/catalog/aisb.b10.climate_earth.yaml +235 -0
- package/AISB/catalog/aisb.b11.model_efficiency.yaml +231 -0
- package/AISB/catalog/aisb.b12.embodied_ai.yaml +238 -0
- package/AISB/catalog/aisb.b2.agent_systems.yaml +229 -0
- package/AISB/catalog/aisb.b3.self_evolving_rl.yaml +237 -0
- package/AISB/catalog/aisb.b4.lm_reasoning.yaml +240 -0
- package/AISB/catalog/aisb.b5.math_proof.yaml +235 -0
- package/AISB/catalog/aisb.b6.research_process.yaml +243 -0
- package/AISB/catalog/aisb.b7.multimodal_fusion.yaml +232 -0
- package/AISB/catalog/aisb.b8.lifesci_drug.yaml +275 -0
- package/AISB/catalog/aisb.b9.material_science.yaml +237 -0
- package/AISB/catalog/aisb.t3.001_savvy.yaml +159 -0
- package/AISB/catalog/aisb.t3.001_savvy.zh.yaml +121 -0
- package/AISB/catalog/aisb.t3.002_pinet.yaml +189 -0
- package/AISB/catalog/aisb.t3.002_pinet.zh.yaml +130 -0
- package/AISB/catalog/aisb.t3.004_decentralattn.yaml +184 -0
- package/AISB/catalog/aisb.t3.004_decentralattn.zh.yaml +153 -0
- package/AISB/catalog/aisb.t3.005_tsae.yaml +193 -0
- package/AISB/catalog/aisb.t3.005_tsae.zh.yaml +139 -0
- package/AISB/catalog/aisb.t3.006_physense.yaml +194 -0
- package/AISB/catalog/aisb.t3.006_physense.zh.yaml +118 -0
- package/AISB/catalog/aisb.t3.007_reasoningiqa.yaml +169 -0
- package/AISB/catalog/aisb.t3.007_reasoningiqa.zh.yaml +133 -0
- package/AISB/catalog/aisb.t3.008_meanflows.yaml +188 -0
- package/AISB/catalog/aisb.t3.008_meanflows.zh.yaml +140 -0
- package/AISB/catalog/aisb.t3.009_scoremissing.yaml +179 -0
- package/AISB/catalog/aisb.t3.009_scoremissing.zh.yaml +119 -0
- package/AISB/catalog/aisb.t3.010_suitabilityfilter.yaml +221 -0
- package/AISB/catalog/aisb.t3.010_suitabilityfilter.zh.yaml +141 -0
- package/AISB/catalog/aisb.t3.011_osd.yaml +206 -0
- package/AISB/catalog/aisb.t3.011_osd.zh.yaml +163 -0
- package/AISB/catalog/aisb.t3.012_efficientqat.yaml +206 -0
- package/AISB/catalog/aisb.t3.012_efficientqat.zh.yaml +159 -0
- package/AISB/catalog/aisb.t3.013_appl.yaml +152 -0
- package/AISB/catalog/aisb.t3.013_appl.zh.yaml +126 -0
- package/AISB/catalog/aisb.t3.014_piguard.yaml +207 -0
- package/AISB/catalog/aisb.t3.014_piguard.zh.yaml +164 -0
- package/AISB/catalog/aisb.t3.015_frspec.yaml +209 -0
- package/AISB/catalog/aisb.t3.015_frspec.zh.yaml +163 -0
- package/AISB/catalog/aisb.t3.016_mathfusion.yaml +166 -0
- package/AISB/catalog/aisb.t3.016_mathfusion.zh.yaml +145 -0
- package/AISB/catalog/aisb.t3.017_multimodalglp.yaml +171 -0
- package/AISB/catalog/aisb.t3.017_multimodalglp.zh.yaml +122 -0
- package/AISB/catalog/aisb.t3.018_cotsynth.yaml +206 -0
- package/AISB/catalog/aisb.t3.018_cotsynth.zh.yaml +162 -0
- package/AISB/catalog/aisb.t3.019_dyscaleut.yaml +211 -0
- package/AISB/catalog/aisb.t3.019_dyscaleut.zh.yaml +148 -0
- package/AISB/catalog/aisb.t3.020_aristotle.yaml +173 -0
- package/AISB/catalog/aisb.t3.020_aristotle.zh.yaml +119 -0
- package/AISB/catalog/aisb.t3.021_tokenrecycling.yaml +160 -0
- package/AISB/catalog/aisb.t3.021_tokenrecycling.zh.yaml +129 -0
- package/AISB/catalog/aisb.t3.022_chainofreasoning.yaml +204 -0
- package/AISB/catalog/aisb.t3.022_chainofreasoning.zh.yaml +161 -0
- package/AISB/catalog/aisb.t3.023_guidedembed.yaml +211 -0
- package/AISB/catalog/aisb.t3.023_guidedembed.zh.yaml +189 -0
- package/AISB/catalog/aisb.t3.024_outputcentric.yaml +148 -0
- package/AISB/catalog/aisb.t3.024_outputcentric.zh.yaml +131 -0
- package/AISB/catalog/aisb.t3.025_deeper.yaml +143 -0
- package/AISB/catalog/aisb.t3.025_deeper.zh.yaml +116 -0
- package/AISB/catalog/aisb.t3.026_gartkg.yaml +195 -0
- package/AISB/catalog/aisb.t3.026_gartkg.zh.yaml +127 -0
- package/AISB/catalog/aisb.t3.027_citeeval.yaml +182 -0
- package/AISB/catalog/aisb.t3.027_citeeval.zh.yaml +135 -0
- package/AISB/catalog/aisb.t3.028_sbam.yaml +206 -0
- package/AISB/catalog/aisb.t3.028_sbam.zh.yaml +166 -0
- package/AISB/catalog/aisb.t3.029_cdqgeoembed.yaml +224 -0
- package/AISB/catalog/aisb.t3.029_cdqgeoembed.zh.yaml +142 -0
- package/AISB/catalog/aisb.t3.030_processrm.yaml +211 -0
- package/AISB/catalog/aisb.t3.030_processrm.zh.yaml +166 -0
- package/AISB/catalog/aisb.t3.031_circuitstability.yaml +172 -0
- package/AISB/catalog/aisb.t3.031_circuitstability.zh.yaml +134 -0
- package/AISB/catalog/aisb.t3.032_ptsolver.yaml +169 -0
- package/AISB/catalog/aisb.t3.032_ptsolver.zh.yaml +135 -0
- package/AISB/catalog/aisb.t3.033_gcse.yaml +144 -0
- package/AISB/catalog/aisb.t3.033_gcse.zh.yaml +126 -0
- package/AISB/catalog/aisb.t3.034_ensemblewm.yaml +183 -0
- package/AISB/catalog/aisb.t3.034_ensemblewm.zh.yaml +146 -0
- package/AISB/catalog/aisb.t3.035_moralvalueswa.yaml +207 -0
- package/AISB/catalog/aisb.t3.035_moralvalueswa.zh.yaml +165 -0
- package/AISB/catalog/aisb.t3.036_weakstrongpref.yaml +210 -0
- package/AISB/catalog/aisb.t3.036_weakstrongpref.zh.yaml +194 -0
- package/AISB/catalog/aisb.t3.037_dementiamask.yaml +172 -0
- package/AISB/catalog/aisb.t3.037_dementiamask.zh.yaml +132 -0
- package/AISB/catalog/aisb.t3.038_tinysam.yaml +284 -0
- package/AISB/catalog/aisb.t3.038_tinysam.zh.yaml +240 -0
- package/AISB/catalog/aisb.t3.039_calf.yaml +224 -0
- package/AISB/catalog/aisb.t3.039_calf.zh.yaml +194 -0
- package/AISB/catalog/aisb.t3.040_graniteguardian.yaml +199 -0
- package/AISB/catalog/aisb.t3.040_graniteguardian.zh.yaml +174 -0
- package/AISB/catalog/aisb.t3.041_amdm.yaml +149 -0
- package/AISB/catalog/aisb.t3.041_amdm.zh.yaml +137 -0
- package/AISB/catalog/aisb.t3.042_xpatch.yaml +216 -0
- package/AISB/catalog/aisb.t3.042_xpatch.zh.yaml +182 -0
- package/AISB/catalog/aisb.t3.043_vhm.yaml +268 -0
- package/AISB/catalog/aisb.t3.043_vhm.zh.yaml +193 -0
- package/AISB/catalog/aisb.t3.044_rgvi.yaml +224 -0
- package/AISB/catalog/aisb.t3.044_rgvi.zh.yaml +176 -0
- package/AISB/catalog/aisb.t3.045_pslstm.yaml +203 -0
- package/AISB/catalog/aisb.t3.045_pslstm.zh.yaml +179 -0
- package/AISB/catalog/aisb.t3.046_nonstatts.yaml +208 -0
- package/AISB/catalog/aisb.t3.046_nonstatts.zh.yaml +194 -0
- package/AISB/catalog/aisb.t3.047_timepfn.yaml +156 -0
- package/AISB/catalog/aisb.t3.047_timepfn.zh.yaml +124 -0
- package/AISB/catalog/aisb.t3.048_proxyspex.yaml +148 -0
- package/AISB/catalog/aisb.t3.048_proxyspex.zh.yaml +125 -0
- package/AISB/catalog/aisb.t3.049_hogwildinference.yaml +183 -0
- package/AISB/catalog/aisb.t3.049_hogwildinference.zh.yaml +138 -0
- package/AISB/catalog/aisb.t3.050_causalpfn.yaml +214 -0
- package/AISB/catalog/aisb.t3.050_causalpfn.zh.yaml +190 -0
- package/AISB/catalog/aisb.t3.051_flashtp.yaml +169 -0
- package/AISB/catalog/aisb.t3.051_flashtp.zh.yaml +124 -0
- package/AISB/catalog/aisb.t3.052_nsdiff.yaml +155 -0
- package/AISB/catalog/aisb.t3.052_nsdiff.zh.yaml +138 -0
- package/AISB/catalog/aisb.t3.053_k2vae.yaml +158 -0
- package/AISB/catalog/aisb.t3.053_k2vae.zh.yaml +132 -0
- package/AISB/catalog/aisb.t3.054_timebase.yaml +178 -0
- package/AISB/catalog/aisb.t3.054_timebase.zh.yaml +158 -0
- package/AISB/catalog/aisb.t3.055_csbrain.yaml +238 -0
- package/AISB/catalog/aisb.t3.055_csbrain.zh.yaml +184 -0
- package/AISB/catalog/aisb.t3.056_infosam.yaml +224 -0
- package/AISB/catalog/aisb.t3.056_infosam.zh.yaml +189 -0
- package/AISB/catalog/aisb.t3.057_mdreid.yaml +129 -0
- package/AISB/catalog/aisb.t3.057_mdreid.zh.yaml +117 -0
- package/AISB/catalog/aisb.t3.058_mindglitch.yaml +171 -0
- package/AISB/catalog/aisb.t3.058_mindglitch.zh.yaml +145 -0
- package/AISB/catalog/aisb.t3.059_selfsupervised.yaml +154 -0
- package/AISB/catalog/aisb.t3.059_selfsupervised.zh.yaml +125 -0
- package/AISB/catalog/aisb.t3.060_iaggad.yaml +121 -0
- package/AISB/catalog/aisb.t3.060_iaggad.zh.yaml +100 -0
- package/AISB/catalog/aisb.t3.061_hsgkn.yaml +136 -0
- package/AISB/catalog/aisb.t3.061_hsgkn.zh.yaml +113 -0
- package/AISB/catalog/aisb.t3.062_visionts.yaml +237 -0
- package/AISB/catalog/aisb.t3.062_visionts.zh.yaml +216 -0
- package/AISB/catalog/aisb.t3.063_tsrag.yaml +162 -0
- package/AISB/catalog/aisb.t3.063_tsrag.zh.yaml +138 -0
- package/AISB/catalog/aisb.t3.064_pir.yaml +221 -0
- package/AISB/catalog/aisb.t3.064_pir.zh.yaml +197 -0
- package/AISB/catalog/aisb.t3.065_proteinbinding.yaml +234 -0
- package/AISB/catalog/aisb.t3.065_proteinbinding.zh.yaml +167 -0
- package/AISB/catalog/aisb.t3.066_tropicalattention.yaml +267 -0
- package/AISB/catalog/aisb.t3.066_tropicalattention.zh.yaml +229 -0
- package/AISB/catalog/aisb.t3.067_kanad.yaml +193 -0
- package/AISB/catalog/aisb.t3.067_kanad.zh.yaml +167 -0
- package/AISB/catalog/aisb.t3.068_sempo.yaml +187 -0
- package/AISB/catalog/aisb.t3.068_sempo.zh.yaml +148 -0
- package/AISB/catalog/aisb.t3.069_treehfd.yaml +129 -0
- package/AISB/catalog/aisb.t3.069_treehfd.zh.yaml +111 -0
- package/AISB/catalog/aisb.t3.070_certifiedunlearning.yaml +224 -0
- package/AISB/catalog/aisb.t3.070_certifiedunlearning.zh.yaml +171 -0
- package/AISB/catalog/aisb.t3.071_neuralmjd.yaml +142 -0
- package/AISB/catalog/aisb.t3.071_neuralmjd.zh.yaml +120 -0
- package/AISB/catalog/aisb.t3.072_fedgmt.yaml +181 -0
- package/AISB/catalog/aisb.t3.072_fedgmt.zh.yaml +158 -0
- package/AISB/catalog/aisb.t3.073_rld.yaml +161 -0
- package/AISB/catalog/aisb.t3.073_rld.zh.yaml +129 -0
- package/AISB/catalog/aisb.t3.074_lsvi.yaml +163 -0
- package/AISB/catalog/aisb.t3.074_lsvi.zh.yaml +129 -0
- package/AISB/catalog/aisb.t3.075_treeslicedentropy.yaml +201 -0
- package/AISB/catalog/aisb.t3.075_treeslicedentropy.zh.yaml +148 -0
- package/AISB/catalog/aisb.t3.076_aanet.yaml +169 -0
- package/AISB/catalog/aisb.t3.076_aanet.zh.yaml +129 -0
- package/AISB/catalog/aisb.t3.077_cmnn.yaml +199 -0
- package/AISB/catalog/aisb.t3.077_cmnn.zh.yaml +165 -0
- package/AISB/catalog/aisb.t3.078_conformalanomaly.yaml +146 -0
- package/AISB/catalog/aisb.t3.078_conformalanomaly.zh.yaml +117 -0
- package/AISB/catalog/aisb.t3.079_dpfkmeans.yaml +131 -0
- package/AISB/catalog/aisb.t3.079_dpfkmeans.zh.yaml +104 -0
- package/AISB/catalog/aisb.t3.080_latentscorereweight.yaml +169 -0
- package/AISB/catalog/aisb.t3.080_latentscorereweight.zh.yaml +123 -0
- package/AISB/catalog/aisb.t3.081_qmamba.yaml +150 -0
- package/AISB/catalog/aisb.t3.081_qmamba.zh.yaml +117 -0
- package/AISB/catalog/aisb.t3.082_onlinellmrouting.yaml +160 -0
- package/AISB/catalog/aisb.t3.082_onlinellmrouting.zh.yaml +133 -0
- package/AISB/catalog/aisb.t3.083_starformer.yaml +178 -0
- package/AISB/catalog/aisb.t3.083_starformer.zh.yaml +140 -0
- package/AISB/catalog/aisb.t3.084_ift.yaml +139 -0
- package/AISB/catalog/aisb.t3.084_ift.zh.yaml +111 -0
- package/AISB/catalog/aisb.t3.085_neuralsurv.yaml +183 -0
- package/AISB/catalog/aisb.t3.085_neuralsurv.zh.yaml +143 -0
- package/AISB/catalog/aisb.t3.086_stella.yaml +197 -0
- package/AISB/catalog/aisb.t3.086_stella.zh.yaml +142 -0
- package/AISB/catalog/aisb.t3.087_moses.yaml +167 -0
- package/AISB/catalog/aisb.t3.087_moses.zh.yaml +132 -0
- package/AISB/catalog/aisb.t3.088_channelnorm.yaml +140 -0
- package/AISB/catalog/aisb.t3.088_channelnorm.zh.yaml +109 -0
- package/AISB/catalog/aisb.t3.089_causalvelocity.yaml +730 -0
- package/AISB/catalog/aisb.t3.089_causalvelocity.zh.yaml +668 -0
- package/AISB/catalog/aisb.t3.090_rstib.yaml +144 -0
- package/AISB/catalog/aisb.t3.090_rstib.zh.yaml +109 -0
- package/AISB/catalog/aisb.t3.091_timeawarecausal.yaml +132 -0
- package/AISB/catalog/aisb.t3.091_timeawarecausal.zh.yaml +107 -0
- package/AISB/catalog/aisb.t3.092_kmeanslocalopt.yaml +138 -0
- package/AISB/catalog/aisb.t3.092_kmeanslocalopt.zh.yaml +110 -0
- package/AISB/catalog/aisb.t3.093_fedwmsam.yaml +134 -0
- package/AISB/catalog/aisb.t3.093_fedwmsam.zh.yaml +106 -0
- package/AISB/catalog/aisb.t3.094_boundre.yaml +147 -0
- package/AISB/catalog/aisb.t3.094_boundre.zh.yaml +114 -0
- package/AISB/catalog/aisb.t3.095_fastfeaturecp.yaml +153 -0
- package/AISB/catalog/aisb.t3.095_fastfeaturecp.zh.yaml +118 -0
- package/AISB/catalog/aisb.t3.096_m3svm.yaml +189 -0
- package/AISB/catalog/aisb.t3.096_m3svm.zh.yaml +149 -0
- package/AISB/catalog/aisb.t3.097_wassersteintl.yaml +212 -0
- package/AISB/catalog/aisb.t3.097_wassersteintl.zh.yaml +169 -0
- package/AISB/catalog/aisb.t3.098_xmahalanobis.yaml +171 -0
- package/AISB/catalog/aisb.t3.098_xmahalanobis.zh.yaml +127 -0
- package/AISB/catalog/aisb.t3.099_ollalanding.yaml +248 -0
- package/AISB/catalog/aisb.t3.099_ollalanding.zh.yaml +182 -0
- package/AISB/catalog/aisb.t3.100_invmissingdata.yaml +179 -0
- package/AISB/catalog/aisb.t3.100_invmissingdata.zh.yaml +150 -0
- package/AISB/catalog/aisb.t3.101_acia.yaml +164 -0
- package/AISB/catalog/aisb.t3.101_acia.zh.yaml +109 -0
- package/AISB/catalog/aisb.t3.102_stochasticff.yaml +178 -0
- package/AISB/catalog/aisb.t3.102_stochasticff.zh.yaml +130 -0
- package/AISB/catalog/aisb.t3.103_qdcp.yaml +150 -0
- package/AISB/catalog/aisb.t3.103_qdcp.zh.yaml +116 -0
- package/AISB/catalog/aisb.t3.104_balancedactiveinf.yaml +137 -0
- package/AISB/catalog/aisb.t3.104_balancedactiveinf.zh.yaml +104 -0
- package/AISB/catalog/aisb.t3.105_binaryclasseval.yaml +161 -0
- package/AISB/catalog/aisb.t3.105_binaryclasseval.zh.yaml +130 -0
- package/AISB/image/001_aisb.t3.001_savvy.jpg +0 -0
- package/AISB/image/002_aisb.t3.002_pinet.jpg +0 -0
- package/AISB/image/003_aisb.t3.003_dmsqd.jpg +0 -0
- package/AISB/image/004_aisb.t3.004_decentralattn.jpg +0 -0
- package/AISB/image/005_aisb.t3.005_tsae.jpg +0 -0
- package/AISB/image/006_aisb.t3.006_physense.jpg +0 -0
- package/AISB/image/007_aisb.t3.007_reasoningiqa.jpg +0 -0
- package/AISB/image/008_aisb.t3.008_meanflows.jpg +0 -0
- package/AISB/image/009_aisb.t3.009_scoremissing.jpg +0 -0
- package/AISB/image/010_aisb.t3.010_suitabilityfilter.jpg +0 -0
- package/AISB/image/011_aisb.t3.011_osd.jpg +0 -0
- package/AISB/image/012_aisb.t3.012_efficientqat.jpg +0 -0
- package/AISB/image/013_aisb.t3.013_appl.jpg +0 -0
- package/AISB/image/014_aisb.t3.014_piguard.jpg +0 -0
- package/AISB/image/015_aisb.t3.015_frspec.jpg +0 -0
- package/AISB/image/016_aisb.t3.016_mathfusion.jpg +0 -0
- package/AISB/image/017_aisb.t3.017_multimodalglp.jpg +0 -0
- package/AISB/image/018_aisb.t3.018_cotsynth.jpg +0 -0
- package/AISB/image/019_aisb.t3.019_dyscaleut.jpg +0 -0
- package/AISB/image/020_aisb.t3.020_aristotle.jpg +0 -0
- package/AISB/image/021_aisb.t3.021_tokenrecycling.jpg +0 -0
- package/AISB/image/022_aisb.t3.022_chainofreasoning.jpg +0 -0
- package/AISB/image/023_aisb.t3.023_guidedembed.jpg +0 -0
- package/AISB/image/024_aisb.t3.024_outputcentric.jpg +0 -0
- package/AISB/image/025_aisb.t3.025_deeper.jpg +0 -0
- package/AISB/image/026_aisb.t3.026_gartkg.jpg +0 -0
- package/AISB/image/027_aisb.t3.027_citeeval.jpg +0 -0
- package/AISB/image/028_aisb.t3.028_sbam.jpg +0 -0
- package/AISB/image/029_aisb.t3.029_cdqgeoembed.jpg +0 -0
- package/AISB/image/030_aisb.t3.030_processrm.jpg +0 -0
- package/AISB/image/031_aisb.t3.031_circuitstability.jpg +0 -0
- package/AISB/image/032_aisb.t3.032_ptsolver.jpg +0 -0
- package/AISB/image/033_aisb.t3.033_gcse.jpg +0 -0
- package/AISB/image/034_aisb.t3.034_ensemblewm.jpg +0 -0
- package/AISB/image/035_aisb.t3.035_moralvalueswa.jpg +0 -0
- package/AISB/image/036_aisb.t3.036_weakstrongpref.jpg +0 -0
- package/AISB/image/037_aisb.t3.037_dementiamask.jpg +0 -0
- package/AISB/image/038_aisb.t3.038_tinysam.jpg +0 -0
- package/AISB/image/039_aisb.t3.039_calf.jpg +0 -0
- package/AISB/image/040_aisb.t3.040_graniteguardian.jpg +0 -0
- package/AISB/image/041_aisb.t3.041_amdm.jpg +0 -0
- package/AISB/image/042_aisb.t3.042_xpatch.jpg +0 -0
- package/AISB/image/043_aisb.t3.043_vhm.jpg +0 -0
- package/AISB/image/044_aisb.t3.044_rgvi.jpg +0 -0
- package/AISB/image/045_aisb.t3.045_pslstm.jpg +0 -0
- package/AISB/image/046_aisb.t3.046_nonstatts.jpg +0 -0
- package/AISB/image/047_aisb.t3.047_timepfn.jpg +0 -0
- package/AISB/image/048_aisb.t3.048_proxyspex.jpg +0 -0
- package/AISB/image/049_aisb.t3.049_hogwildinference.jpg +0 -0
- package/AISB/image/050_aisb.t3.050_causalpfn.jpg +0 -0
- package/AISB/image/051_aisb.t3.051_flashtp.jpg +0 -0
- package/AISB/image/052_aisb.t3.052_nsdiff.jpg +0 -0
- package/AISB/image/053_aisb.t3.053_k2vae.jpg +0 -0
- package/AISB/image/054_aisb.t3.054_timebase.jpg +0 -0
- package/AISB/image/055_aisb.t3.055_csbrain.jpg +0 -0
- package/AISB/image/056_aisb.t3.056_infosam.jpg +0 -0
- package/AISB/image/057_aisb.t3.057_mdreid.jpg +0 -0
- package/AISB/image/058_aisb.t3.058_mindglitch.jpg +0 -0
- package/AISB/image/059_aisb.t3.059_selfsupervised.jpg +0 -0
- package/AISB/image/060_aisb.t3.060_iaggad.jpg +0 -0
- package/AISB/image/061_aisb.t3.061_hsgkn.jpg +0 -0
- package/AISB/image/062_aisb.t3.062_visionts.jpg +0 -0
- package/AISB/image/063_aisb.t3.063_tsrag.jpg +0 -0
- package/AISB/image/064_aisb.t3.064_pir.jpg +0 -0
- package/AISB/image/065_aisb.t3.065_proteinbinding.jpg +0 -0
- package/AISB/image/066_aisb.t3.066_tropicalattention.jpg +0 -0
- package/AISB/image/067_aisb.t3.067_kanad.jpg +0 -0
- package/AISB/image/068_aisb.t3.068_sempo.jpg +0 -0
- package/AISB/image/069_aisb.t3.069_treehfd.jpg +0 -0
- package/AISB/image/070_aisb.t3.070_certifiedunlearning.jpg +0 -0
- package/AISB/image/071_aisb.t3.071_neuralmjd.jpg +0 -0
- package/AISB/image/072_aisb.t3.072_fedgmt.jpg +0 -0
- package/AISB/image/073_aisb.t3.073_rld.jpg +0 -0
- package/AISB/image/074_aisb.t3.074_lsvi.jpg +0 -0
- package/AISB/image/075_aisb.t3.075_treeslicedentropy.jpg +0 -0
- package/AISB/image/076_aisb.t3.076_aanet.jpg +0 -0
- package/AISB/image/077_aisb.t3.077_cmnn.jpg +0 -0
- package/AISB/image/078_aisb.t3.078_conformalanomaly.jpg +0 -0
- package/AISB/image/079_aisb.t3.079_dpfkmeans.jpg +0 -0
- package/AISB/image/080_aisb.t3.080_latentscorereweight.jpg +0 -0
- package/AISB/image/081_aisb.t3.081_qmamba.jpg +0 -0
- package/AISB/image/082_aisb.t3.082_onlinellmrouting.jpg +0 -0
- package/AISB/image/083_aisb.t3.083_starformer.jpg +0 -0
- package/AISB/image/084_aisb.t3.084_ift.jpg +0 -0
- package/AISB/image/085_aisb.t3.085_neuralsurv.jpg +0 -0
- package/AISB/image/086_aisb.t3.086_stella.jpg +0 -0
- package/AISB/image/087_aisb.t3.087_moses.jpg +0 -0
- package/AISB/image/088_aisb.t3.088_channelnorm.jpg +0 -0
- package/AISB/image/089_aisb.t3.089_causalvelocity.jpg +0 -0
- package/AISB/image/090_aisb.t3.090_rstib.jpg +0 -0
- package/AISB/image/091_aisb.t3.091_timeawarecausal.jpg +0 -0
- package/AISB/image/092_aisb.t3.092_kmeanslocalopt.jpg +0 -0
- package/AISB/image/093_aisb.t3.093_fedwmsam.jpg +0 -0
- package/AISB/image/094_aisb.t3.094_boundre.jpg +0 -0
- package/AISB/image/095_aisb.t3.095_fastfeaturecp.jpg +0 -0
- package/AISB/image/096_aisb.t3.096_m3svm.jpg +0 -0
- package/AISB/image/097_aisb.t3.097_wassersteintl.jpg +0 -0
- package/AISB/image/098_aisb.t3.098_xmahalanobis.jpg +0 -0
- package/AISB/image/099_aisb.t3.099_ollalanding.jpg +0 -0
- package/AISB/image/100_aisb.t3.100_invmissingdata.jpg +0 -0
- package/AISB/image/101_aisb.t3.101_acia.jpg +0 -0
- package/AISB/image/102_aisb.t3.102_stochasticff.jpg +0 -0
- package/AISB/image/103_aisb.t3.103_qdcp.jpg +0 -0
- package/AISB/image/104_aisb.t3.104_balancedactiveinf.jpg +0 -0
- package/AISB/image/105_aisb.t3.105_binaryclasseval.jpg +0 -0
- package/AISB/image/106_aisb.t1.reasoning_lite.jpg +0 -0
- package/AISB/image/107_aisb.t2.paper_audit.jpg +0 -0
- package/AISB/image/108_aisb.t3.multi_gpu_search.jpg +0 -0
- package/AISB/image/109_aisb.t3.tdc_admet.jpg +0 -0
- package/AISB/image/aisb.b1.agentic_coding.svg +16 -0
- package/AISB/image/aisb.b10.climate_earth.svg +16 -0
- package/AISB/image/aisb.b11.model_efficiency.svg +16 -0
- package/AISB/image/aisb.b12.embodied_ai.svg +16 -0
- package/AISB/image/aisb.b2.agent_systems.svg +16 -0
- package/AISB/image/aisb.b3.self_evolving_rl.svg +16 -0
- package/AISB/image/aisb.b4.lm_reasoning.svg +16 -0
- package/AISB/image/aisb.b5.math_proof.svg +16 -0
- package/AISB/image/aisb.b6.research_process.svg +16 -0
- package/AISB/image/aisb.b7.multimodal_fusion.svg +16 -0
- package/AISB/image/aisb.b8.lifesci_drug.svg +16 -0
- package/AISB/image/aisb.b9.material_science.svg +16 -0
- package/README.md +196 -32
- package/bin/ds.js +924 -66
- package/docs/en/00_QUICK_START.md +195 -18
- package/docs/en/01_SETTINGS_REFERENCE.md +468 -96
- package/docs/en/02_START_RESEARCH_GUIDE.md +26 -5
- package/docs/en/03_QQ_CONNECTOR_GUIDE.md +14 -3
- package/docs/en/04_LINGZHU_CONNECTOR_GUIDE.md +2 -0
- package/docs/en/05_TUI_GUIDE.md +171 -2
- package/docs/en/07_MEMORY_AND_MCP.md +38 -2
- package/docs/en/09_DOCTOR.md +78 -7
- package/docs/en/10_WEIXIN_CONNECTOR_GUIDE.md +38 -1
- package/docs/en/11_LICENSE_AND_RISK.md +4 -0
- package/docs/en/12_GUIDED_WORKFLOW_TOUR.md +15 -0
- package/docs/en/14_PROMPT_SKILLS_AND_MCP_GUIDE.md +9 -0
- package/docs/en/15_CODEX_PROVIDER_SETUP.md +624 -180
- package/docs/en/16_TELEGRAM_CONNECTOR_GUIDE.md +14 -0
- package/docs/en/17_WHATSAPP_CONNECTOR_GUIDE.md +14 -0
- package/docs/en/18_FEISHU_CONNECTOR_GUIDE.md +14 -0
- package/docs/en/21_LOCAL_MODEL_BACKENDS_GUIDE.md +386 -0
- package/docs/en/22_BENCHSTORE_YAML_REFERENCE.md +469 -0
- package/docs/en/23_BENCHSTORE_GITHUB_RELEASES_SPEC.md +316 -0
- package/docs/en/24_CLAUDE_CODE_PROVIDER_SETUP.md +469 -0
- package/docs/en/25_OPENCODE_PROVIDER_SETUP.md +653 -0
- package/docs/en/26_CITATION_AND_ATTRIBUTION.md +119 -0
- package/docs/en/27_KIMI_CODE_PROVIDER_SETUP.md +180 -0
- package/docs/en/28_DISCORD_CONNECTOR_GUIDE.md +61 -0
- package/docs/en/29_SLACK_CONNECTOR_GUIDE.md +60 -0
- package/docs/en/30_SETTINGS_CONTROL_CENTER_GUIDE.md +371 -0
- package/docs/en/{19_LOCAL_BROWSER_AUTH.md → 31_LOCAL_BROWSER_AUTH.md} +1 -1
- package/docs/en/32_WINDOWS_WSL2_DEPLOYMENT_GUIDE.md +273 -0
- package/docs/en/33_WORKSPACE_EXPLORER_QA.md +121 -0
- package/docs/en/91_DEVELOPMENT.md +266 -0
- package/docs/en/99_ACKNOWLEDGEMENTS.md +24 -19
- package/docs/en/README.md +48 -7
- package/docs/images/admin/admin-connectors-health-en.png +0 -0
- package/docs/images/admin/admin-controllers-en.png +0 -0
- package/docs/images/admin/admin-diagnostics-en.png +0 -0
- package/docs/images/admin/admin-errors-en.png +0 -0
- package/docs/images/admin/admin-issues-en.png +0 -0
- package/docs/images/admin/admin-logs-en.png +0 -0
- package/docs/images/admin/admin-quest-detail-en.png +0 -0
- package/docs/images/admin/admin-quests-en.png +0 -0
- package/docs/images/admin/admin-repairs-en.png +0 -0
- package/docs/images/admin/admin-runtime-en.png +0 -0
- package/docs/images/admin/admin-search-en.png +0 -0
- package/docs/images/admin/admin-stats-en.png +0 -0
- package/docs/images/admin/admin-summary-en.png +0 -0
- package/docs/images/connectors/connector-discord-en.png +0 -0
- package/docs/images/connectors/connector-feishu-en.png +0 -0
- package/docs/images/connectors/connector-lingzhu-en.png +0 -0
- package/docs/images/connectors/connector-qq-en.png +0 -0
- package/docs/images/connectors/connector-slack-en.png +0 -0
- package/docs/images/connectors/connector-telegram-en.png +0 -0
- package/docs/images/connectors/connector-weixin-en.png +0 -0
- package/docs/images/connectors/connector-whatsapp-en.png +0 -0
- package/docs/images/settings/settings-baselines-en.png +0 -0
- package/docs/images/settings/settings-config-en.png +0 -0
- package/docs/images/settings/settings-connectors-overview-en.png +0 -0
- package/docs/images/settings/settings-deepxiv-en.png +0 -0
- package/docs/images/settings/settings-mcp-servers-en.png +0 -0
- package/docs/images/settings/settings-plugins-en.png +0 -0
- package/docs/images/settings/settings-runners-en.png +0 -0
- package/docs/zh/00_QUICK_START.md +142 -18
- package/docs/zh/01_SETTINGS_REFERENCE.md +219 -98
- package/docs/zh/02_START_RESEARCH_GUIDE.md +26 -5
- package/docs/zh/05_TUI_GUIDE.md +171 -2
- package/docs/zh/07_MEMORY_AND_MCP.md +29 -2
- package/docs/zh/09_DOCTOR.md +54 -8
- package/docs/zh/10_WEIXIN_CONNECTOR_GUIDE.md +24 -1
- package/docs/zh/11_LICENSE_AND_RISK.md +4 -0
- package/docs/zh/12_GUIDED_WORKFLOW_TOUR.md +15 -0
- package/docs/zh/14_PROMPT_SKILLS_AND_MCP_GUIDE.md +9 -0
- package/docs/zh/15_CODEX_PROVIDER_SETUP.md +552 -181
- package/docs/zh/21_LOCAL_MODEL_BACKENDS_GUIDE.md +384 -0
- package/docs/zh/22_BENCHSTORE_YAML_REFERENCE.md +459 -0
- package/docs/zh/23_BENCHSTORE_GITHUB_RELEASES_SPEC.md +287 -0
- package/docs/zh/23_CLAUDE_RUNNER_GUIDE.md +103 -0
- package/docs/zh/24_CLAUDE_CODE_PROVIDER_SETUP.md +460 -0
- package/docs/zh/25_OPENCODE_PROVIDER_SETUP.md +660 -0
- package/docs/zh/26_CITATION_AND_ATTRIBUTION.md +102 -0
- package/docs/zh/27_KIMI_CODE_PROVIDER_SETUP.md +51 -0
- package/docs/zh/{19_LOCAL_BROWSER_AUTH.md → 31_LOCAL_BROWSER_AUTH.md} +1 -1
- package/docs/zh/32_WINDOWS_WSL2_DEPLOYMENT_GUIDE.md +264 -0
- package/docs/zh/33_WORKSPACE_EXPLORER_QA.md +127 -0
- package/docs/zh/99_ACKNOWLEDGEMENTS.md +23 -19
- package/docs/zh/README.md +33 -7
- package/install.sh +168 -20
- package/package.json +5 -1
- package/pyproject.toml +2 -1
- package/src/deepscientist/__init__.py +1 -1
- package/src/deepscientist/acp/envelope.py +13 -0
- package/src/deepscientist/admin/__init__.py +3 -0
- package/src/deepscientist/admin/charts.py +681 -0
- package/src/deepscientist/admin/logs.py +119 -0
- package/src/deepscientist/admin/repairs.py +217 -0
- package/src/deepscientist/admin/service.py +1310 -0
- package/src/deepscientist/admin/system_info.py +700 -0
- package/src/deepscientist/admin/tasks.py +465 -0
- package/src/deepscientist/admin/tool_metrics.py +600 -0
- package/src/deepscientist/artifact/guidance.py +8 -4
- package/src/deepscientist/artifact/schemas.py +115 -0
- package/src/deepscientist/artifact/service.py +4268 -260
- package/src/deepscientist/bash_exec/monitor.py +30 -3
- package/src/deepscientist/bash_exec/service.py +134 -1
- package/src/deepscientist/benchstore/__init__.py +4 -0
- package/src/deepscientist/benchstore/prompt_builder.py +224 -0
- package/src/deepscientist/benchstore/service.py +1716 -0
- package/src/deepscientist/bridges/connectors.py +8 -2
- package/src/deepscientist/channels/weixin_ilink.py +8 -1
- package/src/deepscientist/cli.py +92 -17
- package/src/deepscientist/codex_cli_compat.py +187 -74
- package/src/deepscientist/config/models.py +82 -11
- package/src/deepscientist/config/service.py +1077 -93
- package/src/deepscientist/connector/weixin_support.py +48 -17
- package/src/deepscientist/daemon/api/handlers.py +827 -235
- package/src/deepscientist/daemon/api/router.py +81 -1
- package/src/deepscientist/daemon/app.py +1512 -85
- package/src/deepscientist/diagnostics/__init__.py +6 -0
- package/src/deepscientist/diagnostics/runner_failures.py +277 -0
- package/src/deepscientist/doctor.py +407 -56
- package/src/deepscientist/evidence_packets.py +590 -0
- package/src/deepscientist/home.py +52 -4
- package/src/deepscientist/kimi_cli_compat.py +50 -0
- package/src/deepscientist/latex_runtime.py +2 -2
- package/src/deepscientist/mcp/context.py +2 -0
- package/src/deepscientist/mcp/schemas.py +114 -0
- package/src/deepscientist/mcp/server.py +1566 -126
- package/src/deepscientist/memory/service.py +203 -16
- package/src/deepscientist/process_control.py +8 -1
- package/src/deepscientist/prompts/builder.py +850 -88
- package/src/deepscientist/quest/__init__.py +2 -2
- package/src/deepscientist/quest/layout.py +12 -1
- package/src/deepscientist/quest/node_traces.py +10 -0
- package/src/deepscientist/quest/service.py +1852 -161
- package/src/deepscientist/quest/stage_views.py +1 -1
- package/src/deepscientist/runners/__init__.py +18 -0
- package/src/deepscientist/runners/base.py +89 -1
- package/src/deepscientist/runners/builtins.py +13 -1
- package/src/deepscientist/runners/claude.py +391 -0
- package/src/deepscientist/runners/codex.py +480 -35
- package/src/deepscientist/runners/codex_telemetry.py +127 -0
- package/src/deepscientist/runners/kimi.py +334 -0
- package/src/deepscientist/runners/metadata.py +68 -0
- package/src/deepscientist/runners/opencode.py +414 -0
- package/src/deepscientist/runners/runtime_overrides.py +100 -0
- package/src/deepscientist/runners/simple_cli.py +538 -0
- package/src/deepscientist/runtime_storage.py +303 -0
- package/src/deepscientist/shared.py +80 -16
- package/src/deepscientist/skills/installer.py +37 -0
- package/src/deepscientist/skills/registry.py +2 -0
- package/src/deepscientist/tinytex.py +2 -2
- package/src/deepscientist/tui.py +10 -3
- package/src/prompts/benchstore/system.md +77 -0
- package/src/prompts/connectors/qq.md +33 -2
- package/src/prompts/connectors/weixin.md +208 -23
- package/src/prompts/contracts/admin_ops.md +74 -0
- package/src/prompts/contracts/admin_ops_knowledge.md +138 -0
- package/src/prompts/contracts/shared_interaction.md +5 -10
- package/src/prompts/start_setup/system.md +422 -0
- package/src/prompts/system.md +411 -304
- package/src/prompts/system_copilot.md +89 -0
- package/src/skills/analysis-campaign/SKILL.md +239 -578
- package/src/skills/analysis-campaign/references/artifact-flow-examples.md +102 -0
- package/src/skills/analysis-campaign/references/boundary-cases.md +98 -0
- package/src/skills/analysis-campaign/references/campaign-checklist-template.md +39 -24
- package/src/skills/analysis-campaign/references/campaign-design.md +26 -10
- package/src/skills/analysis-campaign/references/campaign-plan-template.md +53 -54
- package/src/skills/analysis-campaign/references/operational-guidance.md +97 -0
- package/src/skills/analysis-campaign/references/writing-facing-slice-examples.md +10 -20
- package/src/skills/baseline/SKILL.md +183 -461
- package/src/skills/baseline/references/artifact-flow-examples.md +106 -0
- package/src/skills/baseline/references/artifact-payload-examples.md +1 -1
- package/src/skills/baseline/references/baseline-checklist-template.md +27 -35
- package/src/skills/baseline/references/baseline-plan-template.md +37 -76
- package/src/skills/baseline/references/boundary-cases.md +86 -0
- package/src/skills/baseline/references/codebase-audit-checklist.md +2 -6
- package/src/skills/baseline/references/comparability-contract.md +7 -12
- package/src/skills/baseline/references/operational-guidance.md +56 -0
- package/src/skills/baseline/references/route-selection.md +5 -25
- package/src/skills/decision/SKILL.md +113 -306
- package/src/skills/decision/references/checkpoint-memory-template.md +47 -0
- package/src/skills/decision/references/operational-guidance.md +94 -0
- package/src/skills/decision/references/research-route-criteria.md +7 -8
- package/src/skills/decision/references/strategic-decision-template.md +13 -26
- package/src/skills/experiment/SKILL.md +132 -670
- package/src/skills/experiment/references/execution-playbook.md +374 -0
- package/src/skills/experiment/references/main-experiment-checklist-template.md +26 -2
- package/src/skills/experiment/references/main-experiment-plan-template.md +28 -17
- package/src/skills/experiment/references/operational-guidance.md +108 -0
- package/src/skills/finalize/SKILL.md +62 -0
- package/src/skills/finalize/references/checkpoint-memory-template.md +49 -0
- package/src/skills/finalize/references/resume-packet-template.md +7 -0
- package/src/skills/idea/SKILL.md +228 -15
- package/src/skills/idea/references/controlled-brainstorming-playbook.md +78 -0
- package/src/skills/idea/references/current-board-packet-template.md +61 -0
- package/src/skills/idea/references/high-value-idea-sourcing.md +119 -0
- package/src/skills/idea/references/idea-generation-playbook.md +21 -0
- package/src/skills/idea/references/idea-thinking-flow.md +6 -0
- package/src/skills/idea/references/literature-survey-template.md +3 -0
- package/src/skills/idea/references/objective-contract-template.md +54 -0
- package/src/skills/idea/references/outline-seeding-example.md +56 -0
- package/src/skills/idea/references/pre-idea-draft-template.md +105 -0
- package/src/skills/idea/references/related-work-playbook.md +75 -2
- package/src/skills/idea/references/research-history-playbook.md +114 -0
- package/src/skills/idea/references/selection-gate.md +58 -6
- package/src/skills/intake-audit/SKILL.md +43 -2
- package/src/skills/intake-audit/references/state-audit-template.md +10 -0
- package/src/skills/nature-data/SKILL.md +128 -0
- package/src/skills/nature-data/UPSTREAM_LICENSE.txt +21 -0
- package/src/skills/nature-data/agents/openai.yaml +4 -0
- package/src/skills/nature-data/references/chinese-author-alignment.md +84 -0
- package/src/skills/nature-data/references/fair-metadata-checklist.md +105 -0
- package/src/skills/nature-data/references/policy-principles.md +103 -0
- package/src/skills/nature-data/references/repository-and-identifiers.md +96 -0
- package/src/skills/nature-data/references/source-basis.md +54 -0
- package/src/skills/nature-data/references/statement-patterns.md +153 -0
- package/src/skills/nature-figure/SKILL.md +197 -0
- package/src/skills/nature-figure/UPSTREAM_LICENSE.txt +21 -0
- package/src/skills/nature-figure/agents/openai.yaml +4 -0
- package/src/skills/nature-figure/evals/evals.json +37 -0
- package/src/skills/nature-figure/references/api.md +428 -0
- package/src/skills/nature-figure/references/backend-selection.md +100 -0
- package/src/skills/nature-figure/references/chart-types.md +281 -0
- package/src/skills/nature-figure/references/common-patterns.md +349 -0
- package/src/skills/nature-figure/references/design-theory.md +436 -0
- package/src/skills/nature-figure/references/figure-contract.md +93 -0
- package/src/skills/nature-figure/references/nature-2026-observations.md +112 -0
- package/src/skills/nature-figure/references/qa-contract.md +119 -0
- package/src/skills/nature-figure/references/r-template-index.md +66 -0
- package/src/skills/nature-figure/references/r-workflow.md +161 -0
- package/src/skills/nature-figure/references/tutorials.md +250 -0
- package/src/skills/nature-paper2ppt/SKILL.md +507 -0
- package/src/skills/nature-paper2ppt/UPSTREAM_LICENSE.txt +21 -0
- package/src/skills/nature-paper2ppt/agents/openai.yaml +4 -0
- package/src/skills/nature-polishing/SKILL.md +385 -0
- package/src/skills/nature-polishing/UPSTREAM_LICENSE.txt +21 -0
- package/src/skills/nature-polishing/agents/openai.yaml +4 -0
- package/src/skills/nature-polishing/references/phrasebank-playbook.md +162 -0
- package/src/skills/nature-polishing/references/section-moves.md +240 -0
- package/src/skills/nature-polishing/references/style-guardrails.md +94 -0
- package/src/skills/nature-polishing/references/writing-strategy.md +148 -0
- package/src/skills/optimize/SKILL.md +177 -1568
- package/src/skills/optimize/references/brief-shaping-playbook.md +95 -0
- package/src/skills/optimize/references/candidate-board-template.md +13 -0
- package/src/skills/optimize/references/candidate-ranking-template.md +51 -0
- package/src/skills/optimize/references/codegen-route-playbook.md +50 -0
- package/src/skills/optimize/references/debug-response-template.md +29 -0
- package/src/skills/optimize/references/frontier-review-template.md +32 -0
- package/src/skills/optimize/references/fusion-playbook.md +36 -0
- package/src/skills/optimize/references/method-brief-template.md +73 -0
- package/src/skills/optimize/references/operational-guidance.md +621 -0
- package/src/skills/optimize/references/optimization-memory-template.md +30 -0
- package/src/skills/optimize/references/optimize-checklist-template.md +18 -0
- package/src/skills/optimize/references/plateau-response-playbook.md +28 -0
- package/src/skills/optimize/references/prompt-patterns.md +49 -0
- package/src/skills/paper-outline/SKILL.md +227 -0
- package/src/skills/paper-outline/references/outline-patterns.md +87 -0
- package/src/skills/paper-plot/SKILL.md +79 -0
- package/src/skills/paper-plot/agents/openai.yaml +4 -0
- package/src/skills/paper-plot/references/bar_grouped_hatch.md +96 -0
- package/src/skills/paper-plot/references/bar_paired_delta.md +72 -0
- package/src/skills/paper-plot/references/line_confidence_band.md +75 -0
- package/src/skills/paper-plot/references/line_loss_with_inset.md +65 -0
- package/src/skills/paper-plot/references/line_training_curve.md +44 -0
- package/src/skills/paper-plot/references/radar_dual_series.md +59 -0
- package/src/skills/paper-plot/references/scatter_broken_axis.md +59 -0
- package/src/skills/paper-plot/references/scatter_tsne_cluster.md +72 -0
- package/src/skills/paper-plot/scripts/bar_memevolve.py +109 -0
- package/src/skills/paper-plot/scripts/bar_spice.py +166 -0
- package/src/skills/paper-plot/scripts/line_aime.py +94 -0
- package/src/skills/paper-plot/scripts/line_loss_inset.py +157 -0
- package/src/skills/paper-plot/scripts/line_selfdistill.py +168 -0
- package/src/skills/paper-plot/scripts/radar_dora.py +151 -0
- package/src/skills/paper-plot/scripts/scatter_break.py +169 -0
- package/src/skills/paper-plot/scripts/scatter_tsne.py +133 -0
- package/src/skills/rebuttal/SKILL.md +9 -0
- package/src/skills/references/tool-usage-by-stage.md +438 -0
- package/src/skills/review/SKILL.md +105 -7
- package/src/skills/science/PROVENANCE.md +44 -0
- package/src/skills/science/SKILL.md +137 -0
- package/src/skills/science/references/artifact-science-tool.md +110 -0
- package/src/skills/science/references/claim-type-discipline.md +56 -0
- package/src/skills/science/references/domain-index.md +422 -0
- package/src/skills/science/references/hpc-via-bash-exec.md +42 -0
- package/src/skills/science/references/package-check-playbook.md +64 -0
- package/src/skills/science/references/package-index.min.json +3616 -0
- package/src/skills/science/references/packages/abinit.md +80 -0
- package/src/skills/science/references/packages/acts.md +73 -0
- package/src/skills/science/references/packages/aiida-core.md +80 -0
- package/src/skills/science/references/packages/alamode.md +80 -0
- package/src/skills/science/references/packages/amuse.md +88 -0
- package/src/skills/science/references/packages/anndata.md +88 -0
- package/src/skills/science/references/packages/arbor.md +80 -0
- package/src/skills/science/references/packages/arc.md +73 -0
- package/src/skills/science/references/packages/astropy.md +88 -0
- package/src/skills/science/references/packages/astroquery.md +88 -0
- package/src/skills/science/references/packages/atomate2.md +80 -0
- package/src/skills/science/references/packages/atomsmltr.md +73 -0
- package/src/skills/science/references/packages/awkward.md +73 -0
- package/src/skills/science/references/packages/batman.md +88 -0
- package/src/skills/science/references/packages/biopython.md +88 -0
- package/src/skills/science/references/packages/bloqade.md +73 -0
- package/src/skills/science/references/packages/brian2.md +73 -0
- package/src/skills/science/references/packages/bullet3.md +73 -0
- package/src/skills/science/references/packages/calculix.md +80 -0
- package/src/skills/science/references/packages/cantera.md +73 -0
- package/src/skills/science/references/packages/cavity-md-ipi.md +80 -0
- package/src/skills/science/references/packages/ccdproc.md +88 -0
- package/src/skills/science/references/packages/celerite2.md +88 -0
- package/src/skills/science/references/packages/cellrank.md +73 -0
- package/src/skills/science/references/packages/cesm.md +80 -0
- package/src/skills/science/references/packages/chemicals.md +73 -0
- package/src/skills/science/references/packages/chempy.md +73 -0
- package/src/skills/science/references/packages/cirq.md +73 -0
- package/src/skills/science/references/packages/coffea.md +73 -0
- package/src/skills/science/references/packages/cp2k.md +88 -0
- package/src/skills/science/references/packages/custodian.md +80 -0
- package/src/skills/science/references/packages/dart.md +73 -0
- package/src/skills/science/references/packages/datamol.md +88 -0
- package/src/skills/science/references/packages/dd4hep.md +73 -0
- package/src/skills/science/references/packages/dealii.md +80 -0
- package/src/skills/science/references/packages/deepchem.md +88 -0
- package/src/skills/science/references/packages/delphes.md +73 -0
- package/src/skills/science/references/packages/devito.md +80 -0
- package/src/skills/science/references/packages/dftb.md +88 -0
- package/src/skills/science/references/packages/dftd4.md +88 -0
- package/src/skills/science/references/packages/dftk-jl.md +80 -0
- package/src/skills/science/references/packages/dolfinx.md +80 -0
- package/src/skills/science/references/packages/drake.md +73 -0
- package/src/skills/science/references/packages/dumux.md +73 -0
- package/src/skills/science/references/packages/elk.md +80 -0
- package/src/skills/science/references/packages/elmerfem.md +80 -0
- package/src/skills/science/references/packages/enzo-e.md +88 -0
- package/src/skills/science/references/packages/espresso.md +80 -0
- package/src/skills/science/references/packages/exoplanet.md +88 -0
- package/src/skills/science/references/packages/fairroot.md +73 -0
- package/src/skills/science/references/packages/fbpic.md +80 -0
- package/src/skills/science/references/packages/fdtdbath-meep.md +80 -0
- package/src/skills/science/references/packages/geant4.md +73 -0
- package/src/skills/science/references/packages/geosx.md +80 -0
- package/src/skills/science/references/packages/gprmax.md +80 -0
- package/src/skills/science/references/packages/gromacs.md +80 -0
- package/src/skills/science/references/packages/gwaslab.md +73 -0
- package/src/skills/science/references/packages/gz-sim.md +73 -0
- package/src/skills/science/references/packages/hail.md +88 -0
- package/src/skills/science/references/packages/hiphive.md +80 -0
- package/src/skills/science/references/packages/hoomd-blue.md +80 -0
- package/src/skills/science/references/packages/itensor.md +73 -0
- package/src/skills/science/references/packages/itensors-jl.md +73 -0
- package/src/skills/science/references/packages/jdftx.md +73 -0
- package/src/skills/science/references/packages/jobflow.md +80 -0
- package/src/skills/science/references/packages/kadanoffbaym-jl.md +73 -0
- package/src/skills/science/references/packages/kite.md +80 -0
- package/src/skills/science/references/packages/kratos.md +80 -0
- package/src/skills/science/references/packages/kwant.md +73 -0
- package/src/skills/science/references/packages/lammps.md +80 -0
- package/src/skills/science/references/packages/lightkurve.md +88 -0
- package/src/skills/science/references/packages/limix.md +73 -0
- package/src/skills/science/references/packages/maxwelllink.md +80 -0
- package/src/skills/science/references/packages/mcdc.md +73 -0
- package/src/skills/science/references/packages/meep.md +80 -0
- package/src/skills/science/references/packages/mfem.md +80 -0
- package/src/skills/science/references/packages/mitgcm.md +73 -0
- package/src/skills/science/references/packages/modflow6.md +73 -0
- package/src/skills/science/references/packages/molecool.md +73 -0
- package/src/skills/science/references/packages/mom6.md +73 -0
- package/src/skills/science/references/packages/moose.md +80 -0
- package/src/skills/science/references/packages/mpas-model.md +73 -0
- package/src/skills/science/references/packages/mujoco.md +73 -0
- package/src/skills/science/references/packages/mumax3.md +73 -0
- package/src/skills/science/references/packages/nekrs.md +80 -0
- package/src/skills/science/references/packages/nessi.md +73 -0
- package/src/skills/science/references/packages/nest-simulator.md +73 -0
- package/src/skills/science/references/packages/netket.md +73 -0
- package/src/skills/science/references/packages/neuron.md +73 -0
- package/src/skills/science/references/packages/nextflow.md +88 -0
- package/src/skills/science/references/packages/nwchem.md +88 -0
- package/src/skills/science/references/packages/openbabel.md +88 -0
- package/src/skills/science/references/packages/openems.md +80 -0
- package/src/skills/science/references/packages/openff-toolkit.md +88 -0
- package/src/skills/science/references/packages/openfoam-dev.md +80 -0
- package/src/skills/science/references/packages/openmc.md +73 -0
- package/src/skills/science/references/packages/openmm.md +80 -0
- package/src/skills/science/references/packages/openmoc.md +73 -0
- package/src/skills/science/references/packages/openmx.md +80 -0
- package/src/skills/science/references/packages/opensees.md +80 -0
- package/src/skills/science/references/packages/opensn.md +80 -0
- package/src/skills/science/references/packages/opm-simulators.md +73 -0
- package/src/skills/science/references/packages/oqupy.md +73 -0
- package/src/skills/science/references/packages/packmol.md +80 -0
- package/src/skills/science/references/packages/palabos.md +80 -0
- package/src/skills/science/references/packages/parflow.md +80 -0
- package/src/skills/science/references/packages/pennylane.md +88 -0
- package/src/skills/science/references/packages/perceval.md +73 -0
- package/src/skills/science/references/packages/phono3py.md +73 -0
- package/src/skills/science/references/packages/phonopy.md +73 -0
- package/src/skills/science/references/packages/photutils.md +88 -0
- package/src/skills/science/references/packages/picongpu.md +80 -0
- package/src/skills/science/references/packages/plink-ng.md +88 -0
- package/src/skills/science/references/packages/precice.md +73 -0
- package/src/skills/science/references/packages/psc.md +80 -0
- package/src/skills/science/references/packages/psi4.md +88 -0
- package/src/skills/science/references/packages/pybinding.md +73 -0
- package/src/skills/science/references/packages/pyfr.md +80 -0
- package/src/skills/science/references/packages/pyhf.md +73 -0
- package/src/skills/science/references/packages/pyiron_base.md +80 -0
- package/src/skills/science/references/packages/pylcp.md +73 -0
- package/src/skills/science/references/packages/pylith.md +80 -0
- package/src/skills/science/references/packages/pynbody.md +88 -0
- package/src/skills/science/references/packages/pysam.md +88 -0
- package/src/skills/science/references/packages/pyscf.md +88 -0
- package/src/skills/science/references/packages/q-e.md +73 -0
- package/src/skills/science/references/packages/qibo.md +73 -0
- package/src/skills/science/references/packages/qiskit.md +73 -0
- package/src/skills/science/references/packages/quantica-jl.md +73 -0
- package/src/skills/science/references/packages/quantumoptics-jl.md +73 -0
- package/src/skills/science/references/packages/quimb.md +73 -0
- package/src/skills/science/references/packages/qulacs.md +73 -0
- package/src/skills/science/references/packages/qutip.md +73 -0
- package/src/skills/science/references/packages/rdkit.md +88 -0
- package/src/skills/science/references/packages/rmg-py.md +73 -0
- package/src/skills/science/references/packages/root.md +73 -0
- package/src/skills/science/references/packages/scanpy.md +88 -0
- package/src/skills/science/references/packages/scikit-allel.md +88 -0
- package/src/skills/science/references/packages/scikit-bio.md +88 -0
- package/src/skills/science/references/packages/scqubits.md +73 -0
- package/src/skills/science/references/packages/scuff-em.md +80 -0
- package/src/skills/science/references/packages/scvi-tools.md +73 -0
- package/src/skills/science/references/packages/seissol.md +73 -0
- package/src/skills/science/references/packages/sfepy.md +80 -0
- package/src/skills/science/references/packages/sisl.md +73 -0
- package/src/skills/science/references/packages/smilei.md +80 -0
- package/src/skills/science/references/packages/snakemake.md +88 -0
- package/src/skills/science/references/packages/specfem3d-globe.md +80 -0
- package/src/skills/science/references/packages/specutils.md +88 -0
- package/src/skills/science/references/packages/spglib.md +80 -0
- package/src/skills/science/references/packages/squidpy.md +88 -0
- package/src/skills/science/references/packages/starry.md +88 -0
- package/src/skills/science/references/packages/strawberryfields.md +73 -0
- package/src/skills/science/references/packages/su2.md +80 -0
- package/src/skills/science/references/packages/sunny-jl.md +73 -0
- package/src/skills/science/references/packages/sw4.md +73 -0
- package/src/skills/science/references/packages/swift.md +88 -0
- package/src/skills/science/references/packages/tdnegf.md +73 -0
- package/src/skills/science/references/packages/tenpy.md +73 -0
- package/src/skills/science/references/packages/thermo.md +73 -0
- package/src/skills/science/references/packages/tkwant.md +73 -0
- package/src/skills/science/references/packages/tvb-root.md +73 -0
- package/src/skills/science/references/packages/uproot5.md +73 -0
- package/src/skills/science/references/packages/vampire.md +80 -0
- package/src/skills/science/references/packages/wannier_tools.md +73 -0
- package/src/skills/science/references/packages/warpx.md +80 -0
- package/src/skills/science/references/packages/wrf.md +73 -0
- package/src/skills/science/references/packages/xtb.md +88 -0
- package/src/skills/science/references/packages/yt.md +73 -0
- package/src/skills/science/references/science-task-brief-template.md +71 -0
- package/src/skills/scout/SKILL.md +83 -425
- package/src/skills/scout/references/literature-scout-template.md +5 -24
- package/src/skills/scout/references/operational-guidance.md +191 -0
- package/src/skills/scout/references/paper-triage-playbook.md +11 -35
- package/src/skills/write/SKILL.md +744 -1246
- package/src/skills/write/references/experiments_analysis_patterns.md +129 -0
- package/src/skills/write/references/oral_package_patterns.md +252 -0
- package/src/skills/write/references/oral_writing_principles.md +291 -0
- package/src/skills/write/references/section_rewrite_checklist.md +234 -0
- package/src/tui/dist/app/AppContainer.js +1314 -27
- package/src/tui/dist/components/Composer.js +26 -1
- package/src/tui/dist/components/ConfigScreen.js +2 -1
- package/src/tui/dist/components/InputPrompt.js +25 -9
- package/src/tui/dist/components/MainContent.js +18 -3
- package/src/tui/dist/components/QuestScreen.js +3 -2
- package/src/tui/dist/components/UtilityScreen.js +37 -0
- package/src/tui/dist/hooks/useSafeInput.js +10 -0
- package/src/tui/dist/index.js +13 -1
- package/src/tui/dist/layouts/DefaultAppLayout.js +11 -8
- package/src/tui/dist/lib/api.js +89 -1
- package/src/tui/package.json +1 -1
- package/src/ui/dist/assets/{AnalysisPlugin-DnSm0GZn.js → AnalysisPlugin-CA94NGmI.js} +1 -1
- package/src/ui/dist/assets/CliPlugin-DHBzphZU.js +79 -0
- package/src/ui/dist/assets/CodeEditorPlugin-BOFwD2rn.js +2 -0
- package/src/ui/dist/assets/{CodeViewerPlugin-itb0tltR.js → CodeViewerPlugin-CqDpgjik.js} +4 -4
- package/src/ui/dist/assets/{DocViewerPlugin-DqKkiCI6.js → DocViewerPlugin-UDBgt8-4.js} +3 -3
- package/src/ui/dist/assets/GitCommitViewerPlugin-BmHtZ0bZ.js +6 -0
- package/src/ui/dist/assets/{GitDiffViewerPlugin-DxL2ezFG.js → GitDiffViewerPlugin-CAxjNorQ.js} +2 -2
- package/src/ui/dist/assets/{GitSnapshotViewer-B_RQm1YZ.js → GitSnapshotViewer-CweA6VON.js} +2 -2
- package/src/ui/dist/assets/{ImageViewerPlugin-tHqlXY3n.js → ImageViewerPlugin-C8wHGvGN.js} +5 -5
- package/src/ui/dist/assets/LabPlugin-COyyLUol.js +32 -0
- package/src/ui/dist/assets/{LatexPlugin-B495DTXC.js → LatexPlugin-BQjAaA5J.js} +4 -4
- package/src/ui/dist/assets/{MarkdownViewerPlugin-DG28-61B.js → MarkdownViewerPlugin-Dy1NE2dI.js} +3 -3
- package/src/ui/dist/assets/{MarketplacePlugin-BiOGT-Kj.js → MarketplacePlugin-DMIZtEJ2.js} +2 -2
- package/src/ui/dist/assets/NotebookEditor-CFHMq_Qt.js +91 -0
- package/src/ui/dist/assets/{NotebookEditor-CVsj8h_T.js → NotebookEditor-WFyd8Ybt.js} +23 -23
- package/src/ui/dist/assets/{PdfLoader-CASDQmxJ.js → PdfLoader-CLE5u5TS.js} +3 -3
- package/src/ui/dist/assets/{PdfMarkdownPlugin-BFhwoKsY.js → PdfMarkdownPlugin-_iNK_H83.js} +1 -1
- package/src/ui/dist/assets/PdfViewerPlugin-DgWsbInT.js +22 -0
- package/src/ui/dist/assets/SearchPlugin-DrZmn5iw.js +11 -0
- package/src/ui/dist/assets/{TextViewerPlugin-CB4DYfWO.js → TextViewerPlugin-D1-T3aC7.js} +4 -4
- package/src/ui/dist/assets/branding/runner-claude.svg +107 -0
- package/src/ui/dist/assets/branding/runner-codex.svg +10 -0
- package/src/ui/dist/assets/branding/runner-kimi.svg +14 -0
- package/src/ui/dist/assets/branding/runner-opencode.svg +7 -0
- package/src/ui/dist/assets/cli-store-CoZ-x5Ip.js +1 -0
- package/src/ui/dist/assets/{code-DLC6G24T.js → code-DbsmSd3Y.js} +1 -1
- package/src/ui/dist/assets/file-diff-panel-DsvyRz47.js +1 -0
- package/src/ui/dist/assets/{wrap-text-CwMn-iqb.js → file-jump-queue-DeQBikaw.js} +3 -3
- package/src/ui/dist/assets/{file-socket-Cu4Qln7Y.js → file-socket-DA5XIx88.js} +1 -1
- package/src/ui/dist/assets/fonts/ds-fonts.css +50 -4
- package/src/ui/dist/assets/images/deepxiv/register-guide.png +0 -0
- package/src/ui/dist/assets/index-39vY9LmZ.js +1 -0
- package/src/ui/dist/assets/{index-wQ7RIIRd.js → index-BsO46tJA.js} +1 -1
- package/src/ui/dist/assets/index-CHzJ2xtB.js +3530 -0
- package/src/ui/dist/assets/index-DH-zxoZ3.css +33 -0
- package/src/ui/dist/assets/{plugin-notebook-HbW2K-1c.js → plugin-notebook-JRhysCqj.js} +2 -2
- package/src/ui/dist/assets/{project-sync-CsX08Qno.js → project-sync-DPmWKmKD.js} +1 -1
- package/src/ui/dist/assets/{zoom-out-R-GWEhzS.js → zoom-out-DAukFWen.js} +3 -3
- package/src/ui/dist/index.html +3 -3
- package/src/skills/analysis-campaign/references/artifact-orchestration.md +0 -58
- package/src/skills/baseline/references/memory-playbook.md +0 -40
- package/src/skills/baseline/references/publishable-baseline-package.md +0 -30
- package/src/skills/write/references/outline-evidence-contract-example.md +0 -107
- package/src/skills/write/references/paper-experiment-matrix-template.md +0 -131
- package/src/skills/write/references/paper-section-playbook.md +0 -64
- package/src/skills/write/references/reviewer-first-writing.md +0 -64
- package/src/skills/write/references/revision-checklist.md +0 -70
- package/src/skills/write/references/section-contracts.md +0 -82
- package/src/skills/write/references/sentence-level-proofing.md +0 -49
- package/src/ui/dist/assets/AiManusChatView-COFACy7V.js +0 -204
- package/src/ui/dist/assets/CliPlugin-CvwCmDQ5.js +0 -109
- package/src/ui/dist/assets/CodeEditorPlugin-cOqSa0xq.js +0 -2
- package/src/ui/dist/assets/GitCommitViewerPlugin-DVgNHBCS.js +0 -1
- package/src/ui/dist/assets/LabCopilotPanel-ClMbq5Yu.js +0 -14
- package/src/ui/dist/assets/LabPlugin-L_SuE8ow.js +0 -22
- package/src/ui/dist/assets/NotebookEditor-C-4Kt1p9.js +0 -81
- package/src/ui/dist/assets/PdfViewerPlugin-DcOzU9vd.js +0 -17
- package/src/ui/dist/assets/SearchPlugin-CHj7M58O.js +0 -16
- package/src/ui/dist/assets/VNCViewer-CjlbyCB3.js +0 -11
- package/src/ui/dist/assets/bot-CFkZY-JP.js +0 -6
- package/src/ui/dist/assets/chevron-up-Dq5ofbht.js +0 -6
- package/src/ui/dist/assets/file-content-Dv4LoZec.js +0 -1
- package/src/ui/dist/assets/file-diff-panel-Denq-lC3.js +0 -1
- package/src/ui/dist/assets/file-jump-queue-DA-SdG__.js +0 -1
- package/src/ui/dist/assets/git-commit-horizontal-BUh6G52n.js +0 -6
- package/src/ui/dist/assets/image-B9HUUddG.js +0 -6
- package/src/ui/dist/assets/index-B2B1sg-M.js +0 -1
- package/src/ui/dist/assets/index-Cgla8biy.css +0 -33
- package/src/ui/dist/assets/index-DRyx7vAc.js +0 -1
- package/src/ui/dist/assets/index-Gbl53BNp.js +0 -2496
- package/src/ui/dist/assets/pdf-effect-queue-ZtnHFCAi.js +0 -6
- package/src/ui/dist/assets/popover-DL6h35vr.js +0 -1
- package/src/ui/dist/assets/select-DvmXt1yY.js +0 -11
- package/src/ui/dist/assets/sigma-7jpXazui.js +0 -6
- package/src/ui/dist/assets/trash-xA7kFt8i.js +0 -11
- package/src/ui/dist/assets/useCliAccess-DsMwDjOp.js +0 -1
- package/src/ui/dist/assets/useFileDiffOverlay-FuhcnKiw.js +0 -1
|
@@ -0,0 +1,224 @@
|
|
|
1
|
+
schema_version: 1
|
|
2
|
+
id: aisb.t3.029_cdqgeoembed
|
|
3
|
+
name: Conditional Dichotomy Quantification via Geometric Embedding
|
|
4
|
+
version: 0.1.0
|
|
5
|
+
one_line: 'Train and evaluate opposite-aware complex-valued embeddings (DoGE) that
|
|
6
|
+
quantify conditional dichotomy between context-conditioned text pairs across debate,
|
|
7
|
+
defeasible NLI, and causal reasoning scenarios, measured by DCF and Oppo-Angle metrics.
|
|
8
|
+
|
|
9
|
+
'
|
|
10
|
+
task_description: 'This benchmark covers the Conditional Dichotomy Quantification
|
|
11
|
+
(ConDQ) task introduced at ACL 2025. The goal is to learn embeddings that capture
|
|
12
|
+
whether two text outputs conditioned on the same context are genuinely oppositional
|
|
13
|
+
(e.g., pro vs. con arguments in debate, strengtheners vs. weakeners in defeasible
|
|
14
|
+
NLI, supporters vs. defeaters in causal reasoning). The paper''s DoGE framework
|
|
15
|
+
uses complex-valued embeddings with a dichotomous objective to geometrically position
|
|
16
|
+
positive, negative, and neutral arguments. Evaluation uses two metrics: Dichotomy
|
|
17
|
+
Consistency Frequency (DCF), which checks relational ordering among argument types,
|
|
18
|
+
and Oppo-Angle, which measures angular separation between opposing arguments. The
|
|
19
|
+
current local snapshot does NOT contain executable training or evaluation code —
|
|
20
|
+
only metadata, the paper, catalog files, and a checkpoint directory stub. The upstream
|
|
21
|
+
repository (https://github.com/cui-shaobo/conditional-dichotomy-quantification)
|
|
22
|
+
and the published PyPI package `opposite-score` provide the runnable code. Restoring
|
|
23
|
+
the executable route requires cloning the upstream repo and installing the opposite-score
|
|
24
|
+
package, then loading one of the three pretrained HuggingFace models for inference
|
|
25
|
+
or retraining on the three scenario datasets (Debate ~95K, Defeasible NLI ~441K,
|
|
26
|
+
Causal Reasoning ~48K quadruples).
|
|
27
|
+
|
|
28
|
+
'
|
|
29
|
+
capability_tags:
|
|
30
|
+
- research_code_optimization
|
|
31
|
+
- text_embeddings
|
|
32
|
+
- contrastive_nlp
|
|
33
|
+
- evaluation
|
|
34
|
+
- classification
|
|
35
|
+
- complex_valued_representations
|
|
36
|
+
aisb_direction: T3
|
|
37
|
+
track_fit:
|
|
38
|
+
- paper_track
|
|
39
|
+
- benchmark_track
|
|
40
|
+
task_mode: analysis_driven
|
|
41
|
+
requires_execution: true
|
|
42
|
+
requires_paper: true
|
|
43
|
+
integrity_level: cas_plus_canary
|
|
44
|
+
snapshot_status: restore_needed
|
|
45
|
+
support_level: recovery
|
|
46
|
+
cost_band: medium
|
|
47
|
+
time_band: 6-24h
|
|
48
|
+
difficulty: hard
|
|
49
|
+
data_access: public
|
|
50
|
+
primary_outputs:
|
|
51
|
+
- avg_dcf
|
|
52
|
+
- debate_dcf
|
|
53
|
+
- nli_dcf
|
|
54
|
+
- causal_dcf
|
|
55
|
+
- debate_oppo_angle
|
|
56
|
+
- nli_oppo_angle
|
|
57
|
+
- causal_oppo_angle
|
|
58
|
+
launch_profiles:
|
|
59
|
+
- id: analysis_only
|
|
60
|
+
label: Analysis Only
|
|
61
|
+
description: 'Review the preserved metadata, paper, metric contract, and catalog
|
|
62
|
+
files. No executable code is run; useful for understanding the task formulation
|
|
63
|
+
and evaluation protocol.
|
|
64
|
+
|
|
65
|
+
'
|
|
66
|
+
- id: restore_first
|
|
67
|
+
label: Restore First
|
|
68
|
+
description: 'Clone the upstream GitHub repository, install opposite-score from
|
|
69
|
+
PyPI, download pretrained HuggingFace checkpoints, and reconstruct the full training
|
|
70
|
+
and evaluation pipeline before running benchmarks.
|
|
71
|
+
|
|
72
|
+
'
|
|
73
|
+
- id: inference_only
|
|
74
|
+
label: Inference Only (after restore)
|
|
75
|
+
description: 'After restoring code, load a pretrained opposite-score model from
|
|
76
|
+
HuggingFace and compute DCF and Oppo-Angle on the test splits of the three scenarios
|
|
77
|
+
without retraining.
|
|
78
|
+
|
|
79
|
+
'
|
|
80
|
+
dataset_download:
|
|
81
|
+
primary_method: mixed
|
|
82
|
+
sources:
|
|
83
|
+
- kind: huggingface
|
|
84
|
+
url: https://huggingface.co/shaobocui/opposite-score-debate-bert
|
|
85
|
+
access: public
|
|
86
|
+
note: Pretrained opposite-score model for debate scenario (BERT backbone).
|
|
87
|
+
- kind: huggingface
|
|
88
|
+
url: https://huggingface.co/shaobocui/opposite-score-defeasibleNLI-bert
|
|
89
|
+
access: public
|
|
90
|
+
note: Pretrained opposite-score model for defeasible NLI scenario (BERT backbone).
|
|
91
|
+
- kind: huggingface
|
|
92
|
+
url: https://huggingface.co/shaobocui/opposite-score-causal-reasoning-bert
|
|
93
|
+
access: public
|
|
94
|
+
note: Pretrained opposite-score model for causal reasoning scenario (BERT backbone).
|
|
95
|
+
- kind: github
|
|
96
|
+
url: https://github.com/cui-shaobo/conditional-dichotomy-quantification
|
|
97
|
+
access: public
|
|
98
|
+
note: Upstream source repository with training/evaluation code and dataset construction
|
|
99
|
+
scripts.
|
|
100
|
+
- kind: pypi
|
|
101
|
+
url: https://pypi.org/project/opposite-score/
|
|
102
|
+
access: public
|
|
103
|
+
note: Published Python package (opposite-score==0.0.1) providing DichotomyE model
|
|
104
|
+
and scoring API.
|
|
105
|
+
notes:
|
|
106
|
+
- 'The three scenario datasets (PERSPECTRUM for debate, δ-NLI for defeasible NLI,
|
|
107
|
+
δ-CAUSAL for causal reasoning) total ~583K quadruples. Disk footprint for datasets
|
|
108
|
+
is modest (text-only); pretrained model checkpoints are BERT-scale (~400MB each).
|
|
109
|
+
|
|
110
|
+
'
|
|
111
|
+
- 'Neutral arguments in the datasets were generated using GPT-4o and human-verified
|
|
112
|
+
by two annotators; the generation pipeline requires spaCy and GPT-4o API access
|
|
113
|
+
if reproducing from scratch.
|
|
114
|
+
|
|
115
|
+
'
|
|
116
|
+
credential_requirements:
|
|
117
|
+
mode: none
|
|
118
|
+
items: []
|
|
119
|
+
notes:
|
|
120
|
+
- 'No credentials required for using pretrained models or published datasets. Reproducing
|
|
121
|
+
neutral argument generation from scratch would require OpenAI API access for GPT-4o.
|
|
122
|
+
|
|
123
|
+
'
|
|
124
|
+
resources:
|
|
125
|
+
minimum:
|
|
126
|
+
cpu_cores: 4
|
|
127
|
+
ram_gb: 16
|
|
128
|
+
disk_gb: 20
|
|
129
|
+
gpu_count: 1
|
|
130
|
+
gpu_vram_gb: 8
|
|
131
|
+
recommended:
|
|
132
|
+
cpu_cores: 8
|
|
133
|
+
ram_gb: 32
|
|
134
|
+
disk_gb: 80
|
|
135
|
+
gpu_count: 1
|
|
136
|
+
gpu_vram_gb: 16
|
|
137
|
+
environment:
|
|
138
|
+
python: '3.10'
|
|
139
|
+
cuda: null
|
|
140
|
+
pytorch: null
|
|
141
|
+
flash_attn: null
|
|
142
|
+
key_packages:
|
|
143
|
+
- opposite-score==0.0.1
|
|
144
|
+
- transformers
|
|
145
|
+
- spacy
|
|
146
|
+
notes:
|
|
147
|
+
- 'The current local snapshot does not contain executable setup files or runnable
|
|
148
|
+
source code. Environment specifications are derived from the README and PyPI package
|
|
149
|
+
metadata.
|
|
150
|
+
|
|
151
|
+
'
|
|
152
|
+
- 'The opposite-score package auto-downloads pretrained models on first use; a CUDA-capable
|
|
153
|
+
GPU is needed for the .cuda() call shown in usage examples.
|
|
154
|
+
|
|
155
|
+
'
|
|
156
|
+
- 'For full training replication, additional dependencies from the upstream repo''s
|
|
157
|
+
requirements files will be needed.
|
|
158
|
+
|
|
159
|
+
'
|
|
160
|
+
risk_flags:
|
|
161
|
+
- source_snapshot_incomplete
|
|
162
|
+
- route_caveat
|
|
163
|
+
- upstream_dependency
|
|
164
|
+
risk_notes:
|
|
165
|
+
- 'The local snapshot contains no executable Python files — only metadata, AGENTS,
|
|
166
|
+
catalog YAML, metric contract JSON, a checkpoint directory stub, and an angles_data.csv
|
|
167
|
+
file.
|
|
168
|
+
|
|
169
|
+
'
|
|
170
|
+
- 'All training and evaluation code must be restored from the upstream GitHub repository
|
|
171
|
+
or reconstructed from the paper and the opposite-score PyPI package.
|
|
172
|
+
|
|
173
|
+
'
|
|
174
|
+
- 'No benchmark execution was performed during the packaging pass; all metric values
|
|
175
|
+
in the metric contract are provisional/blocked.
|
|
176
|
+
|
|
177
|
+
'
|
|
178
|
+
- 'Reproducing neutral argument generation requires GPT-4o API access and associated
|
|
179
|
+
costs.
|
|
180
|
+
|
|
181
|
+
'
|
|
182
|
+
recommended_when: 'Use this entry when you want to study or reproduce the ConDQ task
|
|
183
|
+
— quantifying how oppositional two text outputs are given shared context — or when
|
|
184
|
+
planning upstream code restoration for the DoGE geometric embedding framework. Also
|
|
185
|
+
suitable for evaluating existing sentence embedding methods on the three ConDQ scenario
|
|
186
|
+
datasets using DCF and Oppo-Angle metrics.
|
|
187
|
+
|
|
188
|
+
'
|
|
189
|
+
not_recommended_when: 'Do not treat this local snapshot as a ready-to-run benchmark.
|
|
190
|
+
The executable training and evaluation code is absent. If you need an immediately
|
|
191
|
+
runnable text embedding benchmark without upstream restoration, choose a different
|
|
192
|
+
entry. Also not suitable if you lack GPU access, as the opposite-score models require
|
|
193
|
+
CUDA for standard usage.
|
|
194
|
+
|
|
195
|
+
'
|
|
196
|
+
paper:
|
|
197
|
+
title: Conditional Dichotomy Quantification via Geometric Embedding
|
|
198
|
+
authors:
|
|
199
|
+
- Shaobo Cui
|
|
200
|
+
- Wenqing Liu
|
|
201
|
+
- Yiyang Feng
|
|
202
|
+
- Jiawei Zhou
|
|
203
|
+
- Boi Faltings
|
|
204
|
+
venue: ACL 2025 (Oral)
|
|
205
|
+
year: 2025
|
|
206
|
+
url: https://aclanthology.org/2025.acl-long.383/
|
|
207
|
+
doi: 10.18653/v1/2025.acl-long.383
|
|
208
|
+
download:
|
|
209
|
+
url: https://github.com/ResearAI/DeepScientist/releases/download/aisb-v0.0.1/aisb.t3.029_cdqgeoembed.zip
|
|
210
|
+
archive_type: zip
|
|
211
|
+
local_dir_name: paper-29-CDQGeoEmbed
|
|
212
|
+
provider: github_release
|
|
213
|
+
repo: ResearAI/DeepScientist
|
|
214
|
+
tag: aisb-v0.0.1
|
|
215
|
+
asset_name: aisb.t3.029_cdqgeoembed.zip
|
|
216
|
+
sha256: 298b5f93116ff0797ce46bde8d3a2f35857b7b31a7e1b701cc8126148ff6d6b1
|
|
217
|
+
size_bytes: 236643
|
|
218
|
+
commercial:
|
|
219
|
+
annual_fee: null
|
|
220
|
+
display:
|
|
221
|
+
palette_seed: cedar-blue-opposition
|
|
222
|
+
art_style: semantic-cartography
|
|
223
|
+
accent_priority: high
|
|
224
|
+
image_path: ../image/029_aisb.t3.029_cdqgeoembed.jpg
|
|
@@ -0,0 +1,142 @@
|
|
|
1
|
+
schema_version: 1
|
|
2
|
+
id: aisb.t3.029_cdqgeoembed
|
|
3
|
+
name: 基于几何嵌入的条件二分量化
|
|
4
|
+
version: 0.1.0
|
|
5
|
+
one_line: '训练并评估对立感知复值嵌入(DoGE),该嵌入在辩论、可废止NLI和因果推理场景中量化上下文条件文本对之间的条件二分,通过DCF和Oppo-Angle指标进行测量。'
|
|
6
|
+
task_description: '本基准测试涵盖ACL 2025提出的条件二分量化(ConDQ)任务。目标是学习能够捕获在相同上下文条件下的两个文本输出是否真正对立的嵌入(例如辩论中的正反论点、可废止NLI中的强化与削弱论据、因果推理中的支持与反驳)。论文的DoGE框架使用复值嵌入和二分目标来几何定位正面、负面和中性论据。评估使用两个指标:二分一致性频率(DCF),用于检查论据类型之间的关系排序;以及Oppo-Angle,用于测量对立论据之间的角度分离。当前的本地快照不包含可执行训练或评估代码,仅包含元数据、论文、目录文件和一个检查点目录存根。上游仓库(https://github.com/cui-shaobo/conditional-dichotomy-quantification)和发布的PyPI包`opposite-score`提供了可运行代码。恢复可执行路径需要克隆上游仓库并安装opposite-score包,然后加载三个预训练HuggingFace模型之一进行推理,或在三个场景数据集上重新训练(辩论约95K、可废止NLI约441K、因果推理约48K四元组)。'
|
|
7
|
+
capability_tags:
|
|
8
|
+
- research_code_optimization
|
|
9
|
+
- text_embeddings
|
|
10
|
+
- contrastive_nlp
|
|
11
|
+
- evaluation
|
|
12
|
+
- classification
|
|
13
|
+
- complex_valued_representations
|
|
14
|
+
aisb_direction: T3
|
|
15
|
+
track_fit:
|
|
16
|
+
- paper_track
|
|
17
|
+
- benchmark_track
|
|
18
|
+
task_mode: analysis_driven
|
|
19
|
+
requires_execution: true
|
|
20
|
+
requires_paper: true
|
|
21
|
+
integrity_level: cas_plus_canary
|
|
22
|
+
snapshot_status: restore_needed
|
|
23
|
+
support_level: recovery
|
|
24
|
+
cost_band: medium
|
|
25
|
+
time_band: 6-24h
|
|
26
|
+
difficulty: hard
|
|
27
|
+
data_access: public
|
|
28
|
+
primary_outputs:
|
|
29
|
+
- avg_dcf
|
|
30
|
+
- debate_dcf
|
|
31
|
+
- nli_dcf
|
|
32
|
+
- causal_dcf
|
|
33
|
+
- debate_oppo_angle
|
|
34
|
+
- nli_oppo_angle
|
|
35
|
+
- causal_oppo_angle
|
|
36
|
+
launch_profiles:
|
|
37
|
+
- id: analysis_only
|
|
38
|
+
label: 仅分析
|
|
39
|
+
description: '查看保存的元数据、论文、指标合约和目录文件。不运行可执行代码;适用于理解任务定义和评估协议。'
|
|
40
|
+
- id: restore_first
|
|
41
|
+
label: 优先恢复
|
|
42
|
+
description: '克隆上游GitHub仓库,从PyPI安装opposite-score,下载预训练HuggingFace检查点,并在运行基准测试前重建完整的训练和评估流程。'
|
|
43
|
+
- id: inference_only
|
|
44
|
+
label: 仅推理(恢复后)
|
|
45
|
+
description: '恢复代码后,从HuggingFace加载预训练opposite-score模型,在三个场景的测试集上计算DCF和Oppo-Angle,无需重新训练。'
|
|
46
|
+
dataset_download:
|
|
47
|
+
primary_method: mixed
|
|
48
|
+
sources:
|
|
49
|
+
- kind: huggingface
|
|
50
|
+
url: https://huggingface.co/shaobocui/opposite-score-debate-bert
|
|
51
|
+
access: public
|
|
52
|
+
note: 辩论场景的预训练opposite-score模型(BERT骨干)。
|
|
53
|
+
- kind: huggingface
|
|
54
|
+
url: https://huggingface.co/shaobocui/opposite-score-defeasibleNLI-bert
|
|
55
|
+
access: public
|
|
56
|
+
note: 可废止NLI场景的预训练opposite-score模型(BERT骨干)。
|
|
57
|
+
- kind: huggingface
|
|
58
|
+
url: https://huggingface.co/shaobocui/opposite-score-causal-reasoning-bert
|
|
59
|
+
access: public
|
|
60
|
+
note: 因果推理场景的预训练opposite-score模型(BERT骨干)。
|
|
61
|
+
- kind: github
|
|
62
|
+
url: https://github.com/cui-shaobo/conditional-dichotomy-quantification
|
|
63
|
+
access: public
|
|
64
|
+
note: 上游源代码仓库,包含训练/评估代码和数据集构建脚本。
|
|
65
|
+
- kind: pypi
|
|
66
|
+
url: https://pypi.org/project/opposite-score/
|
|
67
|
+
access: public
|
|
68
|
+
note: 发布的Python包(opposite-score==0.0.1),提供DichotomyE模型和评分API。
|
|
69
|
+
notes:
|
|
70
|
+
- '三个场景数据集(辩论的PERSPECTRUM、可废止NLI的δ-NLI、因果推理的δ-CAUSAL)共计约583K个四元组。数据集磁盘占用很小(仅文本);预训练模型检查点为BERT规模(每个约400MB)。'
|
|
71
|
+
- '数据集中的中性论据使用GPT-4o生成,并经由两名标注者人工验证;如果从头复现生成流程,则需要spaCy和GPT-4o API访问权限。'
|
|
72
|
+
credential_requirements:
|
|
73
|
+
mode: none
|
|
74
|
+
items: []
|
|
75
|
+
notes:
|
|
76
|
+
- '使用预训练模型或已发布数据集无需凭据。从头复现中性论据生成需要OpenAI API访问权限以使用GPT-4o。'
|
|
77
|
+
resources:
|
|
78
|
+
minimum:
|
|
79
|
+
cpu_cores: 4
|
|
80
|
+
ram_gb: 16
|
|
81
|
+
disk_gb: 20
|
|
82
|
+
gpu_count: 1
|
|
83
|
+
gpu_vram_gb: 8
|
|
84
|
+
recommended:
|
|
85
|
+
cpu_cores: 8
|
|
86
|
+
ram_gb: 32
|
|
87
|
+
disk_gb: 80
|
|
88
|
+
gpu_count: 1
|
|
89
|
+
gpu_vram_gb: 16
|
|
90
|
+
environment:
|
|
91
|
+
python: '3.10'
|
|
92
|
+
cuda: null
|
|
93
|
+
pytorch: null
|
|
94
|
+
flash_attn: null
|
|
95
|
+
key_packages:
|
|
96
|
+
- opposite-score==0.0.1
|
|
97
|
+
- transformers
|
|
98
|
+
- spacy
|
|
99
|
+
notes:
|
|
100
|
+
- '当前本地快照不包含可执行设置文件或可运行源代码。环境规格来源于README和PyPI包元数据。'
|
|
101
|
+
- 'opposite-score包会在首次使用时自动下载预训练模型;使用示例中的.cuda()调用需要支持CUDA的GPU。'
|
|
102
|
+
- '完整复现训练需要上游仓库requirements文件中的额外依赖。'
|
|
103
|
+
risk_flags:
|
|
104
|
+
- source_snapshot_incomplete
|
|
105
|
+
- route_caveat
|
|
106
|
+
- upstream_dependency
|
|
107
|
+
risk_notes:
|
|
108
|
+
- '本地快照不包含可执行的Python文件,仅包含元数据、AGENTS、目录YAML、指标合约JSON、检查点目录存根和angles_data.csv文件。'
|
|
109
|
+
- '所有训练和评估代码必须从上游GitHub仓库恢复,或从论文和opposite-score PyPI包重新构建。'
|
|
110
|
+
- '打包过程中未执行基准测试;指标合约中的所有指标值为临时/阻塞状态。'
|
|
111
|
+
- '复现中性论据生成需要GPT-4o API访问权限及相关费用。'
|
|
112
|
+
recommended_when: '当您希望研究或复现ConDQ任务——即量化给定共享上下文时两个文本输出的对立程度——或计划为DoGE几何嵌入框架进行上游代码恢复时使用此条目。也适用于使用DCF和Oppo-Angle指标在三个ConDQ场景数据集上评估现有句子嵌入方法。'
|
|
113
|
+
not_recommended_when: '不要将本地快照视为可立即运行的基准测试。可执行训练和评估代码缺失。如果您需要无需上游恢复即可立即运行的文本嵌入基准测试,请选择其他条目。如果缺乏GPU访问权限也不适用,因为opposite-score模型需要CUDA才能正常使用。'
|
|
114
|
+
paper:
|
|
115
|
+
title: Conditional Dichotomy Quantification via Geometric Embedding
|
|
116
|
+
authors:
|
|
117
|
+
- Shaobo Cui
|
|
118
|
+
- Wenqing Liu
|
|
119
|
+
- Yiyang Feng
|
|
120
|
+
- Jiawei Zhou
|
|
121
|
+
- Boi Faltings
|
|
122
|
+
venue: ACL 2025 (Oral)
|
|
123
|
+
year: 2025
|
|
124
|
+
url: https://aclanthology.org/2025.acl-long.383/
|
|
125
|
+
doi: 10.18653/v1/2025.acl-long.383
|
|
126
|
+
download:
|
|
127
|
+
url: https://github.com/ResearAI/DeepScientist/releases/download/aisb-v0.0.1/aisb.t3.029_cdqgeoembed.zip
|
|
128
|
+
archive_type: zip
|
|
129
|
+
local_dir_name: paper-29-CDQGeoEmbed
|
|
130
|
+
provider: github_release
|
|
131
|
+
repo: ResearAI/DeepScientist
|
|
132
|
+
tag: aisb-v0.0.1
|
|
133
|
+
asset_name: aisb.t3.029_cdqgeoembed.zip
|
|
134
|
+
sha256: 298b5f93116ff0797ce46bde8d3a2f35857b7b31a7e1b701cc8126148ff6d6b1
|
|
135
|
+
size_bytes: 236643
|
|
136
|
+
commercial:
|
|
137
|
+
annual_fee: null
|
|
138
|
+
display:
|
|
139
|
+
palette_seed: cedar-blue-opposition
|
|
140
|
+
art_style: semantic-cartography
|
|
141
|
+
accent_priority: high
|
|
142
|
+
image_path: ../image/029_aisb.t3.029_cdqgeoembed.jpg
|
|
@@ -0,0 +1,211 @@
|
|
|
1
|
+
schema_version: 1
|
|
2
|
+
id: aisb.t3.030_processrm
|
|
3
|
+
name: EpicPRM – Process-Supervised Reward Model Data Construction
|
|
4
|
+
version: 0.1.0
|
|
5
|
+
one_line: 'Construct high-quality process supervision data (Epic50k) via perplexity-based
|
|
6
|
+
MC estimation and adaptive binary search, train a PRM on Qwen2-math-1.5B, and evaluate
|
|
7
|
+
step-level F1 on ProcessBench (GSM8K, MATH, OlympiadBench, OmniMATH).
|
|
8
|
+
|
|
9
|
+
'
|
|
10
|
+
task_description: 'This benchmark reproduces the EpicPRM framework for building process-supervised
|
|
11
|
+
reward models for mathematical reasoning. The pipeline has three stages: (1) generate
|
|
12
|
+
Chain-of-Thought reasoning chains from multiple LLMs (LLaMA3-8B-Instruct, LLaMA3.1-8B-Instruct,
|
|
13
|
+
Qwen2-7B-Instruct) on MATH dataset problems; (2) annotate intermediate reasoning
|
|
14
|
+
steps using a perplexity-weighted Monte Carlo estimation with an adaptive binary
|
|
15
|
+
search algorithm that dynamically adjusts starting position and sampling count based
|
|
16
|
+
on problem difficulty; (3) train a process reward model (PRM) on the resulting Epic50k
|
|
17
|
+
dataset (50k annotated steps) using Qwen2-math-1.5B-base with a binary classification
|
|
18
|
+
head and DeepSpeed, then evaluate step-level correctness prediction via the ProcessBench
|
|
19
|
+
strict protocol. The evaluation script (eval_strict.py) computes F1 as the harmonic
|
|
20
|
+
mean of acc_correct (fraction of correct solutions correctly identified) and acc_error
|
|
21
|
+
(fraction of error solutions where the predicted first error matches ground truth),
|
|
22
|
+
with threshold tuned on GSM8K and applied to MATH, OlympiadBench, and OmniMATH.
|
|
23
|
+
The data construction stage (generate_train_data.py) requires running multiple 7–8B
|
|
24
|
+
parameter LLMs as completers with vLLM for rollout sampling, which is the most compute-intensive
|
|
25
|
+
phase. PRM training itself uses DeepSpeed ZeRO (stage 1/2/3 configs provided). The
|
|
26
|
+
snapshot includes executable code for all stages but no pre-generated data or pre-trained
|
|
27
|
+
checkpoints; Epic50k and models are available on HuggingFace.
|
|
28
|
+
|
|
29
|
+
'
|
|
30
|
+
capability_tags:
|
|
31
|
+
- research_code_optimization
|
|
32
|
+
- process_reward_modeling
|
|
33
|
+
- mathematical_reasoning
|
|
34
|
+
- large_language_models
|
|
35
|
+
- data_construction
|
|
36
|
+
- monte_carlo_estimation
|
|
37
|
+
aisb_direction: T3
|
|
38
|
+
track_fit:
|
|
39
|
+
- paper_track
|
|
40
|
+
- benchmark_track
|
|
41
|
+
task_mode: experiment_driven
|
|
42
|
+
requires_execution: true
|
|
43
|
+
requires_paper: true
|
|
44
|
+
integrity_level: cas_plus_canary
|
|
45
|
+
snapshot_status: runnable
|
|
46
|
+
support_level: advanced
|
|
47
|
+
cost_band: high
|
|
48
|
+
time_band: 1d+
|
|
49
|
+
difficulty: hard
|
|
50
|
+
data_access: public
|
|
51
|
+
primary_outputs:
|
|
52
|
+
- f1_gsm8k
|
|
53
|
+
- f1_math
|
|
54
|
+
- f1_olympiadbench
|
|
55
|
+
- f1_omnimath
|
|
56
|
+
- process_reward_dataset
|
|
57
|
+
- verifier_checkpoint
|
|
58
|
+
launch_profiles:
|
|
59
|
+
- id: quick_eval
|
|
60
|
+
label: Quick Eval
|
|
61
|
+
description: 'Download the pre-trained PRM checkpoint and Epic50k from HuggingFace,
|
|
62
|
+
then run eval_strict.py on ProcessBench datasets (GSM8K, MATH, OlympiadBench,
|
|
63
|
+
OmniMATH) to reproduce F1 metrics. Requires a single GPU with ≥24 GB VRAM for
|
|
64
|
+
inference on the 1.5B model.
|
|
65
|
+
|
|
66
|
+
'
|
|
67
|
+
- id: train_and_eval
|
|
68
|
+
label: Train + Eval
|
|
69
|
+
description: 'Train the PRM from scratch on Epic50k using train_reward_model.py
|
|
70
|
+
with DeepSpeed, then evaluate via eval_strict.py. Skips the data construction
|
|
71
|
+
stage by using the published dataset.
|
|
72
|
+
|
|
73
|
+
'
|
|
74
|
+
- id: full_pipeline
|
|
75
|
+
label: Full Pipeline
|
|
76
|
+
description: 'Run all three stages: generate CoT chains and annotate steps via generate_train_data.py
|
|
77
|
+
(requires multiple 7-8B LLMs served via vLLM), train the PRM with DeepSpeed, and
|
|
78
|
+
evaluate. This is the most compute-intensive profile, requiring multi-GPU for
|
|
79
|
+
rollout sampling.
|
|
80
|
+
|
|
81
|
+
'
|
|
82
|
+
dataset_download:
|
|
83
|
+
primary_method: huggingface
|
|
84
|
+
sources:
|
|
85
|
+
- kind: huggingface
|
|
86
|
+
url: https://huggingface.co/datasets/SunW7777/EpicPRM
|
|
87
|
+
access: public
|
|
88
|
+
note: 'Epic50k dataset (50k annotated intermediate reasoning steps) and associated
|
|
89
|
+
model. Also linked from https://github.com/xiaolizh1/EpicPRM.
|
|
90
|
+
|
|
91
|
+
'
|
|
92
|
+
- kind: external
|
|
93
|
+
url: https://github.com/openai/prm800k
|
|
94
|
+
access: public
|
|
95
|
+
note: 'PRM800k dataset used for comparison and threshold analysis in the paper.
|
|
96
|
+
|
|
97
|
+
'
|
|
98
|
+
- kind: external
|
|
99
|
+
url: https://github.com/peiyi9979/Math-Shepherd
|
|
100
|
+
access: public
|
|
101
|
+
note: 'Math-Shepherd dataset used as a baseline comparison.
|
|
102
|
+
|
|
103
|
+
'
|
|
104
|
+
notes:
|
|
105
|
+
- Epic50k is relatively small (~50k steps); download size is modest.
|
|
106
|
+
- 'The MATH dataset (Hendrycks et al., 2021) is needed for CoT generation if running
|
|
107
|
+
the full pipeline. ProcessBench evaluation data is needed for eval_strict.py.
|
|
108
|
+
|
|
109
|
+
'
|
|
110
|
+
- 'Base models (Qwen2-math-1.5B-base, LLaMA3-8B-Instruct, LLaMA3.1-8B-Instruct,
|
|
111
|
+
Qwen2-7B-Instruct) must be downloaded from HuggingFace for training and data construction.
|
|
112
|
+
|
|
113
|
+
'
|
|
114
|
+
credential_requirements:
|
|
115
|
+
mode: none
|
|
116
|
+
items: []
|
|
117
|
+
notes:
|
|
118
|
+
- A HuggingFace account may be needed for gated model downloads (LLaMA3 family).
|
|
119
|
+
resources:
|
|
120
|
+
minimum:
|
|
121
|
+
cpu_cores: 16
|
|
122
|
+
ram_gb: 64
|
|
123
|
+
disk_gb: 150
|
|
124
|
+
gpu_count: 1
|
|
125
|
+
gpu_vram_gb: 24
|
|
126
|
+
recommended:
|
|
127
|
+
cpu_cores: 32
|
|
128
|
+
ram_gb: 128
|
|
129
|
+
disk_gb: 300
|
|
130
|
+
gpu_count: 2
|
|
131
|
+
gpu_vram_gb: 48
|
|
132
|
+
environment:
|
|
133
|
+
python: '3.10'
|
|
134
|
+
cuda: '11.8'
|
|
135
|
+
pytorch: 2.1.0
|
|
136
|
+
key_packages:
|
|
137
|
+
- deepspeed==0.15.4
|
|
138
|
+
- transformers
|
|
139
|
+
- vllm
|
|
140
|
+
- scikit-learn
|
|
141
|
+
- numpy
|
|
142
|
+
- torch
|
|
143
|
+
notes:
|
|
144
|
+
- 'DeepSpeed ZeRO configs (stage 1/2/3 with bf16) are bundled in deepspeed_config/.
|
|
145
|
+
Use ds_config_bf16_zero2.json for typical 2-GPU training.
|
|
146
|
+
|
|
147
|
+
'
|
|
148
|
+
- 'vLLM is used for accelerating rollout sampling during data construction (generate_train_data.py).
|
|
149
|
+
Not needed for eval-only or train-only profiles.
|
|
150
|
+
|
|
151
|
+
'
|
|
152
|
+
- See bundled requirements and scripts/ for exact launch commands.
|
|
153
|
+
risk_flags:
|
|
154
|
+
- large_model_downloads
|
|
155
|
+
- multi_model_dependency
|
|
156
|
+
- compute_intensive_data_construction
|
|
157
|
+
risk_notes:
|
|
158
|
+
- 'The full data construction pipeline requires serving multiple 7-8B parameter LLMs
|
|
159
|
+
simultaneously as completers, each sampling many rollouts per problem. This stage
|
|
160
|
+
alone can take multiple GPU-days.
|
|
161
|
+
|
|
162
|
+
'
|
|
163
|
+
- 'No benchmark execution was performed during packaging; metric values in the paper
|
|
164
|
+
have not been independently verified against this snapshot.
|
|
165
|
+
|
|
166
|
+
'
|
|
167
|
+
- 'The eval_strict.py script implements ProcessBench evaluation protocol; ProcessBench
|
|
168
|
+
data files must be obtained separately if not bundled.
|
|
169
|
+
|
|
170
|
+
'
|
|
171
|
+
- 'Base model weights (Qwen2-math-1.5B-base) are required for training. LLaMA3 models
|
|
172
|
+
may require accepting a license on HuggingFace.
|
|
173
|
+
|
|
174
|
+
'
|
|
175
|
+
recommended_when: 'Use this benchmark when you want to study process-supervised reward
|
|
176
|
+
model training with efficient data construction, compare annotation strategies (perplexity-based
|
|
177
|
+
vs count-based MC estimation), or evaluate PRM quality on mathematical reasoning
|
|
178
|
+
step-correctness prediction. Good for researchers interested in data-efficient PRM
|
|
179
|
+
training — Epic50k is <10% the size of PRM800k but achieves competitive or superior
|
|
180
|
+
results.
|
|
181
|
+
|
|
182
|
+
'
|
|
183
|
+
not_recommended_when: 'Do not use this if you cannot support DeepSpeed-based reward
|
|
184
|
+
model training, if you only need prompt-level evaluation without step-level annotation,
|
|
185
|
+
or if you lack access to at least one 24GB GPU. The full data construction pipeline
|
|
186
|
+
requires substantially more compute (multiple 7-8B LLMs for rollout sampling).
|
|
187
|
+
|
|
188
|
+
'
|
|
189
|
+
paper:
|
|
190
|
+
title: 'An Efficient and Precise Training Data Construction Framework for Process-supervised
|
|
191
|
+
Reward Model in Mathematical Reasoning
|
|
192
|
+
|
|
193
|
+
'
|
|
194
|
+
venue: ACL 2025
|
|
195
|
+
year: 2025
|
|
196
|
+
url: https://aclanthology.org/2025.acl-long.216/
|
|
197
|
+
download:
|
|
198
|
+
url: https://github.com/ResearAI/DeepScientist/releases/download/aisb-v0.0.1/aisb.t3.030_processrm.zip
|
|
199
|
+
archive_type: zip
|
|
200
|
+
local_dir_name: paper-30-ProcessRM
|
|
201
|
+
provider: github_release
|
|
202
|
+
repo: ResearAI/DeepScientist
|
|
203
|
+
tag: aisb-v0.0.1
|
|
204
|
+
asset_name: aisb.t3.030_processrm.zip
|
|
205
|
+
sha256: e34d67b264044c51d0bcdaa9d6dc4d7b9cb59f9c2285b4a629b3ac02af8c725a
|
|
206
|
+
size_bytes: 54360
|
|
207
|
+
display:
|
|
208
|
+
palette_seed: bronze-indigo-verifier
|
|
209
|
+
art_style: math-lab
|
|
210
|
+
accent_priority: high
|
|
211
|
+
image_path: ../image/030_aisb.t3.030_processrm.jpg
|