@researai/deepscientist 1.5.16 → 1.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +309 -130
- package/AISB/catalog/aisb.b1.agentic_coding.yaml +244 -0
- package/AISB/catalog/aisb.b10.climate_earth.yaml +235 -0
- package/AISB/catalog/aisb.b11.model_efficiency.yaml +231 -0
- package/AISB/catalog/aisb.b12.embodied_ai.yaml +238 -0
- package/AISB/catalog/aisb.b2.agent_systems.yaml +229 -0
- package/AISB/catalog/aisb.b3.self_evolving_rl.yaml +237 -0
- package/AISB/catalog/aisb.b4.lm_reasoning.yaml +240 -0
- package/AISB/catalog/aisb.b5.math_proof.yaml +235 -0
- package/AISB/catalog/aisb.b6.research_process.yaml +243 -0
- package/AISB/catalog/aisb.b7.multimodal_fusion.yaml +232 -0
- package/AISB/catalog/aisb.b8.lifesci_drug.yaml +275 -0
- package/AISB/catalog/aisb.b9.material_science.yaml +237 -0
- package/AISB/catalog/aisb.t3.001_savvy.yaml +159 -0
- package/AISB/catalog/aisb.t3.001_savvy.zh.yaml +121 -0
- package/AISB/catalog/aisb.t3.002_pinet.yaml +189 -0
- package/AISB/catalog/aisb.t3.002_pinet.zh.yaml +130 -0
- package/AISB/catalog/aisb.t3.004_decentralattn.yaml +184 -0
- package/AISB/catalog/aisb.t3.004_decentralattn.zh.yaml +153 -0
- package/AISB/catalog/aisb.t3.005_tsae.yaml +193 -0
- package/AISB/catalog/aisb.t3.005_tsae.zh.yaml +139 -0
- package/AISB/catalog/aisb.t3.006_physense.yaml +194 -0
- package/AISB/catalog/aisb.t3.006_physense.zh.yaml +118 -0
- package/AISB/catalog/aisb.t3.007_reasoningiqa.yaml +169 -0
- package/AISB/catalog/aisb.t3.007_reasoningiqa.zh.yaml +133 -0
- package/AISB/catalog/aisb.t3.008_meanflows.yaml +188 -0
- package/AISB/catalog/aisb.t3.008_meanflows.zh.yaml +140 -0
- package/AISB/catalog/aisb.t3.009_scoremissing.yaml +179 -0
- package/AISB/catalog/aisb.t3.009_scoremissing.zh.yaml +119 -0
- package/AISB/catalog/aisb.t3.010_suitabilityfilter.yaml +221 -0
- package/AISB/catalog/aisb.t3.010_suitabilityfilter.zh.yaml +141 -0
- package/AISB/catalog/aisb.t3.011_osd.yaml +206 -0
- package/AISB/catalog/aisb.t3.011_osd.zh.yaml +163 -0
- package/AISB/catalog/aisb.t3.012_efficientqat.yaml +206 -0
- package/AISB/catalog/aisb.t3.012_efficientqat.zh.yaml +159 -0
- package/AISB/catalog/aisb.t3.013_appl.yaml +152 -0
- package/AISB/catalog/aisb.t3.013_appl.zh.yaml +126 -0
- package/AISB/catalog/aisb.t3.014_piguard.yaml +207 -0
- package/AISB/catalog/aisb.t3.014_piguard.zh.yaml +164 -0
- package/AISB/catalog/aisb.t3.015_frspec.yaml +209 -0
- package/AISB/catalog/aisb.t3.015_frspec.zh.yaml +163 -0
- package/AISB/catalog/aisb.t3.016_mathfusion.yaml +166 -0
- package/AISB/catalog/aisb.t3.016_mathfusion.zh.yaml +145 -0
- package/AISB/catalog/aisb.t3.017_multimodalglp.yaml +171 -0
- package/AISB/catalog/aisb.t3.017_multimodalglp.zh.yaml +122 -0
- package/AISB/catalog/aisb.t3.018_cotsynth.yaml +206 -0
- package/AISB/catalog/aisb.t3.018_cotsynth.zh.yaml +162 -0
- package/AISB/catalog/aisb.t3.019_dyscaleut.yaml +211 -0
- package/AISB/catalog/aisb.t3.019_dyscaleut.zh.yaml +148 -0
- package/AISB/catalog/aisb.t3.020_aristotle.yaml +173 -0
- package/AISB/catalog/aisb.t3.020_aristotle.zh.yaml +119 -0
- package/AISB/catalog/aisb.t3.021_tokenrecycling.yaml +160 -0
- package/AISB/catalog/aisb.t3.021_tokenrecycling.zh.yaml +129 -0
- package/AISB/catalog/aisb.t3.022_chainofreasoning.yaml +204 -0
- package/AISB/catalog/aisb.t3.022_chainofreasoning.zh.yaml +161 -0
- package/AISB/catalog/aisb.t3.023_guidedembed.yaml +211 -0
- package/AISB/catalog/aisb.t3.023_guidedembed.zh.yaml +189 -0
- package/AISB/catalog/aisb.t3.024_outputcentric.yaml +148 -0
- package/AISB/catalog/aisb.t3.024_outputcentric.zh.yaml +131 -0
- package/AISB/catalog/aisb.t3.025_deeper.yaml +143 -0
- package/AISB/catalog/aisb.t3.025_deeper.zh.yaml +116 -0
- package/AISB/catalog/aisb.t3.026_gartkg.yaml +195 -0
- package/AISB/catalog/aisb.t3.026_gartkg.zh.yaml +127 -0
- package/AISB/catalog/aisb.t3.027_citeeval.yaml +182 -0
- package/AISB/catalog/aisb.t3.027_citeeval.zh.yaml +135 -0
- package/AISB/catalog/aisb.t3.028_sbam.yaml +206 -0
- package/AISB/catalog/aisb.t3.028_sbam.zh.yaml +166 -0
- package/AISB/catalog/aisb.t3.029_cdqgeoembed.yaml +224 -0
- package/AISB/catalog/aisb.t3.029_cdqgeoembed.zh.yaml +142 -0
- package/AISB/catalog/aisb.t3.030_processrm.yaml +211 -0
- package/AISB/catalog/aisb.t3.030_processrm.zh.yaml +166 -0
- package/AISB/catalog/aisb.t3.031_circuitstability.yaml +172 -0
- package/AISB/catalog/aisb.t3.031_circuitstability.zh.yaml +134 -0
- package/AISB/catalog/aisb.t3.032_ptsolver.yaml +169 -0
- package/AISB/catalog/aisb.t3.032_ptsolver.zh.yaml +135 -0
- package/AISB/catalog/aisb.t3.033_gcse.yaml +144 -0
- package/AISB/catalog/aisb.t3.033_gcse.zh.yaml +126 -0
- package/AISB/catalog/aisb.t3.034_ensemblewm.yaml +183 -0
- package/AISB/catalog/aisb.t3.034_ensemblewm.zh.yaml +146 -0
- package/AISB/catalog/aisb.t3.035_moralvalueswa.yaml +207 -0
- package/AISB/catalog/aisb.t3.035_moralvalueswa.zh.yaml +165 -0
- package/AISB/catalog/aisb.t3.036_weakstrongpref.yaml +210 -0
- package/AISB/catalog/aisb.t3.036_weakstrongpref.zh.yaml +194 -0
- package/AISB/catalog/aisb.t3.037_dementiamask.yaml +172 -0
- package/AISB/catalog/aisb.t3.037_dementiamask.zh.yaml +132 -0
- package/AISB/catalog/aisb.t3.038_tinysam.yaml +284 -0
- package/AISB/catalog/aisb.t3.038_tinysam.zh.yaml +240 -0
- package/AISB/catalog/aisb.t3.039_calf.yaml +224 -0
- package/AISB/catalog/aisb.t3.039_calf.zh.yaml +194 -0
- package/AISB/catalog/aisb.t3.040_graniteguardian.yaml +199 -0
- package/AISB/catalog/aisb.t3.040_graniteguardian.zh.yaml +174 -0
- package/AISB/catalog/aisb.t3.041_amdm.yaml +149 -0
- package/AISB/catalog/aisb.t3.041_amdm.zh.yaml +137 -0
- package/AISB/catalog/aisb.t3.042_xpatch.yaml +216 -0
- package/AISB/catalog/aisb.t3.042_xpatch.zh.yaml +182 -0
- package/AISB/catalog/aisb.t3.043_vhm.yaml +268 -0
- package/AISB/catalog/aisb.t3.043_vhm.zh.yaml +193 -0
- package/AISB/catalog/aisb.t3.044_rgvi.yaml +224 -0
- package/AISB/catalog/aisb.t3.044_rgvi.zh.yaml +176 -0
- package/AISB/catalog/aisb.t3.045_pslstm.yaml +203 -0
- package/AISB/catalog/aisb.t3.045_pslstm.zh.yaml +179 -0
- package/AISB/catalog/aisb.t3.046_nonstatts.yaml +208 -0
- package/AISB/catalog/aisb.t3.046_nonstatts.zh.yaml +194 -0
- package/AISB/catalog/aisb.t3.047_timepfn.yaml +156 -0
- package/AISB/catalog/aisb.t3.047_timepfn.zh.yaml +124 -0
- package/AISB/catalog/aisb.t3.048_proxyspex.yaml +148 -0
- package/AISB/catalog/aisb.t3.048_proxyspex.zh.yaml +125 -0
- package/AISB/catalog/aisb.t3.049_hogwildinference.yaml +183 -0
- package/AISB/catalog/aisb.t3.049_hogwildinference.zh.yaml +138 -0
- package/AISB/catalog/aisb.t3.050_causalpfn.yaml +214 -0
- package/AISB/catalog/aisb.t3.050_causalpfn.zh.yaml +190 -0
- package/AISB/catalog/aisb.t3.051_flashtp.yaml +169 -0
- package/AISB/catalog/aisb.t3.051_flashtp.zh.yaml +124 -0
- package/AISB/catalog/aisb.t3.052_nsdiff.yaml +155 -0
- package/AISB/catalog/aisb.t3.052_nsdiff.zh.yaml +138 -0
- package/AISB/catalog/aisb.t3.053_k2vae.yaml +158 -0
- package/AISB/catalog/aisb.t3.053_k2vae.zh.yaml +132 -0
- package/AISB/catalog/aisb.t3.054_timebase.yaml +178 -0
- package/AISB/catalog/aisb.t3.054_timebase.zh.yaml +158 -0
- package/AISB/catalog/aisb.t3.055_csbrain.yaml +238 -0
- package/AISB/catalog/aisb.t3.055_csbrain.zh.yaml +184 -0
- package/AISB/catalog/aisb.t3.056_infosam.yaml +224 -0
- package/AISB/catalog/aisb.t3.056_infosam.zh.yaml +189 -0
- package/AISB/catalog/aisb.t3.057_mdreid.yaml +129 -0
- package/AISB/catalog/aisb.t3.057_mdreid.zh.yaml +117 -0
- package/AISB/catalog/aisb.t3.058_mindglitch.yaml +171 -0
- package/AISB/catalog/aisb.t3.058_mindglitch.zh.yaml +145 -0
- package/AISB/catalog/aisb.t3.059_selfsupervised.yaml +154 -0
- package/AISB/catalog/aisb.t3.059_selfsupervised.zh.yaml +125 -0
- package/AISB/catalog/aisb.t3.060_iaggad.yaml +121 -0
- package/AISB/catalog/aisb.t3.060_iaggad.zh.yaml +100 -0
- package/AISB/catalog/aisb.t3.061_hsgkn.yaml +136 -0
- package/AISB/catalog/aisb.t3.061_hsgkn.zh.yaml +113 -0
- package/AISB/catalog/aisb.t3.062_visionts.yaml +237 -0
- package/AISB/catalog/aisb.t3.062_visionts.zh.yaml +216 -0
- package/AISB/catalog/aisb.t3.063_tsrag.yaml +162 -0
- package/AISB/catalog/aisb.t3.063_tsrag.zh.yaml +138 -0
- package/AISB/catalog/aisb.t3.064_pir.yaml +221 -0
- package/AISB/catalog/aisb.t3.064_pir.zh.yaml +197 -0
- package/AISB/catalog/aisb.t3.065_proteinbinding.yaml +234 -0
- package/AISB/catalog/aisb.t3.065_proteinbinding.zh.yaml +167 -0
- package/AISB/catalog/aisb.t3.066_tropicalattention.yaml +267 -0
- package/AISB/catalog/aisb.t3.066_tropicalattention.zh.yaml +229 -0
- package/AISB/catalog/aisb.t3.067_kanad.yaml +193 -0
- package/AISB/catalog/aisb.t3.067_kanad.zh.yaml +167 -0
- package/AISB/catalog/aisb.t3.068_sempo.yaml +187 -0
- package/AISB/catalog/aisb.t3.068_sempo.zh.yaml +148 -0
- package/AISB/catalog/aisb.t3.069_treehfd.yaml +129 -0
- package/AISB/catalog/aisb.t3.069_treehfd.zh.yaml +111 -0
- package/AISB/catalog/aisb.t3.070_certifiedunlearning.yaml +224 -0
- package/AISB/catalog/aisb.t3.070_certifiedunlearning.zh.yaml +171 -0
- package/AISB/catalog/aisb.t3.071_neuralmjd.yaml +142 -0
- package/AISB/catalog/aisb.t3.071_neuralmjd.zh.yaml +120 -0
- package/AISB/catalog/aisb.t3.072_fedgmt.yaml +181 -0
- package/AISB/catalog/aisb.t3.072_fedgmt.zh.yaml +158 -0
- package/AISB/catalog/aisb.t3.073_rld.yaml +161 -0
- package/AISB/catalog/aisb.t3.073_rld.zh.yaml +129 -0
- package/AISB/catalog/aisb.t3.074_lsvi.yaml +163 -0
- package/AISB/catalog/aisb.t3.074_lsvi.zh.yaml +129 -0
- package/AISB/catalog/aisb.t3.075_treeslicedentropy.yaml +201 -0
- package/AISB/catalog/aisb.t3.075_treeslicedentropy.zh.yaml +148 -0
- package/AISB/catalog/aisb.t3.076_aanet.yaml +169 -0
- package/AISB/catalog/aisb.t3.076_aanet.zh.yaml +129 -0
- package/AISB/catalog/aisb.t3.077_cmnn.yaml +199 -0
- package/AISB/catalog/aisb.t3.077_cmnn.zh.yaml +165 -0
- package/AISB/catalog/aisb.t3.078_conformalanomaly.yaml +146 -0
- package/AISB/catalog/aisb.t3.078_conformalanomaly.zh.yaml +117 -0
- package/AISB/catalog/aisb.t3.079_dpfkmeans.yaml +131 -0
- package/AISB/catalog/aisb.t3.079_dpfkmeans.zh.yaml +104 -0
- package/AISB/catalog/aisb.t3.080_latentscorereweight.yaml +169 -0
- package/AISB/catalog/aisb.t3.080_latentscorereweight.zh.yaml +123 -0
- package/AISB/catalog/aisb.t3.081_qmamba.yaml +150 -0
- package/AISB/catalog/aisb.t3.081_qmamba.zh.yaml +117 -0
- package/AISB/catalog/aisb.t3.082_onlinellmrouting.yaml +160 -0
- package/AISB/catalog/aisb.t3.082_onlinellmrouting.zh.yaml +133 -0
- package/AISB/catalog/aisb.t3.083_starformer.yaml +178 -0
- package/AISB/catalog/aisb.t3.083_starformer.zh.yaml +140 -0
- package/AISB/catalog/aisb.t3.084_ift.yaml +139 -0
- package/AISB/catalog/aisb.t3.084_ift.zh.yaml +111 -0
- package/AISB/catalog/aisb.t3.085_neuralsurv.yaml +183 -0
- package/AISB/catalog/aisb.t3.085_neuralsurv.zh.yaml +143 -0
- package/AISB/catalog/aisb.t3.086_stella.yaml +197 -0
- package/AISB/catalog/aisb.t3.086_stella.zh.yaml +142 -0
- package/AISB/catalog/aisb.t3.087_moses.yaml +167 -0
- package/AISB/catalog/aisb.t3.087_moses.zh.yaml +132 -0
- package/AISB/catalog/aisb.t3.088_channelnorm.yaml +140 -0
- package/AISB/catalog/aisb.t3.088_channelnorm.zh.yaml +109 -0
- package/AISB/catalog/aisb.t3.089_causalvelocity.yaml +730 -0
- package/AISB/catalog/aisb.t3.089_causalvelocity.zh.yaml +668 -0
- package/AISB/catalog/aisb.t3.090_rstib.yaml +144 -0
- package/AISB/catalog/aisb.t3.090_rstib.zh.yaml +109 -0
- package/AISB/catalog/aisb.t3.091_timeawarecausal.yaml +132 -0
- package/AISB/catalog/aisb.t3.091_timeawarecausal.zh.yaml +107 -0
- package/AISB/catalog/aisb.t3.092_kmeanslocalopt.yaml +138 -0
- package/AISB/catalog/aisb.t3.092_kmeanslocalopt.zh.yaml +110 -0
- package/AISB/catalog/aisb.t3.093_fedwmsam.yaml +134 -0
- package/AISB/catalog/aisb.t3.093_fedwmsam.zh.yaml +106 -0
- package/AISB/catalog/aisb.t3.094_boundre.yaml +147 -0
- package/AISB/catalog/aisb.t3.094_boundre.zh.yaml +114 -0
- package/AISB/catalog/aisb.t3.095_fastfeaturecp.yaml +153 -0
- package/AISB/catalog/aisb.t3.095_fastfeaturecp.zh.yaml +118 -0
- package/AISB/catalog/aisb.t3.096_m3svm.yaml +189 -0
- package/AISB/catalog/aisb.t3.096_m3svm.zh.yaml +149 -0
- package/AISB/catalog/aisb.t3.097_wassersteintl.yaml +212 -0
- package/AISB/catalog/aisb.t3.097_wassersteintl.zh.yaml +169 -0
- package/AISB/catalog/aisb.t3.098_xmahalanobis.yaml +171 -0
- package/AISB/catalog/aisb.t3.098_xmahalanobis.zh.yaml +127 -0
- package/AISB/catalog/aisb.t3.099_ollalanding.yaml +248 -0
- package/AISB/catalog/aisb.t3.099_ollalanding.zh.yaml +182 -0
- package/AISB/catalog/aisb.t3.100_invmissingdata.yaml +179 -0
- package/AISB/catalog/aisb.t3.100_invmissingdata.zh.yaml +150 -0
- package/AISB/catalog/aisb.t3.101_acia.yaml +164 -0
- package/AISB/catalog/aisb.t3.101_acia.zh.yaml +109 -0
- package/AISB/catalog/aisb.t3.102_stochasticff.yaml +178 -0
- package/AISB/catalog/aisb.t3.102_stochasticff.zh.yaml +130 -0
- package/AISB/catalog/aisb.t3.103_qdcp.yaml +150 -0
- package/AISB/catalog/aisb.t3.103_qdcp.zh.yaml +116 -0
- package/AISB/catalog/aisb.t3.104_balancedactiveinf.yaml +137 -0
- package/AISB/catalog/aisb.t3.104_balancedactiveinf.zh.yaml +104 -0
- package/AISB/catalog/aisb.t3.105_binaryclasseval.yaml +161 -0
- package/AISB/catalog/aisb.t3.105_binaryclasseval.zh.yaml +130 -0
- package/AISB/image/001_aisb.t3.001_savvy.jpg +0 -0
- package/AISB/image/002_aisb.t3.002_pinet.jpg +0 -0
- package/AISB/image/003_aisb.t3.003_dmsqd.jpg +0 -0
- package/AISB/image/004_aisb.t3.004_decentralattn.jpg +0 -0
- package/AISB/image/005_aisb.t3.005_tsae.jpg +0 -0
- package/AISB/image/006_aisb.t3.006_physense.jpg +0 -0
- package/AISB/image/007_aisb.t3.007_reasoningiqa.jpg +0 -0
- package/AISB/image/008_aisb.t3.008_meanflows.jpg +0 -0
- package/AISB/image/009_aisb.t3.009_scoremissing.jpg +0 -0
- package/AISB/image/010_aisb.t3.010_suitabilityfilter.jpg +0 -0
- package/AISB/image/011_aisb.t3.011_osd.jpg +0 -0
- package/AISB/image/012_aisb.t3.012_efficientqat.jpg +0 -0
- package/AISB/image/013_aisb.t3.013_appl.jpg +0 -0
- package/AISB/image/014_aisb.t3.014_piguard.jpg +0 -0
- package/AISB/image/015_aisb.t3.015_frspec.jpg +0 -0
- package/AISB/image/016_aisb.t3.016_mathfusion.jpg +0 -0
- package/AISB/image/017_aisb.t3.017_multimodalglp.jpg +0 -0
- package/AISB/image/018_aisb.t3.018_cotsynth.jpg +0 -0
- package/AISB/image/019_aisb.t3.019_dyscaleut.jpg +0 -0
- package/AISB/image/020_aisb.t3.020_aristotle.jpg +0 -0
- package/AISB/image/021_aisb.t3.021_tokenrecycling.jpg +0 -0
- package/AISB/image/022_aisb.t3.022_chainofreasoning.jpg +0 -0
- package/AISB/image/023_aisb.t3.023_guidedembed.jpg +0 -0
- package/AISB/image/024_aisb.t3.024_outputcentric.jpg +0 -0
- package/AISB/image/025_aisb.t3.025_deeper.jpg +0 -0
- package/AISB/image/026_aisb.t3.026_gartkg.jpg +0 -0
- package/AISB/image/027_aisb.t3.027_citeeval.jpg +0 -0
- package/AISB/image/028_aisb.t3.028_sbam.jpg +0 -0
- package/AISB/image/029_aisb.t3.029_cdqgeoembed.jpg +0 -0
- package/AISB/image/030_aisb.t3.030_processrm.jpg +0 -0
- package/AISB/image/031_aisb.t3.031_circuitstability.jpg +0 -0
- package/AISB/image/032_aisb.t3.032_ptsolver.jpg +0 -0
- package/AISB/image/033_aisb.t3.033_gcse.jpg +0 -0
- package/AISB/image/034_aisb.t3.034_ensemblewm.jpg +0 -0
- package/AISB/image/035_aisb.t3.035_moralvalueswa.jpg +0 -0
- package/AISB/image/036_aisb.t3.036_weakstrongpref.jpg +0 -0
- package/AISB/image/037_aisb.t3.037_dementiamask.jpg +0 -0
- package/AISB/image/038_aisb.t3.038_tinysam.jpg +0 -0
- package/AISB/image/039_aisb.t3.039_calf.jpg +0 -0
- package/AISB/image/040_aisb.t3.040_graniteguardian.jpg +0 -0
- package/AISB/image/041_aisb.t3.041_amdm.jpg +0 -0
- package/AISB/image/042_aisb.t3.042_xpatch.jpg +0 -0
- package/AISB/image/043_aisb.t3.043_vhm.jpg +0 -0
- package/AISB/image/044_aisb.t3.044_rgvi.jpg +0 -0
- package/AISB/image/045_aisb.t3.045_pslstm.jpg +0 -0
- package/AISB/image/046_aisb.t3.046_nonstatts.jpg +0 -0
- package/AISB/image/047_aisb.t3.047_timepfn.jpg +0 -0
- package/AISB/image/048_aisb.t3.048_proxyspex.jpg +0 -0
- package/AISB/image/049_aisb.t3.049_hogwildinference.jpg +0 -0
- package/AISB/image/050_aisb.t3.050_causalpfn.jpg +0 -0
- package/AISB/image/051_aisb.t3.051_flashtp.jpg +0 -0
- package/AISB/image/052_aisb.t3.052_nsdiff.jpg +0 -0
- package/AISB/image/053_aisb.t3.053_k2vae.jpg +0 -0
- package/AISB/image/054_aisb.t3.054_timebase.jpg +0 -0
- package/AISB/image/055_aisb.t3.055_csbrain.jpg +0 -0
- package/AISB/image/056_aisb.t3.056_infosam.jpg +0 -0
- package/AISB/image/057_aisb.t3.057_mdreid.jpg +0 -0
- package/AISB/image/058_aisb.t3.058_mindglitch.jpg +0 -0
- package/AISB/image/059_aisb.t3.059_selfsupervised.jpg +0 -0
- package/AISB/image/060_aisb.t3.060_iaggad.jpg +0 -0
- package/AISB/image/061_aisb.t3.061_hsgkn.jpg +0 -0
- package/AISB/image/062_aisb.t3.062_visionts.jpg +0 -0
- package/AISB/image/063_aisb.t3.063_tsrag.jpg +0 -0
- package/AISB/image/064_aisb.t3.064_pir.jpg +0 -0
- package/AISB/image/065_aisb.t3.065_proteinbinding.jpg +0 -0
- package/AISB/image/066_aisb.t3.066_tropicalattention.jpg +0 -0
- package/AISB/image/067_aisb.t3.067_kanad.jpg +0 -0
- package/AISB/image/068_aisb.t3.068_sempo.jpg +0 -0
- package/AISB/image/069_aisb.t3.069_treehfd.jpg +0 -0
- package/AISB/image/070_aisb.t3.070_certifiedunlearning.jpg +0 -0
- package/AISB/image/071_aisb.t3.071_neuralmjd.jpg +0 -0
- package/AISB/image/072_aisb.t3.072_fedgmt.jpg +0 -0
- package/AISB/image/073_aisb.t3.073_rld.jpg +0 -0
- package/AISB/image/074_aisb.t3.074_lsvi.jpg +0 -0
- package/AISB/image/075_aisb.t3.075_treeslicedentropy.jpg +0 -0
- package/AISB/image/076_aisb.t3.076_aanet.jpg +0 -0
- package/AISB/image/077_aisb.t3.077_cmnn.jpg +0 -0
- package/AISB/image/078_aisb.t3.078_conformalanomaly.jpg +0 -0
- package/AISB/image/079_aisb.t3.079_dpfkmeans.jpg +0 -0
- package/AISB/image/080_aisb.t3.080_latentscorereweight.jpg +0 -0
- package/AISB/image/081_aisb.t3.081_qmamba.jpg +0 -0
- package/AISB/image/082_aisb.t3.082_onlinellmrouting.jpg +0 -0
- package/AISB/image/083_aisb.t3.083_starformer.jpg +0 -0
- package/AISB/image/084_aisb.t3.084_ift.jpg +0 -0
- package/AISB/image/085_aisb.t3.085_neuralsurv.jpg +0 -0
- package/AISB/image/086_aisb.t3.086_stella.jpg +0 -0
- package/AISB/image/087_aisb.t3.087_moses.jpg +0 -0
- package/AISB/image/088_aisb.t3.088_channelnorm.jpg +0 -0
- package/AISB/image/089_aisb.t3.089_causalvelocity.jpg +0 -0
- package/AISB/image/090_aisb.t3.090_rstib.jpg +0 -0
- package/AISB/image/091_aisb.t3.091_timeawarecausal.jpg +0 -0
- package/AISB/image/092_aisb.t3.092_kmeanslocalopt.jpg +0 -0
- package/AISB/image/093_aisb.t3.093_fedwmsam.jpg +0 -0
- package/AISB/image/094_aisb.t3.094_boundre.jpg +0 -0
- package/AISB/image/095_aisb.t3.095_fastfeaturecp.jpg +0 -0
- package/AISB/image/096_aisb.t3.096_m3svm.jpg +0 -0
- package/AISB/image/097_aisb.t3.097_wassersteintl.jpg +0 -0
- package/AISB/image/098_aisb.t3.098_xmahalanobis.jpg +0 -0
- package/AISB/image/099_aisb.t3.099_ollalanding.jpg +0 -0
- package/AISB/image/100_aisb.t3.100_invmissingdata.jpg +0 -0
- package/AISB/image/101_aisb.t3.101_acia.jpg +0 -0
- package/AISB/image/102_aisb.t3.102_stochasticff.jpg +0 -0
- package/AISB/image/103_aisb.t3.103_qdcp.jpg +0 -0
- package/AISB/image/104_aisb.t3.104_balancedactiveinf.jpg +0 -0
- package/AISB/image/105_aisb.t3.105_binaryclasseval.jpg +0 -0
- package/AISB/image/106_aisb.t1.reasoning_lite.jpg +0 -0
- package/AISB/image/107_aisb.t2.paper_audit.jpg +0 -0
- package/AISB/image/108_aisb.t3.multi_gpu_search.jpg +0 -0
- package/AISB/image/109_aisb.t3.tdc_admet.jpg +0 -0
- package/AISB/image/aisb.b1.agentic_coding.svg +16 -0
- package/AISB/image/aisb.b10.climate_earth.svg +16 -0
- package/AISB/image/aisb.b11.model_efficiency.svg +16 -0
- package/AISB/image/aisb.b12.embodied_ai.svg +16 -0
- package/AISB/image/aisb.b2.agent_systems.svg +16 -0
- package/AISB/image/aisb.b3.self_evolving_rl.svg +16 -0
- package/AISB/image/aisb.b4.lm_reasoning.svg +16 -0
- package/AISB/image/aisb.b5.math_proof.svg +16 -0
- package/AISB/image/aisb.b6.research_process.svg +16 -0
- package/AISB/image/aisb.b7.multimodal_fusion.svg +16 -0
- package/AISB/image/aisb.b8.lifesci_drug.svg +16 -0
- package/AISB/image/aisb.b9.material_science.svg +16 -0
- package/README.md +196 -32
- package/bin/ds.js +924 -66
- package/docs/en/00_QUICK_START.md +195 -18
- package/docs/en/01_SETTINGS_REFERENCE.md +468 -96
- package/docs/en/02_START_RESEARCH_GUIDE.md +26 -5
- package/docs/en/03_QQ_CONNECTOR_GUIDE.md +14 -3
- package/docs/en/04_LINGZHU_CONNECTOR_GUIDE.md +2 -0
- package/docs/en/05_TUI_GUIDE.md +171 -2
- package/docs/en/07_MEMORY_AND_MCP.md +38 -2
- package/docs/en/09_DOCTOR.md +78 -7
- package/docs/en/10_WEIXIN_CONNECTOR_GUIDE.md +38 -1
- package/docs/en/11_LICENSE_AND_RISK.md +4 -0
- package/docs/en/12_GUIDED_WORKFLOW_TOUR.md +15 -0
- package/docs/en/14_PROMPT_SKILLS_AND_MCP_GUIDE.md +9 -0
- package/docs/en/15_CODEX_PROVIDER_SETUP.md +624 -180
- package/docs/en/16_TELEGRAM_CONNECTOR_GUIDE.md +14 -0
- package/docs/en/17_WHATSAPP_CONNECTOR_GUIDE.md +14 -0
- package/docs/en/18_FEISHU_CONNECTOR_GUIDE.md +14 -0
- package/docs/en/21_LOCAL_MODEL_BACKENDS_GUIDE.md +386 -0
- package/docs/en/22_BENCHSTORE_YAML_REFERENCE.md +469 -0
- package/docs/en/23_BENCHSTORE_GITHUB_RELEASES_SPEC.md +316 -0
- package/docs/en/24_CLAUDE_CODE_PROVIDER_SETUP.md +469 -0
- package/docs/en/25_OPENCODE_PROVIDER_SETUP.md +653 -0
- package/docs/en/26_CITATION_AND_ATTRIBUTION.md +119 -0
- package/docs/en/27_KIMI_CODE_PROVIDER_SETUP.md +180 -0
- package/docs/en/28_DISCORD_CONNECTOR_GUIDE.md +61 -0
- package/docs/en/29_SLACK_CONNECTOR_GUIDE.md +60 -0
- package/docs/en/30_SETTINGS_CONTROL_CENTER_GUIDE.md +371 -0
- package/docs/en/{19_LOCAL_BROWSER_AUTH.md → 31_LOCAL_BROWSER_AUTH.md} +1 -1
- package/docs/en/32_WINDOWS_WSL2_DEPLOYMENT_GUIDE.md +273 -0
- package/docs/en/33_WORKSPACE_EXPLORER_QA.md +121 -0
- package/docs/en/91_DEVELOPMENT.md +266 -0
- package/docs/en/99_ACKNOWLEDGEMENTS.md +24 -19
- package/docs/en/README.md +48 -7
- package/docs/images/admin/admin-connectors-health-en.png +0 -0
- package/docs/images/admin/admin-controllers-en.png +0 -0
- package/docs/images/admin/admin-diagnostics-en.png +0 -0
- package/docs/images/admin/admin-errors-en.png +0 -0
- package/docs/images/admin/admin-issues-en.png +0 -0
- package/docs/images/admin/admin-logs-en.png +0 -0
- package/docs/images/admin/admin-quest-detail-en.png +0 -0
- package/docs/images/admin/admin-quests-en.png +0 -0
- package/docs/images/admin/admin-repairs-en.png +0 -0
- package/docs/images/admin/admin-runtime-en.png +0 -0
- package/docs/images/admin/admin-search-en.png +0 -0
- package/docs/images/admin/admin-stats-en.png +0 -0
- package/docs/images/admin/admin-summary-en.png +0 -0
- package/docs/images/connectors/connector-discord-en.png +0 -0
- package/docs/images/connectors/connector-feishu-en.png +0 -0
- package/docs/images/connectors/connector-lingzhu-en.png +0 -0
- package/docs/images/connectors/connector-qq-en.png +0 -0
- package/docs/images/connectors/connector-slack-en.png +0 -0
- package/docs/images/connectors/connector-telegram-en.png +0 -0
- package/docs/images/connectors/connector-weixin-en.png +0 -0
- package/docs/images/connectors/connector-whatsapp-en.png +0 -0
- package/docs/images/settings/settings-baselines-en.png +0 -0
- package/docs/images/settings/settings-config-en.png +0 -0
- package/docs/images/settings/settings-connectors-overview-en.png +0 -0
- package/docs/images/settings/settings-deepxiv-en.png +0 -0
- package/docs/images/settings/settings-mcp-servers-en.png +0 -0
- package/docs/images/settings/settings-plugins-en.png +0 -0
- package/docs/images/settings/settings-runners-en.png +0 -0
- package/docs/zh/00_QUICK_START.md +142 -18
- package/docs/zh/01_SETTINGS_REFERENCE.md +219 -98
- package/docs/zh/02_START_RESEARCH_GUIDE.md +26 -5
- package/docs/zh/05_TUI_GUIDE.md +171 -2
- package/docs/zh/07_MEMORY_AND_MCP.md +29 -2
- package/docs/zh/09_DOCTOR.md +54 -8
- package/docs/zh/10_WEIXIN_CONNECTOR_GUIDE.md +24 -1
- package/docs/zh/11_LICENSE_AND_RISK.md +4 -0
- package/docs/zh/12_GUIDED_WORKFLOW_TOUR.md +15 -0
- package/docs/zh/14_PROMPT_SKILLS_AND_MCP_GUIDE.md +9 -0
- package/docs/zh/15_CODEX_PROVIDER_SETUP.md +552 -181
- package/docs/zh/21_LOCAL_MODEL_BACKENDS_GUIDE.md +384 -0
- package/docs/zh/22_BENCHSTORE_YAML_REFERENCE.md +459 -0
- package/docs/zh/23_BENCHSTORE_GITHUB_RELEASES_SPEC.md +287 -0
- package/docs/zh/23_CLAUDE_RUNNER_GUIDE.md +103 -0
- package/docs/zh/24_CLAUDE_CODE_PROVIDER_SETUP.md +460 -0
- package/docs/zh/25_OPENCODE_PROVIDER_SETUP.md +660 -0
- package/docs/zh/26_CITATION_AND_ATTRIBUTION.md +102 -0
- package/docs/zh/27_KIMI_CODE_PROVIDER_SETUP.md +51 -0
- package/docs/zh/{19_LOCAL_BROWSER_AUTH.md → 31_LOCAL_BROWSER_AUTH.md} +1 -1
- package/docs/zh/32_WINDOWS_WSL2_DEPLOYMENT_GUIDE.md +264 -0
- package/docs/zh/33_WORKSPACE_EXPLORER_QA.md +127 -0
- package/docs/zh/99_ACKNOWLEDGEMENTS.md +23 -19
- package/docs/zh/README.md +33 -7
- package/install.sh +168 -20
- package/package.json +5 -1
- package/pyproject.toml +2 -1
- package/src/deepscientist/__init__.py +1 -1
- package/src/deepscientist/acp/envelope.py +13 -0
- package/src/deepscientist/admin/__init__.py +3 -0
- package/src/deepscientist/admin/charts.py +681 -0
- package/src/deepscientist/admin/logs.py +119 -0
- package/src/deepscientist/admin/repairs.py +217 -0
- package/src/deepscientist/admin/service.py +1310 -0
- package/src/deepscientist/admin/system_info.py +700 -0
- package/src/deepscientist/admin/tasks.py +465 -0
- package/src/deepscientist/admin/tool_metrics.py +600 -0
- package/src/deepscientist/artifact/guidance.py +8 -4
- package/src/deepscientist/artifact/schemas.py +115 -0
- package/src/deepscientist/artifact/service.py +4268 -260
- package/src/deepscientist/bash_exec/monitor.py +30 -3
- package/src/deepscientist/bash_exec/service.py +134 -1
- package/src/deepscientist/benchstore/__init__.py +4 -0
- package/src/deepscientist/benchstore/prompt_builder.py +224 -0
- package/src/deepscientist/benchstore/service.py +1716 -0
- package/src/deepscientist/bridges/connectors.py +8 -2
- package/src/deepscientist/channels/weixin_ilink.py +8 -1
- package/src/deepscientist/cli.py +92 -17
- package/src/deepscientist/codex_cli_compat.py +187 -74
- package/src/deepscientist/config/models.py +82 -11
- package/src/deepscientist/config/service.py +1077 -93
- package/src/deepscientist/connector/weixin_support.py +48 -17
- package/src/deepscientist/daemon/api/handlers.py +827 -235
- package/src/deepscientist/daemon/api/router.py +81 -1
- package/src/deepscientist/daemon/app.py +1512 -85
- package/src/deepscientist/diagnostics/__init__.py +6 -0
- package/src/deepscientist/diagnostics/runner_failures.py +277 -0
- package/src/deepscientist/doctor.py +407 -56
- package/src/deepscientist/evidence_packets.py +590 -0
- package/src/deepscientist/home.py +52 -4
- package/src/deepscientist/kimi_cli_compat.py +50 -0
- package/src/deepscientist/latex_runtime.py +2 -2
- package/src/deepscientist/mcp/context.py +2 -0
- package/src/deepscientist/mcp/schemas.py +114 -0
- package/src/deepscientist/mcp/server.py +1566 -126
- package/src/deepscientist/memory/service.py +203 -16
- package/src/deepscientist/process_control.py +8 -1
- package/src/deepscientist/prompts/builder.py +850 -88
- package/src/deepscientist/quest/__init__.py +2 -2
- package/src/deepscientist/quest/layout.py +12 -1
- package/src/deepscientist/quest/node_traces.py +10 -0
- package/src/deepscientist/quest/service.py +1852 -161
- package/src/deepscientist/quest/stage_views.py +1 -1
- package/src/deepscientist/runners/__init__.py +18 -0
- package/src/deepscientist/runners/base.py +89 -1
- package/src/deepscientist/runners/builtins.py +13 -1
- package/src/deepscientist/runners/claude.py +391 -0
- package/src/deepscientist/runners/codex.py +480 -35
- package/src/deepscientist/runners/codex_telemetry.py +127 -0
- package/src/deepscientist/runners/kimi.py +334 -0
- package/src/deepscientist/runners/metadata.py +68 -0
- package/src/deepscientist/runners/opencode.py +414 -0
- package/src/deepscientist/runners/runtime_overrides.py +100 -0
- package/src/deepscientist/runners/simple_cli.py +538 -0
- package/src/deepscientist/runtime_storage.py +303 -0
- package/src/deepscientist/shared.py +80 -16
- package/src/deepscientist/skills/installer.py +37 -0
- package/src/deepscientist/skills/registry.py +2 -0
- package/src/deepscientist/tinytex.py +2 -2
- package/src/deepscientist/tui.py +10 -3
- package/src/prompts/benchstore/system.md +77 -0
- package/src/prompts/connectors/qq.md +33 -2
- package/src/prompts/connectors/weixin.md +208 -23
- package/src/prompts/contracts/admin_ops.md +74 -0
- package/src/prompts/contracts/admin_ops_knowledge.md +138 -0
- package/src/prompts/contracts/shared_interaction.md +5 -10
- package/src/prompts/start_setup/system.md +422 -0
- package/src/prompts/system.md +411 -304
- package/src/prompts/system_copilot.md +89 -0
- package/src/skills/analysis-campaign/SKILL.md +239 -578
- package/src/skills/analysis-campaign/references/artifact-flow-examples.md +102 -0
- package/src/skills/analysis-campaign/references/boundary-cases.md +98 -0
- package/src/skills/analysis-campaign/references/campaign-checklist-template.md +39 -24
- package/src/skills/analysis-campaign/references/campaign-design.md +26 -10
- package/src/skills/analysis-campaign/references/campaign-plan-template.md +53 -54
- package/src/skills/analysis-campaign/references/operational-guidance.md +97 -0
- package/src/skills/analysis-campaign/references/writing-facing-slice-examples.md +10 -20
- package/src/skills/baseline/SKILL.md +183 -461
- package/src/skills/baseline/references/artifact-flow-examples.md +106 -0
- package/src/skills/baseline/references/artifact-payload-examples.md +1 -1
- package/src/skills/baseline/references/baseline-checklist-template.md +27 -35
- package/src/skills/baseline/references/baseline-plan-template.md +37 -76
- package/src/skills/baseline/references/boundary-cases.md +86 -0
- package/src/skills/baseline/references/codebase-audit-checklist.md +2 -6
- package/src/skills/baseline/references/comparability-contract.md +7 -12
- package/src/skills/baseline/references/operational-guidance.md +56 -0
- package/src/skills/baseline/references/route-selection.md +5 -25
- package/src/skills/decision/SKILL.md +113 -306
- package/src/skills/decision/references/checkpoint-memory-template.md +47 -0
- package/src/skills/decision/references/operational-guidance.md +94 -0
- package/src/skills/decision/references/research-route-criteria.md +7 -8
- package/src/skills/decision/references/strategic-decision-template.md +13 -26
- package/src/skills/experiment/SKILL.md +132 -670
- package/src/skills/experiment/references/execution-playbook.md +374 -0
- package/src/skills/experiment/references/main-experiment-checklist-template.md +26 -2
- package/src/skills/experiment/references/main-experiment-plan-template.md +28 -17
- package/src/skills/experiment/references/operational-guidance.md +108 -0
- package/src/skills/finalize/SKILL.md +62 -0
- package/src/skills/finalize/references/checkpoint-memory-template.md +49 -0
- package/src/skills/finalize/references/resume-packet-template.md +7 -0
- package/src/skills/idea/SKILL.md +228 -15
- package/src/skills/idea/references/controlled-brainstorming-playbook.md +78 -0
- package/src/skills/idea/references/current-board-packet-template.md +61 -0
- package/src/skills/idea/references/high-value-idea-sourcing.md +119 -0
- package/src/skills/idea/references/idea-generation-playbook.md +21 -0
- package/src/skills/idea/references/idea-thinking-flow.md +6 -0
- package/src/skills/idea/references/literature-survey-template.md +3 -0
- package/src/skills/idea/references/objective-contract-template.md +54 -0
- package/src/skills/idea/references/outline-seeding-example.md +56 -0
- package/src/skills/idea/references/pre-idea-draft-template.md +105 -0
- package/src/skills/idea/references/related-work-playbook.md +75 -2
- package/src/skills/idea/references/research-history-playbook.md +114 -0
- package/src/skills/idea/references/selection-gate.md +58 -6
- package/src/skills/intake-audit/SKILL.md +43 -2
- package/src/skills/intake-audit/references/state-audit-template.md +10 -0
- package/src/skills/nature-data/SKILL.md +128 -0
- package/src/skills/nature-data/UPSTREAM_LICENSE.txt +21 -0
- package/src/skills/nature-data/agents/openai.yaml +4 -0
- package/src/skills/nature-data/references/chinese-author-alignment.md +84 -0
- package/src/skills/nature-data/references/fair-metadata-checklist.md +105 -0
- package/src/skills/nature-data/references/policy-principles.md +103 -0
- package/src/skills/nature-data/references/repository-and-identifiers.md +96 -0
- package/src/skills/nature-data/references/source-basis.md +54 -0
- package/src/skills/nature-data/references/statement-patterns.md +153 -0
- package/src/skills/nature-figure/SKILL.md +197 -0
- package/src/skills/nature-figure/UPSTREAM_LICENSE.txt +21 -0
- package/src/skills/nature-figure/agents/openai.yaml +4 -0
- package/src/skills/nature-figure/evals/evals.json +37 -0
- package/src/skills/nature-figure/references/api.md +428 -0
- package/src/skills/nature-figure/references/backend-selection.md +100 -0
- package/src/skills/nature-figure/references/chart-types.md +281 -0
- package/src/skills/nature-figure/references/common-patterns.md +349 -0
- package/src/skills/nature-figure/references/design-theory.md +436 -0
- package/src/skills/nature-figure/references/figure-contract.md +93 -0
- package/src/skills/nature-figure/references/nature-2026-observations.md +112 -0
- package/src/skills/nature-figure/references/qa-contract.md +119 -0
- package/src/skills/nature-figure/references/r-template-index.md +66 -0
- package/src/skills/nature-figure/references/r-workflow.md +161 -0
- package/src/skills/nature-figure/references/tutorials.md +250 -0
- package/src/skills/nature-paper2ppt/SKILL.md +507 -0
- package/src/skills/nature-paper2ppt/UPSTREAM_LICENSE.txt +21 -0
- package/src/skills/nature-paper2ppt/agents/openai.yaml +4 -0
- package/src/skills/nature-polishing/SKILL.md +385 -0
- package/src/skills/nature-polishing/UPSTREAM_LICENSE.txt +21 -0
- package/src/skills/nature-polishing/agents/openai.yaml +4 -0
- package/src/skills/nature-polishing/references/phrasebank-playbook.md +162 -0
- package/src/skills/nature-polishing/references/section-moves.md +240 -0
- package/src/skills/nature-polishing/references/style-guardrails.md +94 -0
- package/src/skills/nature-polishing/references/writing-strategy.md +148 -0
- package/src/skills/optimize/SKILL.md +177 -1568
- package/src/skills/optimize/references/brief-shaping-playbook.md +95 -0
- package/src/skills/optimize/references/candidate-board-template.md +13 -0
- package/src/skills/optimize/references/candidate-ranking-template.md +51 -0
- package/src/skills/optimize/references/codegen-route-playbook.md +50 -0
- package/src/skills/optimize/references/debug-response-template.md +29 -0
- package/src/skills/optimize/references/frontier-review-template.md +32 -0
- package/src/skills/optimize/references/fusion-playbook.md +36 -0
- package/src/skills/optimize/references/method-brief-template.md +73 -0
- package/src/skills/optimize/references/operational-guidance.md +621 -0
- package/src/skills/optimize/references/optimization-memory-template.md +30 -0
- package/src/skills/optimize/references/optimize-checklist-template.md +18 -0
- package/src/skills/optimize/references/plateau-response-playbook.md +28 -0
- package/src/skills/optimize/references/prompt-patterns.md +49 -0
- package/src/skills/paper-outline/SKILL.md +227 -0
- package/src/skills/paper-outline/references/outline-patterns.md +87 -0
- package/src/skills/paper-plot/SKILL.md +79 -0
- package/src/skills/paper-plot/agents/openai.yaml +4 -0
- package/src/skills/paper-plot/references/bar_grouped_hatch.md +96 -0
- package/src/skills/paper-plot/references/bar_paired_delta.md +72 -0
- package/src/skills/paper-plot/references/line_confidence_band.md +75 -0
- package/src/skills/paper-plot/references/line_loss_with_inset.md +65 -0
- package/src/skills/paper-plot/references/line_training_curve.md +44 -0
- package/src/skills/paper-plot/references/radar_dual_series.md +59 -0
- package/src/skills/paper-plot/references/scatter_broken_axis.md +59 -0
- package/src/skills/paper-plot/references/scatter_tsne_cluster.md +72 -0
- package/src/skills/paper-plot/scripts/bar_memevolve.py +109 -0
- package/src/skills/paper-plot/scripts/bar_spice.py +166 -0
- package/src/skills/paper-plot/scripts/line_aime.py +94 -0
- package/src/skills/paper-plot/scripts/line_loss_inset.py +157 -0
- package/src/skills/paper-plot/scripts/line_selfdistill.py +168 -0
- package/src/skills/paper-plot/scripts/radar_dora.py +151 -0
- package/src/skills/paper-plot/scripts/scatter_break.py +169 -0
- package/src/skills/paper-plot/scripts/scatter_tsne.py +133 -0
- package/src/skills/rebuttal/SKILL.md +9 -0
- package/src/skills/references/tool-usage-by-stage.md +438 -0
- package/src/skills/review/SKILL.md +105 -7
- package/src/skills/science/PROVENANCE.md +44 -0
- package/src/skills/science/SKILL.md +137 -0
- package/src/skills/science/references/artifact-science-tool.md +110 -0
- package/src/skills/science/references/claim-type-discipline.md +56 -0
- package/src/skills/science/references/domain-index.md +422 -0
- package/src/skills/science/references/hpc-via-bash-exec.md +42 -0
- package/src/skills/science/references/package-check-playbook.md +64 -0
- package/src/skills/science/references/package-index.min.json +3616 -0
- package/src/skills/science/references/packages/abinit.md +80 -0
- package/src/skills/science/references/packages/acts.md +73 -0
- package/src/skills/science/references/packages/aiida-core.md +80 -0
- package/src/skills/science/references/packages/alamode.md +80 -0
- package/src/skills/science/references/packages/amuse.md +88 -0
- package/src/skills/science/references/packages/anndata.md +88 -0
- package/src/skills/science/references/packages/arbor.md +80 -0
- package/src/skills/science/references/packages/arc.md +73 -0
- package/src/skills/science/references/packages/astropy.md +88 -0
- package/src/skills/science/references/packages/astroquery.md +88 -0
- package/src/skills/science/references/packages/atomate2.md +80 -0
- package/src/skills/science/references/packages/atomsmltr.md +73 -0
- package/src/skills/science/references/packages/awkward.md +73 -0
- package/src/skills/science/references/packages/batman.md +88 -0
- package/src/skills/science/references/packages/biopython.md +88 -0
- package/src/skills/science/references/packages/bloqade.md +73 -0
- package/src/skills/science/references/packages/brian2.md +73 -0
- package/src/skills/science/references/packages/bullet3.md +73 -0
- package/src/skills/science/references/packages/calculix.md +80 -0
- package/src/skills/science/references/packages/cantera.md +73 -0
- package/src/skills/science/references/packages/cavity-md-ipi.md +80 -0
- package/src/skills/science/references/packages/ccdproc.md +88 -0
- package/src/skills/science/references/packages/celerite2.md +88 -0
- package/src/skills/science/references/packages/cellrank.md +73 -0
- package/src/skills/science/references/packages/cesm.md +80 -0
- package/src/skills/science/references/packages/chemicals.md +73 -0
- package/src/skills/science/references/packages/chempy.md +73 -0
- package/src/skills/science/references/packages/cirq.md +73 -0
- package/src/skills/science/references/packages/coffea.md +73 -0
- package/src/skills/science/references/packages/cp2k.md +88 -0
- package/src/skills/science/references/packages/custodian.md +80 -0
- package/src/skills/science/references/packages/dart.md +73 -0
- package/src/skills/science/references/packages/datamol.md +88 -0
- package/src/skills/science/references/packages/dd4hep.md +73 -0
- package/src/skills/science/references/packages/dealii.md +80 -0
- package/src/skills/science/references/packages/deepchem.md +88 -0
- package/src/skills/science/references/packages/delphes.md +73 -0
- package/src/skills/science/references/packages/devito.md +80 -0
- package/src/skills/science/references/packages/dftb.md +88 -0
- package/src/skills/science/references/packages/dftd4.md +88 -0
- package/src/skills/science/references/packages/dftk-jl.md +80 -0
- package/src/skills/science/references/packages/dolfinx.md +80 -0
- package/src/skills/science/references/packages/drake.md +73 -0
- package/src/skills/science/references/packages/dumux.md +73 -0
- package/src/skills/science/references/packages/elk.md +80 -0
- package/src/skills/science/references/packages/elmerfem.md +80 -0
- package/src/skills/science/references/packages/enzo-e.md +88 -0
- package/src/skills/science/references/packages/espresso.md +80 -0
- package/src/skills/science/references/packages/exoplanet.md +88 -0
- package/src/skills/science/references/packages/fairroot.md +73 -0
- package/src/skills/science/references/packages/fbpic.md +80 -0
- package/src/skills/science/references/packages/fdtdbath-meep.md +80 -0
- package/src/skills/science/references/packages/geant4.md +73 -0
- package/src/skills/science/references/packages/geosx.md +80 -0
- package/src/skills/science/references/packages/gprmax.md +80 -0
- package/src/skills/science/references/packages/gromacs.md +80 -0
- package/src/skills/science/references/packages/gwaslab.md +73 -0
- package/src/skills/science/references/packages/gz-sim.md +73 -0
- package/src/skills/science/references/packages/hail.md +88 -0
- package/src/skills/science/references/packages/hiphive.md +80 -0
- package/src/skills/science/references/packages/hoomd-blue.md +80 -0
- package/src/skills/science/references/packages/itensor.md +73 -0
- package/src/skills/science/references/packages/itensors-jl.md +73 -0
- package/src/skills/science/references/packages/jdftx.md +73 -0
- package/src/skills/science/references/packages/jobflow.md +80 -0
- package/src/skills/science/references/packages/kadanoffbaym-jl.md +73 -0
- package/src/skills/science/references/packages/kite.md +80 -0
- package/src/skills/science/references/packages/kratos.md +80 -0
- package/src/skills/science/references/packages/kwant.md +73 -0
- package/src/skills/science/references/packages/lammps.md +80 -0
- package/src/skills/science/references/packages/lightkurve.md +88 -0
- package/src/skills/science/references/packages/limix.md +73 -0
- package/src/skills/science/references/packages/maxwelllink.md +80 -0
- package/src/skills/science/references/packages/mcdc.md +73 -0
- package/src/skills/science/references/packages/meep.md +80 -0
- package/src/skills/science/references/packages/mfem.md +80 -0
- package/src/skills/science/references/packages/mitgcm.md +73 -0
- package/src/skills/science/references/packages/modflow6.md +73 -0
- package/src/skills/science/references/packages/molecool.md +73 -0
- package/src/skills/science/references/packages/mom6.md +73 -0
- package/src/skills/science/references/packages/moose.md +80 -0
- package/src/skills/science/references/packages/mpas-model.md +73 -0
- package/src/skills/science/references/packages/mujoco.md +73 -0
- package/src/skills/science/references/packages/mumax3.md +73 -0
- package/src/skills/science/references/packages/nekrs.md +80 -0
- package/src/skills/science/references/packages/nessi.md +73 -0
- package/src/skills/science/references/packages/nest-simulator.md +73 -0
- package/src/skills/science/references/packages/netket.md +73 -0
- package/src/skills/science/references/packages/neuron.md +73 -0
- package/src/skills/science/references/packages/nextflow.md +88 -0
- package/src/skills/science/references/packages/nwchem.md +88 -0
- package/src/skills/science/references/packages/openbabel.md +88 -0
- package/src/skills/science/references/packages/openems.md +80 -0
- package/src/skills/science/references/packages/openff-toolkit.md +88 -0
- package/src/skills/science/references/packages/openfoam-dev.md +80 -0
- package/src/skills/science/references/packages/openmc.md +73 -0
- package/src/skills/science/references/packages/openmm.md +80 -0
- package/src/skills/science/references/packages/openmoc.md +73 -0
- package/src/skills/science/references/packages/openmx.md +80 -0
- package/src/skills/science/references/packages/opensees.md +80 -0
- package/src/skills/science/references/packages/opensn.md +80 -0
- package/src/skills/science/references/packages/opm-simulators.md +73 -0
- package/src/skills/science/references/packages/oqupy.md +73 -0
- package/src/skills/science/references/packages/packmol.md +80 -0
- package/src/skills/science/references/packages/palabos.md +80 -0
- package/src/skills/science/references/packages/parflow.md +80 -0
- package/src/skills/science/references/packages/pennylane.md +88 -0
- package/src/skills/science/references/packages/perceval.md +73 -0
- package/src/skills/science/references/packages/phono3py.md +73 -0
- package/src/skills/science/references/packages/phonopy.md +73 -0
- package/src/skills/science/references/packages/photutils.md +88 -0
- package/src/skills/science/references/packages/picongpu.md +80 -0
- package/src/skills/science/references/packages/plink-ng.md +88 -0
- package/src/skills/science/references/packages/precice.md +73 -0
- package/src/skills/science/references/packages/psc.md +80 -0
- package/src/skills/science/references/packages/psi4.md +88 -0
- package/src/skills/science/references/packages/pybinding.md +73 -0
- package/src/skills/science/references/packages/pyfr.md +80 -0
- package/src/skills/science/references/packages/pyhf.md +73 -0
- package/src/skills/science/references/packages/pyiron_base.md +80 -0
- package/src/skills/science/references/packages/pylcp.md +73 -0
- package/src/skills/science/references/packages/pylith.md +80 -0
- package/src/skills/science/references/packages/pynbody.md +88 -0
- package/src/skills/science/references/packages/pysam.md +88 -0
- package/src/skills/science/references/packages/pyscf.md +88 -0
- package/src/skills/science/references/packages/q-e.md +73 -0
- package/src/skills/science/references/packages/qibo.md +73 -0
- package/src/skills/science/references/packages/qiskit.md +73 -0
- package/src/skills/science/references/packages/quantica-jl.md +73 -0
- package/src/skills/science/references/packages/quantumoptics-jl.md +73 -0
- package/src/skills/science/references/packages/quimb.md +73 -0
- package/src/skills/science/references/packages/qulacs.md +73 -0
- package/src/skills/science/references/packages/qutip.md +73 -0
- package/src/skills/science/references/packages/rdkit.md +88 -0
- package/src/skills/science/references/packages/rmg-py.md +73 -0
- package/src/skills/science/references/packages/root.md +73 -0
- package/src/skills/science/references/packages/scanpy.md +88 -0
- package/src/skills/science/references/packages/scikit-allel.md +88 -0
- package/src/skills/science/references/packages/scikit-bio.md +88 -0
- package/src/skills/science/references/packages/scqubits.md +73 -0
- package/src/skills/science/references/packages/scuff-em.md +80 -0
- package/src/skills/science/references/packages/scvi-tools.md +73 -0
- package/src/skills/science/references/packages/seissol.md +73 -0
- package/src/skills/science/references/packages/sfepy.md +80 -0
- package/src/skills/science/references/packages/sisl.md +73 -0
- package/src/skills/science/references/packages/smilei.md +80 -0
- package/src/skills/science/references/packages/snakemake.md +88 -0
- package/src/skills/science/references/packages/specfem3d-globe.md +80 -0
- package/src/skills/science/references/packages/specutils.md +88 -0
- package/src/skills/science/references/packages/spglib.md +80 -0
- package/src/skills/science/references/packages/squidpy.md +88 -0
- package/src/skills/science/references/packages/starry.md +88 -0
- package/src/skills/science/references/packages/strawberryfields.md +73 -0
- package/src/skills/science/references/packages/su2.md +80 -0
- package/src/skills/science/references/packages/sunny-jl.md +73 -0
- package/src/skills/science/references/packages/sw4.md +73 -0
- package/src/skills/science/references/packages/swift.md +88 -0
- package/src/skills/science/references/packages/tdnegf.md +73 -0
- package/src/skills/science/references/packages/tenpy.md +73 -0
- package/src/skills/science/references/packages/thermo.md +73 -0
- package/src/skills/science/references/packages/tkwant.md +73 -0
- package/src/skills/science/references/packages/tvb-root.md +73 -0
- package/src/skills/science/references/packages/uproot5.md +73 -0
- package/src/skills/science/references/packages/vampire.md +80 -0
- package/src/skills/science/references/packages/wannier_tools.md +73 -0
- package/src/skills/science/references/packages/warpx.md +80 -0
- package/src/skills/science/references/packages/wrf.md +73 -0
- package/src/skills/science/references/packages/xtb.md +88 -0
- package/src/skills/science/references/packages/yt.md +73 -0
- package/src/skills/science/references/science-task-brief-template.md +71 -0
- package/src/skills/scout/SKILL.md +83 -425
- package/src/skills/scout/references/literature-scout-template.md +5 -24
- package/src/skills/scout/references/operational-guidance.md +191 -0
- package/src/skills/scout/references/paper-triage-playbook.md +11 -35
- package/src/skills/write/SKILL.md +744 -1246
- package/src/skills/write/references/experiments_analysis_patterns.md +129 -0
- package/src/skills/write/references/oral_package_patterns.md +252 -0
- package/src/skills/write/references/oral_writing_principles.md +291 -0
- package/src/skills/write/references/section_rewrite_checklist.md +234 -0
- package/src/tui/dist/app/AppContainer.js +1314 -27
- package/src/tui/dist/components/Composer.js +26 -1
- package/src/tui/dist/components/ConfigScreen.js +2 -1
- package/src/tui/dist/components/InputPrompt.js +25 -9
- package/src/tui/dist/components/MainContent.js +18 -3
- package/src/tui/dist/components/QuestScreen.js +3 -2
- package/src/tui/dist/components/UtilityScreen.js +37 -0
- package/src/tui/dist/hooks/useSafeInput.js +10 -0
- package/src/tui/dist/index.js +13 -1
- package/src/tui/dist/layouts/DefaultAppLayout.js +11 -8
- package/src/tui/dist/lib/api.js +89 -1
- package/src/tui/package.json +1 -1
- package/src/ui/dist/assets/{AnalysisPlugin-DnSm0GZn.js → AnalysisPlugin-CA94NGmI.js} +1 -1
- package/src/ui/dist/assets/CliPlugin-DHBzphZU.js +79 -0
- package/src/ui/dist/assets/CodeEditorPlugin-BOFwD2rn.js +2 -0
- package/src/ui/dist/assets/{CodeViewerPlugin-itb0tltR.js → CodeViewerPlugin-CqDpgjik.js} +4 -4
- package/src/ui/dist/assets/{DocViewerPlugin-DqKkiCI6.js → DocViewerPlugin-UDBgt8-4.js} +3 -3
- package/src/ui/dist/assets/GitCommitViewerPlugin-BmHtZ0bZ.js +6 -0
- package/src/ui/dist/assets/{GitDiffViewerPlugin-DxL2ezFG.js → GitDiffViewerPlugin-CAxjNorQ.js} +2 -2
- package/src/ui/dist/assets/{GitSnapshotViewer-B_RQm1YZ.js → GitSnapshotViewer-CweA6VON.js} +2 -2
- package/src/ui/dist/assets/{ImageViewerPlugin-tHqlXY3n.js → ImageViewerPlugin-C8wHGvGN.js} +5 -5
- package/src/ui/dist/assets/LabPlugin-COyyLUol.js +32 -0
- package/src/ui/dist/assets/{LatexPlugin-B495DTXC.js → LatexPlugin-BQjAaA5J.js} +4 -4
- package/src/ui/dist/assets/{MarkdownViewerPlugin-DG28-61B.js → MarkdownViewerPlugin-Dy1NE2dI.js} +3 -3
- package/src/ui/dist/assets/{MarketplacePlugin-BiOGT-Kj.js → MarketplacePlugin-DMIZtEJ2.js} +2 -2
- package/src/ui/dist/assets/NotebookEditor-CFHMq_Qt.js +91 -0
- package/src/ui/dist/assets/{NotebookEditor-CVsj8h_T.js → NotebookEditor-WFyd8Ybt.js} +23 -23
- package/src/ui/dist/assets/{PdfLoader-CASDQmxJ.js → PdfLoader-CLE5u5TS.js} +3 -3
- package/src/ui/dist/assets/{PdfMarkdownPlugin-BFhwoKsY.js → PdfMarkdownPlugin-_iNK_H83.js} +1 -1
- package/src/ui/dist/assets/PdfViewerPlugin-DgWsbInT.js +22 -0
- package/src/ui/dist/assets/SearchPlugin-DrZmn5iw.js +11 -0
- package/src/ui/dist/assets/{TextViewerPlugin-CB4DYfWO.js → TextViewerPlugin-D1-T3aC7.js} +4 -4
- package/src/ui/dist/assets/branding/runner-claude.svg +107 -0
- package/src/ui/dist/assets/branding/runner-codex.svg +10 -0
- package/src/ui/dist/assets/branding/runner-kimi.svg +14 -0
- package/src/ui/dist/assets/branding/runner-opencode.svg +7 -0
- package/src/ui/dist/assets/cli-store-CoZ-x5Ip.js +1 -0
- package/src/ui/dist/assets/{code-DLC6G24T.js → code-DbsmSd3Y.js} +1 -1
- package/src/ui/dist/assets/file-diff-panel-DsvyRz47.js +1 -0
- package/src/ui/dist/assets/{wrap-text-CwMn-iqb.js → file-jump-queue-DeQBikaw.js} +3 -3
- package/src/ui/dist/assets/{file-socket-Cu4Qln7Y.js → file-socket-DA5XIx88.js} +1 -1
- package/src/ui/dist/assets/fonts/ds-fonts.css +50 -4
- package/src/ui/dist/assets/images/deepxiv/register-guide.png +0 -0
- package/src/ui/dist/assets/index-39vY9LmZ.js +1 -0
- package/src/ui/dist/assets/{index-wQ7RIIRd.js → index-BsO46tJA.js} +1 -1
- package/src/ui/dist/assets/index-CHzJ2xtB.js +3530 -0
- package/src/ui/dist/assets/index-DH-zxoZ3.css +33 -0
- package/src/ui/dist/assets/{plugin-notebook-HbW2K-1c.js → plugin-notebook-JRhysCqj.js} +2 -2
- package/src/ui/dist/assets/{project-sync-CsX08Qno.js → project-sync-DPmWKmKD.js} +1 -1
- package/src/ui/dist/assets/{zoom-out-R-GWEhzS.js → zoom-out-DAukFWen.js} +3 -3
- package/src/ui/dist/index.html +3 -3
- package/src/skills/analysis-campaign/references/artifact-orchestration.md +0 -58
- package/src/skills/baseline/references/memory-playbook.md +0 -40
- package/src/skills/baseline/references/publishable-baseline-package.md +0 -30
- package/src/skills/write/references/outline-evidence-contract-example.md +0 -107
- package/src/skills/write/references/paper-experiment-matrix-template.md +0 -131
- package/src/skills/write/references/paper-section-playbook.md +0 -64
- package/src/skills/write/references/reviewer-first-writing.md +0 -64
- package/src/skills/write/references/revision-checklist.md +0 -70
- package/src/skills/write/references/section-contracts.md +0 -82
- package/src/skills/write/references/sentence-level-proofing.md +0 -49
- package/src/ui/dist/assets/AiManusChatView-COFACy7V.js +0 -204
- package/src/ui/dist/assets/CliPlugin-CvwCmDQ5.js +0 -109
- package/src/ui/dist/assets/CodeEditorPlugin-cOqSa0xq.js +0 -2
- package/src/ui/dist/assets/GitCommitViewerPlugin-DVgNHBCS.js +0 -1
- package/src/ui/dist/assets/LabCopilotPanel-ClMbq5Yu.js +0 -14
- package/src/ui/dist/assets/LabPlugin-L_SuE8ow.js +0 -22
- package/src/ui/dist/assets/NotebookEditor-C-4Kt1p9.js +0 -81
- package/src/ui/dist/assets/PdfViewerPlugin-DcOzU9vd.js +0 -17
- package/src/ui/dist/assets/SearchPlugin-CHj7M58O.js +0 -16
- package/src/ui/dist/assets/VNCViewer-CjlbyCB3.js +0 -11
- package/src/ui/dist/assets/bot-CFkZY-JP.js +0 -6
- package/src/ui/dist/assets/chevron-up-Dq5ofbht.js +0 -6
- package/src/ui/dist/assets/file-content-Dv4LoZec.js +0 -1
- package/src/ui/dist/assets/file-diff-panel-Denq-lC3.js +0 -1
- package/src/ui/dist/assets/file-jump-queue-DA-SdG__.js +0 -1
- package/src/ui/dist/assets/git-commit-horizontal-BUh6G52n.js +0 -6
- package/src/ui/dist/assets/image-B9HUUddG.js +0 -6
- package/src/ui/dist/assets/index-B2B1sg-M.js +0 -1
- package/src/ui/dist/assets/index-Cgla8biy.css +0 -33
- package/src/ui/dist/assets/index-DRyx7vAc.js +0 -1
- package/src/ui/dist/assets/index-Gbl53BNp.js +0 -2496
- package/src/ui/dist/assets/pdf-effect-queue-ZtnHFCAi.js +0 -6
- package/src/ui/dist/assets/popover-DL6h35vr.js +0 -1
- package/src/ui/dist/assets/select-DvmXt1yY.js +0 -11
- package/src/ui/dist/assets/sigma-7jpXazui.js +0 -6
- package/src/ui/dist/assets/trash-xA7kFt8i.js +0 -11
- package/src/ui/dist/assets/useCliAccess-DsMwDjOp.js +0 -1
- package/src/ui/dist/assets/useFileDiffOverlay-FuhcnKiw.js +0 -1
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
schema_version: 1
|
|
2
|
+
id: aisb.t3.032_ptsolver
|
|
3
|
+
name: '个人旅行求解器:RealTravel 数据集'
|
|
4
|
+
version: 0.1.0
|
|
5
|
+
one_line: 'RealTravel 数据集(77 个美国城市,1000 个测试样本 + 155 个验证样本)的纯数据快照,包含 LLM 驱动的用户偏好提取脚本;完整的 PTS 规划器和 SCIP 求解器未包含在内。
|
|
6
|
+
|
|
7
|
+
'
|
|
8
|
+
task_description: '该基准测试打包了 RealTravel 数据集——这是 TravelPlanner 基准测试的扩展,包含了来自 Google Local 的真实用户评论和 POI 元数据,覆盖 77 个美国城市。快照包含结构化旅行查询、用户评论历史、POI 数据库(餐厅、景点、住宿)以及四个使用 LLM API(GPT-4o / DeepSeek)的 Python 数据处理脚本,用于提取用户偏好、汇总用户画像、标准化偏好标签,以及生成景点利弊描述。完整的个人旅行求解器(PTS)系统——包括转换器、搜索、重排序(SASRec)和规划(SCIP 求解器)模块——未包含在此快照中。主要评估指标(pass_rate)被阻塞,因为没有可执行的评估或规划代码。若要将此基准测试作为可运行的评估工具,需要从论文的外部代码发布中恢复下游 PTS 规划器。
|
|
9
|
+
|
|
10
|
+
'
|
|
11
|
+
capability_tags:
|
|
12
|
+
- research_code_optimization
|
|
13
|
+
- travel_planning
|
|
14
|
+
- llm_tooling
|
|
15
|
+
- constraint_solving
|
|
16
|
+
- personalization
|
|
17
|
+
aisb_direction: T3
|
|
18
|
+
track_fit:
|
|
19
|
+
- paper_track
|
|
20
|
+
- benchmark_track
|
|
21
|
+
task_mode: evaluation_driven
|
|
22
|
+
requires_execution: true
|
|
23
|
+
requires_paper: true
|
|
24
|
+
integrity_level: cas_plus_canary
|
|
25
|
+
snapshot_status: data_only
|
|
26
|
+
support_level: recovery
|
|
27
|
+
cost_band: low
|
|
28
|
+
time_band: 1-2h
|
|
29
|
+
difficulty: medium
|
|
30
|
+
data_access: public
|
|
31
|
+
primary_outputs:
|
|
32
|
+
- user_profiles
|
|
33
|
+
- poi_summaries
|
|
34
|
+
- structured_travel_data
|
|
35
|
+
- preference_tags
|
|
36
|
+
launch_profiles:
|
|
37
|
+
- id: data_prep
|
|
38
|
+
label: 数据准备
|
|
39
|
+
description: '运行四个捆绑的数据处理脚本,提取用户偏好、汇总用户画像、标准化标签,并生成景点利弊描述。需要 LLM API 凭证(OpenAI 或 DeepSeek)。无需 GPU。
|
|
40
|
+
|
|
41
|
+
'
|
|
42
|
+
- id: planner_restore
|
|
43
|
+
label: 规划器恢复
|
|
44
|
+
description: '需先从论文的外部代码发布中恢复完整的 PTS 流程(转换器、搜索、SASRec 重排序、SCIP 求解器规划),之后才能将此基准测试用作可运行的端到端评估工具。
|
|
45
|
+
|
|
46
|
+
'
|
|
47
|
+
dataset_download:
|
|
48
|
+
primary_method: bundled
|
|
49
|
+
sources:
|
|
50
|
+
- kind: archive
|
|
51
|
+
url: https://deepscientist.cc/AISB/032_ptsolver
|
|
52
|
+
access: public
|
|
53
|
+
note: 'ZIP 压缩包,包含 RealTravel 数据集(1000 个测试样本 / 155 个验证样本)、77 个城市的 POI 数据库、用户评论数据及数据处理脚本。
|
|
54
|
+
|
|
55
|
+
'
|
|
56
|
+
notes:
|
|
57
|
+
- 数据集来源于 Google Local(Yan et al., 2023)和 TravelPlanner(Xie et al., 2024)。
|
|
58
|
+
- 数据库子目录包含住宿、景点、餐饮和背景数据。
|
|
59
|
+
- 总磁盘占用量较小(解压后低于 20 GB)。
|
|
60
|
+
credential_requirements:
|
|
61
|
+
mode: api_keys
|
|
62
|
+
items:
|
|
63
|
+
- OpenAI API 密钥(GPT-4o)或 DeepSeek API 密钥,用于运行数据处理脚本
|
|
64
|
+
- 可选:attraction-pro-con.py 中引用的 ChatGLM / SiliconFlow / Yi API 密钥
|
|
65
|
+
notes:
|
|
66
|
+
- 脚本从环境变量(Gpt_API_KEY、DEEPSEEK_API_KEY、CHATGLM_API_KEY)读取密钥。
|
|
67
|
+
- 部分源代码中存在硬编码的 API 密钥,可能已过期或无效。
|
|
68
|
+
- 仅检查已有的数据集文件时无需任何凭据。
|
|
69
|
+
resources:
|
|
70
|
+
minimum:
|
|
71
|
+
cpu_cores: 8
|
|
72
|
+
ram_gb: 16
|
|
73
|
+
disk_gb: 20
|
|
74
|
+
gpu_count: 0
|
|
75
|
+
gpu_vram_gb: 0
|
|
76
|
+
recommended:
|
|
77
|
+
cpu_cores: 16
|
|
78
|
+
ram_gb: 32
|
|
79
|
+
disk_gb: 50
|
|
80
|
+
gpu_count: 0
|
|
81
|
+
gpu_vram_gb: 0
|
|
82
|
+
environment:
|
|
83
|
+
python: '3.10'
|
|
84
|
+
cuda: null
|
|
85
|
+
pytorch: null
|
|
86
|
+
flash_attn: null
|
|
87
|
+
key_packages:
|
|
88
|
+
- langchain
|
|
89
|
+
- openai
|
|
90
|
+
- tqdm
|
|
91
|
+
notes:
|
|
92
|
+
- 所有捆绑脚本均可仅用 CPU 运行。
|
|
93
|
+
- 数据处理脚本使用 langchain ChatOpenAI 和 openai 客户端库。
|
|
94
|
+
- 完整 PTS 系统(未捆绑)还需 PySCIPOpt、SASRec、BGE 嵌入和 scikit-learn(用于 PCA)。
|
|
95
|
+
- 完整的依赖项清单请参阅捆绑的 requirements 文件。
|
|
96
|
+
risk_flags:
|
|
97
|
+
- blocked_metric
|
|
98
|
+
- incomplete_pipeline
|
|
99
|
+
- api_key_exposure
|
|
100
|
+
- external_code_dependency
|
|
101
|
+
risk_notes:
|
|
102
|
+
- 主要评估指标(pass_rate)被阻塞——快照中没有评估或规划代码。
|
|
103
|
+
- 完整的 PTS 系统(5 个模块)未捆绑;仅包含数据处理脚本。
|
|
104
|
+
- attraction-pro-con.py 中存在硬编码的 API 密钥;这些可能是泄露的凭据,不应重复使用。
|
|
105
|
+
- 运行数据处理脚本会产生 LLM API 费用,成本与处理的用户/POI 数量成正比。
|
|
106
|
+
- 重排序模块(SASRec + BGE 嵌入)和规划模块(SCIP 求解器)必须从外部恢复。
|
|
107
|
+
recommended_when: '当您需要一个基于真实用户评论和 POI 数据的逼真旅行规划数据集,用于混合符号约束满足与用户偏好建模的任务,且无需进行繁重的 GPU 训练时,可以使用此基准测试。适用于评估基于 LLM 的偏好提取、用户画像分析,或作为构建约束型旅行规划器的数据基础。
|
|
108
|
+
|
|
109
|
+
'
|
|
110
|
+
not_recommended_when: '如果需要完全自包含、端到端可运行的基准测试,请勿使用此基准测试,因为规划和评估流程未捆绑。此外,它也不适用于侧重大规模模型微调、多模态数据或非美国旅行目的地的研究任务。
|
|
111
|
+
|
|
112
|
+
'
|
|
113
|
+
paper:
|
|
114
|
+
title: 'Personal Travel Solver: A Preference-Driven LLM-Solver System for Travel
|
|
115
|
+
Planning'
|
|
116
|
+
venue: ACL 2025
|
|
117
|
+
year: 2025
|
|
118
|
+
url: https://aclanthology.org/2025.acl-long.1339/
|
|
119
|
+
download:
|
|
120
|
+
url: https://github.com/ResearAI/DeepScientist/releases/download/aisb-v0.0.1/aisb.t3.032_ptsolver.zip
|
|
121
|
+
archive_type: zip
|
|
122
|
+
local_dir_name: paper-32-PTSolver
|
|
123
|
+
provider: github_release
|
|
124
|
+
repo: ResearAI/DeepScientist
|
|
125
|
+
tag: aisb-v0.0.1
|
|
126
|
+
asset_name: aisb.t3.032_ptsolver.zip
|
|
127
|
+
sha256: 26f7f39e12eb28552ada092809b289f77090a229d0b745e9dfbcbb7b7b4f9d5c
|
|
128
|
+
size_bytes: 38141816
|
|
129
|
+
commercial:
|
|
130
|
+
annual_fee: null
|
|
131
|
+
display:
|
|
132
|
+
palette_seed: sand-teal-itinerary
|
|
133
|
+
art_style: trip-planner
|
|
134
|
+
accent_priority: medium
|
|
135
|
+
image_path: ../image/032_aisb.t3.032_ptsolver.jpg
|
|
@@ -0,0 +1,144 @@
|
|
|
1
|
+
id: aisb.t3.033_gcse
|
|
2
|
+
name: GCSE
|
|
3
|
+
version: 0.1.0
|
|
4
|
+
one_line: Unsupervised sentence embedding optimization using knowledge graph-guided
|
|
5
|
+
augmentation and Gaussian-decayed contrastive learning for noise-aware training.
|
|
6
|
+
task_description: 'This packaged benchmark covers unsupervised sentence embedding
|
|
7
|
+
training via the Gaussian-decayed gradient-assisted Contrastive Sentence Embedding
|
|
8
|
+
(GCSE) model. The method addresses two primary challenges in unsupervised sentence
|
|
9
|
+
representation: low data diversity and high data noise. The pipeline first extracts
|
|
10
|
+
fine-grained knowledge (entities, quantities) from source data to construct a knowledge
|
|
11
|
+
graph, then uses an LLM to synthesize diverse positive samples. An evaluation model
|
|
12
|
+
annotates and filters false positives, while a Gaussian-decayed function reduces
|
|
13
|
+
the gradient impact of false negatives during training, gradually restoring weights
|
|
14
|
+
as training progresses. This preserves sample diversity while preventing noisy samples
|
|
15
|
+
from distorting the semantic space. The benchmark evaluates on multiple STS tasks
|
|
16
|
+
via SentEval.
|
|
17
|
+
|
|
18
|
+
'
|
|
19
|
+
task_mode: experiment_driven
|
|
20
|
+
requires_execution: true
|
|
21
|
+
requires_paper: true
|
|
22
|
+
integrity_level: cas_plus_canary
|
|
23
|
+
snapshot_status: runnable
|
|
24
|
+
support_level: advanced
|
|
25
|
+
time_band: 6-24h
|
|
26
|
+
cost_band: medium
|
|
27
|
+
difficulty: medium
|
|
28
|
+
data_access: public
|
|
29
|
+
primary_outputs:
|
|
30
|
+
- avg_sts
|
|
31
|
+
- embedding_checkpoint
|
|
32
|
+
- contrastive_eval_report
|
|
33
|
+
launch_profiles:
|
|
34
|
+
- id: quick_eval
|
|
35
|
+
label: Quick Eval
|
|
36
|
+
description: Run the packaged sentence-embedding evaluation route using SentEval
|
|
37
|
+
on STS benchmarks (STS12-STS16, STS-Benchmark, SICK-Relatedness).
|
|
38
|
+
- id: full_train_eval
|
|
39
|
+
label: Full Train + Eval
|
|
40
|
+
description: Execute the full GCSE training pipeline with knowledge graph construction,
|
|
41
|
+
LLM-based data synthesis, denoising, and downstream evaluation.
|
|
42
|
+
dataset_download:
|
|
43
|
+
primary_method: url
|
|
44
|
+
sources:
|
|
45
|
+
- url: https://deepscientist.cc/AISB/033_gcse
|
|
46
|
+
archive_type: zip
|
|
47
|
+
notes:
|
|
48
|
+
- Dataset includes source domain data and pre-processed knowledge graphs for augmentation.
|
|
49
|
+
- LLM-based data synthesis may require additional model weights depending on configuration.
|
|
50
|
+
credential_requirements:
|
|
51
|
+
mode: none
|
|
52
|
+
items: []
|
|
53
|
+
notes: []
|
|
54
|
+
resources:
|
|
55
|
+
minimum:
|
|
56
|
+
cpu_cores: 8
|
|
57
|
+
ram_gb: 32
|
|
58
|
+
disk_gb: 80
|
|
59
|
+
gpu_count: 1
|
|
60
|
+
gpu_vram_gb: 16
|
|
61
|
+
recommended:
|
|
62
|
+
cpu_cores: 16
|
|
63
|
+
ram_gb: 64
|
|
64
|
+
disk_gb: 150
|
|
65
|
+
gpu_count: 1
|
|
66
|
+
gpu_vram_gb: 24
|
|
67
|
+
environment:
|
|
68
|
+
python: '3.12'
|
|
69
|
+
cuda: '12.0'
|
|
70
|
+
pytorch: '2.0'
|
|
71
|
+
flash_attn: null
|
|
72
|
+
key_packages:
|
|
73
|
+
- transformers==4.44.1
|
|
74
|
+
- senteval
|
|
75
|
+
- prettytable
|
|
76
|
+
notes:
|
|
77
|
+
- The README explicitly recommends a separate vLLM environment for LLM-based data
|
|
78
|
+
synthesis tasks.
|
|
79
|
+
- Default training uses bert-base-uncased as base model; other encoder variants
|
|
80
|
+
(BERT-large, RoBERTa-base, RoBERTa-large) are supported per paper experiments.
|
|
81
|
+
- See bundled requirements.txt and pipeline dependencies for full environment setup.
|
|
82
|
+
risk_flags:
|
|
83
|
+
- requires_llm_synthesis
|
|
84
|
+
- single_gpu_training
|
|
85
|
+
risk_notes:
|
|
86
|
+
- LLM-based data synthesis (pipelines/5-8) requires inference access to a language
|
|
87
|
+
model; vLLM is recommended for efficiency.
|
|
88
|
+
- Training time scales with dataset size and synthesis volume; the 6-24h estimate
|
|
89
|
+
assumes moderate-scale domain data.
|
|
90
|
+
- No benchmark execution was performed in this packaging pass; runtime validation
|
|
91
|
+
is recommended before trusting reported metric values.
|
|
92
|
+
recommended_when: 'Use this benchmark for moderate-scale NLP training tasks focused
|
|
93
|
+
on semantic textual similarity and sentence embedding robustness. Suitable when
|
|
94
|
+
exploring knowledge-driven data augmentation or noise-aware contrastive learning
|
|
95
|
+
for improving unsupervised sentence representations.
|
|
96
|
+
|
|
97
|
+
'
|
|
98
|
+
not_recommended_when: 'Do not use this if you require a fully CPU-only workflow, a
|
|
99
|
+
benchmark centered on full autoregressive generation, or if no GPU resources are
|
|
100
|
+
available. Not suitable for tasks requiring multi-GPU distributed training at scale.
|
|
101
|
+
|
|
102
|
+
'
|
|
103
|
+
paper:
|
|
104
|
+
title: Enhancing Unsupervised Sentence Embeddings via Knowledge-Driven Data Augmentation
|
|
105
|
+
and Gaussian-Decayed Contrastive Learning
|
|
106
|
+
venue: ACL 2025
|
|
107
|
+
year: 2025
|
|
108
|
+
url: https://aclanthology.org/2025.acl-long.244/
|
|
109
|
+
authors:
|
|
110
|
+
- Peichao Lai (Peking University)
|
|
111
|
+
- Zhengfeng Zhang (Fuzhou University)
|
|
112
|
+
- Wentao Zhang (Peking University)
|
|
113
|
+
- Fangcheng Fu (Peking University)
|
|
114
|
+
- Bin Cui (Peking University)
|
|
115
|
+
download:
|
|
116
|
+
url: https://github.com/ResearAI/DeepScientist/releases/download/aisb-v0.0.1/aisb.t3.033_gcse.zip
|
|
117
|
+
archive_type: zip
|
|
118
|
+
local_dir_name: paper-33-GCSE
|
|
119
|
+
provider: github_release
|
|
120
|
+
repo: ResearAI/DeepScientist
|
|
121
|
+
tag: aisb-v0.0.1
|
|
122
|
+
asset_name: aisb.t3.033_gcse.zip
|
|
123
|
+
sha256: 1d0220f894ed883ae572eb0da0ce1a358966a280ddfafadeb19a9db1f01f7866
|
|
124
|
+
size_bytes: 912820
|
|
125
|
+
display:
|
|
126
|
+
palette_seed: mint-slate-semantic
|
|
127
|
+
art_style: embedding-atlas
|
|
128
|
+
accent_priority: medium
|
|
129
|
+
image_path: ../image/033_aisb.t3.033_gcse.jpg
|
|
130
|
+
capability_tags:
|
|
131
|
+
- research_code_optimization
|
|
132
|
+
- sentence_embeddings
|
|
133
|
+
- representation_learning
|
|
134
|
+
- data_augmentation
|
|
135
|
+
- nlp
|
|
136
|
+
- contrastive_learning
|
|
137
|
+
- knowledge_graphs
|
|
138
|
+
- noise_aware_training
|
|
139
|
+
aisb_direction: T3
|
|
140
|
+
track_fit:
|
|
141
|
+
- paper_track
|
|
142
|
+
- benchmark_track
|
|
143
|
+
commercial:
|
|
144
|
+
annual_fee: null
|
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
id: aisb.t3.033_gcse
|
|
2
|
+
name: GCSE
|
|
3
|
+
version: 0.1.0
|
|
4
|
+
one_line: 基于知识图谱引导增强和高斯衰减对比学习的无监督句子嵌入优化,实现噪声感知训练。
|
|
5
|
+
task_description: '本基准测试包涵盖基于高斯衰减梯度辅助对比句子嵌入(GCSE)模型的无监督句子嵌入训练。该方法针对无监督句子表示学习中的两个主要挑战:数据多样性不足和数据噪声过高。流程首先从源数据中提取细粒度知识(实体、数量)以构建知识图谱,然后使用大语言模型合成多样化正样本。评估模型对假正例进行标注和过滤,而高斯衰减函数在训练过程中降低假负例的梯度影响,逐步恢复权重。这在保持样本多样性的同时,防止噪声样本扭曲语义空间。基准测试通过SentEval在多个STS任务上进行评估。
|
|
6
|
+
|
|
7
|
+
'
|
|
8
|
+
task_mode: experiment_driven
|
|
9
|
+
requires_execution: true
|
|
10
|
+
requires_paper: true
|
|
11
|
+
integrity_level: cas_plus_canary
|
|
12
|
+
snapshot_status: runnable
|
|
13
|
+
support_level: advanced
|
|
14
|
+
time_band: 6-24h
|
|
15
|
+
cost_band: medium
|
|
16
|
+
difficulty: medium
|
|
17
|
+
data_access: public
|
|
18
|
+
primary_outputs:
|
|
19
|
+
- avg_sts
|
|
20
|
+
- embedding_checkpoint
|
|
21
|
+
- contrastive_eval_report
|
|
22
|
+
launch_profiles:
|
|
23
|
+
- id: quick_eval
|
|
24
|
+
label: 快速评估
|
|
25
|
+
description: 使用SentEval在STS基准测试(STS12-STS16、STS-Benchmark、SICK-Relatedness)上运行打包的句子嵌入评估流程。
|
|
26
|
+
- id: full_train_eval
|
|
27
|
+
label: 完整训练 + 评估
|
|
28
|
+
description: 执行完整的GCSE训练流程,包括知识图谱构建、基于大语言模型的数据合成、去噪和下游评估。
|
|
29
|
+
dataset_download:
|
|
30
|
+
primary_method: url
|
|
31
|
+
sources:
|
|
32
|
+
- url: https://deepscientist.cc/AISB/033_gcse
|
|
33
|
+
archive_type: zip
|
|
34
|
+
notes:
|
|
35
|
+
- 数据集包含源领域数据及用于增强的预处理知识图谱。
|
|
36
|
+
- 基于大语言模型的数据合成可能需要额外的模型权重,具体取决于配置。
|
|
37
|
+
credential_requirements:
|
|
38
|
+
mode: none
|
|
39
|
+
items: []
|
|
40
|
+
notes: []
|
|
41
|
+
resources:
|
|
42
|
+
minimum:
|
|
43
|
+
cpu_cores: 8
|
|
44
|
+
ram_gb: 32
|
|
45
|
+
disk_gb: 80
|
|
46
|
+
gpu_count: 1
|
|
47
|
+
gpu_vram_gb: 16
|
|
48
|
+
recommended:
|
|
49
|
+
cpu_cores: 16
|
|
50
|
+
ram_gb: 64
|
|
51
|
+
disk_gb: 150
|
|
52
|
+
gpu_count: 1
|
|
53
|
+
gpu_vram_gb: 24
|
|
54
|
+
environment:
|
|
55
|
+
python: '3.12'
|
|
56
|
+
cuda: '12.0'
|
|
57
|
+
pytorch: '2.0'
|
|
58
|
+
flash_attn: null
|
|
59
|
+
key_packages:
|
|
60
|
+
- transformers==4.44.1
|
|
61
|
+
- senteval
|
|
62
|
+
- prettytable
|
|
63
|
+
notes:
|
|
64
|
+
- README明确建议为基于大语言模型的数据合成任务配置独立的vLLM环境。
|
|
65
|
+
- 默认训练使用bert-base-uncased作为基础模型;根据论文实验,还支持其他编码器变体(BERT-large、RoBERTa-base、RoBERTa-large)。
|
|
66
|
+
- 有关完整环境设置,请参阅附带的requirements.txt和流程依赖项。
|
|
67
|
+
risk_flags:
|
|
68
|
+
- requires_llm_synthesis
|
|
69
|
+
- single_gpu_training
|
|
70
|
+
risk_notes:
|
|
71
|
+
- 基于大语言模型的数据合成(流程5-8)需要访问语言模型进行推理;推荐使用vLLM以提高效率。
|
|
72
|
+
- 训练时间随数据集大小和合成量增加而延长;6-24小时的估计假设使用中等规模的领域数据。
|
|
73
|
+
- 本次打包过程中未执行基准测试运行;在使用报告的指标值之前,建议进行运行时验证。
|
|
74
|
+
recommended_when: '当您需要进行中等规模的NLP训练任务,且重点关注语义文本相似性和句子嵌入鲁棒性时,可使用此基准测试。适用于探索知识驱动数据增强或噪声感知对比学习以改进无监督句子表示的场景。
|
|
75
|
+
|
|
76
|
+
'
|
|
77
|
+
not_recommended_when: '如果您需要完全基于CPU的工作流程、以完全自回归生成为中心的基准测试,或没有可用的GPU资源,请勿使用此基准测试。不适用于需要大规模多GPU分布式训练的任务。
|
|
78
|
+
|
|
79
|
+
'
|
|
80
|
+
paper:
|
|
81
|
+
title: Enhancing Unsupervised Sentence Embeddings via Knowledge-Driven Data Augmentation
|
|
82
|
+
and Gaussian-Decayed Contrastive Learning
|
|
83
|
+
venue: ACL 2025
|
|
84
|
+
year: 2025
|
|
85
|
+
url: https://aclanthology.org/2025.acl-long.244/
|
|
86
|
+
authors:
|
|
87
|
+
- Peichao Lai (Peking University)
|
|
88
|
+
- Zhengfeng Zhang (Fuzhou University)
|
|
89
|
+
- Wentao Zhang (Peking University)
|
|
90
|
+
- Fangcheng Fu (Peking University)
|
|
91
|
+
- Bin Cui (Peking University)
|
|
92
|
+
abstract_summary: 本文提出了一种名为GCSE的新方法,通过知识图谱引导的数据增强和高斯衰减对比学习来提升无监督句子嵌入的质量。该方法首先利用知识图谱获取细粒度知识来增强数据多样性,然后通过高斯衰减函数降低噪声样本的影响。在多个STS基准测试上的实验表明,该方法显著优于现有无监督方法。
|
|
93
|
+
abstract: '无监督句子嵌入学习面临着数据多样性不足和噪声干扰的双重挑战。本研究提出了一种名为高斯衰减梯度辅助对比句子嵌入(GCSE)的新方法,通过知识驱动的数据增强和噪声感知对比学习来解决这些问题。首先,我们从源数据中提取细粒度知识(实体、数量)构建知识图谱,并利用大语言模型合成多样化的正样本。然后,引入高斯衰减函数来降低训练过程中假负例的梯度影响,同时通过评估模型过滤假正例。在多个标准STS基准测试上的实验表明,我们的方法显著提升了无监督句子嵌入的性能,超越了现有的最先进方法。
|
|
94
|
+
|
|
95
|
+
'
|
|
96
|
+
notes: []
|
|
97
|
+
download:
|
|
98
|
+
url: https://github.com/ResearAI/DeepScientist/releases/download/aisb-v0.0.1/aisb.t3.033_gcse.zip
|
|
99
|
+
archive_type: zip
|
|
100
|
+
local_dir_name: paper-33-GCSE
|
|
101
|
+
provider: github_release
|
|
102
|
+
repo: ResearAI/DeepScientist
|
|
103
|
+
tag: aisb-v0.0.1
|
|
104
|
+
asset_name: aisb.t3.033_gcse.zip
|
|
105
|
+
sha256: 1d0220f894ed883ae572eb0da0ce1a358966a280ddfafadeb19a9db1f01f7866
|
|
106
|
+
size_bytes: 912820
|
|
107
|
+
display:
|
|
108
|
+
palette_seed: mint-slate-semantic
|
|
109
|
+
art_style: embedding-atlas
|
|
110
|
+
accent_priority: medium
|
|
111
|
+
image_path: ../image/033_aisb.t3.033_gcse.jpg
|
|
112
|
+
capability_tags:
|
|
113
|
+
- research_code_optimization
|
|
114
|
+
- sentence_embeddings
|
|
115
|
+
- representation_learning
|
|
116
|
+
- data_augmentation
|
|
117
|
+
- nlp
|
|
118
|
+
- contrastive_learning
|
|
119
|
+
- knowledge_graphs
|
|
120
|
+
- noise_aware_training
|
|
121
|
+
aisb_direction: T3
|
|
122
|
+
track_fit:
|
|
123
|
+
- paper_track
|
|
124
|
+
- benchmark_track
|
|
125
|
+
commercial:
|
|
126
|
+
annual_fee: null
|
|
@@ -0,0 +1,183 @@
|
|
|
1
|
+
id: aisb.t3.034_ensemblewm
|
|
2
|
+
name: Ensemble Watermarks for Large Language Models
|
|
3
|
+
version: 0.1.0
|
|
4
|
+
one_line: Multi-feature watermark detection combining red-green, acrostic, and sensorimotor-norm
|
|
5
|
+
signals for robust LLM output verification.
|
|
6
|
+
task_description: 'This benchmark evaluates ensemble watermark detection methods for
|
|
7
|
+
large language model outputs. The ensemble combines three distinct watermark features:
|
|
8
|
+
the red-green watermark (token-level logit manipulation), acrostic embeddings (sentence-initial
|
|
9
|
+
letter encoding), and sensorimotor norms (perceptual/action category selection).
|
|
10
|
+
The task involves generating watermarked text via logit modification and detecting
|
|
11
|
+
watermarks in both clean and paraphrased outputs. Evaluation covers detection rate,
|
|
12
|
+
watermark score distributions, and robustness against paraphrasing attacks using
|
|
13
|
+
a T5-based attack pipeline. The unified detection function applies across all ensemble
|
|
14
|
+
configurations without modification.
|
|
15
|
+
|
|
16
|
+
'
|
|
17
|
+
task_mode: evaluation_driven
|
|
18
|
+
requires_execution: true
|
|
19
|
+
requires_paper: true
|
|
20
|
+
integrity_level: cas_plus_canary
|
|
21
|
+
snapshot_status: runnable
|
|
22
|
+
support_level: advanced
|
|
23
|
+
time_band: 6-24h
|
|
24
|
+
cost_band: medium
|
|
25
|
+
difficulty: hard
|
|
26
|
+
data_access: public
|
|
27
|
+
primary_outputs:
|
|
28
|
+
- detection_rate
|
|
29
|
+
- watermark_scores
|
|
30
|
+
- benchmark_report
|
|
31
|
+
launch_profiles:
|
|
32
|
+
- id: quick_eval
|
|
33
|
+
label: Quick Eval
|
|
34
|
+
description: Run one packaged watermark-detection evaluation route on generated
|
|
35
|
+
outputs. Uses a single model configuration and predefined test prompts to measure
|
|
36
|
+
baseline detection rates without attack simulation.
|
|
37
|
+
- id: full_eval
|
|
38
|
+
label: Full Eval
|
|
39
|
+
description: Execute the complete multi-feature watermark detection workflow including
|
|
40
|
+
generation, soft paraphrasing attack (T5-based word replacement at configurable
|
|
41
|
+
percentage), and evaluation across all feature combinations. Produces detection
|
|
42
|
+
rate matrices and watermark score distributions.
|
|
43
|
+
dataset_download:
|
|
44
|
+
primary_method: mixed
|
|
45
|
+
sources:
|
|
46
|
+
- url: https://deepscientist.cc/AISB/034_ensemblewm
|
|
47
|
+
type: archive
|
|
48
|
+
format: zip
|
|
49
|
+
- url: https://huggingface.co/datasets/know-center/Lancaster_sensorimotor_norms
|
|
50
|
+
type: external
|
|
51
|
+
format: csv
|
|
52
|
+
description: Lancaster Sensorimotor Norms dataset with 39,707 words across 11
|
|
53
|
+
dimensions (6 perceptual and 5 action)
|
|
54
|
+
notes:
|
|
55
|
+
- The Lancaster sensorimotor norms CSV is bundled in the archive
|
|
56
|
+
- Test prompts sourced from standard LLM evaluation datasets
|
|
57
|
+
credential_requirements:
|
|
58
|
+
mode: none
|
|
59
|
+
items: []
|
|
60
|
+
notes:
|
|
61
|
+
- No external API keys required
|
|
62
|
+
- Local LLM weights must be obtained separately (e.g., Llama-3.1-8B via Hugging Face; gated checkpoints need an approved account and access token)
|
|
63
|
+
resources:
|
|
64
|
+
minimum:
|
|
65
|
+
cpu_cores: 8
|
|
66
|
+
ram_gb: 32
|
|
67
|
+
disk_gb: 80
|
|
68
|
+
gpu_count: 1
|
|
69
|
+
gpu_vram_gb: 24
|
|
70
|
+
notes: Supports quantized models (GPTQ 4-bit) to reduce VRAM requirements
|
|
71
|
+
recommended:
|
|
72
|
+
cpu_cores: 16
|
|
73
|
+
ram_gb: 64
|
|
74
|
+
disk_gb: 150
|
|
75
|
+
gpu_count: 1
|
|
76
|
+
gpu_vram_gb: 48
|
|
77
|
+
notes: Full precision recommended for generation experiments; 4-bit quantization
|
|
78
|
+
available for memory-constrained setups
|
|
79
|
+
environment:
|
|
80
|
+
python: '3.10'
|
|
81
|
+
cuda: '11.8'
|
|
82
|
+
pytorch: 2.1.0
|
|
83
|
+
flash_attn: 2.x
|
|
84
|
+
key_packages:
|
|
85
|
+
- transformers
|
|
86
|
+
- torch
|
|
87
|
+
- numpy
|
|
88
|
+
- pandas
|
|
89
|
+
- spacy
|
|
90
|
+
- datasets
|
|
91
|
+
- tqdm
|
|
92
|
+
- bitsandbytes
|
|
93
|
+
- auto-gptq
|
|
94
|
+
notes:
|
|
95
|
+
- 'Requires spacy en_core_web_sm model: python -m spacy download en_core_web_sm'
|
|
96
|
+
- T5 model for paraphrasing attack loaded via transformers (T5-small or T5-base)
|
|
97
|
+
- Llama model loaded via AutoModelForCausalLM with optional BitsAndBytesConfig for
|
|
98
|
+
4-bit quantization
|
|
99
|
+
- See bundled requirements.txt for full dependency specification
|
|
100
|
+
risk_flags:
|
|
101
|
+
- high_vram
|
|
102
|
+
- extended_runtime
|
|
103
|
+
risk_notes:
|
|
104
|
+
- Generation experiments require significant GPU memory for LLM inference
|
|
105
|
+
- Full evaluation with attack simulation may take several hours depending on dataset
|
|
106
|
+
size
|
|
107
|
+
- Soft attack (paraphrasing) batch_size parameter affects memory usage
|
|
108
|
+
recommended_when: 'Use this benchmark when you need an LLM-safeguard evaluation pipeline
|
|
109
|
+
focused on watermark detection quality, want to evaluate ensemble detection robustness
|
|
110
|
+
against paraphrasing, or need to compare single-feature versus multi-feature watermark
|
|
111
|
+
schemes. Suitable for evaluating detection-only approaches without requiring model
|
|
112
|
+
fine-tuning.
|
|
113
|
+
|
|
114
|
+
'
|
|
115
|
+
not_recommended_when: 'Do not use this benchmark if you cannot host open LLM checkpoints
|
|
116
|
+
locally, lack GPU resources with adequate VRAM, or need a benchmark without generation
|
|
117
|
+
and detection loops. Not suitable for evaluating text classification or perplexity-based
|
|
118
|
+
detection methods.
|
|
119
|
+
|
|
120
|
+
'
|
|
121
|
+
paper:
|
|
122
|
+
title: Ensemble Watermarks for Large Language Models
|
|
123
|
+
authors:
|
|
124
|
+
- Georg Niess
|
|
125
|
+
- Roman Kern
|
|
126
|
+
venue: arXiv preprint
|
|
127
|
+
year: 2024
|
|
128
|
+
url: https://arxiv.org/abs/2411.19563
|
|
129
|
+
code_url: https://github.com/
|
|
130
|
+
abstract: 'As large language models reach human-like fluency, reliably distinguishing
|
|
131
|
+
AI-generated text from human authorship becomes increasingly difficult. We propose
|
|
132
|
+
a multi-feature method for generating watermarks that combines acrostica and sensorimotor
|
|
133
|
+
norms with the established red-green watermark to achieve a 98% detection rate.
|
|
134
|
+
After paraphrasing attacks, performance remains at 95% detection rate compared
|
|
135
|
+
to 49% for red-green alone.
|
|
136
|
+
|
|
137
|
+
'
|
|
138
|
+
download:
|
|
139
|
+
url: https://github.com/ResearAI/DeepScientist/releases/download/aisb-v0.0.1/aisb.t3.034_ensemblewm.zip
|
|
140
|
+
archive_type: zip
|
|
141
|
+
local_dir_name: aisb-t3-034-ensemblewm
|
|
142
|
+
provider: github_release
|
|
143
|
+
repo: ResearAI/DeepScientist
|
|
144
|
+
tag: aisb-v0.0.1
|
|
145
|
+
asset_name: aisb.t3.034_ensemblewm.zip
|
|
146
|
+
sha256: 8eae0196937e9b32feade1727d417158061f0741b7b0b80a9f154bdd6aaba079
|
|
147
|
+
size_bytes: 25404700
|
|
148
|
+
display:
|
|
149
|
+
palette_seed: aqua-ink-watermark
|
|
150
|
+
art_style: verification-grid
|
|
151
|
+
accent_priority: high
|
|
152
|
+
tags:
|
|
153
|
+
- watermarking
|
|
154
|
+
- detection
|
|
155
|
+
- llm-safety
|
|
156
|
+
- robustness
|
|
157
|
+
- ensemble-methods
|
|
158
|
+
image_path: ../image/034_aisb.t3.034_ensemblewm.jpg
|
|
159
|
+
metric_contract:
|
|
160
|
+
primary_metric: detection_rate
|
|
161
|
+
origin_path: detection_notebook.ipynb
|
|
162
|
+
source_ref: detection_rate
|
|
163
|
+
evaluation_protocol:
|
|
164
|
+
code_paths:
|
|
165
|
+
- batch_run/run_experiments_soft.py
|
|
166
|
+
- batch_run/run_attack_soft.py
|
|
167
|
+
- detection_notebook.ipynb
|
|
168
|
+
- modules/text_generation.py
|
|
169
|
+
metrics_summary: []
|
|
170
|
+
execution_status: pending
|
|
171
|
+
execution_notes: 'Static code audit confirmed executable anchors for all staged
|
|
172
|
+
metrics. No benchmark execution was performed in this packaging pass. Metric values
|
|
173
|
+
should be treated as provisional pending trusted runtime outputs.
|
|
174
|
+
|
|
175
|
+
'
|
|
176
|
+
executive_summary: 'This benchmark provides a comprehensive evaluation framework for
|
|
177
|
+
ensemble watermark detection in LLM outputs. The approach combines token-level (red-green),
|
|
178
|
+
sentence-level (acrostic), and semantic-level (sensorimotor) watermark features
|
|
179
|
+
to achieve robust detection even after paraphrasing attacks. The unified detection
|
|
180
|
+
function operates across all feature combinations, enabling flexible configuration
|
|
181
|
+
for different security-robustness tradeoffs.
|
|
182
|
+
|
|
183
|
+
'
|