@researai/deepscientist 1.5.16 → 1.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +309 -130
- package/AISB/catalog/aisb.b1.agentic_coding.yaml +244 -0
- package/AISB/catalog/aisb.b10.climate_earth.yaml +235 -0
- package/AISB/catalog/aisb.b11.model_efficiency.yaml +231 -0
- package/AISB/catalog/aisb.b12.embodied_ai.yaml +238 -0
- package/AISB/catalog/aisb.b2.agent_systems.yaml +229 -0
- package/AISB/catalog/aisb.b3.self_evolving_rl.yaml +237 -0
- package/AISB/catalog/aisb.b4.lm_reasoning.yaml +240 -0
- package/AISB/catalog/aisb.b5.math_proof.yaml +235 -0
- package/AISB/catalog/aisb.b6.research_process.yaml +243 -0
- package/AISB/catalog/aisb.b7.multimodal_fusion.yaml +232 -0
- package/AISB/catalog/aisb.b8.lifesci_drug.yaml +275 -0
- package/AISB/catalog/aisb.b9.material_science.yaml +237 -0
- package/AISB/catalog/aisb.t3.001_savvy.yaml +159 -0
- package/AISB/catalog/aisb.t3.001_savvy.zh.yaml +121 -0
- package/AISB/catalog/aisb.t3.002_pinet.yaml +189 -0
- package/AISB/catalog/aisb.t3.002_pinet.zh.yaml +130 -0
- package/AISB/catalog/aisb.t3.004_decentralattn.yaml +184 -0
- package/AISB/catalog/aisb.t3.004_decentralattn.zh.yaml +153 -0
- package/AISB/catalog/aisb.t3.005_tsae.yaml +193 -0
- package/AISB/catalog/aisb.t3.005_tsae.zh.yaml +139 -0
- package/AISB/catalog/aisb.t3.006_physense.yaml +194 -0
- package/AISB/catalog/aisb.t3.006_physense.zh.yaml +118 -0
- package/AISB/catalog/aisb.t3.007_reasoningiqa.yaml +169 -0
- package/AISB/catalog/aisb.t3.007_reasoningiqa.zh.yaml +133 -0
- package/AISB/catalog/aisb.t3.008_meanflows.yaml +188 -0
- package/AISB/catalog/aisb.t3.008_meanflows.zh.yaml +140 -0
- package/AISB/catalog/aisb.t3.009_scoremissing.yaml +179 -0
- package/AISB/catalog/aisb.t3.009_scoremissing.zh.yaml +119 -0
- package/AISB/catalog/aisb.t3.010_suitabilityfilter.yaml +221 -0
- package/AISB/catalog/aisb.t3.010_suitabilityfilter.zh.yaml +141 -0
- package/AISB/catalog/aisb.t3.011_osd.yaml +206 -0
- package/AISB/catalog/aisb.t3.011_osd.zh.yaml +163 -0
- package/AISB/catalog/aisb.t3.012_efficientqat.yaml +206 -0
- package/AISB/catalog/aisb.t3.012_efficientqat.zh.yaml +159 -0
- package/AISB/catalog/aisb.t3.013_appl.yaml +152 -0
- package/AISB/catalog/aisb.t3.013_appl.zh.yaml +126 -0
- package/AISB/catalog/aisb.t3.014_piguard.yaml +207 -0
- package/AISB/catalog/aisb.t3.014_piguard.zh.yaml +164 -0
- package/AISB/catalog/aisb.t3.015_frspec.yaml +209 -0
- package/AISB/catalog/aisb.t3.015_frspec.zh.yaml +163 -0
- package/AISB/catalog/aisb.t3.016_mathfusion.yaml +166 -0
- package/AISB/catalog/aisb.t3.016_mathfusion.zh.yaml +145 -0
- package/AISB/catalog/aisb.t3.017_multimodalglp.yaml +171 -0
- package/AISB/catalog/aisb.t3.017_multimodalglp.zh.yaml +122 -0
- package/AISB/catalog/aisb.t3.018_cotsynth.yaml +206 -0
- package/AISB/catalog/aisb.t3.018_cotsynth.zh.yaml +162 -0
- package/AISB/catalog/aisb.t3.019_dyscaleut.yaml +211 -0
- package/AISB/catalog/aisb.t3.019_dyscaleut.zh.yaml +148 -0
- package/AISB/catalog/aisb.t3.020_aristotle.yaml +173 -0
- package/AISB/catalog/aisb.t3.020_aristotle.zh.yaml +119 -0
- package/AISB/catalog/aisb.t3.021_tokenrecycling.yaml +160 -0
- package/AISB/catalog/aisb.t3.021_tokenrecycling.zh.yaml +129 -0
- package/AISB/catalog/aisb.t3.022_chainofreasoning.yaml +204 -0
- package/AISB/catalog/aisb.t3.022_chainofreasoning.zh.yaml +161 -0
- package/AISB/catalog/aisb.t3.023_guidedembed.yaml +211 -0
- package/AISB/catalog/aisb.t3.023_guidedembed.zh.yaml +189 -0
- package/AISB/catalog/aisb.t3.024_outputcentric.yaml +148 -0
- package/AISB/catalog/aisb.t3.024_outputcentric.zh.yaml +131 -0
- package/AISB/catalog/aisb.t3.025_deeper.yaml +143 -0
- package/AISB/catalog/aisb.t3.025_deeper.zh.yaml +116 -0
- package/AISB/catalog/aisb.t3.026_gartkg.yaml +195 -0
- package/AISB/catalog/aisb.t3.026_gartkg.zh.yaml +127 -0
- package/AISB/catalog/aisb.t3.027_citeeval.yaml +182 -0
- package/AISB/catalog/aisb.t3.027_citeeval.zh.yaml +135 -0
- package/AISB/catalog/aisb.t3.028_sbam.yaml +206 -0
- package/AISB/catalog/aisb.t3.028_sbam.zh.yaml +166 -0
- package/AISB/catalog/aisb.t3.029_cdqgeoembed.yaml +224 -0
- package/AISB/catalog/aisb.t3.029_cdqgeoembed.zh.yaml +142 -0
- package/AISB/catalog/aisb.t3.030_processrm.yaml +211 -0
- package/AISB/catalog/aisb.t3.030_processrm.zh.yaml +166 -0
- package/AISB/catalog/aisb.t3.031_circuitstability.yaml +172 -0
- package/AISB/catalog/aisb.t3.031_circuitstability.zh.yaml +134 -0
- package/AISB/catalog/aisb.t3.032_ptsolver.yaml +169 -0
- package/AISB/catalog/aisb.t3.032_ptsolver.zh.yaml +135 -0
- package/AISB/catalog/aisb.t3.033_gcse.yaml +144 -0
- package/AISB/catalog/aisb.t3.033_gcse.zh.yaml +126 -0
- package/AISB/catalog/aisb.t3.034_ensemblewm.yaml +183 -0
- package/AISB/catalog/aisb.t3.034_ensemblewm.zh.yaml +146 -0
- package/AISB/catalog/aisb.t3.035_moralvalueswa.yaml +207 -0
- package/AISB/catalog/aisb.t3.035_moralvalueswa.zh.yaml +165 -0
- package/AISB/catalog/aisb.t3.036_weakstrongpref.yaml +210 -0
- package/AISB/catalog/aisb.t3.036_weakstrongpref.zh.yaml +194 -0
- package/AISB/catalog/aisb.t3.037_dementiamask.yaml +172 -0
- package/AISB/catalog/aisb.t3.037_dementiamask.zh.yaml +132 -0
- package/AISB/catalog/aisb.t3.038_tinysam.yaml +284 -0
- package/AISB/catalog/aisb.t3.038_tinysam.zh.yaml +240 -0
- package/AISB/catalog/aisb.t3.039_calf.yaml +224 -0
- package/AISB/catalog/aisb.t3.039_calf.zh.yaml +194 -0
- package/AISB/catalog/aisb.t3.040_graniteguardian.yaml +199 -0
- package/AISB/catalog/aisb.t3.040_graniteguardian.zh.yaml +174 -0
- package/AISB/catalog/aisb.t3.041_amdm.yaml +149 -0
- package/AISB/catalog/aisb.t3.041_amdm.zh.yaml +137 -0
- package/AISB/catalog/aisb.t3.042_xpatch.yaml +216 -0
- package/AISB/catalog/aisb.t3.042_xpatch.zh.yaml +182 -0
- package/AISB/catalog/aisb.t3.043_vhm.yaml +268 -0
- package/AISB/catalog/aisb.t3.043_vhm.zh.yaml +193 -0
- package/AISB/catalog/aisb.t3.044_rgvi.yaml +224 -0
- package/AISB/catalog/aisb.t3.044_rgvi.zh.yaml +176 -0
- package/AISB/catalog/aisb.t3.045_pslstm.yaml +203 -0
- package/AISB/catalog/aisb.t3.045_pslstm.zh.yaml +179 -0
- package/AISB/catalog/aisb.t3.046_nonstatts.yaml +208 -0
- package/AISB/catalog/aisb.t3.046_nonstatts.zh.yaml +194 -0
- package/AISB/catalog/aisb.t3.047_timepfn.yaml +156 -0
- package/AISB/catalog/aisb.t3.047_timepfn.zh.yaml +124 -0
- package/AISB/catalog/aisb.t3.048_proxyspex.yaml +148 -0
- package/AISB/catalog/aisb.t3.048_proxyspex.zh.yaml +125 -0
- package/AISB/catalog/aisb.t3.049_hogwildinference.yaml +183 -0
- package/AISB/catalog/aisb.t3.049_hogwildinference.zh.yaml +138 -0
- package/AISB/catalog/aisb.t3.050_causalpfn.yaml +214 -0
- package/AISB/catalog/aisb.t3.050_causalpfn.zh.yaml +190 -0
- package/AISB/catalog/aisb.t3.051_flashtp.yaml +169 -0
- package/AISB/catalog/aisb.t3.051_flashtp.zh.yaml +124 -0
- package/AISB/catalog/aisb.t3.052_nsdiff.yaml +155 -0
- package/AISB/catalog/aisb.t3.052_nsdiff.zh.yaml +138 -0
- package/AISB/catalog/aisb.t3.053_k2vae.yaml +158 -0
- package/AISB/catalog/aisb.t3.053_k2vae.zh.yaml +132 -0
- package/AISB/catalog/aisb.t3.054_timebase.yaml +178 -0
- package/AISB/catalog/aisb.t3.054_timebase.zh.yaml +158 -0
- package/AISB/catalog/aisb.t3.055_csbrain.yaml +238 -0
- package/AISB/catalog/aisb.t3.055_csbrain.zh.yaml +184 -0
- package/AISB/catalog/aisb.t3.056_infosam.yaml +224 -0
- package/AISB/catalog/aisb.t3.056_infosam.zh.yaml +189 -0
- package/AISB/catalog/aisb.t3.057_mdreid.yaml +129 -0
- package/AISB/catalog/aisb.t3.057_mdreid.zh.yaml +117 -0
- package/AISB/catalog/aisb.t3.058_mindglitch.yaml +171 -0
- package/AISB/catalog/aisb.t3.058_mindglitch.zh.yaml +145 -0
- package/AISB/catalog/aisb.t3.059_selfsupervised.yaml +154 -0
- package/AISB/catalog/aisb.t3.059_selfsupervised.zh.yaml +125 -0
- package/AISB/catalog/aisb.t3.060_iaggad.yaml +121 -0
- package/AISB/catalog/aisb.t3.060_iaggad.zh.yaml +100 -0
- package/AISB/catalog/aisb.t3.061_hsgkn.yaml +136 -0
- package/AISB/catalog/aisb.t3.061_hsgkn.zh.yaml +113 -0
- package/AISB/catalog/aisb.t3.062_visionts.yaml +237 -0
- package/AISB/catalog/aisb.t3.062_visionts.zh.yaml +216 -0
- package/AISB/catalog/aisb.t3.063_tsrag.yaml +162 -0
- package/AISB/catalog/aisb.t3.063_tsrag.zh.yaml +138 -0
- package/AISB/catalog/aisb.t3.064_pir.yaml +221 -0
- package/AISB/catalog/aisb.t3.064_pir.zh.yaml +197 -0
- package/AISB/catalog/aisb.t3.065_proteinbinding.yaml +234 -0
- package/AISB/catalog/aisb.t3.065_proteinbinding.zh.yaml +167 -0
- package/AISB/catalog/aisb.t3.066_tropicalattention.yaml +267 -0
- package/AISB/catalog/aisb.t3.066_tropicalattention.zh.yaml +229 -0
- package/AISB/catalog/aisb.t3.067_kanad.yaml +193 -0
- package/AISB/catalog/aisb.t3.067_kanad.zh.yaml +167 -0
- package/AISB/catalog/aisb.t3.068_sempo.yaml +187 -0
- package/AISB/catalog/aisb.t3.068_sempo.zh.yaml +148 -0
- package/AISB/catalog/aisb.t3.069_treehfd.yaml +129 -0
- package/AISB/catalog/aisb.t3.069_treehfd.zh.yaml +111 -0
- package/AISB/catalog/aisb.t3.070_certifiedunlearning.yaml +224 -0
- package/AISB/catalog/aisb.t3.070_certifiedunlearning.zh.yaml +171 -0
- package/AISB/catalog/aisb.t3.071_neuralmjd.yaml +142 -0
- package/AISB/catalog/aisb.t3.071_neuralmjd.zh.yaml +120 -0
- package/AISB/catalog/aisb.t3.072_fedgmt.yaml +181 -0
- package/AISB/catalog/aisb.t3.072_fedgmt.zh.yaml +158 -0
- package/AISB/catalog/aisb.t3.073_rld.yaml +161 -0
- package/AISB/catalog/aisb.t3.073_rld.zh.yaml +129 -0
- package/AISB/catalog/aisb.t3.074_lsvi.yaml +163 -0
- package/AISB/catalog/aisb.t3.074_lsvi.zh.yaml +129 -0
- package/AISB/catalog/aisb.t3.075_treeslicedentropy.yaml +201 -0
- package/AISB/catalog/aisb.t3.075_treeslicedentropy.zh.yaml +148 -0
- package/AISB/catalog/aisb.t3.076_aanet.yaml +169 -0
- package/AISB/catalog/aisb.t3.076_aanet.zh.yaml +129 -0
- package/AISB/catalog/aisb.t3.077_cmnn.yaml +199 -0
- package/AISB/catalog/aisb.t3.077_cmnn.zh.yaml +165 -0
- package/AISB/catalog/aisb.t3.078_conformalanomaly.yaml +146 -0
- package/AISB/catalog/aisb.t3.078_conformalanomaly.zh.yaml +117 -0
- package/AISB/catalog/aisb.t3.079_dpfkmeans.yaml +131 -0
- package/AISB/catalog/aisb.t3.079_dpfkmeans.zh.yaml +104 -0
- package/AISB/catalog/aisb.t3.080_latentscorereweight.yaml +169 -0
- package/AISB/catalog/aisb.t3.080_latentscorereweight.zh.yaml +123 -0
- package/AISB/catalog/aisb.t3.081_qmamba.yaml +150 -0
- package/AISB/catalog/aisb.t3.081_qmamba.zh.yaml +117 -0
- package/AISB/catalog/aisb.t3.082_onlinellmrouting.yaml +160 -0
- package/AISB/catalog/aisb.t3.082_onlinellmrouting.zh.yaml +133 -0
- package/AISB/catalog/aisb.t3.083_starformer.yaml +178 -0
- package/AISB/catalog/aisb.t3.083_starformer.zh.yaml +140 -0
- package/AISB/catalog/aisb.t3.084_ift.yaml +139 -0
- package/AISB/catalog/aisb.t3.084_ift.zh.yaml +111 -0
- package/AISB/catalog/aisb.t3.085_neuralsurv.yaml +183 -0
- package/AISB/catalog/aisb.t3.085_neuralsurv.zh.yaml +143 -0
- package/AISB/catalog/aisb.t3.086_stella.yaml +197 -0
- package/AISB/catalog/aisb.t3.086_stella.zh.yaml +142 -0
- package/AISB/catalog/aisb.t3.087_moses.yaml +167 -0
- package/AISB/catalog/aisb.t3.087_moses.zh.yaml +132 -0
- package/AISB/catalog/aisb.t3.088_channelnorm.yaml +140 -0
- package/AISB/catalog/aisb.t3.088_channelnorm.zh.yaml +109 -0
- package/AISB/catalog/aisb.t3.089_causalvelocity.yaml +730 -0
- package/AISB/catalog/aisb.t3.089_causalvelocity.zh.yaml +668 -0
- package/AISB/catalog/aisb.t3.090_rstib.yaml +144 -0
- package/AISB/catalog/aisb.t3.090_rstib.zh.yaml +109 -0
- package/AISB/catalog/aisb.t3.091_timeawarecausal.yaml +132 -0
- package/AISB/catalog/aisb.t3.091_timeawarecausal.zh.yaml +107 -0
- package/AISB/catalog/aisb.t3.092_kmeanslocalopt.yaml +138 -0
- package/AISB/catalog/aisb.t3.092_kmeanslocalopt.zh.yaml +110 -0
- package/AISB/catalog/aisb.t3.093_fedwmsam.yaml +134 -0
- package/AISB/catalog/aisb.t3.093_fedwmsam.zh.yaml +106 -0
- package/AISB/catalog/aisb.t3.094_boundre.yaml +147 -0
- package/AISB/catalog/aisb.t3.094_boundre.zh.yaml +114 -0
- package/AISB/catalog/aisb.t3.095_fastfeaturecp.yaml +153 -0
- package/AISB/catalog/aisb.t3.095_fastfeaturecp.zh.yaml +118 -0
- package/AISB/catalog/aisb.t3.096_m3svm.yaml +189 -0
- package/AISB/catalog/aisb.t3.096_m3svm.zh.yaml +149 -0
- package/AISB/catalog/aisb.t3.097_wassersteintl.yaml +212 -0
- package/AISB/catalog/aisb.t3.097_wassersteintl.zh.yaml +169 -0
- package/AISB/catalog/aisb.t3.098_xmahalanobis.yaml +171 -0
- package/AISB/catalog/aisb.t3.098_xmahalanobis.zh.yaml +127 -0
- package/AISB/catalog/aisb.t3.099_ollalanding.yaml +248 -0
- package/AISB/catalog/aisb.t3.099_ollalanding.zh.yaml +182 -0
- package/AISB/catalog/aisb.t3.100_invmissingdata.yaml +179 -0
- package/AISB/catalog/aisb.t3.100_invmissingdata.zh.yaml +150 -0
- package/AISB/catalog/aisb.t3.101_acia.yaml +164 -0
- package/AISB/catalog/aisb.t3.101_acia.zh.yaml +109 -0
- package/AISB/catalog/aisb.t3.102_stochasticff.yaml +178 -0
- package/AISB/catalog/aisb.t3.102_stochasticff.zh.yaml +130 -0
- package/AISB/catalog/aisb.t3.103_qdcp.yaml +150 -0
- package/AISB/catalog/aisb.t3.103_qdcp.zh.yaml +116 -0
- package/AISB/catalog/aisb.t3.104_balancedactiveinf.yaml +137 -0
- package/AISB/catalog/aisb.t3.104_balancedactiveinf.zh.yaml +104 -0
- package/AISB/catalog/aisb.t3.105_binaryclasseval.yaml +161 -0
- package/AISB/catalog/aisb.t3.105_binaryclasseval.zh.yaml +130 -0
- package/AISB/image/001_aisb.t3.001_savvy.jpg +0 -0
- package/AISB/image/002_aisb.t3.002_pinet.jpg +0 -0
- package/AISB/image/003_aisb.t3.003_dmsqd.jpg +0 -0
- package/AISB/image/004_aisb.t3.004_decentralattn.jpg +0 -0
- package/AISB/image/005_aisb.t3.005_tsae.jpg +0 -0
- package/AISB/image/006_aisb.t3.006_physense.jpg +0 -0
- package/AISB/image/007_aisb.t3.007_reasoningiqa.jpg +0 -0
- package/AISB/image/008_aisb.t3.008_meanflows.jpg +0 -0
- package/AISB/image/009_aisb.t3.009_scoremissing.jpg +0 -0
- package/AISB/image/010_aisb.t3.010_suitabilityfilter.jpg +0 -0
- package/AISB/image/011_aisb.t3.011_osd.jpg +0 -0
- package/AISB/image/012_aisb.t3.012_efficientqat.jpg +0 -0
- package/AISB/image/013_aisb.t3.013_appl.jpg +0 -0
- package/AISB/image/014_aisb.t3.014_piguard.jpg +0 -0
- package/AISB/image/015_aisb.t3.015_frspec.jpg +0 -0
- package/AISB/image/016_aisb.t3.016_mathfusion.jpg +0 -0
- package/AISB/image/017_aisb.t3.017_multimodalglp.jpg +0 -0
- package/AISB/image/018_aisb.t3.018_cotsynth.jpg +0 -0
- package/AISB/image/019_aisb.t3.019_dyscaleut.jpg +0 -0
- package/AISB/image/020_aisb.t3.020_aristotle.jpg +0 -0
- package/AISB/image/021_aisb.t3.021_tokenrecycling.jpg +0 -0
- package/AISB/image/022_aisb.t3.022_chainofreasoning.jpg +0 -0
- package/AISB/image/023_aisb.t3.023_guidedembed.jpg +0 -0
- package/AISB/image/024_aisb.t3.024_outputcentric.jpg +0 -0
- package/AISB/image/025_aisb.t3.025_deeper.jpg +0 -0
- package/AISB/image/026_aisb.t3.026_gartkg.jpg +0 -0
- package/AISB/image/027_aisb.t3.027_citeeval.jpg +0 -0
- package/AISB/image/028_aisb.t3.028_sbam.jpg +0 -0
- package/AISB/image/029_aisb.t3.029_cdqgeoembed.jpg +0 -0
- package/AISB/image/030_aisb.t3.030_processrm.jpg +0 -0
- package/AISB/image/031_aisb.t3.031_circuitstability.jpg +0 -0
- package/AISB/image/032_aisb.t3.032_ptsolver.jpg +0 -0
- package/AISB/image/033_aisb.t3.033_gcse.jpg +0 -0
- package/AISB/image/034_aisb.t3.034_ensemblewm.jpg +0 -0
- package/AISB/image/035_aisb.t3.035_moralvalueswa.jpg +0 -0
- package/AISB/image/036_aisb.t3.036_weakstrongpref.jpg +0 -0
- package/AISB/image/037_aisb.t3.037_dementiamask.jpg +0 -0
- package/AISB/image/038_aisb.t3.038_tinysam.jpg +0 -0
- package/AISB/image/039_aisb.t3.039_calf.jpg +0 -0
- package/AISB/image/040_aisb.t3.040_graniteguardian.jpg +0 -0
- package/AISB/image/041_aisb.t3.041_amdm.jpg +0 -0
- package/AISB/image/042_aisb.t3.042_xpatch.jpg +0 -0
- package/AISB/image/043_aisb.t3.043_vhm.jpg +0 -0
- package/AISB/image/044_aisb.t3.044_rgvi.jpg +0 -0
- package/AISB/image/045_aisb.t3.045_pslstm.jpg +0 -0
- package/AISB/image/046_aisb.t3.046_nonstatts.jpg +0 -0
- package/AISB/image/047_aisb.t3.047_timepfn.jpg +0 -0
- package/AISB/image/048_aisb.t3.048_proxyspex.jpg +0 -0
- package/AISB/image/049_aisb.t3.049_hogwildinference.jpg +0 -0
- package/AISB/image/050_aisb.t3.050_causalpfn.jpg +0 -0
- package/AISB/image/051_aisb.t3.051_flashtp.jpg +0 -0
- package/AISB/image/052_aisb.t3.052_nsdiff.jpg +0 -0
- package/AISB/image/053_aisb.t3.053_k2vae.jpg +0 -0
- package/AISB/image/054_aisb.t3.054_timebase.jpg +0 -0
- package/AISB/image/055_aisb.t3.055_csbrain.jpg +0 -0
- package/AISB/image/056_aisb.t3.056_infosam.jpg +0 -0
- package/AISB/image/057_aisb.t3.057_mdreid.jpg +0 -0
- package/AISB/image/058_aisb.t3.058_mindglitch.jpg +0 -0
- package/AISB/image/059_aisb.t3.059_selfsupervised.jpg +0 -0
- package/AISB/image/060_aisb.t3.060_iaggad.jpg +0 -0
- package/AISB/image/061_aisb.t3.061_hsgkn.jpg +0 -0
- package/AISB/image/062_aisb.t3.062_visionts.jpg +0 -0
- package/AISB/image/063_aisb.t3.063_tsrag.jpg +0 -0
- package/AISB/image/064_aisb.t3.064_pir.jpg +0 -0
- package/AISB/image/065_aisb.t3.065_proteinbinding.jpg +0 -0
- package/AISB/image/066_aisb.t3.066_tropicalattention.jpg +0 -0
- package/AISB/image/067_aisb.t3.067_kanad.jpg +0 -0
- package/AISB/image/068_aisb.t3.068_sempo.jpg +0 -0
- package/AISB/image/069_aisb.t3.069_treehfd.jpg +0 -0
- package/AISB/image/070_aisb.t3.070_certifiedunlearning.jpg +0 -0
- package/AISB/image/071_aisb.t3.071_neuralmjd.jpg +0 -0
- package/AISB/image/072_aisb.t3.072_fedgmt.jpg +0 -0
- package/AISB/image/073_aisb.t3.073_rld.jpg +0 -0
- package/AISB/image/074_aisb.t3.074_lsvi.jpg +0 -0
- package/AISB/image/075_aisb.t3.075_treeslicedentropy.jpg +0 -0
- package/AISB/image/076_aisb.t3.076_aanet.jpg +0 -0
- package/AISB/image/077_aisb.t3.077_cmnn.jpg +0 -0
- package/AISB/image/078_aisb.t3.078_conformalanomaly.jpg +0 -0
- package/AISB/image/079_aisb.t3.079_dpfkmeans.jpg +0 -0
- package/AISB/image/080_aisb.t3.080_latentscorereweight.jpg +0 -0
- package/AISB/image/081_aisb.t3.081_qmamba.jpg +0 -0
- package/AISB/image/082_aisb.t3.082_onlinellmrouting.jpg +0 -0
- package/AISB/image/083_aisb.t3.083_starformer.jpg +0 -0
- package/AISB/image/084_aisb.t3.084_ift.jpg +0 -0
- package/AISB/image/085_aisb.t3.085_neuralsurv.jpg +0 -0
- package/AISB/image/086_aisb.t3.086_stella.jpg +0 -0
- package/AISB/image/087_aisb.t3.087_moses.jpg +0 -0
- package/AISB/image/088_aisb.t3.088_channelnorm.jpg +0 -0
- package/AISB/image/089_aisb.t3.089_causalvelocity.jpg +0 -0
- package/AISB/image/090_aisb.t3.090_rstib.jpg +0 -0
- package/AISB/image/091_aisb.t3.091_timeawarecausal.jpg +0 -0
- package/AISB/image/092_aisb.t3.092_kmeanslocalopt.jpg +0 -0
- package/AISB/image/093_aisb.t3.093_fedwmsam.jpg +0 -0
- package/AISB/image/094_aisb.t3.094_boundre.jpg +0 -0
- package/AISB/image/095_aisb.t3.095_fastfeaturecp.jpg +0 -0
- package/AISB/image/096_aisb.t3.096_m3svm.jpg +0 -0
- package/AISB/image/097_aisb.t3.097_wassersteintl.jpg +0 -0
- package/AISB/image/098_aisb.t3.098_xmahalanobis.jpg +0 -0
- package/AISB/image/099_aisb.t3.099_ollalanding.jpg +0 -0
- package/AISB/image/100_aisb.t3.100_invmissingdata.jpg +0 -0
- package/AISB/image/101_aisb.t3.101_acia.jpg +0 -0
- package/AISB/image/102_aisb.t3.102_stochasticff.jpg +0 -0
- package/AISB/image/103_aisb.t3.103_qdcp.jpg +0 -0
- package/AISB/image/104_aisb.t3.104_balancedactiveinf.jpg +0 -0
- package/AISB/image/105_aisb.t3.105_binaryclasseval.jpg +0 -0
- package/AISB/image/106_aisb.t1.reasoning_lite.jpg +0 -0
- package/AISB/image/107_aisb.t2.paper_audit.jpg +0 -0
- package/AISB/image/108_aisb.t3.multi_gpu_search.jpg +0 -0
- package/AISB/image/109_aisb.t3.tdc_admet.jpg +0 -0
- package/AISB/image/aisb.b1.agentic_coding.svg +16 -0
- package/AISB/image/aisb.b10.climate_earth.svg +16 -0
- package/AISB/image/aisb.b11.model_efficiency.svg +16 -0
- package/AISB/image/aisb.b12.embodied_ai.svg +16 -0
- package/AISB/image/aisb.b2.agent_systems.svg +16 -0
- package/AISB/image/aisb.b3.self_evolving_rl.svg +16 -0
- package/AISB/image/aisb.b4.lm_reasoning.svg +16 -0
- package/AISB/image/aisb.b5.math_proof.svg +16 -0
- package/AISB/image/aisb.b6.research_process.svg +16 -0
- package/AISB/image/aisb.b7.multimodal_fusion.svg +16 -0
- package/AISB/image/aisb.b8.lifesci_drug.svg +16 -0
- package/AISB/image/aisb.b9.material_science.svg +16 -0
- package/README.md +196 -32
- package/bin/ds.js +924 -66
- package/docs/en/00_QUICK_START.md +195 -18
- package/docs/en/01_SETTINGS_REFERENCE.md +468 -96
- package/docs/en/02_START_RESEARCH_GUIDE.md +26 -5
- package/docs/en/03_QQ_CONNECTOR_GUIDE.md +14 -3
- package/docs/en/04_LINGZHU_CONNECTOR_GUIDE.md +2 -0
- package/docs/en/05_TUI_GUIDE.md +171 -2
- package/docs/en/07_MEMORY_AND_MCP.md +38 -2
- package/docs/en/09_DOCTOR.md +78 -7
- package/docs/en/10_WEIXIN_CONNECTOR_GUIDE.md +38 -1
- package/docs/en/11_LICENSE_AND_RISK.md +4 -0
- package/docs/en/12_GUIDED_WORKFLOW_TOUR.md +15 -0
- package/docs/en/14_PROMPT_SKILLS_AND_MCP_GUIDE.md +9 -0
- package/docs/en/15_CODEX_PROVIDER_SETUP.md +624 -180
- package/docs/en/16_TELEGRAM_CONNECTOR_GUIDE.md +14 -0
- package/docs/en/17_WHATSAPP_CONNECTOR_GUIDE.md +14 -0
- package/docs/en/18_FEISHU_CONNECTOR_GUIDE.md +14 -0
- package/docs/en/21_LOCAL_MODEL_BACKENDS_GUIDE.md +386 -0
- package/docs/en/22_BENCHSTORE_YAML_REFERENCE.md +469 -0
- package/docs/en/23_BENCHSTORE_GITHUB_RELEASES_SPEC.md +316 -0
- package/docs/en/24_CLAUDE_CODE_PROVIDER_SETUP.md +469 -0
- package/docs/en/25_OPENCODE_PROVIDER_SETUP.md +653 -0
- package/docs/en/26_CITATION_AND_ATTRIBUTION.md +119 -0
- package/docs/en/27_KIMI_CODE_PROVIDER_SETUP.md +180 -0
- package/docs/en/28_DISCORD_CONNECTOR_GUIDE.md +61 -0
- package/docs/en/29_SLACK_CONNECTOR_GUIDE.md +60 -0
- package/docs/en/30_SETTINGS_CONTROL_CENTER_GUIDE.md +371 -0
- package/docs/en/{19_LOCAL_BROWSER_AUTH.md → 31_LOCAL_BROWSER_AUTH.md} +1 -1
- package/docs/en/32_WINDOWS_WSL2_DEPLOYMENT_GUIDE.md +273 -0
- package/docs/en/33_WORKSPACE_EXPLORER_QA.md +121 -0
- package/docs/en/91_DEVELOPMENT.md +266 -0
- package/docs/en/99_ACKNOWLEDGEMENTS.md +24 -19
- package/docs/en/README.md +48 -7
- package/docs/images/admin/admin-connectors-health-en.png +0 -0
- package/docs/images/admin/admin-controllers-en.png +0 -0
- package/docs/images/admin/admin-diagnostics-en.png +0 -0
- package/docs/images/admin/admin-errors-en.png +0 -0
- package/docs/images/admin/admin-issues-en.png +0 -0
- package/docs/images/admin/admin-logs-en.png +0 -0
- package/docs/images/admin/admin-quest-detail-en.png +0 -0
- package/docs/images/admin/admin-quests-en.png +0 -0
- package/docs/images/admin/admin-repairs-en.png +0 -0
- package/docs/images/admin/admin-runtime-en.png +0 -0
- package/docs/images/admin/admin-search-en.png +0 -0
- package/docs/images/admin/admin-stats-en.png +0 -0
- package/docs/images/admin/admin-summary-en.png +0 -0
- package/docs/images/connectors/connector-discord-en.png +0 -0
- package/docs/images/connectors/connector-feishu-en.png +0 -0
- package/docs/images/connectors/connector-lingzhu-en.png +0 -0
- package/docs/images/connectors/connector-qq-en.png +0 -0
- package/docs/images/connectors/connector-slack-en.png +0 -0
- package/docs/images/connectors/connector-telegram-en.png +0 -0
- package/docs/images/connectors/connector-weixin-en.png +0 -0
- package/docs/images/connectors/connector-whatsapp-en.png +0 -0
- package/docs/images/settings/settings-baselines-en.png +0 -0
- package/docs/images/settings/settings-config-en.png +0 -0
- package/docs/images/settings/settings-connectors-overview-en.png +0 -0
- package/docs/images/settings/settings-deepxiv-en.png +0 -0
- package/docs/images/settings/settings-mcp-servers-en.png +0 -0
- package/docs/images/settings/settings-plugins-en.png +0 -0
- package/docs/images/settings/settings-runners-en.png +0 -0
- package/docs/zh/00_QUICK_START.md +142 -18
- package/docs/zh/01_SETTINGS_REFERENCE.md +219 -98
- package/docs/zh/02_START_RESEARCH_GUIDE.md +26 -5
- package/docs/zh/05_TUI_GUIDE.md +171 -2
- package/docs/zh/07_MEMORY_AND_MCP.md +29 -2
- package/docs/zh/09_DOCTOR.md +54 -8
- package/docs/zh/10_WEIXIN_CONNECTOR_GUIDE.md +24 -1
- package/docs/zh/11_LICENSE_AND_RISK.md +4 -0
- package/docs/zh/12_GUIDED_WORKFLOW_TOUR.md +15 -0
- package/docs/zh/14_PROMPT_SKILLS_AND_MCP_GUIDE.md +9 -0
- package/docs/zh/15_CODEX_PROVIDER_SETUP.md +552 -181
- package/docs/zh/21_LOCAL_MODEL_BACKENDS_GUIDE.md +384 -0
- package/docs/zh/22_BENCHSTORE_YAML_REFERENCE.md +459 -0
- package/docs/zh/23_BENCHSTORE_GITHUB_RELEASES_SPEC.md +287 -0
- package/docs/zh/23_CLAUDE_RUNNER_GUIDE.md +103 -0
- package/docs/zh/24_CLAUDE_CODE_PROVIDER_SETUP.md +460 -0
- package/docs/zh/25_OPENCODE_PROVIDER_SETUP.md +660 -0
- package/docs/zh/26_CITATION_AND_ATTRIBUTION.md +102 -0
- package/docs/zh/27_KIMI_CODE_PROVIDER_SETUP.md +51 -0
- package/docs/zh/{19_LOCAL_BROWSER_AUTH.md → 31_LOCAL_BROWSER_AUTH.md} +1 -1
- package/docs/zh/32_WINDOWS_WSL2_DEPLOYMENT_GUIDE.md +264 -0
- package/docs/zh/33_WORKSPACE_EXPLORER_QA.md +127 -0
- package/docs/zh/99_ACKNOWLEDGEMENTS.md +23 -19
- package/docs/zh/README.md +33 -7
- package/install.sh +168 -20
- package/package.json +5 -1
- package/pyproject.toml +2 -1
- package/src/deepscientist/__init__.py +1 -1
- package/src/deepscientist/acp/envelope.py +13 -0
- package/src/deepscientist/admin/__init__.py +3 -0
- package/src/deepscientist/admin/charts.py +681 -0
- package/src/deepscientist/admin/logs.py +119 -0
- package/src/deepscientist/admin/repairs.py +217 -0
- package/src/deepscientist/admin/service.py +1310 -0
- package/src/deepscientist/admin/system_info.py +700 -0
- package/src/deepscientist/admin/tasks.py +465 -0
- package/src/deepscientist/admin/tool_metrics.py +600 -0
- package/src/deepscientist/artifact/guidance.py +8 -4
- package/src/deepscientist/artifact/schemas.py +115 -0
- package/src/deepscientist/artifact/service.py +4268 -260
- package/src/deepscientist/bash_exec/monitor.py +30 -3
- package/src/deepscientist/bash_exec/service.py +134 -1
- package/src/deepscientist/benchstore/__init__.py +4 -0
- package/src/deepscientist/benchstore/prompt_builder.py +224 -0
- package/src/deepscientist/benchstore/service.py +1716 -0
- package/src/deepscientist/bridges/connectors.py +8 -2
- package/src/deepscientist/channels/weixin_ilink.py +8 -1
- package/src/deepscientist/cli.py +92 -17
- package/src/deepscientist/codex_cli_compat.py +187 -74
- package/src/deepscientist/config/models.py +82 -11
- package/src/deepscientist/config/service.py +1077 -93
- package/src/deepscientist/connector/weixin_support.py +48 -17
- package/src/deepscientist/daemon/api/handlers.py +827 -235
- package/src/deepscientist/daemon/api/router.py +81 -1
- package/src/deepscientist/daemon/app.py +1512 -85
- package/src/deepscientist/diagnostics/__init__.py +6 -0
- package/src/deepscientist/diagnostics/runner_failures.py +277 -0
- package/src/deepscientist/doctor.py +407 -56
- package/src/deepscientist/evidence_packets.py +590 -0
- package/src/deepscientist/home.py +52 -4
- package/src/deepscientist/kimi_cli_compat.py +50 -0
- package/src/deepscientist/latex_runtime.py +2 -2
- package/src/deepscientist/mcp/context.py +2 -0
- package/src/deepscientist/mcp/schemas.py +114 -0
- package/src/deepscientist/mcp/server.py +1566 -126
- package/src/deepscientist/memory/service.py +203 -16
- package/src/deepscientist/process_control.py +8 -1
- package/src/deepscientist/prompts/builder.py +850 -88
- package/src/deepscientist/quest/__init__.py +2 -2
- package/src/deepscientist/quest/layout.py +12 -1
- package/src/deepscientist/quest/node_traces.py +10 -0
- package/src/deepscientist/quest/service.py +1852 -161
- package/src/deepscientist/quest/stage_views.py +1 -1
- package/src/deepscientist/runners/__init__.py +18 -0
- package/src/deepscientist/runners/base.py +89 -1
- package/src/deepscientist/runners/builtins.py +13 -1
- package/src/deepscientist/runners/claude.py +391 -0
- package/src/deepscientist/runners/codex.py +480 -35
- package/src/deepscientist/runners/codex_telemetry.py +127 -0
- package/src/deepscientist/runners/kimi.py +334 -0
- package/src/deepscientist/runners/metadata.py +68 -0
- package/src/deepscientist/runners/opencode.py +414 -0
- package/src/deepscientist/runners/runtime_overrides.py +100 -0
- package/src/deepscientist/runners/simple_cli.py +538 -0
- package/src/deepscientist/runtime_storage.py +303 -0
- package/src/deepscientist/shared.py +80 -16
- package/src/deepscientist/skills/installer.py +37 -0
- package/src/deepscientist/skills/registry.py +2 -0
- package/src/deepscientist/tinytex.py +2 -2
- package/src/deepscientist/tui.py +10 -3
- package/src/prompts/benchstore/system.md +77 -0
- package/src/prompts/connectors/qq.md +33 -2
- package/src/prompts/connectors/weixin.md +208 -23
- package/src/prompts/contracts/admin_ops.md +74 -0
- package/src/prompts/contracts/admin_ops_knowledge.md +138 -0
- package/src/prompts/contracts/shared_interaction.md +5 -10
- package/src/prompts/start_setup/system.md +422 -0
- package/src/prompts/system.md +411 -304
- package/src/prompts/system_copilot.md +89 -0
- package/src/skills/analysis-campaign/SKILL.md +239 -578
- package/src/skills/analysis-campaign/references/artifact-flow-examples.md +102 -0
- package/src/skills/analysis-campaign/references/boundary-cases.md +98 -0
- package/src/skills/analysis-campaign/references/campaign-checklist-template.md +39 -24
- package/src/skills/analysis-campaign/references/campaign-design.md +26 -10
- package/src/skills/analysis-campaign/references/campaign-plan-template.md +53 -54
- package/src/skills/analysis-campaign/references/operational-guidance.md +97 -0
- package/src/skills/analysis-campaign/references/writing-facing-slice-examples.md +10 -20
- package/src/skills/baseline/SKILL.md +183 -461
- package/src/skills/baseline/references/artifact-flow-examples.md +106 -0
- package/src/skills/baseline/references/artifact-payload-examples.md +1 -1
- package/src/skills/baseline/references/baseline-checklist-template.md +27 -35
- package/src/skills/baseline/references/baseline-plan-template.md +37 -76
- package/src/skills/baseline/references/boundary-cases.md +86 -0
- package/src/skills/baseline/references/codebase-audit-checklist.md +2 -6
- package/src/skills/baseline/references/comparability-contract.md +7 -12
- package/src/skills/baseline/references/operational-guidance.md +56 -0
- package/src/skills/baseline/references/route-selection.md +5 -25
- package/src/skills/decision/SKILL.md +113 -306
- package/src/skills/decision/references/checkpoint-memory-template.md +47 -0
- package/src/skills/decision/references/operational-guidance.md +94 -0
- package/src/skills/decision/references/research-route-criteria.md +7 -8
- package/src/skills/decision/references/strategic-decision-template.md +13 -26
- package/src/skills/experiment/SKILL.md +132 -670
- package/src/skills/experiment/references/execution-playbook.md +374 -0
- package/src/skills/experiment/references/main-experiment-checklist-template.md +26 -2
- package/src/skills/experiment/references/main-experiment-plan-template.md +28 -17
- package/src/skills/experiment/references/operational-guidance.md +108 -0
- package/src/skills/finalize/SKILL.md +62 -0
- package/src/skills/finalize/references/checkpoint-memory-template.md +49 -0
- package/src/skills/finalize/references/resume-packet-template.md +7 -0
- package/src/skills/idea/SKILL.md +228 -15
- package/src/skills/idea/references/controlled-brainstorming-playbook.md +78 -0
- package/src/skills/idea/references/current-board-packet-template.md +61 -0
- package/src/skills/idea/references/high-value-idea-sourcing.md +119 -0
- package/src/skills/idea/references/idea-generation-playbook.md +21 -0
- package/src/skills/idea/references/idea-thinking-flow.md +6 -0
- package/src/skills/idea/references/literature-survey-template.md +3 -0
- package/src/skills/idea/references/objective-contract-template.md +54 -0
- package/src/skills/idea/references/outline-seeding-example.md +56 -0
- package/src/skills/idea/references/pre-idea-draft-template.md +105 -0
- package/src/skills/idea/references/related-work-playbook.md +75 -2
- package/src/skills/idea/references/research-history-playbook.md +114 -0
- package/src/skills/idea/references/selection-gate.md +58 -6
- package/src/skills/intake-audit/SKILL.md +43 -2
- package/src/skills/intake-audit/references/state-audit-template.md +10 -0
- package/src/skills/nature-data/SKILL.md +128 -0
- package/src/skills/nature-data/UPSTREAM_LICENSE.txt +21 -0
- package/src/skills/nature-data/agents/openai.yaml +4 -0
- package/src/skills/nature-data/references/chinese-author-alignment.md +84 -0
- package/src/skills/nature-data/references/fair-metadata-checklist.md +105 -0
- package/src/skills/nature-data/references/policy-principles.md +103 -0
- package/src/skills/nature-data/references/repository-and-identifiers.md +96 -0
- package/src/skills/nature-data/references/source-basis.md +54 -0
- package/src/skills/nature-data/references/statement-patterns.md +153 -0
- package/src/skills/nature-figure/SKILL.md +197 -0
- package/src/skills/nature-figure/UPSTREAM_LICENSE.txt +21 -0
- package/src/skills/nature-figure/agents/openai.yaml +4 -0
- package/src/skills/nature-figure/evals/evals.json +37 -0
- package/src/skills/nature-figure/references/api.md +428 -0
- package/src/skills/nature-figure/references/backend-selection.md +100 -0
- package/src/skills/nature-figure/references/chart-types.md +281 -0
- package/src/skills/nature-figure/references/common-patterns.md +349 -0
- package/src/skills/nature-figure/references/design-theory.md +436 -0
- package/src/skills/nature-figure/references/figure-contract.md +93 -0
- package/src/skills/nature-figure/references/nature-2026-observations.md +112 -0
- package/src/skills/nature-figure/references/qa-contract.md +119 -0
- package/src/skills/nature-figure/references/r-template-index.md +66 -0
- package/src/skills/nature-figure/references/r-workflow.md +161 -0
- package/src/skills/nature-figure/references/tutorials.md +250 -0
- package/src/skills/nature-paper2ppt/SKILL.md +507 -0
- package/src/skills/nature-paper2ppt/UPSTREAM_LICENSE.txt +21 -0
- package/src/skills/nature-paper2ppt/agents/openai.yaml +4 -0
- package/src/skills/nature-polishing/SKILL.md +385 -0
- package/src/skills/nature-polishing/UPSTREAM_LICENSE.txt +21 -0
- package/src/skills/nature-polishing/agents/openai.yaml +4 -0
- package/src/skills/nature-polishing/references/phrasebank-playbook.md +162 -0
- package/src/skills/nature-polishing/references/section-moves.md +240 -0
- package/src/skills/nature-polishing/references/style-guardrails.md +94 -0
- package/src/skills/nature-polishing/references/writing-strategy.md +148 -0
- package/src/skills/optimize/SKILL.md +177 -1568
- package/src/skills/optimize/references/brief-shaping-playbook.md +95 -0
- package/src/skills/optimize/references/candidate-board-template.md +13 -0
- package/src/skills/optimize/references/candidate-ranking-template.md +51 -0
- package/src/skills/optimize/references/codegen-route-playbook.md +50 -0
- package/src/skills/optimize/references/debug-response-template.md +29 -0
- package/src/skills/optimize/references/frontier-review-template.md +32 -0
- package/src/skills/optimize/references/fusion-playbook.md +36 -0
- package/src/skills/optimize/references/method-brief-template.md +73 -0
- package/src/skills/optimize/references/operational-guidance.md +621 -0
- package/src/skills/optimize/references/optimization-memory-template.md +30 -0
- package/src/skills/optimize/references/optimize-checklist-template.md +18 -0
- package/src/skills/optimize/references/plateau-response-playbook.md +28 -0
- package/src/skills/optimize/references/prompt-patterns.md +49 -0
- package/src/skills/paper-outline/SKILL.md +227 -0
- package/src/skills/paper-outline/references/outline-patterns.md +87 -0
- package/src/skills/paper-plot/SKILL.md +79 -0
- package/src/skills/paper-plot/agents/openai.yaml +4 -0
- package/src/skills/paper-plot/references/bar_grouped_hatch.md +96 -0
- package/src/skills/paper-plot/references/bar_paired_delta.md +72 -0
- package/src/skills/paper-plot/references/line_confidence_band.md +75 -0
- package/src/skills/paper-plot/references/line_loss_with_inset.md +65 -0
- package/src/skills/paper-plot/references/line_training_curve.md +44 -0
- package/src/skills/paper-plot/references/radar_dual_series.md +59 -0
- package/src/skills/paper-plot/references/scatter_broken_axis.md +59 -0
- package/src/skills/paper-plot/references/scatter_tsne_cluster.md +72 -0
- package/src/skills/paper-plot/scripts/bar_memevolve.py +109 -0
- package/src/skills/paper-plot/scripts/bar_spice.py +166 -0
- package/src/skills/paper-plot/scripts/line_aime.py +94 -0
- package/src/skills/paper-plot/scripts/line_loss_inset.py +157 -0
- package/src/skills/paper-plot/scripts/line_selfdistill.py +168 -0
- package/src/skills/paper-plot/scripts/radar_dora.py +151 -0
- package/src/skills/paper-plot/scripts/scatter_break.py +169 -0
- package/src/skills/paper-plot/scripts/scatter_tsne.py +133 -0
- package/src/skills/rebuttal/SKILL.md +9 -0
- package/src/skills/references/tool-usage-by-stage.md +438 -0
- package/src/skills/review/SKILL.md +105 -7
- package/src/skills/science/PROVENANCE.md +44 -0
- package/src/skills/science/SKILL.md +137 -0
- package/src/skills/science/references/artifact-science-tool.md +110 -0
- package/src/skills/science/references/claim-type-discipline.md +56 -0
- package/src/skills/science/references/domain-index.md +422 -0
- package/src/skills/science/references/hpc-via-bash-exec.md +42 -0
- package/src/skills/science/references/package-check-playbook.md +64 -0
- package/src/skills/science/references/package-index.min.json +3616 -0
- package/src/skills/science/references/packages/abinit.md +80 -0
- package/src/skills/science/references/packages/acts.md +73 -0
- package/src/skills/science/references/packages/aiida-core.md +80 -0
- package/src/skills/science/references/packages/alamode.md +80 -0
- package/src/skills/science/references/packages/amuse.md +88 -0
- package/src/skills/science/references/packages/anndata.md +88 -0
- package/src/skills/science/references/packages/arbor.md +80 -0
- package/src/skills/science/references/packages/arc.md +73 -0
- package/src/skills/science/references/packages/astropy.md +88 -0
- package/src/skills/science/references/packages/astroquery.md +88 -0
- package/src/skills/science/references/packages/atomate2.md +80 -0
- package/src/skills/science/references/packages/atomsmltr.md +73 -0
- package/src/skills/science/references/packages/awkward.md +73 -0
- package/src/skills/science/references/packages/batman.md +88 -0
- package/src/skills/science/references/packages/biopython.md +88 -0
- package/src/skills/science/references/packages/bloqade.md +73 -0
- package/src/skills/science/references/packages/brian2.md +73 -0
- package/src/skills/science/references/packages/bullet3.md +73 -0
- package/src/skills/science/references/packages/calculix.md +80 -0
- package/src/skills/science/references/packages/cantera.md +73 -0
- package/src/skills/science/references/packages/cavity-md-ipi.md +80 -0
- package/src/skills/science/references/packages/ccdproc.md +88 -0
- package/src/skills/science/references/packages/celerite2.md +88 -0
- package/src/skills/science/references/packages/cellrank.md +73 -0
- package/src/skills/science/references/packages/cesm.md +80 -0
- package/src/skills/science/references/packages/chemicals.md +73 -0
- package/src/skills/science/references/packages/chempy.md +73 -0
- package/src/skills/science/references/packages/cirq.md +73 -0
- package/src/skills/science/references/packages/coffea.md +73 -0
- package/src/skills/science/references/packages/cp2k.md +88 -0
- package/src/skills/science/references/packages/custodian.md +80 -0
- package/src/skills/science/references/packages/dart.md +73 -0
- package/src/skills/science/references/packages/datamol.md +88 -0
- package/src/skills/science/references/packages/dd4hep.md +73 -0
- package/src/skills/science/references/packages/dealii.md +80 -0
- package/src/skills/science/references/packages/deepchem.md +88 -0
- package/src/skills/science/references/packages/delphes.md +73 -0
- package/src/skills/science/references/packages/devito.md +80 -0
- package/src/skills/science/references/packages/dftb.md +88 -0
- package/src/skills/science/references/packages/dftd4.md +88 -0
- package/src/skills/science/references/packages/dftk-jl.md +80 -0
- package/src/skills/science/references/packages/dolfinx.md +80 -0
- package/src/skills/science/references/packages/drake.md +73 -0
- package/src/skills/science/references/packages/dumux.md +73 -0
- package/src/skills/science/references/packages/elk.md +80 -0
- package/src/skills/science/references/packages/elmerfem.md +80 -0
- package/src/skills/science/references/packages/enzo-e.md +88 -0
- package/src/skills/science/references/packages/espresso.md +80 -0
- package/src/skills/science/references/packages/exoplanet.md +88 -0
- package/src/skills/science/references/packages/fairroot.md +73 -0
- package/src/skills/science/references/packages/fbpic.md +80 -0
- package/src/skills/science/references/packages/fdtdbath-meep.md +80 -0
- package/src/skills/science/references/packages/geant4.md +73 -0
- package/src/skills/science/references/packages/geosx.md +80 -0
- package/src/skills/science/references/packages/gprmax.md +80 -0
- package/src/skills/science/references/packages/gromacs.md +80 -0
- package/src/skills/science/references/packages/gwaslab.md +73 -0
- package/src/skills/science/references/packages/gz-sim.md +73 -0
- package/src/skills/science/references/packages/hail.md +88 -0
- package/src/skills/science/references/packages/hiphive.md +80 -0
- package/src/skills/science/references/packages/hoomd-blue.md +80 -0
- package/src/skills/science/references/packages/itensor.md +73 -0
- package/src/skills/science/references/packages/itensors-jl.md +73 -0
- package/src/skills/science/references/packages/jdftx.md +73 -0
- package/src/skills/science/references/packages/jobflow.md +80 -0
- package/src/skills/science/references/packages/kadanoffbaym-jl.md +73 -0
- package/src/skills/science/references/packages/kite.md +80 -0
- package/src/skills/science/references/packages/kratos.md +80 -0
- package/src/skills/science/references/packages/kwant.md +73 -0
- package/src/skills/science/references/packages/lammps.md +80 -0
- package/src/skills/science/references/packages/lightkurve.md +88 -0
- package/src/skills/science/references/packages/limix.md +73 -0
- package/src/skills/science/references/packages/maxwelllink.md +80 -0
- package/src/skills/science/references/packages/mcdc.md +73 -0
- package/src/skills/science/references/packages/meep.md +80 -0
- package/src/skills/science/references/packages/mfem.md +80 -0
- package/src/skills/science/references/packages/mitgcm.md +73 -0
- package/src/skills/science/references/packages/modflow6.md +73 -0
- package/src/skills/science/references/packages/molecool.md +73 -0
- package/src/skills/science/references/packages/mom6.md +73 -0
- package/src/skills/science/references/packages/moose.md +80 -0
- package/src/skills/science/references/packages/mpas-model.md +73 -0
- package/src/skills/science/references/packages/mujoco.md +73 -0
- package/src/skills/science/references/packages/mumax3.md +73 -0
- package/src/skills/science/references/packages/nekrs.md +80 -0
- package/src/skills/science/references/packages/nessi.md +73 -0
- package/src/skills/science/references/packages/nest-simulator.md +73 -0
- package/src/skills/science/references/packages/netket.md +73 -0
- package/src/skills/science/references/packages/neuron.md +73 -0
- package/src/skills/science/references/packages/nextflow.md +88 -0
- package/src/skills/science/references/packages/nwchem.md +88 -0
- package/src/skills/science/references/packages/openbabel.md +88 -0
- package/src/skills/science/references/packages/openems.md +80 -0
- package/src/skills/science/references/packages/openff-toolkit.md +88 -0
- package/src/skills/science/references/packages/openfoam-dev.md +80 -0
- package/src/skills/science/references/packages/openmc.md +73 -0
- package/src/skills/science/references/packages/openmm.md +80 -0
- package/src/skills/science/references/packages/openmoc.md +73 -0
- package/src/skills/science/references/packages/openmx.md +80 -0
- package/src/skills/science/references/packages/opensees.md +80 -0
- package/src/skills/science/references/packages/opensn.md +80 -0
- package/src/skills/science/references/packages/opm-simulators.md +73 -0
- package/src/skills/science/references/packages/oqupy.md +73 -0
- package/src/skills/science/references/packages/packmol.md +80 -0
- package/src/skills/science/references/packages/palabos.md +80 -0
- package/src/skills/science/references/packages/parflow.md +80 -0
- package/src/skills/science/references/packages/pennylane.md +88 -0
- package/src/skills/science/references/packages/perceval.md +73 -0
- package/src/skills/science/references/packages/phono3py.md +73 -0
- package/src/skills/science/references/packages/phonopy.md +73 -0
- package/src/skills/science/references/packages/photutils.md +88 -0
- package/src/skills/science/references/packages/picongpu.md +80 -0
- package/src/skills/science/references/packages/plink-ng.md +88 -0
- package/src/skills/science/references/packages/precice.md +73 -0
- package/src/skills/science/references/packages/psc.md +80 -0
- package/src/skills/science/references/packages/psi4.md +88 -0
- package/src/skills/science/references/packages/pybinding.md +73 -0
- package/src/skills/science/references/packages/pyfr.md +80 -0
- package/src/skills/science/references/packages/pyhf.md +73 -0
- package/src/skills/science/references/packages/pyiron_base.md +80 -0
- package/src/skills/science/references/packages/pylcp.md +73 -0
- package/src/skills/science/references/packages/pylith.md +80 -0
- package/src/skills/science/references/packages/pynbody.md +88 -0
- package/src/skills/science/references/packages/pysam.md +88 -0
- package/src/skills/science/references/packages/pyscf.md +88 -0
- package/src/skills/science/references/packages/q-e.md +73 -0
- package/src/skills/science/references/packages/qibo.md +73 -0
- package/src/skills/science/references/packages/qiskit.md +73 -0
- package/src/skills/science/references/packages/quantica-jl.md +73 -0
- package/src/skills/science/references/packages/quantumoptics-jl.md +73 -0
- package/src/skills/science/references/packages/quimb.md +73 -0
- package/src/skills/science/references/packages/qulacs.md +73 -0
- package/src/skills/science/references/packages/qutip.md +73 -0
- package/src/skills/science/references/packages/rdkit.md +88 -0
- package/src/skills/science/references/packages/rmg-py.md +73 -0
- package/src/skills/science/references/packages/root.md +73 -0
- package/src/skills/science/references/packages/scanpy.md +88 -0
- package/src/skills/science/references/packages/scikit-allel.md +88 -0
- package/src/skills/science/references/packages/scikit-bio.md +88 -0
- package/src/skills/science/references/packages/scqubits.md +73 -0
- package/src/skills/science/references/packages/scuff-em.md +80 -0
- package/src/skills/science/references/packages/scvi-tools.md +73 -0
- package/src/skills/science/references/packages/seissol.md +73 -0
- package/src/skills/science/references/packages/sfepy.md +80 -0
- package/src/skills/science/references/packages/sisl.md +73 -0
- package/src/skills/science/references/packages/smilei.md +80 -0
- package/src/skills/science/references/packages/snakemake.md +88 -0
- package/src/skills/science/references/packages/specfem3d-globe.md +80 -0
- package/src/skills/science/references/packages/specutils.md +88 -0
- package/src/skills/science/references/packages/spglib.md +80 -0
- package/src/skills/science/references/packages/squidpy.md +88 -0
- package/src/skills/science/references/packages/starry.md +88 -0
- package/src/skills/science/references/packages/strawberryfields.md +73 -0
- package/src/skills/science/references/packages/su2.md +80 -0
- package/src/skills/science/references/packages/sunny-jl.md +73 -0
- package/src/skills/science/references/packages/sw4.md +73 -0
- package/src/skills/science/references/packages/swift.md +88 -0
- package/src/skills/science/references/packages/tdnegf.md +73 -0
- package/src/skills/science/references/packages/tenpy.md +73 -0
- package/src/skills/science/references/packages/thermo.md +73 -0
- package/src/skills/science/references/packages/tkwant.md +73 -0
- package/src/skills/science/references/packages/tvb-root.md +73 -0
- package/src/skills/science/references/packages/uproot5.md +73 -0
- package/src/skills/science/references/packages/vampire.md +80 -0
- package/src/skills/science/references/packages/wannier_tools.md +73 -0
- package/src/skills/science/references/packages/warpx.md +80 -0
- package/src/skills/science/references/packages/wrf.md +73 -0
- package/src/skills/science/references/packages/xtb.md +88 -0
- package/src/skills/science/references/packages/yt.md +73 -0
- package/src/skills/science/references/science-task-brief-template.md +71 -0
- package/src/skills/scout/SKILL.md +83 -425
- package/src/skills/scout/references/literature-scout-template.md +5 -24
- package/src/skills/scout/references/operational-guidance.md +191 -0
- package/src/skills/scout/references/paper-triage-playbook.md +11 -35
- package/src/skills/write/SKILL.md +744 -1246
- package/src/skills/write/references/experiments_analysis_patterns.md +129 -0
- package/src/skills/write/references/oral_package_patterns.md +252 -0
- package/src/skills/write/references/oral_writing_principles.md +291 -0
- package/src/skills/write/references/section_rewrite_checklist.md +234 -0
- package/src/tui/dist/app/AppContainer.js +1314 -27
- package/src/tui/dist/components/Composer.js +26 -1
- package/src/tui/dist/components/ConfigScreen.js +2 -1
- package/src/tui/dist/components/InputPrompt.js +25 -9
- package/src/tui/dist/components/MainContent.js +18 -3
- package/src/tui/dist/components/QuestScreen.js +3 -2
- package/src/tui/dist/components/UtilityScreen.js +37 -0
- package/src/tui/dist/hooks/useSafeInput.js +10 -0
- package/src/tui/dist/index.js +13 -1
- package/src/tui/dist/layouts/DefaultAppLayout.js +11 -8
- package/src/tui/dist/lib/api.js +89 -1
- package/src/tui/package.json +1 -1
- package/src/ui/dist/assets/{AnalysisPlugin-DnSm0GZn.js → AnalysisPlugin-CA94NGmI.js} +1 -1
- package/src/ui/dist/assets/CliPlugin-DHBzphZU.js +79 -0
- package/src/ui/dist/assets/CodeEditorPlugin-BOFwD2rn.js +2 -0
- package/src/ui/dist/assets/{CodeViewerPlugin-itb0tltR.js → CodeViewerPlugin-CqDpgjik.js} +4 -4
- package/src/ui/dist/assets/{DocViewerPlugin-DqKkiCI6.js → DocViewerPlugin-UDBgt8-4.js} +3 -3
- package/src/ui/dist/assets/GitCommitViewerPlugin-BmHtZ0bZ.js +6 -0
- package/src/ui/dist/assets/{GitDiffViewerPlugin-DxL2ezFG.js → GitDiffViewerPlugin-CAxjNorQ.js} +2 -2
- package/src/ui/dist/assets/{GitSnapshotViewer-B_RQm1YZ.js → GitSnapshotViewer-CweA6VON.js} +2 -2
- package/src/ui/dist/assets/{ImageViewerPlugin-tHqlXY3n.js → ImageViewerPlugin-C8wHGvGN.js} +5 -5
- package/src/ui/dist/assets/LabPlugin-COyyLUol.js +32 -0
- package/src/ui/dist/assets/{LatexPlugin-B495DTXC.js → LatexPlugin-BQjAaA5J.js} +4 -4
- package/src/ui/dist/assets/{MarkdownViewerPlugin-DG28-61B.js → MarkdownViewerPlugin-Dy1NE2dI.js} +3 -3
- package/src/ui/dist/assets/{MarketplacePlugin-BiOGT-Kj.js → MarketplacePlugin-DMIZtEJ2.js} +2 -2
- package/src/ui/dist/assets/NotebookEditor-CFHMq_Qt.js +91 -0
- package/src/ui/dist/assets/{NotebookEditor-CVsj8h_T.js → NotebookEditor-WFyd8Ybt.js} +23 -23
- package/src/ui/dist/assets/{PdfLoader-CASDQmxJ.js → PdfLoader-CLE5u5TS.js} +3 -3
- package/src/ui/dist/assets/{PdfMarkdownPlugin-BFhwoKsY.js → PdfMarkdownPlugin-_iNK_H83.js} +1 -1
- package/src/ui/dist/assets/PdfViewerPlugin-DgWsbInT.js +22 -0
- package/src/ui/dist/assets/SearchPlugin-DrZmn5iw.js +11 -0
- package/src/ui/dist/assets/{TextViewerPlugin-CB4DYfWO.js → TextViewerPlugin-D1-T3aC7.js} +4 -4
- package/src/ui/dist/assets/branding/runner-claude.svg +107 -0
- package/src/ui/dist/assets/branding/runner-codex.svg +10 -0
- package/src/ui/dist/assets/branding/runner-kimi.svg +14 -0
- package/src/ui/dist/assets/branding/runner-opencode.svg +7 -0
- package/src/ui/dist/assets/cli-store-CoZ-x5Ip.js +1 -0
- package/src/ui/dist/assets/{code-DLC6G24T.js → code-DbsmSd3Y.js} +1 -1
- package/src/ui/dist/assets/file-diff-panel-DsvyRz47.js +1 -0
- package/src/ui/dist/assets/{wrap-text-CwMn-iqb.js → file-jump-queue-DeQBikaw.js} +3 -3
- package/src/ui/dist/assets/{file-socket-Cu4Qln7Y.js → file-socket-DA5XIx88.js} +1 -1
- package/src/ui/dist/assets/fonts/ds-fonts.css +50 -4
- package/src/ui/dist/assets/images/deepxiv/register-guide.png +0 -0
- package/src/ui/dist/assets/index-39vY9LmZ.js +1 -0
- package/src/ui/dist/assets/{index-wQ7RIIRd.js → index-BsO46tJA.js} +1 -1
- package/src/ui/dist/assets/index-CHzJ2xtB.js +3530 -0
- package/src/ui/dist/assets/index-DH-zxoZ3.css +33 -0
- package/src/ui/dist/assets/{plugin-notebook-HbW2K-1c.js → plugin-notebook-JRhysCqj.js} +2 -2
- package/src/ui/dist/assets/{project-sync-CsX08Qno.js → project-sync-DPmWKmKD.js} +1 -1
- package/src/ui/dist/assets/{zoom-out-R-GWEhzS.js → zoom-out-DAukFWen.js} +3 -3
- package/src/ui/dist/index.html +3 -3
- package/src/skills/analysis-campaign/references/artifact-orchestration.md +0 -58
- package/src/skills/baseline/references/memory-playbook.md +0 -40
- package/src/skills/baseline/references/publishable-baseline-package.md +0 -30
- package/src/skills/write/references/outline-evidence-contract-example.md +0 -107
- package/src/skills/write/references/paper-experiment-matrix-template.md +0 -131
- package/src/skills/write/references/paper-section-playbook.md +0 -64
- package/src/skills/write/references/reviewer-first-writing.md +0 -64
- package/src/skills/write/references/revision-checklist.md +0 -70
- package/src/skills/write/references/section-contracts.md +0 -82
- package/src/skills/write/references/sentence-level-proofing.md +0 -49
- package/src/ui/dist/assets/AiManusChatView-COFACy7V.js +0 -204
- package/src/ui/dist/assets/CliPlugin-CvwCmDQ5.js +0 -109
- package/src/ui/dist/assets/CodeEditorPlugin-cOqSa0xq.js +0 -2
- package/src/ui/dist/assets/GitCommitViewerPlugin-DVgNHBCS.js +0 -1
- package/src/ui/dist/assets/LabCopilotPanel-ClMbq5Yu.js +0 -14
- package/src/ui/dist/assets/LabPlugin-L_SuE8ow.js +0 -22
- package/src/ui/dist/assets/NotebookEditor-C-4Kt1p9.js +0 -81
- package/src/ui/dist/assets/PdfViewerPlugin-DcOzU9vd.js +0 -17
- package/src/ui/dist/assets/SearchPlugin-CHj7M58O.js +0 -16
- package/src/ui/dist/assets/VNCViewer-CjlbyCB3.js +0 -11
- package/src/ui/dist/assets/bot-CFkZY-JP.js +0 -6
- package/src/ui/dist/assets/chevron-up-Dq5ofbht.js +0 -6
- package/src/ui/dist/assets/file-content-Dv4LoZec.js +0 -1
- package/src/ui/dist/assets/file-diff-panel-Denq-lC3.js +0 -1
- package/src/ui/dist/assets/file-jump-queue-DA-SdG__.js +0 -1
- package/src/ui/dist/assets/git-commit-horizontal-BUh6G52n.js +0 -6
- package/src/ui/dist/assets/image-B9HUUddG.js +0 -6
- package/src/ui/dist/assets/index-B2B1sg-M.js +0 -1
- package/src/ui/dist/assets/index-Cgla8biy.css +0 -33
- package/src/ui/dist/assets/index-DRyx7vAc.js +0 -1
- package/src/ui/dist/assets/index-Gbl53BNp.js +0 -2496
- package/src/ui/dist/assets/pdf-effect-queue-ZtnHFCAi.js +0 -6
- package/src/ui/dist/assets/popover-DL6h35vr.js +0 -1
- package/src/ui/dist/assets/select-DvmXt1yY.js +0 -11
- package/src/ui/dist/assets/sigma-7jpXazui.js +0 -6
- package/src/ui/dist/assets/trash-xA7kFt8i.js +0 -11
- package/src/ui/dist/assets/useCliAccess-DsMwDjOp.js +0 -1
- package/src/ui/dist/assets/useFileDiffOverlay-FuhcnKiw.js +0 -1
|
@@ -0,0 +1,211 @@
|
|
|
1
|
+
id: aisb.t3.023_guidedembed
|
|
2
|
+
name: 'GSTransform: Guided Space Transformation for Instruction-Following Text Embeddings'
|
|
3
|
+
version: 0.1.0
|
|
4
|
+
one_line: Lightweight transformation of pretrained text embeddings to follow user
|
|
5
|
+
instructions without corpus re-encoding, achieving 6-300x speedup.
|
|
6
|
+
task_description: 'This packaged benchmark covers GSTransform, a novel instruction-following
|
|
7
|
+
text embedding framework. Unlike prior approaches that require re-encoding the entire
|
|
8
|
+
corpus for each instruction (O(m×n) forward passes), GSTransform transforms pre-computed
|
|
9
|
+
embeddings in real time using a lightweight model. The two-stage pipeline consists
|
|
10
|
+
of: (1) Instruction-based Label Construction where an LLM generates instruction-aware
|
|
11
|
+
labels from a small sampled corpus, and (2) Label-guided Embedding Transformation
|
|
12
|
+
where a transformation model maps generic embeddings to an instruction-aligned semantic
|
|
13
|
+
space using contrastive and reconstruction losses. The benchmark evaluates on three
|
|
14
|
+
instruction-awareness tasks—clustering, semantic textual similarity (STS), and triplet
|
|
15
|
+
alignment—across nine real-world datasets. Evaluation requires configuring the dataset
|
|
16
|
+
in data.py, the instruction in label_construction.py, and an OpenAI API key in util.py.
|
|
17
|
+
|
|
18
|
+
'
|
|
19
|
+
task_mode: experiment_driven
|
|
20
|
+
requires_execution: true
|
|
21
|
+
requires_paper: true
|
|
22
|
+
integrity_level: cas_plus_canary
|
|
23
|
+
snapshot_status: runnable
|
|
24
|
+
support_level: advanced
|
|
25
|
+
time_band: 6-24h
|
|
26
|
+
cost_band: medium
|
|
27
|
+
difficulty: medium
|
|
28
|
+
data_access: public
|
|
29
|
+
primary_outputs:
|
|
30
|
+
- v_measure
|
|
31
|
+
- nmi
|
|
32
|
+
- sts_score
|
|
33
|
+
- triplet_score
|
|
34
|
+
- embedding_checkpoint
|
|
35
|
+
launch_profiles:
|
|
36
|
+
- id: quick_check
|
|
37
|
+
label: Quick Check
|
|
38
|
+
description: 'Run the clustering-oriented evaluation route on a prepared embedding
|
|
39
|
+
setup. Skips label construction and training; requires pre-computed embeddings
|
|
40
|
+
and labeled classification results from prior runs.
|
|
41
|
+
|
|
42
|
+
'
|
|
43
|
+
estimated_time: 10-30m
|
|
44
|
+
- id: full_pipeline
|
|
45
|
+
label: Full Pipeline
|
|
46
|
+
description: 'Construct instruction-aware labels using an LLM, train the GSTransform
|
|
47
|
+
transformation model, and run the full packaged evaluation pipeline including
|
|
48
|
+
clustering, STS, and triplet evaluations.
|
|
49
|
+
|
|
50
|
+
'
|
|
51
|
+
estimated_time: 4-12h
|
|
52
|
+
requires_api_key: true
|
|
53
|
+
dataset_download:
|
|
54
|
+
primary_method: direct_archive
|
|
55
|
+
sources:
|
|
56
|
+
- url: https://deepscientist.cc/AISB/023_guidedembed
|
|
57
|
+
archive_type: zip
|
|
58
|
+
notes:
|
|
59
|
+
- Download the paper-23-GuidedEmbed archive
|
|
60
|
+
- Contains pretrained UAE-Large-V1 model under uae_model/
|
|
61
|
+
- Configure dataset path in data.py (line 20) before running
|
|
62
|
+
- Configure instruction in label_construction.py (line 289)
|
|
63
|
+
- Requires OpenAI API key in util.py (line 8) for label construction
|
|
64
|
+
credential_requirements:
|
|
65
|
+
mode: api_key_required
|
|
66
|
+
items:
|
|
67
|
+
- name: openai_api_key
|
|
68
|
+
description: OpenAI API key for LLM-based label construction and taxonomy generation
|
|
69
|
+
scope: label_construction.py
|
|
70
|
+
notes:
|
|
71
|
+
- API key only needed during label_construction stage
|
|
72
|
+
- LLM calls are limited to a small annotated subset (5000 samples recommended)
|
|
73
|
+
- Cost is minimal compared to full corpus re-encoding approaches
|
|
74
|
+
resources:
|
|
75
|
+
minimum:
|
|
76
|
+
cpu_cores: 8
|
|
77
|
+
ram_gb: 32
|
|
78
|
+
disk_gb: 50
|
|
79
|
+
gpu_count: 1
|
|
80
|
+
gpu_vram_gb: 12
|
|
81
|
+
recommended:
|
|
82
|
+
cpu_cores: 16
|
|
83
|
+
ram_gb: 64
|
|
84
|
+
disk_gb: 100
|
|
85
|
+
gpu_count: 1
|
|
86
|
+
gpu_vram_gb: 24
|
|
87
|
+
environment:
|
|
88
|
+
python: '3.9'
|
|
89
|
+
cuda: '11.8'
|
|
90
|
+
pytorch: 2.0.0
|
|
91
|
+
flash_attn: null
|
|
92
|
+
key_packages:
|
|
93
|
+
- name: torch
|
|
94
|
+
version: '>=2.0'
|
|
95
|
+
- name: numpy
|
|
96
|
+
version: '>=1.24'
|
|
97
|
+
- name: scikit-learn
|
|
98
|
+
version: '>=1.3'
|
|
99
|
+
- name: sentence-transformers
|
|
100
|
+
version: '>=2.2'
|
|
101
|
+
- name: openai
|
|
102
|
+
version: '>=1.0'
|
|
103
|
+
notes:
|
|
104
|
+
- Full dependency set in bundled requirements.txt
|
|
105
|
+
- UAE-Large-V1 pretrained encoder bundled in uae_model/ directory
|
|
106
|
+
- CUDA required for efficient embedding transformation and evaluation
|
|
107
|
+
risk_flags:
|
|
108
|
+
- api_cost
|
|
109
|
+
- disk_io_bottleneck
|
|
110
|
+
risk_notes:
|
|
111
|
+
- API key consumption limited to label construction phase only
|
|
112
|
+
- Embedding cache (embeddings.pkl) can be large; ensure adequate disk space
|
|
113
|
+
- Training uses contrastive loss with margin; adjust MARGIN (default 1.0) in train.py
|
|
114
|
+
for stability
|
|
115
|
+
recommended_when: 'Use this benchmark when you need a compact NLP training task around
|
|
116
|
+
instruction-aware embeddings with clustering, STS, or triplet evaluation metrics.
|
|
117
|
+
Ideal for exploring efficient alternatives to full corpus re-encoding methods like
|
|
118
|
+
Instructor or InBedder.
|
|
119
|
+
|
|
120
|
+
'
|
|
121
|
+
not_recommended_when: 'Do not use this if you need a very large multimodal benchmark,
|
|
122
|
+
full LLM finetuning workflows, or evaluation scenarios without any API access. This
|
|
123
|
+
benchmark requires at least one GPU and an OpenAI API key for the label construction
|
|
124
|
+
pipeline.
|
|
125
|
+
|
|
126
|
+
'
|
|
127
|
+
paper:
|
|
128
|
+
title: 'Don''t Reinvent the Wheel: Efficient Instruction-Following Text Embedding
|
|
129
|
+
based on Guided Space Transformation'
|
|
130
|
+
venue: ACL 2025 Oral
|
|
131
|
+
year: 2025
|
|
132
|
+
url: https://arxiv.org/abs/2505.24754
|
|
133
|
+
code_url: https://github.com/YingchaojieFeng/GSTransform
|
|
134
|
+
download:
|
|
135
|
+
url: https://github.com/ResearAI/DeepScientist/releases/download/aisb-v0.0.1/aisb.t3.023_guidedembed.zip
|
|
136
|
+
archive_type: zip
|
|
137
|
+
local_dir_name: paper-23-GuidedEmbed
|
|
138
|
+
provider: github_release
|
|
139
|
+
repo: ResearAI/DeepScientist
|
|
140
|
+
tag: aisb-v0.0.1
|
|
141
|
+
asset_name: aisb.t3.023_guidedembed.zip
|
|
142
|
+
sha256: 69f98b98d8db4889e5b7063ee007a61b2d823f1ac03114c784e6f02418cc7640
|
|
143
|
+
size_bytes: 1470047
|
|
144
|
+
display:
|
|
145
|
+
palette_seed: sage-indigo-embed
|
|
146
|
+
art_style: semantic-minimal
|
|
147
|
+
accent_priority: medium
|
|
148
|
+
image_path: ../image/023_aisb.t3.023_guidedembed.jpg
|
|
149
|
+
metric_evidence:
|
|
150
|
+
v_measure:
|
|
151
|
+
origin: run_eval_pipeline.py
|
|
152
|
+
source_ref: FINAL V-MEASURE output line
|
|
153
|
+
evaluation_protocol: clustering_with_oracle_labels
|
|
154
|
+
nmi:
|
|
155
|
+
origin: evaluate_cluster.py
|
|
156
|
+
source_ref: sklearn.metrics.normalized_mutual_info_score
|
|
157
|
+
evaluation_protocol: kmeans_clustering
|
|
158
|
+
sts_score:
|
|
159
|
+
origin: evaluate_sts.py
|
|
160
|
+
source_ref: cosine_similarity_comparison
|
|
161
|
+
evaluation_protocol: semantic_textual_similarity_benchmark
|
|
162
|
+
triplet_score:
|
|
163
|
+
origin: evaluate_triplet.py
|
|
164
|
+
source_ref: triplet_accuracy_evaluation
|
|
165
|
+
evaluation_protocol: instruction_aware_triplet_alignment
|
|
166
|
+
execution_notes:
|
|
167
|
+
pipeline_order:
|
|
168
|
+
- step: data.py
|
|
169
|
+
description: Load and preprocess dataset; configure DATASET_PATH
|
|
170
|
+
- step: label_construction.py
|
|
171
|
+
description: Generate instruction-aware taxonomy and labels using LLM
|
|
172
|
+
- step: train.py
|
|
173
|
+
description: Train GSTransform model on labeled embeddings
|
|
174
|
+
- step: evaluate_cluster.py
|
|
175
|
+
description: Evaluate on clustering task (primary V-measure, NMI)
|
|
176
|
+
- step: evaluate_sts.py
|
|
177
|
+
description: Evaluate on semantic textual similarity task
|
|
178
|
+
- step: evaluate_triplet.py
|
|
179
|
+
description: Evaluate on triplet alignment task
|
|
180
|
+
checkpoints:
|
|
181
|
+
- path: cache/model.pth
|
|
182
|
+
description: Trained GSTransform transformation model
|
|
183
|
+
- path: cache/embeddings.pkl
|
|
184
|
+
description: Pre-computed and cached base embeddings
|
|
185
|
+
- path: cache/classifications_xxx.json
|
|
186
|
+
description: LLM-generated instruction-aware labels
|
|
187
|
+
capability_tags:
|
|
188
|
+
- research_code_optimization
|
|
189
|
+
- text_embeddings
|
|
190
|
+
- representation_learning
|
|
191
|
+
- instruction_following
|
|
192
|
+
- nlp
|
|
193
|
+
- efficient_inference
|
|
194
|
+
- contrastive_learning
|
|
195
|
+
aisb_direction: T3
|
|
196
|
+
track_fit:
|
|
197
|
+
- paper_track
|
|
198
|
+
- benchmark_track
|
|
199
|
+
commercial:
|
|
200
|
+
annual_fee: null
|
|
201
|
+
metadata:
|
|
202
|
+
base_encoder: WhereIsAI/UAE-Large-V1
|
|
203
|
+
transformation_output_dim: 128
|
|
204
|
+
training_batch_size: 512
|
|
205
|
+
default_epochs: 500
|
|
206
|
+
default_learning_rate: 0.0001
|
|
207
|
+
default_margin: 1.0
|
|
208
|
+
patience: 50
|
|
209
|
+
n_train_samples: 5000
|
|
210
|
+
datasets_evaluated: 9
|
|
211
|
+
speedup_factor: 6-300x
|
|
@@ -0,0 +1,189 @@
|
|
|
1
|
+
id: aisb.t3.023_guidedembed
|
|
2
|
+
name: 'GSTransform:面向指令跟随的文本嵌入引导式空间变换'
|
|
3
|
+
version: 0.1.0
|
|
4
|
+
one_line: 轻量级预训练文本嵌入变换,无需语料库重编码即可跟随用户指令,实现6-300倍加速。
|
|
5
|
+
task_description: '本打包基准测试涵盖GSTransform,一个新颖的指令跟随文本嵌入框架。与先前需要为每条指令重编码整个语料库的方法(O(m×n)次前向传播)不同,GSTransform使用轻量级模型实时变换预计算嵌入。两阶段流程包括:(1)基于指令的标签构建,其中LLM从小规模采样语料库生成指令感知标签;(2)标签引导的嵌入变换,其中变换模型使用对比损失和重构损失将通用嵌入映射到指令对齐的语义空间。基准测试在三个指令感知任务上评估——聚类、语义文本相似度(STS)和三元组对齐——涵盖九个真实世界数据集。评估需要在data.py中配置数据集,在label_construction.py中配置指令,在util.py中配置OpenAI API密钥。
|
|
6
|
+
|
|
7
|
+
'
|
|
8
|
+
task_mode: experiment_driven
|
|
9
|
+
requires_execution: true
|
|
10
|
+
requires_paper: true
|
|
11
|
+
integrity_level: cas_plus_canary
|
|
12
|
+
snapshot_status: runnable
|
|
13
|
+
support_level: advanced
|
|
14
|
+
time_band: 6-24h
|
|
15
|
+
cost_band: medium
|
|
16
|
+
difficulty: medium
|
|
17
|
+
data_access: public
|
|
18
|
+
primary_outputs:
|
|
19
|
+
- v_measure
|
|
20
|
+
- nmi
|
|
21
|
+
- sts_score
|
|
22
|
+
- triplet_score
|
|
23
|
+
- embedding_checkpoint
|
|
24
|
+
launch_profiles:
|
|
25
|
+
- id: quick_check
|
|
26
|
+
label: 快速检查
|
|
27
|
+
description: '在已准备好的嵌入设置上运行面向聚类的评估流程。跳过标签构建和训练阶段,需要来自先前运行预计算的嵌入和带标签的分类结果。
|
|
28
|
+
|
|
29
|
+
'
|
|
30
|
+
estimated_time: 10-30m
|
|
31
|
+
- id: full_pipeline
|
|
32
|
+
label: 完整流程
|
|
33
|
+
description: '使用LLM构建指令感知标签,训练GSTransform变换模型,并运行完整的打包评估流程,包括聚类、STS和三元组评估。
|
|
34
|
+
|
|
35
|
+
'
|
|
36
|
+
estimated_time: 4-12h
|
|
37
|
+
requires_api_key: true
|
|
38
|
+
dataset_download:
|
|
39
|
+
primary_method: direct_archive
|
|
40
|
+
sources:
|
|
41
|
+
- url: https://deepscientist.cc/AISB/023_guidedembed
|
|
42
|
+
archive_type: zip
|
|
43
|
+
notes:
|
|
44
|
+
- 下载paper-23-GuidedEmbed压缩包
|
|
45
|
+
- 包含uae_model/目录下的预训练UAE-Large-V1模型
|
|
46
|
+
- 运行前在data.py(第20行)配置数据集路径
|
|
47
|
+
- 在label_construction.py(第289行)配置指令
|
|
48
|
+
- 在util.py(第8行)需要OpenAI API密钥用于标签构建
|
|
49
|
+
credential_requirements:
|
|
50
|
+
mode: api_key_required
|
|
51
|
+
items:
|
|
52
|
+
- name: openai_api_key
|
|
53
|
+
description: 用于LLM标签构建和分类生成的OpenAI API密钥
|
|
54
|
+
scope: label_construction.py
|
|
55
|
+
notes:
|
|
56
|
+
- API密钥仅在标签构建阶段需要
|
|
57
|
+
- LLM调用仅限于小规模标注子集(建议5000个样本)
|
|
58
|
+
- 相比完整语料库重编码方法,成本极低
|
|
59
|
+
resources:
|
|
60
|
+
minimum:
|
|
61
|
+
cpu_cores: 8
|
|
62
|
+
ram_gb: 32
|
|
63
|
+
disk_gb: 50
|
|
64
|
+
gpu_count: 1
|
|
65
|
+
gpu_vram_gb: 12
|
|
66
|
+
recommended:
|
|
67
|
+
cpu_cores: 16
|
|
68
|
+
ram_gb: 64
|
|
69
|
+
disk_gb: 100
|
|
70
|
+
gpu_count: 1
|
|
71
|
+
gpu_vram_gb: 24
|
|
72
|
+
environment:
|
|
73
|
+
python: '3.9'
|
|
74
|
+
cuda: '11.8'
|
|
75
|
+
pytorch: 2.0.0
|
|
76
|
+
flash_attn: null
|
|
77
|
+
key_packages:
|
|
78
|
+
- name: torch
|
|
79
|
+
version: '>=2.0'
|
|
80
|
+
- name: numpy
|
|
81
|
+
version: '>=1.24'
|
|
82
|
+
- name: scikit-learn
|
|
83
|
+
version: '>=1.3'
|
|
84
|
+
- name: sentence-transformers
|
|
85
|
+
version: '>=2.2'
|
|
86
|
+
- name: openai
|
|
87
|
+
version: '>=1.0'
|
|
88
|
+
notes:
|
|
89
|
+
- 完整依赖项在打包的requirements.txt中
|
|
90
|
+
- UAE-Large-V1预训练编码器打包在uae_model/目录中
|
|
91
|
+
- 需要CUDA以支持高效的嵌入变换和评估
|
|
92
|
+
risk_flags:
|
|
93
|
+
- api_cost
|
|
94
|
+
- disk_io_bottleneck
|
|
95
|
+
risk_notes:
|
|
96
|
+
- API密钥消耗仅限于标签构建阶段
|
|
97
|
+
- 嵌入缓存(embeddings.pkl)可能较大,请确保有足够的磁盘空间
|
|
98
|
+
- 训练使用带间隔(margin)的对比损失;可在train.py中调整MARGIN(默认1.0)以提高稳定性
|
|
99
|
+
recommended_when: '当需要一个围绕指令感知嵌入的紧凑NLP训练任务时使用本基准测试,包含聚类、STS或三元组评估指标。非常适合探索Instructor或InBedder等完整语料库重编码方法的高效替代方案。
|
|
100
|
+
|
|
101
|
+
'
|
|
102
|
+
not_recommended_when: '如果需要超大规模多模态基准测试、完整LLM微调工作流或完全没有API访问权限的评估场景,请勿使用本基准测试。本基准测试至少需要一个GPU和OpenAI API密钥用于标签构建流程。
|
|
103
|
+
|
|
104
|
+
'
|
|
105
|
+
paper:
|
|
106
|
+
title: 'Don''t Reinvent the Wheel: Efficient Instruction-Following Text Embedding
|
|
107
|
+
based on Guided Space Transformation'
|
|
108
|
+
venue: ACL 2025 Oral
|
|
109
|
+
year: 2025
|
|
110
|
+
url: https://arxiv.org/abs/2505.24754
|
|
111
|
+
code_url: https://github.com/YingchaojieFeng/GSTransform
|
|
112
|
+
download:
|
|
113
|
+
url: https://github.com/ResearAI/DeepScientist/releases/download/aisb-v0.0.1/aisb.t3.023_guidedembed.zip
|
|
114
|
+
archive_type: zip
|
|
115
|
+
local_dir_name: paper-23-GuidedEmbed
|
|
116
|
+
provider: github_release
|
|
117
|
+
repo: ResearAI/DeepScientist
|
|
118
|
+
tag: aisb-v0.0.1
|
|
119
|
+
asset_name: aisb.t3.023_guidedembed.zip
|
|
120
|
+
sha256: 69f98b98d8db4889e5b7063ee007a61b2d823f1ac03114c784e6f02418cc7640
|
|
121
|
+
size_bytes: 1470047
|
|
122
|
+
display:
|
|
123
|
+
palette_seed: sage-indigo-embed
|
|
124
|
+
art_style: semantic-minimal
|
|
125
|
+
accent_priority: medium
|
|
126
|
+
image_path: ../image/023_aisb.t3.023_guidedembed.jpg
|
|
127
|
+
metric_evidence:
|
|
128
|
+
v_measure:
|
|
129
|
+
origin: run_eval_pipeline.py
|
|
130
|
+
source_ref: FINAL V-MEASURE output line
|
|
131
|
+
evaluation_protocol: clustering_with_oracle_labels
|
|
132
|
+
nmi:
|
|
133
|
+
origin: evaluate_cluster.py
|
|
134
|
+
source_ref: sklearn.metrics.normalized_mutual_info_score
|
|
135
|
+
evaluation_protocol: kmeans_clustering
|
|
136
|
+
sts_score:
|
|
137
|
+
origin: evaluate_sts.py
|
|
138
|
+
source_ref: cosine_similarity_comparison
|
|
139
|
+
evaluation_protocol: semantic_textual_similarity_benchmark
|
|
140
|
+
triplet_score:
|
|
141
|
+
origin: evaluate_triplet.py
|
|
142
|
+
source_ref: triplet_accuracy_evaluation
|
|
143
|
+
evaluation_protocol: instruction_aware_triplet_alignment
|
|
144
|
+
execution_notes:
|
|
145
|
+
pipeline_order:
|
|
146
|
+
- step: data.py
|
|
147
|
+
description: 加载和预处理数据集;配置DATASET_PATH
|
|
148
|
+
- step: label_construction.py
|
|
149
|
+
description: 使用LLM生成指令感知分类和标签
|
|
150
|
+
- step: train.py
|
|
151
|
+
description: 在带标签的嵌入上训练GSTransform模型
|
|
152
|
+
- step: evaluate_cluster.py
|
|
153
|
+
description: 在聚类任务上评估(主要V-measure、NMI)
|
|
154
|
+
- step: evaluate_sts.py
|
|
155
|
+
description: 在语义文本相似度任务上评估
|
|
156
|
+
- step: evaluate_triplet.py
|
|
157
|
+
description: 在三元组对齐任务上评估
|
|
158
|
+
checkpoints:
|
|
159
|
+
- path: cache/model.pth
|
|
160
|
+
description: 训练好的GSTransform变换模型
|
|
161
|
+
- path: cache/embeddings.pkl
|
|
162
|
+
description: 预计算并缓存的基础嵌入
|
|
163
|
+
- path: cache/classifications_xxx.json
|
|
164
|
+
description: LLM生成的指令感知标签
|
|
165
|
+
capability_tags:
|
|
166
|
+
- research_code_optimization
|
|
167
|
+
- text_embeddings
|
|
168
|
+
- representation_learning
|
|
169
|
+
- instruction_following
|
|
170
|
+
- nlp
|
|
171
|
+
- efficient_inference
|
|
172
|
+
- contrastive_learning
|
|
173
|
+
aisb_direction: T3
|
|
174
|
+
track_fit:
|
|
175
|
+
- paper_track
|
|
176
|
+
- benchmark_track
|
|
177
|
+
commercial:
|
|
178
|
+
annual_fee: null
|
|
179
|
+
metadata:
|
|
180
|
+
base_encoder: WhereIsAI/UAE-Large-V1
|
|
181
|
+
transformation_output_dim: 128
|
|
182
|
+
training_batch_size: 512
|
|
183
|
+
default_epochs: 500
|
|
184
|
+
default_learning_rate: 0.0001
|
|
185
|
+
default_margin: 1.0
|
|
186
|
+
patience: 50
|
|
187
|
+
n_train_samples: 5000
|
|
188
|
+
datasets_evaluated: 9
|
|
189
|
+
speedup_factor: 6-300x
|
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
id: aisb.t3.024_outputcentric
|
|
2
|
+
name: Enhancing Automated Interpretability with Output-Centric Feature Descriptions
|
|
3
|
+
version: 0.1.0
|
|
4
|
+
one_line: Two-faceted interpretability evaluation pipeline describing LLM features
|
|
5
|
+
through input-activation and output-behavior lenses.
|
|
6
|
+
task_description: 'This packaged benchmark covers an interpretability evaluation pipeline
|
|
7
|
+
that describes features through their output behavior. The task characterizes latent
|
|
8
|
+
features in large language models by what they do at the model output level, proposing
|
|
9
|
+
output-centric methods (VocabProj, TokenChange) alongside the standard MaxAct approach.
|
|
10
|
+
The benchmark evaluates feature descriptions along two complementary dimensions:
|
|
11
|
+
input-based evaluation (how accurately descriptions identify activating inputs)
|
|
12
|
+
and output-based evaluation (how effectively descriptions capture causal impact
|
|
13
|
+
on model generation). Experiments span neuron-aligned and sparse autoencoder (SAE)
|
|
14
|
+
features from residual and MLP layers across multiple LLMs including GPT-small,
|
|
15
|
+
LLaMA-3.1-8B, and Gemma-2-2B.
|
|
16
|
+
|
|
17
|
+
'
|
|
18
|
+
task_mode: evaluation_driven
|
|
19
|
+
requires_execution: true
|
|
20
|
+
requires_paper: true
|
|
21
|
+
integrity_level: cas_plus_canary
|
|
22
|
+
snapshot_status: runnable
|
|
23
|
+
support_level: turnkey
|
|
24
|
+
time_band: 1-2h
|
|
25
|
+
cost_band: low
|
|
26
|
+
difficulty: medium
|
|
27
|
+
data_access: public
|
|
28
|
+
primary_outputs:
|
|
29
|
+
- output_based_evaluation_score
|
|
30
|
+
- input_based_evaluation_score
|
|
31
|
+
- feature_descriptions
|
|
32
|
+
launch_profiles:
|
|
33
|
+
- id: notebook_eval
|
|
34
|
+
label: Notebook Eval
|
|
35
|
+
description: Run the packaged feature_descriptions_pipeline.ipynb notebook for output-centric
|
|
36
|
+
feature description evaluation across all model variants.
|
|
37
|
+
dataset_download:
|
|
38
|
+
primary_method: direct_archive
|
|
39
|
+
sources:
|
|
40
|
+
- url: https://deepscientist.cc/AISB/024_outputcentric
|
|
41
|
+
type: primary_archive
|
|
42
|
+
notes:
|
|
43
|
+
- Archive contains pre-computed feature descriptions in descriptions/ directory
|
|
44
|
+
- Includes CSV files for gpt-small, llama-3.1-8b, llama-3.1-8b-it, and gemma-2-2b
|
|
45
|
+
- CPU-only execution feasible for minimum route
|
|
46
|
+
credential_requirements:
|
|
47
|
+
mode: optional_api_keys
|
|
48
|
+
items:
|
|
49
|
+
- name: HF_TOKEN
|
|
50
|
+
description: Hugging Face token for model access
|
|
51
|
+
required: false
|
|
52
|
+
- name: GAI_KEY
|
|
53
|
+
description: Gemini API key for description generation
|
|
54
|
+
required: false
|
|
55
|
+
notes:
|
|
56
|
+
- OpenAI API key, if used, is expected to be set in an environment variable
|
|
57
|
+
- Evaluation can proceed without API keys using pre-computed descriptions
|
|
58
|
+
resources:
|
|
59
|
+
minimum:
|
|
60
|
+
cpu_cores: 8
|
|
61
|
+
ram_gb: 16
|
|
62
|
+
disk_gb: 20
|
|
63
|
+
gpu_count: 0
|
|
64
|
+
gpu_vram_gb: 0
|
|
65
|
+
notes: CPU-only execution supported for minimum route
|
|
66
|
+
recommended:
|
|
67
|
+
cpu_cores: 16
|
|
68
|
+
ram_gb: 32
|
|
69
|
+
disk_gb: 80
|
|
70
|
+
gpu_count: 1
|
|
71
|
+
gpu_vram_gb: 16
|
|
72
|
+
notes: GPU acceleration recommended for full pipeline evaluation
|
|
73
|
+
environment:
|
|
74
|
+
python: '3.10'
|
|
75
|
+
cuda: '11.8'
|
|
76
|
+
pytorch: 2.1.0
|
|
77
|
+
flash_attn: null
|
|
78
|
+
key_packages:
|
|
79
|
+
- transformers
|
|
80
|
+
- torch
|
|
81
|
+
- pandas
|
|
82
|
+
- numpy
|
|
83
|
+
- jupyter
|
|
84
|
+
notes:
|
|
85
|
+
- See bundled README/requirements.txt for full dependency set
|
|
86
|
+
- CPU-only execution plausible for minimum route
|
|
87
|
+
- Additional packages may be required for SAE feature evaluation
|
|
88
|
+
risk_flags:
|
|
89
|
+
- metric_provisional
|
|
90
|
+
- api_key_optional
|
|
91
|
+
risk_notes:
|
|
92
|
+
- Static code audit confirms executable anchors for all staged metrics
|
|
93
|
+
- No benchmark execution was performed in packaging pass
|
|
94
|
+
- Runtime execution required before trusting metric values
|
|
95
|
+
- API keys optional; evaluation can use pre-computed feature descriptions
|
|
96
|
+
recommended_when: 'Use this benchmark when conducting interpretability research on
|
|
97
|
+
LLM features, developing automated description pipelines, comparing input-centric
|
|
98
|
+
versus output-centric description methods, evaluating SAE features, or assessing
|
|
99
|
+
feature description quality for steering applications. Ideal for benchmarking description
|
|
100
|
+
faithfulness across both input-activation and output-behavior dimensions.
|
|
101
|
+
|
|
102
|
+
'
|
|
103
|
+
not_recommended_when: 'Do not use this benchmark if you require a large-scale supervised
|
|
104
|
+
training benchmark, dense multimodal data processing, or retraining-focused workflows.
|
|
105
|
+
Not suitable for tasks requiring dense annotation pipelines or non-LLM model interpretability.
|
|
106
|
+
|
|
107
|
+
'
|
|
108
|
+
paper:
|
|
109
|
+
title: Enhancing Automated Interpretability with Output-Centric Feature Descriptions
|
|
110
|
+
authors:
|
|
111
|
+
- Yoav Gur-Arieh
|
|
112
|
+
- Roy Mayan
|
|
113
|
+
- Chen Agassy
|
|
114
|
+
- Atticus Geiger
|
|
115
|
+
- Mor Geva
|
|
116
|
+
venue: arXiv preprint
|
|
117
|
+
year: 2025
|
|
118
|
+
eprint: '2501.08319'
|
|
119
|
+
primary_class: cs.CL
|
|
120
|
+
url: https://arxiv.org/abs/2501.08319
|
|
121
|
+
code_url: https://github.com/yoavgur/Feature-Descriptions
|
|
122
|
+
download:
|
|
123
|
+
url: https://github.com/ResearAI/DeepScientist/releases/download/aisb-v0.0.1/aisb.t3.024_outputcentric.zip
|
|
124
|
+
archive_type: zip
|
|
125
|
+
local_dir_name: paper-24-OutputCentric
|
|
126
|
+
provider: github_release
|
|
127
|
+
repo: ResearAI/DeepScientist
|
|
128
|
+
tag: aisb-v0.0.1
|
|
129
|
+
asset_name: aisb.t3.024_outputcentric.zip
|
|
130
|
+
sha256: 98fbc91e990b4a6c440895a60947c3f09069c8e5d5cce6283f925f6f27876614
|
|
131
|
+
size_bytes: 3484257
|
|
132
|
+
display:
|
|
133
|
+
palette_seed: ivory-rose-neuron
|
|
134
|
+
art_style: interpretability-atlas
|
|
135
|
+
accent_priority: medium
|
|
136
|
+
image_path: ../image/024_aisb.t3.024_outputcentric.jpg
|
|
137
|
+
capability_tags:
|
|
138
|
+
- research_code_optimization
|
|
139
|
+
- mechanistic_interpretability
|
|
140
|
+
- automated_evaluation
|
|
141
|
+
- large_language_models
|
|
142
|
+
- feature_analysis
|
|
143
|
+
- sparse_autoencoders
|
|
144
|
+
- model_steering
|
|
145
|
+
aisb_direction: T3
|
|
146
|
+
track_fit:
|
|
147
|
+
- paper_track
|
|
148
|
+
- benchmark_track
|
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
id: aisb.t3.024_outputcentric
|
|
2
|
+
name: 通过以输出为中心的特征描述增强自动化可解释性
|
|
3
|
+
version: 0.1.0
|
|
4
|
+
one_line: 双角度可解释性评估流程,从输入激活和输出行为两个视角描述LLM特征。
|
|
5
|
+
task_description: '本基准测试包含一个可解释性评估流程,通过输出行为来描述特征。该任务通过模型输出层面描述大型语言模型中的潜在特征,提出了以输出为中心的方法(VocabProj、TokenChange)以及标准的MaxAct方法。基准测试从两个互补维度评估特征描述:基于输入的评估(描述如何准确识别激活输入)和基于输出的评估(描述如何有效捕捉对模型生成的因果影响)。实验涵盖来自残差层和MLP层的神经元对齐特征和稀疏自编码器(SAE)特征,涉及GPT-small、LLaMA-3.1-8B和Gemma-2-2B等多个LLM。
|
|
6
|
+
|
|
7
|
+
'
|
|
8
|
+
task_mode: evaluation_driven
|
|
9
|
+
requires_execution: true
|
|
10
|
+
requires_paper: true
|
|
11
|
+
integrity_level: cas_plus_canary
|
|
12
|
+
snapshot_status: runnable
|
|
13
|
+
support_level: turnkey
|
|
14
|
+
time_band: 1-2h
|
|
15
|
+
cost_band: low
|
|
16
|
+
difficulty: medium
|
|
17
|
+
data_access: public
|
|
18
|
+
primary_outputs:
|
|
19
|
+
- output_based_evaluation_score
|
|
20
|
+
- input_based_evaluation_score
|
|
21
|
+
- feature_descriptions
|
|
22
|
+
launch_profiles:
|
|
23
|
+
- id: notebook_eval
|
|
24
|
+
label: 笔记本评估
|
|
25
|
+
description: 运行打包好的feature_descriptions_pipeline.ipynb笔记本,对所有模型变体进行以输出为中心的特征描述评估。
|
|
26
|
+
dataset_download:
|
|
27
|
+
primary_method: direct_archive
|
|
28
|
+
sources:
|
|
29
|
+
- url: https://deepscientist.cc/AISB/024_outputcentric
|
|
30
|
+
type: primary_archive
|
|
31
|
+
notes:
|
|
32
|
+
- 压缩包包含descriptions/目录下的预计算特征描述
|
|
33
|
+
- 包含gpt-small、llama-3.1-8b、llama-3.1-8b-it和gemma-2-2b的CSV文件
|
|
34
|
+
- 最低配置可仅使用CPU执行
|
|
35
|
+
credential_requirements:
|
|
36
|
+
mode: optional_api_keys
|
|
37
|
+
items:
|
|
38
|
+
- name: HF_TOKEN
|
|
39
|
+
description: Hugging Face令牌,用于模型访问
|
|
40
|
+
required: false
|
|
41
|
+
- name: GAI_KEY
|
|
42
|
+
description: Gemini API密钥,用于生成描述
|
|
43
|
+
required: false
|
|
44
|
+
notes:
|
|
45
|
+
- 如需使用则应在环境变量中配置OPENAI密钥
|
|
46
|
+
- 无需API密钥即可使用预计算描述进行评估
|
|
47
|
+
resources:
|
|
48
|
+
minimum:
|
|
49
|
+
cpu_cores: 8
|
|
50
|
+
ram_gb: 16
|
|
51
|
+
disk_gb: 20
|
|
52
|
+
gpu_count: 0
|
|
53
|
+
gpu_vram_gb: 0
|
|
54
|
+
notes: 最低配置支持纯CPU执行
|
|
55
|
+
recommended:
|
|
56
|
+
cpu_cores: 16
|
|
57
|
+
ram_gb: 32
|
|
58
|
+
disk_gb: 80
|
|
59
|
+
gpu_count: 1
|
|
60
|
+
gpu_vram_gb: 16
|
|
61
|
+
notes: 建议使用GPU加速以完成完整流程评估
|
|
62
|
+
environment:
|
|
63
|
+
python: '3.10'
|
|
64
|
+
cuda: '11.8'
|
|
65
|
+
pytorch: 2.1.0
|
|
66
|
+
flash_attn: null
|
|
67
|
+
key_packages:
|
|
68
|
+
- transformers
|
|
69
|
+
- torch
|
|
70
|
+
- pandas
|
|
71
|
+
- numpy
|
|
72
|
+
- jupyter
|
|
73
|
+
notes:
|
|
74
|
+
- 详见捆绑的README/requirements.txt获取完整依赖列表
|
|
75
|
+
- 最低配置可仅使用CPU执行
|
|
76
|
+
- SAE特征评估可能需要额外包
|
|
77
|
+
risk_flags:
|
|
78
|
+
- metric_provisional
|
|
79
|
+
- api_key_optional
|
|
80
|
+
risk_notes:
|
|
81
|
+
- 静态代码审查确认所有阶段性指标均有可执行锚点
|
|
82
|
+
- 打包过程中未执行基准测试
|
|
83
|
+
- 在采信指标值之前,需先实际运行基准测试进行验证
|
|
84
|
+
- API密钥可选;可使用预计算特征描述进行评估
|
|
85
|
+
recommended_when: '当您进行LLM特征可解释性研究、开发自动化描述流程、比较以输入为中心与以输出为中心的描述方法、评估SAE特征,或为模型引导(steering)应用评估特征描述质量时,可以使用此基准测试。该基准测试非常适合在输入激活和输出行为两个维度上衡量描述的忠实度。
|
|
86
|
+
|
|
87
|
+
'
|
|
88
|
+
not_recommended_when: '如果您需要大规模监督训练基准测试、密集多模态数据处理或以重训练为重点的工作流程,请勿使用此基准测试。不适用于需要密集标注流程或非LLM模型可解释性的任务。
|
|
89
|
+
|
|
90
|
+
'
|
|
91
|
+
paper:
|
|
92
|
+
title: Enhancing Automated Interpretability with Output-Centric Feature Descriptions
|
|
93
|
+
authors:
|
|
94
|
+
- Yoav Gur-Arieh
|
|
95
|
+
- Roy Mayan
|
|
96
|
+
- Chen Agassy
|
|
97
|
+
- Atticus Geiger
|
|
98
|
+
- Mor Geva
|
|
99
|
+
venue: arXiv preprint
|
|
100
|
+
year: 2025
|
|
101
|
+
eprint: '2501.08319'
|
|
102
|
+
primary_class: cs.CL
|
|
103
|
+
url: https://arxiv.org/abs/2501.08319
|
|
104
|
+
code_url: https://github.com/yoavgur/Feature-Descriptions
|
|
105
|
+
download:
|
|
106
|
+
url: https://github.com/ResearAI/DeepScientist/releases/download/aisb-v0.0.1/aisb.t3.024_outputcentric.zip
|
|
107
|
+
archive_type: zip
|
|
108
|
+
local_dir_name: paper-24-OutputCentric
|
|
109
|
+
provider: github_release
|
|
110
|
+
repo: ResearAI/DeepScientist
|
|
111
|
+
tag: aisb-v0.0.1
|
|
112
|
+
asset_name: aisb.t3.024_outputcentric.zip
|
|
113
|
+
sha256: 98fbc91e990b4a6c440895a60947c3f09069c8e5d5cce6283f925f6f27876614
|
|
114
|
+
size_bytes: 3484257
|
|
115
|
+
display:
|
|
116
|
+
palette_seed: ivory-rose-neuron
|
|
117
|
+
art_style: interpretability-atlas
|
|
118
|
+
accent_priority: medium
|
|
119
|
+
image_path: ../image/024_aisb.t3.024_outputcentric.jpg
|
|
120
|
+
capability_tags:
|
|
121
|
+
- research_code_optimization
|
|
122
|
+
- mechanistic_interpretability
|
|
123
|
+
- automated_evaluation
|
|
124
|
+
- large_language_models
|
|
125
|
+
- feature_analysis
|
|
126
|
+
- sparse_autoencoders
|
|
127
|
+
- model_steering
|
|
128
|
+
aisb_direction: T3
|
|
129
|
+
track_fit:
|
|
130
|
+
- paper_track
|
|
131
|
+
- benchmark_track
|