@researai/deepscientist 1.5.16 → 1.6.0
This diff shows the differences between publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only; it reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +309 -130
- package/AISB/catalog/aisb.b1.agentic_coding.yaml +244 -0
- package/AISB/catalog/aisb.b10.climate_earth.yaml +235 -0
- package/AISB/catalog/aisb.b11.model_efficiency.yaml +231 -0
- package/AISB/catalog/aisb.b12.embodied_ai.yaml +238 -0
- package/AISB/catalog/aisb.b2.agent_systems.yaml +229 -0
- package/AISB/catalog/aisb.b3.self_evolving_rl.yaml +237 -0
- package/AISB/catalog/aisb.b4.lm_reasoning.yaml +240 -0
- package/AISB/catalog/aisb.b5.math_proof.yaml +235 -0
- package/AISB/catalog/aisb.b6.research_process.yaml +243 -0
- package/AISB/catalog/aisb.b7.multimodal_fusion.yaml +232 -0
- package/AISB/catalog/aisb.b8.lifesci_drug.yaml +275 -0
- package/AISB/catalog/aisb.b9.material_science.yaml +237 -0
- package/AISB/catalog/aisb.t3.001_savvy.yaml +159 -0
- package/AISB/catalog/aisb.t3.001_savvy.zh.yaml +121 -0
- package/AISB/catalog/aisb.t3.002_pinet.yaml +189 -0
- package/AISB/catalog/aisb.t3.002_pinet.zh.yaml +130 -0
- package/AISB/catalog/aisb.t3.004_decentralattn.yaml +184 -0
- package/AISB/catalog/aisb.t3.004_decentralattn.zh.yaml +153 -0
- package/AISB/catalog/aisb.t3.005_tsae.yaml +193 -0
- package/AISB/catalog/aisb.t3.005_tsae.zh.yaml +139 -0
- package/AISB/catalog/aisb.t3.006_physense.yaml +194 -0
- package/AISB/catalog/aisb.t3.006_physense.zh.yaml +118 -0
- package/AISB/catalog/aisb.t3.007_reasoningiqa.yaml +169 -0
- package/AISB/catalog/aisb.t3.007_reasoningiqa.zh.yaml +133 -0
- package/AISB/catalog/aisb.t3.008_meanflows.yaml +188 -0
- package/AISB/catalog/aisb.t3.008_meanflows.zh.yaml +140 -0
- package/AISB/catalog/aisb.t3.009_scoremissing.yaml +179 -0
- package/AISB/catalog/aisb.t3.009_scoremissing.zh.yaml +119 -0
- package/AISB/catalog/aisb.t3.010_suitabilityfilter.yaml +221 -0
- package/AISB/catalog/aisb.t3.010_suitabilityfilter.zh.yaml +141 -0
- package/AISB/catalog/aisb.t3.011_osd.yaml +206 -0
- package/AISB/catalog/aisb.t3.011_osd.zh.yaml +163 -0
- package/AISB/catalog/aisb.t3.012_efficientqat.yaml +206 -0
- package/AISB/catalog/aisb.t3.012_efficientqat.zh.yaml +159 -0
- package/AISB/catalog/aisb.t3.013_appl.yaml +152 -0
- package/AISB/catalog/aisb.t3.013_appl.zh.yaml +126 -0
- package/AISB/catalog/aisb.t3.014_piguard.yaml +207 -0
- package/AISB/catalog/aisb.t3.014_piguard.zh.yaml +164 -0
- package/AISB/catalog/aisb.t3.015_frspec.yaml +209 -0
- package/AISB/catalog/aisb.t3.015_frspec.zh.yaml +163 -0
- package/AISB/catalog/aisb.t3.016_mathfusion.yaml +166 -0
- package/AISB/catalog/aisb.t3.016_mathfusion.zh.yaml +145 -0
- package/AISB/catalog/aisb.t3.017_multimodalglp.yaml +171 -0
- package/AISB/catalog/aisb.t3.017_multimodalglp.zh.yaml +122 -0
- package/AISB/catalog/aisb.t3.018_cotsynth.yaml +206 -0
- package/AISB/catalog/aisb.t3.018_cotsynth.zh.yaml +162 -0
- package/AISB/catalog/aisb.t3.019_dyscaleut.yaml +211 -0
- package/AISB/catalog/aisb.t3.019_dyscaleut.zh.yaml +148 -0
- package/AISB/catalog/aisb.t3.020_aristotle.yaml +173 -0
- package/AISB/catalog/aisb.t3.020_aristotle.zh.yaml +119 -0
- package/AISB/catalog/aisb.t3.021_tokenrecycling.yaml +160 -0
- package/AISB/catalog/aisb.t3.021_tokenrecycling.zh.yaml +129 -0
- package/AISB/catalog/aisb.t3.022_chainofreasoning.yaml +204 -0
- package/AISB/catalog/aisb.t3.022_chainofreasoning.zh.yaml +161 -0
- package/AISB/catalog/aisb.t3.023_guidedembed.yaml +211 -0
- package/AISB/catalog/aisb.t3.023_guidedembed.zh.yaml +189 -0
- package/AISB/catalog/aisb.t3.024_outputcentric.yaml +148 -0
- package/AISB/catalog/aisb.t3.024_outputcentric.zh.yaml +131 -0
- package/AISB/catalog/aisb.t3.025_deeper.yaml +143 -0
- package/AISB/catalog/aisb.t3.025_deeper.zh.yaml +116 -0
- package/AISB/catalog/aisb.t3.026_gartkg.yaml +195 -0
- package/AISB/catalog/aisb.t3.026_gartkg.zh.yaml +127 -0
- package/AISB/catalog/aisb.t3.027_citeeval.yaml +182 -0
- package/AISB/catalog/aisb.t3.027_citeeval.zh.yaml +135 -0
- package/AISB/catalog/aisb.t3.028_sbam.yaml +206 -0
- package/AISB/catalog/aisb.t3.028_sbam.zh.yaml +166 -0
- package/AISB/catalog/aisb.t3.029_cdqgeoembed.yaml +224 -0
- package/AISB/catalog/aisb.t3.029_cdqgeoembed.zh.yaml +142 -0
- package/AISB/catalog/aisb.t3.030_processrm.yaml +211 -0
- package/AISB/catalog/aisb.t3.030_processrm.zh.yaml +166 -0
- package/AISB/catalog/aisb.t3.031_circuitstability.yaml +172 -0
- package/AISB/catalog/aisb.t3.031_circuitstability.zh.yaml +134 -0
- package/AISB/catalog/aisb.t3.032_ptsolver.yaml +169 -0
- package/AISB/catalog/aisb.t3.032_ptsolver.zh.yaml +135 -0
- package/AISB/catalog/aisb.t3.033_gcse.yaml +144 -0
- package/AISB/catalog/aisb.t3.033_gcse.zh.yaml +126 -0
- package/AISB/catalog/aisb.t3.034_ensemblewm.yaml +183 -0
- package/AISB/catalog/aisb.t3.034_ensemblewm.zh.yaml +146 -0
- package/AISB/catalog/aisb.t3.035_moralvalueswa.yaml +207 -0
- package/AISB/catalog/aisb.t3.035_moralvalueswa.zh.yaml +165 -0
- package/AISB/catalog/aisb.t3.036_weakstrongpref.yaml +210 -0
- package/AISB/catalog/aisb.t3.036_weakstrongpref.zh.yaml +194 -0
- package/AISB/catalog/aisb.t3.037_dementiamask.yaml +172 -0
- package/AISB/catalog/aisb.t3.037_dementiamask.zh.yaml +132 -0
- package/AISB/catalog/aisb.t3.038_tinysam.yaml +284 -0
- package/AISB/catalog/aisb.t3.038_tinysam.zh.yaml +240 -0
- package/AISB/catalog/aisb.t3.039_calf.yaml +224 -0
- package/AISB/catalog/aisb.t3.039_calf.zh.yaml +194 -0
- package/AISB/catalog/aisb.t3.040_graniteguardian.yaml +199 -0
- package/AISB/catalog/aisb.t3.040_graniteguardian.zh.yaml +174 -0
- package/AISB/catalog/aisb.t3.041_amdm.yaml +149 -0
- package/AISB/catalog/aisb.t3.041_amdm.zh.yaml +137 -0
- package/AISB/catalog/aisb.t3.042_xpatch.yaml +216 -0
- package/AISB/catalog/aisb.t3.042_xpatch.zh.yaml +182 -0
- package/AISB/catalog/aisb.t3.043_vhm.yaml +268 -0
- package/AISB/catalog/aisb.t3.043_vhm.zh.yaml +193 -0
- package/AISB/catalog/aisb.t3.044_rgvi.yaml +224 -0
- package/AISB/catalog/aisb.t3.044_rgvi.zh.yaml +176 -0
- package/AISB/catalog/aisb.t3.045_pslstm.yaml +203 -0
- package/AISB/catalog/aisb.t3.045_pslstm.zh.yaml +179 -0
- package/AISB/catalog/aisb.t3.046_nonstatts.yaml +208 -0
- package/AISB/catalog/aisb.t3.046_nonstatts.zh.yaml +194 -0
- package/AISB/catalog/aisb.t3.047_timepfn.yaml +156 -0
- package/AISB/catalog/aisb.t3.047_timepfn.zh.yaml +124 -0
- package/AISB/catalog/aisb.t3.048_proxyspex.yaml +148 -0
- package/AISB/catalog/aisb.t3.048_proxyspex.zh.yaml +125 -0
- package/AISB/catalog/aisb.t3.049_hogwildinference.yaml +183 -0
- package/AISB/catalog/aisb.t3.049_hogwildinference.zh.yaml +138 -0
- package/AISB/catalog/aisb.t3.050_causalpfn.yaml +214 -0
- package/AISB/catalog/aisb.t3.050_causalpfn.zh.yaml +190 -0
- package/AISB/catalog/aisb.t3.051_flashtp.yaml +169 -0
- package/AISB/catalog/aisb.t3.051_flashtp.zh.yaml +124 -0
- package/AISB/catalog/aisb.t3.052_nsdiff.yaml +155 -0
- package/AISB/catalog/aisb.t3.052_nsdiff.zh.yaml +138 -0
- package/AISB/catalog/aisb.t3.053_k2vae.yaml +158 -0
- package/AISB/catalog/aisb.t3.053_k2vae.zh.yaml +132 -0
- package/AISB/catalog/aisb.t3.054_timebase.yaml +178 -0
- package/AISB/catalog/aisb.t3.054_timebase.zh.yaml +158 -0
- package/AISB/catalog/aisb.t3.055_csbrain.yaml +238 -0
- package/AISB/catalog/aisb.t3.055_csbrain.zh.yaml +184 -0
- package/AISB/catalog/aisb.t3.056_infosam.yaml +224 -0
- package/AISB/catalog/aisb.t3.056_infosam.zh.yaml +189 -0
- package/AISB/catalog/aisb.t3.057_mdreid.yaml +129 -0
- package/AISB/catalog/aisb.t3.057_mdreid.zh.yaml +117 -0
- package/AISB/catalog/aisb.t3.058_mindglitch.yaml +171 -0
- package/AISB/catalog/aisb.t3.058_mindglitch.zh.yaml +145 -0
- package/AISB/catalog/aisb.t3.059_selfsupervised.yaml +154 -0
- package/AISB/catalog/aisb.t3.059_selfsupervised.zh.yaml +125 -0
- package/AISB/catalog/aisb.t3.060_iaggad.yaml +121 -0
- package/AISB/catalog/aisb.t3.060_iaggad.zh.yaml +100 -0
- package/AISB/catalog/aisb.t3.061_hsgkn.yaml +136 -0
- package/AISB/catalog/aisb.t3.061_hsgkn.zh.yaml +113 -0
- package/AISB/catalog/aisb.t3.062_visionts.yaml +237 -0
- package/AISB/catalog/aisb.t3.062_visionts.zh.yaml +216 -0
- package/AISB/catalog/aisb.t3.063_tsrag.yaml +162 -0
- package/AISB/catalog/aisb.t3.063_tsrag.zh.yaml +138 -0
- package/AISB/catalog/aisb.t3.064_pir.yaml +221 -0
- package/AISB/catalog/aisb.t3.064_pir.zh.yaml +197 -0
- package/AISB/catalog/aisb.t3.065_proteinbinding.yaml +234 -0
- package/AISB/catalog/aisb.t3.065_proteinbinding.zh.yaml +167 -0
- package/AISB/catalog/aisb.t3.066_tropicalattention.yaml +267 -0
- package/AISB/catalog/aisb.t3.066_tropicalattention.zh.yaml +229 -0
- package/AISB/catalog/aisb.t3.067_kanad.yaml +193 -0
- package/AISB/catalog/aisb.t3.067_kanad.zh.yaml +167 -0
- package/AISB/catalog/aisb.t3.068_sempo.yaml +187 -0
- package/AISB/catalog/aisb.t3.068_sempo.zh.yaml +148 -0
- package/AISB/catalog/aisb.t3.069_treehfd.yaml +129 -0
- package/AISB/catalog/aisb.t3.069_treehfd.zh.yaml +111 -0
- package/AISB/catalog/aisb.t3.070_certifiedunlearning.yaml +224 -0
- package/AISB/catalog/aisb.t3.070_certifiedunlearning.zh.yaml +171 -0
- package/AISB/catalog/aisb.t3.071_neuralmjd.yaml +142 -0
- package/AISB/catalog/aisb.t3.071_neuralmjd.zh.yaml +120 -0
- package/AISB/catalog/aisb.t3.072_fedgmt.yaml +181 -0
- package/AISB/catalog/aisb.t3.072_fedgmt.zh.yaml +158 -0
- package/AISB/catalog/aisb.t3.073_rld.yaml +161 -0
- package/AISB/catalog/aisb.t3.073_rld.zh.yaml +129 -0
- package/AISB/catalog/aisb.t3.074_lsvi.yaml +163 -0
- package/AISB/catalog/aisb.t3.074_lsvi.zh.yaml +129 -0
- package/AISB/catalog/aisb.t3.075_treeslicedentropy.yaml +201 -0
- package/AISB/catalog/aisb.t3.075_treeslicedentropy.zh.yaml +148 -0
- package/AISB/catalog/aisb.t3.076_aanet.yaml +169 -0
- package/AISB/catalog/aisb.t3.076_aanet.zh.yaml +129 -0
- package/AISB/catalog/aisb.t3.077_cmnn.yaml +199 -0
- package/AISB/catalog/aisb.t3.077_cmnn.zh.yaml +165 -0
- package/AISB/catalog/aisb.t3.078_conformalanomaly.yaml +146 -0
- package/AISB/catalog/aisb.t3.078_conformalanomaly.zh.yaml +117 -0
- package/AISB/catalog/aisb.t3.079_dpfkmeans.yaml +131 -0
- package/AISB/catalog/aisb.t3.079_dpfkmeans.zh.yaml +104 -0
- package/AISB/catalog/aisb.t3.080_latentscorereweight.yaml +169 -0
- package/AISB/catalog/aisb.t3.080_latentscorereweight.zh.yaml +123 -0
- package/AISB/catalog/aisb.t3.081_qmamba.yaml +150 -0
- package/AISB/catalog/aisb.t3.081_qmamba.zh.yaml +117 -0
- package/AISB/catalog/aisb.t3.082_onlinellmrouting.yaml +160 -0
- package/AISB/catalog/aisb.t3.082_onlinellmrouting.zh.yaml +133 -0
- package/AISB/catalog/aisb.t3.083_starformer.yaml +178 -0
- package/AISB/catalog/aisb.t3.083_starformer.zh.yaml +140 -0
- package/AISB/catalog/aisb.t3.084_ift.yaml +139 -0
- package/AISB/catalog/aisb.t3.084_ift.zh.yaml +111 -0
- package/AISB/catalog/aisb.t3.085_neuralsurv.yaml +183 -0
- package/AISB/catalog/aisb.t3.085_neuralsurv.zh.yaml +143 -0
- package/AISB/catalog/aisb.t3.086_stella.yaml +197 -0
- package/AISB/catalog/aisb.t3.086_stella.zh.yaml +142 -0
- package/AISB/catalog/aisb.t3.087_moses.yaml +167 -0
- package/AISB/catalog/aisb.t3.087_moses.zh.yaml +132 -0
- package/AISB/catalog/aisb.t3.088_channelnorm.yaml +140 -0
- package/AISB/catalog/aisb.t3.088_channelnorm.zh.yaml +109 -0
- package/AISB/catalog/aisb.t3.089_causalvelocity.yaml +730 -0
- package/AISB/catalog/aisb.t3.089_causalvelocity.zh.yaml +668 -0
- package/AISB/catalog/aisb.t3.090_rstib.yaml +144 -0
- package/AISB/catalog/aisb.t3.090_rstib.zh.yaml +109 -0
- package/AISB/catalog/aisb.t3.091_timeawarecausal.yaml +132 -0
- package/AISB/catalog/aisb.t3.091_timeawarecausal.zh.yaml +107 -0
- package/AISB/catalog/aisb.t3.092_kmeanslocalopt.yaml +138 -0
- package/AISB/catalog/aisb.t3.092_kmeanslocalopt.zh.yaml +110 -0
- package/AISB/catalog/aisb.t3.093_fedwmsam.yaml +134 -0
- package/AISB/catalog/aisb.t3.093_fedwmsam.zh.yaml +106 -0
- package/AISB/catalog/aisb.t3.094_boundre.yaml +147 -0
- package/AISB/catalog/aisb.t3.094_boundre.zh.yaml +114 -0
- package/AISB/catalog/aisb.t3.095_fastfeaturecp.yaml +153 -0
- package/AISB/catalog/aisb.t3.095_fastfeaturecp.zh.yaml +118 -0
- package/AISB/catalog/aisb.t3.096_m3svm.yaml +189 -0
- package/AISB/catalog/aisb.t3.096_m3svm.zh.yaml +149 -0
- package/AISB/catalog/aisb.t3.097_wassersteintl.yaml +212 -0
- package/AISB/catalog/aisb.t3.097_wassersteintl.zh.yaml +169 -0
- package/AISB/catalog/aisb.t3.098_xmahalanobis.yaml +171 -0
- package/AISB/catalog/aisb.t3.098_xmahalanobis.zh.yaml +127 -0
- package/AISB/catalog/aisb.t3.099_ollalanding.yaml +248 -0
- package/AISB/catalog/aisb.t3.099_ollalanding.zh.yaml +182 -0
- package/AISB/catalog/aisb.t3.100_invmissingdata.yaml +179 -0
- package/AISB/catalog/aisb.t3.100_invmissingdata.zh.yaml +150 -0
- package/AISB/catalog/aisb.t3.101_acia.yaml +164 -0
- package/AISB/catalog/aisb.t3.101_acia.zh.yaml +109 -0
- package/AISB/catalog/aisb.t3.102_stochasticff.yaml +178 -0
- package/AISB/catalog/aisb.t3.102_stochasticff.zh.yaml +130 -0
- package/AISB/catalog/aisb.t3.103_qdcp.yaml +150 -0
- package/AISB/catalog/aisb.t3.103_qdcp.zh.yaml +116 -0
- package/AISB/catalog/aisb.t3.104_balancedactiveinf.yaml +137 -0
- package/AISB/catalog/aisb.t3.104_balancedactiveinf.zh.yaml +104 -0
- package/AISB/catalog/aisb.t3.105_binaryclasseval.yaml +161 -0
- package/AISB/catalog/aisb.t3.105_binaryclasseval.zh.yaml +130 -0
- package/AISB/image/001_aisb.t3.001_savvy.jpg +0 -0
- package/AISB/image/002_aisb.t3.002_pinet.jpg +0 -0
- package/AISB/image/003_aisb.t3.003_dmsqd.jpg +0 -0
- package/AISB/image/004_aisb.t3.004_decentralattn.jpg +0 -0
- package/AISB/image/005_aisb.t3.005_tsae.jpg +0 -0
- package/AISB/image/006_aisb.t3.006_physense.jpg +0 -0
- package/AISB/image/007_aisb.t3.007_reasoningiqa.jpg +0 -0
- package/AISB/image/008_aisb.t3.008_meanflows.jpg +0 -0
- package/AISB/image/009_aisb.t3.009_scoremissing.jpg +0 -0
- package/AISB/image/010_aisb.t3.010_suitabilityfilter.jpg +0 -0
- package/AISB/image/011_aisb.t3.011_osd.jpg +0 -0
- package/AISB/image/012_aisb.t3.012_efficientqat.jpg +0 -0
- package/AISB/image/013_aisb.t3.013_appl.jpg +0 -0
- package/AISB/image/014_aisb.t3.014_piguard.jpg +0 -0
- package/AISB/image/015_aisb.t3.015_frspec.jpg +0 -0
- package/AISB/image/016_aisb.t3.016_mathfusion.jpg +0 -0
- package/AISB/image/017_aisb.t3.017_multimodalglp.jpg +0 -0
- package/AISB/image/018_aisb.t3.018_cotsynth.jpg +0 -0
- package/AISB/image/019_aisb.t3.019_dyscaleut.jpg +0 -0
- package/AISB/image/020_aisb.t3.020_aristotle.jpg +0 -0
- package/AISB/image/021_aisb.t3.021_tokenrecycling.jpg +0 -0
- package/AISB/image/022_aisb.t3.022_chainofreasoning.jpg +0 -0
- package/AISB/image/023_aisb.t3.023_guidedembed.jpg +0 -0
- package/AISB/image/024_aisb.t3.024_outputcentric.jpg +0 -0
- package/AISB/image/025_aisb.t3.025_deeper.jpg +0 -0
- package/AISB/image/026_aisb.t3.026_gartkg.jpg +0 -0
- package/AISB/image/027_aisb.t3.027_citeeval.jpg +0 -0
- package/AISB/image/028_aisb.t3.028_sbam.jpg +0 -0
- package/AISB/image/029_aisb.t3.029_cdqgeoembed.jpg +0 -0
- package/AISB/image/030_aisb.t3.030_processrm.jpg +0 -0
- package/AISB/image/031_aisb.t3.031_circuitstability.jpg +0 -0
- package/AISB/image/032_aisb.t3.032_ptsolver.jpg +0 -0
- package/AISB/image/033_aisb.t3.033_gcse.jpg +0 -0
- package/AISB/image/034_aisb.t3.034_ensemblewm.jpg +0 -0
- package/AISB/image/035_aisb.t3.035_moralvalueswa.jpg +0 -0
- package/AISB/image/036_aisb.t3.036_weakstrongpref.jpg +0 -0
- package/AISB/image/037_aisb.t3.037_dementiamask.jpg +0 -0
- package/AISB/image/038_aisb.t3.038_tinysam.jpg +0 -0
- package/AISB/image/039_aisb.t3.039_calf.jpg +0 -0
- package/AISB/image/040_aisb.t3.040_graniteguardian.jpg +0 -0
- package/AISB/image/041_aisb.t3.041_amdm.jpg +0 -0
- package/AISB/image/042_aisb.t3.042_xpatch.jpg +0 -0
- package/AISB/image/043_aisb.t3.043_vhm.jpg +0 -0
- package/AISB/image/044_aisb.t3.044_rgvi.jpg +0 -0
- package/AISB/image/045_aisb.t3.045_pslstm.jpg +0 -0
- package/AISB/image/046_aisb.t3.046_nonstatts.jpg +0 -0
- package/AISB/image/047_aisb.t3.047_timepfn.jpg +0 -0
- package/AISB/image/048_aisb.t3.048_proxyspex.jpg +0 -0
- package/AISB/image/049_aisb.t3.049_hogwildinference.jpg +0 -0
- package/AISB/image/050_aisb.t3.050_causalpfn.jpg +0 -0
- package/AISB/image/051_aisb.t3.051_flashtp.jpg +0 -0
- package/AISB/image/052_aisb.t3.052_nsdiff.jpg +0 -0
- package/AISB/image/053_aisb.t3.053_k2vae.jpg +0 -0
- package/AISB/image/054_aisb.t3.054_timebase.jpg +0 -0
- package/AISB/image/055_aisb.t3.055_csbrain.jpg +0 -0
- package/AISB/image/056_aisb.t3.056_infosam.jpg +0 -0
- package/AISB/image/057_aisb.t3.057_mdreid.jpg +0 -0
- package/AISB/image/058_aisb.t3.058_mindglitch.jpg +0 -0
- package/AISB/image/059_aisb.t3.059_selfsupervised.jpg +0 -0
- package/AISB/image/060_aisb.t3.060_iaggad.jpg +0 -0
- package/AISB/image/061_aisb.t3.061_hsgkn.jpg +0 -0
- package/AISB/image/062_aisb.t3.062_visionts.jpg +0 -0
- package/AISB/image/063_aisb.t3.063_tsrag.jpg +0 -0
- package/AISB/image/064_aisb.t3.064_pir.jpg +0 -0
- package/AISB/image/065_aisb.t3.065_proteinbinding.jpg +0 -0
- package/AISB/image/066_aisb.t3.066_tropicalattention.jpg +0 -0
- package/AISB/image/067_aisb.t3.067_kanad.jpg +0 -0
- package/AISB/image/068_aisb.t3.068_sempo.jpg +0 -0
- package/AISB/image/069_aisb.t3.069_treehfd.jpg +0 -0
- package/AISB/image/070_aisb.t3.070_certifiedunlearning.jpg +0 -0
- package/AISB/image/071_aisb.t3.071_neuralmjd.jpg +0 -0
- package/AISB/image/072_aisb.t3.072_fedgmt.jpg +0 -0
- package/AISB/image/073_aisb.t3.073_rld.jpg +0 -0
- package/AISB/image/074_aisb.t3.074_lsvi.jpg +0 -0
- package/AISB/image/075_aisb.t3.075_treeslicedentropy.jpg +0 -0
- package/AISB/image/076_aisb.t3.076_aanet.jpg +0 -0
- package/AISB/image/077_aisb.t3.077_cmnn.jpg +0 -0
- package/AISB/image/078_aisb.t3.078_conformalanomaly.jpg +0 -0
- package/AISB/image/079_aisb.t3.079_dpfkmeans.jpg +0 -0
- package/AISB/image/080_aisb.t3.080_latentscorereweight.jpg +0 -0
- package/AISB/image/081_aisb.t3.081_qmamba.jpg +0 -0
- package/AISB/image/082_aisb.t3.082_onlinellmrouting.jpg +0 -0
- package/AISB/image/083_aisb.t3.083_starformer.jpg +0 -0
- package/AISB/image/084_aisb.t3.084_ift.jpg +0 -0
- package/AISB/image/085_aisb.t3.085_neuralsurv.jpg +0 -0
- package/AISB/image/086_aisb.t3.086_stella.jpg +0 -0
- package/AISB/image/087_aisb.t3.087_moses.jpg +0 -0
- package/AISB/image/088_aisb.t3.088_channelnorm.jpg +0 -0
- package/AISB/image/089_aisb.t3.089_causalvelocity.jpg +0 -0
- package/AISB/image/090_aisb.t3.090_rstib.jpg +0 -0
- package/AISB/image/091_aisb.t3.091_timeawarecausal.jpg +0 -0
- package/AISB/image/092_aisb.t3.092_kmeanslocalopt.jpg +0 -0
- package/AISB/image/093_aisb.t3.093_fedwmsam.jpg +0 -0
- package/AISB/image/094_aisb.t3.094_boundre.jpg +0 -0
- package/AISB/image/095_aisb.t3.095_fastfeaturecp.jpg +0 -0
- package/AISB/image/096_aisb.t3.096_m3svm.jpg +0 -0
- package/AISB/image/097_aisb.t3.097_wassersteintl.jpg +0 -0
- package/AISB/image/098_aisb.t3.098_xmahalanobis.jpg +0 -0
- package/AISB/image/099_aisb.t3.099_ollalanding.jpg +0 -0
- package/AISB/image/100_aisb.t3.100_invmissingdata.jpg +0 -0
- package/AISB/image/101_aisb.t3.101_acia.jpg +0 -0
- package/AISB/image/102_aisb.t3.102_stochasticff.jpg +0 -0
- package/AISB/image/103_aisb.t3.103_qdcp.jpg +0 -0
- package/AISB/image/104_aisb.t3.104_balancedactiveinf.jpg +0 -0
- package/AISB/image/105_aisb.t3.105_binaryclasseval.jpg +0 -0
- package/AISB/image/106_aisb.t1.reasoning_lite.jpg +0 -0
- package/AISB/image/107_aisb.t2.paper_audit.jpg +0 -0
- package/AISB/image/108_aisb.t3.multi_gpu_search.jpg +0 -0
- package/AISB/image/109_aisb.t3.tdc_admet.jpg +0 -0
- package/AISB/image/aisb.b1.agentic_coding.svg +16 -0
- package/AISB/image/aisb.b10.climate_earth.svg +16 -0
- package/AISB/image/aisb.b11.model_efficiency.svg +16 -0
- package/AISB/image/aisb.b12.embodied_ai.svg +16 -0
- package/AISB/image/aisb.b2.agent_systems.svg +16 -0
- package/AISB/image/aisb.b3.self_evolving_rl.svg +16 -0
- package/AISB/image/aisb.b4.lm_reasoning.svg +16 -0
- package/AISB/image/aisb.b5.math_proof.svg +16 -0
- package/AISB/image/aisb.b6.research_process.svg +16 -0
- package/AISB/image/aisb.b7.multimodal_fusion.svg +16 -0
- package/AISB/image/aisb.b8.lifesci_drug.svg +16 -0
- package/AISB/image/aisb.b9.material_science.svg +16 -0
- package/README.md +196 -32
- package/bin/ds.js +924 -66
- package/docs/en/00_QUICK_START.md +195 -18
- package/docs/en/01_SETTINGS_REFERENCE.md +468 -96
- package/docs/en/02_START_RESEARCH_GUIDE.md +26 -5
- package/docs/en/03_QQ_CONNECTOR_GUIDE.md +14 -3
- package/docs/en/04_LINGZHU_CONNECTOR_GUIDE.md +2 -0
- package/docs/en/05_TUI_GUIDE.md +171 -2
- package/docs/en/07_MEMORY_AND_MCP.md +38 -2
- package/docs/en/09_DOCTOR.md +78 -7
- package/docs/en/10_WEIXIN_CONNECTOR_GUIDE.md +38 -1
- package/docs/en/11_LICENSE_AND_RISK.md +4 -0
- package/docs/en/12_GUIDED_WORKFLOW_TOUR.md +15 -0
- package/docs/en/14_PROMPT_SKILLS_AND_MCP_GUIDE.md +9 -0
- package/docs/en/15_CODEX_PROVIDER_SETUP.md +624 -180
- package/docs/en/16_TELEGRAM_CONNECTOR_GUIDE.md +14 -0
- package/docs/en/17_WHATSAPP_CONNECTOR_GUIDE.md +14 -0
- package/docs/en/18_FEISHU_CONNECTOR_GUIDE.md +14 -0
- package/docs/en/21_LOCAL_MODEL_BACKENDS_GUIDE.md +386 -0
- package/docs/en/22_BENCHSTORE_YAML_REFERENCE.md +469 -0
- package/docs/en/23_BENCHSTORE_GITHUB_RELEASES_SPEC.md +316 -0
- package/docs/en/24_CLAUDE_CODE_PROVIDER_SETUP.md +469 -0
- package/docs/en/25_OPENCODE_PROVIDER_SETUP.md +653 -0
- package/docs/en/26_CITATION_AND_ATTRIBUTION.md +119 -0
- package/docs/en/27_KIMI_CODE_PROVIDER_SETUP.md +180 -0
- package/docs/en/28_DISCORD_CONNECTOR_GUIDE.md +61 -0
- package/docs/en/29_SLACK_CONNECTOR_GUIDE.md +60 -0
- package/docs/en/30_SETTINGS_CONTROL_CENTER_GUIDE.md +371 -0
- package/docs/en/{19_LOCAL_BROWSER_AUTH.md → 31_LOCAL_BROWSER_AUTH.md} +1 -1
- package/docs/en/32_WINDOWS_WSL2_DEPLOYMENT_GUIDE.md +273 -0
- package/docs/en/33_WORKSPACE_EXPLORER_QA.md +121 -0
- package/docs/en/91_DEVELOPMENT.md +266 -0
- package/docs/en/99_ACKNOWLEDGEMENTS.md +24 -19
- package/docs/en/README.md +48 -7
- package/docs/images/admin/admin-connectors-health-en.png +0 -0
- package/docs/images/admin/admin-controllers-en.png +0 -0
- package/docs/images/admin/admin-diagnostics-en.png +0 -0
- package/docs/images/admin/admin-errors-en.png +0 -0
- package/docs/images/admin/admin-issues-en.png +0 -0
- package/docs/images/admin/admin-logs-en.png +0 -0
- package/docs/images/admin/admin-quest-detail-en.png +0 -0
- package/docs/images/admin/admin-quests-en.png +0 -0
- package/docs/images/admin/admin-repairs-en.png +0 -0
- package/docs/images/admin/admin-runtime-en.png +0 -0
- package/docs/images/admin/admin-search-en.png +0 -0
- package/docs/images/admin/admin-stats-en.png +0 -0
- package/docs/images/admin/admin-summary-en.png +0 -0
- package/docs/images/connectors/connector-discord-en.png +0 -0
- package/docs/images/connectors/connector-feishu-en.png +0 -0
- package/docs/images/connectors/connector-lingzhu-en.png +0 -0
- package/docs/images/connectors/connector-qq-en.png +0 -0
- package/docs/images/connectors/connector-slack-en.png +0 -0
- package/docs/images/connectors/connector-telegram-en.png +0 -0
- package/docs/images/connectors/connector-weixin-en.png +0 -0
- package/docs/images/connectors/connector-whatsapp-en.png +0 -0
- package/docs/images/settings/settings-baselines-en.png +0 -0
- package/docs/images/settings/settings-config-en.png +0 -0
- package/docs/images/settings/settings-connectors-overview-en.png +0 -0
- package/docs/images/settings/settings-deepxiv-en.png +0 -0
- package/docs/images/settings/settings-mcp-servers-en.png +0 -0
- package/docs/images/settings/settings-plugins-en.png +0 -0
- package/docs/images/settings/settings-runners-en.png +0 -0
- package/docs/zh/00_QUICK_START.md +142 -18
- package/docs/zh/01_SETTINGS_REFERENCE.md +219 -98
- package/docs/zh/02_START_RESEARCH_GUIDE.md +26 -5
- package/docs/zh/05_TUI_GUIDE.md +171 -2
- package/docs/zh/07_MEMORY_AND_MCP.md +29 -2
- package/docs/zh/09_DOCTOR.md +54 -8
- package/docs/zh/10_WEIXIN_CONNECTOR_GUIDE.md +24 -1
- package/docs/zh/11_LICENSE_AND_RISK.md +4 -0
- package/docs/zh/12_GUIDED_WORKFLOW_TOUR.md +15 -0
- package/docs/zh/14_PROMPT_SKILLS_AND_MCP_GUIDE.md +9 -0
- package/docs/zh/15_CODEX_PROVIDER_SETUP.md +552 -181
- package/docs/zh/21_LOCAL_MODEL_BACKENDS_GUIDE.md +384 -0
- package/docs/zh/22_BENCHSTORE_YAML_REFERENCE.md +459 -0
- package/docs/zh/23_BENCHSTORE_GITHUB_RELEASES_SPEC.md +287 -0
- package/docs/zh/23_CLAUDE_RUNNER_GUIDE.md +103 -0
- package/docs/zh/24_CLAUDE_CODE_PROVIDER_SETUP.md +460 -0
- package/docs/zh/25_OPENCODE_PROVIDER_SETUP.md +660 -0
- package/docs/zh/26_CITATION_AND_ATTRIBUTION.md +102 -0
- package/docs/zh/27_KIMI_CODE_PROVIDER_SETUP.md +51 -0
- package/docs/zh/{19_LOCAL_BROWSER_AUTH.md → 31_LOCAL_BROWSER_AUTH.md} +1 -1
- package/docs/zh/32_WINDOWS_WSL2_DEPLOYMENT_GUIDE.md +264 -0
- package/docs/zh/33_WORKSPACE_EXPLORER_QA.md +127 -0
- package/docs/zh/99_ACKNOWLEDGEMENTS.md +23 -19
- package/docs/zh/README.md +33 -7
- package/install.sh +168 -20
- package/package.json +5 -1
- package/pyproject.toml +2 -1
- package/src/deepscientist/__init__.py +1 -1
- package/src/deepscientist/acp/envelope.py +13 -0
- package/src/deepscientist/admin/__init__.py +3 -0
- package/src/deepscientist/admin/charts.py +681 -0
- package/src/deepscientist/admin/logs.py +119 -0
- package/src/deepscientist/admin/repairs.py +217 -0
- package/src/deepscientist/admin/service.py +1310 -0
- package/src/deepscientist/admin/system_info.py +700 -0
- package/src/deepscientist/admin/tasks.py +465 -0
- package/src/deepscientist/admin/tool_metrics.py +600 -0
- package/src/deepscientist/artifact/guidance.py +8 -4
- package/src/deepscientist/artifact/schemas.py +115 -0
- package/src/deepscientist/artifact/service.py +4268 -260
- package/src/deepscientist/bash_exec/monitor.py +30 -3
- package/src/deepscientist/bash_exec/service.py +134 -1
- package/src/deepscientist/benchstore/__init__.py +4 -0
- package/src/deepscientist/benchstore/prompt_builder.py +224 -0
- package/src/deepscientist/benchstore/service.py +1716 -0
- package/src/deepscientist/bridges/connectors.py +8 -2
- package/src/deepscientist/channels/weixin_ilink.py +8 -1
- package/src/deepscientist/cli.py +92 -17
- package/src/deepscientist/codex_cli_compat.py +187 -74
- package/src/deepscientist/config/models.py +82 -11
- package/src/deepscientist/config/service.py +1077 -93
- package/src/deepscientist/connector/weixin_support.py +48 -17
- package/src/deepscientist/daemon/api/handlers.py +827 -235
- package/src/deepscientist/daemon/api/router.py +81 -1
- package/src/deepscientist/daemon/app.py +1512 -85
- package/src/deepscientist/diagnostics/__init__.py +6 -0
- package/src/deepscientist/diagnostics/runner_failures.py +277 -0
- package/src/deepscientist/doctor.py +407 -56
- package/src/deepscientist/evidence_packets.py +590 -0
- package/src/deepscientist/home.py +52 -4
- package/src/deepscientist/kimi_cli_compat.py +50 -0
- package/src/deepscientist/latex_runtime.py +2 -2
- package/src/deepscientist/mcp/context.py +2 -0
- package/src/deepscientist/mcp/schemas.py +114 -0
- package/src/deepscientist/mcp/server.py +1566 -126
- package/src/deepscientist/memory/service.py +203 -16
- package/src/deepscientist/process_control.py +8 -1
- package/src/deepscientist/prompts/builder.py +850 -88
- package/src/deepscientist/quest/__init__.py +2 -2
- package/src/deepscientist/quest/layout.py +12 -1
- package/src/deepscientist/quest/node_traces.py +10 -0
- package/src/deepscientist/quest/service.py +1852 -161
- package/src/deepscientist/quest/stage_views.py +1 -1
- package/src/deepscientist/runners/__init__.py +18 -0
- package/src/deepscientist/runners/base.py +89 -1
- package/src/deepscientist/runners/builtins.py +13 -1
- package/src/deepscientist/runners/claude.py +391 -0
- package/src/deepscientist/runners/codex.py +480 -35
- package/src/deepscientist/runners/codex_telemetry.py +127 -0
- package/src/deepscientist/runners/kimi.py +334 -0
- package/src/deepscientist/runners/metadata.py +68 -0
- package/src/deepscientist/runners/opencode.py +414 -0
- package/src/deepscientist/runners/runtime_overrides.py +100 -0
- package/src/deepscientist/runners/simple_cli.py +538 -0
- package/src/deepscientist/runtime_storage.py +303 -0
- package/src/deepscientist/shared.py +80 -16
- package/src/deepscientist/skills/installer.py +37 -0
- package/src/deepscientist/skills/registry.py +2 -0
- package/src/deepscientist/tinytex.py +2 -2
- package/src/deepscientist/tui.py +10 -3
- package/src/prompts/benchstore/system.md +77 -0
- package/src/prompts/connectors/qq.md +33 -2
- package/src/prompts/connectors/weixin.md +208 -23
- package/src/prompts/contracts/admin_ops.md +74 -0
- package/src/prompts/contracts/admin_ops_knowledge.md +138 -0
- package/src/prompts/contracts/shared_interaction.md +5 -10
- package/src/prompts/start_setup/system.md +422 -0
- package/src/prompts/system.md +411 -304
- package/src/prompts/system_copilot.md +89 -0
- package/src/skills/analysis-campaign/SKILL.md +239 -578
- package/src/skills/analysis-campaign/references/artifact-flow-examples.md +102 -0
- package/src/skills/analysis-campaign/references/boundary-cases.md +98 -0
- package/src/skills/analysis-campaign/references/campaign-checklist-template.md +39 -24
- package/src/skills/analysis-campaign/references/campaign-design.md +26 -10
- package/src/skills/analysis-campaign/references/campaign-plan-template.md +53 -54
- package/src/skills/analysis-campaign/references/operational-guidance.md +97 -0
- package/src/skills/analysis-campaign/references/writing-facing-slice-examples.md +10 -20
- package/src/skills/baseline/SKILL.md +183 -461
- package/src/skills/baseline/references/artifact-flow-examples.md +106 -0
- package/src/skills/baseline/references/artifact-payload-examples.md +1 -1
- package/src/skills/baseline/references/baseline-checklist-template.md +27 -35
- package/src/skills/baseline/references/baseline-plan-template.md +37 -76
- package/src/skills/baseline/references/boundary-cases.md +86 -0
- package/src/skills/baseline/references/codebase-audit-checklist.md +2 -6
- package/src/skills/baseline/references/comparability-contract.md +7 -12
- package/src/skills/baseline/references/operational-guidance.md +56 -0
- package/src/skills/baseline/references/route-selection.md +5 -25
- package/src/skills/decision/SKILL.md +113 -306
- package/src/skills/decision/references/checkpoint-memory-template.md +47 -0
- package/src/skills/decision/references/operational-guidance.md +94 -0
- package/src/skills/decision/references/research-route-criteria.md +7 -8
- package/src/skills/decision/references/strategic-decision-template.md +13 -26
- package/src/skills/experiment/SKILL.md +132 -670
- package/src/skills/experiment/references/execution-playbook.md +374 -0
- package/src/skills/experiment/references/main-experiment-checklist-template.md +26 -2
- package/src/skills/experiment/references/main-experiment-plan-template.md +28 -17
- package/src/skills/experiment/references/operational-guidance.md +108 -0
- package/src/skills/finalize/SKILL.md +62 -0
- package/src/skills/finalize/references/checkpoint-memory-template.md +49 -0
- package/src/skills/finalize/references/resume-packet-template.md +7 -0
- package/src/skills/idea/SKILL.md +228 -15
- package/src/skills/idea/references/controlled-brainstorming-playbook.md +78 -0
- package/src/skills/idea/references/current-board-packet-template.md +61 -0
- package/src/skills/idea/references/high-value-idea-sourcing.md +119 -0
- package/src/skills/idea/references/idea-generation-playbook.md +21 -0
- package/src/skills/idea/references/idea-thinking-flow.md +6 -0
- package/src/skills/idea/references/literature-survey-template.md +3 -0
- package/src/skills/idea/references/objective-contract-template.md +54 -0
- package/src/skills/idea/references/outline-seeding-example.md +56 -0
- package/src/skills/idea/references/pre-idea-draft-template.md +105 -0
- package/src/skills/idea/references/related-work-playbook.md +75 -2
- package/src/skills/idea/references/research-history-playbook.md +114 -0
- package/src/skills/idea/references/selection-gate.md +58 -6
- package/src/skills/intake-audit/SKILL.md +43 -2
- package/src/skills/intake-audit/references/state-audit-template.md +10 -0
- package/src/skills/nature-data/SKILL.md +128 -0
- package/src/skills/nature-data/UPSTREAM_LICENSE.txt +21 -0
- package/src/skills/nature-data/agents/openai.yaml +4 -0
- package/src/skills/nature-data/references/chinese-author-alignment.md +84 -0
- package/src/skills/nature-data/references/fair-metadata-checklist.md +105 -0
- package/src/skills/nature-data/references/policy-principles.md +103 -0
- package/src/skills/nature-data/references/repository-and-identifiers.md +96 -0
- package/src/skills/nature-data/references/source-basis.md +54 -0
- package/src/skills/nature-data/references/statement-patterns.md +153 -0
- package/src/skills/nature-figure/SKILL.md +197 -0
- package/src/skills/nature-figure/UPSTREAM_LICENSE.txt +21 -0
- package/src/skills/nature-figure/agents/openai.yaml +4 -0
- package/src/skills/nature-figure/evals/evals.json +37 -0
- package/src/skills/nature-figure/references/api.md +428 -0
- package/src/skills/nature-figure/references/backend-selection.md +100 -0
- package/src/skills/nature-figure/references/chart-types.md +281 -0
- package/src/skills/nature-figure/references/common-patterns.md +349 -0
- package/src/skills/nature-figure/references/design-theory.md +436 -0
- package/src/skills/nature-figure/references/figure-contract.md +93 -0
- package/src/skills/nature-figure/references/nature-2026-observations.md +112 -0
- package/src/skills/nature-figure/references/qa-contract.md +119 -0
- package/src/skills/nature-figure/references/r-template-index.md +66 -0
- package/src/skills/nature-figure/references/r-workflow.md +161 -0
- package/src/skills/nature-figure/references/tutorials.md +250 -0
- package/src/skills/nature-paper2ppt/SKILL.md +507 -0
- package/src/skills/nature-paper2ppt/UPSTREAM_LICENSE.txt +21 -0
- package/src/skills/nature-paper2ppt/agents/openai.yaml +4 -0
- package/src/skills/nature-polishing/SKILL.md +385 -0
- package/src/skills/nature-polishing/UPSTREAM_LICENSE.txt +21 -0
- package/src/skills/nature-polishing/agents/openai.yaml +4 -0
- package/src/skills/nature-polishing/references/phrasebank-playbook.md +162 -0
- package/src/skills/nature-polishing/references/section-moves.md +240 -0
- package/src/skills/nature-polishing/references/style-guardrails.md +94 -0
- package/src/skills/nature-polishing/references/writing-strategy.md +148 -0
- package/src/skills/optimize/SKILL.md +177 -1568
- package/src/skills/optimize/references/brief-shaping-playbook.md +95 -0
- package/src/skills/optimize/references/candidate-board-template.md +13 -0
- package/src/skills/optimize/references/candidate-ranking-template.md +51 -0
- package/src/skills/optimize/references/codegen-route-playbook.md +50 -0
- package/src/skills/optimize/references/debug-response-template.md +29 -0
- package/src/skills/optimize/references/frontier-review-template.md +32 -0
- package/src/skills/optimize/references/fusion-playbook.md +36 -0
- package/src/skills/optimize/references/method-brief-template.md +73 -0
- package/src/skills/optimize/references/operational-guidance.md +621 -0
- package/src/skills/optimize/references/optimization-memory-template.md +30 -0
- package/src/skills/optimize/references/optimize-checklist-template.md +18 -0
- package/src/skills/optimize/references/plateau-response-playbook.md +28 -0
- package/src/skills/optimize/references/prompt-patterns.md +49 -0
- package/src/skills/paper-outline/SKILL.md +227 -0
- package/src/skills/paper-outline/references/outline-patterns.md +87 -0
- package/src/skills/paper-plot/SKILL.md +79 -0
- package/src/skills/paper-plot/agents/openai.yaml +4 -0
- package/src/skills/paper-plot/references/bar_grouped_hatch.md +96 -0
- package/src/skills/paper-plot/references/bar_paired_delta.md +72 -0
- package/src/skills/paper-plot/references/line_confidence_band.md +75 -0
- package/src/skills/paper-plot/references/line_loss_with_inset.md +65 -0
- package/src/skills/paper-plot/references/line_training_curve.md +44 -0
- package/src/skills/paper-plot/references/radar_dual_series.md +59 -0
- package/src/skills/paper-plot/references/scatter_broken_axis.md +59 -0
- package/src/skills/paper-plot/references/scatter_tsne_cluster.md +72 -0
- package/src/skills/paper-plot/scripts/bar_memevolve.py +109 -0
- package/src/skills/paper-plot/scripts/bar_spice.py +166 -0
- package/src/skills/paper-plot/scripts/line_aime.py +94 -0
- package/src/skills/paper-plot/scripts/line_loss_inset.py +157 -0
- package/src/skills/paper-plot/scripts/line_selfdistill.py +168 -0
- package/src/skills/paper-plot/scripts/radar_dora.py +151 -0
- package/src/skills/paper-plot/scripts/scatter_break.py +169 -0
- package/src/skills/paper-plot/scripts/scatter_tsne.py +133 -0
- package/src/skills/rebuttal/SKILL.md +9 -0
- package/src/skills/references/tool-usage-by-stage.md +438 -0
- package/src/skills/review/SKILL.md +105 -7
- package/src/skills/science/PROVENANCE.md +44 -0
- package/src/skills/science/SKILL.md +137 -0
- package/src/skills/science/references/artifact-science-tool.md +110 -0
- package/src/skills/science/references/claim-type-discipline.md +56 -0
- package/src/skills/science/references/domain-index.md +422 -0
- package/src/skills/science/references/hpc-via-bash-exec.md +42 -0
- package/src/skills/science/references/package-check-playbook.md +64 -0
- package/src/skills/science/references/package-index.min.json +3616 -0
- package/src/skills/science/references/packages/abinit.md +80 -0
- package/src/skills/science/references/packages/acts.md +73 -0
- package/src/skills/science/references/packages/aiida-core.md +80 -0
- package/src/skills/science/references/packages/alamode.md +80 -0
- package/src/skills/science/references/packages/amuse.md +88 -0
- package/src/skills/science/references/packages/anndata.md +88 -0
- package/src/skills/science/references/packages/arbor.md +80 -0
- package/src/skills/science/references/packages/arc.md +73 -0
- package/src/skills/science/references/packages/astropy.md +88 -0
- package/src/skills/science/references/packages/astroquery.md +88 -0
- package/src/skills/science/references/packages/atomate2.md +80 -0
- package/src/skills/science/references/packages/atomsmltr.md +73 -0
- package/src/skills/science/references/packages/awkward.md +73 -0
- package/src/skills/science/references/packages/batman.md +88 -0
- package/src/skills/science/references/packages/biopython.md +88 -0
- package/src/skills/science/references/packages/bloqade.md +73 -0
- package/src/skills/science/references/packages/brian2.md +73 -0
- package/src/skills/science/references/packages/bullet3.md +73 -0
- package/src/skills/science/references/packages/calculix.md +80 -0
- package/src/skills/science/references/packages/cantera.md +73 -0
- package/src/skills/science/references/packages/cavity-md-ipi.md +80 -0
- package/src/skills/science/references/packages/ccdproc.md +88 -0
- package/src/skills/science/references/packages/celerite2.md +88 -0
- package/src/skills/science/references/packages/cellrank.md +73 -0
- package/src/skills/science/references/packages/cesm.md +80 -0
- package/src/skills/science/references/packages/chemicals.md +73 -0
- package/src/skills/science/references/packages/chempy.md +73 -0
- package/src/skills/science/references/packages/cirq.md +73 -0
- package/src/skills/science/references/packages/coffea.md +73 -0
- package/src/skills/science/references/packages/cp2k.md +88 -0
- package/src/skills/science/references/packages/custodian.md +80 -0
- package/src/skills/science/references/packages/dart.md +73 -0
- package/src/skills/science/references/packages/datamol.md +88 -0
- package/src/skills/science/references/packages/dd4hep.md +73 -0
- package/src/skills/science/references/packages/dealii.md +80 -0
- package/src/skills/science/references/packages/deepchem.md +88 -0
- package/src/skills/science/references/packages/delphes.md +73 -0
- package/src/skills/science/references/packages/devito.md +80 -0
- package/src/skills/science/references/packages/dftb.md +88 -0
- package/src/skills/science/references/packages/dftd4.md +88 -0
- package/src/skills/science/references/packages/dftk-jl.md +80 -0
- package/src/skills/science/references/packages/dolfinx.md +80 -0
- package/src/skills/science/references/packages/drake.md +73 -0
- package/src/skills/science/references/packages/dumux.md +73 -0
- package/src/skills/science/references/packages/elk.md +80 -0
- package/src/skills/science/references/packages/elmerfem.md +80 -0
- package/src/skills/science/references/packages/enzo-e.md +88 -0
- package/src/skills/science/references/packages/espresso.md +80 -0
- package/src/skills/science/references/packages/exoplanet.md +88 -0
- package/src/skills/science/references/packages/fairroot.md +73 -0
- package/src/skills/science/references/packages/fbpic.md +80 -0
- package/src/skills/science/references/packages/fdtdbath-meep.md +80 -0
- package/src/skills/science/references/packages/geant4.md +73 -0
- package/src/skills/science/references/packages/geosx.md +80 -0
- package/src/skills/science/references/packages/gprmax.md +80 -0
- package/src/skills/science/references/packages/gromacs.md +80 -0
- package/src/skills/science/references/packages/gwaslab.md +73 -0
- package/src/skills/science/references/packages/gz-sim.md +73 -0
- package/src/skills/science/references/packages/hail.md +88 -0
- package/src/skills/science/references/packages/hiphive.md +80 -0
- package/src/skills/science/references/packages/hoomd-blue.md +80 -0
- package/src/skills/science/references/packages/itensor.md +73 -0
- package/src/skills/science/references/packages/itensors-jl.md +73 -0
- package/src/skills/science/references/packages/jdftx.md +73 -0
- package/src/skills/science/references/packages/jobflow.md +80 -0
- package/src/skills/science/references/packages/kadanoffbaym-jl.md +73 -0
- package/src/skills/science/references/packages/kite.md +80 -0
- package/src/skills/science/references/packages/kratos.md +80 -0
- package/src/skills/science/references/packages/kwant.md +73 -0
- package/src/skills/science/references/packages/lammps.md +80 -0
- package/src/skills/science/references/packages/lightkurve.md +88 -0
- package/src/skills/science/references/packages/limix.md +73 -0
- package/src/skills/science/references/packages/maxwelllink.md +80 -0
- package/src/skills/science/references/packages/mcdc.md +73 -0
- package/src/skills/science/references/packages/meep.md +80 -0
- package/src/skills/science/references/packages/mfem.md +80 -0
- package/src/skills/science/references/packages/mitgcm.md +73 -0
- package/src/skills/science/references/packages/modflow6.md +73 -0
- package/src/skills/science/references/packages/molecool.md +73 -0
- package/src/skills/science/references/packages/mom6.md +73 -0
- package/src/skills/science/references/packages/moose.md +80 -0
- package/src/skills/science/references/packages/mpas-model.md +73 -0
- package/src/skills/science/references/packages/mujoco.md +73 -0
- package/src/skills/science/references/packages/mumax3.md +73 -0
- package/src/skills/science/references/packages/nekrs.md +80 -0
- package/src/skills/science/references/packages/nessi.md +73 -0
- package/src/skills/science/references/packages/nest-simulator.md +73 -0
- package/src/skills/science/references/packages/netket.md +73 -0
- package/src/skills/science/references/packages/neuron.md +73 -0
- package/src/skills/science/references/packages/nextflow.md +88 -0
- package/src/skills/science/references/packages/nwchem.md +88 -0
- package/src/skills/science/references/packages/openbabel.md +88 -0
- package/src/skills/science/references/packages/openems.md +80 -0
- package/src/skills/science/references/packages/openff-toolkit.md +88 -0
- package/src/skills/science/references/packages/openfoam-dev.md +80 -0
- package/src/skills/science/references/packages/openmc.md +73 -0
- package/src/skills/science/references/packages/openmm.md +80 -0
- package/src/skills/science/references/packages/openmoc.md +73 -0
- package/src/skills/science/references/packages/openmx.md +80 -0
- package/src/skills/science/references/packages/opensees.md +80 -0
- package/src/skills/science/references/packages/opensn.md +80 -0
- package/src/skills/science/references/packages/opm-simulators.md +73 -0
- package/src/skills/science/references/packages/oqupy.md +73 -0
- package/src/skills/science/references/packages/packmol.md +80 -0
- package/src/skills/science/references/packages/palabos.md +80 -0
- package/src/skills/science/references/packages/parflow.md +80 -0
- package/src/skills/science/references/packages/pennylane.md +88 -0
- package/src/skills/science/references/packages/perceval.md +73 -0
- package/src/skills/science/references/packages/phono3py.md +73 -0
- package/src/skills/science/references/packages/phonopy.md +73 -0
- package/src/skills/science/references/packages/photutils.md +88 -0
- package/src/skills/science/references/packages/picongpu.md +80 -0
- package/src/skills/science/references/packages/plink-ng.md +88 -0
- package/src/skills/science/references/packages/precice.md +73 -0
- package/src/skills/science/references/packages/psc.md +80 -0
- package/src/skills/science/references/packages/psi4.md +88 -0
- package/src/skills/science/references/packages/pybinding.md +73 -0
- package/src/skills/science/references/packages/pyfr.md +80 -0
- package/src/skills/science/references/packages/pyhf.md +73 -0
- package/src/skills/science/references/packages/pyiron_base.md +80 -0
- package/src/skills/science/references/packages/pylcp.md +73 -0
- package/src/skills/science/references/packages/pylith.md +80 -0
- package/src/skills/science/references/packages/pynbody.md +88 -0
- package/src/skills/science/references/packages/pysam.md +88 -0
- package/src/skills/science/references/packages/pyscf.md +88 -0
- package/src/skills/science/references/packages/q-e.md +73 -0
- package/src/skills/science/references/packages/qibo.md +73 -0
- package/src/skills/science/references/packages/qiskit.md +73 -0
- package/src/skills/science/references/packages/quantica-jl.md +73 -0
- package/src/skills/science/references/packages/quantumoptics-jl.md +73 -0
- package/src/skills/science/references/packages/quimb.md +73 -0
- package/src/skills/science/references/packages/qulacs.md +73 -0
- package/src/skills/science/references/packages/qutip.md +73 -0
- package/src/skills/science/references/packages/rdkit.md +88 -0
- package/src/skills/science/references/packages/rmg-py.md +73 -0
- package/src/skills/science/references/packages/root.md +73 -0
- package/src/skills/science/references/packages/scanpy.md +88 -0
- package/src/skills/science/references/packages/scikit-allel.md +88 -0
- package/src/skills/science/references/packages/scikit-bio.md +88 -0
- package/src/skills/science/references/packages/scqubits.md +73 -0
- package/src/skills/science/references/packages/scuff-em.md +80 -0
- package/src/skills/science/references/packages/scvi-tools.md +73 -0
- package/src/skills/science/references/packages/seissol.md +73 -0
- package/src/skills/science/references/packages/sfepy.md +80 -0
- package/src/skills/science/references/packages/sisl.md +73 -0
- package/src/skills/science/references/packages/smilei.md +80 -0
- package/src/skills/science/references/packages/snakemake.md +88 -0
- package/src/skills/science/references/packages/specfem3d-globe.md +80 -0
- package/src/skills/science/references/packages/specutils.md +88 -0
- package/src/skills/science/references/packages/spglib.md +80 -0
- package/src/skills/science/references/packages/squidpy.md +88 -0
- package/src/skills/science/references/packages/starry.md +88 -0
- package/src/skills/science/references/packages/strawberryfields.md +73 -0
- package/src/skills/science/references/packages/su2.md +80 -0
- package/src/skills/science/references/packages/sunny-jl.md +73 -0
- package/src/skills/science/references/packages/sw4.md +73 -0
- package/src/skills/science/references/packages/swift.md +88 -0
- package/src/skills/science/references/packages/tdnegf.md +73 -0
- package/src/skills/science/references/packages/tenpy.md +73 -0
- package/src/skills/science/references/packages/thermo.md +73 -0
- package/src/skills/science/references/packages/tkwant.md +73 -0
- package/src/skills/science/references/packages/tvb-root.md +73 -0
- package/src/skills/science/references/packages/uproot5.md +73 -0
- package/src/skills/science/references/packages/vampire.md +80 -0
- package/src/skills/science/references/packages/wannier_tools.md +73 -0
- package/src/skills/science/references/packages/warpx.md +80 -0
- package/src/skills/science/references/packages/wrf.md +73 -0
- package/src/skills/science/references/packages/xtb.md +88 -0
- package/src/skills/science/references/packages/yt.md +73 -0
- package/src/skills/science/references/science-task-brief-template.md +71 -0
- package/src/skills/scout/SKILL.md +83 -425
- package/src/skills/scout/references/literature-scout-template.md +5 -24
- package/src/skills/scout/references/operational-guidance.md +191 -0
- package/src/skills/scout/references/paper-triage-playbook.md +11 -35
- package/src/skills/write/SKILL.md +744 -1246
- package/src/skills/write/references/experiments_analysis_patterns.md +129 -0
- package/src/skills/write/references/oral_package_patterns.md +252 -0
- package/src/skills/write/references/oral_writing_principles.md +291 -0
- package/src/skills/write/references/section_rewrite_checklist.md +234 -0
- package/src/tui/dist/app/AppContainer.js +1314 -27
- package/src/tui/dist/components/Composer.js +26 -1
- package/src/tui/dist/components/ConfigScreen.js +2 -1
- package/src/tui/dist/components/InputPrompt.js +25 -9
- package/src/tui/dist/components/MainContent.js +18 -3
- package/src/tui/dist/components/QuestScreen.js +3 -2
- package/src/tui/dist/components/UtilityScreen.js +37 -0
- package/src/tui/dist/hooks/useSafeInput.js +10 -0
- package/src/tui/dist/index.js +13 -1
- package/src/tui/dist/layouts/DefaultAppLayout.js +11 -8
- package/src/tui/dist/lib/api.js +89 -1
- package/src/tui/package.json +1 -1
- package/src/ui/dist/assets/{AnalysisPlugin-DnSm0GZn.js → AnalysisPlugin-CA94NGmI.js} +1 -1
- package/src/ui/dist/assets/CliPlugin-DHBzphZU.js +79 -0
- package/src/ui/dist/assets/CodeEditorPlugin-BOFwD2rn.js +2 -0
- package/src/ui/dist/assets/{CodeViewerPlugin-itb0tltR.js → CodeViewerPlugin-CqDpgjik.js} +4 -4
- package/src/ui/dist/assets/{DocViewerPlugin-DqKkiCI6.js → DocViewerPlugin-UDBgt8-4.js} +3 -3
- package/src/ui/dist/assets/GitCommitViewerPlugin-BmHtZ0bZ.js +6 -0
- package/src/ui/dist/assets/{GitDiffViewerPlugin-DxL2ezFG.js → GitDiffViewerPlugin-CAxjNorQ.js} +2 -2
- package/src/ui/dist/assets/{GitSnapshotViewer-B_RQm1YZ.js → GitSnapshotViewer-CweA6VON.js} +2 -2
- package/src/ui/dist/assets/{ImageViewerPlugin-tHqlXY3n.js → ImageViewerPlugin-C8wHGvGN.js} +5 -5
- package/src/ui/dist/assets/LabPlugin-COyyLUol.js +32 -0
- package/src/ui/dist/assets/{LatexPlugin-B495DTXC.js → LatexPlugin-BQjAaA5J.js} +4 -4
- package/src/ui/dist/assets/{MarkdownViewerPlugin-DG28-61B.js → MarkdownViewerPlugin-Dy1NE2dI.js} +3 -3
- package/src/ui/dist/assets/{MarketplacePlugin-BiOGT-Kj.js → MarketplacePlugin-DMIZtEJ2.js} +2 -2
- package/src/ui/dist/assets/NotebookEditor-CFHMq_Qt.js +91 -0
- package/src/ui/dist/assets/{NotebookEditor-CVsj8h_T.js → NotebookEditor-WFyd8Ybt.js} +23 -23
- package/src/ui/dist/assets/{PdfLoader-CASDQmxJ.js → PdfLoader-CLE5u5TS.js} +3 -3
- package/src/ui/dist/assets/{PdfMarkdownPlugin-BFhwoKsY.js → PdfMarkdownPlugin-_iNK_H83.js} +1 -1
- package/src/ui/dist/assets/PdfViewerPlugin-DgWsbInT.js +22 -0
- package/src/ui/dist/assets/SearchPlugin-DrZmn5iw.js +11 -0
- package/src/ui/dist/assets/{TextViewerPlugin-CB4DYfWO.js → TextViewerPlugin-D1-T3aC7.js} +4 -4
- package/src/ui/dist/assets/branding/runner-claude.svg +107 -0
- package/src/ui/dist/assets/branding/runner-codex.svg +10 -0
- package/src/ui/dist/assets/branding/runner-kimi.svg +14 -0
- package/src/ui/dist/assets/branding/runner-opencode.svg +7 -0
- package/src/ui/dist/assets/cli-store-CoZ-x5Ip.js +1 -0
- package/src/ui/dist/assets/{code-DLC6G24T.js → code-DbsmSd3Y.js} +1 -1
- package/src/ui/dist/assets/file-diff-panel-DsvyRz47.js +1 -0
- package/src/ui/dist/assets/{wrap-text-CwMn-iqb.js → file-jump-queue-DeQBikaw.js} +3 -3
- package/src/ui/dist/assets/{file-socket-Cu4Qln7Y.js → file-socket-DA5XIx88.js} +1 -1
- package/src/ui/dist/assets/fonts/ds-fonts.css +50 -4
- package/src/ui/dist/assets/images/deepxiv/register-guide.png +0 -0
- package/src/ui/dist/assets/index-39vY9LmZ.js +1 -0
- package/src/ui/dist/assets/{index-wQ7RIIRd.js → index-BsO46tJA.js} +1 -1
- package/src/ui/dist/assets/index-CHzJ2xtB.js +3530 -0
- package/src/ui/dist/assets/index-DH-zxoZ3.css +33 -0
- package/src/ui/dist/assets/{plugin-notebook-HbW2K-1c.js → plugin-notebook-JRhysCqj.js} +2 -2
- package/src/ui/dist/assets/{project-sync-CsX08Qno.js → project-sync-DPmWKmKD.js} +1 -1
- package/src/ui/dist/assets/{zoom-out-R-GWEhzS.js → zoom-out-DAukFWen.js} +3 -3
- package/src/ui/dist/index.html +3 -3
- package/src/skills/analysis-campaign/references/artifact-orchestration.md +0 -58
- package/src/skills/baseline/references/memory-playbook.md +0 -40
- package/src/skills/baseline/references/publishable-baseline-package.md +0 -30
- package/src/skills/write/references/outline-evidence-contract-example.md +0 -107
- package/src/skills/write/references/paper-experiment-matrix-template.md +0 -131
- package/src/skills/write/references/paper-section-playbook.md +0 -64
- package/src/skills/write/references/reviewer-first-writing.md +0 -64
- package/src/skills/write/references/revision-checklist.md +0 -70
- package/src/skills/write/references/section-contracts.md +0 -82
- package/src/skills/write/references/sentence-level-proofing.md +0 -49
- package/src/ui/dist/assets/AiManusChatView-COFACy7V.js +0 -204
- package/src/ui/dist/assets/CliPlugin-CvwCmDQ5.js +0 -109
- package/src/ui/dist/assets/CodeEditorPlugin-cOqSa0xq.js +0 -2
- package/src/ui/dist/assets/GitCommitViewerPlugin-DVgNHBCS.js +0 -1
- package/src/ui/dist/assets/LabCopilotPanel-ClMbq5Yu.js +0 -14
- package/src/ui/dist/assets/LabPlugin-L_SuE8ow.js +0 -22
- package/src/ui/dist/assets/NotebookEditor-C-4Kt1p9.js +0 -81
- package/src/ui/dist/assets/PdfViewerPlugin-DcOzU9vd.js +0 -17
- package/src/ui/dist/assets/SearchPlugin-CHj7M58O.js +0 -16
- package/src/ui/dist/assets/VNCViewer-CjlbyCB3.js +0 -11
- package/src/ui/dist/assets/bot-CFkZY-JP.js +0 -6
- package/src/ui/dist/assets/chevron-up-Dq5ofbht.js +0 -6
- package/src/ui/dist/assets/file-content-Dv4LoZec.js +0 -1
- package/src/ui/dist/assets/file-diff-panel-Denq-lC3.js +0 -1
- package/src/ui/dist/assets/file-jump-queue-DA-SdG__.js +0 -1
- package/src/ui/dist/assets/git-commit-horizontal-BUh6G52n.js +0 -6
- package/src/ui/dist/assets/image-B9HUUddG.js +0 -6
- package/src/ui/dist/assets/index-B2B1sg-M.js +0 -1
- package/src/ui/dist/assets/index-Cgla8biy.css +0 -33
- package/src/ui/dist/assets/index-DRyx7vAc.js +0 -1
- package/src/ui/dist/assets/index-Gbl53BNp.js +0 -2496
- package/src/ui/dist/assets/pdf-effect-queue-ZtnHFCAi.js +0 -6
- package/src/ui/dist/assets/popover-DL6h35vr.js +0 -1
- package/src/ui/dist/assets/select-DvmXt1yY.js +0 -11
- package/src/ui/dist/assets/sigma-7jpXazui.js +0 -6
- package/src/ui/dist/assets/trash-xA7kFt8i.js +0 -11
- package/src/ui/dist/assets/useCliAccess-DsMwDjOp.js +0 -1
- package/src/ui/dist/assets/useFileDiffOverlay-FuhcnKiw.js +0 -1
|
@@ -0,0 +1,224 @@
|
|
|
1
|
+
id: aisb.t3.039_calf
|
|
2
|
+
name: 'CALF: Aligning LLMs for Time Series Forecasting via Cross-modal Fine-Tuning'
|
|
3
|
+
version: 0.1.0
|
|
4
|
+
one_line: Cross-modal fine-tuning framework that aligns time series distributions
|
|
5
|
+
with LLM textual representations for state-of-the-art multivariate forecasting.
|
|
6
|
+
task_description: 'This packaged benchmark covers CALF (Cross-ModAl LLM Fine-Tuning),
|
|
7
|
+
a framework that bridges the distribution discrepancy between temporal data and
|
|
8
|
+
the textual nature of LLMs. The task involves adapting a language-model-style architecture
|
|
9
|
+
to time-series inputs through three cross-modal fine-tuning techniques: (1) Cross-Modal
|
|
10
|
+
Match Module for input distribution alignment via principal word embedding extraction
|
|
11
|
+
and cross-attention; (2) Feature Regularization Loss for intermediate layer alignment
|
|
12
|
+
and gradient guidance; (3) Output Consistency Loss for resolving representation
|
|
13
|
+
space discrepancies. Supports both long-term (ETT-small, electricity, traffic, weather)
|
|
14
|
+
and short-term (M4) forecasting tasks with seq_len/pred_len configurations.
|
|
15
|
+
|
|
16
|
+
'
|
|
17
|
+
task_mode: experiment_driven
|
|
18
|
+
requires_execution: true
|
|
19
|
+
requires_paper: true
|
|
20
|
+
integrity_level: cas_plus_canary
|
|
21
|
+
snapshot_status: runnable
|
|
22
|
+
support_level: advanced
|
|
23
|
+
time_band: 1d+
|
|
24
|
+
cost_band: high
|
|
25
|
+
difficulty: hard
|
|
26
|
+
data_access: public
|
|
27
|
+
primary_outputs:
|
|
28
|
+
- mse
|
|
29
|
+
- mae
|
|
30
|
+
- forecast_checkpoints
|
|
31
|
+
launch_profiles:
|
|
32
|
+
- id: quick_eval
|
|
33
|
+
label: Quick Eval
|
|
34
|
+
description: Run one packaged long-term forecasting script on a prepared dataset
|
|
35
|
+
using default ETTh2 configuration (seq_len=96, pred_len=96/192/336/720).
|
|
36
|
+
- id: full_train_eval
|
|
37
|
+
label: Full Train + Eval
|
|
38
|
+
description: Run the complete CALF training and forecasting evaluation workflow
|
|
39
|
+
across all supported datasets (long-term and short-term forecasting).
|
|
40
|
+
- id: few_shot_forecasting
|
|
41
|
+
label: Few-Shot Forecasting
|
|
42
|
+
description: Run few-shot forecasting evaluation to assess LLM generalization with
|
|
43
|
+
limited training data.
|
|
44
|
+
- id: zero_shot_forecasting
|
|
45
|
+
label: Zero-Shot Forecasting
|
|
46
|
+
description: Run zero-shot forecasting evaluation to assess transfer capabilities.
|
|
47
|
+
dataset_download:
|
|
48
|
+
primary_method: mixed
|
|
49
|
+
sources:
|
|
50
|
+
- name: Long-term datasets (Autoformer)
|
|
51
|
+
url: https://drive.google.com/drive/folders/1ZOYpTUa82_jCcxIdTmyr0LXQfvaM9vIy
|
|
52
|
+
type: google_drive
|
|
53
|
+
files:
|
|
54
|
+
- ETT-small/ETTh1.csv
|
|
55
|
+
- ETT-small/ETTh2.csv
|
|
56
|
+
- ETT-small/ETTm1.csv
|
|
57
|
+
- ETT-small/ETTm2.csv
|
|
58
|
+
- electricity/electricity.csv
|
|
59
|
+
- traffic/traffic.csv
|
|
60
|
+
- weather/weather.csv
|
|
61
|
+
- name: Short-term M4 dataset
|
|
62
|
+
url: https://drive.google.com/drive/folders/15zio96o3NK4XOoR5L88oaWcJDVOiqQo9
|
|
63
|
+
type: google_drive
|
|
64
|
+
files:
|
|
65
|
+
- m4/
|
|
66
|
+
notes:
|
|
67
|
+
- Long-term datasets must be organized under ./datasets/{dataset_name}/{filename}.csv
|
|
68
|
+
- M4 dataset should be placed under ./datasets/m4/
|
|
69
|
+
- Word token embeddings require extraction via pca.py before training (saves to
|
|
70
|
+
./wte_pca_500.pt)
|
|
71
|
+
credential_requirements:
|
|
72
|
+
mode: none
|
|
73
|
+
items: []
|
|
74
|
+
notes:
|
|
75
|
+
- Google Drive links for datasets require no authentication for public access
|
|
76
|
+
- External LLM weights (GPT-2 based) downloaded automatically via transformers library
|
|
77
|
+
resources:
|
|
78
|
+
minimum:
|
|
79
|
+
cpu_cores: 16
|
|
80
|
+
ram_gb: 64
|
|
81
|
+
disk_gb: 100
|
|
82
|
+
gpu_count: 1
|
|
83
|
+
gpu_vram_gb: 24
|
|
84
|
+
notes: Single GPU sufficient for seq_len=96, pred_len up to 720; batch_size may
|
|
85
|
+
need reduction on lower VRAM
|
|
86
|
+
recommended:
|
|
87
|
+
cpu_cores: 32
|
|
88
|
+
ram_gb: 128
|
|
89
|
+
disk_gb: 200
|
|
90
|
+
gpu_count: 1
|
|
91
|
+
gpu_vram_gb: 48
|
|
92
|
+
notes: 48GB VRAM allows batch_size=256 with all prediction lengths; higher VRAM
|
|
93
|
+
enables faster training
|
|
94
|
+
environment:
|
|
95
|
+
python: '3.9'
|
|
96
|
+
cuda: '11.8'
|
|
97
|
+
pytorch: 2.1.0
|
|
98
|
+
flash_attn: null
|
|
99
|
+
key_packages:
|
|
100
|
+
- transformers==4.30.1
|
|
101
|
+
- torch>=2.1.0
|
|
102
|
+
- numpy
|
|
103
|
+
- pandas
|
|
104
|
+
- scikit-learn
|
|
105
|
+
notes:
|
|
106
|
+
- Python 3.9 explicitly required by project README
|
|
107
|
+
- Exact PyTorch/CUDA versions delegated to user environment; 2.1.0+ with CUDA 11.8
|
|
108
|
+
recommended
|
|
109
|
+
- See bundled requirements.txt for full dependency set
|
|
110
|
+
- LLM backbone based on GPT-2 architecture (downloaded via transformers)
|
|
111
|
+
risk_flags:
|
|
112
|
+
- high_compute_cost
|
|
113
|
+
- multi_hour_training
|
|
114
|
+
- large_download
|
|
115
|
+
risk_notes:
|
|
116
|
+
- Full evaluation across all datasets and prediction lengths requires extended training
|
|
117
|
+
time
|
|
118
|
+
- Dataset downloads from Google Drive total several GB
|
|
119
|
+
- GPU memory requirements scale with batch_size and prediction length
|
|
120
|
+
- Training uses fixed seed (2021) for reproducibility
|
|
121
|
+
recommended_when: 'Use this benchmark when evaluating cross-modal time-series alignment,
|
|
122
|
+
LLM fine-tuning for temporal data, long-horizon forecasting performance, or few-shot/zero-shot
|
|
123
|
+
generalization capabilities of language model backbones on time series tasks.
|
|
124
|
+
|
|
125
|
+
'
|
|
126
|
+
not_recommended_when: 'Do not use this benchmark if you need a lightweight CPU-first
|
|
127
|
+
time-series benchmark, cannot provision GPUs with 24GB+ VRAM, or require inference-only
|
|
128
|
+
evaluation without training infrastructure.
|
|
129
|
+
|
|
130
|
+
'
|
|
131
|
+
paper:
|
|
132
|
+
title: 'CALF: Aligning LLMs for Time Series Forecasting via Cross-modal Fine-Tuning'
|
|
133
|
+
authors:
|
|
134
|
+
- Peiyuan Liu
|
|
135
|
+
- Hang Guo
|
|
136
|
+
- Tao Dai
|
|
137
|
+
- Naiqi Li
|
|
138
|
+
- Jigang Bao
|
|
139
|
+
- Xudong Ren
|
|
140
|
+
- Yong Jiang
|
|
141
|
+
- Shu-Tao Xia
|
|
142
|
+
venue: arXiv preprint
|
|
143
|
+
year: 2024
|
|
144
|
+
arxiv: '2403.07300'
|
|
145
|
+
url: https://arxiv.org/abs/2403.07300
|
|
146
|
+
code_url: https://github.com/Hank0626/CALF
|
|
147
|
+
affiliations:
|
|
148
|
+
- Tsinghua Shenzhen International Graduate School
|
|
149
|
+
- Shenzhen University
|
|
150
|
+
- Pengcheng Laboratory
|
|
151
|
+
download:
|
|
152
|
+
url: https://github.com/ResearAI/DeepScientist/releases/download/aisb-v0.0.1/aisb.t3.039_calf.zip
|
|
153
|
+
archive_type: zip
|
|
154
|
+
local_dir_name: paper-39-CALF
|
|
155
|
+
provider: github_release
|
|
156
|
+
repo: ResearAI/DeepScientist
|
|
157
|
+
tag: aisb-v0.0.1
|
|
158
|
+
asset_name: aisb.t3.039_calf.zip
|
|
159
|
+
sha256: bf50089c65f0004d0e49a6d82c6225df9b7baacceead8814a715540189a49082
|
|
160
|
+
size_bytes: 3132751
|
|
161
|
+
display:
|
|
162
|
+
palette_seed: teal-amber-signal
|
|
163
|
+
art_style: forecasting-lab
|
|
164
|
+
accent_priority: high
|
|
165
|
+
tags:
|
|
166
|
+
- large_language_models
|
|
167
|
+
- cross_modal_learning
|
|
168
|
+
- time_series_forecasting
|
|
169
|
+
- multivariate_forecasting
|
|
170
|
+
- llm_finetuning
|
|
171
|
+
- transformers
|
|
172
|
+
image_path: ../image/039_aisb.t3.039_calf.jpg
|
|
173
|
+
execution_anchors:
|
|
174
|
+
entry_point: run.py
|
|
175
|
+
main_script: scripts/long_term_forecasting/ETTh2.sh
|
|
176
|
+
key_modules:
|
|
177
|
+
- exp/exp_long_term_forecasting.py
|
|
178
|
+
- exp/exp_short_term_forecasting.py
|
|
179
|
+
- utils/metrics.py
|
|
180
|
+
- utils/cmLoss.py
|
|
181
|
+
- data_provider/data_factory.py
|
|
182
|
+
default_args:
|
|
183
|
+
task_name: long_term_forecast
|
|
184
|
+
model: CALF
|
|
185
|
+
seq_len: 96
|
|
186
|
+
pred_len: 96
|
|
187
|
+
batch_size: 256
|
|
188
|
+
learning_rate: 0.0005
|
|
189
|
+
train_epochs: 100
|
|
190
|
+
d_model: 768
|
|
191
|
+
n_heads: 4
|
|
192
|
+
gpt_layers: 6
|
|
193
|
+
metric_implementations:
|
|
194
|
+
mse:
|
|
195
|
+
source: utils/metrics.py::MSE
|
|
196
|
+
code_backed: true
|
|
197
|
+
mae:
|
|
198
|
+
source: utils/metrics.py::MAE
|
|
199
|
+
code_backed: true
|
|
200
|
+
rmse:
|
|
201
|
+
source: utils/metrics.py::RMSE
|
|
202
|
+
code_backed: true
|
|
203
|
+
mape:
|
|
204
|
+
source: utils/metrics.py::MAPE
|
|
205
|
+
code_backed: true
|
|
206
|
+
mspe:
|
|
207
|
+
source: utils/metrics.py::MSPE
|
|
208
|
+
code_backed: true
|
|
209
|
+
output_artifacts:
|
|
210
|
+
checkpoints:
|
|
211
|
+
path: ./checkpoints
|
|
212
|
+
format: pytorch_model
|
|
213
|
+
results:
|
|
214
|
+
path: ./results
|
|
215
|
+
format: npy
|
|
216
|
+
summaries:
|
|
217
|
+
path: ./results_{task_name}.txt
|
|
218
|
+
format: text
|
|
219
|
+
recovery_notes: 'Post-training artifacts include checkpoints in ./checkpoints, numerical
|
|
220
|
+
results in .npy format under ./results, and performance summaries in ./results_{task_name}.txt.
|
|
221
|
+
Early stopping with patience=5 is implemented. Training uses fixed seed (2021) for
|
|
222
|
+
reproducibility.
|
|
223
|
+
|
|
224
|
+
'
|
|
@@ -0,0 +1,194 @@
|
|
|
1
|
+
id: aisb.t3.039_calf
|
|
2
|
+
name: 'CALF:通过跨模态微调实现大语言模型时间序列预测对齐'
|
|
3
|
+
version: 0.1.0
|
|
4
|
+
one_line: 跨模态微调框架,通过大语言模型文本表示对齐时间序列分布,实现最先进的多元预测。
|
|
5
|
+
task_description: |-
|
|
6
|
+
本打包基准测试涵盖 CALF(跨模态大语言模型微调),这是一个弥合时间数据与 LLM 文本性质之间分布差异的框架。该任务涉及通过三种跨模态微调技术将类语言模型架构适配于时间序列输入:(1)跨模态匹配模块,通过主词嵌入提取和交叉注意力实现输入分布对齐;(2)特征正则化损失,用于中间层对齐和梯度引导;(3)输出一致性损失,用于解决表示空间差异。支持长期(ETT-small、electricity、traffic、weather)和短期(M4)预测任务,配置 seq_len/pred_len。
|
|
7
|
+
|
|
8
|
+
task_mode: experiment_driven
|
|
9
|
+
requires_execution: true
|
|
10
|
+
requires_paper: true
|
|
11
|
+
integrity_level: cas_plus_canary
|
|
12
|
+
snapshot_status: runnable
|
|
13
|
+
support_level: advanced
|
|
14
|
+
time_band: 1d+
|
|
15
|
+
cost_band: high
|
|
16
|
+
difficulty: hard
|
|
17
|
+
data_access: public
|
|
18
|
+
primary_outputs:
|
|
19
|
+
- mse
|
|
20
|
+
- mae
|
|
21
|
+
- forecast_checkpoints
|
|
22
|
+
launch_profiles:
|
|
23
|
+
- id: quick_eval
|
|
24
|
+
label: 快速评估
|
|
25
|
+
description: 使用默认 ETTh2 配置(seq_len=96,pred_len=96/192/336/720)在准备好的数据集上运行一个打包的长期预测脚本。
|
|
26
|
+
- id: full_train_eval
|
|
27
|
+
label: 完整训练 + 评估
|
|
28
|
+
description: 在所有支持的数据集(长期和短期预测)上运行完整的 CALF 训练和预测评估工作流程。
|
|
29
|
+
- id: few_shot_forecasting
|
|
30
|
+
label: 小样本预测
|
|
31
|
+
description: 运行小样本预测评估以评估 LLM 在有限训练数据下的泛化能力。
|
|
32
|
+
- id: zero_shot_forecasting
|
|
33
|
+
label: 零样本预测
|
|
34
|
+
description: 运行零样本预测评估以评估迁移能力。
|
|
35
|
+
dataset_download:
|
|
36
|
+
primary_method: mixed
|
|
37
|
+
sources:
|
|
38
|
+
- name: 长期数据集(Autoformer)
|
|
39
|
+
url: https://drive.google.com/drive/folders/1ZOYpTUa82_jCcxIdTmyr0LXQfvaM9vIy
|
|
40
|
+
type: google_drive
|
|
41
|
+
files:
|
|
42
|
+
- ETT-small/ETTh1.csv
|
|
43
|
+
- ETT-small/ETTh2.csv
|
|
44
|
+
- ETT-small/ETTm1.csv
|
|
45
|
+
- ETT-small/ETTm2.csv
|
|
46
|
+
- electricity/electricity.csv
|
|
47
|
+
- traffic/traffic.csv
|
|
48
|
+
- weather/weather.csv
|
|
49
|
+
- name: 短期 M4 数据集
|
|
50
|
+
url: https://drive.google.com/drive/folders/15zio96o3NK4XOoR5L88oaWcJDVOiqQo9
|
|
51
|
+
type: google_drive
|
|
52
|
+
files:
|
|
53
|
+
- m4/
|
|
54
|
+
notes:
|
|
55
|
+
- 长期数据集必须按 ./datasets/{dataset_name}/{filename}.csv 组织
|
|
56
|
+
- M4 数据集应放置在 ./datasets/m4/ 下
|
|
57
|
+
- 词标记嵌入需要通过 pca.py 在训练前提取(保存至 ./wte_pca_500.pt)
|
|
58
|
+
credential_requirements:
|
|
59
|
+
mode: none
|
|
60
|
+
items: []
|
|
61
|
+
notes:
|
|
62
|
+
- 数据集的 Google Drive 链接无需认证即可公开访问
|
|
63
|
+
- 外部 LLM 权重(基于 GPT-2)通过 transformers 库自动下载
|
|
64
|
+
resources:
|
|
65
|
+
minimum:
|
|
66
|
+
cpu_cores: 16
|
|
67
|
+
ram_gb: 64
|
|
68
|
+
disk_gb: 100
|
|
69
|
+
gpu_count: 1
|
|
70
|
+
gpu_vram_gb: 24
|
|
71
|
+
notes: 单 GPU 足以支持 seq_len=96,pred_len 最高 720;显存较低时可能需要减小 batch_size
|
|
72
|
+
recommended:
|
|
73
|
+
cpu_cores: 32
|
|
74
|
+
ram_gb: 128
|
|
75
|
+
disk_gb: 200
|
|
76
|
+
gpu_count: 1
|
|
77
|
+
gpu_vram_gb: 48
|
|
78
|
+
notes: 48GB 显存允许 batch_size=256 搭配所有预测长度;更高显存可加快训练速度
|
|
79
|
+
environment:
|
|
80
|
+
python: '3.9'
|
|
81
|
+
cuda: '11.8'
|
|
82
|
+
pytorch: 2.1.0
|
|
83
|
+
flash_attn: null
|
|
84
|
+
key_packages:
|
|
85
|
+
- transformers==4.30.1
|
|
86
|
+
- torch>=2.1.0
|
|
87
|
+
- numpy
|
|
88
|
+
- pandas
|
|
89
|
+
- scikit-learn
|
|
90
|
+
notes:
|
|
91
|
+
- 项目 README 明确要求 Python 3.9
|
|
92
|
+
- 精确的 PyTorch/CUDA 版本由用户环境决定;推荐 2.1.0+ 搭配 CUDA 11.8
|
|
93
|
+
- 完整依赖项请参阅打包的 requirements.txt
|
|
94
|
+
- LLM 主干基于 GPT-2 架构(通过 transformers 下载)
|
|
95
|
+
risk_flags:
|
|
96
|
+
- high_compute_cost
|
|
97
|
+
- multi_hour_training
|
|
98
|
+
- large_download
|
|
99
|
+
risk_notes:
|
|
100
|
+
- 在所有数据集和预测长度上进行完整评估需要较长的训练时间
|
|
101
|
+
- 从 Google Drive 下载数据集总计数 GB
|
|
102
|
+
- GPU 显存需求随 batch_size 和预测长度增加而增加
|
|
103
|
+
- 训练使用固定随机种子(2021)以确保可重复性
|
|
104
|
+
recommended_when: 在评估跨模态时间序列对齐、时序数据 LLM 微调、长期预测性能,或语言模型主干在时间序列任务上的小样本/零样本泛化能力时使用此基准测试。
|
|
105
|
+
not_recommended_when: 如果需要轻量级 CPU 优先的时间序列基准测试、无法提供 24GB+ 显存的 GPU,或需要无训练基础设施的纯推理评估,请勿使用此基准测试。
|
|
106
|
+
paper:
|
|
107
|
+
title: 'CALF: Aligning LLMs for Time Series Forecasting via Cross-modal Fine-Tuning'
|
|
108
|
+
authors:
|
|
109
|
+
- Peiyuan Liu
|
|
110
|
+
- Hang Guo
|
|
111
|
+
- Tao Dai
|
|
112
|
+
- Naiqi Li
|
|
113
|
+
- Jigang Bao
|
|
114
|
+
- Xudong Ren
|
|
115
|
+
- Yong Jiang
|
|
116
|
+
- Shu-Tao Xia
|
|
117
|
+
venue: arXiv preprint
|
|
118
|
+
year: 2024
|
|
119
|
+
arxiv: '2403.07300'
|
|
120
|
+
url: https://arxiv.org/abs/2403.07300
|
|
121
|
+
code_url: https://github.com/Hank0626/CALF
|
|
122
|
+
affiliations:
|
|
123
|
+
- Tsinghua Shenzhen International Graduate School
|
|
124
|
+
- Shenzhen University
|
|
125
|
+
- Pengcheng Laboratory
|
|
126
|
+
download:
|
|
127
|
+
url: https://github.com/ResearAI/DeepScientist/releases/download/aisb-v0.0.1/aisb.t3.039_calf.zip
|
|
128
|
+
archive_type: zip
|
|
129
|
+
local_dir_name: paper-39-CALF
|
|
130
|
+
provider: github_release
|
|
131
|
+
repo: ResearAI/DeepScientist
|
|
132
|
+
tag: aisb-v0.0.1
|
|
133
|
+
asset_name: aisb.t3.039_calf.zip
|
|
134
|
+
sha256: bf50089c65f0004d0e49a6d82c6225df9b7baacceead8814a715540189a49082
|
|
135
|
+
size_bytes: 3132751
|
|
136
|
+
display:
|
|
137
|
+
palette_seed: teal-amber-signal
|
|
138
|
+
art_style: forecasting-lab
|
|
139
|
+
accent_priority: high
|
|
140
|
+
tags:
|
|
141
|
+
- large_language_models
|
|
142
|
+
- cross_modal_learning
|
|
143
|
+
- time_series_forecasting
|
|
144
|
+
- multivariate_forecasting
|
|
145
|
+
- llm_finetuning
|
|
146
|
+
- transformers
|
|
147
|
+
image_path: ../image/039_aisb.t3.039_calf.jpg
|
|
148
|
+
execution_anchors:
|
|
149
|
+
entry_point: run.py
|
|
150
|
+
main_script: scripts/long_term_forecasting/ETTh2.sh
|
|
151
|
+
key_modules:
|
|
152
|
+
- exp/exp_long_term_forecasting.py
|
|
153
|
+
- exp/exp_short_term_forecasting.py
|
|
154
|
+
- utils/metrics.py
|
|
155
|
+
- utils/cmLoss.py
|
|
156
|
+
- data_provider/data_factory.py
|
|
157
|
+
default_args:
|
|
158
|
+
task_name: long_term_forecast
|
|
159
|
+
model: CALF
|
|
160
|
+
seq_len: 96
|
|
161
|
+
pred_len: 96
|
|
162
|
+
batch_size: 256
|
|
163
|
+
learning_rate: 0.0005
|
|
164
|
+
train_epochs: 100
|
|
165
|
+
d_model: 768
|
|
166
|
+
n_heads: 4
|
|
167
|
+
gpt_layers: 6
|
|
168
|
+
metric_implementations:
|
|
169
|
+
mse:
|
|
170
|
+
source: utils/metrics.py::MSE
|
|
171
|
+
code_backed: true
|
|
172
|
+
mae:
|
|
173
|
+
source: utils/metrics.py::MAE
|
|
174
|
+
code_backed: true
|
|
175
|
+
rmse:
|
|
176
|
+
source: utils/metrics.py::RMSE
|
|
177
|
+
code_backed: true
|
|
178
|
+
mape:
|
|
179
|
+
source: utils/metrics.py::MAPE
|
|
180
|
+
code_backed: true
|
|
181
|
+
mspe:
|
|
182
|
+
source: utils/metrics.py::MSPE
|
|
183
|
+
code_backed: true
|
|
184
|
+
output_artifacts:
|
|
185
|
+
checkpoints:
|
|
186
|
+
path: ./checkpoints
|
|
187
|
+
format: pytorch_model
|
|
188
|
+
results:
|
|
189
|
+
path: ./results
|
|
190
|
+
format: npy
|
|
191
|
+
summaries:
|
|
192
|
+
path: ./results_{task_name}.txt
|
|
193
|
+
format: text
|
|
194
|
+
recovery_notes: 训练后的产物包括 ./checkpoints 中的检查点、./results 下 .npy 格式的数值结果,以及 ./results_{task_name}.txt 中的性能摘要。已实现早停机制(patience=5)。训练使用固定随机种子(2021)以确保可重复性。
|
|
@@ -0,0 +1,199 @@
|
|
|
1
|
+
id: aisb.t3.040_graniteguardian
|
|
2
|
+
name: 'Granite Guardian: Comprehensive LLM Safeguarding'
|
|
3
|
+
version: 0.1.0
|
|
4
|
+
one_line: Multi-head LLM safeguard that detects jailbreaks, hallucinations, and other
|
|
5
|
+
risks across prompts and responses under configurable criteria.
|
|
6
|
+
task_description: 'This packaged benchmark covers Granite Guardian style LLM safeguarding
|
|
7
|
+
across risk dimensions including jailbreaks, social bias, profanity, violence, sexual
|
|
8
|
+
content, unethical behavior, and RAG-specific hallucination risks (context relevance,
|
|
9
|
+
groundedness, answer relevance). The task involves evaluating or calibrating risk
|
|
10
|
+
detection heads so that safety detection metrics improve. The current snapshot bundles
|
|
11
|
+
cookbook-style usage routes and Jupyter notebooks demonstrating model inference
|
|
12
|
+
with vLLM, but the packaged XSTEST-style evaluator for faithful metric reproduction
|
|
13
|
+
is not yet included in the archive. External evaluation against the full metric
|
|
14
|
+
contract is required before treating this as a complete benchmark route.
|
|
15
|
+
|
|
16
|
+
'
|
|
17
|
+
capability_tags:
|
|
18
|
+
- llm_safety
|
|
19
|
+
- guardrails
|
|
20
|
+
- risk_detection
|
|
21
|
+
- classification
|
|
22
|
+
- evaluation
|
|
23
|
+
- rag_quality
|
|
24
|
+
- content_moderation
|
|
25
|
+
- research_code_optimization
|
|
26
|
+
aisb_direction: T3
|
|
27
|
+
track_fit:
|
|
28
|
+
- paper_track
|
|
29
|
+
- benchmark_track
|
|
30
|
+
task_mode: evaluation_driven
|
|
31
|
+
requires_execution: true
|
|
32
|
+
requires_paper: true
|
|
33
|
+
integrity_level: cas_plus_canary
|
|
34
|
+
snapshot_status: external_eval_required
|
|
35
|
+
support_level: recovery
|
|
36
|
+
time_band: 2-6h
|
|
37
|
+
cost_band: medium
|
|
38
|
+
difficulty: medium
|
|
39
|
+
data_access: public
|
|
40
|
+
primary_outputs:
|
|
41
|
+
- xstest_rh_auc
|
|
42
|
+
- xstest_rh_f1
|
|
43
|
+
- xstest_rr_auc
|
|
44
|
+
- xstest_rr_f1
|
|
45
|
+
- risk_scoring_examples
|
|
46
|
+
- guardrail_auc
|
|
47
|
+
- rag_hallucination_auc
|
|
48
|
+
launch_profiles:
|
|
49
|
+
- id: cookbook_demo
|
|
50
|
+
label: Cookbook Demo
|
|
51
|
+
description: 'Run the packaged Granite Guardian cookbooks (vLLM-based notebooks)
|
|
52
|
+
to verify model usage and risk scoring behavior for configured criteria. Covers
|
|
53
|
+
jailbreak detection, RAG groundedness checking, and bring-your-own-criteria workflows.
|
|
54
|
+
|
|
55
|
+
'
|
|
56
|
+
- id: restore_eval
|
|
57
|
+
label: Restore Eval Route
|
|
58
|
+
description: 'Attach or restore the missing XSTEST-style evaluator before treating
|
|
59
|
+
this benchmark as a faithful metric reproduction route. Current snapshot lacks
|
|
60
|
+
executable anchors for XSTEST metrics.
|
|
61
|
+
|
|
62
|
+
'
|
|
63
|
+
- id: external_benchmark_eval
|
|
64
|
+
label: External Benchmark Eval
|
|
65
|
+
description: 'Evaluate Granite Guardian models on external benchmarks including
|
|
66
|
+
XSTEST, LLM-AggreFact, and REVEAL to obtain reproducible AUC and F1 scores.
|
|
67
|
+
|
|
68
|
+
'
|
|
69
|
+
dataset_download:
|
|
70
|
+
primary_method: huggingface
|
|
71
|
+
sources:
|
|
72
|
+
- name: Granite Guardian Models
|
|
73
|
+
url: https://huggingface.co/ibm-granite
|
|
74
|
+
notes:
|
|
75
|
+
- granite-guardian-3.3-8b
|
|
76
|
+
- granite-guardian-3.2-5b
|
|
77
|
+
- granite-guardian-3.2-3b-a800m
|
|
78
|
+
- granite-guardian-3.1-8b
|
|
79
|
+
- granite-guardian-3.1-2b
|
|
80
|
+
- name: Evaluation Datasets
|
|
81
|
+
url: https://huggingface.co/ibm
|
|
82
|
+
notes:
|
|
83
|
+
- SocialStigmaQA
|
|
84
|
+
- AttaQ
|
|
85
|
+
- ProvoQ
|
|
86
|
+
- WikiContradict
|
|
87
|
+
credential_requirements:
|
|
88
|
+
mode: none
|
|
89
|
+
items: []
|
|
90
|
+
notes:
|
|
91
|
+
- All model weights and datasets are publicly accessible
|
|
92
|
+
- HF Spaces demo available at https://huggingface.co/spaces/ibm-granite/granite-guardian
|
|
93
|
+
resources:
|
|
94
|
+
minimum:
|
|
95
|
+
cpu_cores: 8
|
|
96
|
+
ram_gb: 32
|
|
97
|
+
disk_gb: 80
|
|
98
|
+
gpu_count: 1
|
|
99
|
+
gpu_vram_gb: 24
|
|
100
|
+
recommended:
|
|
101
|
+
cpu_cores: 16
|
|
102
|
+
ram_gb: 64
|
|
103
|
+
disk_gb: 150
|
|
104
|
+
gpu_count: 1
|
|
105
|
+
gpu_vram_gb: 48
|
|
106
|
+
environment:
|
|
107
|
+
python: '3.10'
|
|
108
|
+
cuda: '11.8'
|
|
109
|
+
pytorch: 2.1.0
|
|
110
|
+
flash_attn: null
|
|
111
|
+
key_packages:
|
|
112
|
+
- torch
|
|
113
|
+
- transformers
|
|
114
|
+
- vllm
|
|
115
|
+
notes:
|
|
116
|
+
- See bundled README/requirements for the full dependency set
|
|
117
|
+
- vLLM recommended for efficient inference
|
|
118
|
+
- 'Content warning: examples may contain offensive language, stereotypes, or discriminatory
|
|
119
|
+
content'
|
|
120
|
+
risk_flags:
|
|
121
|
+
- blocked_metrics
|
|
122
|
+
- partial_snapshot
|
|
123
|
+
risk_notes:
|
|
124
|
+
- xstest_rh_auc metric has no executable code anchor in current snapshot
|
|
125
|
+
- xstest_rh_f1 metric has no executable code anchor in current snapshot
|
|
126
|
+
- xstest_rr_auc metric has no executable code anchor in current snapshot
|
|
127
|
+
- xstest_rr_f1 metric has no executable code anchor in current snapshot
|
|
128
|
+
- No benchmark execution was performed in this packaging pass
|
|
129
|
+
- Treat as blocked until source restoration or richer local snapshot is recovered
|
|
130
|
+
- 'Recovery rule: prefer existing code files over README-only claims'
|
|
131
|
+
recommended_when: 'Use this benchmark when you want a practical LLM-safeguard evaluation
|
|
132
|
+
task with configurable criteria, production-style monitoring use cases, or RAG pipeline
|
|
133
|
+
safety verification. Particularly suited for evaluating risk detection in retrieval-augmented
|
|
134
|
+
generation settings (groundedness, context relevance, answer relevance) and jailbreak
|
|
135
|
+
resistance testing.
|
|
136
|
+
|
|
137
|
+
'
|
|
138
|
+
not_recommended_when: 'Do not use this if you need a fully executable benchmark with
|
|
139
|
+
XSTEST metric anchors in the current snapshot, or if your focus is on generative
|
|
140
|
+
quality rather than safety and verification. Not suitable when faithful metric
|
|
141
|
+
reproduction is required without external evaluation setup.
|
|
142
|
+
|
|
143
|
+
'
|
|
144
|
+
paper:
|
|
145
|
+
title: 'Granite Guardian: Comprehensive LLM Safeguarding'
|
|
146
|
+
authors: 'Inkit Padhi, Manish Nagireddy, Giandomenico Cornacchia, Subhajit Chaudhury,
|
|
147
|
+
Tejaswini Pedapati, Pierre Dognin, Keerthiram Murugesan, Erik Miehling, Martin
|
|
148
|
+
Santillan, Cooper Kieran Fraser, Giulio Zizzo, Muhammad Zaid Hameed, Mark Purcell,
|
|
149
|
+
Michael Desmond, Qian Pan, Zahra Ashktorab, Inge Vejsbjerg, Elizabeth Daly, Michael
|
|
150
|
+
Hind, Werner Geyer, Ambrish Rawat, Kush R. Varshney, Prasanna Sattigeri
|
|
151
|
+
|
|
152
|
+
'
|
|
153
|
+
venue: NAACL 2025 Industry Track
|
|
154
|
+
year: 2025
|
|
155
|
+
url: https://arxiv.org/abs/2412.07724
|
|
156
|
+
github: https://github.com/ibm-granite/granite-guardian
|
|
157
|
+
license: Apache 2.0
|
|
158
|
+
key_findings:
|
|
159
|
+
- AUC scores of 0.871 on harmful content benchmarks
|
|
160
|
+
- AUC scores of 0.854 on RAG-hallucination-related benchmarks
|
|
161
|
+
- Outperforms other open-source and closed-source models on deployment-focused metrics
|
|
162
|
+
- Trained on unique dataset combining human annotations and synthetic data
|
|
163
|
+
- Supports bring-your-own-criteria for custom risk detection
|
|
164
|
+
download:
|
|
165
|
+
url: https://github.com/ResearAI/DeepScientist/releases/download/aisb-v0.0.1/aisb.t3.040_graniteguardian.zip
|
|
166
|
+
archive_type: zip
|
|
167
|
+
local_dir_name: paper-40-GraniteGuardian
|
|
168
|
+
provider: github_release
|
|
169
|
+
repo: ResearAI/DeepScientist
|
|
170
|
+
tag: aisb-v0.0.1
|
|
171
|
+
asset_name: aisb.t3.040_graniteguardian.zip
|
|
172
|
+
sha256: c98d57a175b4dc64b7f4bb81e3c4c16adb38a98775335f077f780d491e92b4e2
|
|
173
|
+
size_bytes: 2545921
|
|
174
|
+
display:
|
|
175
|
+
palette_seed: indigo-sand-shield
|
|
176
|
+
art_style: safety-console
|
|
177
|
+
accent_priority: high
|
|
178
|
+
image_path: ../image/040_aisb.t3.040_graniteguardian.jpg
|
|
179
|
+
code_anchors:
|
|
180
|
+
executable:
|
|
181
|
+
- cookbooks/granite-guardian-3.3/quickstart.ipynb
|
|
182
|
+
- cookbooks/granite-guardian-3.2/detailed_guide_vllm.ipynb
|
|
183
|
+
- cookbooks/granite-guardian-3.1/quick_start_vllm.ipynb
|
|
184
|
+
code_roots:
|
|
185
|
+
- cookbooks/
|
|
186
|
+
blocked_metrics:
|
|
187
|
+
- xstest_rh_auc
|
|
188
|
+
- xstest_rh_f1
|
|
189
|
+
- xstest_rr_auc
|
|
190
|
+
- xstest_rr_f1
|
|
191
|
+
recovery_rule: 'Prefer existing code files over README-only claims when choosing origin_path,
|
|
192
|
+
source_ref, and evaluation_protocol.code_paths. Keep metrics_summary empty until
|
|
193
|
+
trusted execution outputs exist. Mark metrics as provisional/blocked if no executable
|
|
194
|
+
anchor exists in current snapshot.
|
|
195
|
+
|
|
196
|
+
'
|
|
197
|
+
commercial:
|
|
198
|
+
annual_fee: null
|
|
199
|
+
notes: Models released as open-source under Apache 2.0 license
|