@researai/deepscientist 1.5.17 → 1.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (894):
  1. package/AGENTS.md +309 -130
  2. package/AISB/catalog/aisb.b1.agentic_coding.yaml +244 -0
  3. package/AISB/catalog/aisb.b10.climate_earth.yaml +235 -0
  4. package/AISB/catalog/aisb.b11.model_efficiency.yaml +231 -0
  5. package/AISB/catalog/aisb.b12.embodied_ai.yaml +238 -0
  6. package/AISB/catalog/aisb.b2.agent_systems.yaml +229 -0
  7. package/AISB/catalog/aisb.b3.self_evolving_rl.yaml +237 -0
  8. package/AISB/catalog/aisb.b4.lm_reasoning.yaml +240 -0
  9. package/AISB/catalog/aisb.b5.math_proof.yaml +235 -0
  10. package/AISB/catalog/aisb.b6.research_process.yaml +243 -0
  11. package/AISB/catalog/aisb.b7.multimodal_fusion.yaml +232 -0
  12. package/AISB/catalog/aisb.b8.lifesci_drug.yaml +275 -0
  13. package/AISB/catalog/aisb.b9.material_science.yaml +237 -0
  14. package/AISB/catalog/aisb.t3.001_savvy.yaml +159 -0
  15. package/AISB/catalog/aisb.t3.001_savvy.zh.yaml +121 -0
  16. package/AISB/catalog/aisb.t3.002_pinet.yaml +189 -0
  17. package/AISB/catalog/aisb.t3.002_pinet.zh.yaml +130 -0
  18. package/AISB/catalog/aisb.t3.004_decentralattn.yaml +184 -0
  19. package/AISB/catalog/aisb.t3.004_decentralattn.zh.yaml +153 -0
  20. package/AISB/catalog/aisb.t3.005_tsae.yaml +193 -0
  21. package/AISB/catalog/aisb.t3.005_tsae.zh.yaml +139 -0
  22. package/AISB/catalog/aisb.t3.006_physense.yaml +194 -0
  23. package/AISB/catalog/aisb.t3.006_physense.zh.yaml +118 -0
  24. package/AISB/catalog/aisb.t3.007_reasoningiqa.yaml +169 -0
  25. package/AISB/catalog/aisb.t3.007_reasoningiqa.zh.yaml +133 -0
  26. package/AISB/catalog/aisb.t3.008_meanflows.yaml +188 -0
  27. package/AISB/catalog/aisb.t3.008_meanflows.zh.yaml +140 -0
  28. package/AISB/catalog/aisb.t3.009_scoremissing.yaml +179 -0
  29. package/AISB/catalog/aisb.t3.009_scoremissing.zh.yaml +119 -0
  30. package/AISB/catalog/aisb.t3.010_suitabilityfilter.yaml +221 -0
  31. package/AISB/catalog/aisb.t3.010_suitabilityfilter.zh.yaml +141 -0
  32. package/AISB/catalog/aisb.t3.011_osd.yaml +206 -0
  33. package/AISB/catalog/aisb.t3.011_osd.zh.yaml +163 -0
  34. package/AISB/catalog/aisb.t3.012_efficientqat.yaml +206 -0
  35. package/AISB/catalog/aisb.t3.012_efficientqat.zh.yaml +159 -0
  36. package/AISB/catalog/aisb.t3.013_appl.yaml +152 -0
  37. package/AISB/catalog/aisb.t3.013_appl.zh.yaml +126 -0
  38. package/AISB/catalog/aisb.t3.014_piguard.yaml +207 -0
  39. package/AISB/catalog/aisb.t3.014_piguard.zh.yaml +164 -0
  40. package/AISB/catalog/aisb.t3.015_frspec.yaml +209 -0
  41. package/AISB/catalog/aisb.t3.015_frspec.zh.yaml +163 -0
  42. package/AISB/catalog/aisb.t3.016_mathfusion.yaml +166 -0
  43. package/AISB/catalog/aisb.t3.016_mathfusion.zh.yaml +145 -0
  44. package/AISB/catalog/aisb.t3.017_multimodalglp.yaml +171 -0
  45. package/AISB/catalog/aisb.t3.017_multimodalglp.zh.yaml +122 -0
  46. package/AISB/catalog/aisb.t3.018_cotsynth.yaml +206 -0
  47. package/AISB/catalog/aisb.t3.018_cotsynth.zh.yaml +162 -0
  48. package/AISB/catalog/aisb.t3.019_dyscaleut.yaml +211 -0
  49. package/AISB/catalog/aisb.t3.019_dyscaleut.zh.yaml +148 -0
  50. package/AISB/catalog/aisb.t3.020_aristotle.yaml +173 -0
  51. package/AISB/catalog/aisb.t3.020_aristotle.zh.yaml +119 -0
  52. package/AISB/catalog/aisb.t3.021_tokenrecycling.yaml +160 -0
  53. package/AISB/catalog/aisb.t3.021_tokenrecycling.zh.yaml +129 -0
  54. package/AISB/catalog/aisb.t3.022_chainofreasoning.yaml +204 -0
  55. package/AISB/catalog/aisb.t3.022_chainofreasoning.zh.yaml +161 -0
  56. package/AISB/catalog/aisb.t3.023_guidedembed.yaml +211 -0
  57. package/AISB/catalog/aisb.t3.023_guidedembed.zh.yaml +189 -0
  58. package/AISB/catalog/aisb.t3.024_outputcentric.yaml +148 -0
  59. package/AISB/catalog/aisb.t3.024_outputcentric.zh.yaml +131 -0
  60. package/AISB/catalog/aisb.t3.025_deeper.yaml +143 -0
  61. package/AISB/catalog/aisb.t3.025_deeper.zh.yaml +116 -0
  62. package/AISB/catalog/aisb.t3.026_gartkg.yaml +195 -0
  63. package/AISB/catalog/aisb.t3.026_gartkg.zh.yaml +127 -0
  64. package/AISB/catalog/aisb.t3.027_citeeval.yaml +182 -0
  65. package/AISB/catalog/aisb.t3.027_citeeval.zh.yaml +135 -0
  66. package/AISB/catalog/aisb.t3.028_sbam.yaml +206 -0
  67. package/AISB/catalog/aisb.t3.028_sbam.zh.yaml +166 -0
  68. package/AISB/catalog/aisb.t3.029_cdqgeoembed.yaml +224 -0
  69. package/AISB/catalog/aisb.t3.029_cdqgeoembed.zh.yaml +142 -0
  70. package/AISB/catalog/aisb.t3.030_processrm.yaml +211 -0
  71. package/AISB/catalog/aisb.t3.030_processrm.zh.yaml +166 -0
  72. package/AISB/catalog/aisb.t3.031_circuitstability.yaml +172 -0
  73. package/AISB/catalog/aisb.t3.031_circuitstability.zh.yaml +134 -0
  74. package/AISB/catalog/aisb.t3.032_ptsolver.yaml +169 -0
  75. package/AISB/catalog/aisb.t3.032_ptsolver.zh.yaml +135 -0
  76. package/AISB/catalog/aisb.t3.033_gcse.yaml +144 -0
  77. package/AISB/catalog/aisb.t3.033_gcse.zh.yaml +126 -0
  78. package/AISB/catalog/aisb.t3.034_ensemblewm.yaml +183 -0
  79. package/AISB/catalog/aisb.t3.034_ensemblewm.zh.yaml +146 -0
  80. package/AISB/catalog/aisb.t3.035_moralvalueswa.yaml +207 -0
  81. package/AISB/catalog/aisb.t3.035_moralvalueswa.zh.yaml +165 -0
  82. package/AISB/catalog/aisb.t3.036_weakstrongpref.yaml +210 -0
  83. package/AISB/catalog/aisb.t3.036_weakstrongpref.zh.yaml +194 -0
  84. package/AISB/catalog/aisb.t3.037_dementiamask.yaml +172 -0
  85. package/AISB/catalog/aisb.t3.037_dementiamask.zh.yaml +132 -0
  86. package/AISB/catalog/aisb.t3.038_tinysam.yaml +284 -0
  87. package/AISB/catalog/aisb.t3.038_tinysam.zh.yaml +240 -0
  88. package/AISB/catalog/aisb.t3.039_calf.yaml +224 -0
  89. package/AISB/catalog/aisb.t3.039_calf.zh.yaml +194 -0
  90. package/AISB/catalog/aisb.t3.040_graniteguardian.yaml +199 -0
  91. package/AISB/catalog/aisb.t3.040_graniteguardian.zh.yaml +174 -0
  92. package/AISB/catalog/aisb.t3.041_amdm.yaml +149 -0
  93. package/AISB/catalog/aisb.t3.041_amdm.zh.yaml +137 -0
  94. package/AISB/catalog/aisb.t3.042_xpatch.yaml +216 -0
  95. package/AISB/catalog/aisb.t3.042_xpatch.zh.yaml +182 -0
  96. package/AISB/catalog/aisb.t3.043_vhm.yaml +268 -0
  97. package/AISB/catalog/aisb.t3.043_vhm.zh.yaml +193 -0
  98. package/AISB/catalog/aisb.t3.044_rgvi.yaml +224 -0
  99. package/AISB/catalog/aisb.t3.044_rgvi.zh.yaml +176 -0
  100. package/AISB/catalog/aisb.t3.045_pslstm.yaml +203 -0
  101. package/AISB/catalog/aisb.t3.045_pslstm.zh.yaml +179 -0
  102. package/AISB/catalog/aisb.t3.046_nonstatts.yaml +208 -0
  103. package/AISB/catalog/aisb.t3.046_nonstatts.zh.yaml +194 -0
  104. package/AISB/catalog/aisb.t3.047_timepfn.yaml +156 -0
  105. package/AISB/catalog/aisb.t3.047_timepfn.zh.yaml +124 -0
  106. package/AISB/catalog/aisb.t3.048_proxyspex.yaml +148 -0
  107. package/AISB/catalog/aisb.t3.048_proxyspex.zh.yaml +125 -0
  108. package/AISB/catalog/aisb.t3.049_hogwildinference.yaml +183 -0
  109. package/AISB/catalog/aisb.t3.049_hogwildinference.zh.yaml +138 -0
  110. package/AISB/catalog/aisb.t3.050_causalpfn.yaml +214 -0
  111. package/AISB/catalog/aisb.t3.050_causalpfn.zh.yaml +190 -0
  112. package/AISB/catalog/aisb.t3.051_flashtp.yaml +169 -0
  113. package/AISB/catalog/aisb.t3.051_flashtp.zh.yaml +124 -0
  114. package/AISB/catalog/aisb.t3.052_nsdiff.yaml +155 -0
  115. package/AISB/catalog/aisb.t3.052_nsdiff.zh.yaml +138 -0
  116. package/AISB/catalog/aisb.t3.053_k2vae.yaml +158 -0
  117. package/AISB/catalog/aisb.t3.053_k2vae.zh.yaml +132 -0
  118. package/AISB/catalog/aisb.t3.054_timebase.yaml +178 -0
  119. package/AISB/catalog/aisb.t3.054_timebase.zh.yaml +158 -0
  120. package/AISB/catalog/aisb.t3.055_csbrain.yaml +238 -0
  121. package/AISB/catalog/aisb.t3.055_csbrain.zh.yaml +184 -0
  122. package/AISB/catalog/aisb.t3.056_infosam.yaml +224 -0
  123. package/AISB/catalog/aisb.t3.056_infosam.zh.yaml +189 -0
  124. package/AISB/catalog/aisb.t3.057_mdreid.yaml +129 -0
  125. package/AISB/catalog/aisb.t3.057_mdreid.zh.yaml +117 -0
  126. package/AISB/catalog/aisb.t3.058_mindglitch.yaml +171 -0
  127. package/AISB/catalog/aisb.t3.058_mindglitch.zh.yaml +145 -0
  128. package/AISB/catalog/aisb.t3.059_selfsupervised.yaml +154 -0
  129. package/AISB/catalog/aisb.t3.059_selfsupervised.zh.yaml +125 -0
  130. package/AISB/catalog/aisb.t3.060_iaggad.yaml +121 -0
  131. package/AISB/catalog/aisb.t3.060_iaggad.zh.yaml +100 -0
  132. package/AISB/catalog/aisb.t3.061_hsgkn.yaml +136 -0
  133. package/AISB/catalog/aisb.t3.061_hsgkn.zh.yaml +113 -0
  134. package/AISB/catalog/aisb.t3.062_visionts.yaml +237 -0
  135. package/AISB/catalog/aisb.t3.062_visionts.zh.yaml +216 -0
  136. package/AISB/catalog/aisb.t3.063_tsrag.yaml +162 -0
  137. package/AISB/catalog/aisb.t3.063_tsrag.zh.yaml +138 -0
  138. package/AISB/catalog/aisb.t3.064_pir.yaml +221 -0
  139. package/AISB/catalog/aisb.t3.064_pir.zh.yaml +197 -0
  140. package/AISB/catalog/aisb.t3.065_proteinbinding.yaml +234 -0
  141. package/AISB/catalog/aisb.t3.065_proteinbinding.zh.yaml +167 -0
  142. package/AISB/catalog/aisb.t3.066_tropicalattention.yaml +267 -0
  143. package/AISB/catalog/aisb.t3.066_tropicalattention.zh.yaml +229 -0
  144. package/AISB/catalog/aisb.t3.067_kanad.yaml +193 -0
  145. package/AISB/catalog/aisb.t3.067_kanad.zh.yaml +167 -0
  146. package/AISB/catalog/aisb.t3.068_sempo.yaml +187 -0
  147. package/AISB/catalog/aisb.t3.068_sempo.zh.yaml +148 -0
  148. package/AISB/catalog/aisb.t3.069_treehfd.yaml +129 -0
  149. package/AISB/catalog/aisb.t3.069_treehfd.zh.yaml +111 -0
  150. package/AISB/catalog/aisb.t3.070_certifiedunlearning.yaml +224 -0
  151. package/AISB/catalog/aisb.t3.070_certifiedunlearning.zh.yaml +171 -0
  152. package/AISB/catalog/aisb.t3.071_neuralmjd.yaml +142 -0
  153. package/AISB/catalog/aisb.t3.071_neuralmjd.zh.yaml +120 -0
  154. package/AISB/catalog/aisb.t3.072_fedgmt.yaml +181 -0
  155. package/AISB/catalog/aisb.t3.072_fedgmt.zh.yaml +158 -0
  156. package/AISB/catalog/aisb.t3.073_rld.yaml +161 -0
  157. package/AISB/catalog/aisb.t3.073_rld.zh.yaml +129 -0
  158. package/AISB/catalog/aisb.t3.074_lsvi.yaml +163 -0
  159. package/AISB/catalog/aisb.t3.074_lsvi.zh.yaml +129 -0
  160. package/AISB/catalog/aisb.t3.075_treeslicedentropy.yaml +201 -0
  161. package/AISB/catalog/aisb.t3.075_treeslicedentropy.zh.yaml +148 -0
  162. package/AISB/catalog/aisb.t3.076_aanet.yaml +169 -0
  163. package/AISB/catalog/aisb.t3.076_aanet.zh.yaml +129 -0
  164. package/AISB/catalog/aisb.t3.077_cmnn.yaml +199 -0
  165. package/AISB/catalog/aisb.t3.077_cmnn.zh.yaml +165 -0
  166. package/AISB/catalog/aisb.t3.078_conformalanomaly.yaml +146 -0
  167. package/AISB/catalog/aisb.t3.078_conformalanomaly.zh.yaml +117 -0
  168. package/AISB/catalog/aisb.t3.079_dpfkmeans.yaml +131 -0
  169. package/AISB/catalog/aisb.t3.079_dpfkmeans.zh.yaml +104 -0
  170. package/AISB/catalog/aisb.t3.080_latentscorereweight.yaml +169 -0
  171. package/AISB/catalog/aisb.t3.080_latentscorereweight.zh.yaml +123 -0
  172. package/AISB/catalog/aisb.t3.081_qmamba.yaml +150 -0
  173. package/AISB/catalog/aisb.t3.081_qmamba.zh.yaml +117 -0
  174. package/AISB/catalog/aisb.t3.082_onlinellmrouting.yaml +160 -0
  175. package/AISB/catalog/aisb.t3.082_onlinellmrouting.zh.yaml +133 -0
  176. package/AISB/catalog/aisb.t3.083_starformer.yaml +178 -0
  177. package/AISB/catalog/aisb.t3.083_starformer.zh.yaml +140 -0
  178. package/AISB/catalog/aisb.t3.084_ift.yaml +139 -0
  179. package/AISB/catalog/aisb.t3.084_ift.zh.yaml +111 -0
  180. package/AISB/catalog/aisb.t3.085_neuralsurv.yaml +183 -0
  181. package/AISB/catalog/aisb.t3.085_neuralsurv.zh.yaml +143 -0
  182. package/AISB/catalog/aisb.t3.086_stella.yaml +197 -0
  183. package/AISB/catalog/aisb.t3.086_stella.zh.yaml +142 -0
  184. package/AISB/catalog/aisb.t3.087_moses.yaml +167 -0
  185. package/AISB/catalog/aisb.t3.087_moses.zh.yaml +132 -0
  186. package/AISB/catalog/aisb.t3.088_channelnorm.yaml +140 -0
  187. package/AISB/catalog/aisb.t3.088_channelnorm.zh.yaml +109 -0
  188. package/AISB/catalog/aisb.t3.089_causalvelocity.yaml +730 -0
  189. package/AISB/catalog/aisb.t3.089_causalvelocity.zh.yaml +668 -0
  190. package/AISB/catalog/aisb.t3.090_rstib.yaml +144 -0
  191. package/AISB/catalog/aisb.t3.090_rstib.zh.yaml +109 -0
  192. package/AISB/catalog/aisb.t3.091_timeawarecausal.yaml +132 -0
  193. package/AISB/catalog/aisb.t3.091_timeawarecausal.zh.yaml +107 -0
  194. package/AISB/catalog/aisb.t3.092_kmeanslocalopt.yaml +138 -0
  195. package/AISB/catalog/aisb.t3.092_kmeanslocalopt.zh.yaml +110 -0
  196. package/AISB/catalog/aisb.t3.093_fedwmsam.yaml +134 -0
  197. package/AISB/catalog/aisb.t3.093_fedwmsam.zh.yaml +106 -0
  198. package/AISB/catalog/aisb.t3.094_boundre.yaml +147 -0
  199. package/AISB/catalog/aisb.t3.094_boundre.zh.yaml +114 -0
  200. package/AISB/catalog/aisb.t3.095_fastfeaturecp.yaml +153 -0
  201. package/AISB/catalog/aisb.t3.095_fastfeaturecp.zh.yaml +118 -0
  202. package/AISB/catalog/aisb.t3.096_m3svm.yaml +189 -0
  203. package/AISB/catalog/aisb.t3.096_m3svm.zh.yaml +149 -0
  204. package/AISB/catalog/aisb.t3.097_wassersteintl.yaml +212 -0
  205. package/AISB/catalog/aisb.t3.097_wassersteintl.zh.yaml +169 -0
  206. package/AISB/catalog/aisb.t3.098_xmahalanobis.yaml +171 -0
  207. package/AISB/catalog/aisb.t3.098_xmahalanobis.zh.yaml +127 -0
  208. package/AISB/catalog/aisb.t3.099_ollalanding.yaml +248 -0
  209. package/AISB/catalog/aisb.t3.099_ollalanding.zh.yaml +182 -0
  210. package/AISB/catalog/aisb.t3.100_invmissingdata.yaml +179 -0
  211. package/AISB/catalog/aisb.t3.100_invmissingdata.zh.yaml +150 -0
  212. package/AISB/catalog/aisb.t3.101_acia.yaml +164 -0
  213. package/AISB/catalog/aisb.t3.101_acia.zh.yaml +109 -0
  214. package/AISB/catalog/aisb.t3.102_stochasticff.yaml +178 -0
  215. package/AISB/catalog/aisb.t3.102_stochasticff.zh.yaml +130 -0
  216. package/AISB/catalog/aisb.t3.103_qdcp.yaml +150 -0
  217. package/AISB/catalog/aisb.t3.103_qdcp.zh.yaml +116 -0
  218. package/AISB/catalog/aisb.t3.104_balancedactiveinf.yaml +137 -0
  219. package/AISB/catalog/aisb.t3.104_balancedactiveinf.zh.yaml +104 -0
  220. package/AISB/catalog/aisb.t3.105_binaryclasseval.yaml +161 -0
  221. package/AISB/catalog/aisb.t3.105_binaryclasseval.zh.yaml +130 -0
  222. package/AISB/image/001_aisb.t3.001_savvy.jpg +0 -0
  223. package/AISB/image/002_aisb.t3.002_pinet.jpg +0 -0
  224. package/AISB/image/003_aisb.t3.003_dmsqd.jpg +0 -0
  225. package/AISB/image/004_aisb.t3.004_decentralattn.jpg +0 -0
  226. package/AISB/image/005_aisb.t3.005_tsae.jpg +0 -0
  227. package/AISB/image/006_aisb.t3.006_physense.jpg +0 -0
  228. package/AISB/image/007_aisb.t3.007_reasoningiqa.jpg +0 -0
  229. package/AISB/image/008_aisb.t3.008_meanflows.jpg +0 -0
  230. package/AISB/image/009_aisb.t3.009_scoremissing.jpg +0 -0
  231. package/AISB/image/010_aisb.t3.010_suitabilityfilter.jpg +0 -0
  232. package/AISB/image/011_aisb.t3.011_osd.jpg +0 -0
  233. package/AISB/image/012_aisb.t3.012_efficientqat.jpg +0 -0
  234. package/AISB/image/013_aisb.t3.013_appl.jpg +0 -0
  235. package/AISB/image/014_aisb.t3.014_piguard.jpg +0 -0
  236. package/AISB/image/015_aisb.t3.015_frspec.jpg +0 -0
  237. package/AISB/image/016_aisb.t3.016_mathfusion.jpg +0 -0
  238. package/AISB/image/017_aisb.t3.017_multimodalglp.jpg +0 -0
  239. package/AISB/image/018_aisb.t3.018_cotsynth.jpg +0 -0
  240. package/AISB/image/019_aisb.t3.019_dyscaleut.jpg +0 -0
  241. package/AISB/image/020_aisb.t3.020_aristotle.jpg +0 -0
  242. package/AISB/image/021_aisb.t3.021_tokenrecycling.jpg +0 -0
  243. package/AISB/image/022_aisb.t3.022_chainofreasoning.jpg +0 -0
  244. package/AISB/image/023_aisb.t3.023_guidedembed.jpg +0 -0
  245. package/AISB/image/024_aisb.t3.024_outputcentric.jpg +0 -0
  246. package/AISB/image/025_aisb.t3.025_deeper.jpg +0 -0
  247. package/AISB/image/026_aisb.t3.026_gartkg.jpg +0 -0
  248. package/AISB/image/027_aisb.t3.027_citeeval.jpg +0 -0
  249. package/AISB/image/028_aisb.t3.028_sbam.jpg +0 -0
  250. package/AISB/image/029_aisb.t3.029_cdqgeoembed.jpg +0 -0
  251. package/AISB/image/030_aisb.t3.030_processrm.jpg +0 -0
  252. package/AISB/image/031_aisb.t3.031_circuitstability.jpg +0 -0
  253. package/AISB/image/032_aisb.t3.032_ptsolver.jpg +0 -0
  254. package/AISB/image/033_aisb.t3.033_gcse.jpg +0 -0
  255. package/AISB/image/034_aisb.t3.034_ensemblewm.jpg +0 -0
  256. package/AISB/image/035_aisb.t3.035_moralvalueswa.jpg +0 -0
  257. package/AISB/image/036_aisb.t3.036_weakstrongpref.jpg +0 -0
  258. package/AISB/image/037_aisb.t3.037_dementiamask.jpg +0 -0
  259. package/AISB/image/038_aisb.t3.038_tinysam.jpg +0 -0
  260. package/AISB/image/039_aisb.t3.039_calf.jpg +0 -0
  261. package/AISB/image/040_aisb.t3.040_graniteguardian.jpg +0 -0
  262. package/AISB/image/041_aisb.t3.041_amdm.jpg +0 -0
  263. package/AISB/image/042_aisb.t3.042_xpatch.jpg +0 -0
  264. package/AISB/image/043_aisb.t3.043_vhm.jpg +0 -0
  265. package/AISB/image/044_aisb.t3.044_rgvi.jpg +0 -0
  266. package/AISB/image/045_aisb.t3.045_pslstm.jpg +0 -0
  267. package/AISB/image/046_aisb.t3.046_nonstatts.jpg +0 -0
  268. package/AISB/image/047_aisb.t3.047_timepfn.jpg +0 -0
  269. package/AISB/image/048_aisb.t3.048_proxyspex.jpg +0 -0
  270. package/AISB/image/049_aisb.t3.049_hogwildinference.jpg +0 -0
  271. package/AISB/image/050_aisb.t3.050_causalpfn.jpg +0 -0
  272. package/AISB/image/051_aisb.t3.051_flashtp.jpg +0 -0
  273. package/AISB/image/052_aisb.t3.052_nsdiff.jpg +0 -0
  274. package/AISB/image/053_aisb.t3.053_k2vae.jpg +0 -0
  275. package/AISB/image/054_aisb.t3.054_timebase.jpg +0 -0
  276. package/AISB/image/055_aisb.t3.055_csbrain.jpg +0 -0
  277. package/AISB/image/056_aisb.t3.056_infosam.jpg +0 -0
  278. package/AISB/image/057_aisb.t3.057_mdreid.jpg +0 -0
  279. package/AISB/image/058_aisb.t3.058_mindglitch.jpg +0 -0
  280. package/AISB/image/059_aisb.t3.059_selfsupervised.jpg +0 -0
  281. package/AISB/image/060_aisb.t3.060_iaggad.jpg +0 -0
  282. package/AISB/image/061_aisb.t3.061_hsgkn.jpg +0 -0
  283. package/AISB/image/062_aisb.t3.062_visionts.jpg +0 -0
  284. package/AISB/image/063_aisb.t3.063_tsrag.jpg +0 -0
  285. package/AISB/image/064_aisb.t3.064_pir.jpg +0 -0
  286. package/AISB/image/065_aisb.t3.065_proteinbinding.jpg +0 -0
  287. package/AISB/image/066_aisb.t3.066_tropicalattention.jpg +0 -0
  288. package/AISB/image/067_aisb.t3.067_kanad.jpg +0 -0
  289. package/AISB/image/068_aisb.t3.068_sempo.jpg +0 -0
  290. package/AISB/image/069_aisb.t3.069_treehfd.jpg +0 -0
  291. package/AISB/image/070_aisb.t3.070_certifiedunlearning.jpg +0 -0
  292. package/AISB/image/071_aisb.t3.071_neuralmjd.jpg +0 -0
  293. package/AISB/image/072_aisb.t3.072_fedgmt.jpg +0 -0
  294. package/AISB/image/073_aisb.t3.073_rld.jpg +0 -0
  295. package/AISB/image/074_aisb.t3.074_lsvi.jpg +0 -0
  296. package/AISB/image/075_aisb.t3.075_treeslicedentropy.jpg +0 -0
  297. package/AISB/image/076_aisb.t3.076_aanet.jpg +0 -0
  298. package/AISB/image/077_aisb.t3.077_cmnn.jpg +0 -0
  299. package/AISB/image/078_aisb.t3.078_conformalanomaly.jpg +0 -0
  300. package/AISB/image/079_aisb.t3.079_dpfkmeans.jpg +0 -0
  301. package/AISB/image/080_aisb.t3.080_latentscorereweight.jpg +0 -0
  302. package/AISB/image/081_aisb.t3.081_qmamba.jpg +0 -0
  303. package/AISB/image/082_aisb.t3.082_onlinellmrouting.jpg +0 -0
  304. package/AISB/image/083_aisb.t3.083_starformer.jpg +0 -0
  305. package/AISB/image/084_aisb.t3.084_ift.jpg +0 -0
  306. package/AISB/image/085_aisb.t3.085_neuralsurv.jpg +0 -0
  307. package/AISB/image/086_aisb.t3.086_stella.jpg +0 -0
  308. package/AISB/image/087_aisb.t3.087_moses.jpg +0 -0
  309. package/AISB/image/088_aisb.t3.088_channelnorm.jpg +0 -0
  310. package/AISB/image/089_aisb.t3.089_causalvelocity.jpg +0 -0
  311. package/AISB/image/090_aisb.t3.090_rstib.jpg +0 -0
  312. package/AISB/image/091_aisb.t3.091_timeawarecausal.jpg +0 -0
  313. package/AISB/image/092_aisb.t3.092_kmeanslocalopt.jpg +0 -0
  314. package/AISB/image/093_aisb.t3.093_fedwmsam.jpg +0 -0
  315. package/AISB/image/094_aisb.t3.094_boundre.jpg +0 -0
  316. package/AISB/image/095_aisb.t3.095_fastfeaturecp.jpg +0 -0
  317. package/AISB/image/096_aisb.t3.096_m3svm.jpg +0 -0
  318. package/AISB/image/097_aisb.t3.097_wassersteintl.jpg +0 -0
  319. package/AISB/image/098_aisb.t3.098_xmahalanobis.jpg +0 -0
  320. package/AISB/image/099_aisb.t3.099_ollalanding.jpg +0 -0
  321. package/AISB/image/100_aisb.t3.100_invmissingdata.jpg +0 -0
  322. package/AISB/image/101_aisb.t3.101_acia.jpg +0 -0
  323. package/AISB/image/102_aisb.t3.102_stochasticff.jpg +0 -0
  324. package/AISB/image/103_aisb.t3.103_qdcp.jpg +0 -0
  325. package/AISB/image/104_aisb.t3.104_balancedactiveinf.jpg +0 -0
  326. package/AISB/image/105_aisb.t3.105_binaryclasseval.jpg +0 -0
  327. package/AISB/image/106_aisb.t1.reasoning_lite.jpg +0 -0
  328. package/AISB/image/107_aisb.t2.paper_audit.jpg +0 -0
  329. package/AISB/image/108_aisb.t3.multi_gpu_search.jpg +0 -0
  330. package/AISB/image/109_aisb.t3.tdc_admet.jpg +0 -0
  331. package/AISB/image/aisb.b1.agentic_coding.svg +16 -0
  332. package/AISB/image/aisb.b10.climate_earth.svg +16 -0
  333. package/AISB/image/aisb.b11.model_efficiency.svg +16 -0
  334. package/AISB/image/aisb.b12.embodied_ai.svg +16 -0
  335. package/AISB/image/aisb.b2.agent_systems.svg +16 -0
  336. package/AISB/image/aisb.b3.self_evolving_rl.svg +16 -0
  337. package/AISB/image/aisb.b4.lm_reasoning.svg +16 -0
  338. package/AISB/image/aisb.b5.math_proof.svg +16 -0
  339. package/AISB/image/aisb.b6.research_process.svg +16 -0
  340. package/AISB/image/aisb.b7.multimodal_fusion.svg +16 -0
  341. package/AISB/image/aisb.b8.lifesci_drug.svg +16 -0
  342. package/AISB/image/aisb.b9.material_science.svg +16 -0
  343. package/README.md +132 -11
  344. package/bin/ds.js +376 -49
  345. package/docs/en/00_QUICK_START.md +135 -18
  346. package/docs/en/01_SETTINGS_REFERENCE.md +468 -96
  347. package/docs/en/02_START_RESEARCH_GUIDE.md +26 -5
  348. package/docs/en/03_QQ_CONNECTOR_GUIDE.md +14 -3
  349. package/docs/en/04_LINGZHU_CONNECTOR_GUIDE.md +2 -0
  350. package/docs/en/05_TUI_GUIDE.md +171 -2
  351. package/docs/en/07_MEMORY_AND_MCP.md +38 -2
  352. package/docs/en/09_DOCTOR.md +64 -4
  353. package/docs/en/10_WEIXIN_CONNECTOR_GUIDE.md +38 -1
  354. package/docs/en/11_LICENSE_AND_RISK.md +4 -0
  355. package/docs/en/12_GUIDED_WORKFLOW_TOUR.md +15 -0
  356. package/docs/en/14_PROMPT_SKILLS_AND_MCP_GUIDE.md +9 -0
  357. package/docs/en/15_CODEX_PROVIDER_SETUP.md +622 -187
  358. package/docs/en/16_TELEGRAM_CONNECTOR_GUIDE.md +14 -0
  359. package/docs/en/17_WHATSAPP_CONNECTOR_GUIDE.md +14 -0
  360. package/docs/en/18_FEISHU_CONNECTOR_GUIDE.md +14 -0
  361. package/docs/en/21_LOCAL_MODEL_BACKENDS_GUIDE.md +105 -2
  362. package/docs/en/22_BENCHSTORE_YAML_REFERENCE.md +469 -0
  363. package/docs/en/23_BENCHSTORE_GITHUB_RELEASES_SPEC.md +316 -0
  364. package/docs/en/24_CLAUDE_CODE_PROVIDER_SETUP.md +469 -0
  365. package/docs/en/25_OPENCODE_PROVIDER_SETUP.md +653 -0
  366. package/docs/en/26_CITATION_AND_ATTRIBUTION.md +119 -0
  367. package/docs/en/27_KIMI_CODE_PROVIDER_SETUP.md +180 -0
  368. package/docs/en/28_DISCORD_CONNECTOR_GUIDE.md +61 -0
  369. package/docs/en/29_SLACK_CONNECTOR_GUIDE.md +60 -0
  370. package/docs/en/30_SETTINGS_CONTROL_CENTER_GUIDE.md +371 -0
  371. package/docs/en/{19_LOCAL_BROWSER_AUTH.md → 31_LOCAL_BROWSER_AUTH.md} +1 -1
  372. package/docs/en/32_WINDOWS_WSL2_DEPLOYMENT_GUIDE.md +273 -0
  373. package/docs/en/33_WORKSPACE_EXPLORER_QA.md +121 -0
  374. package/docs/en/91_DEVELOPMENT.md +29 -0
  375. package/docs/en/99_ACKNOWLEDGEMENTS.md +24 -19
  376. package/docs/en/README.md +44 -7
  377. package/docs/images/admin/admin-connectors-health-en.png +0 -0
  378. package/docs/images/admin/admin-controllers-en.png +0 -0
  379. package/docs/images/admin/admin-diagnostics-en.png +0 -0
  380. package/docs/images/admin/admin-errors-en.png +0 -0
  381. package/docs/images/admin/admin-issues-en.png +0 -0
  382. package/docs/images/admin/admin-logs-en.png +0 -0
  383. package/docs/images/admin/admin-quest-detail-en.png +0 -0
  384. package/docs/images/admin/admin-quests-en.png +0 -0
  385. package/docs/images/admin/admin-repairs-en.png +0 -0
  386. package/docs/images/admin/admin-runtime-en.png +0 -0
  387. package/docs/images/admin/admin-search-en.png +0 -0
  388. package/docs/images/admin/admin-stats-en.png +0 -0
  389. package/docs/images/admin/admin-summary-en.png +0 -0
  390. package/docs/images/connectors/connector-discord-en.png +0 -0
  391. package/docs/images/connectors/connector-feishu-en.png +0 -0
  392. package/docs/images/connectors/connector-lingzhu-en.png +0 -0
  393. package/docs/images/connectors/connector-qq-en.png +0 -0
  394. package/docs/images/connectors/connector-slack-en.png +0 -0
  395. package/docs/images/connectors/connector-telegram-en.png +0 -0
  396. package/docs/images/connectors/connector-weixin-en.png +0 -0
  397. package/docs/images/connectors/connector-whatsapp-en.png +0 -0
  398. package/docs/images/settings/settings-baselines-en.png +0 -0
  399. package/docs/images/settings/settings-config-en.png +0 -0
  400. package/docs/images/settings/settings-connectors-overview-en.png +0 -0
  401. package/docs/images/settings/settings-deepxiv-en.png +0 -0
  402. package/docs/images/settings/settings-mcp-servers-en.png +0 -0
  403. package/docs/images/settings/settings-plugins-en.png +0 -0
  404. package/docs/images/settings/settings-runners-en.png +0 -0
  405. package/docs/zh/00_QUICK_START.md +92 -17
  406. package/docs/zh/01_SETTINGS_REFERENCE.md +219 -98
  407. package/docs/zh/02_START_RESEARCH_GUIDE.md +26 -5
  408. package/docs/zh/05_TUI_GUIDE.md +171 -2
  409. package/docs/zh/07_MEMORY_AND_MCP.md +29 -2
  410. package/docs/zh/09_DOCTOR.md +39 -4
  411. package/docs/zh/10_WEIXIN_CONNECTOR_GUIDE.md +24 -1
  412. package/docs/zh/11_LICENSE_AND_RISK.md +4 -0
  413. package/docs/zh/12_GUIDED_WORKFLOW_TOUR.md +15 -0
  414. package/docs/zh/14_PROMPT_SKILLS_AND_MCP_GUIDE.md +9 -0
  415. package/docs/zh/15_CODEX_PROVIDER_SETUP.md +550 -188
  416. package/docs/zh/21_LOCAL_MODEL_BACKENDS_GUIDE.md +105 -2
  417. package/docs/zh/22_BENCHSTORE_YAML_REFERENCE.md +459 -0
  418. package/docs/zh/23_BENCHSTORE_GITHUB_RELEASES_SPEC.md +287 -0
  419. package/docs/zh/23_CLAUDE_RUNNER_GUIDE.md +103 -0
  420. package/docs/zh/24_CLAUDE_CODE_PROVIDER_SETUP.md +460 -0
  421. package/docs/zh/25_OPENCODE_PROVIDER_SETUP.md +660 -0
  422. package/docs/zh/26_CITATION_AND_ATTRIBUTION.md +102 -0
  423. package/docs/zh/27_KIMI_CODE_PROVIDER_SETUP.md +51 -0
  424. package/docs/zh/{19_LOCAL_BROWSER_AUTH.md → 31_LOCAL_BROWSER_AUTH.md} +1 -1
  425. package/docs/zh/32_WINDOWS_WSL2_DEPLOYMENT_GUIDE.md +264 -0
  426. package/docs/zh/33_WORKSPACE_EXPLORER_QA.md +127 -0
  427. package/docs/zh/99_ACKNOWLEDGEMENTS.md +23 -19
  428. package/docs/zh/README.md +29 -7
  429. package/install.sh +122 -16
  430. package/package.json +4 -1
  431. package/pyproject.toml +2 -1
  432. package/src/deepscientist/__init__.py +1 -1
  433. package/src/deepscientist/acp/envelope.py +13 -0
  434. package/src/deepscientist/admin/__init__.py +3 -0
  435. package/src/deepscientist/admin/charts.py +681 -0
  436. package/src/deepscientist/admin/logs.py +119 -0
  437. package/src/deepscientist/admin/repairs.py +217 -0
  438. package/src/deepscientist/admin/service.py +1310 -0
  439. package/src/deepscientist/admin/system_info.py +700 -0
  440. package/src/deepscientist/admin/tasks.py +465 -0
  441. package/src/deepscientist/admin/tool_metrics.py +600 -0
  442. package/src/deepscientist/artifact/guidance.py +8 -4
  443. package/src/deepscientist/artifact/schemas.py +115 -0
  444. package/src/deepscientist/artifact/service.py +4268 -260
  445. package/src/deepscientist/bash_exec/monitor.py +30 -3
  446. package/src/deepscientist/bash_exec/service.py +134 -1
  447. package/src/deepscientist/benchstore/__init__.py +4 -0
  448. package/src/deepscientist/benchstore/prompt_builder.py +224 -0
  449. package/src/deepscientist/benchstore/service.py +1716 -0
  450. package/src/deepscientist/channels/weixin_ilink.py +8 -1
  451. package/src/deepscientist/cli.py +92 -17
  452. package/src/deepscientist/codex_cli_compat.py +2 -2
  453. package/src/deepscientist/config/models.py +82 -11
  454. package/src/deepscientist/config/service.py +927 -91
  455. package/src/deepscientist/connector/weixin_support.py +48 -17
  456. package/src/deepscientist/daemon/api/handlers.py +697 -210
  457. package/src/deepscientist/daemon/api/router.py +76 -1
  458. package/src/deepscientist/daemon/app.py +1054 -51
  459. package/src/deepscientist/diagnostics/runner_failures.py +147 -0
  460. package/src/deepscientist/doctor.py +212 -65
  461. package/src/deepscientist/evidence_packets.py +590 -0
  462. package/src/deepscientist/home.py +52 -4
  463. package/src/deepscientist/kimi_cli_compat.py +50 -0
  464. package/src/deepscientist/latex_runtime.py +2 -2
  465. package/src/deepscientist/mcp/context.py +2 -0
  466. package/src/deepscientist/mcp/schemas.py +114 -0
  467. package/src/deepscientist/mcp/server.py +1566 -126
  468. package/src/deepscientist/memory/service.py +203 -16
  469. package/src/deepscientist/process_control.py +8 -1
  470. package/src/deepscientist/prompts/builder.py +836 -92
  471. package/src/deepscientist/quest/__init__.py +2 -2
  472. package/src/deepscientist/quest/layout.py +12 -1
  473. package/src/deepscientist/quest/node_traces.py +10 -0
  474. package/src/deepscientist/quest/service.py +1430 -139
  475. package/src/deepscientist/quest/stage_views.py +1 -1
  476. package/src/deepscientist/runners/__init__.py +18 -0
  477. package/src/deepscientist/runners/base.py +89 -1
  478. package/src/deepscientist/runners/builtins.py +13 -1
  479. package/src/deepscientist/runners/claude.py +391 -0
  480. package/src/deepscientist/runners/codex.py +421 -21
  481. package/src/deepscientist/runners/codex_telemetry.py +127 -0
  482. package/src/deepscientist/runners/kimi.py +334 -0
  483. package/src/deepscientist/runners/metadata.py +68 -0
  484. package/src/deepscientist/runners/opencode.py +414 -0
  485. package/src/deepscientist/runners/runtime_overrides.py +100 -0
  486. package/src/deepscientist/runners/simple_cli.py +538 -0
  487. package/src/deepscientist/runtime_storage.py +303 -0
  488. package/src/deepscientist/shared.py +61 -16
  489. package/src/deepscientist/skills/installer.py +37 -0
  490. package/src/deepscientist/skills/registry.py +2 -0
  491. package/src/deepscientist/tinytex.py +2 -2
  492. package/src/deepscientist/tui.py +10 -3
  493. package/src/prompts/benchstore/system.md +77 -0
  494. package/src/prompts/connectors/qq.md +33 -2
  495. package/src/prompts/connectors/weixin.md +208 -23
  496. package/src/prompts/contracts/admin_ops.md +74 -0
  497. package/src/prompts/contracts/admin_ops_knowledge.md +138 -0
  498. package/src/prompts/contracts/shared_interaction.md +5 -11
  499. package/src/prompts/start_setup/system.md +422 -0
  500. package/src/prompts/system.md +409 -315
  501. package/src/prompts/system_copilot.md +88 -12
  502. package/src/skills/analysis-campaign/SKILL.md +239 -578
  503. package/src/skills/analysis-campaign/references/artifact-flow-examples.md +102 -0
  504. package/src/skills/analysis-campaign/references/boundary-cases.md +98 -0
  505. package/src/skills/analysis-campaign/references/campaign-checklist-template.md +39 -24
  506. package/src/skills/analysis-campaign/references/campaign-design.md +26 -10
  507. package/src/skills/analysis-campaign/references/campaign-plan-template.md +53 -54
  508. package/src/skills/analysis-campaign/references/operational-guidance.md +97 -0
  509. package/src/skills/analysis-campaign/references/writing-facing-slice-examples.md +10 -20
  510. package/src/skills/baseline/SKILL.md +183 -461
  511. package/src/skills/baseline/references/artifact-flow-examples.md +106 -0
  512. package/src/skills/baseline/references/artifact-payload-examples.md +1 -1
  513. package/src/skills/baseline/references/baseline-checklist-template.md +27 -35
  514. package/src/skills/baseline/references/baseline-plan-template.md +37 -76
  515. package/src/skills/baseline/references/boundary-cases.md +86 -0
  516. package/src/skills/baseline/references/codebase-audit-checklist.md +2 -6
  517. package/src/skills/baseline/references/comparability-contract.md +7 -12
  518. package/src/skills/baseline/references/operational-guidance.md +56 -0
  519. package/src/skills/baseline/references/route-selection.md +5 -25
  520. package/src/skills/decision/SKILL.md +113 -306
  521. package/src/skills/decision/references/checkpoint-memory-template.md +47 -0
  522. package/src/skills/decision/references/operational-guidance.md +94 -0
  523. package/src/skills/decision/references/research-route-criteria.md +7 -8
  524. package/src/skills/decision/references/strategic-decision-template.md +13 -26
  525. package/src/skills/experiment/SKILL.md +132 -670
  526. package/src/skills/experiment/references/execution-playbook.md +374 -0
  527. package/src/skills/experiment/references/main-experiment-checklist-template.md +26 -2
  528. package/src/skills/experiment/references/main-experiment-plan-template.md +28 -17
  529. package/src/skills/experiment/references/operational-guidance.md +108 -0
  530. package/src/skills/finalize/SKILL.md +62 -0
  531. package/src/skills/finalize/references/checkpoint-memory-template.md +49 -0
  532. package/src/skills/finalize/references/resume-packet-template.md +7 -0
  533. package/src/skills/idea/SKILL.md +228 -15
  534. package/src/skills/idea/references/controlled-brainstorming-playbook.md +78 -0
  535. package/src/skills/idea/references/current-board-packet-template.md +61 -0
  536. package/src/skills/idea/references/high-value-idea-sourcing.md +119 -0
  537. package/src/skills/idea/references/idea-generation-playbook.md +21 -0
  538. package/src/skills/idea/references/idea-thinking-flow.md +6 -0
  539. package/src/skills/idea/references/literature-survey-template.md +3 -0
  540. package/src/skills/idea/references/objective-contract-template.md +54 -0
  541. package/src/skills/idea/references/outline-seeding-example.md +56 -0
  542. package/src/skills/idea/references/pre-idea-draft-template.md +105 -0
  543. package/src/skills/idea/references/related-work-playbook.md +75 -2
  544. package/src/skills/idea/references/research-history-playbook.md +114 -0
  545. package/src/skills/idea/references/selection-gate.md +58 -6
  546. package/src/skills/intake-audit/SKILL.md +43 -2
  547. package/src/skills/intake-audit/references/state-audit-template.md +10 -0
  548. package/src/skills/nature-data/SKILL.md +128 -0
  549. package/src/skills/nature-data/UPSTREAM_LICENSE.txt +21 -0
  550. package/src/skills/nature-data/agents/openai.yaml +4 -0
  551. package/src/skills/nature-data/references/chinese-author-alignment.md +84 -0
  552. package/src/skills/nature-data/references/fair-metadata-checklist.md +105 -0
  553. package/src/skills/nature-data/references/policy-principles.md +103 -0
  554. package/src/skills/nature-data/references/repository-and-identifiers.md +96 -0
  555. package/src/skills/nature-data/references/source-basis.md +54 -0
  556. package/src/skills/nature-data/references/statement-patterns.md +153 -0
  557. package/src/skills/nature-figure/SKILL.md +197 -0
  558. package/src/skills/nature-figure/UPSTREAM_LICENSE.txt +21 -0
  559. package/src/skills/nature-figure/agents/openai.yaml +4 -0
  560. package/src/skills/nature-figure/evals/evals.json +37 -0
  561. package/src/skills/nature-figure/references/api.md +428 -0
  562. package/src/skills/nature-figure/references/backend-selection.md +100 -0
  563. package/src/skills/nature-figure/references/chart-types.md +281 -0
  564. package/src/skills/nature-figure/references/common-patterns.md +349 -0
  565. package/src/skills/nature-figure/references/design-theory.md +436 -0
  566. package/src/skills/nature-figure/references/figure-contract.md +93 -0
  567. package/src/skills/nature-figure/references/nature-2026-observations.md +112 -0
  568. package/src/skills/nature-figure/references/qa-contract.md +119 -0
  569. package/src/skills/nature-figure/references/r-template-index.md +66 -0
  570. package/src/skills/nature-figure/references/r-workflow.md +161 -0
  571. package/src/skills/nature-figure/references/tutorials.md +250 -0
  572. package/src/skills/nature-paper2ppt/SKILL.md +507 -0
  573. package/src/skills/nature-paper2ppt/UPSTREAM_LICENSE.txt +21 -0
  574. package/src/skills/nature-paper2ppt/agents/openai.yaml +4 -0
  575. package/src/skills/nature-polishing/SKILL.md +385 -0
  576. package/src/skills/nature-polishing/UPSTREAM_LICENSE.txt +21 -0
  577. package/src/skills/nature-polishing/agents/openai.yaml +4 -0
  578. package/src/skills/nature-polishing/references/phrasebank-playbook.md +162 -0
  579. package/src/skills/nature-polishing/references/section-moves.md +240 -0
  580. package/src/skills/nature-polishing/references/style-guardrails.md +94 -0
  581. package/src/skills/nature-polishing/references/writing-strategy.md +148 -0
  582. package/src/skills/optimize/SKILL.md +177 -1568
  583. package/src/skills/optimize/references/brief-shaping-playbook.md +95 -0
  584. package/src/skills/optimize/references/candidate-board-template.md +13 -0
  585. package/src/skills/optimize/references/candidate-ranking-template.md +51 -0
  586. package/src/skills/optimize/references/codegen-route-playbook.md +50 -0
  587. package/src/skills/optimize/references/debug-response-template.md +29 -0
  588. package/src/skills/optimize/references/frontier-review-template.md +32 -0
  589. package/src/skills/optimize/references/fusion-playbook.md +36 -0
  590. package/src/skills/optimize/references/method-brief-template.md +73 -0
  591. package/src/skills/optimize/references/operational-guidance.md +621 -0
  592. package/src/skills/optimize/references/optimization-memory-template.md +30 -0
  593. package/src/skills/optimize/references/optimize-checklist-template.md +18 -0
  594. package/src/skills/optimize/references/plateau-response-playbook.md +28 -0
  595. package/src/skills/optimize/references/prompt-patterns.md +49 -0
  596. package/src/skills/paper-outline/SKILL.md +227 -0
  597. package/src/skills/paper-outline/references/outline-patterns.md +87 -0
  598. package/src/skills/paper-plot/SKILL.md +79 -0
  599. package/src/skills/paper-plot/agents/openai.yaml +4 -0
  600. package/src/skills/paper-plot/references/bar_grouped_hatch.md +96 -0
  601. package/src/skills/paper-plot/references/bar_paired_delta.md +72 -0
  602. package/src/skills/paper-plot/references/line_confidence_band.md +75 -0
  603. package/src/skills/paper-plot/references/line_loss_with_inset.md +65 -0
  604. package/src/skills/paper-plot/references/line_training_curve.md +44 -0
  605. package/src/skills/paper-plot/references/radar_dual_series.md +59 -0
  606. package/src/skills/paper-plot/references/scatter_broken_axis.md +59 -0
  607. package/src/skills/paper-plot/references/scatter_tsne_cluster.md +72 -0
  608. package/src/skills/paper-plot/scripts/bar_memevolve.py +109 -0
  609. package/src/skills/paper-plot/scripts/bar_spice.py +166 -0
  610. package/src/skills/paper-plot/scripts/line_aime.py +94 -0
  611. package/src/skills/paper-plot/scripts/line_loss_inset.py +157 -0
  612. package/src/skills/paper-plot/scripts/line_selfdistill.py +168 -0
  613. package/src/skills/paper-plot/scripts/radar_dora.py +151 -0
  614. package/src/skills/paper-plot/scripts/scatter_break.py +169 -0
  615. package/src/skills/paper-plot/scripts/scatter_tsne.py +133 -0
  616. package/src/skills/rebuttal/SKILL.md +9 -0
  617. package/src/skills/references/tool-usage-by-stage.md +438 -0
  618. package/src/skills/review/SKILL.md +105 -7
  619. package/src/skills/science/PROVENANCE.md +44 -0
  620. package/src/skills/science/SKILL.md +137 -0
  621. package/src/skills/science/references/artifact-science-tool.md +110 -0
  622. package/src/skills/science/references/claim-type-discipline.md +56 -0
  623. package/src/skills/science/references/domain-index.md +422 -0
  624. package/src/skills/science/references/hpc-via-bash-exec.md +42 -0
  625. package/src/skills/science/references/package-check-playbook.md +64 -0
  626. package/src/skills/science/references/package-index.min.json +3616 -0
  627. package/src/skills/science/references/packages/abinit.md +80 -0
  628. package/src/skills/science/references/packages/acts.md +73 -0
  629. package/src/skills/science/references/packages/aiida-core.md +80 -0
  630. package/src/skills/science/references/packages/alamode.md +80 -0
  631. package/src/skills/science/references/packages/amuse.md +88 -0
  632. package/src/skills/science/references/packages/anndata.md +88 -0
  633. package/src/skills/science/references/packages/arbor.md +80 -0
  634. package/src/skills/science/references/packages/arc.md +73 -0
  635. package/src/skills/science/references/packages/astropy.md +88 -0
  636. package/src/skills/science/references/packages/astroquery.md +88 -0
  637. package/src/skills/science/references/packages/atomate2.md +80 -0
  638. package/src/skills/science/references/packages/atomsmltr.md +73 -0
  639. package/src/skills/science/references/packages/awkward.md +73 -0
  640. package/src/skills/science/references/packages/batman.md +88 -0
  641. package/src/skills/science/references/packages/biopython.md +88 -0
  642. package/src/skills/science/references/packages/bloqade.md +73 -0
  643. package/src/skills/science/references/packages/brian2.md +73 -0
  644. package/src/skills/science/references/packages/bullet3.md +73 -0
  645. package/src/skills/science/references/packages/calculix.md +80 -0
  646. package/src/skills/science/references/packages/cantera.md +73 -0
  647. package/src/skills/science/references/packages/cavity-md-ipi.md +80 -0
  648. package/src/skills/science/references/packages/ccdproc.md +88 -0
  649. package/src/skills/science/references/packages/celerite2.md +88 -0
  650. package/src/skills/science/references/packages/cellrank.md +73 -0
  651. package/src/skills/science/references/packages/cesm.md +80 -0
  652. package/src/skills/science/references/packages/chemicals.md +73 -0
  653. package/src/skills/science/references/packages/chempy.md +73 -0
  654. package/src/skills/science/references/packages/cirq.md +73 -0
  655. package/src/skills/science/references/packages/coffea.md +73 -0
  656. package/src/skills/science/references/packages/cp2k.md +88 -0
  657. package/src/skills/science/references/packages/custodian.md +80 -0
  658. package/src/skills/science/references/packages/dart.md +73 -0
  659. package/src/skills/science/references/packages/datamol.md +88 -0
  660. package/src/skills/science/references/packages/dd4hep.md +73 -0
  661. package/src/skills/science/references/packages/dealii.md +80 -0
  662. package/src/skills/science/references/packages/deepchem.md +88 -0
  663. package/src/skills/science/references/packages/delphes.md +73 -0
  664. package/src/skills/science/references/packages/devito.md +80 -0
  665. package/src/skills/science/references/packages/dftb.md +88 -0
  666. package/src/skills/science/references/packages/dftd4.md +88 -0
  667. package/src/skills/science/references/packages/dftk-jl.md +80 -0
  668. package/src/skills/science/references/packages/dolfinx.md +80 -0
  669. package/src/skills/science/references/packages/drake.md +73 -0
  670. package/src/skills/science/references/packages/dumux.md +73 -0
  671. package/src/skills/science/references/packages/elk.md +80 -0
  672. package/src/skills/science/references/packages/elmerfem.md +80 -0
  673. package/src/skills/science/references/packages/enzo-e.md +88 -0
  674. package/src/skills/science/references/packages/espresso.md +80 -0
  675. package/src/skills/science/references/packages/exoplanet.md +88 -0
  676. package/src/skills/science/references/packages/fairroot.md +73 -0
  677. package/src/skills/science/references/packages/fbpic.md +80 -0
  678. package/src/skills/science/references/packages/fdtdbath-meep.md +80 -0
  679. package/src/skills/science/references/packages/geant4.md +73 -0
  680. package/src/skills/science/references/packages/geosx.md +80 -0
  681. package/src/skills/science/references/packages/gprmax.md +80 -0
  682. package/src/skills/science/references/packages/gromacs.md +80 -0
  683. package/src/skills/science/references/packages/gwaslab.md +73 -0
  684. package/src/skills/science/references/packages/gz-sim.md +73 -0
  685. package/src/skills/science/references/packages/hail.md +88 -0
  686. package/src/skills/science/references/packages/hiphive.md +80 -0
  687. package/src/skills/science/references/packages/hoomd-blue.md +80 -0
  688. package/src/skills/science/references/packages/itensor.md +73 -0
  689. package/src/skills/science/references/packages/itensors-jl.md +73 -0
  690. package/src/skills/science/references/packages/jdftx.md +73 -0
  691. package/src/skills/science/references/packages/jobflow.md +80 -0
  692. package/src/skills/science/references/packages/kadanoffbaym-jl.md +73 -0
  693. package/src/skills/science/references/packages/kite.md +80 -0
  694. package/src/skills/science/references/packages/kratos.md +80 -0
  695. package/src/skills/science/references/packages/kwant.md +73 -0
  696. package/src/skills/science/references/packages/lammps.md +80 -0
  697. package/src/skills/science/references/packages/lightkurve.md +88 -0
  698. package/src/skills/science/references/packages/limix.md +73 -0
  699. package/src/skills/science/references/packages/maxwelllink.md +80 -0
  700. package/src/skills/science/references/packages/mcdc.md +73 -0
  701. package/src/skills/science/references/packages/meep.md +80 -0
  702. package/src/skills/science/references/packages/mfem.md +80 -0
  703. package/src/skills/science/references/packages/mitgcm.md +73 -0
  704. package/src/skills/science/references/packages/modflow6.md +73 -0
  705. package/src/skills/science/references/packages/molecool.md +73 -0
  706. package/src/skills/science/references/packages/mom6.md +73 -0
  707. package/src/skills/science/references/packages/moose.md +80 -0
  708. package/src/skills/science/references/packages/mpas-model.md +73 -0
  709. package/src/skills/science/references/packages/mujoco.md +73 -0
  710. package/src/skills/science/references/packages/mumax3.md +73 -0
  711. package/src/skills/science/references/packages/nekrs.md +80 -0
  712. package/src/skills/science/references/packages/nessi.md +73 -0
  713. package/src/skills/science/references/packages/nest-simulator.md +73 -0
  714. package/src/skills/science/references/packages/netket.md +73 -0
  715. package/src/skills/science/references/packages/neuron.md +73 -0
  716. package/src/skills/science/references/packages/nextflow.md +88 -0
  717. package/src/skills/science/references/packages/nwchem.md +88 -0
  718. package/src/skills/science/references/packages/openbabel.md +88 -0
  719. package/src/skills/science/references/packages/openems.md +80 -0
  720. package/src/skills/science/references/packages/openff-toolkit.md +88 -0
  721. package/src/skills/science/references/packages/openfoam-dev.md +80 -0
  722. package/src/skills/science/references/packages/openmc.md +73 -0
  723. package/src/skills/science/references/packages/openmm.md +80 -0
  724. package/src/skills/science/references/packages/openmoc.md +73 -0
  725. package/src/skills/science/references/packages/openmx.md +80 -0
  726. package/src/skills/science/references/packages/opensees.md +80 -0
  727. package/src/skills/science/references/packages/opensn.md +80 -0
  728. package/src/skills/science/references/packages/opm-simulators.md +73 -0
  729. package/src/skills/science/references/packages/oqupy.md +73 -0
  730. package/src/skills/science/references/packages/packmol.md +80 -0
  731. package/src/skills/science/references/packages/palabos.md +80 -0
  732. package/src/skills/science/references/packages/parflow.md +80 -0
  733. package/src/skills/science/references/packages/pennylane.md +88 -0
  734. package/src/skills/science/references/packages/perceval.md +73 -0
  735. package/src/skills/science/references/packages/phono3py.md +73 -0
  736. package/src/skills/science/references/packages/phonopy.md +73 -0
  737. package/src/skills/science/references/packages/photutils.md +88 -0
  738. package/src/skills/science/references/packages/picongpu.md +80 -0
  739. package/src/skills/science/references/packages/plink-ng.md +88 -0
  740. package/src/skills/science/references/packages/precice.md +73 -0
  741. package/src/skills/science/references/packages/psc.md +80 -0
  742. package/src/skills/science/references/packages/psi4.md +88 -0
  743. package/src/skills/science/references/packages/pybinding.md +73 -0
  744. package/src/skills/science/references/packages/pyfr.md +80 -0
  745. package/src/skills/science/references/packages/pyhf.md +73 -0
  746. package/src/skills/science/references/packages/pyiron_base.md +80 -0
  747. package/src/skills/science/references/packages/pylcp.md +73 -0
  748. package/src/skills/science/references/packages/pylith.md +80 -0
  749. package/src/skills/science/references/packages/pynbody.md +88 -0
  750. package/src/skills/science/references/packages/pysam.md +88 -0
  751. package/src/skills/science/references/packages/pyscf.md +88 -0
  752. package/src/skills/science/references/packages/q-e.md +73 -0
  753. package/src/skills/science/references/packages/qibo.md +73 -0
  754. package/src/skills/science/references/packages/qiskit.md +73 -0
  755. package/src/skills/science/references/packages/quantica-jl.md +73 -0
  756. package/src/skills/science/references/packages/quantumoptics-jl.md +73 -0
  757. package/src/skills/science/references/packages/quimb.md +73 -0
  758. package/src/skills/science/references/packages/qulacs.md +73 -0
  759. package/src/skills/science/references/packages/qutip.md +73 -0
  760. package/src/skills/science/references/packages/rdkit.md +88 -0
  761. package/src/skills/science/references/packages/rmg-py.md +73 -0
  762. package/src/skills/science/references/packages/root.md +73 -0
  763. package/src/skills/science/references/packages/scanpy.md +88 -0
  764. package/src/skills/science/references/packages/scikit-allel.md +88 -0
  765. package/src/skills/science/references/packages/scikit-bio.md +88 -0
  766. package/src/skills/science/references/packages/scqubits.md +73 -0
  767. package/src/skills/science/references/packages/scuff-em.md +80 -0
  768. package/src/skills/science/references/packages/scvi-tools.md +73 -0
  769. package/src/skills/science/references/packages/seissol.md +73 -0
  770. package/src/skills/science/references/packages/sfepy.md +80 -0
  771. package/src/skills/science/references/packages/sisl.md +73 -0
  772. package/src/skills/science/references/packages/smilei.md +80 -0
  773. package/src/skills/science/references/packages/snakemake.md +88 -0
  774. package/src/skills/science/references/packages/specfem3d-globe.md +80 -0
  775. package/src/skills/science/references/packages/specutils.md +88 -0
  776. package/src/skills/science/references/packages/spglib.md +80 -0
  777. package/src/skills/science/references/packages/squidpy.md +88 -0
  778. package/src/skills/science/references/packages/starry.md +88 -0
  779. package/src/skills/science/references/packages/strawberryfields.md +73 -0
  780. package/src/skills/science/references/packages/su2.md +80 -0
  781. package/src/skills/science/references/packages/sunny-jl.md +73 -0
  782. package/src/skills/science/references/packages/sw4.md +73 -0
  783. package/src/skills/science/references/packages/swift.md +88 -0
  784. package/src/skills/science/references/packages/tdnegf.md +73 -0
  785. package/src/skills/science/references/packages/tenpy.md +73 -0
  786. package/src/skills/science/references/packages/thermo.md +73 -0
  787. package/src/skills/science/references/packages/tkwant.md +73 -0
  788. package/src/skills/science/references/packages/tvb-root.md +73 -0
  789. package/src/skills/science/references/packages/uproot5.md +73 -0
  790. package/src/skills/science/references/packages/vampire.md +80 -0
  791. package/src/skills/science/references/packages/wannier_tools.md +73 -0
  792. package/src/skills/science/references/packages/warpx.md +80 -0
  793. package/src/skills/science/references/packages/wrf.md +73 -0
  794. package/src/skills/science/references/packages/xtb.md +88 -0
  795. package/src/skills/science/references/packages/yt.md +73 -0
  796. package/src/skills/science/references/science-task-brief-template.md +71 -0
  797. package/src/skills/scout/SKILL.md +83 -425
  798. package/src/skills/scout/references/literature-scout-template.md +5 -24
  799. package/src/skills/scout/references/operational-guidance.md +191 -0
  800. package/src/skills/scout/references/paper-triage-playbook.md +11 -35
  801. package/src/skills/write/SKILL.md +744 -1246
  802. package/src/skills/write/references/experiments_analysis_patterns.md +129 -0
  803. package/src/skills/write/references/oral_package_patterns.md +252 -0
  804. package/src/skills/write/references/oral_writing_principles.md +291 -0
  805. package/src/skills/write/references/section_rewrite_checklist.md +234 -0
  806. package/src/tui/dist/app/AppContainer.js +1314 -27
  807. package/src/tui/dist/components/Composer.js +26 -1
  808. package/src/tui/dist/components/ConfigScreen.js +2 -1
  809. package/src/tui/dist/components/InputPrompt.js +25 -9
  810. package/src/tui/dist/components/MainContent.js +18 -3
  811. package/src/tui/dist/components/QuestScreen.js +3 -2
  812. package/src/tui/dist/components/UtilityScreen.js +37 -0
  813. package/src/tui/dist/hooks/useSafeInput.js +10 -0
  814. package/src/tui/dist/index.js +13 -1
  815. package/src/tui/dist/layouts/DefaultAppLayout.js +11 -8
  816. package/src/tui/dist/lib/api.js +89 -1
  817. package/src/tui/package.json +1 -1
  818. package/src/ui/dist/assets/{AnalysisPlugin-BCKAfjba.js → AnalysisPlugin-CA94NGmI.js} +1 -1
  819. package/src/ui/dist/assets/CliPlugin-DHBzphZU.js +79 -0
  820. package/src/ui/dist/assets/CodeEditorPlugin-BOFwD2rn.js +2 -0
  821. package/src/ui/dist/assets/{CodeViewerPlugin-CbaFRrUU.js → CodeViewerPlugin-CqDpgjik.js} +4 -4
  822. package/src/ui/dist/assets/{DocViewerPlugin-DAjLVeQD.js → DocViewerPlugin-UDBgt8-4.js} +3 -3
  823. package/src/ui/dist/assets/GitCommitViewerPlugin-BmHtZ0bZ.js +6 -0
  824. package/src/ui/dist/assets/{GitDiffViewerPlugin-CQACjoAA.js → GitDiffViewerPlugin-CAxjNorQ.js} +2 -2
  825. package/src/ui/dist/assets/{GitSnapshotViewer-0r4nLPke.js → GitSnapshotViewer-CweA6VON.js} +2 -2
  826. package/src/ui/dist/assets/{ImageViewerPlugin-nBOmI2v_.js → ImageViewerPlugin-C8wHGvGN.js} +5 -5
  827. package/src/ui/dist/assets/LabPlugin-COyyLUol.js +32 -0
  828. package/src/ui/dist/assets/{LatexPlugin-ZwtV8pIp.js → LatexPlugin-BQjAaA5J.js} +4 -4
  829. package/src/ui/dist/assets/{MarkdownViewerPlugin-DKqVfKyW.js → MarkdownViewerPlugin-Dy1NE2dI.js} +3 -3
  830. package/src/ui/dist/assets/{MarketplacePlugin-BwxStZ9D.js → MarketplacePlugin-DMIZtEJ2.js} +2 -2
  831. package/src/ui/dist/assets/NotebookEditor-CFHMq_Qt.js +91 -0
  832. package/src/ui/dist/assets/{NotebookEditor-DB9N_T9q.js → NotebookEditor-WFyd8Ybt.js} +3 -3
  833. package/src/ui/dist/assets/{PdfLoader-eWBONbQP.js → PdfLoader-CLE5u5TS.js} +3 -3
  834. package/src/ui/dist/assets/{PdfMarkdownPlugin-D22YOZL3.js → PdfMarkdownPlugin-_iNK_H83.js} +1 -1
  835. package/src/ui/dist/assets/PdfViewerPlugin-DgWsbInT.js +22 -0
  836. package/src/ui/dist/assets/SearchPlugin-DrZmn5iw.js +11 -0
  837. package/src/ui/dist/assets/{TextViewerPlugin-C5xqeeUH.js → TextViewerPlugin-D1-T3aC7.js} +4 -4
  838. package/src/ui/dist/assets/branding/runner-claude.svg +107 -0
  839. package/src/ui/dist/assets/branding/runner-codex.svg +10 -0
  840. package/src/ui/dist/assets/branding/runner-kimi.svg +14 -0
  841. package/src/ui/dist/assets/branding/runner-opencode.svg +7 -0
  842. package/src/ui/dist/assets/cli-store-CoZ-x5Ip.js +1 -0
  843. package/src/ui/dist/assets/{code-WlFHE7z_.js → code-DbsmSd3Y.js} +1 -1
  844. package/src/ui/dist/assets/file-diff-panel-DsvyRz47.js +1 -0
  845. package/src/ui/dist/assets/{wrap-text-BC-Hltpd.js → file-jump-queue-DeQBikaw.js} +3 -3
  846. package/src/ui/dist/assets/{file-socket-CfQPKQKj.js → file-socket-DA5XIx88.js} +1 -1
  847. package/src/ui/dist/assets/fonts/ds-fonts.css +50 -4
  848. package/src/ui/dist/assets/images/deepxiv/register-guide.png +0 -0
  849. package/src/ui/dist/assets/index-39vY9LmZ.js +1 -0
  850. package/src/ui/dist/assets/{index-CwNu1aH4.js → index-BsO46tJA.js} +1 -1
  851. package/src/ui/dist/assets/index-CHzJ2xtB.js +3530 -0
  852. package/src/ui/dist/assets/index-DH-zxoZ3.css +33 -0
  853. package/src/ui/dist/assets/{plugin-notebook-HbW2K-1c.js → plugin-notebook-JRhysCqj.js} +2 -2
  854. package/src/ui/dist/assets/{project-sync-C9IdzdZW.js → project-sync-DPmWKmKD.js} +1 -1
  855. package/src/ui/dist/assets/{zoom-out-E_gaeAxL.js → zoom-out-DAukFWen.js} +3 -3
  856. package/src/ui/dist/index.html +3 -3
  857. package/src/skills/analysis-campaign/references/artifact-orchestration.md +0 -58
  858. package/src/skills/baseline/references/memory-playbook.md +0 -40
  859. package/src/skills/baseline/references/publishable-baseline-package.md +0 -30
  860. package/src/skills/write/references/outline-evidence-contract-example.md +0 -107
  861. package/src/skills/write/references/paper-experiment-matrix-template.md +0 -131
  862. package/src/skills/write/references/paper-section-playbook.md +0 -64
  863. package/src/skills/write/references/reviewer-first-writing.md +0 -64
  864. package/src/skills/write/references/revision-checklist.md +0 -70
  865. package/src/skills/write/references/section-contracts.md +0 -82
  866. package/src/skills/write/references/sentence-level-proofing.md +0 -49
  867. package/src/ui/dist/assets/AiManusChatView-Bv-Z8YpU.js +0 -204
  868. package/src/ui/dist/assets/CliPlugin-BCKcpc35.js +0 -109
  869. package/src/ui/dist/assets/CodeEditorPlugin-DbOfSJ8K.js +0 -2
  870. package/src/ui/dist/assets/GitCommitViewerPlugin-CIUqbUDO.js +0 -1
  871. package/src/ui/dist/assets/LabCopilotPanel-BHxOxF4z.js +0 -14
  872. package/src/ui/dist/assets/LabPlugin-BKoZGs95.js +0 -22
  873. package/src/ui/dist/assets/NotebookEditor-BEQhaQbt.js +0 -81
  874. package/src/ui/dist/assets/PdfViewerPlugin-c-RK9DLM.js +0 -17
  875. package/src/ui/dist/assets/SearchPlugin-CxF9ytAx.js +0 -16
  876. package/src/ui/dist/assets/VNCViewer-BoLGLnHz.js +0 -11
  877. package/src/ui/dist/assets/bot-DREQOxzP.js +0 -6
  878. package/src/ui/dist/assets/chevron-up-C9Qpx4DE.js +0 -6
  879. package/src/ui/dist/assets/file-content-BZMz3RYp.js +0 -1
  880. package/src/ui/dist/assets/file-diff-panel-CQhw0jS2.js +0 -1
  881. package/src/ui/dist/assets/file-jump-queue-DA-SdG__.js +0 -1
  882. package/src/ui/dist/assets/git-commit-horizontal-DxZ8DCZh.js +0 -6
  883. package/src/ui/dist/assets/image-Bgl4VIyx.js +0 -6
  884. package/src/ui/dist/assets/index-BpV6lusQ.css +0 -33
  885. package/src/ui/dist/assets/index-CBNVuWcP.js +0 -2496
  886. package/src/ui/dist/assets/index-DrUnlf6K.js +0 -1
  887. package/src/ui/dist/assets/index-NW-h8VzN.js +0 -1
  888. package/src/ui/dist/assets/pdf-effect-queue-J8OnM0jE.js +0 -6
  889. package/src/ui/dist/assets/popover-CLc0pPP8.js +0 -1
  890. package/src/ui/dist/assets/select-Cs2PmzwL.js +0 -11
  891. package/src/ui/dist/assets/sigma-ClKcHAXm.js +0 -6
  892. package/src/ui/dist/assets/trash-DwpbFr3w.js +0 -11
  893. package/src/ui/dist/assets/useCliAccess-NQ8m0Let.js +0 -1
  894. package/src/ui/dist/assets/useFileDiffOverlay-FuhcnKiw.js +0 -1
@@ -0,0 +1,159 @@
1
+ schema_version: 1
2
+ id: aisb.t3.012_efficientqat
3
+ name: 'EfficientQAT:大型语言模型的高效量化感知训练'
4
+ version: 0.1.0
5
+ one_line: '针对 Llama-2/3 和 Mistral 系列(7B–70B)的两阶段低位宽量化感知训练(Block-AP → E2E-QP),通过 WikiText-2 困惑度、C4 困惑度及五个推理基准零样本准确率进行评估。
6
+
7
+ '
8
+ task_description: '本基准实现了 EfficientQAT,这是一种针对大型语言模型在 2/3/4 位仅权重量化精度下的两阶段量化感知训练流程。阶段一(Block-AP)使用 4096 个 RedPajama 样本(上下文长度 2048)的重构损失,对所有参数——权重、缩放因子和零点——进行逐块训练。阶段二(E2E-QP)冻结量化后的整数权重,仅在目标数据集(RedPajama 或 Alpaca,上下文长度 4096)上端到端微调量化缩放因子。主要可执行入口为 main_block_ap.py 和 main_e2e_qp.py,并配有辅助的缩放因子校准脚本(calibrate_scales.py、calibrate_scales_v2.py)。评估指标包括 WikiText-2 困惑度、C4 困惑度,以及在 WinoGrande、PIQA、HellaSwag、ARC-Easy 和 ARC-Challenge 上使用 lm-eval v0.4.2 测量的平均零样本准确率。多种模型/位宽配置的预量化检查点可在 HuggingFace 上获取,并可通过捆绑的转换脚本转换为 GPTQ 或 BitBLAS 格式。论文报告称,2 位 Llama-2-70B 可在单张 A100-80GB 上于 41 小时内完成训练,且精度下降小于 3 个点。无需外部评估服务,所有指标均在本地计算。
9
+
10
+ '
11
+ capability_tags:
12
+ - research_code_optimization
13
+ - large_language_models
14
+ - quantization
15
+ - model_compression
16
+ - efficient_inference
17
+ - quantization_aware_training
18
+ aisb_direction: T3
19
+ track_fit:
20
+ - paper_track
21
+ - benchmark_track
22
+ task_mode: experiment_driven
23
+ requires_execution: true
24
+ requires_paper: true
25
+ integrity_level: cas_plus_canary
26
+ snapshot_status: runnable
27
+ support_level: advanced
28
+ time_band: 1d+
29
+ cost_band: high
30
+ difficulty: hard
31
+ data_access: public
32
+ primary_outputs:
33
+ - wikitext2_ppl
34
+ - c4_ppl
35
+ - quantized_checkpoint
36
+ - avg_accuracy
37
+ launch_profiles:
38
+ - id: calibration_only
39
+ label: 仅校准
40
+ description: '使用 WikiText-2 训练数据对预量化检查点运行缩放因子校准(calibrate_scales.py 或 calibrate_scales_v2.py)。生成更新后的缩放因子参数,无需完整的 Block-AP 或 E2E-QP 再训练。最快的路径;需要磁盘上有预量化模型。
41
+
42
+ '
43
+ - id: qat_eval
44
+ label: QAT + 评估
45
+ description: '运行完整的两阶段 EfficientQAT 流程:通过 main_block_ap.py 进行 Block-AP 逐块训练,然后通过 main_e2e_qp.py 进行 E2E-QP 端到端缩放因子训练,最后评估 WikiText-2/C4 困惑度及五个推理任务的零样本准确率。这是论文忠实路线,需要大量 GPU 时间(根据模型大小可能需要数小时到数天)。
46
+
47
+ '
48
+ - id: eval_pretrained
49
+ label: 评估预量化模型
50
+ description: '从 HuggingFace 下载预量化的 EfficientQAT 检查点,无需任何训练即可评估困惑度和零样本准确率。使用带 --resume_quant 参数的 main_block_ap.py。
51
+
52
+ '
53
+ dataset_download:
54
+ primary_method: mixed
55
+ sources:
56
+ - kind: huggingface
57
+ url: https://huggingface.co/datasets/togethercomputer/RedPajama-Data-1T-Sample
58
+ access: public
59
+ note: 'RedPajama 校准数据,用于 Block-AP(4096 样本,序列长度 2048)和 E2E-QP(4096 样本,序列长度 4096)。由 datautils_block.py 自动下载。
60
+
61
+ '
62
+ - kind: huggingface
63
+ url: https://huggingface.co/datasets/wikitext
64
+ access: public
65
+ note: 'WikiText-2(wikitext-2-raw-v1),用于困惑度评估和缩放因子校准。在评估期间自动下载。
66
+
67
+ '
68
+ - kind: huggingface
69
+ url: https://huggingface.co/ChenMnZ
70
+ access: public
71
+ note: 'Llama-2(7B/13B/70B)、Llama-3(8B/70B)、Llama-3-Instruct 和 Mistral-Large 的预量化模型检查点,支持 EQAT/GPTQ/BitBLAS 格式。各检查点大小从约 2 GB 到约 39 GB 不等。
72
+
73
+ '
74
+ - kind: huggingface
75
+ url: https://huggingface.co/datasets/tatsu-lab/alpaca
76
+ access: public
77
+ note: 'Alpaca 指令微调数据集,用作指令微调场景的替代 E2E-QP 训练数据。
78
+
79
+ '
80
+ notes:
81
+ - 全精度基模型(如约 131 GB 的 Llama-2-70B)如需从头运行 Block-AP,必须从其原始 HuggingFace 仓库单独下载。
82
+ - 预量化检查点要小得多(例如 w2g64 Llama-2-70B 约 20 GB)。
83
+ - 总磁盘使用量很大程度上取决于目标模型系列和位宽。
84
+ credential_requirements:
85
+ mode: none
86
+ items:
87
+ - 下载门控模型(Llama-2、Llama-3)可能需要 HuggingFace 账户,但访问 EfficientQAT 检查点本身无需账户。
88
+ notes:
89
+ - Meta Llama 模型权重需要在 HuggingFace 上接受许可协议后方可下载。
90
+ - ChenMnZ 命名空间下的预量化 EfficientQAT 检查点可公开访问。
91
+ resources:
92
+ minimum:
93
+ cpu_cores: 16
94
+ ram_gb: 64
95
+ disk_gb: 200
96
+ gpu_count: 1
97
+ gpu_vram_gb: 48
98
+ recommended:
99
+ cpu_cores: 32
100
+ ram_gb: 128
101
+ disk_gb: 500
102
+ gpu_count: 2
103
+ gpu_vram_gb: 80
104
+ environment:
105
+ python: '3.11'
106
+ cuda: null
107
+ pytorch: 2.2.2
108
+ flash_attn: null
109
+ key_packages:
110
+ - bitsandbytes==0.41.0
111
+ - transformers==4.40.1
112
+ - lm-eval==0.4.2
113
+ - accelerate
114
+ - datasets
115
+ notes:
116
+ - 完整的依赖项列表请参阅捆绑的 requirements.txt。
117
+ - CUDA 工具包版本在仓库中未固定;任何支持 PyTorch 2.2.2 的版本均可使用。
118
+ - GPTQModel(已测试 v0.9.8)仅在模型格式转换为 GPTQ/BitBLAS 时需要;核心 QAT 和评估不需要。
119
+ - bitsandbytes 用于 E2E-QP 训练中的 AdamW 优化器。
120
+ risk_flags:
121
+ - large_model_download
122
+ - high_gpu_memory
123
+ - long_training_time
124
+ - gated_model_access
125
+ risk_notes:
126
+ - 70B 模型路线需要单张 A100-80GB(论文报告 2 位 Llama-2-70B 的 Block-AP + E2E-QP 需 41 小时)或多 GPU 配置。
127
+ - 7B 模型路线可在单张 48GB GPU 上完成,但完整的 Block-AP + E2E-QP 仍需数小时。
128
+ - Llama-2/3 的全精度基模型权重在 HuggingFace 上受门控限制,需要接受许可协议。
129
+ - Block-AP 中的 --off_load_to_disk 标志以牺牲训练速度为代价,换取更低的 CPU 内存使用量。
130
+ - 打包过程中未执行基准测试运行;仍需运行时验证。
131
+ - 论文报告 2 位 Llama-2-70B 的 E2E-QP 内存需求为 34.2 GB。
132
+ recommended_when: '当您需要一个以激进位宽(2-4 位)量化感知训练为中心的 LLM 系统任务,且具备自包含的训练和评估流程,覆盖从 7B 到 70B 的多种模型规模时,可使用此基准。适用于评估低位宽 QAT 的优化策略、与 PTQ 和 Q-PEFT 基线对比,或生成可部署的 GPTQ/BitBLAS 格式量化检查点。
133
+
134
+ '
135
+ not_recommended_when: '如果您无法访问至少一块 ≥48 GB VRAM 的 GPU、需要亚小时级基准测试周转时间,或关注的是小型模型(<7B 参数),则不应使用此基准。此外,如需权重-激活量化基准,此基准也不适用(此基准为仅权重量化)。
136
+
137
+ '
138
+ paper:
139
+ title: 'EfficientQAT: Efficient Quantization-Aware Training for Large Language Models'
140
+ venue: ACL 2025 Main
141
+ year: 2025
142
+ url: https://arxiv.org/abs/2407.11062
143
+ download:
144
+ url: https://github.com/ResearAI/DeepScientist/releases/download/aisb-v0.0.1/aisb.t3.012_efficientqat.zip
145
+ archive_type: zip
146
+ local_dir_name: paper-12-EfficientQAT
147
+ provider: github_release
148
+ repo: ResearAI/DeepScientist
149
+ tag: aisb-v0.0.1
150
+ asset_name: aisb.t3.012_efficientqat.zip
151
+ sha256: 8f53850f12f1bdbc4e3212b21fb51a7479ebb8ab715f3138d96ef1da979b2977
152
+ size_bytes: 107235
153
+ commercial:
154
+ annual_fee: null
155
+ display:
156
+ palette_seed: amber-steel-llm
157
+ art_style: hardware-editorial
158
+ accent_priority: high
159
+ image_path: ../image/012_aisb.t3.012_efficientqat.jpg
@@ -0,0 +1,152 @@
1
+ schema_version: 1
2
+ id: aisb.t3.013_appl
3
+ name: 'APPL: A Prompt Programming Language for Harmonious Integration of Programs
4
+ and Large Language Model Prompts'
5
+ version: 0.1.0
6
+ one_line: Measure and optimize the AST compactness of APPL prompt-programming constructs
7
+ by running a code-backed evaluator on canonical code snippets from the paper.
8
+ task_description: 'This benchmark evaluates the structural compactness of APPL, a
9
+ Python-native prompt programming language that seamlessly integrates LLM calls into
10
+ conventional programs. The primary task is to optimize the APPL runtime and language
11
+ constructs so that canonical prompt-program patterns (e.g., Chain-of-Thought with
12
+ self-consistency from Figure 7a of the paper) yield smaller Python ASTs, measured
13
+ by counting AST nodes via the bundled eval_ast_size.py script. The evaluator parses
14
+ a fixed code snippet with Python''s ast module and reports the total node count.
15
+ No model training is involved; the benchmark is CPU-only and focuses on compiler-style
16
+ simplification and language design efficiency. An LLM API key (e.g., OpenAI) is
17
+ required only if you wish to exercise the APPL runtime end-to-end beyond the AST
18
+ metric; the core AST evaluation itself needs no external API.
19
+
20
+ '
21
+ capability_tags:
22
+ - research_code_optimization
23
+ - prompt_programming
24
+ - software_language_tools
25
+ - llm_tooling
26
+ - python
27
+ aisb_direction: T3
28
+ track_fit:
29
+ - paper_track
30
+ - benchmark_track
31
+ task_mode: evaluation_driven
32
+ requires_execution: true
33
+ requires_paper: true
34
+ integrity_level: cas_plus_canary
35
+ snapshot_status: runnable
36
+ support_level: turnkey
37
+ cost_band: low
38
+ time_band: 30-60m
39
+ difficulty: medium
40
+ data_access: public
41
+ primary_outputs:
42
+ - ast_size
43
+ - runtime_output
44
+ launch_profiles:
45
+ - id: quick_check
46
+ label: Quick Check
47
+ description: 'Run eval_ast_size.py to compute the AST node count of the canonical
48
+ CoT-SC snippet. No LLM API key needed. Completes in seconds.
49
+
50
+ '
51
+ - id: ast_eval
52
+ label: AST Eval
53
+ description: 'Run the full compactness-focused APPL evaluation workflow, including
54
+ any optimizations applied to the APPL language constructs, and report the resulting
55
+ ast_size metric.
56
+
57
+ '
58
+ - id: runtime_exercise
59
+ label: Runtime Exercise
60
+ description: 'Install applang, configure an LLM backend (e.g., OpenAI), and run
61
+ the bundled examples to verify end-to-end runtime behavior alongside the AST metric.
62
+
63
+ '
64
+ dataset_download:
65
+ primary_method: self_contained
66
+ sources: []
67
+ notes:
68
+ - No external dataset download is required. The evaluation code snippet is embedded
69
+ in eval_ast_size.py.
70
+ - The full APPL library source is included in the snapshot under src/appl/.
71
+ credential_requirements:
72
+ mode: optional
73
+ items:
74
+ - OpenAI API key (only for end-to-end runtime exercises, not for the core AST metric)
75
+ notes:
76
+ - The primary ast_size metric requires no credentials or network access.
77
+ - Set OPENAI_API_KEY in .env or as an environment variable if exercising runtime
78
+ examples.
79
+ - Other LLM backends supported via litellm may require their own API keys.
80
+ resources:
81
+ minimum:
82
+ cpu_cores: 4
83
+ ram_gb: 8
84
+ disk_gb: 10
85
+ gpu_count: 0
86
+ gpu_vram_gb: 0
87
+ recommended:
88
+ cpu_cores: 8
89
+ ram_gb: 16
90
+ disk_gb: 20
91
+ gpu_count: 0
92
+ gpu_vram_gb: 0
93
+ environment:
94
+ python: '3.9'
95
+ cuda: null
96
+ pytorch: null
97
+ flash_attn: null
98
+ key_packages:
99
+ - applang>=0.2.2
100
+ - litellm>=1.59.8
101
+ - openai>=1.13.3
102
+ - pydantic>=2.6.3
103
+ - libcst>=1.4.0
104
+ notes:
105
+ - CPU-only execution is sufficient for the core AST evaluation metric.
106
+ - Python 3.9+ required; tested through 3.13.
107
+ - Install via pip install -U applang or from the bundled pyproject.toml with pdm.
108
+ - See pyproject.toml for the full dependency set including optional extras (lunary,
109
+ instructor, langfuse).
110
+ risk_flags:
111
+ - optional_api_dependency
112
+ risk_notes:
113
+ - The core ast_size metric is fully self-contained and reproducible without any external
114
+ service.
115
+ - End-to-end runtime exercises require a working LLM API (OpenAI or other litellm-supported
116
+ backend), which incurs API costs and introduces non-determinism in generated text.
117
+ - No benchmark execution was performed during the packaging pass; metric values should
118
+ be verified by running eval_ast_size.py.
119
+ recommended_when: 'Use this benchmark when you want a software-oriented LLM task that
120
+ emphasizes runtime behavior, prompt-program structure, and code compactness rather
121
+ than GPU training. Suitable for evaluating compiler-style optimizations to prompt
122
+ programming language constructs.
123
+
124
+ '
125
+ not_recommended_when: 'Do not use this if you are looking for a heavy model-training
126
+ benchmark, a purely offline task with no LLM backend integration path, or a benchmark
127
+ that measures model accuracy on downstream NLP tasks.
128
+
129
+ '
130
+ paper:
131
+ title: 'APPL: A Prompt Programming Language for Harmonious Integration of Programs
132
+ and Large Language Model Prompts'
133
+ venue: arXiv preprint
134
+ year: 2024
135
+ url: https://arxiv.org/abs/2406.13161
136
+ download:
137
+ url: https://github.com/ResearAI/DeepScientist/releases/download/aisb-v0.0.1/aisb.t3.013_appl.zip
138
+ archive_type: zip
139
+ local_dir_name: paper-13-APPL
140
+ provider: github_release
141
+ repo: ResearAI/DeepScientist
142
+ tag: aisb-v0.0.1
143
+ asset_name: aisb.t3.013_appl.zip
144
+ sha256: 92f377259bd60724229ee6c61adda761177ee60e47813ebb71dc37f43c0e24f2
145
+ size_bytes: 2259394
146
+ commercial:
147
+ annual_fee: null
148
+ display:
149
+ palette_seed: apple-ink-notebook
150
+ art_style: language-design
151
+ accent_priority: medium
152
+ image_path: ../image/013_aisb.t3.013_appl.jpg
@@ -0,0 +1,126 @@
1
+ schema_version: 1
2
+ id: aisb.t3.013_appl
3
+ name: 'APPL: A Prompt Programming Language for Harmonious Integration of Programs
4
+ and Large Language Model Prompts'
5
+ version: 0.1.0
6
+ one_line: 通过对论文中的规范代码片段运行基于代码的评估器,测量并优化APPL提示编程结构的AST紧凑性。
7
+ task_description: '该基准测试用于评估APPL(一种原生Python提示编程语言,可将LLM调用无缝集成到传统程序中)的结构紧凑性。主要任务是优化APPL运行时和语言结构,使规范的提示程序模式(例如论文图7a中的带自洽的思维链)生成更小的Python AST,通过捆绑的eval_ast_size.py脚本统计AST节点数量来测量。评估器使用Python的ast模块解析固定代码片段并报告总节点数。不涉及模型训练;基准测试仅需CPU,专注于编译器风格的简化和语言设计效率。如果您希望在AST指标之外对APPL运行时进行端到端练习,则需要LLM API密钥(如OpenAI);核心AST评估本身无需外部API。
8
+
9
+ '
10
+ capability_tags:
11
+ - research_code_optimization
12
+ - prompt_programming
13
+ - software_language_tools
14
+ - llm_tooling
15
+ - python
16
+ aisb_direction: T3
17
+ track_fit:
18
+ - paper_track
19
+ - benchmark_track
20
+ task_mode: evaluation_driven
21
+ requires_execution: true
22
+ requires_paper: true
23
+ integrity_level: cas_plus_canary
24
+ snapshot_status: runnable
25
+ support_level: turnkey
26
+ cost_band: low
27
+ time_band: 30-60m
28
+ difficulty: medium
29
+ data_access: public
30
+ primary_outputs:
31
+ - ast_size
32
+ - runtime_output
33
+ launch_profiles:
34
+ - id: quick_check
35
+ label: 快速检查
36
+ description: '运行eval_ast_size.py计算规范CoT-SC代码片段的AST节点数。无需LLM API密钥。几秒内完成。
37
+
38
+ '
39
+ - id: ast_eval
40
+ label: AST评估
41
+ description: '运行完整的以紧凑性为重点的APPL评估工作流程,包括对APPL语言结构应用的任何优化,并报告最终的ast_size指标。
42
+
43
+ '
44
+ - id: runtime_exercise
45
+ label: 运行时练习
46
+ description: '安装applang,配置LLM后端(如OpenAI),并运行捆绑的示例以验证端到端运行时行为以及AST指标。
47
+
48
+ '
49
+ dataset_download:
50
+ primary_method: self_contained
51
+ sources: []
52
+ notes:
53
+ - 无需外部数据集下载。评估代码片段已嵌入eval_ast_size.py中。
54
+ - 完整的APPL库源代码包含在快照的src/appl/目录下。
55
+ credential_requirements:
56
+ mode: optional
57
+ items:
58
+ - OpenAI API密钥(仅用于端到端运行时练习,不用于核心AST指标)
59
+ notes:
60
+ - 主要的ast_size指标无需凭据或网络访问。
61
+ - 如果需要运行运行时示例,请在.env中设置OPENAI_API_KEY,或将其设置为环境变量。
62
+ - 通过litellm支持的其他LLM后端可能需要各自的API密钥。
63
+ resources:
64
+ minimum:
65
+ cpu_cores: 4
66
+ ram_gb: 8
67
+ disk_gb: 10
68
+ gpu_count: 0
69
+ gpu_vram_gb: 0
70
+ recommended:
71
+ cpu_cores: 8
72
+ ram_gb: 16
73
+ disk_gb: 20
74
+ gpu_count: 0
75
+ gpu_vram_gb: 0
76
+ environment:
77
+ python: '3.9'
78
+ cuda: null
79
+ pytorch: null
80
+ flash_attn: null
81
+ key_packages:
82
+ - applang>=0.2.2
83
+ - litellm>=1.59.8
84
+ - openai>=1.13.3
85
+ - pydantic>=2.6.3
86
+ - libcst>=1.4.0
87
+ notes:
88
+ - 纯CPU执行足以完成核心AST评估指标。
89
+ - 需要Python 3.9+;已测试至3.13版本。
90
+ - 可通过pip install -U applang安装,或使用pdm从捆绑的pyproject.toml安装。
91
+ - 完整的依赖项集合(包括可选的lunary、instructor、langfuse)请参见pyproject.toml。
92
+ risk_flags:
93
+ - optional_api_dependency
94
+ risk_notes:
95
+ - 核心ast_size指标完全自包含,无需任何外部服务即可复现。
96
+ - 端到端运行时练习需要可用的LLM API(OpenAI或其他litellm支持的后端),会产生API费用,并在生成的文本中引入非确定性。
97
+ - 打包过程中未执行基准测试;应通过运行eval_ast_size.py来验证指标值。
98
+ recommended_when: '当您需要一个强调运行时行为、提示程序结构和代码紧凑性而非GPU训练的面向软件的LLM任务时使用此基准测试。适用于评估对提示编程语言结构的编译器风格优化。
99
+
100
+ '
101
+ not_recommended_when: '如果您正在寻找重型模型训练基准测试、完全离线且无LLM后端集成路径的任务,或测量模型在下游NLP任务上准确性的基准测试,请勿使用此基准测试。
102
+
103
+ '
104
+ paper:
105
+ title: 'APPL: A Prompt Programming Language for Harmonious Integration of Programs
106
+ and Large Language Model Prompts'
107
+ venue: arXiv preprint
108
+ year: 2024
109
+ url: https://arxiv.org/abs/2406.13161
110
+ download:
111
+ url: https://github.com/ResearAI/DeepScientist/releases/download/aisb-v0.0.1/aisb.t3.013_appl.zip
112
+ archive_type: zip
113
+ local_dir_name: paper-13-APPL
114
+ provider: github_release
115
+ repo: ResearAI/DeepScientist
116
+ tag: aisb-v0.0.1
117
+ asset_name: aisb.t3.013_appl.zip
118
+ sha256: 92f377259bd60724229ee6c61adda761177ee60e47813ebb71dc37f43c0e24f2
119
+ size_bytes: 2259394
120
+ commercial:
121
+ annual_fee: null
122
+ display:
123
+ palette_seed: apple-ink-notebook
124
+ art_style: language-design
125
+ accent_priority: medium
126
+ image_path: ../image/013_aisb.t3.013_appl.jpg
@@ -0,0 +1,207 @@
1
+ schema_version: 1
2
+ id: aisb.t3.014_piguard
3
+ name: 'PIGuard: Prompt Injection Guardrail via Mitigating Overdefense for Free'
4
+ version: 0.1.0
5
+ one_line: 'Train or fine-tune a DeBERTa-based prompt-injection guard model (PIGuard)
6
+ and evaluate it on four benchmarks (NotInject, PINT, BIPIA, WildGuard-Benign) measuring
7
+ malicious detection, benign accuracy, and over-defense accuracy across trigger-word
8
+ difficulty levels.
9
+
10
+ '
11
+ task_description: 'This benchmark packages the full PIGuard pipeline: a lightweight
12
+ DeBERTa-based binary classifier that detects prompt-injection attacks while minimizing
13
+ over-defense (false positives on benign inputs containing trigger words like "ignore").
14
+ The optimization task is to improve injection recall without increasing false positives.
15
+ Training data from 20 open-source datasets plus LLM-augmented samples is bundled
16
+ in the snapshot. Evaluation runs across four test sets: NotInject (339 benign samples
17
+ with 1/2/3 trigger words, split into three difficulty subsets of 113 each), BIPIA
18
+ (malicious), WildGuard-Benign, and PINT. The PINT benchmark requires a separate
19
+ access request. Three evaluation scripts are provided: eval.py (loads local checkpoint
20
+ via PIGuard.py), eval_piguard.py (loads HuggingFace-format weights from /tmp/PIGuard_weights),
21
+ and run_official_eval.py (same but with a custom benign-score threshold of 0.10).
22
+ Primary metrics are over-defense accuracy on NotInject (overall and per-trigger-count),
23
+ malicious accuracy on PINT/BIPIA, and benign accuracy on PINT/WildGuard-Benign.
24
+ The model weights can be loaded from HuggingFace (leolee99/PIGuard) or from a Google
25
+ Drive checkpoint.
26
+
27
+ '
28
+ capability_tags:
29
+ - research_code_optimization
30
+ - llm_security
31
+ - classification
32
+ - prompt_injection
33
+ - robustness
34
+ aisb_direction: T3
35
+ track_fit:
36
+ - paper_track
37
+ - benchmark_track
38
+ task_mode: experiment_driven
39
+ requires_execution: true
40
+ requires_paper: true
41
+ integrity_level: cas_plus_canary
42
+ snapshot_status: runnable
43
+ support_level: turnkey
44
+ cost_band: medium
45
+ time_band: 6-24h
46
+ difficulty: medium
47
+ data_access: restricted
48
+ primary_outputs:
49
+ - over_defense_accuracy
50
+ - one_trigger_accuracy
51
+ - two_trigger_accuracy
52
+ - three_trigger_accuracy
53
+ - malicious_accuracy
54
+ - benign_accuracy
55
+ - trigger_accuracy_breakdown
56
+ - evaluation_report
57
+ launch_profiles:
58
+ - id: quick_check
59
+ label: Quick Check (HuggingFace weights)
60
+ description: 'Run eval_hf.py or eval_piguard.py to evaluate the pretrained PIGuard
61
+ model from HuggingFace on all bundled test sets (NotInject, BIPIA, WildGuard-Benign).
62
+ No training needed. PINT evaluation is skipped unless you have obtained access
63
+ separately.
64
+
65
+ '
66
+ - id: full_eval
67
+ label: Full Eval (all four benchmarks)
68
+ description: 'Run eval.py --resume <checkpoint> or run_official_eval.py across NotInject,
69
+ BIPIA, WildGuard-Benign, and PINT. Requires PINT access request and YAML-to-JSON
70
+ conversion via util.py.
71
+
72
+ '
73
+ - id: train_and_eval
74
+ label: Train + Evaluate
75
+ description: 'Run train.py to retrain PIGuard with the MOF strategy on the bundled
76
+ 20-source training set, then evaluate the resulting checkpoint on all four benchmarks.
77
+ Expect 6-24h on a single GPU depending on VRAM and batch size.
78
+
79
+ '
80
+ dataset_download:
81
+ primary_method: mixed
82
+ sources:
83
+ - kind: huggingface
84
+ url: https://huggingface.co/datasets/leolee99/NotInject
85
+ access: public
86
+ note: 'NotInject over-defense evaluation dataset (339 samples, three subsets of
87
+ 113 each). Also bundled in the snapshot under datasets/.
88
+
89
+ '
90
+ - kind: huggingface
91
+ url: https://huggingface.co/leolee99/PIGuard
92
+ access: public
93
+ note: Pretrained PIGuard model weights (DeBERTa-based).
94
+ - kind: google_drive
95
+ url: https://drive.google.com/file/d/1JpiVb_wtnbBLNEjIx1KS7PHuvmARQKTu/view?usp=sharing
96
+ access: public
97
+ note: Alternative checkpoint download.
98
+ - kind: external
99
+ url: https://share-eu1.hsforms.com/1TwiBEvLXRrCjJSdnbnHpLwfdfs3
100
+ access: request_required
101
+ note: 'PINT benchmark from Lakera AI. Not public; requires filling out an access
102
+ request form. Must convert from YAML to JSON via util.py after download.
103
+
104
+ '
105
+ - kind: bundled
106
+ url: null
107
+ access: public
108
+ note: 'Training data (20 open-source datasets + LLM augmentations), validation
109
+ set (144 samples), and test sets (NotInject, BIPIA, WildGuard-Benign) are all
110
+ bundled under PIGuard/datasets/.
111
+
112
+ '
113
+ notes:
114
+ - Total bundled data is modest (tens of MB). No large-scale download required for
115
+ most profiles.
116
+ - PINT is the only external dataset that requires a separate access request.
117
+ credential_requirements:
118
+ mode: none
119
+ items: []
120
+ notes:
121
+ - PINT benchmark access requires a form submission to Lakera AI, but no API key.
122
+ - HuggingFace model download is public; no token needed.
123
+ resources:
124
+ minimum:
125
+ cpu_cores: 8
126
+ ram_gb: 32
127
+ disk_gb: 50
128
+ gpu_count: 1
129
+ gpu_vram_gb: 12
130
+ recommended:
131
+ cpu_cores: 16
132
+ ram_gb: 64
133
+ disk_gb: 120
134
+ gpu_count: 1
135
+ gpu_vram_gb: 24
136
+ environment:
137
+ python: '3.10'
138
+ cuda: '11.8'
139
+ pytorch: 2.4.0
140
+ flash_attn: 2.6.1
141
+ key_packages:
142
+ - flash-attn==2.6.1
143
+ - vllm==0.5.4
144
+ - transformers==4.44.0
145
+ - ptflops
146
+ notes:
147
+ - See the bundled requirements.txt for the full dependency set.
148
+ - The model is DeBERTa-based (not an LLM); vllm is used only in certain evaluation
149
+ modes.
150
+ - flash-attn requires compatible CUDA and GPU architecture.
151
+ risk_flags:
152
+ - restricted_dataset_component
153
+ - external_model_weights
154
+ risk_notes:
155
+ - 'The PINT benchmark is not public and not bundled. Full four-benchmark evaluation
156
+ requires requesting access from Lakera AI via a web form. Without PINT, three of
157
+ four test sets are still available.
158
+
159
+ '
160
+ - 'Model weights must be downloaded from HuggingFace or Google Drive at runtime unless
161
+ pre-staged at /tmp/PIGuard_weights.
162
+
163
+ '
164
+ - 'eval_piguard.py and run_official_eval.py hardcode MODEL_PATH to /tmp/PIGuard_weights
165
+ and DATASET_ROOT to /repo/datasets. These paths may need adjustment.
166
+
167
+ '
168
+ - 'run_official_eval.py uses a custom BENIGN_THRESHOLD of 0.10 (lowered from 0.50),
169
+ which changes the classification boundary compared to the default argmax approach
170
+ in eval.py.
171
+
172
+ '
173
+ recommended_when: 'Use this benchmark when you want a security-oriented text classification
174
+ task with dual pressure: improving prompt-injection detection recall while controlling
175
+ over-defense (false positives on benign inputs containing attack-like trigger words).
176
+ Good fit for studying shortcut learning, DeBERTa fine-tuning strategies, and threshold
177
+ calibration on lightweight guard models.
178
+
179
+ '
180
+ not_recommended_when: 'Do not use this if you cannot provide a GPU (the model requires
181
+ CUDA for practical training and batched inference), if you need the full PINT evaluation
182
+ but cannot obtain access, or if you are looking for a generative LLM benchmark rather
183
+ than a binary classification task.
184
+
185
+ '
186
+ paper:
187
+ title: 'PIGuard: Prompt Injection Guardrail via Mitigating Overdefense for Free'
188
+ venue: ACL 2025
189
+ year: 2025
190
+ url: https://aclanthology.org/2025.acl-long.1468/
191
+ download:
192
+ url: https://github.com/ResearAI/DeepScientist/releases/download/aisb-v0.0.1/aisb.t3.014_piguard.zip
193
+ archive_type: zip
194
+ local_dir_name: paper-14-PIGuard
195
+ provider: github_release
196
+ repo: ResearAI/DeepScientist
197
+ tag: aisb-v0.0.1
198
+ asset_name: aisb.t3.014_piguard.zip
199
+ sha256: 9a7996fc3b40709caa357b763d7a51e1f1f2ab449480e403ec34a7c248eb4c81
200
+ size_bytes: 1621668
201
+ commercial:
202
+ annual_fee: null
203
+ display:
204
+ palette_seed: rust-sand-guard
205
+ art_style: safety-dashboard
206
+ accent_priority: high
207
+ image_path: ../image/014_aisb.t3.014_piguard.jpg