@researai/deepscientist 1.5.16 → 1.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (896)
  1. package/AGENTS.md +309 -130
  2. package/AISB/catalog/aisb.b1.agentic_coding.yaml +244 -0
  3. package/AISB/catalog/aisb.b10.climate_earth.yaml +235 -0
  4. package/AISB/catalog/aisb.b11.model_efficiency.yaml +231 -0
  5. package/AISB/catalog/aisb.b12.embodied_ai.yaml +238 -0
  6. package/AISB/catalog/aisb.b2.agent_systems.yaml +229 -0
  7. package/AISB/catalog/aisb.b3.self_evolving_rl.yaml +237 -0
  8. package/AISB/catalog/aisb.b4.lm_reasoning.yaml +240 -0
  9. package/AISB/catalog/aisb.b5.math_proof.yaml +235 -0
  10. package/AISB/catalog/aisb.b6.research_process.yaml +243 -0
  11. package/AISB/catalog/aisb.b7.multimodal_fusion.yaml +232 -0
  12. package/AISB/catalog/aisb.b8.lifesci_drug.yaml +275 -0
  13. package/AISB/catalog/aisb.b9.material_science.yaml +237 -0
  14. package/AISB/catalog/aisb.t3.001_savvy.yaml +159 -0
  15. package/AISB/catalog/aisb.t3.001_savvy.zh.yaml +121 -0
  16. package/AISB/catalog/aisb.t3.002_pinet.yaml +189 -0
  17. package/AISB/catalog/aisb.t3.002_pinet.zh.yaml +130 -0
  18. package/AISB/catalog/aisb.t3.004_decentralattn.yaml +184 -0
  19. package/AISB/catalog/aisb.t3.004_decentralattn.zh.yaml +153 -0
  20. package/AISB/catalog/aisb.t3.005_tsae.yaml +193 -0
  21. package/AISB/catalog/aisb.t3.005_tsae.zh.yaml +139 -0
  22. package/AISB/catalog/aisb.t3.006_physense.yaml +194 -0
  23. package/AISB/catalog/aisb.t3.006_physense.zh.yaml +118 -0
  24. package/AISB/catalog/aisb.t3.007_reasoningiqa.yaml +169 -0
  25. package/AISB/catalog/aisb.t3.007_reasoningiqa.zh.yaml +133 -0
  26. package/AISB/catalog/aisb.t3.008_meanflows.yaml +188 -0
  27. package/AISB/catalog/aisb.t3.008_meanflows.zh.yaml +140 -0
  28. package/AISB/catalog/aisb.t3.009_scoremissing.yaml +179 -0
  29. package/AISB/catalog/aisb.t3.009_scoremissing.zh.yaml +119 -0
  30. package/AISB/catalog/aisb.t3.010_suitabilityfilter.yaml +221 -0
  31. package/AISB/catalog/aisb.t3.010_suitabilityfilter.zh.yaml +141 -0
  32. package/AISB/catalog/aisb.t3.011_osd.yaml +206 -0
  33. package/AISB/catalog/aisb.t3.011_osd.zh.yaml +163 -0
  34. package/AISB/catalog/aisb.t3.012_efficientqat.yaml +206 -0
  35. package/AISB/catalog/aisb.t3.012_efficientqat.zh.yaml +159 -0
  36. package/AISB/catalog/aisb.t3.013_appl.yaml +152 -0
  37. package/AISB/catalog/aisb.t3.013_appl.zh.yaml +126 -0
  38. package/AISB/catalog/aisb.t3.014_piguard.yaml +207 -0
  39. package/AISB/catalog/aisb.t3.014_piguard.zh.yaml +164 -0
  40. package/AISB/catalog/aisb.t3.015_frspec.yaml +209 -0
  41. package/AISB/catalog/aisb.t3.015_frspec.zh.yaml +163 -0
  42. package/AISB/catalog/aisb.t3.016_mathfusion.yaml +166 -0
  43. package/AISB/catalog/aisb.t3.016_mathfusion.zh.yaml +145 -0
  44. package/AISB/catalog/aisb.t3.017_multimodalglp.yaml +171 -0
  45. package/AISB/catalog/aisb.t3.017_multimodalglp.zh.yaml +122 -0
  46. package/AISB/catalog/aisb.t3.018_cotsynth.yaml +206 -0
  47. package/AISB/catalog/aisb.t3.018_cotsynth.zh.yaml +162 -0
  48. package/AISB/catalog/aisb.t3.019_dyscaleut.yaml +211 -0
  49. package/AISB/catalog/aisb.t3.019_dyscaleut.zh.yaml +148 -0
  50. package/AISB/catalog/aisb.t3.020_aristotle.yaml +173 -0
  51. package/AISB/catalog/aisb.t3.020_aristotle.zh.yaml +119 -0
  52. package/AISB/catalog/aisb.t3.021_tokenrecycling.yaml +160 -0
  53. package/AISB/catalog/aisb.t3.021_tokenrecycling.zh.yaml +129 -0
  54. package/AISB/catalog/aisb.t3.022_chainofreasoning.yaml +204 -0
  55. package/AISB/catalog/aisb.t3.022_chainofreasoning.zh.yaml +161 -0
  56. package/AISB/catalog/aisb.t3.023_guidedembed.yaml +211 -0
  57. package/AISB/catalog/aisb.t3.023_guidedembed.zh.yaml +189 -0
  58. package/AISB/catalog/aisb.t3.024_outputcentric.yaml +148 -0
  59. package/AISB/catalog/aisb.t3.024_outputcentric.zh.yaml +131 -0
  60. package/AISB/catalog/aisb.t3.025_deeper.yaml +143 -0
  61. package/AISB/catalog/aisb.t3.025_deeper.zh.yaml +116 -0
  62. package/AISB/catalog/aisb.t3.026_gartkg.yaml +195 -0
  63. package/AISB/catalog/aisb.t3.026_gartkg.zh.yaml +127 -0
  64. package/AISB/catalog/aisb.t3.027_citeeval.yaml +182 -0
  65. package/AISB/catalog/aisb.t3.027_citeeval.zh.yaml +135 -0
  66. package/AISB/catalog/aisb.t3.028_sbam.yaml +206 -0
  67. package/AISB/catalog/aisb.t3.028_sbam.zh.yaml +166 -0
  68. package/AISB/catalog/aisb.t3.029_cdqgeoembed.yaml +224 -0
  69. package/AISB/catalog/aisb.t3.029_cdqgeoembed.zh.yaml +142 -0
  70. package/AISB/catalog/aisb.t3.030_processrm.yaml +211 -0
  71. package/AISB/catalog/aisb.t3.030_processrm.zh.yaml +166 -0
  72. package/AISB/catalog/aisb.t3.031_circuitstability.yaml +172 -0
  73. package/AISB/catalog/aisb.t3.031_circuitstability.zh.yaml +134 -0
  74. package/AISB/catalog/aisb.t3.032_ptsolver.yaml +169 -0
  75. package/AISB/catalog/aisb.t3.032_ptsolver.zh.yaml +135 -0
  76. package/AISB/catalog/aisb.t3.033_gcse.yaml +144 -0
  77. package/AISB/catalog/aisb.t3.033_gcse.zh.yaml +126 -0
  78. package/AISB/catalog/aisb.t3.034_ensemblewm.yaml +183 -0
  79. package/AISB/catalog/aisb.t3.034_ensemblewm.zh.yaml +146 -0
  80. package/AISB/catalog/aisb.t3.035_moralvalueswa.yaml +207 -0
  81. package/AISB/catalog/aisb.t3.035_moralvalueswa.zh.yaml +165 -0
  82. package/AISB/catalog/aisb.t3.036_weakstrongpref.yaml +210 -0
  83. package/AISB/catalog/aisb.t3.036_weakstrongpref.zh.yaml +194 -0
  84. package/AISB/catalog/aisb.t3.037_dementiamask.yaml +172 -0
  85. package/AISB/catalog/aisb.t3.037_dementiamask.zh.yaml +132 -0
  86. package/AISB/catalog/aisb.t3.038_tinysam.yaml +284 -0
  87. package/AISB/catalog/aisb.t3.038_tinysam.zh.yaml +240 -0
  88. package/AISB/catalog/aisb.t3.039_calf.yaml +224 -0
  89. package/AISB/catalog/aisb.t3.039_calf.zh.yaml +194 -0
  90. package/AISB/catalog/aisb.t3.040_graniteguardian.yaml +199 -0
  91. package/AISB/catalog/aisb.t3.040_graniteguardian.zh.yaml +174 -0
  92. package/AISB/catalog/aisb.t3.041_amdm.yaml +149 -0
  93. package/AISB/catalog/aisb.t3.041_amdm.zh.yaml +137 -0
  94. package/AISB/catalog/aisb.t3.042_xpatch.yaml +216 -0
  95. package/AISB/catalog/aisb.t3.042_xpatch.zh.yaml +182 -0
  96. package/AISB/catalog/aisb.t3.043_vhm.yaml +268 -0
  97. package/AISB/catalog/aisb.t3.043_vhm.zh.yaml +193 -0
  98. package/AISB/catalog/aisb.t3.044_rgvi.yaml +224 -0
  99. package/AISB/catalog/aisb.t3.044_rgvi.zh.yaml +176 -0
  100. package/AISB/catalog/aisb.t3.045_pslstm.yaml +203 -0
  101. package/AISB/catalog/aisb.t3.045_pslstm.zh.yaml +179 -0
  102. package/AISB/catalog/aisb.t3.046_nonstatts.yaml +208 -0
  103. package/AISB/catalog/aisb.t3.046_nonstatts.zh.yaml +194 -0
  104. package/AISB/catalog/aisb.t3.047_timepfn.yaml +156 -0
  105. package/AISB/catalog/aisb.t3.047_timepfn.zh.yaml +124 -0
  106. package/AISB/catalog/aisb.t3.048_proxyspex.yaml +148 -0
  107. package/AISB/catalog/aisb.t3.048_proxyspex.zh.yaml +125 -0
  108. package/AISB/catalog/aisb.t3.049_hogwildinference.yaml +183 -0
  109. package/AISB/catalog/aisb.t3.049_hogwildinference.zh.yaml +138 -0
  110. package/AISB/catalog/aisb.t3.050_causalpfn.yaml +214 -0
  111. package/AISB/catalog/aisb.t3.050_causalpfn.zh.yaml +190 -0
  112. package/AISB/catalog/aisb.t3.051_flashtp.yaml +169 -0
  113. package/AISB/catalog/aisb.t3.051_flashtp.zh.yaml +124 -0
  114. package/AISB/catalog/aisb.t3.052_nsdiff.yaml +155 -0
  115. package/AISB/catalog/aisb.t3.052_nsdiff.zh.yaml +138 -0
  116. package/AISB/catalog/aisb.t3.053_k2vae.yaml +158 -0
  117. package/AISB/catalog/aisb.t3.053_k2vae.zh.yaml +132 -0
  118. package/AISB/catalog/aisb.t3.054_timebase.yaml +178 -0
  119. package/AISB/catalog/aisb.t3.054_timebase.zh.yaml +158 -0
  120. package/AISB/catalog/aisb.t3.055_csbrain.yaml +238 -0
  121. package/AISB/catalog/aisb.t3.055_csbrain.zh.yaml +184 -0
  122. package/AISB/catalog/aisb.t3.056_infosam.yaml +224 -0
  123. package/AISB/catalog/aisb.t3.056_infosam.zh.yaml +189 -0
  124. package/AISB/catalog/aisb.t3.057_mdreid.yaml +129 -0
  125. package/AISB/catalog/aisb.t3.057_mdreid.zh.yaml +117 -0
  126. package/AISB/catalog/aisb.t3.058_mindglitch.yaml +171 -0
  127. package/AISB/catalog/aisb.t3.058_mindglitch.zh.yaml +145 -0
  128. package/AISB/catalog/aisb.t3.059_selfsupervised.yaml +154 -0
  129. package/AISB/catalog/aisb.t3.059_selfsupervised.zh.yaml +125 -0
  130. package/AISB/catalog/aisb.t3.060_iaggad.yaml +121 -0
  131. package/AISB/catalog/aisb.t3.060_iaggad.zh.yaml +100 -0
  132. package/AISB/catalog/aisb.t3.061_hsgkn.yaml +136 -0
  133. package/AISB/catalog/aisb.t3.061_hsgkn.zh.yaml +113 -0
  134. package/AISB/catalog/aisb.t3.062_visionts.yaml +237 -0
  135. package/AISB/catalog/aisb.t3.062_visionts.zh.yaml +216 -0
  136. package/AISB/catalog/aisb.t3.063_tsrag.yaml +162 -0
  137. package/AISB/catalog/aisb.t3.063_tsrag.zh.yaml +138 -0
  138. package/AISB/catalog/aisb.t3.064_pir.yaml +221 -0
  139. package/AISB/catalog/aisb.t3.064_pir.zh.yaml +197 -0
  140. package/AISB/catalog/aisb.t3.065_proteinbinding.yaml +234 -0
  141. package/AISB/catalog/aisb.t3.065_proteinbinding.zh.yaml +167 -0
  142. package/AISB/catalog/aisb.t3.066_tropicalattention.yaml +267 -0
  143. package/AISB/catalog/aisb.t3.066_tropicalattention.zh.yaml +229 -0
  144. package/AISB/catalog/aisb.t3.067_kanad.yaml +193 -0
  145. package/AISB/catalog/aisb.t3.067_kanad.zh.yaml +167 -0
  146. package/AISB/catalog/aisb.t3.068_sempo.yaml +187 -0
  147. package/AISB/catalog/aisb.t3.068_sempo.zh.yaml +148 -0
  148. package/AISB/catalog/aisb.t3.069_treehfd.yaml +129 -0
  149. package/AISB/catalog/aisb.t3.069_treehfd.zh.yaml +111 -0
  150. package/AISB/catalog/aisb.t3.070_certifiedunlearning.yaml +224 -0
  151. package/AISB/catalog/aisb.t3.070_certifiedunlearning.zh.yaml +171 -0
  152. package/AISB/catalog/aisb.t3.071_neuralmjd.yaml +142 -0
  153. package/AISB/catalog/aisb.t3.071_neuralmjd.zh.yaml +120 -0
  154. package/AISB/catalog/aisb.t3.072_fedgmt.yaml +181 -0
  155. package/AISB/catalog/aisb.t3.072_fedgmt.zh.yaml +158 -0
  156. package/AISB/catalog/aisb.t3.073_rld.yaml +161 -0
  157. package/AISB/catalog/aisb.t3.073_rld.zh.yaml +129 -0
  158. package/AISB/catalog/aisb.t3.074_lsvi.yaml +163 -0
  159. package/AISB/catalog/aisb.t3.074_lsvi.zh.yaml +129 -0
  160. package/AISB/catalog/aisb.t3.075_treeslicedentropy.yaml +201 -0
  161. package/AISB/catalog/aisb.t3.075_treeslicedentropy.zh.yaml +148 -0
  162. package/AISB/catalog/aisb.t3.076_aanet.yaml +169 -0
  163. package/AISB/catalog/aisb.t3.076_aanet.zh.yaml +129 -0
  164. package/AISB/catalog/aisb.t3.077_cmnn.yaml +199 -0
  165. package/AISB/catalog/aisb.t3.077_cmnn.zh.yaml +165 -0
  166. package/AISB/catalog/aisb.t3.078_conformalanomaly.yaml +146 -0
  167. package/AISB/catalog/aisb.t3.078_conformalanomaly.zh.yaml +117 -0
  168. package/AISB/catalog/aisb.t3.079_dpfkmeans.yaml +131 -0
  169. package/AISB/catalog/aisb.t3.079_dpfkmeans.zh.yaml +104 -0
  170. package/AISB/catalog/aisb.t3.080_latentscorereweight.yaml +169 -0
  171. package/AISB/catalog/aisb.t3.080_latentscorereweight.zh.yaml +123 -0
  172. package/AISB/catalog/aisb.t3.081_qmamba.yaml +150 -0
  173. package/AISB/catalog/aisb.t3.081_qmamba.zh.yaml +117 -0
  174. package/AISB/catalog/aisb.t3.082_onlinellmrouting.yaml +160 -0
  175. package/AISB/catalog/aisb.t3.082_onlinellmrouting.zh.yaml +133 -0
  176. package/AISB/catalog/aisb.t3.083_starformer.yaml +178 -0
  177. package/AISB/catalog/aisb.t3.083_starformer.zh.yaml +140 -0
  178. package/AISB/catalog/aisb.t3.084_ift.yaml +139 -0
  179. package/AISB/catalog/aisb.t3.084_ift.zh.yaml +111 -0
  180. package/AISB/catalog/aisb.t3.085_neuralsurv.yaml +183 -0
  181. package/AISB/catalog/aisb.t3.085_neuralsurv.zh.yaml +143 -0
  182. package/AISB/catalog/aisb.t3.086_stella.yaml +197 -0
  183. package/AISB/catalog/aisb.t3.086_stella.zh.yaml +142 -0
  184. package/AISB/catalog/aisb.t3.087_moses.yaml +167 -0
  185. package/AISB/catalog/aisb.t3.087_moses.zh.yaml +132 -0
  186. package/AISB/catalog/aisb.t3.088_channelnorm.yaml +140 -0
  187. package/AISB/catalog/aisb.t3.088_channelnorm.zh.yaml +109 -0
  188. package/AISB/catalog/aisb.t3.089_causalvelocity.yaml +730 -0
  189. package/AISB/catalog/aisb.t3.089_causalvelocity.zh.yaml +668 -0
  190. package/AISB/catalog/aisb.t3.090_rstib.yaml +144 -0
  191. package/AISB/catalog/aisb.t3.090_rstib.zh.yaml +109 -0
  192. package/AISB/catalog/aisb.t3.091_timeawarecausal.yaml +132 -0
  193. package/AISB/catalog/aisb.t3.091_timeawarecausal.zh.yaml +107 -0
  194. package/AISB/catalog/aisb.t3.092_kmeanslocalopt.yaml +138 -0
  195. package/AISB/catalog/aisb.t3.092_kmeanslocalopt.zh.yaml +110 -0
  196. package/AISB/catalog/aisb.t3.093_fedwmsam.yaml +134 -0
  197. package/AISB/catalog/aisb.t3.093_fedwmsam.zh.yaml +106 -0
  198. package/AISB/catalog/aisb.t3.094_boundre.yaml +147 -0
  199. package/AISB/catalog/aisb.t3.094_boundre.zh.yaml +114 -0
  200. package/AISB/catalog/aisb.t3.095_fastfeaturecp.yaml +153 -0
  201. package/AISB/catalog/aisb.t3.095_fastfeaturecp.zh.yaml +118 -0
  202. package/AISB/catalog/aisb.t3.096_m3svm.yaml +189 -0
  203. package/AISB/catalog/aisb.t3.096_m3svm.zh.yaml +149 -0
  204. package/AISB/catalog/aisb.t3.097_wassersteintl.yaml +212 -0
  205. package/AISB/catalog/aisb.t3.097_wassersteintl.zh.yaml +169 -0
  206. package/AISB/catalog/aisb.t3.098_xmahalanobis.yaml +171 -0
  207. package/AISB/catalog/aisb.t3.098_xmahalanobis.zh.yaml +127 -0
  208. package/AISB/catalog/aisb.t3.099_ollalanding.yaml +248 -0
  209. package/AISB/catalog/aisb.t3.099_ollalanding.zh.yaml +182 -0
  210. package/AISB/catalog/aisb.t3.100_invmissingdata.yaml +179 -0
  211. package/AISB/catalog/aisb.t3.100_invmissingdata.zh.yaml +150 -0
  212. package/AISB/catalog/aisb.t3.101_acia.yaml +164 -0
  213. package/AISB/catalog/aisb.t3.101_acia.zh.yaml +109 -0
  214. package/AISB/catalog/aisb.t3.102_stochasticff.yaml +178 -0
  215. package/AISB/catalog/aisb.t3.102_stochasticff.zh.yaml +130 -0
  216. package/AISB/catalog/aisb.t3.103_qdcp.yaml +150 -0
  217. package/AISB/catalog/aisb.t3.103_qdcp.zh.yaml +116 -0
  218. package/AISB/catalog/aisb.t3.104_balancedactiveinf.yaml +137 -0
  219. package/AISB/catalog/aisb.t3.104_balancedactiveinf.zh.yaml +104 -0
  220. package/AISB/catalog/aisb.t3.105_binaryclasseval.yaml +161 -0
  221. package/AISB/catalog/aisb.t3.105_binaryclasseval.zh.yaml +130 -0
  222. package/AISB/image/001_aisb.t3.001_savvy.jpg +0 -0
  223. package/AISB/image/002_aisb.t3.002_pinet.jpg +0 -0
  224. package/AISB/image/003_aisb.t3.003_dmsqd.jpg +0 -0
  225. package/AISB/image/004_aisb.t3.004_decentralattn.jpg +0 -0
  226. package/AISB/image/005_aisb.t3.005_tsae.jpg +0 -0
  227. package/AISB/image/006_aisb.t3.006_physense.jpg +0 -0
  228. package/AISB/image/007_aisb.t3.007_reasoningiqa.jpg +0 -0
  229. package/AISB/image/008_aisb.t3.008_meanflows.jpg +0 -0
  230. package/AISB/image/009_aisb.t3.009_scoremissing.jpg +0 -0
  231. package/AISB/image/010_aisb.t3.010_suitabilityfilter.jpg +0 -0
  232. package/AISB/image/011_aisb.t3.011_osd.jpg +0 -0
  233. package/AISB/image/012_aisb.t3.012_efficientqat.jpg +0 -0
  234. package/AISB/image/013_aisb.t3.013_appl.jpg +0 -0
  235. package/AISB/image/014_aisb.t3.014_piguard.jpg +0 -0
  236. package/AISB/image/015_aisb.t3.015_frspec.jpg +0 -0
  237. package/AISB/image/016_aisb.t3.016_mathfusion.jpg +0 -0
  238. package/AISB/image/017_aisb.t3.017_multimodalglp.jpg +0 -0
  239. package/AISB/image/018_aisb.t3.018_cotsynth.jpg +0 -0
  240. package/AISB/image/019_aisb.t3.019_dyscaleut.jpg +0 -0
  241. package/AISB/image/020_aisb.t3.020_aristotle.jpg +0 -0
  242. package/AISB/image/021_aisb.t3.021_tokenrecycling.jpg +0 -0
  243. package/AISB/image/022_aisb.t3.022_chainofreasoning.jpg +0 -0
  244. package/AISB/image/023_aisb.t3.023_guidedembed.jpg +0 -0
  245. package/AISB/image/024_aisb.t3.024_outputcentric.jpg +0 -0
  246. package/AISB/image/025_aisb.t3.025_deeper.jpg +0 -0
  247. package/AISB/image/026_aisb.t3.026_gartkg.jpg +0 -0
  248. package/AISB/image/027_aisb.t3.027_citeeval.jpg +0 -0
  249. package/AISB/image/028_aisb.t3.028_sbam.jpg +0 -0
  250. package/AISB/image/029_aisb.t3.029_cdqgeoembed.jpg +0 -0
  251. package/AISB/image/030_aisb.t3.030_processrm.jpg +0 -0
  252. package/AISB/image/031_aisb.t3.031_circuitstability.jpg +0 -0
  253. package/AISB/image/032_aisb.t3.032_ptsolver.jpg +0 -0
  254. package/AISB/image/033_aisb.t3.033_gcse.jpg +0 -0
  255. package/AISB/image/034_aisb.t3.034_ensemblewm.jpg +0 -0
  256. package/AISB/image/035_aisb.t3.035_moralvalueswa.jpg +0 -0
  257. package/AISB/image/036_aisb.t3.036_weakstrongpref.jpg +0 -0
  258. package/AISB/image/037_aisb.t3.037_dementiamask.jpg +0 -0
  259. package/AISB/image/038_aisb.t3.038_tinysam.jpg +0 -0
  260. package/AISB/image/039_aisb.t3.039_calf.jpg +0 -0
  261. package/AISB/image/040_aisb.t3.040_graniteguardian.jpg +0 -0
  262. package/AISB/image/041_aisb.t3.041_amdm.jpg +0 -0
  263. package/AISB/image/042_aisb.t3.042_xpatch.jpg +0 -0
  264. package/AISB/image/043_aisb.t3.043_vhm.jpg +0 -0
  265. package/AISB/image/044_aisb.t3.044_rgvi.jpg +0 -0
  266. package/AISB/image/045_aisb.t3.045_pslstm.jpg +0 -0
  267. package/AISB/image/046_aisb.t3.046_nonstatts.jpg +0 -0
  268. package/AISB/image/047_aisb.t3.047_timepfn.jpg +0 -0
  269. package/AISB/image/048_aisb.t3.048_proxyspex.jpg +0 -0
  270. package/AISB/image/049_aisb.t3.049_hogwildinference.jpg +0 -0
  271. package/AISB/image/050_aisb.t3.050_causalpfn.jpg +0 -0
  272. package/AISB/image/051_aisb.t3.051_flashtp.jpg +0 -0
  273. package/AISB/image/052_aisb.t3.052_nsdiff.jpg +0 -0
  274. package/AISB/image/053_aisb.t3.053_k2vae.jpg +0 -0
  275. package/AISB/image/054_aisb.t3.054_timebase.jpg +0 -0
  276. package/AISB/image/055_aisb.t3.055_csbrain.jpg +0 -0
  277. package/AISB/image/056_aisb.t3.056_infosam.jpg +0 -0
  278. package/AISB/image/057_aisb.t3.057_mdreid.jpg +0 -0
  279. package/AISB/image/058_aisb.t3.058_mindglitch.jpg +0 -0
  280. package/AISB/image/059_aisb.t3.059_selfsupervised.jpg +0 -0
  281. package/AISB/image/060_aisb.t3.060_iaggad.jpg +0 -0
  282. package/AISB/image/061_aisb.t3.061_hsgkn.jpg +0 -0
  283. package/AISB/image/062_aisb.t3.062_visionts.jpg +0 -0
  284. package/AISB/image/063_aisb.t3.063_tsrag.jpg +0 -0
  285. package/AISB/image/064_aisb.t3.064_pir.jpg +0 -0
  286. package/AISB/image/065_aisb.t3.065_proteinbinding.jpg +0 -0
  287. package/AISB/image/066_aisb.t3.066_tropicalattention.jpg +0 -0
  288. package/AISB/image/067_aisb.t3.067_kanad.jpg +0 -0
  289. package/AISB/image/068_aisb.t3.068_sempo.jpg +0 -0
  290. package/AISB/image/069_aisb.t3.069_treehfd.jpg +0 -0
  291. package/AISB/image/070_aisb.t3.070_certifiedunlearning.jpg +0 -0
  292. package/AISB/image/071_aisb.t3.071_neuralmjd.jpg +0 -0
  293. package/AISB/image/072_aisb.t3.072_fedgmt.jpg +0 -0
  294. package/AISB/image/073_aisb.t3.073_rld.jpg +0 -0
  295. package/AISB/image/074_aisb.t3.074_lsvi.jpg +0 -0
  296. package/AISB/image/075_aisb.t3.075_treeslicedentropy.jpg +0 -0
  297. package/AISB/image/076_aisb.t3.076_aanet.jpg +0 -0
  298. package/AISB/image/077_aisb.t3.077_cmnn.jpg +0 -0
  299. package/AISB/image/078_aisb.t3.078_conformalanomaly.jpg +0 -0
  300. package/AISB/image/079_aisb.t3.079_dpfkmeans.jpg +0 -0
  301. package/AISB/image/080_aisb.t3.080_latentscorereweight.jpg +0 -0
  302. package/AISB/image/081_aisb.t3.081_qmamba.jpg +0 -0
  303. package/AISB/image/082_aisb.t3.082_onlinellmrouting.jpg +0 -0
  304. package/AISB/image/083_aisb.t3.083_starformer.jpg +0 -0
  305. package/AISB/image/084_aisb.t3.084_ift.jpg +0 -0
  306. package/AISB/image/085_aisb.t3.085_neuralsurv.jpg +0 -0
  307. package/AISB/image/086_aisb.t3.086_stella.jpg +0 -0
  308. package/AISB/image/087_aisb.t3.087_moses.jpg +0 -0
  309. package/AISB/image/088_aisb.t3.088_channelnorm.jpg +0 -0
  310. package/AISB/image/089_aisb.t3.089_causalvelocity.jpg +0 -0
  311. package/AISB/image/090_aisb.t3.090_rstib.jpg +0 -0
  312. package/AISB/image/091_aisb.t3.091_timeawarecausal.jpg +0 -0
  313. package/AISB/image/092_aisb.t3.092_kmeanslocalopt.jpg +0 -0
  314. package/AISB/image/093_aisb.t3.093_fedwmsam.jpg +0 -0
  315. package/AISB/image/094_aisb.t3.094_boundre.jpg +0 -0
  316. package/AISB/image/095_aisb.t3.095_fastfeaturecp.jpg +0 -0
  317. package/AISB/image/096_aisb.t3.096_m3svm.jpg +0 -0
  318. package/AISB/image/097_aisb.t3.097_wassersteintl.jpg +0 -0
  319. package/AISB/image/098_aisb.t3.098_xmahalanobis.jpg +0 -0
  320. package/AISB/image/099_aisb.t3.099_ollalanding.jpg +0 -0
  321. package/AISB/image/100_aisb.t3.100_invmissingdata.jpg +0 -0
  322. package/AISB/image/101_aisb.t3.101_acia.jpg +0 -0
  323. package/AISB/image/102_aisb.t3.102_stochasticff.jpg +0 -0
  324. package/AISB/image/103_aisb.t3.103_qdcp.jpg +0 -0
  325. package/AISB/image/104_aisb.t3.104_balancedactiveinf.jpg +0 -0
  326. package/AISB/image/105_aisb.t3.105_binaryclasseval.jpg +0 -0
  327. package/AISB/image/106_aisb.t1.reasoning_lite.jpg +0 -0
  328. package/AISB/image/107_aisb.t2.paper_audit.jpg +0 -0
  329. package/AISB/image/108_aisb.t3.multi_gpu_search.jpg +0 -0
  330. package/AISB/image/109_aisb.t3.tdc_admet.jpg +0 -0
  331. package/AISB/image/aisb.b1.agentic_coding.svg +16 -0
  332. package/AISB/image/aisb.b10.climate_earth.svg +16 -0
  333. package/AISB/image/aisb.b11.model_efficiency.svg +16 -0
  334. package/AISB/image/aisb.b12.embodied_ai.svg +16 -0
  335. package/AISB/image/aisb.b2.agent_systems.svg +16 -0
  336. package/AISB/image/aisb.b3.self_evolving_rl.svg +16 -0
  337. package/AISB/image/aisb.b4.lm_reasoning.svg +16 -0
  338. package/AISB/image/aisb.b5.math_proof.svg +16 -0
  339. package/AISB/image/aisb.b6.research_process.svg +16 -0
  340. package/AISB/image/aisb.b7.multimodal_fusion.svg +16 -0
  341. package/AISB/image/aisb.b8.lifesci_drug.svg +16 -0
  342. package/AISB/image/aisb.b9.material_science.svg +16 -0
  343. package/README.md +196 -32
  344. package/bin/ds.js +924 -66
  345. package/docs/en/00_QUICK_START.md +195 -18
  346. package/docs/en/01_SETTINGS_REFERENCE.md +468 -96
  347. package/docs/en/02_START_RESEARCH_GUIDE.md +26 -5
  348. package/docs/en/03_QQ_CONNECTOR_GUIDE.md +14 -3
  349. package/docs/en/04_LINGZHU_CONNECTOR_GUIDE.md +2 -0
  350. package/docs/en/05_TUI_GUIDE.md +171 -2
  351. package/docs/en/07_MEMORY_AND_MCP.md +38 -2
  352. package/docs/en/09_DOCTOR.md +78 -7
  353. package/docs/en/10_WEIXIN_CONNECTOR_GUIDE.md +38 -1
  354. package/docs/en/11_LICENSE_AND_RISK.md +4 -0
  355. package/docs/en/12_GUIDED_WORKFLOW_TOUR.md +15 -0
  356. package/docs/en/14_PROMPT_SKILLS_AND_MCP_GUIDE.md +9 -0
  357. package/docs/en/15_CODEX_PROVIDER_SETUP.md +624 -180
  358. package/docs/en/16_TELEGRAM_CONNECTOR_GUIDE.md +14 -0
  359. package/docs/en/17_WHATSAPP_CONNECTOR_GUIDE.md +14 -0
  360. package/docs/en/18_FEISHU_CONNECTOR_GUIDE.md +14 -0
  361. package/docs/en/21_LOCAL_MODEL_BACKENDS_GUIDE.md +386 -0
  362. package/docs/en/22_BENCHSTORE_YAML_REFERENCE.md +469 -0
  363. package/docs/en/23_BENCHSTORE_GITHUB_RELEASES_SPEC.md +316 -0
  364. package/docs/en/24_CLAUDE_CODE_PROVIDER_SETUP.md +469 -0
  365. package/docs/en/25_OPENCODE_PROVIDER_SETUP.md +653 -0
  366. package/docs/en/26_CITATION_AND_ATTRIBUTION.md +119 -0
  367. package/docs/en/27_KIMI_CODE_PROVIDER_SETUP.md +180 -0
  368. package/docs/en/28_DISCORD_CONNECTOR_GUIDE.md +61 -0
  369. package/docs/en/29_SLACK_CONNECTOR_GUIDE.md +60 -0
  370. package/docs/en/30_SETTINGS_CONTROL_CENTER_GUIDE.md +371 -0
  371. package/docs/en/{19_LOCAL_BROWSER_AUTH.md → 31_LOCAL_BROWSER_AUTH.md} +1 -1
  372. package/docs/en/32_WINDOWS_WSL2_DEPLOYMENT_GUIDE.md +273 -0
  373. package/docs/en/33_WORKSPACE_EXPLORER_QA.md +121 -0
  374. package/docs/en/91_DEVELOPMENT.md +266 -0
  375. package/docs/en/99_ACKNOWLEDGEMENTS.md +24 -19
  376. package/docs/en/README.md +48 -7
  377. package/docs/images/admin/admin-connectors-health-en.png +0 -0
  378. package/docs/images/admin/admin-controllers-en.png +0 -0
  379. package/docs/images/admin/admin-diagnostics-en.png +0 -0
  380. package/docs/images/admin/admin-errors-en.png +0 -0
  381. package/docs/images/admin/admin-issues-en.png +0 -0
  382. package/docs/images/admin/admin-logs-en.png +0 -0
  383. package/docs/images/admin/admin-quest-detail-en.png +0 -0
  384. package/docs/images/admin/admin-quests-en.png +0 -0
  385. package/docs/images/admin/admin-repairs-en.png +0 -0
  386. package/docs/images/admin/admin-runtime-en.png +0 -0
  387. package/docs/images/admin/admin-search-en.png +0 -0
  388. package/docs/images/admin/admin-stats-en.png +0 -0
  389. package/docs/images/admin/admin-summary-en.png +0 -0
  390. package/docs/images/connectors/connector-discord-en.png +0 -0
  391. package/docs/images/connectors/connector-feishu-en.png +0 -0
  392. package/docs/images/connectors/connector-lingzhu-en.png +0 -0
  393. package/docs/images/connectors/connector-qq-en.png +0 -0
  394. package/docs/images/connectors/connector-slack-en.png +0 -0
  395. package/docs/images/connectors/connector-telegram-en.png +0 -0
  396. package/docs/images/connectors/connector-weixin-en.png +0 -0
  397. package/docs/images/connectors/connector-whatsapp-en.png +0 -0
  398. package/docs/images/settings/settings-baselines-en.png +0 -0
  399. package/docs/images/settings/settings-config-en.png +0 -0
  400. package/docs/images/settings/settings-connectors-overview-en.png +0 -0
  401. package/docs/images/settings/settings-deepxiv-en.png +0 -0
  402. package/docs/images/settings/settings-mcp-servers-en.png +0 -0
  403. package/docs/images/settings/settings-plugins-en.png +0 -0
  404. package/docs/images/settings/settings-runners-en.png +0 -0
  405. package/docs/zh/00_QUICK_START.md +142 -18
  406. package/docs/zh/01_SETTINGS_REFERENCE.md +219 -98
  407. package/docs/zh/02_START_RESEARCH_GUIDE.md +26 -5
  408. package/docs/zh/05_TUI_GUIDE.md +171 -2
  409. package/docs/zh/07_MEMORY_AND_MCP.md +29 -2
  410. package/docs/zh/09_DOCTOR.md +54 -8
  411. package/docs/zh/10_WEIXIN_CONNECTOR_GUIDE.md +24 -1
  412. package/docs/zh/11_LICENSE_AND_RISK.md +4 -0
  413. package/docs/zh/12_GUIDED_WORKFLOW_TOUR.md +15 -0
  414. package/docs/zh/14_PROMPT_SKILLS_AND_MCP_GUIDE.md +9 -0
  415. package/docs/zh/15_CODEX_PROVIDER_SETUP.md +552 -181
  416. package/docs/zh/21_LOCAL_MODEL_BACKENDS_GUIDE.md +384 -0
  417. package/docs/zh/22_BENCHSTORE_YAML_REFERENCE.md +459 -0
  418. package/docs/zh/23_BENCHSTORE_GITHUB_RELEASES_SPEC.md +287 -0
  419. package/docs/zh/23_CLAUDE_RUNNER_GUIDE.md +103 -0
  420. package/docs/zh/24_CLAUDE_CODE_PROVIDER_SETUP.md +460 -0
  421. package/docs/zh/25_OPENCODE_PROVIDER_SETUP.md +660 -0
  422. package/docs/zh/26_CITATION_AND_ATTRIBUTION.md +102 -0
  423. package/docs/zh/27_KIMI_CODE_PROVIDER_SETUP.md +51 -0
  424. package/docs/zh/{19_LOCAL_BROWSER_AUTH.md → 31_LOCAL_BROWSER_AUTH.md} +1 -1
  425. package/docs/zh/32_WINDOWS_WSL2_DEPLOYMENT_GUIDE.md +264 -0
  426. package/docs/zh/33_WORKSPACE_EXPLORER_QA.md +127 -0
  427. package/docs/zh/99_ACKNOWLEDGEMENTS.md +23 -19
  428. package/docs/zh/README.md +33 -7
  429. package/install.sh +168 -20
  430. package/package.json +5 -1
  431. package/pyproject.toml +2 -1
  432. package/src/deepscientist/__init__.py +1 -1
  433. package/src/deepscientist/acp/envelope.py +13 -0
  434. package/src/deepscientist/admin/__init__.py +3 -0
  435. package/src/deepscientist/admin/charts.py +681 -0
  436. package/src/deepscientist/admin/logs.py +119 -0
  437. package/src/deepscientist/admin/repairs.py +217 -0
  438. package/src/deepscientist/admin/service.py +1310 -0
  439. package/src/deepscientist/admin/system_info.py +700 -0
  440. package/src/deepscientist/admin/tasks.py +465 -0
  441. package/src/deepscientist/admin/tool_metrics.py +600 -0
  442. package/src/deepscientist/artifact/guidance.py +8 -4
  443. package/src/deepscientist/artifact/schemas.py +115 -0
  444. package/src/deepscientist/artifact/service.py +4268 -260
  445. package/src/deepscientist/bash_exec/monitor.py +30 -3
  446. package/src/deepscientist/bash_exec/service.py +134 -1
  447. package/src/deepscientist/benchstore/__init__.py +4 -0
  448. package/src/deepscientist/benchstore/prompt_builder.py +224 -0
  449. package/src/deepscientist/benchstore/service.py +1716 -0
  450. package/src/deepscientist/bridges/connectors.py +8 -2
  451. package/src/deepscientist/channels/weixin_ilink.py +8 -1
  452. package/src/deepscientist/cli.py +92 -17
  453. package/src/deepscientist/codex_cli_compat.py +187 -74
  454. package/src/deepscientist/config/models.py +82 -11
  455. package/src/deepscientist/config/service.py +1077 -93
  456. package/src/deepscientist/connector/weixin_support.py +48 -17
  457. package/src/deepscientist/daemon/api/handlers.py +827 -235
  458. package/src/deepscientist/daemon/api/router.py +81 -1
  459. package/src/deepscientist/daemon/app.py +1512 -85
  460. package/src/deepscientist/diagnostics/__init__.py +6 -0
  461. package/src/deepscientist/diagnostics/runner_failures.py +277 -0
  462. package/src/deepscientist/doctor.py +407 -56
  463. package/src/deepscientist/evidence_packets.py +590 -0
  464. package/src/deepscientist/home.py +52 -4
  465. package/src/deepscientist/kimi_cli_compat.py +50 -0
  466. package/src/deepscientist/latex_runtime.py +2 -2
  467. package/src/deepscientist/mcp/context.py +2 -0
  468. package/src/deepscientist/mcp/schemas.py +114 -0
  469. package/src/deepscientist/mcp/server.py +1566 -126
  470. package/src/deepscientist/memory/service.py +203 -16
  471. package/src/deepscientist/process_control.py +8 -1
  472. package/src/deepscientist/prompts/builder.py +850 -88
  473. package/src/deepscientist/quest/__init__.py +2 -2
  474. package/src/deepscientist/quest/layout.py +12 -1
  475. package/src/deepscientist/quest/node_traces.py +10 -0
  476. package/src/deepscientist/quest/service.py +1852 -161
  477. package/src/deepscientist/quest/stage_views.py +1 -1
  478. package/src/deepscientist/runners/__init__.py +18 -0
  479. package/src/deepscientist/runners/base.py +89 -1
  480. package/src/deepscientist/runners/builtins.py +13 -1
  481. package/src/deepscientist/runners/claude.py +391 -0
  482. package/src/deepscientist/runners/codex.py +480 -35
  483. package/src/deepscientist/runners/codex_telemetry.py +127 -0
  484. package/src/deepscientist/runners/kimi.py +334 -0
  485. package/src/deepscientist/runners/metadata.py +68 -0
  486. package/src/deepscientist/runners/opencode.py +414 -0
  487. package/src/deepscientist/runners/runtime_overrides.py +100 -0
  488. package/src/deepscientist/runners/simple_cli.py +538 -0
  489. package/src/deepscientist/runtime_storage.py +303 -0
  490. package/src/deepscientist/shared.py +80 -16
  491. package/src/deepscientist/skills/installer.py +37 -0
  492. package/src/deepscientist/skills/registry.py +2 -0
  493. package/src/deepscientist/tinytex.py +2 -2
  494. package/src/deepscientist/tui.py +10 -3
  495. package/src/prompts/benchstore/system.md +77 -0
  496. package/src/prompts/connectors/qq.md +33 -2
  497. package/src/prompts/connectors/weixin.md +208 -23
  498. package/src/prompts/contracts/admin_ops.md +74 -0
  499. package/src/prompts/contracts/admin_ops_knowledge.md +138 -0
  500. package/src/prompts/contracts/shared_interaction.md +5 -10
  501. package/src/prompts/start_setup/system.md +422 -0
  502. package/src/prompts/system.md +411 -304
  503. package/src/prompts/system_copilot.md +89 -0
  504. package/src/skills/analysis-campaign/SKILL.md +239 -578
  505. package/src/skills/analysis-campaign/references/artifact-flow-examples.md +102 -0
  506. package/src/skills/analysis-campaign/references/boundary-cases.md +98 -0
  507. package/src/skills/analysis-campaign/references/campaign-checklist-template.md +39 -24
  508. package/src/skills/analysis-campaign/references/campaign-design.md +26 -10
  509. package/src/skills/analysis-campaign/references/campaign-plan-template.md +53 -54
  510. package/src/skills/analysis-campaign/references/operational-guidance.md +97 -0
  511. package/src/skills/analysis-campaign/references/writing-facing-slice-examples.md +10 -20
  512. package/src/skills/baseline/SKILL.md +183 -461
  513. package/src/skills/baseline/references/artifact-flow-examples.md +106 -0
  514. package/src/skills/baseline/references/artifact-payload-examples.md +1 -1
  515. package/src/skills/baseline/references/baseline-checklist-template.md +27 -35
  516. package/src/skills/baseline/references/baseline-plan-template.md +37 -76
  517. package/src/skills/baseline/references/boundary-cases.md +86 -0
  518. package/src/skills/baseline/references/codebase-audit-checklist.md +2 -6
  519. package/src/skills/baseline/references/comparability-contract.md +7 -12
  520. package/src/skills/baseline/references/operational-guidance.md +56 -0
  521. package/src/skills/baseline/references/route-selection.md +5 -25
  522. package/src/skills/decision/SKILL.md +113 -306
  523. package/src/skills/decision/references/checkpoint-memory-template.md +47 -0
  524. package/src/skills/decision/references/operational-guidance.md +94 -0
  525. package/src/skills/decision/references/research-route-criteria.md +7 -8
  526. package/src/skills/decision/references/strategic-decision-template.md +13 -26
  527. package/src/skills/experiment/SKILL.md +132 -670
  528. package/src/skills/experiment/references/execution-playbook.md +374 -0
  529. package/src/skills/experiment/references/main-experiment-checklist-template.md +26 -2
  530. package/src/skills/experiment/references/main-experiment-plan-template.md +28 -17
  531. package/src/skills/experiment/references/operational-guidance.md +108 -0
  532. package/src/skills/finalize/SKILL.md +62 -0
  533. package/src/skills/finalize/references/checkpoint-memory-template.md +49 -0
  534. package/src/skills/finalize/references/resume-packet-template.md +7 -0
  535. package/src/skills/idea/SKILL.md +228 -15
  536. package/src/skills/idea/references/controlled-brainstorming-playbook.md +78 -0
  537. package/src/skills/idea/references/current-board-packet-template.md +61 -0
  538. package/src/skills/idea/references/high-value-idea-sourcing.md +119 -0
  539. package/src/skills/idea/references/idea-generation-playbook.md +21 -0
  540. package/src/skills/idea/references/idea-thinking-flow.md +6 -0
  541. package/src/skills/idea/references/literature-survey-template.md +3 -0
  542. package/src/skills/idea/references/objective-contract-template.md +54 -0
  543. package/src/skills/idea/references/outline-seeding-example.md +56 -0
  544. package/src/skills/idea/references/pre-idea-draft-template.md +105 -0
  545. package/src/skills/idea/references/related-work-playbook.md +75 -2
  546. package/src/skills/idea/references/research-history-playbook.md +114 -0
  547. package/src/skills/idea/references/selection-gate.md +58 -6
  548. package/src/skills/intake-audit/SKILL.md +43 -2
  549. package/src/skills/intake-audit/references/state-audit-template.md +10 -0
  550. package/src/skills/nature-data/SKILL.md +128 -0
  551. package/src/skills/nature-data/UPSTREAM_LICENSE.txt +21 -0
  552. package/src/skills/nature-data/agents/openai.yaml +4 -0
  553. package/src/skills/nature-data/references/chinese-author-alignment.md +84 -0
  554. package/src/skills/nature-data/references/fair-metadata-checklist.md +105 -0
  555. package/src/skills/nature-data/references/policy-principles.md +103 -0
  556. package/src/skills/nature-data/references/repository-and-identifiers.md +96 -0
  557. package/src/skills/nature-data/references/source-basis.md +54 -0
  558. package/src/skills/nature-data/references/statement-patterns.md +153 -0
  559. package/src/skills/nature-figure/SKILL.md +197 -0
  560. package/src/skills/nature-figure/UPSTREAM_LICENSE.txt +21 -0
  561. package/src/skills/nature-figure/agents/openai.yaml +4 -0
  562. package/src/skills/nature-figure/evals/evals.json +37 -0
  563. package/src/skills/nature-figure/references/api.md +428 -0
  564. package/src/skills/nature-figure/references/backend-selection.md +100 -0
  565. package/src/skills/nature-figure/references/chart-types.md +281 -0
  566. package/src/skills/nature-figure/references/common-patterns.md +349 -0
  567. package/src/skills/nature-figure/references/design-theory.md +436 -0
  568. package/src/skills/nature-figure/references/figure-contract.md +93 -0
  569. package/src/skills/nature-figure/references/nature-2026-observations.md +112 -0
  570. package/src/skills/nature-figure/references/qa-contract.md +119 -0
  571. package/src/skills/nature-figure/references/r-template-index.md +66 -0
  572. package/src/skills/nature-figure/references/r-workflow.md +161 -0
  573. package/src/skills/nature-figure/references/tutorials.md +250 -0
  574. package/src/skills/nature-paper2ppt/SKILL.md +507 -0
  575. package/src/skills/nature-paper2ppt/UPSTREAM_LICENSE.txt +21 -0
  576. package/src/skills/nature-paper2ppt/agents/openai.yaml +4 -0
  577. package/src/skills/nature-polishing/SKILL.md +385 -0
  578. package/src/skills/nature-polishing/UPSTREAM_LICENSE.txt +21 -0
  579. package/src/skills/nature-polishing/agents/openai.yaml +4 -0
  580. package/src/skills/nature-polishing/references/phrasebank-playbook.md +162 -0
  581. package/src/skills/nature-polishing/references/section-moves.md +240 -0
  582. package/src/skills/nature-polishing/references/style-guardrails.md +94 -0
  583. package/src/skills/nature-polishing/references/writing-strategy.md +148 -0
  584. package/src/skills/optimize/SKILL.md +177 -1568
  585. package/src/skills/optimize/references/brief-shaping-playbook.md +95 -0
  586. package/src/skills/optimize/references/candidate-board-template.md +13 -0
  587. package/src/skills/optimize/references/candidate-ranking-template.md +51 -0
  588. package/src/skills/optimize/references/codegen-route-playbook.md +50 -0
  589. package/src/skills/optimize/references/debug-response-template.md +29 -0
  590. package/src/skills/optimize/references/frontier-review-template.md +32 -0
  591. package/src/skills/optimize/references/fusion-playbook.md +36 -0
  592. package/src/skills/optimize/references/method-brief-template.md +73 -0
  593. package/src/skills/optimize/references/operational-guidance.md +621 -0
  594. package/src/skills/optimize/references/optimization-memory-template.md +30 -0
  595. package/src/skills/optimize/references/optimize-checklist-template.md +18 -0
  596. package/src/skills/optimize/references/plateau-response-playbook.md +28 -0
  597. package/src/skills/optimize/references/prompt-patterns.md +49 -0
  598. package/src/skills/paper-outline/SKILL.md +227 -0
  599. package/src/skills/paper-outline/references/outline-patterns.md +87 -0
  600. package/src/skills/paper-plot/SKILL.md +79 -0
  601. package/src/skills/paper-plot/agents/openai.yaml +4 -0
  602. package/src/skills/paper-plot/references/bar_grouped_hatch.md +96 -0
  603. package/src/skills/paper-plot/references/bar_paired_delta.md +72 -0
  604. package/src/skills/paper-plot/references/line_confidence_band.md +75 -0
  605. package/src/skills/paper-plot/references/line_loss_with_inset.md +65 -0
  606. package/src/skills/paper-plot/references/line_training_curve.md +44 -0
  607. package/src/skills/paper-plot/references/radar_dual_series.md +59 -0
  608. package/src/skills/paper-plot/references/scatter_broken_axis.md +59 -0
  609. package/src/skills/paper-plot/references/scatter_tsne_cluster.md +72 -0
  610. package/src/skills/paper-plot/scripts/bar_memevolve.py +109 -0
  611. package/src/skills/paper-plot/scripts/bar_spice.py +166 -0
  612. package/src/skills/paper-plot/scripts/line_aime.py +94 -0
  613. package/src/skills/paper-plot/scripts/line_loss_inset.py +157 -0
  614. package/src/skills/paper-plot/scripts/line_selfdistill.py +168 -0
  615. package/src/skills/paper-plot/scripts/radar_dora.py +151 -0
  616. package/src/skills/paper-plot/scripts/scatter_break.py +169 -0
  617. package/src/skills/paper-plot/scripts/scatter_tsne.py +133 -0
  618. package/src/skills/rebuttal/SKILL.md +9 -0
  619. package/src/skills/references/tool-usage-by-stage.md +438 -0
  620. package/src/skills/review/SKILL.md +105 -7
  621. package/src/skills/science/PROVENANCE.md +44 -0
  622. package/src/skills/science/SKILL.md +137 -0
  623. package/src/skills/science/references/artifact-science-tool.md +110 -0
  624. package/src/skills/science/references/claim-type-discipline.md +56 -0
  625. package/src/skills/science/references/domain-index.md +422 -0
  626. package/src/skills/science/references/hpc-via-bash-exec.md +42 -0
  627. package/src/skills/science/references/package-check-playbook.md +64 -0
  628. package/src/skills/science/references/package-index.min.json +3616 -0
  629. package/src/skills/science/references/packages/abinit.md +80 -0
  630. package/src/skills/science/references/packages/acts.md +73 -0
  631. package/src/skills/science/references/packages/aiida-core.md +80 -0
  632. package/src/skills/science/references/packages/alamode.md +80 -0
  633. package/src/skills/science/references/packages/amuse.md +88 -0
  634. package/src/skills/science/references/packages/anndata.md +88 -0
  635. package/src/skills/science/references/packages/arbor.md +80 -0
  636. package/src/skills/science/references/packages/arc.md +73 -0
  637. package/src/skills/science/references/packages/astropy.md +88 -0
  638. package/src/skills/science/references/packages/astroquery.md +88 -0
  639. package/src/skills/science/references/packages/atomate2.md +80 -0
  640. package/src/skills/science/references/packages/atomsmltr.md +73 -0
  641. package/src/skills/science/references/packages/awkward.md +73 -0
  642. package/src/skills/science/references/packages/batman.md +88 -0
  643. package/src/skills/science/references/packages/biopython.md +88 -0
  644. package/src/skills/science/references/packages/bloqade.md +73 -0
  645. package/src/skills/science/references/packages/brian2.md +73 -0
  646. package/src/skills/science/references/packages/bullet3.md +73 -0
  647. package/src/skills/science/references/packages/calculix.md +80 -0
  648. package/src/skills/science/references/packages/cantera.md +73 -0
  649. package/src/skills/science/references/packages/cavity-md-ipi.md +80 -0
  650. package/src/skills/science/references/packages/ccdproc.md +88 -0
  651. package/src/skills/science/references/packages/celerite2.md +88 -0
  652. package/src/skills/science/references/packages/cellrank.md +73 -0
  653. package/src/skills/science/references/packages/cesm.md +80 -0
  654. package/src/skills/science/references/packages/chemicals.md +73 -0
  655. package/src/skills/science/references/packages/chempy.md +73 -0
  656. package/src/skills/science/references/packages/cirq.md +73 -0
  657. package/src/skills/science/references/packages/coffea.md +73 -0
  658. package/src/skills/science/references/packages/cp2k.md +88 -0
  659. package/src/skills/science/references/packages/custodian.md +80 -0
  660. package/src/skills/science/references/packages/dart.md +73 -0
  661. package/src/skills/science/references/packages/datamol.md +88 -0
  662. package/src/skills/science/references/packages/dd4hep.md +73 -0
  663. package/src/skills/science/references/packages/dealii.md +80 -0
  664. package/src/skills/science/references/packages/deepchem.md +88 -0
  665. package/src/skills/science/references/packages/delphes.md +73 -0
  666. package/src/skills/science/references/packages/devito.md +80 -0
  667. package/src/skills/science/references/packages/dftb.md +88 -0
  668. package/src/skills/science/references/packages/dftd4.md +88 -0
  669. package/src/skills/science/references/packages/dftk-jl.md +80 -0
  670. package/src/skills/science/references/packages/dolfinx.md +80 -0
  671. package/src/skills/science/references/packages/drake.md +73 -0
  672. package/src/skills/science/references/packages/dumux.md +73 -0
  673. package/src/skills/science/references/packages/elk.md +80 -0
  674. package/src/skills/science/references/packages/elmerfem.md +80 -0
  675. package/src/skills/science/references/packages/enzo-e.md +88 -0
  676. package/src/skills/science/references/packages/espresso.md +80 -0
  677. package/src/skills/science/references/packages/exoplanet.md +88 -0
  678. package/src/skills/science/references/packages/fairroot.md +73 -0
  679. package/src/skills/science/references/packages/fbpic.md +80 -0
  680. package/src/skills/science/references/packages/fdtdbath-meep.md +80 -0
  681. package/src/skills/science/references/packages/geant4.md +73 -0
  682. package/src/skills/science/references/packages/geosx.md +80 -0
  683. package/src/skills/science/references/packages/gprmax.md +80 -0
  684. package/src/skills/science/references/packages/gromacs.md +80 -0
  685. package/src/skills/science/references/packages/gwaslab.md +73 -0
  686. package/src/skills/science/references/packages/gz-sim.md +73 -0
  687. package/src/skills/science/references/packages/hail.md +88 -0
  688. package/src/skills/science/references/packages/hiphive.md +80 -0
  689. package/src/skills/science/references/packages/hoomd-blue.md +80 -0
  690. package/src/skills/science/references/packages/itensor.md +73 -0
  691. package/src/skills/science/references/packages/itensors-jl.md +73 -0
  692. package/src/skills/science/references/packages/jdftx.md +73 -0
  693. package/src/skills/science/references/packages/jobflow.md +80 -0
  694. package/src/skills/science/references/packages/kadanoffbaym-jl.md +73 -0
  695. package/src/skills/science/references/packages/kite.md +80 -0
  696. package/src/skills/science/references/packages/kratos.md +80 -0
  697. package/src/skills/science/references/packages/kwant.md +73 -0
  698. package/src/skills/science/references/packages/lammps.md +80 -0
  699. package/src/skills/science/references/packages/lightkurve.md +88 -0
  700. package/src/skills/science/references/packages/limix.md +73 -0
  701. package/src/skills/science/references/packages/maxwelllink.md +80 -0
  702. package/src/skills/science/references/packages/mcdc.md +73 -0
  703. package/src/skills/science/references/packages/meep.md +80 -0
  704. package/src/skills/science/references/packages/mfem.md +80 -0
  705. package/src/skills/science/references/packages/mitgcm.md +73 -0
  706. package/src/skills/science/references/packages/modflow6.md +73 -0
  707. package/src/skills/science/references/packages/molecool.md +73 -0
  708. package/src/skills/science/references/packages/mom6.md +73 -0
  709. package/src/skills/science/references/packages/moose.md +80 -0
  710. package/src/skills/science/references/packages/mpas-model.md +73 -0
  711. package/src/skills/science/references/packages/mujoco.md +73 -0
  712. package/src/skills/science/references/packages/mumax3.md +73 -0
  713. package/src/skills/science/references/packages/nekrs.md +80 -0
  714. package/src/skills/science/references/packages/nessi.md +73 -0
  715. package/src/skills/science/references/packages/nest-simulator.md +73 -0
  716. package/src/skills/science/references/packages/netket.md +73 -0
  717. package/src/skills/science/references/packages/neuron.md +73 -0
  718. package/src/skills/science/references/packages/nextflow.md +88 -0
  719. package/src/skills/science/references/packages/nwchem.md +88 -0
  720. package/src/skills/science/references/packages/openbabel.md +88 -0
  721. package/src/skills/science/references/packages/openems.md +80 -0
  722. package/src/skills/science/references/packages/openff-toolkit.md +88 -0
  723. package/src/skills/science/references/packages/openfoam-dev.md +80 -0
  724. package/src/skills/science/references/packages/openmc.md +73 -0
  725. package/src/skills/science/references/packages/openmm.md +80 -0
  726. package/src/skills/science/references/packages/openmoc.md +73 -0
  727. package/src/skills/science/references/packages/openmx.md +80 -0
  728. package/src/skills/science/references/packages/opensees.md +80 -0
  729. package/src/skills/science/references/packages/opensn.md +80 -0
  730. package/src/skills/science/references/packages/opm-simulators.md +73 -0
  731. package/src/skills/science/references/packages/oqupy.md +73 -0
  732. package/src/skills/science/references/packages/packmol.md +80 -0
  733. package/src/skills/science/references/packages/palabos.md +80 -0
  734. package/src/skills/science/references/packages/parflow.md +80 -0
  735. package/src/skills/science/references/packages/pennylane.md +88 -0
  736. package/src/skills/science/references/packages/perceval.md +73 -0
  737. package/src/skills/science/references/packages/phono3py.md +73 -0
  738. package/src/skills/science/references/packages/phonopy.md +73 -0
  739. package/src/skills/science/references/packages/photutils.md +88 -0
  740. package/src/skills/science/references/packages/picongpu.md +80 -0
  741. package/src/skills/science/references/packages/plink-ng.md +88 -0
  742. package/src/skills/science/references/packages/precice.md +73 -0
  743. package/src/skills/science/references/packages/psc.md +80 -0
  744. package/src/skills/science/references/packages/psi4.md +88 -0
  745. package/src/skills/science/references/packages/pybinding.md +73 -0
  746. package/src/skills/science/references/packages/pyfr.md +80 -0
  747. package/src/skills/science/references/packages/pyhf.md +73 -0
  748. package/src/skills/science/references/packages/pyiron_base.md +80 -0
  749. package/src/skills/science/references/packages/pylcp.md +73 -0
  750. package/src/skills/science/references/packages/pylith.md +80 -0
  751. package/src/skills/science/references/packages/pynbody.md +88 -0
  752. package/src/skills/science/references/packages/pysam.md +88 -0
  753. package/src/skills/science/references/packages/pyscf.md +88 -0
  754. package/src/skills/science/references/packages/q-e.md +73 -0
  755. package/src/skills/science/references/packages/qibo.md +73 -0
  756. package/src/skills/science/references/packages/qiskit.md +73 -0
  757. package/src/skills/science/references/packages/quantica-jl.md +73 -0
  758. package/src/skills/science/references/packages/quantumoptics-jl.md +73 -0
  759. package/src/skills/science/references/packages/quimb.md +73 -0
  760. package/src/skills/science/references/packages/qulacs.md +73 -0
  761. package/src/skills/science/references/packages/qutip.md +73 -0
  762. package/src/skills/science/references/packages/rdkit.md +88 -0
  763. package/src/skills/science/references/packages/rmg-py.md +73 -0
  764. package/src/skills/science/references/packages/root.md +73 -0
  765. package/src/skills/science/references/packages/scanpy.md +88 -0
  766. package/src/skills/science/references/packages/scikit-allel.md +88 -0
  767. package/src/skills/science/references/packages/scikit-bio.md +88 -0
  768. package/src/skills/science/references/packages/scqubits.md +73 -0
  769. package/src/skills/science/references/packages/scuff-em.md +80 -0
  770. package/src/skills/science/references/packages/scvi-tools.md +73 -0
  771. package/src/skills/science/references/packages/seissol.md +73 -0
  772. package/src/skills/science/references/packages/sfepy.md +80 -0
  773. package/src/skills/science/references/packages/sisl.md +73 -0
  774. package/src/skills/science/references/packages/smilei.md +80 -0
  775. package/src/skills/science/references/packages/snakemake.md +88 -0
  776. package/src/skills/science/references/packages/specfem3d-globe.md +80 -0
  777. package/src/skills/science/references/packages/specutils.md +88 -0
  778. package/src/skills/science/references/packages/spglib.md +80 -0
  779. package/src/skills/science/references/packages/squidpy.md +88 -0
  780. package/src/skills/science/references/packages/starry.md +88 -0
  781. package/src/skills/science/references/packages/strawberryfields.md +73 -0
  782. package/src/skills/science/references/packages/su2.md +80 -0
  783. package/src/skills/science/references/packages/sunny-jl.md +73 -0
  784. package/src/skills/science/references/packages/sw4.md +73 -0
  785. package/src/skills/science/references/packages/swift.md +88 -0
  786. package/src/skills/science/references/packages/tdnegf.md +73 -0
  787. package/src/skills/science/references/packages/tenpy.md +73 -0
  788. package/src/skills/science/references/packages/thermo.md +73 -0
  789. package/src/skills/science/references/packages/tkwant.md +73 -0
  790. package/src/skills/science/references/packages/tvb-root.md +73 -0
  791. package/src/skills/science/references/packages/uproot5.md +73 -0
  792. package/src/skills/science/references/packages/vampire.md +80 -0
  793. package/src/skills/science/references/packages/wannier_tools.md +73 -0
  794. package/src/skills/science/references/packages/warpx.md +80 -0
  795. package/src/skills/science/references/packages/wrf.md +73 -0
  796. package/src/skills/science/references/packages/xtb.md +88 -0
  797. package/src/skills/science/references/packages/yt.md +73 -0
  798. package/src/skills/science/references/science-task-brief-template.md +71 -0
  799. package/src/skills/scout/SKILL.md +83 -425
  800. package/src/skills/scout/references/literature-scout-template.md +5 -24
  801. package/src/skills/scout/references/operational-guidance.md +191 -0
  802. package/src/skills/scout/references/paper-triage-playbook.md +11 -35
  803. package/src/skills/write/SKILL.md +744 -1246
  804. package/src/skills/write/references/experiments_analysis_patterns.md +129 -0
  805. package/src/skills/write/references/oral_package_patterns.md +252 -0
  806. package/src/skills/write/references/oral_writing_principles.md +291 -0
  807. package/src/skills/write/references/section_rewrite_checklist.md +234 -0
  808. package/src/tui/dist/app/AppContainer.js +1314 -27
  809. package/src/tui/dist/components/Composer.js +26 -1
  810. package/src/tui/dist/components/ConfigScreen.js +2 -1
  811. package/src/tui/dist/components/InputPrompt.js +25 -9
  812. package/src/tui/dist/components/MainContent.js +18 -3
  813. package/src/tui/dist/components/QuestScreen.js +3 -2
  814. package/src/tui/dist/components/UtilityScreen.js +37 -0
  815. package/src/tui/dist/hooks/useSafeInput.js +10 -0
  816. package/src/tui/dist/index.js +13 -1
  817. package/src/tui/dist/layouts/DefaultAppLayout.js +11 -8
  818. package/src/tui/dist/lib/api.js +89 -1
  819. package/src/tui/package.json +1 -1
  820. package/src/ui/dist/assets/{AnalysisPlugin-DnSm0GZn.js → AnalysisPlugin-CA94NGmI.js} +1 -1
  821. package/src/ui/dist/assets/CliPlugin-DHBzphZU.js +79 -0
  822. package/src/ui/dist/assets/CodeEditorPlugin-BOFwD2rn.js +2 -0
  823. package/src/ui/dist/assets/{CodeViewerPlugin-itb0tltR.js → CodeViewerPlugin-CqDpgjik.js} +4 -4
  824. package/src/ui/dist/assets/{DocViewerPlugin-DqKkiCI6.js → DocViewerPlugin-UDBgt8-4.js} +3 -3
  825. package/src/ui/dist/assets/GitCommitViewerPlugin-BmHtZ0bZ.js +6 -0
  826. package/src/ui/dist/assets/{GitDiffViewerPlugin-DxL2ezFG.js → GitDiffViewerPlugin-CAxjNorQ.js} +2 -2
  827. package/src/ui/dist/assets/{GitSnapshotViewer-B_RQm1YZ.js → GitSnapshotViewer-CweA6VON.js} +2 -2
  828. package/src/ui/dist/assets/{ImageViewerPlugin-tHqlXY3n.js → ImageViewerPlugin-C8wHGvGN.js} +5 -5
  829. package/src/ui/dist/assets/LabPlugin-COyyLUol.js +32 -0
  830. package/src/ui/dist/assets/{LatexPlugin-B495DTXC.js → LatexPlugin-BQjAaA5J.js} +4 -4
  831. package/src/ui/dist/assets/{MarkdownViewerPlugin-DG28-61B.js → MarkdownViewerPlugin-Dy1NE2dI.js} +3 -3
  832. package/src/ui/dist/assets/{MarketplacePlugin-BiOGT-Kj.js → MarketplacePlugin-DMIZtEJ2.js} +2 -2
  833. package/src/ui/dist/assets/NotebookEditor-CFHMq_Qt.js +91 -0
  834. package/src/ui/dist/assets/{NotebookEditor-CVsj8h_T.js → NotebookEditor-WFyd8Ybt.js} +23 -23
  835. package/src/ui/dist/assets/{PdfLoader-CASDQmxJ.js → PdfLoader-CLE5u5TS.js} +3 -3
  836. package/src/ui/dist/assets/{PdfMarkdownPlugin-BFhwoKsY.js → PdfMarkdownPlugin-_iNK_H83.js} +1 -1
  837. package/src/ui/dist/assets/PdfViewerPlugin-DgWsbInT.js +22 -0
  838. package/src/ui/dist/assets/SearchPlugin-DrZmn5iw.js +11 -0
  839. package/src/ui/dist/assets/{TextViewerPlugin-CB4DYfWO.js → TextViewerPlugin-D1-T3aC7.js} +4 -4
  840. package/src/ui/dist/assets/branding/runner-claude.svg +107 -0
  841. package/src/ui/dist/assets/branding/runner-codex.svg +10 -0
  842. package/src/ui/dist/assets/branding/runner-kimi.svg +14 -0
  843. package/src/ui/dist/assets/branding/runner-opencode.svg +7 -0
  844. package/src/ui/dist/assets/cli-store-CoZ-x5Ip.js +1 -0
  845. package/src/ui/dist/assets/{code-DLC6G24T.js → code-DbsmSd3Y.js} +1 -1
  846. package/src/ui/dist/assets/file-diff-panel-DsvyRz47.js +1 -0
  847. package/src/ui/dist/assets/{wrap-text-CwMn-iqb.js → file-jump-queue-DeQBikaw.js} +3 -3
  848. package/src/ui/dist/assets/{file-socket-Cu4Qln7Y.js → file-socket-DA5XIx88.js} +1 -1
  849. package/src/ui/dist/assets/fonts/ds-fonts.css +50 -4
  850. package/src/ui/dist/assets/images/deepxiv/register-guide.png +0 -0
  851. package/src/ui/dist/assets/index-39vY9LmZ.js +1 -0
  852. package/src/ui/dist/assets/{index-wQ7RIIRd.js → index-BsO46tJA.js} +1 -1
  853. package/src/ui/dist/assets/index-CHzJ2xtB.js +3530 -0
  854. package/src/ui/dist/assets/index-DH-zxoZ3.css +33 -0
  855. package/src/ui/dist/assets/{plugin-notebook-HbW2K-1c.js → plugin-notebook-JRhysCqj.js} +2 -2
  856. package/src/ui/dist/assets/{project-sync-CsX08Qno.js → project-sync-DPmWKmKD.js} +1 -1
  857. package/src/ui/dist/assets/{zoom-out-R-GWEhzS.js → zoom-out-DAukFWen.js} +3 -3
  858. package/src/ui/dist/index.html +3 -3
  859. package/src/skills/analysis-campaign/references/artifact-orchestration.md +0 -58
  860. package/src/skills/baseline/references/memory-playbook.md +0 -40
  861. package/src/skills/baseline/references/publishable-baseline-package.md +0 -30
  862. package/src/skills/write/references/outline-evidence-contract-example.md +0 -107
  863. package/src/skills/write/references/paper-experiment-matrix-template.md +0 -131
  864. package/src/skills/write/references/paper-section-playbook.md +0 -64
  865. package/src/skills/write/references/reviewer-first-writing.md +0 -64
  866. package/src/skills/write/references/revision-checklist.md +0 -70
  867. package/src/skills/write/references/section-contracts.md +0 -82
  868. package/src/skills/write/references/sentence-level-proofing.md +0 -49
  869. package/src/ui/dist/assets/AiManusChatView-COFACy7V.js +0 -204
  870. package/src/ui/dist/assets/CliPlugin-CvwCmDQ5.js +0 -109
  871. package/src/ui/dist/assets/CodeEditorPlugin-cOqSa0xq.js +0 -2
  872. package/src/ui/dist/assets/GitCommitViewerPlugin-DVgNHBCS.js +0 -1
  873. package/src/ui/dist/assets/LabCopilotPanel-ClMbq5Yu.js +0 -14
  874. package/src/ui/dist/assets/LabPlugin-L_SuE8ow.js +0 -22
  875. package/src/ui/dist/assets/NotebookEditor-C-4Kt1p9.js +0 -81
  876. package/src/ui/dist/assets/PdfViewerPlugin-DcOzU9vd.js +0 -17
  877. package/src/ui/dist/assets/SearchPlugin-CHj7M58O.js +0 -16
  878. package/src/ui/dist/assets/VNCViewer-CjlbyCB3.js +0 -11
  879. package/src/ui/dist/assets/bot-CFkZY-JP.js +0 -6
  880. package/src/ui/dist/assets/chevron-up-Dq5ofbht.js +0 -6
  881. package/src/ui/dist/assets/file-content-Dv4LoZec.js +0 -1
  882. package/src/ui/dist/assets/file-diff-panel-Denq-lC3.js +0 -1
  883. package/src/ui/dist/assets/file-jump-queue-DA-SdG__.js +0 -1
  884. package/src/ui/dist/assets/git-commit-horizontal-BUh6G52n.js +0 -6
  885. package/src/ui/dist/assets/image-B9HUUddG.js +0 -6
  886. package/src/ui/dist/assets/index-B2B1sg-M.js +0 -1
  887. package/src/ui/dist/assets/index-Cgla8biy.css +0 -33
  888. package/src/ui/dist/assets/index-DRyx7vAc.js +0 -1
  889. package/src/ui/dist/assets/index-Gbl53BNp.js +0 -2496
  890. package/src/ui/dist/assets/pdf-effect-queue-ZtnHFCAi.js +0 -6
  891. package/src/ui/dist/assets/popover-DL6h35vr.js +0 -1
  892. package/src/ui/dist/assets/select-DvmXt1yY.js +0 -11
  893. package/src/ui/dist/assets/sigma-7jpXazui.js +0 -6
  894. package/src/ui/dist/assets/trash-xA7kFt8i.js +0 -11
  895. package/src/ui/dist/assets/useCliAccess-DsMwDjOp.js +0 -1
  896. package/src/ui/dist/assets/useFileDiffOverlay-FuhcnKiw.js +0 -1
@@ -0,0 +1,182 @@
1
+ schema_version: 1
2
+ id: aisb.t3.027_citeeval
3
+ name: 'CiteEval: Principle-Driven Citation Evaluation for Source Attribution'
4
+ version: 0.1.0
5
+ one_line: 'Evaluate and optimize citation quality metrics on the CiteBench benchmark
6
+ using CiteEval-Auto, a principle-driven framework that assesses citations against
7
+ full retrieval context, user queries, and generated text — measuring statement-level
8
+ Pearson correlation with human judgments.
9
+
10
+ '
11
+ task_description: 'This packaged benchmark covers principle-driven citation evaluation
12
+ for retrieval-augmented generation (RAG) systems. The core task is to run the CiteEval-Auto
13
+ metric suite — comprising context attribution (CA), citation editing (CE), and citation
14
+ rating (CR via IterCoE and EditDist) modules — against the CiteBench dataset, which
15
+ contains multi-domain queries (ASQA, ELI5, MS MARCO, LFRQA) with statement-level
16
+ human annotations of citation quality on a 1-5 Likert scale. The primary metric
17
+ is statement-level Pearson correlation between predicted and human citation ratings.
18
+ The evaluation goes beyond simple NLI-based supportiveness by considering full retrieval
19
+ sources, user context, response context, and parametric knowledge. The benchmark
20
+ supports two evaluation scenarios: "Full" (all citable statements, uncited ones
21
+ penalized) and "Cited" (only cited statements evaluated). Execution requires an
22
+ LLM API (OpenAI or DeepSeek) for the CiteEval-Auto modules, which use prompted LLM
23
+ calls for context attribution and citation rating. The CiteBench dataset must be
24
+ downloaded separately from Google Drive. Pre-computed metric outputs for baselines
25
+ (AutoAIS, LQAC, AttriScore) are bundled in the snapshot.
26
+
27
+ '
28
+ capability_tags:
29
+ - research_code_optimization
30
+ - citation_evaluation
31
+ - retrieval_augmented_generation
32
+ - source_attribution
33
+ - evaluation
34
+ - meta_evaluation
35
+ aisb_direction: T3
36
+ track_fit:
37
+ - paper_track
38
+ - benchmark_track
39
+ task_mode: evaluation_driven
40
+ requires_execution: true
41
+ requires_paper: true
42
+ integrity_level: cas_plus_canary
43
+ snapshot_status: runnable
44
+ support_level: advanced
45
+ cost_band: medium
46
+ time_band: 2-6h
47
+ difficulty: medium
48
+ data_access: public
49
+ primary_outputs:
50
+ - statement_level_pearson
51
+ - context_attribution_correlation
52
+ - citation_rating_correlation
53
+ - citation_eval_report
54
+ - principle_judgments
55
+ launch_profiles:
56
+ - id: quick_check
57
+ label: Quick Check
58
+ description: 'Run the packaged CiteEval-Auto evaluation on a small example batch
59
+ or the pre-computed metric outputs to verify the pipeline and reproduce baseline
60
+ correlation numbers.
61
+
62
+ '
63
+ - id: full_eval
64
+ label: Full Metric Evaluation
65
+ description: 'Run the full CiteEval-Auto pipeline (CA, CE, CR modules) over the
66
+ CiteBench metric test set from scratch using an LLM API, then compute human correlation
67
+ metrics against statement-level and response-level human annotations.
68
+
69
+ '
70
+ - id: system_eval
71
+ label: System Evaluation
72
+ description: 'Evaluate citation quality of a custom RAG system''s outputs using
73
+ CiteEval-Auto. Requires converting system output to .citeeval format, running
74
+ the metric suite, and printing results with run_system_eval.sh. Supports --cited
75
+ and full scenarios.
76
+
77
+ '
78
+ dataset_download:
79
+ primary_method: manual
80
+ sources:
81
+ - kind: google_drive
82
+ url: https://drive.google.com/drive/folders/12Evj0f92wKz_7OGuuwq3KShTdSM8eu4v?usp=drive_link
83
+ access: public
84
+ note: 'CiteBench dataset including metric_dev, metric_test, full dev, and full
85
+ test splits. Must be downloaded manually and placed under data/ in the project
86
+ root.
87
+
88
+ '
89
+ notes:
90
+ - Pre-computed metric outputs for baseline metrics (AutoAIS, LQAC, AttriScore) are
91
+ bundled in data/metric_eval_outputs/.
92
+ - Dataset size is moderate (thousands of queries with annotations); exact download
93
+ size not documented but expected under 1 GB.
94
+ credential_requirements:
95
+ mode: api_key
96
+ items:
97
+ - OPENAI_API_KEY (for GPT-4o-based CiteEval-Auto runs, or substitute with DeepSeek
98
+ API)
99
+ notes:
100
+ - The default config in run_citeeval.sh uses model=deepseek-chat; run_system_eval.sh
101
+ references gpt-4o.
102
+ - API costs depend on the number of statements evaluated and the chosen model. The
103
+ metric test set has ~1000 responses.
104
+ - CPU-only execution of the evaluation scripts is possible but still requires an
105
+ LLM API for the CiteEval-Auto modules.
106
+ resources:
107
+ minimum:
108
+ cpu_cores: 8
109
+ ram_gb: 32
110
+ disk_gb: 50
111
+ gpu_count: 0
112
+ gpu_vram_gb: 0
113
+ recommended:
114
+ cpu_cores: 16
115
+ ram_gb: 64
116
+ disk_gb: 100
117
+ gpu_count: 1
118
+ gpu_vram_gb: 16
119
+ environment:
120
+ python: '3.10'
121
+ cuda: '11.7'
122
+ pytorch: 1.13.0
123
+ key_packages:
124
+ - openai==1.16.2
125
+ - transformers==4.38.2
126
+ notes:
127
+ - CPU-only execution is plausible for the minimum route; GPU is only needed if running
128
+ local NLI models for baselines.
129
+ - The README misspells requirements.txt as requirments.txt — use the actual
129
+ requirements.txt file in the snapshot.
131
+ - CITEEVAL_ROOT and PYTHONPATH environment variables must be set as described in
132
+ README.
133
+ risk_flags:
134
+ - api_dependency
135
+ - external_dataset_download
136
+ - api_cost_variable
137
+ risk_notes:
138
+ - CiteEval-Auto modules require live LLM API calls (OpenAI or DeepSeek). Without an
139
+ API key, only pre-computed metric outputs can be evaluated.
140
+ - CiteBench data must be manually downloaded from Google Drive; it is not bundled
141
+ in the snapshot.
142
+ - API costs scale with the number of evaluated statements and the LLM model used.
143
+ The metric test set has ~1000 responses with multiple statements each.
144
+ - The default model in run_citeeval.sh is deepseek-chat, not GPT-4o as used in the
145
+ paper's main results. Reproducing paper numbers requires GPT-4o.
146
+ - No benchmark execution was performed during the packaging pass; metric values should
147
+ be verified at runtime.
148
+ recommended_when: 'Use this benchmark when you want a citation-quality metric task
149
+ that evaluates full retrieval context, user queries, and generated text rather than
150
+ simple NLI-based supportiveness proxies. Ideal for research on improving RAG citation
151
+ quality, developing better automatic citation metrics, or benchmarking citation
152
+ quality of new RAG systems against human judgments.
153
+
154
+ '
155
+ not_recommended_when: 'Do not use this if you need a fully self-contained benchmark
156
+ with no external API calls, if you lack access to OpenAI or DeepSeek APIs, or if
157
+ your task does not involve retrieval passages and source attribution. Not suitable
158
+ if you need a benchmark that can run entirely offline without any LLM API dependency.
159
+
160
+ '
161
+ paper:
162
+ title: 'CiteEval: Principle-Driven Citation Evaluation for Source Attribution'
163
+ venue: arXiv preprint
164
+ year: 2025
165
+ url: https://arxiv.org/abs/2506.01829
166
+ download:
167
+ url: https://github.com/ResearAI/DeepScientist/releases/download/aisb-v0.0.1/aisb.t3.027_citeeval.zip
168
+ archive_type: zip
169
+ local_dir_name: paper-27-CiteEval
170
+ provider: github_release
171
+ repo: ResearAI/DeepScientist
172
+ tag: aisb-v0.0.1
173
+ asset_name: aisb.t3.027_citeeval.zip
174
+ sha256: 5eb48e11b91ec4856d18461899c236f658b110dc49ff29487205908230ac8d0b
175
+ size_bytes: 83173
176
+ commercial:
177
+ annual_fee: null
178
+ display:
179
+ palette_seed: cream-navy-citation
180
+ art_style: reference-audit
181
+ accent_priority: high
182
+ image_path: ../image/027_aisb.t3.027_citeeval.jpg
@@ -0,0 +1,135 @@
1
+ schema_version: 1
2
+ id: aisb.t3.027_citeeval
3
+ name: 'CiteEval: 基于原则驱动的引用评估用于来源归属'
4
+ version: 0.1.0
5
+ one_line: '在CiteBench基准上使用CiteEval-Auto(一个原则驱动的框架)评估和优化引用质量指标,该框架根据完整检索上下文、用户查询和生成文本对引用进行评估——衡量语句级皮尔逊相关系数与人工判断的一致性。
6
+
7
+ '
8
+ task_description: '该打包基准测试涵盖用于检索增强生成(RAG)系统的原则驱动引用评估。核心任务是在CiteBench数据集上运行CiteEval-Auto指标套件——包括上下文归因(CA)、引用编辑(CE)和引用评分(CR,通过IterCoE和EditDist)模块。该数据集包含多领域查询(ASQA、ELI5、MS MARCO、LFRQA),带有引用质量的语句级人工标注(1-5李克特量表)。主要指标是预测引用评分与人工引用评分之间的语句级皮尔逊相关系数。该评估超越了简单的基于NLI的支持度考量,考虑了完整检索来源、用户上下文、响应上下文和参数知识。基准测试支持两种评估场景:"完整"(所有可引用语句,未引用者受罚)和"已引用"(仅评估已引用的语句)。执行需要LLM API(OpenAI或DeepSeek)来运行CiteEval-Auto模块,这些模块使用提示的LLM调用进行上下文归因和引用评分。CiteBench数据集需从Google Drive单独下载。基线(AutoAIS、LQAC、AttriScore)的预计算指标输出已捆绑在快照中。
9
+
10
+ '
11
+ capability_tags:
12
+ - research_code_optimization
13
+ - citation_evaluation
14
+ - retrieval_augmented_generation
15
+ - source_attribution
16
+ - evaluation
17
+ - meta_evaluation
18
+ aisb_direction: T3
19
+ track_fit:
20
+ - paper_track
21
+ - benchmark_track
22
+ task_mode: evaluation_driven
23
+ requires_execution: true
24
+ requires_paper: true
25
+ integrity_level: cas_plus_canary
26
+ snapshot_status: runnable
27
+ support_level: advanced
28
+ cost_band: medium
29
+ time_band: 2-6h
30
+ difficulty: medium
31
+ data_access: public
32
+ primary_outputs:
33
+ - statement_level_pearson
34
+ - context_attribution_correlation
35
+ - citation_rating_correlation
36
+ - citation_eval_report
37
+ - principle_judgments
38
+ launch_profiles:
39
+ - id: quick_check
40
+ label: 快速检查
41
+ description: '在小样本批次或预计算指标输出上运行打包的CiteEval-Auto评估,以验证流程并复现基线相关系数。
42
+
43
+ '
44
+ - id: full_eval
45
+ label: 完整指标评估
46
+ description: '使用LLM API从头开始对CiteBench指标测试集运行完整的CiteEval-Auto流程(CA、CE、CR模块),然后根据语句级和响应级人工标注计算与人工的相关性指标。
47
+
48
+ '
49
+ - id: system_eval
50
+ label: 系统评估
51
+ description: '使用CiteEval-Auto评估自定义RAG系统输出的引用质量。需要将系统输出转换为.citeeval格式,运行指标套件,并使用run_system_eval.sh打印结果。支持--cited和完整场景。
52
+
53
+ '
54
+ dataset_download:
55
+ primary_method: manual
56
+ sources:
57
+ - kind: google_drive
58
+ url: https://drive.google.com/drive/folders/12Evj0f92wKz_7OGuuwq3KShTdSM8eu4v?usp=drive_link
59
+ access: public
60
+ note: 'CiteBench数据集,包含metric_dev、metric_test、full dev和full test划分。必须手动下载并放置在项目根目录下的data/中。
61
+
62
+ '
63
+ notes:
64
+ - 基线指标(AutoAIS、LQAC、AttriScore)的预计算指标输出已捆绑在data/metric_eval_outputs/中。
65
+ - 数据集大小适中(数千个带标注的查询);确切下载大小未记录,但预计在1GB以下。
66
+ credential_requirements:
67
+ mode: api_key
68
+ items:
69
+ - OPENAI_API_KEY(用于基于GPT-4o的CiteEval-Auto运行,或使用DeepSeek API替代)
70
+ notes:
71
+ - run_citeeval.sh中的默认配置使用model=deepseek-chat;run_system_eval.sh引用gpt-4o。
72
+ - API成本取决于评估的语句数量和所选模型。指标测试集约有1000个响应。
73
+ - 评估脚本可以在仅CPU模式下执行,但仍需要LLM API来运行CiteEval-Auto模块。
74
+ resources:
75
+ minimum:
76
+ cpu_cores: 8
77
+ ram_gb: 32
78
+ disk_gb: 50
79
+ gpu_count: 0
80
+ gpu_vram_gb: 0
81
+ recommended:
82
+ cpu_cores: 16
83
+ ram_gb: 64
84
+ disk_gb: 100
85
+ gpu_count: 1
86
+ gpu_vram_gb: 16
87
+ environment:
88
+ python: '3.10'
89
+ cuda: '11.7'
90
+ pytorch: 1.13.0
91
+ key_packages:
92
+ - openai==1.16.2
93
+ - transformers==4.38.2
94
+ notes:
95
+ - 最低规格路线可以仅用CPU执行;仅在运行本地NLI模型进行基线评估时才需要GPU。
96
+ - README中将requirements.txt误拼为requirments.txt——请使用快照中实际的requirements.txt文件。
97
+ - 必须按README中的说明设置CITEEVAL_ROOT和PYTHONPATH环境变量。
98
+ risk_flags:
99
+ - api_dependency
100
+ - external_dataset_download
101
+ - api_cost_variable
102
+ risk_notes:
103
+ - CiteEval-Auto模块需要实时LLM API调用(OpenAI或DeepSeek)。如果没有API密钥,只能评估预计算的指标输出。
104
+ - CiteBench数据必须从Google Drive手动下载;未捆绑在快照中。
105
+ - API成本随评估的语句数量和使用的LLM模型而增加。指标测试集约有1000个响应,每个响应包含多个语句。
106
+ - run_citeeval.sh中的默认模型是deepseek-chat,而非论文主要结果中使用的GPT-4o。复现论文数据需要GPT-4o。
107
+ - 打包过程中未执行基准测试;指标值应在运行时验证。
108
+ recommended_when: '当您需要一个评估完整检索上下文、用户查询和生成文本的引用质量指标任务,而非简单的基于NLI的支持度代理时,使用此基准测试。非常适合研究改进RAG引用质量、开发更好的自动引用指标,或根据人工判断基准测试新RAG系统的引用质量。
109
+
110
+ '
111
+ not_recommended_when: '如果您需要一个完全自包含、无外部API调用的基准测试,或者您无法访问OpenAI或DeepSeek API,或者您的任务不涉及检索段落和来源归属,请勿使用此基准测试。如果您需要一个可以完全离线运行、没有任何LLM API依赖的基准测试,则不适合。
112
+
113
+ '
114
+ paper:
115
+ title: 'CiteEval: Principle-Driven Citation Evaluation for Source Attribution'
116
+ venue: arXiv preprint
117
+ year: 2025
118
+ url: https://arxiv.org/abs/2506.01829
119
+ download:
120
+ url: https://github.com/ResearAI/DeepScientist/releases/download/aisb-v0.0.1/aisb.t3.027_citeeval.zip
121
+ archive_type: zip
122
+ local_dir_name: paper-27-CiteEval
123
+ provider: github_release
124
+ repo: ResearAI/DeepScientist
125
+ tag: aisb-v0.0.1
126
+ asset_name: aisb.t3.027_citeeval.zip
127
+ sha256: 5eb48e11b91ec4856d18461899c236f658b110dc49ff29487205908230ac8d0b
128
+ size_bytes: 83173
129
+ commercial:
130
+ annual_fee: null
131
+ display:
132
+ palette_seed: cream-navy-citation
133
+ art_style: reference-audit
134
+ accent_priority: high
135
+ image_path: ../image/027_aisb.t3.027_citeeval.jpg
@@ -0,0 +1,206 @@
1
+ schema_version: 1
2
+ id: aisb.t3.028_sbam
3
+ name: Segment-Based Attention Masking for GPTs
4
+ version: 0.1.0
5
+ one_line: Fine-tune and evaluate Masked Attention by Segment (MAS) on Llama-3.2-1B
6
+ across eight commonsense reasoning datasets, comparing segment-aware prefill masking
7
+ against standard causal masking.
8
+ task_description: 'This benchmark evaluates the Masked Attention by Segment (MAS)
9
+ technique, which modifies the causal attention mask of decoder-only GPTs so that
10
+ tokens within each prefill segment (system prompt, user prompt) can attend bidirectionally,
11
+ while the autoregressive generation phase retains standard causal masking. The packaged
12
+ task uses a LoRA-adapted Llama-3.2-1B model and evaluates on eight commonsense reasoning
13
+ datasets: BoolQ, PIQA, SocialIQA, HellaSwag, WinoGrande, ARC-Challenge, ARC-Easy,
14
+ and OpenBookQA. A pre-trained MAS LoRA checkpoint is bundled (trained_models_and_results/Llama-3.2-1B_epoch3_MAS),
15
+ so evaluation can proceed without re-training. The evaluation script (run_eval_all.sh)
16
+ parallelizes across two GPUs and reports per-dataset accuracy plus an average. The
17
+ base Llama-3.2-1B model must be downloaded from HuggingFace (gated access). The
18
+ fine-tuning dataset (170k commonsense reasoning samples in chat-template format)
19
+ is referenced via ft_utils.py but is not bundled; re-training requires sourcing
20
+ it externally. The core evaluation route is self-contained given the bundled LoRA
21
+ weights and base model access.
22
+
23
+ '
24
+ capability_tags:
25
+ - research_code_optimization
26
+ - large_language_models
27
+ - transformers
28
+ - prompt_engineering
29
+ - attention_analysis
30
+ - commonsense_reasoning
31
+ aisb_direction: T3
32
+ track_fit:
33
+ - paper_track
34
+ - benchmark_track
35
+ task_mode: evaluation_driven
36
+ requires_execution: true
37
+ requires_paper: true
38
+ integrity_level: cas_plus_canary
39
+ snapshot_status: runnable
40
+ support_level: turnkey
41
+ cost_band: medium
42
+ time_band: 6-24h
43
+ difficulty: medium
44
+ data_access: public
45
+ primary_outputs:
46
+ - average_accuracy
47
+ - arc_c_accuracy
48
+ - obqa_accuracy
49
+ - boolq_accuracy
50
+ - piqa_accuracy
51
+ - siqa_accuracy
52
+ - hellaswag_accuracy
53
+ - winogrande_accuracy
54
+ - arc_e_accuracy
55
+ - masked_attention_report
56
+ - prompt_variant_scores
57
+ launch_profiles:
58
+ - id: quick_check
59
+ label: Quick Check
60
+ description: 'Run mas_eval.py on a single dataset (e.g. ARC-Challenge) with the
61
+ bundled LoRA checkpoint to verify the evaluation pipeline works end-to-end on
62
+ one GPU.
63
+
64
+ '
65
+ - id: full_eval
66
+ label: Full Eval
67
+ description: 'Run run_eval_all.sh to evaluate the bundled MAS LoRA checkpoint on
68
+ all eight commonsense reasoning datasets in parallel across two GPUs, producing
69
+ per-dataset and average accuracy.
70
+
71
+ '
72
+ - id: retrain_and_eval
73
+ label: Retrain + Eval
74
+ description: 'Re-run LoRA fine-tuning with MAS masking on the 170k commonsense dataset
75
+ (must be sourced externally), then evaluate. This reproduces the full paper pipeline.
76
+
77
+ '
78
+ dataset_download:
79
+ primary_method: mixed
80
+ sources:
81
+ - kind: huggingface
82
+ url: https://huggingface.co/meta-llama/Llama-3.2-1B
83
+ access: gated
84
+ note: 'Base Llama-3.2-1B model weights. Requires HuggingFace account and Meta
85
+ license acceptance. ~2.5 GB download.
86
+
87
+ '
88
+ - kind: bundled
89
+ url: null
90
+ access: local
91
+ note: 'Pre-trained MAS LoRA weights are bundled in trained_models_and_results/Llama-3.2-1B_epoch3_MAS.
92
+ A standard causal LoRA checkpoint is in Llama-3.2-1B_epoch3.
93
+
94
+ '
95
+ - kind: external
96
+ url: null
97
+ access: public
98
+ note: 'The 170k commonsense reasoning training dataset (referenced by ft_utils.py)
99
+ is not bundled. It follows the setup from Liu et al. (2024) / Hu et al. (2023).
100
+ Only needed if re-training.
101
+
102
+ '
103
+ - kind: external
104
+ url: null
105
+ access: public
106
+ note: 'Test splits for BoolQ, PIQA, SocialIQA, HellaSwag, WinoGrande, ARC-Challenge,
107
+ ARC-Easy, OpenBookQA are loaded via ft_utils.py at eval time. These are public
108
+ datasets typically fetched from HuggingFace datasets hub.
109
+
110
+ '
111
+ notes:
112
+ - Total disk usage with base model and evaluation datasets is approximately 10-20
113
+ GB.
114
+ - If re-training, the 170k training samples add several GB.
115
+ credential_requirements:
116
+ mode: api_key
117
+ items:
118
+ - HuggingFace token with Meta Llama license acceptance (for base model download)
119
+ notes:
120
+ - No other API keys required for evaluation.
121
+ - If base model is pre-cached locally, no credentials are needed at runtime.
122
+ resources:
123
+ minimum:
124
+ cpu_cores: 8
125
+ ram_gb: 32
126
+ disk_gb: 50
127
+ gpu_count: 1
128
+ gpu_vram_gb: 12
129
+ recommended:
130
+ cpu_cores: 16
131
+ ram_gb: 64
132
+ disk_gb: 120
133
+ gpu_count: 2
134
+ gpu_vram_gb: 24
135
+ environment:
136
+ python: '3.10'
137
+ cuda: '11.8'
138
+ pytorch: 2.1.0
139
+ flash_attn: null
140
+ key_packages:
141
+ - transformers==4.47.0
142
+ - peft
143
+ - torch
144
+ - tqdm
145
+ notes:
146
+ - MAS implementation uses eager attention (attn_implementation="eager"), not flash
147
+ attention.
148
+ - See bundled requirements.txt for full dependency set.
149
+ - CUDA is required; the code checks torch.cuda.is_available() and defaults to "cuda".
150
+ risk_flags:
151
+ - gated_model_dependency
152
+ - training_data_not_bundled
153
+ - eager_attention_only
154
+ risk_notes:
155
+ - Base Llama-3.2-1B requires gated HuggingFace access; evaluation cannot proceed without
156
+ it.
157
+ - The 170k commonsense training dataset is not bundled; only the eval route with pre-trained
158
+ LoRA is self-contained.
159
+ - MAS currently only supports Llama models with eager attention mode; flash_attn is
160
+ not compatible.
161
+ - run_eval_all.sh hardcodes paths (/models/Llama-3.2-1B, /repo/); these must be adjusted
162
+ to local layout.
163
+ - No benchmark execution was performed during the packaging pass; metric values are
164
+ not pre-validated.
165
+ - Batch size is set to 1 in run_eval_all.sh; full eval on 8 datasets takes several
166
+ hours.
167
+ recommended_when: 'Use this benchmark when you want a manageable, single-GPU evaluation
168
+ of a novel attention masking strategy for decoder-only LLMs on commonsense reasoning.
169
+ Good for studying how segment-level bidirectional attention during prefill affects
170
+ downstream accuracy compared to standard causal masking, with lightweight LoRA fine-tuning.
171
+
172
+ '
173
+ not_recommended_when: 'Do not use this if you need a large-scale multi-billion parameter
174
+ LLM training benchmark, a non-transformer architecture task, or a benchmark with
175
+ fully bundled training data. Also not suitable if you cannot obtain gated access
176
+ to Meta Llama models.
177
+
178
+ '
179
+ paper:
180
+ title: Segment-Based Attention Masking for GPTs
181
+ authors:
182
+ - Shahar Katz
183
+ - Liran Ringel
184
+ - Yaniv Romano
185
+ - Lior Wolf
186
+ venue: arXiv preprint
187
+ year: 2024
188
+ url: https://arxiv.org/abs/2412.18487
189
+ doi: null
190
+ download:
191
+ url: https://github.com/ResearAI/DeepScientist/releases/download/aisb-v0.0.1/aisb.t3.028_sbam.zip
192
+ archive_type: zip
193
+ local_dir_name: paper-28-SBAM
194
+ provider: github_release
195
+ repo: ResearAI/DeepScientist
196
+ tag: aisb-v0.0.1
197
+ asset_name: aisb.t3.028_sbam.zip
198
+ sha256: e6a4ea39b6fa60b49a9ed55f8386355bae8ed0b92188064bc351a5f71c8f9066
199
+ size_bytes: 104097
200
+ commercial:
201
+ annual_fee: null
202
+ display:
203
+ palette_seed: violet-graphite-mask
204
+ art_style: attention-map
205
+ accent_priority: medium
206
+ image_path: ../image/028_aisb.t3.028_sbam.jpg
@@ -0,0 +1,166 @@
1
+ schema_version: 1
2
+ id: aisb.t3.028_sbam
3
+ name: 面向GPT的基于分段的注意力掩码
4
+ version: 0.1.0
5
+ one_line: 在Llama-3.2-1B上对分段掩码注意力(MAS)进行微调和评估,在八个常识推理数据集上比较分段感知的预填充掩码与标准因果掩码。
6
+ task_description: '该基准测试评估分段掩码注意力(MAS)技术,该技术修改了解码器专用GPT的因果注意力掩码,使每个预填充分段(系统提示、用户提示)内的token能够进行双向注意,而自回归生成阶段保留标准因果掩码。打包的任务使用LoRA适配的Llama-3.2-1B模型,并在八个常识推理数据集上进行评估:BoolQ、PIQA、SocialIQA、HellaSwag、WinoGrande、ARC-Challenge、ARC-Easy和OpenBookQA。预训练的MAS LoRA检查点已打包(trained_models_and_results/Llama-3.2-1B_epoch3_MAS),因此评估可以在不重新训练的情况下进行。评估脚本(run_eval_all.sh)在两个GPU上并行处理,并报告每个数据集的准确率及平均值。基础Llama-3.2-1B模型必须从HuggingFace下载(需要gated访问权限)。微调数据集(170k条聊天模板格式的常识推理样本)通过ft_utils.py引用,但未打包;如需重新训练需要外部获取。核心评估流程在有打包的LoRA权重和基础模型访问权限的情况下是自包含的。
7
+
8
+ '
9
+ capability_tags:
10
+ - research_code_optimization
11
+ - large_language_models
12
+ - transformers
13
+ - prompt_engineering
14
+ - attention_analysis
15
+ - commonsense_reasoning
16
+ aisb_direction: T3
17
+ track_fit:
18
+ - paper_track
19
+ - benchmark_track
20
+ task_mode: evaluation_driven
21
+ requires_execution: true
22
+ requires_paper: true
23
+ integrity_level: cas_plus_canary
24
+ snapshot_status: runnable
25
+ support_level: turnkey
26
+ cost_band: medium
27
+ time_band: 6-24h
28
+ difficulty: medium
29
+ data_access: public
30
+ primary_outputs:
31
+ - average_accuracy
32
+ - arc_c_accuracy
33
+ - obqa_accuracy
34
+ - boolq_accuracy
35
+ - piqa_accuracy
36
+ - siqa_accuracy
37
+ - hellaswag_accuracy
38
+ - winogrande_accuracy
39
+ - arc_e_accuracy
40
+ - masked_attention_report
41
+ - prompt_variant_scores
42
+ launch_profiles:
43
+ - id: quick_check
44
+ label: 快速检查
45
+ description: '使用打包的LoRA检查点在单个数据集(例如ARC-Challenge)上运行mas_eval.py,以验证评估流程在单个GPU上端到端工作。
46
+
47
+ '
48
+ - id: full_eval
49
+ label: 完整评估
50
+ description: '运行run_eval_all.sh在两个GPU上并行评估打包的MAS LoRA检查点在所有八个常识推理数据集上的表现,生成每个数据集的准确率和平均准确率。
51
+
52
+ '
53
+ - id: retrain_and_eval
54
+ label: 重新训练 + 评估
55
+ description: '使用MAS掩码在170k常识数据集上重新运行LoRA微调(必须从外部获取),然后进行评估。这将重现完整的论文流程。
56
+
57
+ '
58
+ dataset_download:
59
+ primary_method: mixed
60
+ sources:
61
+ - kind: huggingface
62
+ url: https://huggingface.co/meta-llama/Llama-3.2-1B
63
+ access: gated
64
+ note: '基础Llama-3.2-1B模型权重。需要HuggingFace账户并接受Meta许可协议。下载量约2.5GB。
65
+
66
+ '
67
+ - kind: bundled
68
+ url: null
69
+ access: local
70
+ note: '预训练的MAS LoRA权重打包在trained_models_and_results/Llama-3.2-1B_epoch3_MAS中。标准因果LoRA检查点在Llama-3.2-1B_epoch3中。
71
+
72
+ '
73
+ - kind: external
74
+ url: null
75
+ access: public
76
+ note: '170k常识推理训练数据集(由ft_utils.py引用)未打包。它遵循Liu等人(2024)/Hu等人(2023)的设置。仅在重新训练时需要。
77
+
78
+ '
79
+ - kind: external
80
+ url: null
81
+ access: public
82
+ note: 'BoolQ、PIQA、SocialIQA、HellaSwag、WinoGrande、ARC-Challenge、ARC-Easy、OpenBookQA的测试集通过ft_utils.py在评估时加载。这些是通常从HuggingFace数据集中心获取的公共数据集。
83
+
84
+ '
85
+ notes:
86
+ - 包含基础模型和评估数据集的总磁盘使用量约为10-20GB。
87
+ - 如果重新训练,170k训练样本会增加数GB。
88
+ credential_requirements:
89
+ mode: api_key
90
+ items:
91
+ - 已接受Meta Llama许可协议的HuggingFace令牌(用于基础模型下载)
92
+ notes:
93
+ - 评估不需要其他API密钥。
94
+ - 如果基础模型已在本地缓存,运行时不需要凭据。
95
+ resources:
96
+ minimum:
97
+ cpu_cores: 8
98
+ ram_gb: 32
99
+ disk_gb: 50
100
+ gpu_count: 1
101
+ gpu_vram_gb: 12
102
+ recommended:
103
+ cpu_cores: 16
104
+ ram_gb: 64
105
+ disk_gb: 120
106
+ gpu_count: 2
107
+ gpu_vram_gb: 24
108
+ environment:
109
+ python: '3.10'
110
+ cuda: '11.8'
111
+ pytorch: 2.1.0
112
+ flash_attn: null
113
+ key_packages:
114
+ - transformers==4.47.0
115
+ - peft
116
+ - torch
117
+ - tqdm
118
+ notes:
119
+ - MAS实现使用eager注意力(attn_implementation="eager"),而非flash注意力。
120
+ - 有关完整的依赖项列表,请参阅打包的requirements.txt。
121
+ - 需要CUDA;代码检查torch.cuda.is_available()并默认为"cuda"。
122
+ risk_flags:
123
+ - gated_model_dependency
124
+ - training_data_not_bundled
125
+ - eager_attention_only
126
+ risk_notes:
127
+ - 基础Llama-3.2-1B需要gated HuggingFace访问;没有它评估无法进行。
128
+ - 170k常识训练数据集未打包;只有带有预训练LoRA的评估流程是自包含的。
129
+ - MAS目前仅支持使用eager注意力模式的Llama模型;flash_attn不兼容。
130
+ - run_eval_all.sh硬编码路径(/models/Llama-3.2-1B、/repo/);必须调整为本地布局。
131
+ - 打包过程中未执行基准测试;指标值未预先验证。
132
+ - run_eval_all.sh中批大小设置为1;在8个数据集上完整评估需要数小时。
133
+ recommended_when: '当您想要对解码器专用LLM的新型注意力掩码策略在常识推理上进行可管理的单GPU评估时使用此基准测试。适合研究预填充期间分段级双向注意力如何影响下游准确率(与标准因果掩码相比),并使用轻量级LoRA微调。
134
+
135
+ '
136
+ not_recommended_when: '如果需要数十亿参数LLM的大规模训练基准测试、非Transformer架构任务或完全打包训练数据的基准测试,请勿使用此基准测试。如果无法获得Meta Llama模型的gated访问权限,也不适用。
137
+
138
+ '
139
+ paper:
140
+ title: Segment-Based Attention Masking for GPTs
141
+ authors:
142
+ - Shahar Katz
143
+ - Liran Ringel
144
+ - Yaniv Romano
145
+ - Lior Wolf
146
+ venue: arXiv preprint
147
+ year: 2024
148
+ url: https://arxiv.org/abs/2412.18487
149
+ doi: null
150
+ download:
151
+ url: https://github.com/ResearAI/DeepScientist/releases/download/aisb-v0.0.1/aisb.t3.028_sbam.zip
152
+ archive_type: zip
153
+ local_dir_name: paper-28-SBAM
154
+ provider: github_release
155
+ repo: ResearAI/DeepScientist
156
+ tag: aisb-v0.0.1
157
+ asset_name: aisb.t3.028_sbam.zip
158
+ sha256: e6a4ea39b6fa60b49a9ed55f8386355bae8ed0b92188064bc351a5f71c8f9066
159
+ size_bytes: 104097
160
+ commercial:
161
+ annual_fee: null
162
+ display:
163
+ palette_seed: violet-graphite-mask
164
+ art_style: attention-map
165
+ accent_priority: medium
166
+ image_path: ../image/028_aisb.t3.028_sbam.jpg