@researai/deepscientist 1.5.16 → 1.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (896)
  1. package/AGENTS.md +309 -130
  2. package/AISB/catalog/aisb.b1.agentic_coding.yaml +244 -0
  3. package/AISB/catalog/aisb.b10.climate_earth.yaml +235 -0
  4. package/AISB/catalog/aisb.b11.model_efficiency.yaml +231 -0
  5. package/AISB/catalog/aisb.b12.embodied_ai.yaml +238 -0
  6. package/AISB/catalog/aisb.b2.agent_systems.yaml +229 -0
  7. package/AISB/catalog/aisb.b3.self_evolving_rl.yaml +237 -0
  8. package/AISB/catalog/aisb.b4.lm_reasoning.yaml +240 -0
  9. package/AISB/catalog/aisb.b5.math_proof.yaml +235 -0
  10. package/AISB/catalog/aisb.b6.research_process.yaml +243 -0
  11. package/AISB/catalog/aisb.b7.multimodal_fusion.yaml +232 -0
  12. package/AISB/catalog/aisb.b8.lifesci_drug.yaml +275 -0
  13. package/AISB/catalog/aisb.b9.material_science.yaml +237 -0
  14. package/AISB/catalog/aisb.t3.001_savvy.yaml +159 -0
  15. package/AISB/catalog/aisb.t3.001_savvy.zh.yaml +121 -0
  16. package/AISB/catalog/aisb.t3.002_pinet.yaml +189 -0
  17. package/AISB/catalog/aisb.t3.002_pinet.zh.yaml +130 -0
  18. package/AISB/catalog/aisb.t3.004_decentralattn.yaml +184 -0
  19. package/AISB/catalog/aisb.t3.004_decentralattn.zh.yaml +153 -0
  20. package/AISB/catalog/aisb.t3.005_tsae.yaml +193 -0
  21. package/AISB/catalog/aisb.t3.005_tsae.zh.yaml +139 -0
  22. package/AISB/catalog/aisb.t3.006_physense.yaml +194 -0
  23. package/AISB/catalog/aisb.t3.006_physense.zh.yaml +118 -0
  24. package/AISB/catalog/aisb.t3.007_reasoningiqa.yaml +169 -0
  25. package/AISB/catalog/aisb.t3.007_reasoningiqa.zh.yaml +133 -0
  26. package/AISB/catalog/aisb.t3.008_meanflows.yaml +188 -0
  27. package/AISB/catalog/aisb.t3.008_meanflows.zh.yaml +140 -0
  28. package/AISB/catalog/aisb.t3.009_scoremissing.yaml +179 -0
  29. package/AISB/catalog/aisb.t3.009_scoremissing.zh.yaml +119 -0
  30. package/AISB/catalog/aisb.t3.010_suitabilityfilter.yaml +221 -0
  31. package/AISB/catalog/aisb.t3.010_suitabilityfilter.zh.yaml +141 -0
  32. package/AISB/catalog/aisb.t3.011_osd.yaml +206 -0
  33. package/AISB/catalog/aisb.t3.011_osd.zh.yaml +163 -0
  34. package/AISB/catalog/aisb.t3.012_efficientqat.yaml +206 -0
  35. package/AISB/catalog/aisb.t3.012_efficientqat.zh.yaml +159 -0
  36. package/AISB/catalog/aisb.t3.013_appl.yaml +152 -0
  37. package/AISB/catalog/aisb.t3.013_appl.zh.yaml +126 -0
  38. package/AISB/catalog/aisb.t3.014_piguard.yaml +207 -0
  39. package/AISB/catalog/aisb.t3.014_piguard.zh.yaml +164 -0
  40. package/AISB/catalog/aisb.t3.015_frspec.yaml +209 -0
  41. package/AISB/catalog/aisb.t3.015_frspec.zh.yaml +163 -0
  42. package/AISB/catalog/aisb.t3.016_mathfusion.yaml +166 -0
  43. package/AISB/catalog/aisb.t3.016_mathfusion.zh.yaml +145 -0
  44. package/AISB/catalog/aisb.t3.017_multimodalglp.yaml +171 -0
  45. package/AISB/catalog/aisb.t3.017_multimodalglp.zh.yaml +122 -0
  46. package/AISB/catalog/aisb.t3.018_cotsynth.yaml +206 -0
  47. package/AISB/catalog/aisb.t3.018_cotsynth.zh.yaml +162 -0
  48. package/AISB/catalog/aisb.t3.019_dyscaleut.yaml +211 -0
  49. package/AISB/catalog/aisb.t3.019_dyscaleut.zh.yaml +148 -0
  50. package/AISB/catalog/aisb.t3.020_aristotle.yaml +173 -0
  51. package/AISB/catalog/aisb.t3.020_aristotle.zh.yaml +119 -0
  52. package/AISB/catalog/aisb.t3.021_tokenrecycling.yaml +160 -0
  53. package/AISB/catalog/aisb.t3.021_tokenrecycling.zh.yaml +129 -0
  54. package/AISB/catalog/aisb.t3.022_chainofreasoning.yaml +204 -0
  55. package/AISB/catalog/aisb.t3.022_chainofreasoning.zh.yaml +161 -0
  56. package/AISB/catalog/aisb.t3.023_guidedembed.yaml +211 -0
  57. package/AISB/catalog/aisb.t3.023_guidedembed.zh.yaml +189 -0
  58. package/AISB/catalog/aisb.t3.024_outputcentric.yaml +148 -0
  59. package/AISB/catalog/aisb.t3.024_outputcentric.zh.yaml +131 -0
  60. package/AISB/catalog/aisb.t3.025_deeper.yaml +143 -0
  61. package/AISB/catalog/aisb.t3.025_deeper.zh.yaml +116 -0
  62. package/AISB/catalog/aisb.t3.026_gartkg.yaml +195 -0
  63. package/AISB/catalog/aisb.t3.026_gartkg.zh.yaml +127 -0
  64. package/AISB/catalog/aisb.t3.027_citeeval.yaml +182 -0
  65. package/AISB/catalog/aisb.t3.027_citeeval.zh.yaml +135 -0
  66. package/AISB/catalog/aisb.t3.028_sbam.yaml +206 -0
  67. package/AISB/catalog/aisb.t3.028_sbam.zh.yaml +166 -0
  68. package/AISB/catalog/aisb.t3.029_cdqgeoembed.yaml +224 -0
  69. package/AISB/catalog/aisb.t3.029_cdqgeoembed.zh.yaml +142 -0
  70. package/AISB/catalog/aisb.t3.030_processrm.yaml +211 -0
  71. package/AISB/catalog/aisb.t3.030_processrm.zh.yaml +166 -0
  72. package/AISB/catalog/aisb.t3.031_circuitstability.yaml +172 -0
  73. package/AISB/catalog/aisb.t3.031_circuitstability.zh.yaml +134 -0
  74. package/AISB/catalog/aisb.t3.032_ptsolver.yaml +169 -0
  75. package/AISB/catalog/aisb.t3.032_ptsolver.zh.yaml +135 -0
  76. package/AISB/catalog/aisb.t3.033_gcse.yaml +144 -0
  77. package/AISB/catalog/aisb.t3.033_gcse.zh.yaml +126 -0
  78. package/AISB/catalog/aisb.t3.034_ensemblewm.yaml +183 -0
  79. package/AISB/catalog/aisb.t3.034_ensemblewm.zh.yaml +146 -0
  80. package/AISB/catalog/aisb.t3.035_moralvalueswa.yaml +207 -0
  81. package/AISB/catalog/aisb.t3.035_moralvalueswa.zh.yaml +165 -0
  82. package/AISB/catalog/aisb.t3.036_weakstrongpref.yaml +210 -0
  83. package/AISB/catalog/aisb.t3.036_weakstrongpref.zh.yaml +194 -0
  84. package/AISB/catalog/aisb.t3.037_dementiamask.yaml +172 -0
  85. package/AISB/catalog/aisb.t3.037_dementiamask.zh.yaml +132 -0
  86. package/AISB/catalog/aisb.t3.038_tinysam.yaml +284 -0
  87. package/AISB/catalog/aisb.t3.038_tinysam.zh.yaml +240 -0
  88. package/AISB/catalog/aisb.t3.039_calf.yaml +224 -0
  89. package/AISB/catalog/aisb.t3.039_calf.zh.yaml +194 -0
  90. package/AISB/catalog/aisb.t3.040_graniteguardian.yaml +199 -0
  91. package/AISB/catalog/aisb.t3.040_graniteguardian.zh.yaml +174 -0
  92. package/AISB/catalog/aisb.t3.041_amdm.yaml +149 -0
  93. package/AISB/catalog/aisb.t3.041_amdm.zh.yaml +137 -0
  94. package/AISB/catalog/aisb.t3.042_xpatch.yaml +216 -0
  95. package/AISB/catalog/aisb.t3.042_xpatch.zh.yaml +182 -0
  96. package/AISB/catalog/aisb.t3.043_vhm.yaml +268 -0
  97. package/AISB/catalog/aisb.t3.043_vhm.zh.yaml +193 -0
  98. package/AISB/catalog/aisb.t3.044_rgvi.yaml +224 -0
  99. package/AISB/catalog/aisb.t3.044_rgvi.zh.yaml +176 -0
  100. package/AISB/catalog/aisb.t3.045_pslstm.yaml +203 -0
  101. package/AISB/catalog/aisb.t3.045_pslstm.zh.yaml +179 -0
  102. package/AISB/catalog/aisb.t3.046_nonstatts.yaml +208 -0
  103. package/AISB/catalog/aisb.t3.046_nonstatts.zh.yaml +194 -0
  104. package/AISB/catalog/aisb.t3.047_timepfn.yaml +156 -0
  105. package/AISB/catalog/aisb.t3.047_timepfn.zh.yaml +124 -0
  106. package/AISB/catalog/aisb.t3.048_proxyspex.yaml +148 -0
  107. package/AISB/catalog/aisb.t3.048_proxyspex.zh.yaml +125 -0
  108. package/AISB/catalog/aisb.t3.049_hogwildinference.yaml +183 -0
  109. package/AISB/catalog/aisb.t3.049_hogwildinference.zh.yaml +138 -0
  110. package/AISB/catalog/aisb.t3.050_causalpfn.yaml +214 -0
  111. package/AISB/catalog/aisb.t3.050_causalpfn.zh.yaml +190 -0
  112. package/AISB/catalog/aisb.t3.051_flashtp.yaml +169 -0
  113. package/AISB/catalog/aisb.t3.051_flashtp.zh.yaml +124 -0
  114. package/AISB/catalog/aisb.t3.052_nsdiff.yaml +155 -0
  115. package/AISB/catalog/aisb.t3.052_nsdiff.zh.yaml +138 -0
  116. package/AISB/catalog/aisb.t3.053_k2vae.yaml +158 -0
  117. package/AISB/catalog/aisb.t3.053_k2vae.zh.yaml +132 -0
  118. package/AISB/catalog/aisb.t3.054_timebase.yaml +178 -0
  119. package/AISB/catalog/aisb.t3.054_timebase.zh.yaml +158 -0
  120. package/AISB/catalog/aisb.t3.055_csbrain.yaml +238 -0
  121. package/AISB/catalog/aisb.t3.055_csbrain.zh.yaml +184 -0
  122. package/AISB/catalog/aisb.t3.056_infosam.yaml +224 -0
  123. package/AISB/catalog/aisb.t3.056_infosam.zh.yaml +189 -0
  124. package/AISB/catalog/aisb.t3.057_mdreid.yaml +129 -0
  125. package/AISB/catalog/aisb.t3.057_mdreid.zh.yaml +117 -0
  126. package/AISB/catalog/aisb.t3.058_mindglitch.yaml +171 -0
  127. package/AISB/catalog/aisb.t3.058_mindglitch.zh.yaml +145 -0
  128. package/AISB/catalog/aisb.t3.059_selfsupervised.yaml +154 -0
  129. package/AISB/catalog/aisb.t3.059_selfsupervised.zh.yaml +125 -0
  130. package/AISB/catalog/aisb.t3.060_iaggad.yaml +121 -0
  131. package/AISB/catalog/aisb.t3.060_iaggad.zh.yaml +100 -0
  132. package/AISB/catalog/aisb.t3.061_hsgkn.yaml +136 -0
  133. package/AISB/catalog/aisb.t3.061_hsgkn.zh.yaml +113 -0
  134. package/AISB/catalog/aisb.t3.062_visionts.yaml +237 -0
  135. package/AISB/catalog/aisb.t3.062_visionts.zh.yaml +216 -0
  136. package/AISB/catalog/aisb.t3.063_tsrag.yaml +162 -0
  137. package/AISB/catalog/aisb.t3.063_tsrag.zh.yaml +138 -0
  138. package/AISB/catalog/aisb.t3.064_pir.yaml +221 -0
  139. package/AISB/catalog/aisb.t3.064_pir.zh.yaml +197 -0
  140. package/AISB/catalog/aisb.t3.065_proteinbinding.yaml +234 -0
  141. package/AISB/catalog/aisb.t3.065_proteinbinding.zh.yaml +167 -0
  142. package/AISB/catalog/aisb.t3.066_tropicalattention.yaml +267 -0
  143. package/AISB/catalog/aisb.t3.066_tropicalattention.zh.yaml +229 -0
  144. package/AISB/catalog/aisb.t3.067_kanad.yaml +193 -0
  145. package/AISB/catalog/aisb.t3.067_kanad.zh.yaml +167 -0
  146. package/AISB/catalog/aisb.t3.068_sempo.yaml +187 -0
  147. package/AISB/catalog/aisb.t3.068_sempo.zh.yaml +148 -0
  148. package/AISB/catalog/aisb.t3.069_treehfd.yaml +129 -0
  149. package/AISB/catalog/aisb.t3.069_treehfd.zh.yaml +111 -0
  150. package/AISB/catalog/aisb.t3.070_certifiedunlearning.yaml +224 -0
  151. package/AISB/catalog/aisb.t3.070_certifiedunlearning.zh.yaml +171 -0
  152. package/AISB/catalog/aisb.t3.071_neuralmjd.yaml +142 -0
  153. package/AISB/catalog/aisb.t3.071_neuralmjd.zh.yaml +120 -0
  154. package/AISB/catalog/aisb.t3.072_fedgmt.yaml +181 -0
  155. package/AISB/catalog/aisb.t3.072_fedgmt.zh.yaml +158 -0
  156. package/AISB/catalog/aisb.t3.073_rld.yaml +161 -0
  157. package/AISB/catalog/aisb.t3.073_rld.zh.yaml +129 -0
  158. package/AISB/catalog/aisb.t3.074_lsvi.yaml +163 -0
  159. package/AISB/catalog/aisb.t3.074_lsvi.zh.yaml +129 -0
  160. package/AISB/catalog/aisb.t3.075_treeslicedentropy.yaml +201 -0
  161. package/AISB/catalog/aisb.t3.075_treeslicedentropy.zh.yaml +148 -0
  162. package/AISB/catalog/aisb.t3.076_aanet.yaml +169 -0
  163. package/AISB/catalog/aisb.t3.076_aanet.zh.yaml +129 -0
  164. package/AISB/catalog/aisb.t3.077_cmnn.yaml +199 -0
  165. package/AISB/catalog/aisb.t3.077_cmnn.zh.yaml +165 -0
  166. package/AISB/catalog/aisb.t3.078_conformalanomaly.yaml +146 -0
  167. package/AISB/catalog/aisb.t3.078_conformalanomaly.zh.yaml +117 -0
  168. package/AISB/catalog/aisb.t3.079_dpfkmeans.yaml +131 -0
  169. package/AISB/catalog/aisb.t3.079_dpfkmeans.zh.yaml +104 -0
  170. package/AISB/catalog/aisb.t3.080_latentscorereweight.yaml +169 -0
  171. package/AISB/catalog/aisb.t3.080_latentscorereweight.zh.yaml +123 -0
  172. package/AISB/catalog/aisb.t3.081_qmamba.yaml +150 -0
  173. package/AISB/catalog/aisb.t3.081_qmamba.zh.yaml +117 -0
  174. package/AISB/catalog/aisb.t3.082_onlinellmrouting.yaml +160 -0
  175. package/AISB/catalog/aisb.t3.082_onlinellmrouting.zh.yaml +133 -0
  176. package/AISB/catalog/aisb.t3.083_starformer.yaml +178 -0
  177. package/AISB/catalog/aisb.t3.083_starformer.zh.yaml +140 -0
  178. package/AISB/catalog/aisb.t3.084_ift.yaml +139 -0
  179. package/AISB/catalog/aisb.t3.084_ift.zh.yaml +111 -0
  180. package/AISB/catalog/aisb.t3.085_neuralsurv.yaml +183 -0
  181. package/AISB/catalog/aisb.t3.085_neuralsurv.zh.yaml +143 -0
  182. package/AISB/catalog/aisb.t3.086_stella.yaml +197 -0
  183. package/AISB/catalog/aisb.t3.086_stella.zh.yaml +142 -0
  184. package/AISB/catalog/aisb.t3.087_moses.yaml +167 -0
  185. package/AISB/catalog/aisb.t3.087_moses.zh.yaml +132 -0
  186. package/AISB/catalog/aisb.t3.088_channelnorm.yaml +140 -0
  187. package/AISB/catalog/aisb.t3.088_channelnorm.zh.yaml +109 -0
  188. package/AISB/catalog/aisb.t3.089_causalvelocity.yaml +730 -0
  189. package/AISB/catalog/aisb.t3.089_causalvelocity.zh.yaml +668 -0
  190. package/AISB/catalog/aisb.t3.090_rstib.yaml +144 -0
  191. package/AISB/catalog/aisb.t3.090_rstib.zh.yaml +109 -0
  192. package/AISB/catalog/aisb.t3.091_timeawarecausal.yaml +132 -0
  193. package/AISB/catalog/aisb.t3.091_timeawarecausal.zh.yaml +107 -0
  194. package/AISB/catalog/aisb.t3.092_kmeanslocalopt.yaml +138 -0
  195. package/AISB/catalog/aisb.t3.092_kmeanslocalopt.zh.yaml +110 -0
  196. package/AISB/catalog/aisb.t3.093_fedwmsam.yaml +134 -0
  197. package/AISB/catalog/aisb.t3.093_fedwmsam.zh.yaml +106 -0
  198. package/AISB/catalog/aisb.t3.094_boundre.yaml +147 -0
  199. package/AISB/catalog/aisb.t3.094_boundre.zh.yaml +114 -0
  200. package/AISB/catalog/aisb.t3.095_fastfeaturecp.yaml +153 -0
  201. package/AISB/catalog/aisb.t3.095_fastfeaturecp.zh.yaml +118 -0
  202. package/AISB/catalog/aisb.t3.096_m3svm.yaml +189 -0
  203. package/AISB/catalog/aisb.t3.096_m3svm.zh.yaml +149 -0
  204. package/AISB/catalog/aisb.t3.097_wassersteintl.yaml +212 -0
  205. package/AISB/catalog/aisb.t3.097_wassersteintl.zh.yaml +169 -0
  206. package/AISB/catalog/aisb.t3.098_xmahalanobis.yaml +171 -0
  207. package/AISB/catalog/aisb.t3.098_xmahalanobis.zh.yaml +127 -0
  208. package/AISB/catalog/aisb.t3.099_ollalanding.yaml +248 -0
  209. package/AISB/catalog/aisb.t3.099_ollalanding.zh.yaml +182 -0
  210. package/AISB/catalog/aisb.t3.100_invmissingdata.yaml +179 -0
  211. package/AISB/catalog/aisb.t3.100_invmissingdata.zh.yaml +150 -0
  212. package/AISB/catalog/aisb.t3.101_acia.yaml +164 -0
  213. package/AISB/catalog/aisb.t3.101_acia.zh.yaml +109 -0
  214. package/AISB/catalog/aisb.t3.102_stochasticff.yaml +178 -0
  215. package/AISB/catalog/aisb.t3.102_stochasticff.zh.yaml +130 -0
  216. package/AISB/catalog/aisb.t3.103_qdcp.yaml +150 -0
  217. package/AISB/catalog/aisb.t3.103_qdcp.zh.yaml +116 -0
  218. package/AISB/catalog/aisb.t3.104_balancedactiveinf.yaml +137 -0
  219. package/AISB/catalog/aisb.t3.104_balancedactiveinf.zh.yaml +104 -0
  220. package/AISB/catalog/aisb.t3.105_binaryclasseval.yaml +161 -0
  221. package/AISB/catalog/aisb.t3.105_binaryclasseval.zh.yaml +130 -0
  222. package/AISB/image/001_aisb.t3.001_savvy.jpg +0 -0
  223. package/AISB/image/002_aisb.t3.002_pinet.jpg +0 -0
  224. package/AISB/image/003_aisb.t3.003_dmsqd.jpg +0 -0
  225. package/AISB/image/004_aisb.t3.004_decentralattn.jpg +0 -0
  226. package/AISB/image/005_aisb.t3.005_tsae.jpg +0 -0
  227. package/AISB/image/006_aisb.t3.006_physense.jpg +0 -0
  228. package/AISB/image/007_aisb.t3.007_reasoningiqa.jpg +0 -0
  229. package/AISB/image/008_aisb.t3.008_meanflows.jpg +0 -0
  230. package/AISB/image/009_aisb.t3.009_scoremissing.jpg +0 -0
  231. package/AISB/image/010_aisb.t3.010_suitabilityfilter.jpg +0 -0
  232. package/AISB/image/011_aisb.t3.011_osd.jpg +0 -0
  233. package/AISB/image/012_aisb.t3.012_efficientqat.jpg +0 -0
  234. package/AISB/image/013_aisb.t3.013_appl.jpg +0 -0
  235. package/AISB/image/014_aisb.t3.014_piguard.jpg +0 -0
  236. package/AISB/image/015_aisb.t3.015_frspec.jpg +0 -0
  237. package/AISB/image/016_aisb.t3.016_mathfusion.jpg +0 -0
  238. package/AISB/image/017_aisb.t3.017_multimodalglp.jpg +0 -0
  239. package/AISB/image/018_aisb.t3.018_cotsynth.jpg +0 -0
  240. package/AISB/image/019_aisb.t3.019_dyscaleut.jpg +0 -0
  241. package/AISB/image/020_aisb.t3.020_aristotle.jpg +0 -0
  242. package/AISB/image/021_aisb.t3.021_tokenrecycling.jpg +0 -0
  243. package/AISB/image/022_aisb.t3.022_chainofreasoning.jpg +0 -0
  244. package/AISB/image/023_aisb.t3.023_guidedembed.jpg +0 -0
  245. package/AISB/image/024_aisb.t3.024_outputcentric.jpg +0 -0
  246. package/AISB/image/025_aisb.t3.025_deeper.jpg +0 -0
  247. package/AISB/image/026_aisb.t3.026_gartkg.jpg +0 -0
  248. package/AISB/image/027_aisb.t3.027_citeeval.jpg +0 -0
  249. package/AISB/image/028_aisb.t3.028_sbam.jpg +0 -0
  250. package/AISB/image/029_aisb.t3.029_cdqgeoembed.jpg +0 -0
  251. package/AISB/image/030_aisb.t3.030_processrm.jpg +0 -0
  252. package/AISB/image/031_aisb.t3.031_circuitstability.jpg +0 -0
  253. package/AISB/image/032_aisb.t3.032_ptsolver.jpg +0 -0
  254. package/AISB/image/033_aisb.t3.033_gcse.jpg +0 -0
  255. package/AISB/image/034_aisb.t3.034_ensemblewm.jpg +0 -0
  256. package/AISB/image/035_aisb.t3.035_moralvalueswa.jpg +0 -0
  257. package/AISB/image/036_aisb.t3.036_weakstrongpref.jpg +0 -0
  258. package/AISB/image/037_aisb.t3.037_dementiamask.jpg +0 -0
  259. package/AISB/image/038_aisb.t3.038_tinysam.jpg +0 -0
  260. package/AISB/image/039_aisb.t3.039_calf.jpg +0 -0
  261. package/AISB/image/040_aisb.t3.040_graniteguardian.jpg +0 -0
  262. package/AISB/image/041_aisb.t3.041_amdm.jpg +0 -0
  263. package/AISB/image/042_aisb.t3.042_xpatch.jpg +0 -0
  264. package/AISB/image/043_aisb.t3.043_vhm.jpg +0 -0
  265. package/AISB/image/044_aisb.t3.044_rgvi.jpg +0 -0
  266. package/AISB/image/045_aisb.t3.045_pslstm.jpg +0 -0
  267. package/AISB/image/046_aisb.t3.046_nonstatts.jpg +0 -0
  268. package/AISB/image/047_aisb.t3.047_timepfn.jpg +0 -0
  269. package/AISB/image/048_aisb.t3.048_proxyspex.jpg +0 -0
  270. package/AISB/image/049_aisb.t3.049_hogwildinference.jpg +0 -0
  271. package/AISB/image/050_aisb.t3.050_causalpfn.jpg +0 -0
  272. package/AISB/image/051_aisb.t3.051_flashtp.jpg +0 -0
  273. package/AISB/image/052_aisb.t3.052_nsdiff.jpg +0 -0
  274. package/AISB/image/053_aisb.t3.053_k2vae.jpg +0 -0
  275. package/AISB/image/054_aisb.t3.054_timebase.jpg +0 -0
  276. package/AISB/image/055_aisb.t3.055_csbrain.jpg +0 -0
  277. package/AISB/image/056_aisb.t3.056_infosam.jpg +0 -0
  278. package/AISB/image/057_aisb.t3.057_mdreid.jpg +0 -0
  279. package/AISB/image/058_aisb.t3.058_mindglitch.jpg +0 -0
  280. package/AISB/image/059_aisb.t3.059_selfsupervised.jpg +0 -0
  281. package/AISB/image/060_aisb.t3.060_iaggad.jpg +0 -0
  282. package/AISB/image/061_aisb.t3.061_hsgkn.jpg +0 -0
  283. package/AISB/image/062_aisb.t3.062_visionts.jpg +0 -0
  284. package/AISB/image/063_aisb.t3.063_tsrag.jpg +0 -0
  285. package/AISB/image/064_aisb.t3.064_pir.jpg +0 -0
  286. package/AISB/image/065_aisb.t3.065_proteinbinding.jpg +0 -0
  287. package/AISB/image/066_aisb.t3.066_tropicalattention.jpg +0 -0
  288. package/AISB/image/067_aisb.t3.067_kanad.jpg +0 -0
  289. package/AISB/image/068_aisb.t3.068_sempo.jpg +0 -0
  290. package/AISB/image/069_aisb.t3.069_treehfd.jpg +0 -0
  291. package/AISB/image/070_aisb.t3.070_certifiedunlearning.jpg +0 -0
  292. package/AISB/image/071_aisb.t3.071_neuralmjd.jpg +0 -0
  293. package/AISB/image/072_aisb.t3.072_fedgmt.jpg +0 -0
  294. package/AISB/image/073_aisb.t3.073_rld.jpg +0 -0
  295. package/AISB/image/074_aisb.t3.074_lsvi.jpg +0 -0
  296. package/AISB/image/075_aisb.t3.075_treeslicedentropy.jpg +0 -0
  297. package/AISB/image/076_aisb.t3.076_aanet.jpg +0 -0
  298. package/AISB/image/077_aisb.t3.077_cmnn.jpg +0 -0
  299. package/AISB/image/078_aisb.t3.078_conformalanomaly.jpg +0 -0
  300. package/AISB/image/079_aisb.t3.079_dpfkmeans.jpg +0 -0
  301. package/AISB/image/080_aisb.t3.080_latentscorereweight.jpg +0 -0
  302. package/AISB/image/081_aisb.t3.081_qmamba.jpg +0 -0
  303. package/AISB/image/082_aisb.t3.082_onlinellmrouting.jpg +0 -0
  304. package/AISB/image/083_aisb.t3.083_starformer.jpg +0 -0
  305. package/AISB/image/084_aisb.t3.084_ift.jpg +0 -0
  306. package/AISB/image/085_aisb.t3.085_neuralsurv.jpg +0 -0
  307. package/AISB/image/086_aisb.t3.086_stella.jpg +0 -0
  308. package/AISB/image/087_aisb.t3.087_moses.jpg +0 -0
  309. package/AISB/image/088_aisb.t3.088_channelnorm.jpg +0 -0
  310. package/AISB/image/089_aisb.t3.089_causalvelocity.jpg +0 -0
  311. package/AISB/image/090_aisb.t3.090_rstib.jpg +0 -0
  312. package/AISB/image/091_aisb.t3.091_timeawarecausal.jpg +0 -0
  313. package/AISB/image/092_aisb.t3.092_kmeanslocalopt.jpg +0 -0
  314. package/AISB/image/093_aisb.t3.093_fedwmsam.jpg +0 -0
  315. package/AISB/image/094_aisb.t3.094_boundre.jpg +0 -0
  316. package/AISB/image/095_aisb.t3.095_fastfeaturecp.jpg +0 -0
  317. package/AISB/image/096_aisb.t3.096_m3svm.jpg +0 -0
  318. package/AISB/image/097_aisb.t3.097_wassersteintl.jpg +0 -0
  319. package/AISB/image/098_aisb.t3.098_xmahalanobis.jpg +0 -0
  320. package/AISB/image/099_aisb.t3.099_ollalanding.jpg +0 -0
  321. package/AISB/image/100_aisb.t3.100_invmissingdata.jpg +0 -0
  322. package/AISB/image/101_aisb.t3.101_acia.jpg +0 -0
  323. package/AISB/image/102_aisb.t3.102_stochasticff.jpg +0 -0
  324. package/AISB/image/103_aisb.t3.103_qdcp.jpg +0 -0
  325. package/AISB/image/104_aisb.t3.104_balancedactiveinf.jpg +0 -0
  326. package/AISB/image/105_aisb.t3.105_binaryclasseval.jpg +0 -0
  327. package/AISB/image/106_aisb.t1.reasoning_lite.jpg +0 -0
  328. package/AISB/image/107_aisb.t2.paper_audit.jpg +0 -0
  329. package/AISB/image/108_aisb.t3.multi_gpu_search.jpg +0 -0
  330. package/AISB/image/109_aisb.t3.tdc_admet.jpg +0 -0
  331. package/AISB/image/aisb.b1.agentic_coding.svg +16 -0
  332. package/AISB/image/aisb.b10.climate_earth.svg +16 -0
  333. package/AISB/image/aisb.b11.model_efficiency.svg +16 -0
  334. package/AISB/image/aisb.b12.embodied_ai.svg +16 -0
  335. package/AISB/image/aisb.b2.agent_systems.svg +16 -0
  336. package/AISB/image/aisb.b3.self_evolving_rl.svg +16 -0
  337. package/AISB/image/aisb.b4.lm_reasoning.svg +16 -0
  338. package/AISB/image/aisb.b5.math_proof.svg +16 -0
  339. package/AISB/image/aisb.b6.research_process.svg +16 -0
  340. package/AISB/image/aisb.b7.multimodal_fusion.svg +16 -0
  341. package/AISB/image/aisb.b8.lifesci_drug.svg +16 -0
  342. package/AISB/image/aisb.b9.material_science.svg +16 -0
  343. package/README.md +196 -32
  344. package/bin/ds.js +924 -66
  345. package/docs/en/00_QUICK_START.md +195 -18
  346. package/docs/en/01_SETTINGS_REFERENCE.md +468 -96
  347. package/docs/en/02_START_RESEARCH_GUIDE.md +26 -5
  348. package/docs/en/03_QQ_CONNECTOR_GUIDE.md +14 -3
  349. package/docs/en/04_LINGZHU_CONNECTOR_GUIDE.md +2 -0
  350. package/docs/en/05_TUI_GUIDE.md +171 -2
  351. package/docs/en/07_MEMORY_AND_MCP.md +38 -2
  352. package/docs/en/09_DOCTOR.md +78 -7
  353. package/docs/en/10_WEIXIN_CONNECTOR_GUIDE.md +38 -1
  354. package/docs/en/11_LICENSE_AND_RISK.md +4 -0
  355. package/docs/en/12_GUIDED_WORKFLOW_TOUR.md +15 -0
  356. package/docs/en/14_PROMPT_SKILLS_AND_MCP_GUIDE.md +9 -0
  357. package/docs/en/15_CODEX_PROVIDER_SETUP.md +624 -180
  358. package/docs/en/16_TELEGRAM_CONNECTOR_GUIDE.md +14 -0
  359. package/docs/en/17_WHATSAPP_CONNECTOR_GUIDE.md +14 -0
  360. package/docs/en/18_FEISHU_CONNECTOR_GUIDE.md +14 -0
  361. package/docs/en/21_LOCAL_MODEL_BACKENDS_GUIDE.md +386 -0
  362. package/docs/en/22_BENCHSTORE_YAML_REFERENCE.md +469 -0
  363. package/docs/en/23_BENCHSTORE_GITHUB_RELEASES_SPEC.md +316 -0
  364. package/docs/en/24_CLAUDE_CODE_PROVIDER_SETUP.md +469 -0
  365. package/docs/en/25_OPENCODE_PROVIDER_SETUP.md +653 -0
  366. package/docs/en/26_CITATION_AND_ATTRIBUTION.md +119 -0
  367. package/docs/en/27_KIMI_CODE_PROVIDER_SETUP.md +180 -0
  368. package/docs/en/28_DISCORD_CONNECTOR_GUIDE.md +61 -0
  369. package/docs/en/29_SLACK_CONNECTOR_GUIDE.md +60 -0
  370. package/docs/en/30_SETTINGS_CONTROL_CENTER_GUIDE.md +371 -0
  371. package/docs/en/{19_LOCAL_BROWSER_AUTH.md → 31_LOCAL_BROWSER_AUTH.md} +1 -1
  372. package/docs/en/32_WINDOWS_WSL2_DEPLOYMENT_GUIDE.md +273 -0
  373. package/docs/en/33_WORKSPACE_EXPLORER_QA.md +121 -0
  374. package/docs/en/91_DEVELOPMENT.md +266 -0
  375. package/docs/en/99_ACKNOWLEDGEMENTS.md +24 -19
  376. package/docs/en/README.md +48 -7
  377. package/docs/images/admin/admin-connectors-health-en.png +0 -0
  378. package/docs/images/admin/admin-controllers-en.png +0 -0
  379. package/docs/images/admin/admin-diagnostics-en.png +0 -0
  380. package/docs/images/admin/admin-errors-en.png +0 -0
  381. package/docs/images/admin/admin-issues-en.png +0 -0
  382. package/docs/images/admin/admin-logs-en.png +0 -0
  383. package/docs/images/admin/admin-quest-detail-en.png +0 -0
  384. package/docs/images/admin/admin-quests-en.png +0 -0
  385. package/docs/images/admin/admin-repairs-en.png +0 -0
  386. package/docs/images/admin/admin-runtime-en.png +0 -0
  387. package/docs/images/admin/admin-search-en.png +0 -0
  388. package/docs/images/admin/admin-stats-en.png +0 -0
  389. package/docs/images/admin/admin-summary-en.png +0 -0
  390. package/docs/images/connectors/connector-discord-en.png +0 -0
  391. package/docs/images/connectors/connector-feishu-en.png +0 -0
  392. package/docs/images/connectors/connector-lingzhu-en.png +0 -0
  393. package/docs/images/connectors/connector-qq-en.png +0 -0
  394. package/docs/images/connectors/connector-slack-en.png +0 -0
  395. package/docs/images/connectors/connector-telegram-en.png +0 -0
  396. package/docs/images/connectors/connector-weixin-en.png +0 -0
  397. package/docs/images/connectors/connector-whatsapp-en.png +0 -0
  398. package/docs/images/settings/settings-baselines-en.png +0 -0
  399. package/docs/images/settings/settings-config-en.png +0 -0
  400. package/docs/images/settings/settings-connectors-overview-en.png +0 -0
  401. package/docs/images/settings/settings-deepxiv-en.png +0 -0
  402. package/docs/images/settings/settings-mcp-servers-en.png +0 -0
  403. package/docs/images/settings/settings-plugins-en.png +0 -0
  404. package/docs/images/settings/settings-runners-en.png +0 -0
  405. package/docs/zh/00_QUICK_START.md +142 -18
  406. package/docs/zh/01_SETTINGS_REFERENCE.md +219 -98
  407. package/docs/zh/02_START_RESEARCH_GUIDE.md +26 -5
  408. package/docs/zh/05_TUI_GUIDE.md +171 -2
  409. package/docs/zh/07_MEMORY_AND_MCP.md +29 -2
  410. package/docs/zh/09_DOCTOR.md +54 -8
  411. package/docs/zh/10_WEIXIN_CONNECTOR_GUIDE.md +24 -1
  412. package/docs/zh/11_LICENSE_AND_RISK.md +4 -0
  413. package/docs/zh/12_GUIDED_WORKFLOW_TOUR.md +15 -0
  414. package/docs/zh/14_PROMPT_SKILLS_AND_MCP_GUIDE.md +9 -0
  415. package/docs/zh/15_CODEX_PROVIDER_SETUP.md +552 -181
  416. package/docs/zh/21_LOCAL_MODEL_BACKENDS_GUIDE.md +384 -0
  417. package/docs/zh/22_BENCHSTORE_YAML_REFERENCE.md +459 -0
  418. package/docs/zh/23_BENCHSTORE_GITHUB_RELEASES_SPEC.md +287 -0
  419. package/docs/zh/23_CLAUDE_RUNNER_GUIDE.md +103 -0
  420. package/docs/zh/24_CLAUDE_CODE_PROVIDER_SETUP.md +460 -0
  421. package/docs/zh/25_OPENCODE_PROVIDER_SETUP.md +660 -0
  422. package/docs/zh/26_CITATION_AND_ATTRIBUTION.md +102 -0
  423. package/docs/zh/27_KIMI_CODE_PROVIDER_SETUP.md +51 -0
  424. package/docs/zh/{19_LOCAL_BROWSER_AUTH.md → 31_LOCAL_BROWSER_AUTH.md} +1 -1
  425. package/docs/zh/32_WINDOWS_WSL2_DEPLOYMENT_GUIDE.md +264 -0
  426. package/docs/zh/33_WORKSPACE_EXPLORER_QA.md +127 -0
  427. package/docs/zh/99_ACKNOWLEDGEMENTS.md +23 -19
  428. package/docs/zh/README.md +33 -7
  429. package/install.sh +168 -20
  430. package/package.json +5 -1
  431. package/pyproject.toml +2 -1
  432. package/src/deepscientist/__init__.py +1 -1
  433. package/src/deepscientist/acp/envelope.py +13 -0
  434. package/src/deepscientist/admin/__init__.py +3 -0
  435. package/src/deepscientist/admin/charts.py +681 -0
  436. package/src/deepscientist/admin/logs.py +119 -0
  437. package/src/deepscientist/admin/repairs.py +217 -0
  438. package/src/deepscientist/admin/service.py +1310 -0
  439. package/src/deepscientist/admin/system_info.py +700 -0
  440. package/src/deepscientist/admin/tasks.py +465 -0
  441. package/src/deepscientist/admin/tool_metrics.py +600 -0
  442. package/src/deepscientist/artifact/guidance.py +8 -4
  443. package/src/deepscientist/artifact/schemas.py +115 -0
  444. package/src/deepscientist/artifact/service.py +4268 -260
  445. package/src/deepscientist/bash_exec/monitor.py +30 -3
  446. package/src/deepscientist/bash_exec/service.py +134 -1
  447. package/src/deepscientist/benchstore/__init__.py +4 -0
  448. package/src/deepscientist/benchstore/prompt_builder.py +224 -0
  449. package/src/deepscientist/benchstore/service.py +1716 -0
  450. package/src/deepscientist/bridges/connectors.py +8 -2
  451. package/src/deepscientist/channels/weixin_ilink.py +8 -1
  452. package/src/deepscientist/cli.py +92 -17
  453. package/src/deepscientist/codex_cli_compat.py +187 -74
  454. package/src/deepscientist/config/models.py +82 -11
  455. package/src/deepscientist/config/service.py +1077 -93
  456. package/src/deepscientist/connector/weixin_support.py +48 -17
  457. package/src/deepscientist/daemon/api/handlers.py +827 -235
  458. package/src/deepscientist/daemon/api/router.py +81 -1
  459. package/src/deepscientist/daemon/app.py +1512 -85
  460. package/src/deepscientist/diagnostics/__init__.py +6 -0
  461. package/src/deepscientist/diagnostics/runner_failures.py +277 -0
  462. package/src/deepscientist/doctor.py +407 -56
  463. package/src/deepscientist/evidence_packets.py +590 -0
  464. package/src/deepscientist/home.py +52 -4
  465. package/src/deepscientist/kimi_cli_compat.py +50 -0
  466. package/src/deepscientist/latex_runtime.py +2 -2
  467. package/src/deepscientist/mcp/context.py +2 -0
  468. package/src/deepscientist/mcp/schemas.py +114 -0
  469. package/src/deepscientist/mcp/server.py +1566 -126
  470. package/src/deepscientist/memory/service.py +203 -16
  471. package/src/deepscientist/process_control.py +8 -1
  472. package/src/deepscientist/prompts/builder.py +850 -88
  473. package/src/deepscientist/quest/__init__.py +2 -2
  474. package/src/deepscientist/quest/layout.py +12 -1
  475. package/src/deepscientist/quest/node_traces.py +10 -0
  476. package/src/deepscientist/quest/service.py +1852 -161
  477. package/src/deepscientist/quest/stage_views.py +1 -1
  478. package/src/deepscientist/runners/__init__.py +18 -0
  479. package/src/deepscientist/runners/base.py +89 -1
  480. package/src/deepscientist/runners/builtins.py +13 -1
  481. package/src/deepscientist/runners/claude.py +391 -0
  482. package/src/deepscientist/runners/codex.py +480 -35
  483. package/src/deepscientist/runners/codex_telemetry.py +127 -0
  484. package/src/deepscientist/runners/kimi.py +334 -0
  485. package/src/deepscientist/runners/metadata.py +68 -0
  486. package/src/deepscientist/runners/opencode.py +414 -0
  487. package/src/deepscientist/runners/runtime_overrides.py +100 -0
  488. package/src/deepscientist/runners/simple_cli.py +538 -0
  489. package/src/deepscientist/runtime_storage.py +303 -0
  490. package/src/deepscientist/shared.py +80 -16
  491. package/src/deepscientist/skills/installer.py +37 -0
  492. package/src/deepscientist/skills/registry.py +2 -0
  493. package/src/deepscientist/tinytex.py +2 -2
  494. package/src/deepscientist/tui.py +10 -3
  495. package/src/prompts/benchstore/system.md +77 -0
  496. package/src/prompts/connectors/qq.md +33 -2
  497. package/src/prompts/connectors/weixin.md +208 -23
  498. package/src/prompts/contracts/admin_ops.md +74 -0
  499. package/src/prompts/contracts/admin_ops_knowledge.md +138 -0
  500. package/src/prompts/contracts/shared_interaction.md +5 -10
  501. package/src/prompts/start_setup/system.md +422 -0
  502. package/src/prompts/system.md +411 -304
  503. package/src/prompts/system_copilot.md +89 -0
  504. package/src/skills/analysis-campaign/SKILL.md +239 -578
  505. package/src/skills/analysis-campaign/references/artifact-flow-examples.md +102 -0
  506. package/src/skills/analysis-campaign/references/boundary-cases.md +98 -0
  507. package/src/skills/analysis-campaign/references/campaign-checklist-template.md +39 -24
  508. package/src/skills/analysis-campaign/references/campaign-design.md +26 -10
  509. package/src/skills/analysis-campaign/references/campaign-plan-template.md +53 -54
  510. package/src/skills/analysis-campaign/references/operational-guidance.md +97 -0
  511. package/src/skills/analysis-campaign/references/writing-facing-slice-examples.md +10 -20
  512. package/src/skills/baseline/SKILL.md +183 -461
  513. package/src/skills/baseline/references/artifact-flow-examples.md +106 -0
  514. package/src/skills/baseline/references/artifact-payload-examples.md +1 -1
  515. package/src/skills/baseline/references/baseline-checklist-template.md +27 -35
  516. package/src/skills/baseline/references/baseline-plan-template.md +37 -76
  517. package/src/skills/baseline/references/boundary-cases.md +86 -0
  518. package/src/skills/baseline/references/codebase-audit-checklist.md +2 -6
  519. package/src/skills/baseline/references/comparability-contract.md +7 -12
  520. package/src/skills/baseline/references/operational-guidance.md +56 -0
  521. package/src/skills/baseline/references/route-selection.md +5 -25
  522. package/src/skills/decision/SKILL.md +113 -306
  523. package/src/skills/decision/references/checkpoint-memory-template.md +47 -0
  524. package/src/skills/decision/references/operational-guidance.md +94 -0
  525. package/src/skills/decision/references/research-route-criteria.md +7 -8
  526. package/src/skills/decision/references/strategic-decision-template.md +13 -26
  527. package/src/skills/experiment/SKILL.md +132 -670
  528. package/src/skills/experiment/references/execution-playbook.md +374 -0
  529. package/src/skills/experiment/references/main-experiment-checklist-template.md +26 -2
  530. package/src/skills/experiment/references/main-experiment-plan-template.md +28 -17
  531. package/src/skills/experiment/references/operational-guidance.md +108 -0
  532. package/src/skills/finalize/SKILL.md +62 -0
  533. package/src/skills/finalize/references/checkpoint-memory-template.md +49 -0
  534. package/src/skills/finalize/references/resume-packet-template.md +7 -0
  535. package/src/skills/idea/SKILL.md +228 -15
  536. package/src/skills/idea/references/controlled-brainstorming-playbook.md +78 -0
  537. package/src/skills/idea/references/current-board-packet-template.md +61 -0
  538. package/src/skills/idea/references/high-value-idea-sourcing.md +119 -0
  539. package/src/skills/idea/references/idea-generation-playbook.md +21 -0
  540. package/src/skills/idea/references/idea-thinking-flow.md +6 -0
  541. package/src/skills/idea/references/literature-survey-template.md +3 -0
  542. package/src/skills/idea/references/objective-contract-template.md +54 -0
  543. package/src/skills/idea/references/outline-seeding-example.md +56 -0
  544. package/src/skills/idea/references/pre-idea-draft-template.md +105 -0
  545. package/src/skills/idea/references/related-work-playbook.md +75 -2
  546. package/src/skills/idea/references/research-history-playbook.md +114 -0
  547. package/src/skills/idea/references/selection-gate.md +58 -6
  548. package/src/skills/intake-audit/SKILL.md +43 -2
  549. package/src/skills/intake-audit/references/state-audit-template.md +10 -0
  550. package/src/skills/nature-data/SKILL.md +128 -0
  551. package/src/skills/nature-data/UPSTREAM_LICENSE.txt +21 -0
  552. package/src/skills/nature-data/agents/openai.yaml +4 -0
  553. package/src/skills/nature-data/references/chinese-author-alignment.md +84 -0
  554. package/src/skills/nature-data/references/fair-metadata-checklist.md +105 -0
  555. package/src/skills/nature-data/references/policy-principles.md +103 -0
  556. package/src/skills/nature-data/references/repository-and-identifiers.md +96 -0
  557. package/src/skills/nature-data/references/source-basis.md +54 -0
  558. package/src/skills/nature-data/references/statement-patterns.md +153 -0
  559. package/src/skills/nature-figure/SKILL.md +197 -0
  560. package/src/skills/nature-figure/UPSTREAM_LICENSE.txt +21 -0
  561. package/src/skills/nature-figure/agents/openai.yaml +4 -0
  562. package/src/skills/nature-figure/evals/evals.json +37 -0
  563. package/src/skills/nature-figure/references/api.md +428 -0
  564. package/src/skills/nature-figure/references/backend-selection.md +100 -0
  565. package/src/skills/nature-figure/references/chart-types.md +281 -0
  566. package/src/skills/nature-figure/references/common-patterns.md +349 -0
  567. package/src/skills/nature-figure/references/design-theory.md +436 -0
  568. package/src/skills/nature-figure/references/figure-contract.md +93 -0
  569. package/src/skills/nature-figure/references/nature-2026-observations.md +112 -0
  570. package/src/skills/nature-figure/references/qa-contract.md +119 -0
  571. package/src/skills/nature-figure/references/r-template-index.md +66 -0
  572. package/src/skills/nature-figure/references/r-workflow.md +161 -0
  573. package/src/skills/nature-figure/references/tutorials.md +250 -0
  574. package/src/skills/nature-paper2ppt/SKILL.md +507 -0
  575. package/src/skills/nature-paper2ppt/UPSTREAM_LICENSE.txt +21 -0
  576. package/src/skills/nature-paper2ppt/agents/openai.yaml +4 -0
  577. package/src/skills/nature-polishing/SKILL.md +385 -0
  578. package/src/skills/nature-polishing/UPSTREAM_LICENSE.txt +21 -0
  579. package/src/skills/nature-polishing/agents/openai.yaml +4 -0
  580. package/src/skills/nature-polishing/references/phrasebank-playbook.md +162 -0
  581. package/src/skills/nature-polishing/references/section-moves.md +240 -0
  582. package/src/skills/nature-polishing/references/style-guardrails.md +94 -0
  583. package/src/skills/nature-polishing/references/writing-strategy.md +148 -0
  584. package/src/skills/optimize/SKILL.md +177 -1568
  585. package/src/skills/optimize/references/brief-shaping-playbook.md +95 -0
  586. package/src/skills/optimize/references/candidate-board-template.md +13 -0
  587. package/src/skills/optimize/references/candidate-ranking-template.md +51 -0
  588. package/src/skills/optimize/references/codegen-route-playbook.md +50 -0
  589. package/src/skills/optimize/references/debug-response-template.md +29 -0
  590. package/src/skills/optimize/references/frontier-review-template.md +32 -0
  591. package/src/skills/optimize/references/fusion-playbook.md +36 -0
  592. package/src/skills/optimize/references/method-brief-template.md +73 -0
  593. package/src/skills/optimize/references/operational-guidance.md +621 -0
  594. package/src/skills/optimize/references/optimization-memory-template.md +30 -0
  595. package/src/skills/optimize/references/optimize-checklist-template.md +18 -0
  596. package/src/skills/optimize/references/plateau-response-playbook.md +28 -0
  597. package/src/skills/optimize/references/prompt-patterns.md +49 -0
  598. package/src/skills/paper-outline/SKILL.md +227 -0
  599. package/src/skills/paper-outline/references/outline-patterns.md +87 -0
  600. package/src/skills/paper-plot/SKILL.md +79 -0
  601. package/src/skills/paper-plot/agents/openai.yaml +4 -0
  602. package/src/skills/paper-plot/references/bar_grouped_hatch.md +96 -0
  603. package/src/skills/paper-plot/references/bar_paired_delta.md +72 -0
  604. package/src/skills/paper-plot/references/line_confidence_band.md +75 -0
  605. package/src/skills/paper-plot/references/line_loss_with_inset.md +65 -0
  606. package/src/skills/paper-plot/references/line_training_curve.md +44 -0
  607. package/src/skills/paper-plot/references/radar_dual_series.md +59 -0
  608. package/src/skills/paper-plot/references/scatter_broken_axis.md +59 -0
  609. package/src/skills/paper-plot/references/scatter_tsne_cluster.md +72 -0
  610. package/src/skills/paper-plot/scripts/bar_memevolve.py +109 -0
  611. package/src/skills/paper-plot/scripts/bar_spice.py +166 -0
  612. package/src/skills/paper-plot/scripts/line_aime.py +94 -0
  613. package/src/skills/paper-plot/scripts/line_loss_inset.py +157 -0
  614. package/src/skills/paper-plot/scripts/line_selfdistill.py +168 -0
  615. package/src/skills/paper-plot/scripts/radar_dora.py +151 -0
  616. package/src/skills/paper-plot/scripts/scatter_break.py +169 -0
  617. package/src/skills/paper-plot/scripts/scatter_tsne.py +133 -0
  618. package/src/skills/rebuttal/SKILL.md +9 -0
  619. package/src/skills/references/tool-usage-by-stage.md +438 -0
  620. package/src/skills/review/SKILL.md +105 -7
  621. package/src/skills/science/PROVENANCE.md +44 -0
  622. package/src/skills/science/SKILL.md +137 -0
  623. package/src/skills/science/references/artifact-science-tool.md +110 -0
  624. package/src/skills/science/references/claim-type-discipline.md +56 -0
  625. package/src/skills/science/references/domain-index.md +422 -0
  626. package/src/skills/science/references/hpc-via-bash-exec.md +42 -0
  627. package/src/skills/science/references/package-check-playbook.md +64 -0
  628. package/src/skills/science/references/package-index.min.json +3616 -0
  629. package/src/skills/science/references/packages/abinit.md +80 -0
  630. package/src/skills/science/references/packages/acts.md +73 -0
  631. package/src/skills/science/references/packages/aiida-core.md +80 -0
  632. package/src/skills/science/references/packages/alamode.md +80 -0
  633. package/src/skills/science/references/packages/amuse.md +88 -0
  634. package/src/skills/science/references/packages/anndata.md +88 -0
  635. package/src/skills/science/references/packages/arbor.md +80 -0
  636. package/src/skills/science/references/packages/arc.md +73 -0
  637. package/src/skills/science/references/packages/astropy.md +88 -0
  638. package/src/skills/science/references/packages/astroquery.md +88 -0
  639. package/src/skills/science/references/packages/atomate2.md +80 -0
  640. package/src/skills/science/references/packages/atomsmltr.md +73 -0
  641. package/src/skills/science/references/packages/awkward.md +73 -0
  642. package/src/skills/science/references/packages/batman.md +88 -0
  643. package/src/skills/science/references/packages/biopython.md +88 -0
  644. package/src/skills/science/references/packages/bloqade.md +73 -0
  645. package/src/skills/science/references/packages/brian2.md +73 -0
  646. package/src/skills/science/references/packages/bullet3.md +73 -0
  647. package/src/skills/science/references/packages/calculix.md +80 -0
  648. package/src/skills/science/references/packages/cantera.md +73 -0
  649. package/src/skills/science/references/packages/cavity-md-ipi.md +80 -0
  650. package/src/skills/science/references/packages/ccdproc.md +88 -0
  651. package/src/skills/science/references/packages/celerite2.md +88 -0
  652. package/src/skills/science/references/packages/cellrank.md +73 -0
  653. package/src/skills/science/references/packages/cesm.md +80 -0
  654. package/src/skills/science/references/packages/chemicals.md +73 -0
  655. package/src/skills/science/references/packages/chempy.md +73 -0
  656. package/src/skills/science/references/packages/cirq.md +73 -0
  657. package/src/skills/science/references/packages/coffea.md +73 -0
  658. package/src/skills/science/references/packages/cp2k.md +88 -0
  659. package/src/skills/science/references/packages/custodian.md +80 -0
  660. package/src/skills/science/references/packages/dart.md +73 -0
  661. package/src/skills/science/references/packages/datamol.md +88 -0
  662. package/src/skills/science/references/packages/dd4hep.md +73 -0
  663. package/src/skills/science/references/packages/dealii.md +80 -0
  664. package/src/skills/science/references/packages/deepchem.md +88 -0
  665. package/src/skills/science/references/packages/delphes.md +73 -0
  666. package/src/skills/science/references/packages/devito.md +80 -0
  667. package/src/skills/science/references/packages/dftb.md +88 -0
  668. package/src/skills/science/references/packages/dftd4.md +88 -0
  669. package/src/skills/science/references/packages/dftk-jl.md +80 -0
  670. package/src/skills/science/references/packages/dolfinx.md +80 -0
  671. package/src/skills/science/references/packages/drake.md +73 -0
  672. package/src/skills/science/references/packages/dumux.md +73 -0
  673. package/src/skills/science/references/packages/elk.md +80 -0
  674. package/src/skills/science/references/packages/elmerfem.md +80 -0
  675. package/src/skills/science/references/packages/enzo-e.md +88 -0
  676. package/src/skills/science/references/packages/espresso.md +80 -0
  677. package/src/skills/science/references/packages/exoplanet.md +88 -0
  678. package/src/skills/science/references/packages/fairroot.md +73 -0
  679. package/src/skills/science/references/packages/fbpic.md +80 -0
  680. package/src/skills/science/references/packages/fdtdbath-meep.md +80 -0
  681. package/src/skills/science/references/packages/geant4.md +73 -0
  682. package/src/skills/science/references/packages/geosx.md +80 -0
  683. package/src/skills/science/references/packages/gprmax.md +80 -0
  684. package/src/skills/science/references/packages/gromacs.md +80 -0
  685. package/src/skills/science/references/packages/gwaslab.md +73 -0
  686. package/src/skills/science/references/packages/gz-sim.md +73 -0
  687. package/src/skills/science/references/packages/hail.md +88 -0
  688. package/src/skills/science/references/packages/hiphive.md +80 -0
  689. package/src/skills/science/references/packages/hoomd-blue.md +80 -0
  690. package/src/skills/science/references/packages/itensor.md +73 -0
  691. package/src/skills/science/references/packages/itensors-jl.md +73 -0
  692. package/src/skills/science/references/packages/jdftx.md +73 -0
  693. package/src/skills/science/references/packages/jobflow.md +80 -0
  694. package/src/skills/science/references/packages/kadanoffbaym-jl.md +73 -0
  695. package/src/skills/science/references/packages/kite.md +80 -0
  696. package/src/skills/science/references/packages/kratos.md +80 -0
  697. package/src/skills/science/references/packages/kwant.md +73 -0
  698. package/src/skills/science/references/packages/lammps.md +80 -0
  699. package/src/skills/science/references/packages/lightkurve.md +88 -0
  700. package/src/skills/science/references/packages/limix.md +73 -0
  701. package/src/skills/science/references/packages/maxwelllink.md +80 -0
  702. package/src/skills/science/references/packages/mcdc.md +73 -0
  703. package/src/skills/science/references/packages/meep.md +80 -0
  704. package/src/skills/science/references/packages/mfem.md +80 -0
  705. package/src/skills/science/references/packages/mitgcm.md +73 -0
  706. package/src/skills/science/references/packages/modflow6.md +73 -0
  707. package/src/skills/science/references/packages/molecool.md +73 -0
  708. package/src/skills/science/references/packages/mom6.md +73 -0
  709. package/src/skills/science/references/packages/moose.md +80 -0
  710. package/src/skills/science/references/packages/mpas-model.md +73 -0
  711. package/src/skills/science/references/packages/mujoco.md +73 -0
  712. package/src/skills/science/references/packages/mumax3.md +73 -0
  713. package/src/skills/science/references/packages/nekrs.md +80 -0
  714. package/src/skills/science/references/packages/nessi.md +73 -0
  715. package/src/skills/science/references/packages/nest-simulator.md +73 -0
  716. package/src/skills/science/references/packages/netket.md +73 -0
  717. package/src/skills/science/references/packages/neuron.md +73 -0
  718. package/src/skills/science/references/packages/nextflow.md +88 -0
  719. package/src/skills/science/references/packages/nwchem.md +88 -0
  720. package/src/skills/science/references/packages/openbabel.md +88 -0
  721. package/src/skills/science/references/packages/openems.md +80 -0
  722. package/src/skills/science/references/packages/openff-toolkit.md +88 -0
  723. package/src/skills/science/references/packages/openfoam-dev.md +80 -0
  724. package/src/skills/science/references/packages/openmc.md +73 -0
  725. package/src/skills/science/references/packages/openmm.md +80 -0
  726. package/src/skills/science/references/packages/openmoc.md +73 -0
  727. package/src/skills/science/references/packages/openmx.md +80 -0
  728. package/src/skills/science/references/packages/opensees.md +80 -0
  729. package/src/skills/science/references/packages/opensn.md +80 -0
  730. package/src/skills/science/references/packages/opm-simulators.md +73 -0
  731. package/src/skills/science/references/packages/oqupy.md +73 -0
  732. package/src/skills/science/references/packages/packmol.md +80 -0
  733. package/src/skills/science/references/packages/palabos.md +80 -0
  734. package/src/skills/science/references/packages/parflow.md +80 -0
  735. package/src/skills/science/references/packages/pennylane.md +88 -0
  736. package/src/skills/science/references/packages/perceval.md +73 -0
  737. package/src/skills/science/references/packages/phono3py.md +73 -0
  738. package/src/skills/science/references/packages/phonopy.md +73 -0
  739. package/src/skills/science/references/packages/photutils.md +88 -0
  740. package/src/skills/science/references/packages/picongpu.md +80 -0
  741. package/src/skills/science/references/packages/plink-ng.md +88 -0
  742. package/src/skills/science/references/packages/precice.md +73 -0
  743. package/src/skills/science/references/packages/psc.md +80 -0
  744. package/src/skills/science/references/packages/psi4.md +88 -0
  745. package/src/skills/science/references/packages/pybinding.md +73 -0
  746. package/src/skills/science/references/packages/pyfr.md +80 -0
  747. package/src/skills/science/references/packages/pyhf.md +73 -0
  748. package/src/skills/science/references/packages/pyiron_base.md +80 -0
  749. package/src/skills/science/references/packages/pylcp.md +73 -0
  750. package/src/skills/science/references/packages/pylith.md +80 -0
  751. package/src/skills/science/references/packages/pynbody.md +88 -0
  752. package/src/skills/science/references/packages/pysam.md +88 -0
  753. package/src/skills/science/references/packages/pyscf.md +88 -0
  754. package/src/skills/science/references/packages/q-e.md +73 -0
  755. package/src/skills/science/references/packages/qibo.md +73 -0
  756. package/src/skills/science/references/packages/qiskit.md +73 -0
  757. package/src/skills/science/references/packages/quantica-jl.md +73 -0
  758. package/src/skills/science/references/packages/quantumoptics-jl.md +73 -0
  759. package/src/skills/science/references/packages/quimb.md +73 -0
  760. package/src/skills/science/references/packages/qulacs.md +73 -0
  761. package/src/skills/science/references/packages/qutip.md +73 -0
  762. package/src/skills/science/references/packages/rdkit.md +88 -0
  763. package/src/skills/science/references/packages/rmg-py.md +73 -0
  764. package/src/skills/science/references/packages/root.md +73 -0
  765. package/src/skills/science/references/packages/scanpy.md +88 -0
  766. package/src/skills/science/references/packages/scikit-allel.md +88 -0
  767. package/src/skills/science/references/packages/scikit-bio.md +88 -0
  768. package/src/skills/science/references/packages/scqubits.md +73 -0
  769. package/src/skills/science/references/packages/scuff-em.md +80 -0
  770. package/src/skills/science/references/packages/scvi-tools.md +73 -0
  771. package/src/skills/science/references/packages/seissol.md +73 -0
  772. package/src/skills/science/references/packages/sfepy.md +80 -0
  773. package/src/skills/science/references/packages/sisl.md +73 -0
  774. package/src/skills/science/references/packages/smilei.md +80 -0
  775. package/src/skills/science/references/packages/snakemake.md +88 -0
  776. package/src/skills/science/references/packages/specfem3d-globe.md +80 -0
  777. package/src/skills/science/references/packages/specutils.md +88 -0
  778. package/src/skills/science/references/packages/spglib.md +80 -0
  779. package/src/skills/science/references/packages/squidpy.md +88 -0
  780. package/src/skills/science/references/packages/starry.md +88 -0
  781. package/src/skills/science/references/packages/strawberryfields.md +73 -0
  782. package/src/skills/science/references/packages/su2.md +80 -0
  783. package/src/skills/science/references/packages/sunny-jl.md +73 -0
  784. package/src/skills/science/references/packages/sw4.md +73 -0
  785. package/src/skills/science/references/packages/swift.md +88 -0
  786. package/src/skills/science/references/packages/tdnegf.md +73 -0
  787. package/src/skills/science/references/packages/tenpy.md +73 -0
  788. package/src/skills/science/references/packages/thermo.md +73 -0
  789. package/src/skills/science/references/packages/tkwant.md +73 -0
  790. package/src/skills/science/references/packages/tvb-root.md +73 -0
  791. package/src/skills/science/references/packages/uproot5.md +73 -0
  792. package/src/skills/science/references/packages/vampire.md +80 -0
  793. package/src/skills/science/references/packages/wannier_tools.md +73 -0
  794. package/src/skills/science/references/packages/warpx.md +80 -0
  795. package/src/skills/science/references/packages/wrf.md +73 -0
  796. package/src/skills/science/references/packages/xtb.md +88 -0
  797. package/src/skills/science/references/packages/yt.md +73 -0
  798. package/src/skills/science/references/science-task-brief-template.md +71 -0
  799. package/src/skills/scout/SKILL.md +83 -425
  800. package/src/skills/scout/references/literature-scout-template.md +5 -24
  801. package/src/skills/scout/references/operational-guidance.md +191 -0
  802. package/src/skills/scout/references/paper-triage-playbook.md +11 -35
  803. package/src/skills/write/SKILL.md +744 -1246
  804. package/src/skills/write/references/experiments_analysis_patterns.md +129 -0
  805. package/src/skills/write/references/oral_package_patterns.md +252 -0
  806. package/src/skills/write/references/oral_writing_principles.md +291 -0
  807. package/src/skills/write/references/section_rewrite_checklist.md +234 -0
  808. package/src/tui/dist/app/AppContainer.js +1314 -27
  809. package/src/tui/dist/components/Composer.js +26 -1
  810. package/src/tui/dist/components/ConfigScreen.js +2 -1
  811. package/src/tui/dist/components/InputPrompt.js +25 -9
  812. package/src/tui/dist/components/MainContent.js +18 -3
  813. package/src/tui/dist/components/QuestScreen.js +3 -2
  814. package/src/tui/dist/components/UtilityScreen.js +37 -0
  815. package/src/tui/dist/hooks/useSafeInput.js +10 -0
  816. package/src/tui/dist/index.js +13 -1
  817. package/src/tui/dist/layouts/DefaultAppLayout.js +11 -8
  818. package/src/tui/dist/lib/api.js +89 -1
  819. package/src/tui/package.json +1 -1
  820. package/src/ui/dist/assets/{AnalysisPlugin-DnSm0GZn.js → AnalysisPlugin-CA94NGmI.js} +1 -1
  821. package/src/ui/dist/assets/CliPlugin-DHBzphZU.js +79 -0
  822. package/src/ui/dist/assets/CodeEditorPlugin-BOFwD2rn.js +2 -0
  823. package/src/ui/dist/assets/{CodeViewerPlugin-itb0tltR.js → CodeViewerPlugin-CqDpgjik.js} +4 -4
  824. package/src/ui/dist/assets/{DocViewerPlugin-DqKkiCI6.js → DocViewerPlugin-UDBgt8-4.js} +3 -3
  825. package/src/ui/dist/assets/GitCommitViewerPlugin-BmHtZ0bZ.js +6 -0
  826. package/src/ui/dist/assets/{GitDiffViewerPlugin-DxL2ezFG.js → GitDiffViewerPlugin-CAxjNorQ.js} +2 -2
  827. package/src/ui/dist/assets/{GitSnapshotViewer-B_RQm1YZ.js → GitSnapshotViewer-CweA6VON.js} +2 -2
  828. package/src/ui/dist/assets/{ImageViewerPlugin-tHqlXY3n.js → ImageViewerPlugin-C8wHGvGN.js} +5 -5
  829. package/src/ui/dist/assets/LabPlugin-COyyLUol.js +32 -0
  830. package/src/ui/dist/assets/{LatexPlugin-B495DTXC.js → LatexPlugin-BQjAaA5J.js} +4 -4
  831. package/src/ui/dist/assets/{MarkdownViewerPlugin-DG28-61B.js → MarkdownViewerPlugin-Dy1NE2dI.js} +3 -3
  832. package/src/ui/dist/assets/{MarketplacePlugin-BiOGT-Kj.js → MarketplacePlugin-DMIZtEJ2.js} +2 -2
  833. package/src/ui/dist/assets/NotebookEditor-CFHMq_Qt.js +91 -0
  834. package/src/ui/dist/assets/{NotebookEditor-CVsj8h_T.js → NotebookEditor-WFyd8Ybt.js} +23 -23
  835. package/src/ui/dist/assets/{PdfLoader-CASDQmxJ.js → PdfLoader-CLE5u5TS.js} +3 -3
  836. package/src/ui/dist/assets/{PdfMarkdownPlugin-BFhwoKsY.js → PdfMarkdownPlugin-_iNK_H83.js} +1 -1
  837. package/src/ui/dist/assets/PdfViewerPlugin-DgWsbInT.js +22 -0
  838. package/src/ui/dist/assets/SearchPlugin-DrZmn5iw.js +11 -0
  839. package/src/ui/dist/assets/{TextViewerPlugin-CB4DYfWO.js → TextViewerPlugin-D1-T3aC7.js} +4 -4
  840. package/src/ui/dist/assets/branding/runner-claude.svg +107 -0
  841. package/src/ui/dist/assets/branding/runner-codex.svg +10 -0
  842. package/src/ui/dist/assets/branding/runner-kimi.svg +14 -0
  843. package/src/ui/dist/assets/branding/runner-opencode.svg +7 -0
  844. package/src/ui/dist/assets/cli-store-CoZ-x5Ip.js +1 -0
  845. package/src/ui/dist/assets/{code-DLC6G24T.js → code-DbsmSd3Y.js} +1 -1
  846. package/src/ui/dist/assets/file-diff-panel-DsvyRz47.js +1 -0
  847. package/src/ui/dist/assets/{wrap-text-CwMn-iqb.js → file-jump-queue-DeQBikaw.js} +3 -3
  848. package/src/ui/dist/assets/{file-socket-Cu4Qln7Y.js → file-socket-DA5XIx88.js} +1 -1
  849. package/src/ui/dist/assets/fonts/ds-fonts.css +50 -4
  850. package/src/ui/dist/assets/images/deepxiv/register-guide.png +0 -0
  851. package/src/ui/dist/assets/index-39vY9LmZ.js +1 -0
  852. package/src/ui/dist/assets/{index-wQ7RIIRd.js → index-BsO46tJA.js} +1 -1
  853. package/src/ui/dist/assets/index-CHzJ2xtB.js +3530 -0
  854. package/src/ui/dist/assets/index-DH-zxoZ3.css +33 -0
  855. package/src/ui/dist/assets/{plugin-notebook-HbW2K-1c.js → plugin-notebook-JRhysCqj.js} +2 -2
  856. package/src/ui/dist/assets/{project-sync-CsX08Qno.js → project-sync-DPmWKmKD.js} +1 -1
  857. package/src/ui/dist/assets/{zoom-out-R-GWEhzS.js → zoom-out-DAukFWen.js} +3 -3
  858. package/src/ui/dist/index.html +3 -3
  859. package/src/skills/analysis-campaign/references/artifact-orchestration.md +0 -58
  860. package/src/skills/baseline/references/memory-playbook.md +0 -40
  861. package/src/skills/baseline/references/publishable-baseline-package.md +0 -30
  862. package/src/skills/write/references/outline-evidence-contract-example.md +0 -107
  863. package/src/skills/write/references/paper-experiment-matrix-template.md +0 -131
  864. package/src/skills/write/references/paper-section-playbook.md +0 -64
  865. package/src/skills/write/references/reviewer-first-writing.md +0 -64
  866. package/src/skills/write/references/revision-checklist.md +0 -70
  867. package/src/skills/write/references/section-contracts.md +0 -82
  868. package/src/skills/write/references/sentence-level-proofing.md +0 -49
  869. package/src/ui/dist/assets/AiManusChatView-COFACy7V.js +0 -204
  870. package/src/ui/dist/assets/CliPlugin-CvwCmDQ5.js +0 -109
  871. package/src/ui/dist/assets/CodeEditorPlugin-cOqSa0xq.js +0 -2
  872. package/src/ui/dist/assets/GitCommitViewerPlugin-DVgNHBCS.js +0 -1
  873. package/src/ui/dist/assets/LabCopilotPanel-ClMbq5Yu.js +0 -14
  874. package/src/ui/dist/assets/LabPlugin-L_SuE8ow.js +0 -22
  875. package/src/ui/dist/assets/NotebookEditor-C-4Kt1p9.js +0 -81
  876. package/src/ui/dist/assets/PdfViewerPlugin-DcOzU9vd.js +0 -17
  877. package/src/ui/dist/assets/SearchPlugin-CHj7M58O.js +0 -16
  878. package/src/ui/dist/assets/VNCViewer-CjlbyCB3.js +0 -11
  879. package/src/ui/dist/assets/bot-CFkZY-JP.js +0 -6
  880. package/src/ui/dist/assets/chevron-up-Dq5ofbht.js +0 -6
  881. package/src/ui/dist/assets/file-content-Dv4LoZec.js +0 -1
  882. package/src/ui/dist/assets/file-diff-panel-Denq-lC3.js +0 -1
  883. package/src/ui/dist/assets/file-jump-queue-DA-SdG__.js +0 -1
  884. package/src/ui/dist/assets/git-commit-horizontal-BUh6G52n.js +0 -6
  885. package/src/ui/dist/assets/image-B9HUUddG.js +0 -6
  886. package/src/ui/dist/assets/index-B2B1sg-M.js +0 -1
  887. package/src/ui/dist/assets/index-Cgla8biy.css +0 -33
  888. package/src/ui/dist/assets/index-DRyx7vAc.js +0 -1
  889. package/src/ui/dist/assets/index-Gbl53BNp.js +0 -2496
  890. package/src/ui/dist/assets/pdf-effect-queue-ZtnHFCAi.js +0 -6
  891. package/src/ui/dist/assets/popover-DL6h35vr.js +0 -1
  892. package/src/ui/dist/assets/select-DvmXt1yY.js +0 -11
  893. package/src/ui/dist/assets/sigma-7jpXazui.js +0 -6
  894. package/src/ui/dist/assets/trash-xA7kFt8i.js +0 -11
  895. package/src/ui/dist/assets/useCliAccess-DsMwDjOp.js +0 -1
  896. package/src/ui/dist/assets/useFileDiffOverlay-FuhcnKiw.js +0 -1
@@ -0,0 +1,179 @@
1
+ schema_version: 1
2
+ id: aisb.t3.009_scoremissing
3
+ name: Score Matching with Missing Data
4
+ version: 0.1.0
5
+ one_line: 'Learn full score functions from partially observed tabular data using importance-weighting
6
+ and variational marginal score matching, evaluated on simulated GGM/ICA/normal-estimation
7
+ tasks and real-world S&P 100 and yeast graphical model recovery.
8
+
9
+ '
10
+ task_description: 'This benchmark packages the full experimental pipeline from the
11
+ ICML 2025 Spotlight paper "Score Matching with Missing Data". The core task is to
12
+ estimate the score (gradient of the log-density) of a multivariate distribution
13
+ when training samples have missing coordinates under a Missing Completely At Random
14
+ (MCAR) mechanism. Two complementary methods are provided: (1) marginal importance-weighting
15
+ (IW) score matching, which excels in lower-dimensional and small-sample settings,
16
+ and (2) marginal variational score matching, which is stronger in high-dimensional
17
+ problems such as Gaussian Graphical Model (GGM) estimation.
18
+
19
+ The experiment suite spans five sub-tasks run from Python scripts in HPC/: GGM structure
20
+ recovery (simulated), ICA parameter estimation, truncated/untruncated normal parameter
21
+ estimation, S&P 100 GGM estimation, and yeast GGM estimation. Each script accepts
22
+ a repeat count and a parameter-grid index as CLI arguments. Metrics (AUC, accuracy,
23
+ TPR, FPR) are computed inside the scripts via MSM/utils/data.py helper functions
24
+ and aggregated in Jupyter notebooks under plotcode/. Real-world data (yeast tensor,
25
+ S&P 100 CSV) is bundled in real_world_experiments/RealData/. No external evaluation
26
+ service or API credentials are needed; the benchmark is fully self-contained.
27
+
28
+ '
29
+ capability_tags:
30
+ - research_code_optimization
31
+ - missing_data
32
+ - probabilistic_modeling
33
+ - score_matching
34
+ - tabular_ml
35
+ - graphical_model_estimation
36
+ aisb_direction: T3
37
+ track_fit:
38
+ - paper_track
39
+ - benchmark_track
40
+ task_mode: experiment_driven
41
+ requires_execution: true
42
+ requires_paper: true
43
+ integrity_level: cas_plus_canary
44
+ snapshot_status: runnable
45
+ support_level: advanced
46
+ cost_band: medium
47
+ time_band: 6-24h
48
+ difficulty: hard
49
+ data_access: public
50
+ primary_outputs:
51
+ - auc
52
+ - accuracy
53
+ - tpr
54
+ - fpr
55
+ - structure_recovery_report
56
+ launch_profiles:
57
+ - id: quick_check
58
+ label: Quick Check
59
+ description: 'Run a single yeast or GGM experiment script with nrep=1 and one parameter-grid
60
+ index to verify the environment and produce a small set of AUC/accuracy values.
61
+ Expected wall-time ~10–30 minutes on CPU.
62
+
63
+ '
64
+ - id: full_experiments
65
+ label: Full Experiments
66
+ description: 'Sweep all HPC experiment scripts (GGM, ICA, NormalEstimation, snp100,
67
+ yeast) across the full parameter grids using the bash scripts in HPC/bashscripts/.
68
+ Each script runs 10 000 training epochs per configuration. Expect 6–24 hours on
69
+ a 16-core CPU, or less with a single GPU.
70
+
71
+ '
72
+ - id: plot_reproduction
73
+ label: Plot Reproduction
74
+ description: 'After full experiments, run the Jupyter notebooks in plotcode/ to
75
+ regenerate all paper figures and metric summaries from saved results.
76
+
77
+ '
78
+ dataset_download:
79
+ primary_method: bundled
80
+ sources:
81
+ - kind: archive
82
+ url: https://deepscientist.cc/AISB/009_scoremissing
83
+ access: public
84
+ note: 'Full snapshot including the real-world data (yeast tensor, S&P 100 CSV) in real_world_experiments/RealData/.
85
+ Simulated data is generated on-the-fly by the experiment scripts. Archive is
86
+ a zip file.
87
+
88
+ '
89
+ notes:
90
+ - All datasets are either bundled or generated programmatically; no additional downloads
91
+ required.
92
+ - Total disk footprint after extraction is modest (well under 10 GB including results).
93
+ credential_requirements:
94
+ mode: none
95
+ items: []
96
+ notes: []
97
+ resources:
98
+ minimum:
99
+ cpu_cores: 8
100
+ ram_gb: 32
101
+ disk_gb: 80
102
+ gpu_count: 0
103
+ gpu_vram_gb: 0
104
+ recommended:
105
+ cpu_cores: 16
106
+ ram_gb: 64
107
+ disk_gb: 150
108
+ gpu_count: 1
109
+ gpu_vram_gb: 16
110
+ environment:
111
+ python: '3.10'
112
+ cuda: null
113
+ pytorch: 1.12.1
114
+ flash_attn: null
115
+ key_packages:
116
+ - jax==0.3.25
117
+ - jaxlib==0.3.25
118
+ - tensorflow==2.11.0
119
+ - scikit-learn
120
+ - tqdm
121
+ notes:
122
+ - CPU-only execution is plausible for the minimum route; GPU accelerates PyTorch
123
+ training loops.
124
+ - The MSM package is PyTorch-based; JAX/TensorFlow dependencies support specific
125
+ experiment variants.
126
+ - An environment.yml is bundled in the snapshot for conda-based setup.
127
+ - See the bundled README and requirements for the full dependency set.
128
+ risk_flags:
129
+ - mixed_framework_dependencies
130
+ - long_sweep_walltime
131
+ risk_notes:
132
+ - 'The project mixes PyTorch, JAX, and TensorFlow dependencies. Resolving compatible
133
+ versions in one environment may require careful pinning or separate conda environments.
134
+
135
+ '
136
+ - 'Full experiment sweeps (all five sub-tasks × all parameter-grid indices × multiple
137
+ repeats) can take many hours. Plan for at least 6 hours wall-time on recommended
138
+ hardware.
139
+
140
+ '
141
+ - 'No benchmark execution was performed during the packaging pass; metrics are code-backed
142
+ but not yet runtime-verified.
143
+
144
+ '
145
+ recommended_when: 'Use this benchmark when you want a self-contained probabilistic
146
+ missing-data task combining theoretical score-matching methods with both synthetic
147
+ and real-world graphical model evaluation. Good fit for agents that can navigate
148
+ multi-script experiment pipelines, handle CLI-parameterised sweeps, and interpret
149
+ structure-recovery metrics (AUC, TPR, FPR).
150
+
151
+ '
152
+ not_recommended_when: 'Avoid if you need a lightweight single-script benchmark, a
153
+ task centred on large pretrained foundation models, or a benchmark with GPU-intensive
154
+ deep learning. Also not suitable if you cannot install mixed PyTorch/JAX/TensorFlow
155
+ environments.
156
+
157
+ '
158
+ paper:
159
+ title: Score Matching with Missing Data
160
+ venue: ICML 2025 Spotlight
161
+ year: 2025
162
+ url: https://arxiv.org/abs/2506.00557
163
+ download:
164
+ url: https://github.com/ResearAI/DeepScientist/releases/download/aisb-v0.0.1/aisb.t3.009_scoremissing.zip
165
+ archive_type: zip
166
+ local_dir_name: paper-9-ScoreMissing
167
+ provider: github_release
168
+ repo: ResearAI/DeepScientist
169
+ tag: aisb-v0.0.1
170
+ asset_name: aisb.t3.009_scoremissing.zip
171
+ sha256: bed639544fdb6e317d4a5b2c2663c8765aa508e39566551817d1586541cccb27
172
+ size_bytes: 27750054
173
+ commercial:
174
+ annual_fee: null
175
+ display:
176
+ palette_seed: moss-indigo-density
177
+ art_style: statistical-notebook
178
+ accent_priority: medium
179
+ image_path: ../image/009_aisb.t3.009_scoremissing.jpg
@@ -0,0 +1,119 @@
1
+ schema_version: 1
2
+ id: aisb.t3.009_scoremissing
3
+ name: 缺失数据的分数匹配
4
+ version: 0.1.0
5
+ one_line: '从部分观测的表格数据中学习完整分数函数,采用重要性加权和变分边际分数匹配方法,在模拟的GGM/ICA/正态估计任务以及真实的S&P 100和酵母图形模型恢复任务上进行评估。'
6
+ task_description: '该基准测试打包了ICML 2025 Spotlight论文"Score Matching with Missing Data"的完整实验流程。核心任务是在训练样本存在缺失坐标且缺失机制为完全随机缺失(MCAR)的条件下,估计多元分布的分数(对数密度梯度)。提供了两种互补方法:(1)边际重要性加权(IW)分数匹配,在低维和小样本场景表现优异;(2)边际变分分数匹配,在高维问题(如高斯图形模型GGM估计)上更强。
7
+
8
+ 实验套件包含五个子任务,从HPC/目录下的Python脚本运行:GGM结构恢复(模拟)、ICA参数估计、截断/非截断正态参数估计、S&P 100 GGM估计和酵母GGM估计。每个脚本接受重复次数和参数网格索引作为命令行参数。指标(AUC、准确率、TPR、FPR)通过MSM/utils/data.py辅助函数在脚本内计算,并在plotcode/目录下的Jupyter笔记本中聚合。真实数据(酵母张量、S&P 100 CSV)打包在real_world_experiments/RealData/中。无需外部评估服务或API凭据;该基准测试完全自包含。'
9
+ capability_tags:
10
+ - research_code_optimization
11
+ - missing_data
12
+ - probabilistic_modeling
13
+ - score_matching
14
+ - tabular_ml
15
+ - graphical_model_estimation
16
+ aisb_direction: T3
17
+ track_fit:
18
+ - paper_track
19
+ - benchmark_track
20
+ task_mode: experiment_driven
21
+ requires_execution: true
22
+ requires_paper: true
23
+ integrity_level: cas_plus_canary
24
+ snapshot_status: runnable
25
+ support_level: advanced
26
+ cost_band: medium
27
+ time_band: 6-24h
28
+ difficulty: hard
29
+ data_access: public
30
+ primary_outputs:
31
+ - auc
32
+ - accuracy
33
+ - tpr
34
+ - fpr
35
+ - structure_recovery_report
36
+ launch_profiles:
37
+ - id: quick_check
38
+ label: 快速检查
39
+ description: '运行单个酵母或GGM实验脚本,nrep=1和一个参数网格索引,以验证环境并生成少量AUC/准确率值。预期CPU运行时间约10-30分钟。'
40
+ - id: full_experiments
41
+ label: 完整实验
42
+ description: '使用HPC/bashscripts/目录下的bash脚本,对所有HPC实验脚本(GGM、ICA、NormalEstimation、snp100、yeast)进行完整参数网格扫描。每个脚本每个配置运行10000个训练轮次。预期在16核CPU或配备单GPU的更快硬件上需要6-24小时。'
43
+ - id: plot_reproduction
44
+ label: 图表复现
45
+ description: '在完成完整实验后,运行plotcode/目录下的Jupyter笔记本,从保存的结果中重新生成所有论文图表和指标摘要。'
46
+ dataset_download:
47
+ primary_method: bundled
48
+ sources:
49
+ - kind: archive
50
+ url: https://deepscientist.cc/AISB/009_scoremissing
51
+ access: public
52
+ note: '完整快照,包含真实世界数据张量(酵母、S&P 100)位于real_world_experiments/RealData/。模拟数据由实验脚本动态生成。压缩包为zip格式。'
53
+ notes:
54
+ - 所有数据集均为内置或程序生成,无需额外下载。
55
+ - 解压后磁盘占用很小(包括结果文件在内远低于10 GB)。
56
+ credential_requirements:
57
+ mode: none
58
+ items: []
59
+ notes: []
60
+ resources:
61
+ minimum:
62
+ cpu_cores: 8
63
+ ram_gb: 32
64
+ disk_gb: 80
65
+ gpu_count: 0
66
+ gpu_vram_gb: 0
67
+ recommended:
68
+ cpu_cores: 16
69
+ ram_gb: 64
70
+ disk_gb: 150
71
+ gpu_count: 1
72
+ gpu_vram_gb: 16
73
+ environment:
74
+ python: '3.10'
75
+ cuda: null
76
+ pytorch: 1.12.1
77
+ flash_attn: null
78
+ key_packages:
79
+ - jax==0.3.25
80
+ - jaxlib==0.3.25
81
+ - tensorflow==2.11.0
82
+ - scikit-learn
83
+ - tqdm
84
+ notes:
85
+ - 最小配置下支持纯CPU执行;GPU可加速PyTorch训练循环。
86
+ - MSM包基于PyTorch;JAX/TensorFlow依赖支持特定的实验变体。
87
+ - 快照中捆绑了conda环境配置文件environment.yml。
88
+ - 详见捆绑的README和requirements文件以了解完整依赖集。
89
+ risk_flags:
90
+ - mixed_framework_dependencies
91
+ - long_sweep_walltime
92
+ risk_notes:
93
+ - '项目混合了PyTorch、JAX和TensorFlow依赖。在单一环境中解决兼容版本可能需要仔细固定版本或使用独立的conda环境。'
94
+ - '完整实验扫描(所有五个子任务×所有参数网格索引×多次重复)可能需要数小时。在推荐硬件上至少预留6小时运行时间。'
95
+ - '打包过程中未执行基准测试;指标有代码支持但尚未运行时验证。'
96
+ recommended_when: '当你需要一个自包含的概率缺失数据任务,结合理论分数匹配方法与合成和真实世界图形模型评估时使用。非常适合能够处理多脚本实验流程、处理CLI参数化扫描以及解读结构恢复指标(AUC、TPR、FPR)的智能体。'
97
+ not_recommended_when: '避免用于以下场景:需要轻量级单脚本基准测试、以大型预训练基础模型为中心的任务、或GPU密集型深度学习基准测试。如果无法安装混合PyTorch/JAX/TensorFlow环境,也不适用。'
98
+ paper:
99
+ title: Score Matching with Missing Data
100
+ venue: ICML 2025 Spotlight
101
+ year: 2025
102
+ url: https://arxiv.org/abs/2506.00557
103
+ download:
104
+ url: https://github.com/ResearAI/DeepScientist/releases/download/aisb-v0.0.1/aisb.t3.009_scoremissing.zip
105
+ archive_type: zip
106
+ local_dir_name: paper-9-ScoreMissing
107
+ provider: github_release
108
+ repo: ResearAI/DeepScientist
109
+ tag: aisb-v0.0.1
110
+ asset_name: aisb.t3.009_scoremissing.zip
111
+ sha256: bed639544fdb6e317d4a5b2c2663c8765aa508e39566551817d1586541cccb27
112
+ size_bytes: 27750054
113
+ commercial:
114
+ annual_fee: null
115
+ display:
116
+ palette_seed: moss-indigo-density
117
+ art_style: statistical-notebook
118
+ accent_priority: medium
119
+ image_path: ../image/009_aisb.t3.009_scoremissing.jpg
@@ -0,0 +1,221 @@
1
+ schema_version: 1
2
+ id: aisb.t3.010_suitabilityfilter
3
+ name: 'Suitability Filter: A Statistical Framework for Classifier Evaluation in Real-World
4
+ Deployment Settings'
5
+ version: 0.1.0
6
+ one_line: 'Optimize a deployment-time statistical filter that uses non-inferiority
7
+ hypothesis testing on model-derived suitability signals to decide whether a classifier''s
8
+ accuracy has degraded beyond an acceptable margin on unlabeled, distribution-shifted
9
+ user data (evaluated on WILDS FMoW/RxRx1/CivilComments).
10
+
11
+ '
12
+ task_description: 'This benchmark packages the Suitability Filter framework from ICML
13
+ 2025. The core task is to improve a statistical decision system that determines
14
+ whether a pre-trained classifier remains safe to deploy under covariate shift, without
15
+ access to ground-truth labels on the target data.
16
+
17
+ The filter works by: (1) extracting suitability signals (max softmax, predictive
18
+ entropy, logit statistics, loss/energy features — 12 signals total) from both labeled
19
+ test data and unlabeled user data; (2) training a lightweight prediction-correctness
20
+ estimator (logistic regression by default, calibrated via CalibratedClassifierCV)
21
+ on a labeled holdout set to map signals to per-sample correctness probabilities
22
+ p_c; (3) comparing the p_c distributions between test and user data via a one-sided
23
+ non-inferiority test (Welch''s t-test or z-test) with configurable margin m and
24
+ significance level alpha; (4) outputting SUITABLE (reject H0) or INCONCLUSIVE.
25
+
26
+ The packaged evaluation route runs cached FMoW-WILDS features across multiple seeds
27
+ (0, 1, 2), fold configurations, feature subsets (confidence/logit/loss-energy),
28
+ and classifier hyperparameters. Primary metrics are the OOD suitability score (ROC-AUC
29
+ of the filter''s p-values against ground-truth suitability labels across ~29k experiments),
30
+ the raw p-value, and the binary suitability decision. Optimization targets include
31
+ the correctness estimator''s regularization (C sweep), signal subset selection,
32
+ fold count, calibration strategy, and Stouffer''s method for combining p-values
33
+ across feature subsets.
34
+
35
+ Pre-extracted feature caches and split indices are bundled under results/features/
36
+ and results/split_indices/. The WILDS datasets themselves (FMoW, RxRx1, CivilComments)
37
+ and pretrained model weights are needed only if regenerating features from scratch;
38
+ the default execution route uses cached features and does not require downloading
39
+ WILDS data.
40
+
41
+ '
42
+ capability_tags:
43
+ - research_code_optimization
44
+ - classifier_evaluation
45
+ - distribution_shift
46
+ - statistical_testing
47
+ - reliability
48
+ - hypothesis_testing
49
+ - deployment_monitoring
50
+ aisb_direction: T3
51
+ track_fit:
52
+ - paper_track
53
+ - benchmark_track
54
+ task_mode: experiment_driven
55
+ requires_execution: true
56
+ requires_paper: true
57
+ integrity_level: cas_plus_canary
58
+ snapshot_status: runnable
59
+ support_level: turnkey
60
+ cost_band: medium
61
+ time_band: 6-24h
62
+ difficulty: hard
63
+ data_access: public
64
+ primary_outputs:
65
+ - ood_suitability_score
66
+ - p_value
67
+ - suitability_decision
68
+ launch_profiles:
69
+ - id: quick_check
70
+ label: Quick Check
71
+ description: 'Run the packaged suitability filter logic on cached FMoW features
72
+ for a single seed to verify statistical test outputs (p-value, suitability decision)
73
+ and basic ROC-AUC.
74
+
75
+ '
76
+ - id: shift_eval
77
+ label: Full Shift Evaluation
78
+ description: 'Run the complete FMoW-WILDS deployment-shift evaluation across all
79
+ seeds (0, 1, 2), feature subsets, fold configurations, and classifier settings
80
+ using run_fmow.py. Reports OOD suitability ROC-AUC across ~29k experiment configurations.
81
+
82
+ '
83
+ - id: lr_tune
84
+ label: LR Hyperparameter Sweep
85
+ description: 'Run run_fmow_lr_tune.py to sweep logistic regression regularization
86
+ C values (0.01, 0.1, 1.0, 5.0, 10.0) on OOD data for seed=0 to find optimal correctness
87
+ estimator configuration.
88
+
89
+ '
90
+ - id: seed_search
91
+ label: Seed Search
92
+ description: 'Run run_fmow_seed_search.py variants to explore reproducibility and
93
+ seed sensitivity of the suitability filter decisions.
94
+
95
+ '
96
+ dataset_download:
97
+ primary_method: bundled_cache
98
+ sources:
99
+ - kind: bundled
100
+ url: null
101
+ access: public
102
+ note: 'Pre-extracted feature caches (results/features/*.pkl) and split indices
103
+ (results/split_indices/*.pkl) are included in the snapshot. No download needed
104
+ for the default execution route.
105
+
106
+ '
107
+ - kind: pip_package
108
+ url: https://pypi.org/project/wilds/
109
+ access: public
110
+ note: 'WILDS 2.0.0 package needed for feature re-extraction from raw data. FMoW-WILDS,
111
+ RxRx1-WILDS, and CivilComments-WILDS datasets are downloaded on first use via
112
+ the wilds library. FMoW alone is ~50 GB; all three together can exceed 100 GB.
113
+
114
+ '
115
+ notes:
116
+ - Default route uses bundled cached features; WILDS download only needed for feature
117
+ regeneration.
118
+ - Pretrained model weights (ERM) are required for feature regeneration but not for
119
+ cached route.
120
+ credential_requirements:
121
+ mode: none
122
+ items: []
123
+ notes:
124
+ - No API keys or credentials required for any execution route.
125
+ resources:
126
+ minimum:
127
+ cpu_cores: 8
128
+ ram_gb: 32
129
+ disk_gb: 100
130
+ gpu_count: 1
131
+ gpu_vram_gb: 16
132
+ recommended:
133
+ cpu_cores: 16
134
+ ram_gb: 64
135
+ disk_gb: 200
136
+ gpu_count: 1
137
+ gpu_vram_gb: 24
138
+ environment:
139
+ python: 3.11.9
140
+ cuda: '11.7'
141
+ pytorch: 2.0.0
142
+ flash_attn: null
143
+ key_packages:
144
+ - transformers==4.41.2
145
+ - torch-geometric==2.5.3
146
+ - wilds==2.0.0
147
+ - scikit-learn
148
+ - scipy
149
+ - statsmodels
150
+ - pandas
151
+ - numpy
152
+ notes:
153
+ - See the bundled requirements.txt for the full conda environment specification.
154
+ - GPU is used for feature extraction (torch.device("cuda" if available)); the cached-feature
155
+ route runs statistical tests on CPU only (scikit-learn, scipy).
156
+ - The run_fmow.py scripts set sys.path to '/repo' — adjust if running outside the
157
+ expected container.
158
+ risk_flags:
159
+ - large_experiment_matrix
160
+ - external_dataset_for_regeneration
161
+ risk_notes:
162
+ - 'The full evaluation route runs ~29k experiment configurations across seeds, folds,
163
+ feature subsets, and margins. Wall time can reach several hours on CPU.
164
+
165
+ '
166
+ - 'Feature regeneration from raw WILDS data requires downloading FMoW-WILDS (~50 GB)
167
+ and pretrained ERM model weights, but this is not needed for the default cached
168
+ route.
169
+
170
+ '
171
+ - 'The run scripts hardcode sys.path to ''/repo'' and os.chdir(''/repo''); execution
172
+ outside the expected container environment may require path adjustments.
173
+
174
+ '
175
+ - 'No runtime execution was performed during the packaging pass; metric values should
176
+ be verified by running the benchmark.
177
+
178
+ '
179
+ recommended_when: 'Use this benchmark when you want a model-monitoring optimization
180
+ task that combines statistical hypothesis testing (non-inferiority tests) with realistic
181
+ WILDS deployment shifts. Well-suited for research on deployment-time safety, distribution
182
+ shift detection, unsupervised accuracy estimation, and selective prediction. The
183
+ cached-feature route makes iteration fast without needing to re-run model inference.
184
+
185
+ '
186
+ not_recommended_when: 'Not suitable if you need a tiny or quick-to-iterate toy benchmark,
187
+ cannot allocate 32+ GB RAM for the experiment matrices, or want to evaluate non-classification
188
+ tasks. Also not ideal if you need end-to-end training of the base classifier — this
189
+ benchmark focuses on the filter/monitoring layer, not the upstream model training.
190
+
191
+ '
192
+ paper:
193
+ title: 'Suitability Filter: A Statistical Framework for Classifier Evaluation in
194
+ Real-World Deployment Settings'
195
+ authors:
196
+ - Angéline Pouget
197
+ - Mohammad Yaghini
198
+ - Stephan Rabanser
199
+ - Nicolas Papernot
200
+ venue: ICML 2025
201
+ year: 2025
202
+ url: https://arxiv.org/abs/2505.22356
203
+ volume: 267
204
+ publisher: PMLR
205
+ download:
206
+ url: https://github.com/ResearAI/DeepScientist/releases/download/aisb-v0.0.1/aisb.t3.010_suitabilityfilter.zip
207
+ archive_type: zip
208
+ local_dir_name: paper-10-SuitabilityFilter
209
+ provider: github_release
210
+ repo: ResearAI/DeepScientist
211
+ tag: aisb-v0.0.1
212
+ asset_name: aisb.t3.010_suitabilityfilter.zip
213
+ sha256: 146f3477fb91d43974f51dac9013768be094e9ad355d2196c820915f062645f1
214
+ size_bytes: 54606221
215
+ commercial:
216
+ annual_fee: null
217
+ display:
218
+ palette_seed: sand-graphite-verify
219
+ art_style: clean-statistical
220
+ accent_priority: high
221
+ image_path: ../image/010_aisb.t3.010_suitabilityfilter.jpg
@@ -0,0 +1,141 @@
1
+ schema_version: 1
2
+ id: aisb.t3.010_suitabilityfilter
3
+ name: "适用性过滤器:真实世界部署场景下分类器评估的统计框架"
4
+ version: 0.1.0
5
+ one_line: "优化一个部署时的统计过滤器,使用非劣性假设检验对模型衍生的适用性信号进行测试,以判断分类器在无标签、分布偏移的用户数据上的准确率是否已退化超出可接受的边际(基于WILDS FMoW/RxRx1/CivilComments评估)。"
6
+ task_description: |
7
+ 本基准测试打包了ICML 2025的适用性过滤器框架。核心任务是改进一个统计决策系统,在无法获得目标数据真实标签的情况下,判断预训练分类器在协变量偏移下是否仍然安全可部署。
8
+
9
+ 过滤器的工作流程为:(1) 从带标签的测试数据和无标签的用户数据中提取适用性信号(最大softmax值、预测熵、logit统计量、损失/能量特征——共12个信号);(2) 在带标签的保留集上训练一个轻量级的预测正确性估计器(默认使用逻辑回归,通过CalibratedClassifierCV进行校准),将信号映射到每个样本的正确概率p_c;(3) 通过单侧非劣性检验(Welch's t检验或z检验,可配置边际m和显著性水平alpha)比较测试数据和用户数据之间的p_c分布;(4) 输出SUITABLE(拒绝H0)或INCONCLUSIVE。
10
+
11
+ 打包的评估流程跨多个随机种子(0、1、2)、折数配置、特征子集(置信度/logit/损失-能量)和分类器超参数运行缓存的FMoW-WILDS特征。主要指标包括:OOD适用性得分(过滤器p值相对于真实适用性标签的ROC-AUC,跨约29k个实验)、原始p值和二元适用性决策。优化目标包括正确性估计器的正则化(C扫描)、信号子集选择、折数、校准策略,以及用于组合特征子集p值的Stouffer方法。
12
+
13
+ 预提取的特征缓存和分割索引分别打包在results/features/和results/split_indices/下。仅在从头重新生成特征时才需要WILDS数据集本身(FMoW、RxRx1、CivilComments)和预训练模型权重;默认执行流程使用缓存特征,无需下载WILDS数据。
14
+ capability_tags:
15
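+ - research_code_optimization
16
+ - classifier_evaluation
17
+ - distribution_shift
18
+ - statistical_testing
19
+ - reliability
20
+ - hypothesis_testing
21
+ - deployment_monitoring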
22
+ aisb_direction: T3
23
+ track_fit:
24
+ - paper_track
25
+ - benchmark_track
26
+ task_mode: experiment_driven
27
+ requires_execution: true
28
+ requires_paper: true
29
+ integrity_level: cas_plus_canary
30
+ snapshot_status: runnable
31
+ support_level: turnkey
32
+ cost_band: medium
33
+ time_band: 6-24h
34
+ difficulty: hard
35
+ data_access: public
36
+ primary_outputs:
37
+ - ood_suitability_score
38
+ - p_value
39
+ - suitability_decision
40
+ launch_profiles:
41
+ - id: quick_check
42
+ label: 快速检查
43
+ description: "在缓存的FMoW特征上运行打包的适用性过滤器逻辑,使用单个种子验证统计检验输出(p值、适用性决策)和基本ROC-AUC。"
44
+ - id: shift_eval
45
+ label: 完整偏移评估
46
+ description: "跨所有种子(0、1、2)、特征子集、折数配置和分类器设置运行完整的FMoW-WILDS部署偏移评估。使用run_fmow.py,报告约29k个实验配置下的OOD适用性ROC-AUC。"
47
+ - id: lr_tune
48
+ label: LR超参数扫描
49
+ description: "运行run_fmow_lr_tune.py,在OOD数据上扫描逻辑回归正则化C值(0.01、0.1、1.0、5.0、10.0),种子=0,以找到最优的正确性估计器配置。"
50
+ - id: seed_search
51
+ label: 种子搜索
52
+ description: "运行run_fmow_seed_search.py变体,探索适用性过滤器决策的可重复性和种子敏感性。"
53
+ dataset_download:
54
+ primary_method: bundled_cache
55
+ sources:
56
+ - kind: bundled
57
+ url: null
58
+ access: public
59
+ note: "预提取的特征缓存(results/features/*.pkl)和分割索引(results/split_indices/*.pkl)已包含在快照中。默认执行流程无需下载。"
60
+ - kind: pip_package
61
+ url: https://pypi.org/project/wilds/
62
+ access: public
63
+ note: "WILDS 2.0.0包用于从原始数据重新提取特征。FMoW-WILDS、RxRx1-WILDS和CivilComments-WILDS数据集通过wilds库首次使用时下载。仅FMoW约50GB;三个数据集合计可超过100GB。"
64
+ notes:
65
+ - 默认流程使用打包的缓存特征;仅在需要重新生成特征时才需要下载WILDS。
66
+ - 预训练模型权重(ERM)用于特征重新生成,缓存流程无需此权重。
67
+ credential_requirements:
68
+ mode: none
69
+ items: []
70
+ notes:
71
+ - 任何执行流程都无需API密钥或凭据。
72
+ resources:
73
+ minimum:
74
+ cpu_cores: 8
75
+ ram_gb: 32
76
+ disk_gb: 100
77
+ gpu_count: 1
78
+ gpu_vram_gb: 16
79
+ recommended:
80
+ cpu_cores: 16
81
+ ram_gb: 64
82
+ disk_gb: 200
83
+ gpu_count: 1
84
+ gpu_vram_gb: 24
85
+ environment:
86
+ python: 3.11.9
87
+ cuda: "11.7"
88
+ pytorch: 2.0.0
89
+ flash_attn: null
90
+ key_packages:
91
+ - transformers==4.41.2
92
+ - torch-geometric==2.5.3
93
+ - wilds==2.0.0
94
+ - scikit-learn
95
+ - scipy
96
+ - statsmodels
97
+ - pandas
98
+ - numpy
99
+ notes:
100
+ - 完整的conda环境规范请参见打包的requirements.txt。
101
+ - GPU用于特征提取(torch.device("cuda" if available));缓存特征流程仅在CPU上运行统计检验(scikit-learn、scipy)。
102
+ - run_fmow.py脚本设置sys.path为'/repo'——如在预期容器外运行需调整路径。
103
+ risk_flags:
104
+ - large_experiment_matrix
105
+ - external_dataset_for_regeneration
106
+ risk_notes:
107
+ - "完整评估流程跨种子、折数、特征子集和边际运行约29k个实验配置。CPU上运行时间可达数小时。"
108
+ - "从原始WILDS数据重新生成特征需要下载FMoW-WILDS(约50GB)和预训练ERM模型权重,但默认缓存流程无需此操作。"
109
+ - "运行脚本硬编码sys.path为'/repo'和os.chdir('/repo');在预期容器环境外执行可能需要调整路径。"
110
+ - "打包过程中未执行运行时测试;指标值应通过运行基准测试进行验证。"
111
+ recommended_when: "当您需要一个结合统计假设检验(非劣性检验)与真实WILDS部署偏移的模型监控优化任务时使用此基准测试。非常适合研究部署时安全性、分布偏移检测、无监督准确率估计和选择性预测。缓存特征流程使迭代快速,无需重新运行模型推理。"
112
+ not_recommended_when: "如果需要一个小型的或快速迭代的玩具基准测试、无法分配32GB以上内存用于实验矩阵,或需要评估非分类任务,则不适合使用。此外,如果您需要端到端训练基础分类器——本基准测试专注于过滤器/监控层,而非上游模型训练——也不太理想。"
113
+ paper:
114
+ title: "Suitability Filter: A Statistical Framework for Classifier Evaluation in Real-World Deployment Settings"
115
+ authors:
116
+ - Angéline Pouget
117
+ - Mohammad Yaghini
118
+ - Stephan Rabanser
119
+ - Nicolas Papernot
120
+ venue: ICML 2025
121
+ year: 2025
122
+ url: https://arxiv.org/abs/2505.22356
123
+ volume: 267
124
+ publisher: PMLR
125
+ download:
126
+ url: https://github.com/ResearAI/DeepScientist/releases/download/aisb-v0.0.1/aisb.t3.010_suitabilityfilter.zip
127
+ archive_type: zip
128
+ local_dir_name: paper-10-SuitabilityFilter
129
+ provider: github_release
130
+ repo: ResearAI/DeepScientist
131
+ tag: aisb-v0.0.1
132
+ asset_name: aisb.t3.010_suitabilityfilter.zip
133
+ sha256: 146f3477fb91d43974f51dac9013768be094e9ad355d2196c820915f062645f1
134
+ size_bytes: 54606221
135
+ commercial:
136
+ annual_fee: null
137
+ display:
138
+ palette_seed: sand-graphite-verify
139
+ art_style: clean-statistical
140
+ accent_priority: high
141
+ image_path: ../image/010_aisb.t3.010_suitabilityfilter.jpg