@researai/deepscientist 1.5.17 → 1.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (894) hide show
  1. package/AGENTS.md +309 -130
  2. package/AISB/catalog/aisb.b1.agentic_coding.yaml +244 -0
  3. package/AISB/catalog/aisb.b10.climate_earth.yaml +235 -0
  4. package/AISB/catalog/aisb.b11.model_efficiency.yaml +231 -0
  5. package/AISB/catalog/aisb.b12.embodied_ai.yaml +238 -0
  6. package/AISB/catalog/aisb.b2.agent_systems.yaml +229 -0
  7. package/AISB/catalog/aisb.b3.self_evolving_rl.yaml +237 -0
  8. package/AISB/catalog/aisb.b4.lm_reasoning.yaml +240 -0
  9. package/AISB/catalog/aisb.b5.math_proof.yaml +235 -0
  10. package/AISB/catalog/aisb.b6.research_process.yaml +243 -0
  11. package/AISB/catalog/aisb.b7.multimodal_fusion.yaml +232 -0
  12. package/AISB/catalog/aisb.b8.lifesci_drug.yaml +275 -0
  13. package/AISB/catalog/aisb.b9.material_science.yaml +237 -0
  14. package/AISB/catalog/aisb.t3.001_savvy.yaml +159 -0
  15. package/AISB/catalog/aisb.t3.001_savvy.zh.yaml +121 -0
  16. package/AISB/catalog/aisb.t3.002_pinet.yaml +189 -0
  17. package/AISB/catalog/aisb.t3.002_pinet.zh.yaml +130 -0
  18. package/AISB/catalog/aisb.t3.004_decentralattn.yaml +184 -0
  19. package/AISB/catalog/aisb.t3.004_decentralattn.zh.yaml +153 -0
  20. package/AISB/catalog/aisb.t3.005_tsae.yaml +193 -0
  21. package/AISB/catalog/aisb.t3.005_tsae.zh.yaml +139 -0
  22. package/AISB/catalog/aisb.t3.006_physense.yaml +194 -0
  23. package/AISB/catalog/aisb.t3.006_physense.zh.yaml +118 -0
  24. package/AISB/catalog/aisb.t3.007_reasoningiqa.yaml +169 -0
  25. package/AISB/catalog/aisb.t3.007_reasoningiqa.zh.yaml +133 -0
  26. package/AISB/catalog/aisb.t3.008_meanflows.yaml +188 -0
  27. package/AISB/catalog/aisb.t3.008_meanflows.zh.yaml +140 -0
  28. package/AISB/catalog/aisb.t3.009_scoremissing.yaml +179 -0
  29. package/AISB/catalog/aisb.t3.009_scoremissing.zh.yaml +119 -0
  30. package/AISB/catalog/aisb.t3.010_suitabilityfilter.yaml +221 -0
  31. package/AISB/catalog/aisb.t3.010_suitabilityfilter.zh.yaml +141 -0
  32. package/AISB/catalog/aisb.t3.011_osd.yaml +206 -0
  33. package/AISB/catalog/aisb.t3.011_osd.zh.yaml +163 -0
  34. package/AISB/catalog/aisb.t3.012_efficientqat.yaml +206 -0
  35. package/AISB/catalog/aisb.t3.012_efficientqat.zh.yaml +159 -0
  36. package/AISB/catalog/aisb.t3.013_appl.yaml +152 -0
  37. package/AISB/catalog/aisb.t3.013_appl.zh.yaml +126 -0
  38. package/AISB/catalog/aisb.t3.014_piguard.yaml +207 -0
  39. package/AISB/catalog/aisb.t3.014_piguard.zh.yaml +164 -0
  40. package/AISB/catalog/aisb.t3.015_frspec.yaml +209 -0
  41. package/AISB/catalog/aisb.t3.015_frspec.zh.yaml +163 -0
  42. package/AISB/catalog/aisb.t3.016_mathfusion.yaml +166 -0
  43. package/AISB/catalog/aisb.t3.016_mathfusion.zh.yaml +145 -0
  44. package/AISB/catalog/aisb.t3.017_multimodalglp.yaml +171 -0
  45. package/AISB/catalog/aisb.t3.017_multimodalglp.zh.yaml +122 -0
  46. package/AISB/catalog/aisb.t3.018_cotsynth.yaml +206 -0
  47. package/AISB/catalog/aisb.t3.018_cotsynth.zh.yaml +162 -0
  48. package/AISB/catalog/aisb.t3.019_dyscaleut.yaml +211 -0
  49. package/AISB/catalog/aisb.t3.019_dyscaleut.zh.yaml +148 -0
  50. package/AISB/catalog/aisb.t3.020_aristotle.yaml +173 -0
  51. package/AISB/catalog/aisb.t3.020_aristotle.zh.yaml +119 -0
  52. package/AISB/catalog/aisb.t3.021_tokenrecycling.yaml +160 -0
  53. package/AISB/catalog/aisb.t3.021_tokenrecycling.zh.yaml +129 -0
  54. package/AISB/catalog/aisb.t3.022_chainofreasoning.yaml +204 -0
  55. package/AISB/catalog/aisb.t3.022_chainofreasoning.zh.yaml +161 -0
  56. package/AISB/catalog/aisb.t3.023_guidedembed.yaml +211 -0
  57. package/AISB/catalog/aisb.t3.023_guidedembed.zh.yaml +189 -0
  58. package/AISB/catalog/aisb.t3.024_outputcentric.yaml +148 -0
  59. package/AISB/catalog/aisb.t3.024_outputcentric.zh.yaml +131 -0
  60. package/AISB/catalog/aisb.t3.025_deeper.yaml +143 -0
  61. package/AISB/catalog/aisb.t3.025_deeper.zh.yaml +116 -0
  62. package/AISB/catalog/aisb.t3.026_gartkg.yaml +195 -0
  63. package/AISB/catalog/aisb.t3.026_gartkg.zh.yaml +127 -0
  64. package/AISB/catalog/aisb.t3.027_citeeval.yaml +182 -0
  65. package/AISB/catalog/aisb.t3.027_citeeval.zh.yaml +135 -0
  66. package/AISB/catalog/aisb.t3.028_sbam.yaml +206 -0
  67. package/AISB/catalog/aisb.t3.028_sbam.zh.yaml +166 -0
  68. package/AISB/catalog/aisb.t3.029_cdqgeoembed.yaml +224 -0
  69. package/AISB/catalog/aisb.t3.029_cdqgeoembed.zh.yaml +142 -0
  70. package/AISB/catalog/aisb.t3.030_processrm.yaml +211 -0
  71. package/AISB/catalog/aisb.t3.030_processrm.zh.yaml +166 -0
  72. package/AISB/catalog/aisb.t3.031_circuitstability.yaml +172 -0
  73. package/AISB/catalog/aisb.t3.031_circuitstability.zh.yaml +134 -0
  74. package/AISB/catalog/aisb.t3.032_ptsolver.yaml +169 -0
  75. package/AISB/catalog/aisb.t3.032_ptsolver.zh.yaml +135 -0
  76. package/AISB/catalog/aisb.t3.033_gcse.yaml +144 -0
  77. package/AISB/catalog/aisb.t3.033_gcse.zh.yaml +126 -0
  78. package/AISB/catalog/aisb.t3.034_ensemblewm.yaml +183 -0
  79. package/AISB/catalog/aisb.t3.034_ensemblewm.zh.yaml +146 -0
  80. package/AISB/catalog/aisb.t3.035_moralvalueswa.yaml +207 -0
  81. package/AISB/catalog/aisb.t3.035_moralvalueswa.zh.yaml +165 -0
  82. package/AISB/catalog/aisb.t3.036_weakstrongpref.yaml +210 -0
  83. package/AISB/catalog/aisb.t3.036_weakstrongpref.zh.yaml +194 -0
  84. package/AISB/catalog/aisb.t3.037_dementiamask.yaml +172 -0
  85. package/AISB/catalog/aisb.t3.037_dementiamask.zh.yaml +132 -0
  86. package/AISB/catalog/aisb.t3.038_tinysam.yaml +284 -0
  87. package/AISB/catalog/aisb.t3.038_tinysam.zh.yaml +240 -0
  88. package/AISB/catalog/aisb.t3.039_calf.yaml +224 -0
  89. package/AISB/catalog/aisb.t3.039_calf.zh.yaml +194 -0
  90. package/AISB/catalog/aisb.t3.040_graniteguardian.yaml +199 -0
  91. package/AISB/catalog/aisb.t3.040_graniteguardian.zh.yaml +174 -0
  92. package/AISB/catalog/aisb.t3.041_amdm.yaml +149 -0
  93. package/AISB/catalog/aisb.t3.041_amdm.zh.yaml +137 -0
  94. package/AISB/catalog/aisb.t3.042_xpatch.yaml +216 -0
  95. package/AISB/catalog/aisb.t3.042_xpatch.zh.yaml +182 -0
  96. package/AISB/catalog/aisb.t3.043_vhm.yaml +268 -0
  97. package/AISB/catalog/aisb.t3.043_vhm.zh.yaml +193 -0
  98. package/AISB/catalog/aisb.t3.044_rgvi.yaml +224 -0
  99. package/AISB/catalog/aisb.t3.044_rgvi.zh.yaml +176 -0
  100. package/AISB/catalog/aisb.t3.045_pslstm.yaml +203 -0
  101. package/AISB/catalog/aisb.t3.045_pslstm.zh.yaml +179 -0
  102. package/AISB/catalog/aisb.t3.046_nonstatts.yaml +208 -0
  103. package/AISB/catalog/aisb.t3.046_nonstatts.zh.yaml +194 -0
  104. package/AISB/catalog/aisb.t3.047_timepfn.yaml +156 -0
  105. package/AISB/catalog/aisb.t3.047_timepfn.zh.yaml +124 -0
  106. package/AISB/catalog/aisb.t3.048_proxyspex.yaml +148 -0
  107. package/AISB/catalog/aisb.t3.048_proxyspex.zh.yaml +125 -0
  108. package/AISB/catalog/aisb.t3.049_hogwildinference.yaml +183 -0
  109. package/AISB/catalog/aisb.t3.049_hogwildinference.zh.yaml +138 -0
  110. package/AISB/catalog/aisb.t3.050_causalpfn.yaml +214 -0
  111. package/AISB/catalog/aisb.t3.050_causalpfn.zh.yaml +190 -0
  112. package/AISB/catalog/aisb.t3.051_flashtp.yaml +169 -0
  113. package/AISB/catalog/aisb.t3.051_flashtp.zh.yaml +124 -0
  114. package/AISB/catalog/aisb.t3.052_nsdiff.yaml +155 -0
  115. package/AISB/catalog/aisb.t3.052_nsdiff.zh.yaml +138 -0
  116. package/AISB/catalog/aisb.t3.053_k2vae.yaml +158 -0
  117. package/AISB/catalog/aisb.t3.053_k2vae.zh.yaml +132 -0
  118. package/AISB/catalog/aisb.t3.054_timebase.yaml +178 -0
  119. package/AISB/catalog/aisb.t3.054_timebase.zh.yaml +158 -0
  120. package/AISB/catalog/aisb.t3.055_csbrain.yaml +238 -0
  121. package/AISB/catalog/aisb.t3.055_csbrain.zh.yaml +184 -0
  122. package/AISB/catalog/aisb.t3.056_infosam.yaml +224 -0
  123. package/AISB/catalog/aisb.t3.056_infosam.zh.yaml +189 -0
  124. package/AISB/catalog/aisb.t3.057_mdreid.yaml +129 -0
  125. package/AISB/catalog/aisb.t3.057_mdreid.zh.yaml +117 -0
  126. package/AISB/catalog/aisb.t3.058_mindglitch.yaml +171 -0
  127. package/AISB/catalog/aisb.t3.058_mindglitch.zh.yaml +145 -0
  128. package/AISB/catalog/aisb.t3.059_selfsupervised.yaml +154 -0
  129. package/AISB/catalog/aisb.t3.059_selfsupervised.zh.yaml +125 -0
  130. package/AISB/catalog/aisb.t3.060_iaggad.yaml +121 -0
  131. package/AISB/catalog/aisb.t3.060_iaggad.zh.yaml +100 -0
  132. package/AISB/catalog/aisb.t3.061_hsgkn.yaml +136 -0
  133. package/AISB/catalog/aisb.t3.061_hsgkn.zh.yaml +113 -0
  134. package/AISB/catalog/aisb.t3.062_visionts.yaml +237 -0
  135. package/AISB/catalog/aisb.t3.062_visionts.zh.yaml +216 -0
  136. package/AISB/catalog/aisb.t3.063_tsrag.yaml +162 -0
  137. package/AISB/catalog/aisb.t3.063_tsrag.zh.yaml +138 -0
  138. package/AISB/catalog/aisb.t3.064_pir.yaml +221 -0
  139. package/AISB/catalog/aisb.t3.064_pir.zh.yaml +197 -0
  140. package/AISB/catalog/aisb.t3.065_proteinbinding.yaml +234 -0
  141. package/AISB/catalog/aisb.t3.065_proteinbinding.zh.yaml +167 -0
  142. package/AISB/catalog/aisb.t3.066_tropicalattention.yaml +267 -0
  143. package/AISB/catalog/aisb.t3.066_tropicalattention.zh.yaml +229 -0
  144. package/AISB/catalog/aisb.t3.067_kanad.yaml +193 -0
  145. package/AISB/catalog/aisb.t3.067_kanad.zh.yaml +167 -0
  146. package/AISB/catalog/aisb.t3.068_sempo.yaml +187 -0
  147. package/AISB/catalog/aisb.t3.068_sempo.zh.yaml +148 -0
  148. package/AISB/catalog/aisb.t3.069_treehfd.yaml +129 -0
  149. package/AISB/catalog/aisb.t3.069_treehfd.zh.yaml +111 -0
  150. package/AISB/catalog/aisb.t3.070_certifiedunlearning.yaml +224 -0
  151. package/AISB/catalog/aisb.t3.070_certifiedunlearning.zh.yaml +171 -0
  152. package/AISB/catalog/aisb.t3.071_neuralmjd.yaml +142 -0
  153. package/AISB/catalog/aisb.t3.071_neuralmjd.zh.yaml +120 -0
  154. package/AISB/catalog/aisb.t3.072_fedgmt.yaml +181 -0
  155. package/AISB/catalog/aisb.t3.072_fedgmt.zh.yaml +158 -0
  156. package/AISB/catalog/aisb.t3.073_rld.yaml +161 -0
  157. package/AISB/catalog/aisb.t3.073_rld.zh.yaml +129 -0
  158. package/AISB/catalog/aisb.t3.074_lsvi.yaml +163 -0
  159. package/AISB/catalog/aisb.t3.074_lsvi.zh.yaml +129 -0
  160. package/AISB/catalog/aisb.t3.075_treeslicedentropy.yaml +201 -0
  161. package/AISB/catalog/aisb.t3.075_treeslicedentropy.zh.yaml +148 -0
  162. package/AISB/catalog/aisb.t3.076_aanet.yaml +169 -0
  163. package/AISB/catalog/aisb.t3.076_aanet.zh.yaml +129 -0
  164. package/AISB/catalog/aisb.t3.077_cmnn.yaml +199 -0
  165. package/AISB/catalog/aisb.t3.077_cmnn.zh.yaml +165 -0
  166. package/AISB/catalog/aisb.t3.078_conformalanomaly.yaml +146 -0
  167. package/AISB/catalog/aisb.t3.078_conformalanomaly.zh.yaml +117 -0
  168. package/AISB/catalog/aisb.t3.079_dpfkmeans.yaml +131 -0
  169. package/AISB/catalog/aisb.t3.079_dpfkmeans.zh.yaml +104 -0
  170. package/AISB/catalog/aisb.t3.080_latentscorereweight.yaml +169 -0
  171. package/AISB/catalog/aisb.t3.080_latentscorereweight.zh.yaml +123 -0
  172. package/AISB/catalog/aisb.t3.081_qmamba.yaml +150 -0
  173. package/AISB/catalog/aisb.t3.081_qmamba.zh.yaml +117 -0
  174. package/AISB/catalog/aisb.t3.082_onlinellmrouting.yaml +160 -0
  175. package/AISB/catalog/aisb.t3.082_onlinellmrouting.zh.yaml +133 -0
  176. package/AISB/catalog/aisb.t3.083_starformer.yaml +178 -0
  177. package/AISB/catalog/aisb.t3.083_starformer.zh.yaml +140 -0
  178. package/AISB/catalog/aisb.t3.084_ift.yaml +139 -0
  179. package/AISB/catalog/aisb.t3.084_ift.zh.yaml +111 -0
  180. package/AISB/catalog/aisb.t3.085_neuralsurv.yaml +183 -0
  181. package/AISB/catalog/aisb.t3.085_neuralsurv.zh.yaml +143 -0
  182. package/AISB/catalog/aisb.t3.086_stella.yaml +197 -0
  183. package/AISB/catalog/aisb.t3.086_stella.zh.yaml +142 -0
  184. package/AISB/catalog/aisb.t3.087_moses.yaml +167 -0
  185. package/AISB/catalog/aisb.t3.087_moses.zh.yaml +132 -0
  186. package/AISB/catalog/aisb.t3.088_channelnorm.yaml +140 -0
  187. package/AISB/catalog/aisb.t3.088_channelnorm.zh.yaml +109 -0
  188. package/AISB/catalog/aisb.t3.089_causalvelocity.yaml +730 -0
  189. package/AISB/catalog/aisb.t3.089_causalvelocity.zh.yaml +668 -0
  190. package/AISB/catalog/aisb.t3.090_rstib.yaml +144 -0
  191. package/AISB/catalog/aisb.t3.090_rstib.zh.yaml +109 -0
  192. package/AISB/catalog/aisb.t3.091_timeawarecausal.yaml +132 -0
  193. package/AISB/catalog/aisb.t3.091_timeawarecausal.zh.yaml +107 -0
  194. package/AISB/catalog/aisb.t3.092_kmeanslocalopt.yaml +138 -0
  195. package/AISB/catalog/aisb.t3.092_kmeanslocalopt.zh.yaml +110 -0
  196. package/AISB/catalog/aisb.t3.093_fedwmsam.yaml +134 -0
  197. package/AISB/catalog/aisb.t3.093_fedwmsam.zh.yaml +106 -0
  198. package/AISB/catalog/aisb.t3.094_boundre.yaml +147 -0
  199. package/AISB/catalog/aisb.t3.094_boundre.zh.yaml +114 -0
  200. package/AISB/catalog/aisb.t3.095_fastfeaturecp.yaml +153 -0
  201. package/AISB/catalog/aisb.t3.095_fastfeaturecp.zh.yaml +118 -0
  202. package/AISB/catalog/aisb.t3.096_m3svm.yaml +189 -0
  203. package/AISB/catalog/aisb.t3.096_m3svm.zh.yaml +149 -0
  204. package/AISB/catalog/aisb.t3.097_wassersteintl.yaml +212 -0
  205. package/AISB/catalog/aisb.t3.097_wassersteintl.zh.yaml +169 -0
  206. package/AISB/catalog/aisb.t3.098_xmahalanobis.yaml +171 -0
  207. package/AISB/catalog/aisb.t3.098_xmahalanobis.zh.yaml +127 -0
  208. package/AISB/catalog/aisb.t3.099_ollalanding.yaml +248 -0
  209. package/AISB/catalog/aisb.t3.099_ollalanding.zh.yaml +182 -0
  210. package/AISB/catalog/aisb.t3.100_invmissingdata.yaml +179 -0
  211. package/AISB/catalog/aisb.t3.100_invmissingdata.zh.yaml +150 -0
  212. package/AISB/catalog/aisb.t3.101_acia.yaml +164 -0
  213. package/AISB/catalog/aisb.t3.101_acia.zh.yaml +109 -0
  214. package/AISB/catalog/aisb.t3.102_stochasticff.yaml +178 -0
  215. package/AISB/catalog/aisb.t3.102_stochasticff.zh.yaml +130 -0
  216. package/AISB/catalog/aisb.t3.103_qdcp.yaml +150 -0
  217. package/AISB/catalog/aisb.t3.103_qdcp.zh.yaml +116 -0
  218. package/AISB/catalog/aisb.t3.104_balancedactiveinf.yaml +137 -0
  219. package/AISB/catalog/aisb.t3.104_balancedactiveinf.zh.yaml +104 -0
  220. package/AISB/catalog/aisb.t3.105_binaryclasseval.yaml +161 -0
  221. package/AISB/catalog/aisb.t3.105_binaryclasseval.zh.yaml +130 -0
  222. package/AISB/image/001_aisb.t3.001_savvy.jpg +0 -0
  223. package/AISB/image/002_aisb.t3.002_pinet.jpg +0 -0
  224. package/AISB/image/003_aisb.t3.003_dmsqd.jpg +0 -0
  225. package/AISB/image/004_aisb.t3.004_decentralattn.jpg +0 -0
  226. package/AISB/image/005_aisb.t3.005_tsae.jpg +0 -0
  227. package/AISB/image/006_aisb.t3.006_physense.jpg +0 -0
  228. package/AISB/image/007_aisb.t3.007_reasoningiqa.jpg +0 -0
  229. package/AISB/image/008_aisb.t3.008_meanflows.jpg +0 -0
  230. package/AISB/image/009_aisb.t3.009_scoremissing.jpg +0 -0
  231. package/AISB/image/010_aisb.t3.010_suitabilityfilter.jpg +0 -0
  232. package/AISB/image/011_aisb.t3.011_osd.jpg +0 -0
  233. package/AISB/image/012_aisb.t3.012_efficientqat.jpg +0 -0
  234. package/AISB/image/013_aisb.t3.013_appl.jpg +0 -0
  235. package/AISB/image/014_aisb.t3.014_piguard.jpg +0 -0
  236. package/AISB/image/015_aisb.t3.015_frspec.jpg +0 -0
  237. package/AISB/image/016_aisb.t3.016_mathfusion.jpg +0 -0
  238. package/AISB/image/017_aisb.t3.017_multimodalglp.jpg +0 -0
  239. package/AISB/image/018_aisb.t3.018_cotsynth.jpg +0 -0
  240. package/AISB/image/019_aisb.t3.019_dyscaleut.jpg +0 -0
  241. package/AISB/image/020_aisb.t3.020_aristotle.jpg +0 -0
  242. package/AISB/image/021_aisb.t3.021_tokenrecycling.jpg +0 -0
  243. package/AISB/image/022_aisb.t3.022_chainofreasoning.jpg +0 -0
  244. package/AISB/image/023_aisb.t3.023_guidedembed.jpg +0 -0
  245. package/AISB/image/024_aisb.t3.024_outputcentric.jpg +0 -0
  246. package/AISB/image/025_aisb.t3.025_deeper.jpg +0 -0
  247. package/AISB/image/026_aisb.t3.026_gartkg.jpg +0 -0
  248. package/AISB/image/027_aisb.t3.027_citeeval.jpg +0 -0
  249. package/AISB/image/028_aisb.t3.028_sbam.jpg +0 -0
  250. package/AISB/image/029_aisb.t3.029_cdqgeoembed.jpg +0 -0
  251. package/AISB/image/030_aisb.t3.030_processrm.jpg +0 -0
  252. package/AISB/image/031_aisb.t3.031_circuitstability.jpg +0 -0
  253. package/AISB/image/032_aisb.t3.032_ptsolver.jpg +0 -0
  254. package/AISB/image/033_aisb.t3.033_gcse.jpg +0 -0
  255. package/AISB/image/034_aisb.t3.034_ensemblewm.jpg +0 -0
  256. package/AISB/image/035_aisb.t3.035_moralvalueswa.jpg +0 -0
  257. package/AISB/image/036_aisb.t3.036_weakstrongpref.jpg +0 -0
  258. package/AISB/image/037_aisb.t3.037_dementiamask.jpg +0 -0
  259. package/AISB/image/038_aisb.t3.038_tinysam.jpg +0 -0
  260. package/AISB/image/039_aisb.t3.039_calf.jpg +0 -0
  261. package/AISB/image/040_aisb.t3.040_graniteguardian.jpg +0 -0
  262. package/AISB/image/041_aisb.t3.041_amdm.jpg +0 -0
  263. package/AISB/image/042_aisb.t3.042_xpatch.jpg +0 -0
  264. package/AISB/image/043_aisb.t3.043_vhm.jpg +0 -0
  265. package/AISB/image/044_aisb.t3.044_rgvi.jpg +0 -0
  266. package/AISB/image/045_aisb.t3.045_pslstm.jpg +0 -0
  267. package/AISB/image/046_aisb.t3.046_nonstatts.jpg +0 -0
  268. package/AISB/image/047_aisb.t3.047_timepfn.jpg +0 -0
  269. package/AISB/image/048_aisb.t3.048_proxyspex.jpg +0 -0
  270. package/AISB/image/049_aisb.t3.049_hogwildinference.jpg +0 -0
  271. package/AISB/image/050_aisb.t3.050_causalpfn.jpg +0 -0
  272. package/AISB/image/051_aisb.t3.051_flashtp.jpg +0 -0
  273. package/AISB/image/052_aisb.t3.052_nsdiff.jpg +0 -0
  274. package/AISB/image/053_aisb.t3.053_k2vae.jpg +0 -0
  275. package/AISB/image/054_aisb.t3.054_timebase.jpg +0 -0
  276. package/AISB/image/055_aisb.t3.055_csbrain.jpg +0 -0
  277. package/AISB/image/056_aisb.t3.056_infosam.jpg +0 -0
  278. package/AISB/image/057_aisb.t3.057_mdreid.jpg +0 -0
  279. package/AISB/image/058_aisb.t3.058_mindglitch.jpg +0 -0
  280. package/AISB/image/059_aisb.t3.059_selfsupervised.jpg +0 -0
  281. package/AISB/image/060_aisb.t3.060_iaggad.jpg +0 -0
  282. package/AISB/image/061_aisb.t3.061_hsgkn.jpg +0 -0
  283. package/AISB/image/062_aisb.t3.062_visionts.jpg +0 -0
  284. package/AISB/image/063_aisb.t3.063_tsrag.jpg +0 -0
  285. package/AISB/image/064_aisb.t3.064_pir.jpg +0 -0
  286. package/AISB/image/065_aisb.t3.065_proteinbinding.jpg +0 -0
  287. package/AISB/image/066_aisb.t3.066_tropicalattention.jpg +0 -0
  288. package/AISB/image/067_aisb.t3.067_kanad.jpg +0 -0
  289. package/AISB/image/068_aisb.t3.068_sempo.jpg +0 -0
  290. package/AISB/image/069_aisb.t3.069_treehfd.jpg +0 -0
  291. package/AISB/image/070_aisb.t3.070_certifiedunlearning.jpg +0 -0
  292. package/AISB/image/071_aisb.t3.071_neuralmjd.jpg +0 -0
  293. package/AISB/image/072_aisb.t3.072_fedgmt.jpg +0 -0
  294. package/AISB/image/073_aisb.t3.073_rld.jpg +0 -0
  295. package/AISB/image/074_aisb.t3.074_lsvi.jpg +0 -0
  296. package/AISB/image/075_aisb.t3.075_treeslicedentropy.jpg +0 -0
  297. package/AISB/image/076_aisb.t3.076_aanet.jpg +0 -0
  298. package/AISB/image/077_aisb.t3.077_cmnn.jpg +0 -0
  299. package/AISB/image/078_aisb.t3.078_conformalanomaly.jpg +0 -0
  300. package/AISB/image/079_aisb.t3.079_dpfkmeans.jpg +0 -0
  301. package/AISB/image/080_aisb.t3.080_latentscorereweight.jpg +0 -0
  302. package/AISB/image/081_aisb.t3.081_qmamba.jpg +0 -0
  303. package/AISB/image/082_aisb.t3.082_onlinellmrouting.jpg +0 -0
  304. package/AISB/image/083_aisb.t3.083_starformer.jpg +0 -0
  305. package/AISB/image/084_aisb.t3.084_ift.jpg +0 -0
  306. package/AISB/image/085_aisb.t3.085_neuralsurv.jpg +0 -0
  307. package/AISB/image/086_aisb.t3.086_stella.jpg +0 -0
  308. package/AISB/image/087_aisb.t3.087_moses.jpg +0 -0
  309. package/AISB/image/088_aisb.t3.088_channelnorm.jpg +0 -0
  310. package/AISB/image/089_aisb.t3.089_causalvelocity.jpg +0 -0
  311. package/AISB/image/090_aisb.t3.090_rstib.jpg +0 -0
  312. package/AISB/image/091_aisb.t3.091_timeawarecausal.jpg +0 -0
  313. package/AISB/image/092_aisb.t3.092_kmeanslocalopt.jpg +0 -0
  314. package/AISB/image/093_aisb.t3.093_fedwmsam.jpg +0 -0
  315. package/AISB/image/094_aisb.t3.094_boundre.jpg +0 -0
  316. package/AISB/image/095_aisb.t3.095_fastfeaturecp.jpg +0 -0
  317. package/AISB/image/096_aisb.t3.096_m3svm.jpg +0 -0
  318. package/AISB/image/097_aisb.t3.097_wassersteintl.jpg +0 -0
  319. package/AISB/image/098_aisb.t3.098_xmahalanobis.jpg +0 -0
  320. package/AISB/image/099_aisb.t3.099_ollalanding.jpg +0 -0
  321. package/AISB/image/100_aisb.t3.100_invmissingdata.jpg +0 -0
  322. package/AISB/image/101_aisb.t3.101_acia.jpg +0 -0
  323. package/AISB/image/102_aisb.t3.102_stochasticff.jpg +0 -0
  324. package/AISB/image/103_aisb.t3.103_qdcp.jpg +0 -0
  325. package/AISB/image/104_aisb.t3.104_balancedactiveinf.jpg +0 -0
  326. package/AISB/image/105_aisb.t3.105_binaryclasseval.jpg +0 -0
  327. package/AISB/image/106_aisb.t1.reasoning_lite.jpg +0 -0
  328. package/AISB/image/107_aisb.t2.paper_audit.jpg +0 -0
  329. package/AISB/image/108_aisb.t3.multi_gpu_search.jpg +0 -0
  330. package/AISB/image/109_aisb.t3.tdc_admet.jpg +0 -0
  331. package/AISB/image/aisb.b1.agentic_coding.svg +16 -0
  332. package/AISB/image/aisb.b10.climate_earth.svg +16 -0
  333. package/AISB/image/aisb.b11.model_efficiency.svg +16 -0
  334. package/AISB/image/aisb.b12.embodied_ai.svg +16 -0
  335. package/AISB/image/aisb.b2.agent_systems.svg +16 -0
  336. package/AISB/image/aisb.b3.self_evolving_rl.svg +16 -0
  337. package/AISB/image/aisb.b4.lm_reasoning.svg +16 -0
  338. package/AISB/image/aisb.b5.math_proof.svg +16 -0
  339. package/AISB/image/aisb.b6.research_process.svg +16 -0
  340. package/AISB/image/aisb.b7.multimodal_fusion.svg +16 -0
  341. package/AISB/image/aisb.b8.lifesci_drug.svg +16 -0
  342. package/AISB/image/aisb.b9.material_science.svg +16 -0
  343. package/README.md +132 -11
  344. package/bin/ds.js +376 -49
  345. package/docs/en/00_QUICK_START.md +135 -18
  346. package/docs/en/01_SETTINGS_REFERENCE.md +468 -96
  347. package/docs/en/02_START_RESEARCH_GUIDE.md +26 -5
  348. package/docs/en/03_QQ_CONNECTOR_GUIDE.md +14 -3
  349. package/docs/en/04_LINGZHU_CONNECTOR_GUIDE.md +2 -0
  350. package/docs/en/05_TUI_GUIDE.md +171 -2
  351. package/docs/en/07_MEMORY_AND_MCP.md +38 -2
  352. package/docs/en/09_DOCTOR.md +64 -4
  353. package/docs/en/10_WEIXIN_CONNECTOR_GUIDE.md +38 -1
  354. package/docs/en/11_LICENSE_AND_RISK.md +4 -0
  355. package/docs/en/12_GUIDED_WORKFLOW_TOUR.md +15 -0
  356. package/docs/en/14_PROMPT_SKILLS_AND_MCP_GUIDE.md +9 -0
  357. package/docs/en/15_CODEX_PROVIDER_SETUP.md +622 -187
  358. package/docs/en/16_TELEGRAM_CONNECTOR_GUIDE.md +14 -0
  359. package/docs/en/17_WHATSAPP_CONNECTOR_GUIDE.md +14 -0
  360. package/docs/en/18_FEISHU_CONNECTOR_GUIDE.md +14 -0
  361. package/docs/en/21_LOCAL_MODEL_BACKENDS_GUIDE.md +105 -2
  362. package/docs/en/22_BENCHSTORE_YAML_REFERENCE.md +469 -0
  363. package/docs/en/23_BENCHSTORE_GITHUB_RELEASES_SPEC.md +316 -0
  364. package/docs/en/24_CLAUDE_CODE_PROVIDER_SETUP.md +469 -0
  365. package/docs/en/25_OPENCODE_PROVIDER_SETUP.md +653 -0
  366. package/docs/en/26_CITATION_AND_ATTRIBUTION.md +119 -0
  367. package/docs/en/27_KIMI_CODE_PROVIDER_SETUP.md +180 -0
  368. package/docs/en/28_DISCORD_CONNECTOR_GUIDE.md +61 -0
  369. package/docs/en/29_SLACK_CONNECTOR_GUIDE.md +60 -0
  370. package/docs/en/30_SETTINGS_CONTROL_CENTER_GUIDE.md +371 -0
  371. package/docs/en/{19_LOCAL_BROWSER_AUTH.md → 31_LOCAL_BROWSER_AUTH.md} +1 -1
  372. package/docs/en/32_WINDOWS_WSL2_DEPLOYMENT_GUIDE.md +273 -0
  373. package/docs/en/33_WORKSPACE_EXPLORER_QA.md +121 -0
  374. package/docs/en/91_DEVELOPMENT.md +29 -0
  375. package/docs/en/99_ACKNOWLEDGEMENTS.md +24 -19
  376. package/docs/en/README.md +44 -7
  377. package/docs/images/admin/admin-connectors-health-en.png +0 -0
  378. package/docs/images/admin/admin-controllers-en.png +0 -0
  379. package/docs/images/admin/admin-diagnostics-en.png +0 -0
  380. package/docs/images/admin/admin-errors-en.png +0 -0
  381. package/docs/images/admin/admin-issues-en.png +0 -0
  382. package/docs/images/admin/admin-logs-en.png +0 -0
  383. package/docs/images/admin/admin-quest-detail-en.png +0 -0
  384. package/docs/images/admin/admin-quests-en.png +0 -0
  385. package/docs/images/admin/admin-repairs-en.png +0 -0
  386. package/docs/images/admin/admin-runtime-en.png +0 -0
  387. package/docs/images/admin/admin-search-en.png +0 -0
  388. package/docs/images/admin/admin-stats-en.png +0 -0
  389. package/docs/images/admin/admin-summary-en.png +0 -0
  390. package/docs/images/connectors/connector-discord-en.png +0 -0
  391. package/docs/images/connectors/connector-feishu-en.png +0 -0
  392. package/docs/images/connectors/connector-lingzhu-en.png +0 -0
  393. package/docs/images/connectors/connector-qq-en.png +0 -0
  394. package/docs/images/connectors/connector-slack-en.png +0 -0
  395. package/docs/images/connectors/connector-telegram-en.png +0 -0
  396. package/docs/images/connectors/connector-weixin-en.png +0 -0
  397. package/docs/images/connectors/connector-whatsapp-en.png +0 -0
  398. package/docs/images/settings/settings-baselines-en.png +0 -0
  399. package/docs/images/settings/settings-config-en.png +0 -0
  400. package/docs/images/settings/settings-connectors-overview-en.png +0 -0
  401. package/docs/images/settings/settings-deepxiv-en.png +0 -0
  402. package/docs/images/settings/settings-mcp-servers-en.png +0 -0
  403. package/docs/images/settings/settings-plugins-en.png +0 -0
  404. package/docs/images/settings/settings-runners-en.png +0 -0
  405. package/docs/zh/00_QUICK_START.md +92 -17
  406. package/docs/zh/01_SETTINGS_REFERENCE.md +219 -98
  407. package/docs/zh/02_START_RESEARCH_GUIDE.md +26 -5
  408. package/docs/zh/05_TUI_GUIDE.md +171 -2
  409. package/docs/zh/07_MEMORY_AND_MCP.md +29 -2
  410. package/docs/zh/09_DOCTOR.md +39 -4
  411. package/docs/zh/10_WEIXIN_CONNECTOR_GUIDE.md +24 -1
  412. package/docs/zh/11_LICENSE_AND_RISK.md +4 -0
  413. package/docs/zh/12_GUIDED_WORKFLOW_TOUR.md +15 -0
  414. package/docs/zh/14_PROMPT_SKILLS_AND_MCP_GUIDE.md +9 -0
  415. package/docs/zh/15_CODEX_PROVIDER_SETUP.md +550 -188
  416. package/docs/zh/21_LOCAL_MODEL_BACKENDS_GUIDE.md +105 -2
  417. package/docs/zh/22_BENCHSTORE_YAML_REFERENCE.md +459 -0
  418. package/docs/zh/23_BENCHSTORE_GITHUB_RELEASES_SPEC.md +287 -0
  419. package/docs/zh/23_CLAUDE_RUNNER_GUIDE.md +103 -0
  420. package/docs/zh/24_CLAUDE_CODE_PROVIDER_SETUP.md +460 -0
  421. package/docs/zh/25_OPENCODE_PROVIDER_SETUP.md +660 -0
  422. package/docs/zh/26_CITATION_AND_ATTRIBUTION.md +102 -0
  423. package/docs/zh/27_KIMI_CODE_PROVIDER_SETUP.md +51 -0
  424. package/docs/zh/{19_LOCAL_BROWSER_AUTH.md → 31_LOCAL_BROWSER_AUTH.md} +1 -1
  425. package/docs/zh/32_WINDOWS_WSL2_DEPLOYMENT_GUIDE.md +264 -0
  426. package/docs/zh/33_WORKSPACE_EXPLORER_QA.md +127 -0
  427. package/docs/zh/99_ACKNOWLEDGEMENTS.md +23 -19
  428. package/docs/zh/README.md +29 -7
  429. package/install.sh +122 -16
  430. package/package.json +4 -1
  431. package/pyproject.toml +2 -1
  432. package/src/deepscientist/__init__.py +1 -1
  433. package/src/deepscientist/acp/envelope.py +13 -0
  434. package/src/deepscientist/admin/__init__.py +3 -0
  435. package/src/deepscientist/admin/charts.py +681 -0
  436. package/src/deepscientist/admin/logs.py +119 -0
  437. package/src/deepscientist/admin/repairs.py +217 -0
  438. package/src/deepscientist/admin/service.py +1310 -0
  439. package/src/deepscientist/admin/system_info.py +700 -0
  440. package/src/deepscientist/admin/tasks.py +465 -0
  441. package/src/deepscientist/admin/tool_metrics.py +600 -0
  442. package/src/deepscientist/artifact/guidance.py +8 -4
  443. package/src/deepscientist/artifact/schemas.py +115 -0
  444. package/src/deepscientist/artifact/service.py +4268 -260
  445. package/src/deepscientist/bash_exec/monitor.py +30 -3
  446. package/src/deepscientist/bash_exec/service.py +134 -1
  447. package/src/deepscientist/benchstore/__init__.py +4 -0
  448. package/src/deepscientist/benchstore/prompt_builder.py +224 -0
  449. package/src/deepscientist/benchstore/service.py +1716 -0
  450. package/src/deepscientist/channels/weixin_ilink.py +8 -1
  451. package/src/deepscientist/cli.py +92 -17
  452. package/src/deepscientist/codex_cli_compat.py +2 -2
  453. package/src/deepscientist/config/models.py +82 -11
  454. package/src/deepscientist/config/service.py +927 -91
  455. package/src/deepscientist/connector/weixin_support.py +48 -17
  456. package/src/deepscientist/daemon/api/handlers.py +697 -210
  457. package/src/deepscientist/daemon/api/router.py +76 -1
  458. package/src/deepscientist/daemon/app.py +1054 -51
  459. package/src/deepscientist/diagnostics/runner_failures.py +147 -0
  460. package/src/deepscientist/doctor.py +212 -65
  461. package/src/deepscientist/evidence_packets.py +590 -0
  462. package/src/deepscientist/home.py +52 -4
  463. package/src/deepscientist/kimi_cli_compat.py +50 -0
  464. package/src/deepscientist/latex_runtime.py +2 -2
  465. package/src/deepscientist/mcp/context.py +2 -0
  466. package/src/deepscientist/mcp/schemas.py +114 -0
  467. package/src/deepscientist/mcp/server.py +1566 -126
  468. package/src/deepscientist/memory/service.py +203 -16
  469. package/src/deepscientist/process_control.py +8 -1
  470. package/src/deepscientist/prompts/builder.py +836 -92
  471. package/src/deepscientist/quest/__init__.py +2 -2
  472. package/src/deepscientist/quest/layout.py +12 -1
  473. package/src/deepscientist/quest/node_traces.py +10 -0
  474. package/src/deepscientist/quest/service.py +1430 -139
  475. package/src/deepscientist/quest/stage_views.py +1 -1
  476. package/src/deepscientist/runners/__init__.py +18 -0
  477. package/src/deepscientist/runners/base.py +89 -1
  478. package/src/deepscientist/runners/builtins.py +13 -1
  479. package/src/deepscientist/runners/claude.py +391 -0
  480. package/src/deepscientist/runners/codex.py +421 -21
  481. package/src/deepscientist/runners/codex_telemetry.py +127 -0
  482. package/src/deepscientist/runners/kimi.py +334 -0
  483. package/src/deepscientist/runners/metadata.py +68 -0
  484. package/src/deepscientist/runners/opencode.py +414 -0
  485. package/src/deepscientist/runners/runtime_overrides.py +100 -0
  486. package/src/deepscientist/runners/simple_cli.py +538 -0
  487. package/src/deepscientist/runtime_storage.py +303 -0
  488. package/src/deepscientist/shared.py +61 -16
  489. package/src/deepscientist/skills/installer.py +37 -0
  490. package/src/deepscientist/skills/registry.py +2 -0
  491. package/src/deepscientist/tinytex.py +2 -2
  492. package/src/deepscientist/tui.py +10 -3
  493. package/src/prompts/benchstore/system.md +77 -0
  494. package/src/prompts/connectors/qq.md +33 -2
  495. package/src/prompts/connectors/weixin.md +208 -23
  496. package/src/prompts/contracts/admin_ops.md +74 -0
  497. package/src/prompts/contracts/admin_ops_knowledge.md +138 -0
  498. package/src/prompts/contracts/shared_interaction.md +5 -11
  499. package/src/prompts/start_setup/system.md +422 -0
  500. package/src/prompts/system.md +409 -315
  501. package/src/prompts/system_copilot.md +88 -12
  502. package/src/skills/analysis-campaign/SKILL.md +239 -578
  503. package/src/skills/analysis-campaign/references/artifact-flow-examples.md +102 -0
  504. package/src/skills/analysis-campaign/references/boundary-cases.md +98 -0
  505. package/src/skills/analysis-campaign/references/campaign-checklist-template.md +39 -24
  506. package/src/skills/analysis-campaign/references/campaign-design.md +26 -10
  507. package/src/skills/analysis-campaign/references/campaign-plan-template.md +53 -54
  508. package/src/skills/analysis-campaign/references/operational-guidance.md +97 -0
  509. package/src/skills/analysis-campaign/references/writing-facing-slice-examples.md +10 -20
  510. package/src/skills/baseline/SKILL.md +183 -461
  511. package/src/skills/baseline/references/artifact-flow-examples.md +106 -0
  512. package/src/skills/baseline/references/artifact-payload-examples.md +1 -1
  513. package/src/skills/baseline/references/baseline-checklist-template.md +27 -35
  514. package/src/skills/baseline/references/baseline-plan-template.md +37 -76
  515. package/src/skills/baseline/references/boundary-cases.md +86 -0
  516. package/src/skills/baseline/references/codebase-audit-checklist.md +2 -6
  517. package/src/skills/baseline/references/comparability-contract.md +7 -12
  518. package/src/skills/baseline/references/operational-guidance.md +56 -0
  519. package/src/skills/baseline/references/route-selection.md +5 -25
  520. package/src/skills/decision/SKILL.md +113 -306
  521. package/src/skills/decision/references/checkpoint-memory-template.md +47 -0
  522. package/src/skills/decision/references/operational-guidance.md +94 -0
  523. package/src/skills/decision/references/research-route-criteria.md +7 -8
  524. package/src/skills/decision/references/strategic-decision-template.md +13 -26
  525. package/src/skills/experiment/SKILL.md +132 -670
  526. package/src/skills/experiment/references/execution-playbook.md +374 -0
  527. package/src/skills/experiment/references/main-experiment-checklist-template.md +26 -2
  528. package/src/skills/experiment/references/main-experiment-plan-template.md +28 -17
  529. package/src/skills/experiment/references/operational-guidance.md +108 -0
  530. package/src/skills/finalize/SKILL.md +62 -0
  531. package/src/skills/finalize/references/checkpoint-memory-template.md +49 -0
  532. package/src/skills/finalize/references/resume-packet-template.md +7 -0
  533. package/src/skills/idea/SKILL.md +228 -15
  534. package/src/skills/idea/references/controlled-brainstorming-playbook.md +78 -0
  535. package/src/skills/idea/references/current-board-packet-template.md +61 -0
  536. package/src/skills/idea/references/high-value-idea-sourcing.md +119 -0
  537. package/src/skills/idea/references/idea-generation-playbook.md +21 -0
  538. package/src/skills/idea/references/idea-thinking-flow.md +6 -0
  539. package/src/skills/idea/references/literature-survey-template.md +3 -0
  540. package/src/skills/idea/references/objective-contract-template.md +54 -0
  541. package/src/skills/idea/references/outline-seeding-example.md +56 -0
  542. package/src/skills/idea/references/pre-idea-draft-template.md +105 -0
  543. package/src/skills/idea/references/related-work-playbook.md +75 -2
  544. package/src/skills/idea/references/research-history-playbook.md +114 -0
  545. package/src/skills/idea/references/selection-gate.md +58 -6
  546. package/src/skills/intake-audit/SKILL.md +43 -2
  547. package/src/skills/intake-audit/references/state-audit-template.md +10 -0
  548. package/src/skills/nature-data/SKILL.md +128 -0
  549. package/src/skills/nature-data/UPSTREAM_LICENSE.txt +21 -0
  550. package/src/skills/nature-data/agents/openai.yaml +4 -0
  551. package/src/skills/nature-data/references/chinese-author-alignment.md +84 -0
  552. package/src/skills/nature-data/references/fair-metadata-checklist.md +105 -0
  553. package/src/skills/nature-data/references/policy-principles.md +103 -0
  554. package/src/skills/nature-data/references/repository-and-identifiers.md +96 -0
  555. package/src/skills/nature-data/references/source-basis.md +54 -0
  556. package/src/skills/nature-data/references/statement-patterns.md +153 -0
  557. package/src/skills/nature-figure/SKILL.md +197 -0
  558. package/src/skills/nature-figure/UPSTREAM_LICENSE.txt +21 -0
  559. package/src/skills/nature-figure/agents/openai.yaml +4 -0
  560. package/src/skills/nature-figure/evals/evals.json +37 -0
  561. package/src/skills/nature-figure/references/api.md +428 -0
  562. package/src/skills/nature-figure/references/backend-selection.md +100 -0
  563. package/src/skills/nature-figure/references/chart-types.md +281 -0
  564. package/src/skills/nature-figure/references/common-patterns.md +349 -0
  565. package/src/skills/nature-figure/references/design-theory.md +436 -0
  566. package/src/skills/nature-figure/references/figure-contract.md +93 -0
  567. package/src/skills/nature-figure/references/nature-2026-observations.md +112 -0
  568. package/src/skills/nature-figure/references/qa-contract.md +119 -0
  569. package/src/skills/nature-figure/references/r-template-index.md +66 -0
  570. package/src/skills/nature-figure/references/r-workflow.md +161 -0
  571. package/src/skills/nature-figure/references/tutorials.md +250 -0
  572. package/src/skills/nature-paper2ppt/SKILL.md +507 -0
  573. package/src/skills/nature-paper2ppt/UPSTREAM_LICENSE.txt +21 -0
  574. package/src/skills/nature-paper2ppt/agents/openai.yaml +4 -0
  575. package/src/skills/nature-polishing/SKILL.md +385 -0
  576. package/src/skills/nature-polishing/UPSTREAM_LICENSE.txt +21 -0
  577. package/src/skills/nature-polishing/agents/openai.yaml +4 -0
  578. package/src/skills/nature-polishing/references/phrasebank-playbook.md +162 -0
  579. package/src/skills/nature-polishing/references/section-moves.md +240 -0
  580. package/src/skills/nature-polishing/references/style-guardrails.md +94 -0
  581. package/src/skills/nature-polishing/references/writing-strategy.md +148 -0
  582. package/src/skills/optimize/SKILL.md +177 -1568
  583. package/src/skills/optimize/references/brief-shaping-playbook.md +95 -0
  584. package/src/skills/optimize/references/candidate-board-template.md +13 -0
  585. package/src/skills/optimize/references/candidate-ranking-template.md +51 -0
  586. package/src/skills/optimize/references/codegen-route-playbook.md +50 -0
  587. package/src/skills/optimize/references/debug-response-template.md +29 -0
  588. package/src/skills/optimize/references/frontier-review-template.md +32 -0
  589. package/src/skills/optimize/references/fusion-playbook.md +36 -0
  590. package/src/skills/optimize/references/method-brief-template.md +73 -0
  591. package/src/skills/optimize/references/operational-guidance.md +621 -0
  592. package/src/skills/optimize/references/optimization-memory-template.md +30 -0
  593. package/src/skills/optimize/references/optimize-checklist-template.md +18 -0
  594. package/src/skills/optimize/references/plateau-response-playbook.md +28 -0
  595. package/src/skills/optimize/references/prompt-patterns.md +49 -0
  596. package/src/skills/paper-outline/SKILL.md +227 -0
  597. package/src/skills/paper-outline/references/outline-patterns.md +87 -0
  598. package/src/skills/paper-plot/SKILL.md +79 -0
  599. package/src/skills/paper-plot/agents/openai.yaml +4 -0
  600. package/src/skills/paper-plot/references/bar_grouped_hatch.md +96 -0
  601. package/src/skills/paper-plot/references/bar_paired_delta.md +72 -0
  602. package/src/skills/paper-plot/references/line_confidence_band.md +75 -0
  603. package/src/skills/paper-plot/references/line_loss_with_inset.md +65 -0
  604. package/src/skills/paper-plot/references/line_training_curve.md +44 -0
  605. package/src/skills/paper-plot/references/radar_dual_series.md +59 -0
  606. package/src/skills/paper-plot/references/scatter_broken_axis.md +59 -0
  607. package/src/skills/paper-plot/references/scatter_tsne_cluster.md +72 -0
  608. package/src/skills/paper-plot/scripts/bar_memevolve.py +109 -0
  609. package/src/skills/paper-plot/scripts/bar_spice.py +166 -0
  610. package/src/skills/paper-plot/scripts/line_aime.py +94 -0
  611. package/src/skills/paper-plot/scripts/line_loss_inset.py +157 -0
  612. package/src/skills/paper-plot/scripts/line_selfdistill.py +168 -0
  613. package/src/skills/paper-plot/scripts/radar_dora.py +151 -0
  614. package/src/skills/paper-plot/scripts/scatter_break.py +169 -0
  615. package/src/skills/paper-plot/scripts/scatter_tsne.py +133 -0
  616. package/src/skills/rebuttal/SKILL.md +9 -0
  617. package/src/skills/references/tool-usage-by-stage.md +438 -0
  618. package/src/skills/review/SKILL.md +105 -7
  619. package/src/skills/science/PROVENANCE.md +44 -0
  620. package/src/skills/science/SKILL.md +137 -0
  621. package/src/skills/science/references/artifact-science-tool.md +110 -0
  622. package/src/skills/science/references/claim-type-discipline.md +56 -0
  623. package/src/skills/science/references/domain-index.md +422 -0
  624. package/src/skills/science/references/hpc-via-bash-exec.md +42 -0
  625. package/src/skills/science/references/package-check-playbook.md +64 -0
  626. package/src/skills/science/references/package-index.min.json +3616 -0
  627. package/src/skills/science/references/packages/abinit.md +80 -0
  628. package/src/skills/science/references/packages/acts.md +73 -0
  629. package/src/skills/science/references/packages/aiida-core.md +80 -0
  630. package/src/skills/science/references/packages/alamode.md +80 -0
  631. package/src/skills/science/references/packages/amuse.md +88 -0
  632. package/src/skills/science/references/packages/anndata.md +88 -0
  633. package/src/skills/science/references/packages/arbor.md +80 -0
  634. package/src/skills/science/references/packages/arc.md +73 -0
  635. package/src/skills/science/references/packages/astropy.md +88 -0
  636. package/src/skills/science/references/packages/astroquery.md +88 -0
  637. package/src/skills/science/references/packages/atomate2.md +80 -0
  638. package/src/skills/science/references/packages/atomsmltr.md +73 -0
  639. package/src/skills/science/references/packages/awkward.md +73 -0
  640. package/src/skills/science/references/packages/batman.md +88 -0
  641. package/src/skills/science/references/packages/biopython.md +88 -0
  642. package/src/skills/science/references/packages/bloqade.md +73 -0
  643. package/src/skills/science/references/packages/brian2.md +73 -0
  644. package/src/skills/science/references/packages/bullet3.md +73 -0
  645. package/src/skills/science/references/packages/calculix.md +80 -0
  646. package/src/skills/science/references/packages/cantera.md +73 -0
  647. package/src/skills/science/references/packages/cavity-md-ipi.md +80 -0
  648. package/src/skills/science/references/packages/ccdproc.md +88 -0
  649. package/src/skills/science/references/packages/celerite2.md +88 -0
  650. package/src/skills/science/references/packages/cellrank.md +73 -0
  651. package/src/skills/science/references/packages/cesm.md +80 -0
  652. package/src/skills/science/references/packages/chemicals.md +73 -0
  653. package/src/skills/science/references/packages/chempy.md +73 -0
  654. package/src/skills/science/references/packages/cirq.md +73 -0
  655. package/src/skills/science/references/packages/coffea.md +73 -0
  656. package/src/skills/science/references/packages/cp2k.md +88 -0
  657. package/src/skills/science/references/packages/custodian.md +80 -0
  658. package/src/skills/science/references/packages/dart.md +73 -0
  659. package/src/skills/science/references/packages/datamol.md +88 -0
  660. package/src/skills/science/references/packages/dd4hep.md +73 -0
  661. package/src/skills/science/references/packages/dealii.md +80 -0
  662. package/src/skills/science/references/packages/deepchem.md +88 -0
  663. package/src/skills/science/references/packages/delphes.md +73 -0
  664. package/src/skills/science/references/packages/devito.md +80 -0
  665. package/src/skills/science/references/packages/dftb.md +88 -0
  666. package/src/skills/science/references/packages/dftd4.md +88 -0
  667. package/src/skills/science/references/packages/dftk-jl.md +80 -0
  668. package/src/skills/science/references/packages/dolfinx.md +80 -0
  669. package/src/skills/science/references/packages/drake.md +73 -0
  670. package/src/skills/science/references/packages/dumux.md +73 -0
  671. package/src/skills/science/references/packages/elk.md +80 -0
  672. package/src/skills/science/references/packages/elmerfem.md +80 -0
  673. package/src/skills/science/references/packages/enzo-e.md +88 -0
  674. package/src/skills/science/references/packages/espresso.md +80 -0
  675. package/src/skills/science/references/packages/exoplanet.md +88 -0
  676. package/src/skills/science/references/packages/fairroot.md +73 -0
  677. package/src/skills/science/references/packages/fbpic.md +80 -0
  678. package/src/skills/science/references/packages/fdtdbath-meep.md +80 -0
  679. package/src/skills/science/references/packages/geant4.md +73 -0
  680. package/src/skills/science/references/packages/geosx.md +80 -0
  681. package/src/skills/science/references/packages/gprmax.md +80 -0
  682. package/src/skills/science/references/packages/gromacs.md +80 -0
  683. package/src/skills/science/references/packages/gwaslab.md +73 -0
  684. package/src/skills/science/references/packages/gz-sim.md +73 -0
  685. package/src/skills/science/references/packages/hail.md +88 -0
  686. package/src/skills/science/references/packages/hiphive.md +80 -0
  687. package/src/skills/science/references/packages/hoomd-blue.md +80 -0
  688. package/src/skills/science/references/packages/itensor.md +73 -0
  689. package/src/skills/science/references/packages/itensors-jl.md +73 -0
  690. package/src/skills/science/references/packages/jdftx.md +73 -0
  691. package/src/skills/science/references/packages/jobflow.md +80 -0
  692. package/src/skills/science/references/packages/kadanoffbaym-jl.md +73 -0
  693. package/src/skills/science/references/packages/kite.md +80 -0
  694. package/src/skills/science/references/packages/kratos.md +80 -0
  695. package/src/skills/science/references/packages/kwant.md +73 -0
  696. package/src/skills/science/references/packages/lammps.md +80 -0
  697. package/src/skills/science/references/packages/lightkurve.md +88 -0
  698. package/src/skills/science/references/packages/limix.md +73 -0
  699. package/src/skills/science/references/packages/maxwelllink.md +80 -0
  700. package/src/skills/science/references/packages/mcdc.md +73 -0
  701. package/src/skills/science/references/packages/meep.md +80 -0
  702. package/src/skills/science/references/packages/mfem.md +80 -0
  703. package/src/skills/science/references/packages/mitgcm.md +73 -0
  704. package/src/skills/science/references/packages/modflow6.md +73 -0
  705. package/src/skills/science/references/packages/molecool.md +73 -0
  706. package/src/skills/science/references/packages/mom6.md +73 -0
  707. package/src/skills/science/references/packages/moose.md +80 -0
  708. package/src/skills/science/references/packages/mpas-model.md +73 -0
  709. package/src/skills/science/references/packages/mujoco.md +73 -0
  710. package/src/skills/science/references/packages/mumax3.md +73 -0
  711. package/src/skills/science/references/packages/nekrs.md +80 -0
  712. package/src/skills/science/references/packages/nessi.md +73 -0
  713. package/src/skills/science/references/packages/nest-simulator.md +73 -0
  714. package/src/skills/science/references/packages/netket.md +73 -0
  715. package/src/skills/science/references/packages/neuron.md +73 -0
  716. package/src/skills/science/references/packages/nextflow.md +88 -0
  717. package/src/skills/science/references/packages/nwchem.md +88 -0
  718. package/src/skills/science/references/packages/openbabel.md +88 -0
  719. package/src/skills/science/references/packages/openems.md +80 -0
  720. package/src/skills/science/references/packages/openff-toolkit.md +88 -0
  721. package/src/skills/science/references/packages/openfoam-dev.md +80 -0
  722. package/src/skills/science/references/packages/openmc.md +73 -0
  723. package/src/skills/science/references/packages/openmm.md +80 -0
  724. package/src/skills/science/references/packages/openmoc.md +73 -0
  725. package/src/skills/science/references/packages/openmx.md +80 -0
  726. package/src/skills/science/references/packages/opensees.md +80 -0
  727. package/src/skills/science/references/packages/opensn.md +80 -0
  728. package/src/skills/science/references/packages/opm-simulators.md +73 -0
  729. package/src/skills/science/references/packages/oqupy.md +73 -0
  730. package/src/skills/science/references/packages/packmol.md +80 -0
  731. package/src/skills/science/references/packages/palabos.md +80 -0
  732. package/src/skills/science/references/packages/parflow.md +80 -0
  733. package/src/skills/science/references/packages/pennylane.md +88 -0
  734. package/src/skills/science/references/packages/perceval.md +73 -0
  735. package/src/skills/science/references/packages/phono3py.md +73 -0
  736. package/src/skills/science/references/packages/phonopy.md +73 -0
  737. package/src/skills/science/references/packages/photutils.md +88 -0
  738. package/src/skills/science/references/packages/picongpu.md +80 -0
  739. package/src/skills/science/references/packages/plink-ng.md +88 -0
  740. package/src/skills/science/references/packages/precice.md +73 -0
  741. package/src/skills/science/references/packages/psc.md +80 -0
  742. package/src/skills/science/references/packages/psi4.md +88 -0
  743. package/src/skills/science/references/packages/pybinding.md +73 -0
  744. package/src/skills/science/references/packages/pyfr.md +80 -0
  745. package/src/skills/science/references/packages/pyhf.md +73 -0
  746. package/src/skills/science/references/packages/pyiron_base.md +80 -0
  747. package/src/skills/science/references/packages/pylcp.md +73 -0
  748. package/src/skills/science/references/packages/pylith.md +80 -0
  749. package/src/skills/science/references/packages/pynbody.md +88 -0
  750. package/src/skills/science/references/packages/pysam.md +88 -0
  751. package/src/skills/science/references/packages/pyscf.md +88 -0
  752. package/src/skills/science/references/packages/q-e.md +73 -0
  753. package/src/skills/science/references/packages/qibo.md +73 -0
  754. package/src/skills/science/references/packages/qiskit.md +73 -0
  755. package/src/skills/science/references/packages/quantica-jl.md +73 -0
  756. package/src/skills/science/references/packages/quantumoptics-jl.md +73 -0
  757. package/src/skills/science/references/packages/quimb.md +73 -0
  758. package/src/skills/science/references/packages/qulacs.md +73 -0
  759. package/src/skills/science/references/packages/qutip.md +73 -0
  760. package/src/skills/science/references/packages/rdkit.md +88 -0
  761. package/src/skills/science/references/packages/rmg-py.md +73 -0
  762. package/src/skills/science/references/packages/root.md +73 -0
  763. package/src/skills/science/references/packages/scanpy.md +88 -0
  764. package/src/skills/science/references/packages/scikit-allel.md +88 -0
  765. package/src/skills/science/references/packages/scikit-bio.md +88 -0
  766. package/src/skills/science/references/packages/scqubits.md +73 -0
  767. package/src/skills/science/references/packages/scuff-em.md +80 -0
  768. package/src/skills/science/references/packages/scvi-tools.md +73 -0
  769. package/src/skills/science/references/packages/seissol.md +73 -0
  770. package/src/skills/science/references/packages/sfepy.md +80 -0
  771. package/src/skills/science/references/packages/sisl.md +73 -0
  772. package/src/skills/science/references/packages/smilei.md +80 -0
  773. package/src/skills/science/references/packages/snakemake.md +88 -0
  774. package/src/skills/science/references/packages/specfem3d-globe.md +80 -0
  775. package/src/skills/science/references/packages/specutils.md +88 -0
  776. package/src/skills/science/references/packages/spglib.md +80 -0
  777. package/src/skills/science/references/packages/squidpy.md +88 -0
  778. package/src/skills/science/references/packages/starry.md +88 -0
  779. package/src/skills/science/references/packages/strawberryfields.md +73 -0
  780. package/src/skills/science/references/packages/su2.md +80 -0
  781. package/src/skills/science/references/packages/sunny-jl.md +73 -0
  782. package/src/skills/science/references/packages/sw4.md +73 -0
  783. package/src/skills/science/references/packages/swift.md +88 -0
  784. package/src/skills/science/references/packages/tdnegf.md +73 -0
  785. package/src/skills/science/references/packages/tenpy.md +73 -0
  786. package/src/skills/science/references/packages/thermo.md +73 -0
  787. package/src/skills/science/references/packages/tkwant.md +73 -0
  788. package/src/skills/science/references/packages/tvb-root.md +73 -0
  789. package/src/skills/science/references/packages/uproot5.md +73 -0
  790. package/src/skills/science/references/packages/vampire.md +80 -0
  791. package/src/skills/science/references/packages/wannier_tools.md +73 -0
  792. package/src/skills/science/references/packages/warpx.md +80 -0
  793. package/src/skills/science/references/packages/wrf.md +73 -0
  794. package/src/skills/science/references/packages/xtb.md +88 -0
  795. package/src/skills/science/references/packages/yt.md +73 -0
  796. package/src/skills/science/references/science-task-brief-template.md +71 -0
  797. package/src/skills/scout/SKILL.md +83 -425
  798. package/src/skills/scout/references/literature-scout-template.md +5 -24
  799. package/src/skills/scout/references/operational-guidance.md +191 -0
  800. package/src/skills/scout/references/paper-triage-playbook.md +11 -35
  801. package/src/skills/write/SKILL.md +744 -1246
  802. package/src/skills/write/references/experiments_analysis_patterns.md +129 -0
  803. package/src/skills/write/references/oral_package_patterns.md +252 -0
  804. package/src/skills/write/references/oral_writing_principles.md +291 -0
  805. package/src/skills/write/references/section_rewrite_checklist.md +234 -0
  806. package/src/tui/dist/app/AppContainer.js +1314 -27
  807. package/src/tui/dist/components/Composer.js +26 -1
  808. package/src/tui/dist/components/ConfigScreen.js +2 -1
  809. package/src/tui/dist/components/InputPrompt.js +25 -9
  810. package/src/tui/dist/components/MainContent.js +18 -3
  811. package/src/tui/dist/components/QuestScreen.js +3 -2
  812. package/src/tui/dist/components/UtilityScreen.js +37 -0
  813. package/src/tui/dist/hooks/useSafeInput.js +10 -0
  814. package/src/tui/dist/index.js +13 -1
  815. package/src/tui/dist/layouts/DefaultAppLayout.js +11 -8
  816. package/src/tui/dist/lib/api.js +89 -1
  817. package/src/tui/package.json +1 -1
  818. package/src/ui/dist/assets/{AnalysisPlugin-BCKAfjba.js → AnalysisPlugin-CA94NGmI.js} +1 -1
  819. package/src/ui/dist/assets/CliPlugin-DHBzphZU.js +79 -0
  820. package/src/ui/dist/assets/CodeEditorPlugin-BOFwD2rn.js +2 -0
  821. package/src/ui/dist/assets/{CodeViewerPlugin-CbaFRrUU.js → CodeViewerPlugin-CqDpgjik.js} +4 -4
  822. package/src/ui/dist/assets/{DocViewerPlugin-DAjLVeQD.js → DocViewerPlugin-UDBgt8-4.js} +3 -3
  823. package/src/ui/dist/assets/GitCommitViewerPlugin-BmHtZ0bZ.js +6 -0
  824. package/src/ui/dist/assets/{GitDiffViewerPlugin-CQACjoAA.js → GitDiffViewerPlugin-CAxjNorQ.js} +2 -2
  825. package/src/ui/dist/assets/{GitSnapshotViewer-0r4nLPke.js → GitSnapshotViewer-CweA6VON.js} +2 -2
  826. package/src/ui/dist/assets/{ImageViewerPlugin-nBOmI2v_.js → ImageViewerPlugin-C8wHGvGN.js} +5 -5
  827. package/src/ui/dist/assets/LabPlugin-COyyLUol.js +32 -0
  828. package/src/ui/dist/assets/{LatexPlugin-ZwtV8pIp.js → LatexPlugin-BQjAaA5J.js} +4 -4
  829. package/src/ui/dist/assets/{MarkdownViewerPlugin-DKqVfKyW.js → MarkdownViewerPlugin-Dy1NE2dI.js} +3 -3
  830. package/src/ui/dist/assets/{MarketplacePlugin-BwxStZ9D.js → MarketplacePlugin-DMIZtEJ2.js} +2 -2
  831. package/src/ui/dist/assets/NotebookEditor-CFHMq_Qt.js +91 -0
  832. package/src/ui/dist/assets/{NotebookEditor-DB9N_T9q.js → NotebookEditor-WFyd8Ybt.js} +3 -3
  833. package/src/ui/dist/assets/{PdfLoader-eWBONbQP.js → PdfLoader-CLE5u5TS.js} +3 -3
  834. package/src/ui/dist/assets/{PdfMarkdownPlugin-D22YOZL3.js → PdfMarkdownPlugin-_iNK_H83.js} +1 -1
  835. package/src/ui/dist/assets/PdfViewerPlugin-DgWsbInT.js +22 -0
  836. package/src/ui/dist/assets/SearchPlugin-DrZmn5iw.js +11 -0
  837. package/src/ui/dist/assets/{TextViewerPlugin-C5xqeeUH.js → TextViewerPlugin-D1-T3aC7.js} +4 -4
  838. package/src/ui/dist/assets/branding/runner-claude.svg +107 -0
  839. package/src/ui/dist/assets/branding/runner-codex.svg +10 -0
  840. package/src/ui/dist/assets/branding/runner-kimi.svg +14 -0
  841. package/src/ui/dist/assets/branding/runner-opencode.svg +7 -0
  842. package/src/ui/dist/assets/cli-store-CoZ-x5Ip.js +1 -0
  843. package/src/ui/dist/assets/{code-WlFHE7z_.js → code-DbsmSd3Y.js} +1 -1
  844. package/src/ui/dist/assets/file-diff-panel-DsvyRz47.js +1 -0
  845. package/src/ui/dist/assets/{wrap-text-BC-Hltpd.js → file-jump-queue-DeQBikaw.js} +3 -3
  846. package/src/ui/dist/assets/{file-socket-CfQPKQKj.js → file-socket-DA5XIx88.js} +1 -1
  847. package/src/ui/dist/assets/fonts/ds-fonts.css +50 -4
  848. package/src/ui/dist/assets/images/deepxiv/register-guide.png +0 -0
  849. package/src/ui/dist/assets/index-39vY9LmZ.js +1 -0
  850. package/src/ui/dist/assets/{index-CwNu1aH4.js → index-BsO46tJA.js} +1 -1
  851. package/src/ui/dist/assets/index-CHzJ2xtB.js +3530 -0
  852. package/src/ui/dist/assets/index-DH-zxoZ3.css +33 -0
  853. package/src/ui/dist/assets/{plugin-notebook-HbW2K-1c.js → plugin-notebook-JRhysCqj.js} +2 -2
  854. package/src/ui/dist/assets/{project-sync-C9IdzdZW.js → project-sync-DPmWKmKD.js} +1 -1
  855. package/src/ui/dist/assets/{zoom-out-E_gaeAxL.js → zoom-out-DAukFWen.js} +3 -3
  856. package/src/ui/dist/index.html +3 -3
  857. package/src/skills/analysis-campaign/references/artifact-orchestration.md +0 -58
  858. package/src/skills/baseline/references/memory-playbook.md +0 -40
  859. package/src/skills/baseline/references/publishable-baseline-package.md +0 -30
  860. package/src/skills/write/references/outline-evidence-contract-example.md +0 -107
  861. package/src/skills/write/references/paper-experiment-matrix-template.md +0 -131
  862. package/src/skills/write/references/paper-section-playbook.md +0 -64
  863. package/src/skills/write/references/reviewer-first-writing.md +0 -64
  864. package/src/skills/write/references/revision-checklist.md +0 -70
  865. package/src/skills/write/references/section-contracts.md +0 -82
  866. package/src/skills/write/references/sentence-level-proofing.md +0 -49
  867. package/src/ui/dist/assets/AiManusChatView-Bv-Z8YpU.js +0 -204
  868. package/src/ui/dist/assets/CliPlugin-BCKcpc35.js +0 -109
  869. package/src/ui/dist/assets/CodeEditorPlugin-DbOfSJ8K.js +0 -2
  870. package/src/ui/dist/assets/GitCommitViewerPlugin-CIUqbUDO.js +0 -1
  871. package/src/ui/dist/assets/LabCopilotPanel-BHxOxF4z.js +0 -14
  872. package/src/ui/dist/assets/LabPlugin-BKoZGs95.js +0 -22
  873. package/src/ui/dist/assets/NotebookEditor-BEQhaQbt.js +0 -81
  874. package/src/ui/dist/assets/PdfViewerPlugin-c-RK9DLM.js +0 -17
  875. package/src/ui/dist/assets/SearchPlugin-CxF9ytAx.js +0 -16
  876. package/src/ui/dist/assets/VNCViewer-BoLGLnHz.js +0 -11
  877. package/src/ui/dist/assets/bot-DREQOxzP.js +0 -6
  878. package/src/ui/dist/assets/chevron-up-C9Qpx4DE.js +0 -6
  879. package/src/ui/dist/assets/file-content-BZMz3RYp.js +0 -1
  880. package/src/ui/dist/assets/file-diff-panel-CQhw0jS2.js +0 -1
  881. package/src/ui/dist/assets/file-jump-queue-DA-SdG__.js +0 -1
  882. package/src/ui/dist/assets/git-commit-horizontal-DxZ8DCZh.js +0 -6
  883. package/src/ui/dist/assets/image-Bgl4VIyx.js +0 -6
  884. package/src/ui/dist/assets/index-BpV6lusQ.css +0 -33
  885. package/src/ui/dist/assets/index-CBNVuWcP.js +0 -2496
  886. package/src/ui/dist/assets/index-DrUnlf6K.js +0 -1
  887. package/src/ui/dist/assets/index-NW-h8VzN.js +0 -1
  888. package/src/ui/dist/assets/pdf-effect-queue-J8OnM0jE.js +0 -6
  889. package/src/ui/dist/assets/popover-CLc0pPP8.js +0 -1
  890. package/src/ui/dist/assets/select-Cs2PmzwL.js +0 -11
  891. package/src/ui/dist/assets/sigma-ClKcHAXm.js +0 -6
  892. package/src/ui/dist/assets/trash-DwpbFr3w.js +0 -11
  893. package/src/ui/dist/assets/useCliAccess-NQ8m0Let.js +0 -1
  894. package/src/ui/dist/assets/useFileDiffOverlay-FuhcnKiw.js +0 -1
@@ -0,0 +1,1716 @@
1
+ from __future__ import annotations
2
+
3
+ import hashlib
4
+ import json
5
+ import re
6
+ import shutil
7
+ import tarfile
8
+ import time
9
+ from pathlib import Path
10
+ from typing import Any
11
+ from urllib.request import Request
12
+ import zipfile
13
+
14
+ from .prompt_builder import BenchStorePromptBuilder
15
+ from ..config import ConfigManager
16
+ from ..network import urlopen_with_proxy as urlopen
17
+ from ..runners.metadata import get_runner_metadata
18
+ from ..shared import ensure_dir, read_json, read_yaml, resolve_within, slugify, utc_now, write_json
19
+
20
+
21
# An entry id starts with an ASCII letter or digit and continues with
# letters, digits, dots, underscores, or hyphens.
_ENTRY_ID_PATTERN = re.compile(r"^[A-Za-z0-9][A-Za-z0-9._-]*$")

# (resource key, label) pairs, in declaration order.
_RESOURCE_FIELDS: tuple[tuple[str, str], ...] = (
    ("cpu_cores", "CPU"),
    ("ram_gb", "RAM"),
    ("disk_gb", "Disk"),
    ("gpu_count", "GPU count"),
    ("gpu_vram_gb", "GPU VRAM"),
)

# Cost band name -> ordinal rank (0 is the cheapest band).
_COST_BAND_RANK = {
    band: rank
    for rank, band in enumerate(("very_low", "low", "medium", "high", "very_high"))
}

# Difficulty name -> ordinal rank (0 is the easiest level).
_DIFFICULTY_RANK = {
    level: rank
    for rank, level in enumerate(("easy", "medium", "hard", "expert"))
}
42
+
43
+
44
def _time_band_upper_hours(value: str | None) -> float | None:
    """Return the upper bound, in hours, of a textual time band.

    Matching is case-insensitive and tolerates whitespace between tokens.
    Recognized forms:

    * a single amount, e.g. ``"30m"``, ``"2h"``, ``"1.5d"``;
    * an open-ended amount, e.g. ``"8h+"`` (the stated amount is returned);
    * a range, e.g. ``"1-2h"`` (the second amount is returned).

    The unit suffix is ``m`` (value divided by 60), ``h`` (value as-is) or
    ``d`` (value multiplied by 24).

    Args:
        value: The time band text, or ``None``.

    Returns:
        The upper bound in hours, or ``None`` when ``value`` is empty or does
        not match any recognized form.
    """
    text = str(value or "").strip().lower()
    if not text:
        return None

    # One number grammar for every form, so decimals are accepted uniformly
    # and only text that float() can parse ever reaches the conversion below
    # (str.isdigit() also accepts characters such as superscript digits,
    # which float() rejects).
    number = r"\d+(?:\.\d+)?"
    match = re.fullmatch(
        rf"(?P<upper>{number})\s*(?P<unit>[mhd])(?:\s*\+)?", text
    ) or re.fullmatch(
        rf"{number}\s*-\s*(?P<upper>{number})\s*(?P<unit>[mhd])", text
    )
    if match is None:
        return None

    upper = float(match.group("upper"))
    unit = match.group("unit")
    if unit == "m":
        return upper / 60.0
    if unit == "d":
        return upper * 24.0
    return upper
73
+
74
+
75
def _normalize_catalog_locale(value: str | None) -> str:
    """Reduce a locale hint to ``"zh"`` or ``"en"``.

    A falsy ``value`` is treated as ``"en"``. Anything that starts with
    ``zh`` after trimming and lowercasing maps to ``"zh"``; everything else
    maps to ``"en"``.
    """
    locale = str(value or "en").strip().lower()
    if locale.startswith("zh"):
        return "zh"
    return "en"
78
+
79
+
80
def _resource_confidence(resources: dict[str, Any]) -> str:
    """Classify how completely a resources mapping is specified.

    Looks at the ``"minimum"`` and ``"recommended"`` entries; an entry counts
    only when it is a non-empty dict.

    Returns:
        ``"full"`` when both entries count, ``"partial"`` when exactly one
        does, and ``"none"`` otherwise.
    """
    populated = 0
    for section in ("minimum", "recommended"):
        spec = resources.get(section)
        if isinstance(spec, dict) and spec:
            populated += 1
    return ("none", "partial", "full")[populated]
88
+
89
+
90
def _optional_str(value: Any) -> str | None:
    """Return ``value`` as trimmed text, or ``None`` when nothing is left.

    Falsy inputs (``None``, ``""``, ``0``, ``False``, ...) are treated as
    empty text and therefore yield ``None``.
    """
    cleaned = str(value if value else "").strip()
    if cleaned:
        return cleaned
    return None
93
+
94
+
95
def _optional_bool(value: Any) -> bool | None:
    """Interpret ``value`` as an optional boolean.

    ``None`` and real booleans pass through unchanged. Strings are trimmed
    and lowercased, then matched against the accepted true/false spellings.

    Raises:
        ValueError: For unrecognized strings and for any other type
            (including integers).
    """
    if value is None or isinstance(value, bool):
        return value
    if isinstance(value, str):
        token = value.strip().lower()
        for spellings, outcome in (
            (("1", "true", "yes", "on"), True),
            (("0", "false", "no", "off"), False),
        ):
            if token in spellings:
                return outcome
    raise ValueError(f"Expected boolean, got {value!r}.")
107
+
108
+
109
def _sanitize_catalog_value(value: Any) -> Any:
    """Recursively reduce ``value`` to plain scalars, lists, and dicts.

    ``None``, strings, numbers, and booleans are returned unchanged. Lists
    and dicts are rebuilt with every element sanitized, and dict keys are
    converted with ``str``. Any other object is replaced by ``str(value)``.
    """
    if value is None or isinstance(value, (str, int, float, bool)):
        return value
    if isinstance(value, list):
        return list(map(_sanitize_catalog_value, value))
    if isinstance(value, dict):
        return {
            str(name): _sanitize_catalog_value(child)
            for name, child in value.items()
        }
    return str(value)
120
+
121
+
122
def _collect_search_values(value: Any) -> list[str]:
    """Flatten ``value`` into a list of non-empty, trimmed strings.

    * ``None`` contributes nothing.
    * Booleans become ``"true"`` / ``"false"``.
    * Lists are flattened recursively, in order.
    * Dicts contribute each non-blank key (as text) followed by the values
      collected from that key's item.
    * Everything else contributes ``str(value).strip()`` when non-empty.
    """
    if value is None:
        return []
    if isinstance(value, bool):
        return ["true"] if value else ["false"]
    if isinstance(value, list):
        return [text for item in value for text in _collect_search_values(item)]
    if isinstance(value, dict):
        collected: list[str] = []
        for key, item in value.items():
            label = str(key).strip()
            if label:
                collected.append(label)
            collected += _collect_search_values(item)
        return collected
    # Strings, numbers, and any other object share the same text fallback.
    text = str(value).strip()
    return [text] if text else []
145
+
146
+
147
+ def _optional_number(value: Any) -> float | None:
148
+ if value is None or value == "":
149
+ return None
150
+ if isinstance(value, bool):
151
+ raise ValueError(f"Expected number, got {value!r}.")
152
+ if isinstance(value, (int, float)):
153
+ return float(value)
154
+ if isinstance(value, str):
155
+ try:
156
+ return float(value.strip())
157
+ except ValueError as exc: # pragma: no cover - defensive
158
+ raise ValueError(f"Expected number, got {value!r}.") from exc
159
+ raise ValueError(f"Expected number, got {value!r}.")
160
+
161
+
162
+ def _normalize_string_list(value: Any, *, field_name: str) -> list[str]:
163
+ if value is None:
164
+ return []
165
+ if not isinstance(value, list):
166
+ raise ValueError(f"`{field_name}` must be a list of strings.")
167
+ items: list[str] = []
168
+ for raw in value:
169
+ normalized = _optional_str(raw)
170
+ if normalized:
171
+ items.append(normalized)
172
+ return items
173
+
174
+
175
+ def _normalize_resource_spec(value: Any, *, field_name: str) -> dict[str, float]:
176
+ if value is None:
177
+ return {}
178
+ if not isinstance(value, dict):
179
+ raise ValueError(f"`{field_name}` must be an object.")
180
+ normalized: dict[str, float] = {}
181
+ for key, _label in _RESOURCE_FIELDS:
182
+ number = _optional_number(value.get(key))
183
+ if number is not None:
184
+ normalized[key] = number
185
+ return normalized
186
+
187
+
188
+ def _normalize_environment_spec(value: Any) -> dict[str, Any]:
189
+ if value is None:
190
+ return {
191
+ "python": None,
192
+ "cuda": None,
193
+ "pytorch": None,
194
+ "flash_attn": None,
195
+ "key_packages": [],
196
+ "notes": [],
197
+ }
198
+ if not isinstance(value, dict):
199
+ raise ValueError("`environment` must be an object.")
200
+ return {
201
+ "python": _optional_str(value.get("python")),
202
+ "cuda": _optional_str(value.get("cuda")),
203
+ "pytorch": _optional_str(value.get("pytorch")),
204
+ "flash_attn": _optional_str(value.get("flash_attn")),
205
+ "key_packages": _normalize_string_list(value.get("key_packages"), field_name="environment.key_packages"),
206
+ "notes": _normalize_string_list(value.get("notes"), field_name="environment.notes"),
207
+ }
208
+
209
+
210
+ def _normalize_dataset_sources(value: Any) -> list[dict[str, Any]]:
211
+ if value is None:
212
+ return []
213
+ if not isinstance(value, list):
214
+ raise ValueError("`dataset_download.sources` must be a list.")
215
+ sources: list[dict[str, Any]] = []
216
+ for index, raw in enumerate(value):
217
+ if isinstance(raw, dict):
218
+ sources.append(
219
+ {
220
+ "kind": _optional_str(raw.get("kind")),
221
+ "url": _optional_str(raw.get("url")),
222
+ "access": _optional_str(raw.get("access")),
223
+ "note": _optional_str(raw.get("note")),
224
+ }
225
+ )
226
+ continue
227
+ normalized = _optional_str(raw)
228
+ if normalized:
229
+ sources.append(
230
+ {
231
+ "kind": None,
232
+ "url": normalized,
233
+ "access": None,
234
+ "note": None,
235
+ }
236
+ )
237
+ continue
238
+ raise ValueError(f"`dataset_download.sources[{index}]` must be an object or string.")
239
+ return sources
240
+
241
+
242
+ def _normalize_dataset_download_spec(value: Any) -> dict[str, Any]:
243
+ if value is None:
244
+ return {
245
+ "primary_method": None,
246
+ "sources": [],
247
+ "notes": [],
248
+ }
249
+ if not isinstance(value, dict):
250
+ raise ValueError("`dataset_download` must be an object.")
251
+ return {
252
+ "primary_method": _optional_str(value.get("primary_method")),
253
+ "sources": _normalize_dataset_sources(value.get("sources")),
254
+ "notes": _normalize_string_list(value.get("notes"), field_name="dataset_download.notes"),
255
+ }
256
+
257
+
258
+ def _normalize_credential_requirements_spec(value: Any) -> dict[str, Any]:
259
+ if value is None:
260
+ return {
261
+ "mode": None,
262
+ "items": [],
263
+ "notes": [],
264
+ }
265
+ if not isinstance(value, dict):
266
+ raise ValueError("`credential_requirements` must be an object.")
267
+ return {
268
+ "mode": _optional_str(value.get("mode")),
269
+ "items": _normalize_string_list(value.get("items"), field_name="credential_requirements.items"),
270
+ "notes": _normalize_string_list(value.get("notes"), field_name="credential_requirements.notes"),
271
+ }
272
+
273
+
274
+ def _normalize_launch_profiles(value: Any) -> list[dict[str, Any]]:
275
+ if value is None:
276
+ return []
277
+ if not isinstance(value, list):
278
+ raise ValueError("`launch_profiles` must be a list.")
279
+ profiles: list[dict[str, Any]] = []
280
+ for index, raw in enumerate(value):
281
+ if not isinstance(raw, dict):
282
+ raise ValueError(f"`launch_profiles[{index}]` must be an object.")
283
+ profile = {
284
+ "id": _optional_str(raw.get("id")),
285
+ "label": _optional_str(raw.get("label")),
286
+ "description": _optional_str(raw.get("description")),
287
+ }
288
+ if profile["id"] or profile["label"] or profile["description"]:
289
+ profiles.append(profile)
290
+ return profiles
291
+
292
+
293
+ def _device_summary_from_profile(profile: dict[str, Any]) -> str:
294
+ cpu = profile.get("cpu_cores")
295
+ ram = profile.get("ram_gb")
296
+ disk = profile.get("disk_gb")
297
+ gpu_count = profile.get("gpu_count")
298
+ gpu_vram = profile.get("gpu_vram_gb")
299
+ return (
300
+ f"CPU {cpu if cpu is not None else '?'} cores | "
301
+ f"RAM {ram if ram is not None else '?'}GB | "
302
+ f"Disk {disk if disk is not None else '?'}GB free | "
303
+ f"GPU {gpu_count if gpu_count is not None else '?'} | "
304
+ f"VRAM {gpu_vram if gpu_vram is not None else '?'}GB"
305
+ )
306
+
307
+
308
+ class BenchStoreService:
309
def _default_runner_label(self) -> str:
    """Return a display label for the configured default runner.

    The runner name comes from the ``default_runner`` key of the normalized
    ``config`` document under ``self.home`` and falls back to ``codex`` when
    unset or blank. If ``get_runner_metadata`` raises ``KeyError`` for that
    name, the capitalized name is used as the label.
    """
    settings = ConfigManager(self.home).load_named_normalized("config")
    configured = str(settings.get("default_runner") or "codex").strip().lower()
    runner = configured or "codex"
    try:
        label = get_runner_metadata(runner).label
    except KeyError:
        label = runner.capitalize()
    return label
316
+
317
def __init__(self, home: Path, *, repo_root: Path) -> None:
    """Bind the service to a home directory and a repository checkout.

    Args:
        home: Base directory for runtime state and installed benchmarks.
        repo_root: Repository root; the catalog is read from
            ``<repo_root>/AISB/catalog``.
    """
    checkout = Path(repo_root)
    self.home = Path(home)
    self.repo_root = checkout
    self.workspace_root = checkout.parent
    self.catalog_root = checkout / "AISB" / "catalog"
    self.prompt_builder = BenchStorePromptBuilder(checkout)
323
+
324
@property
def runtime_root(self) -> Path:
    """``<home>/runtime/benchstore``, passed through ``ensure_dir`` on every access."""
    location = self.home / "runtime" / "benchstore"
    return ensure_dir(location)
327
+
328
@property
def downloads_root(self) -> Path:
    """``<runtime_root>/downloads``, passed through ``ensure_dir`` on every access."""
    location = self.runtime_root / "downloads"
    return ensure_dir(location)
331
+
332
@property
def install_records_root(self) -> Path:
    """``<runtime_root>/installs``, passed through ``ensure_dir`` on every access."""
    location = self.runtime_root / "installs"
    return ensure_dir(location)
335
+
336
@property
def install_root(self) -> Path:
    """``<home>/AISB/installs``, passed through ``ensure_dir`` on every access."""
    location = self.home / "AISB" / "installs"
    return ensure_dir(location)
339
+
340
+ def list_entries(self, *, hardware_payload: dict[str, Any] | None = None, locale: str = "en") -> dict[str, Any]:
341
+ catalog = self._scan_catalog(hardware_payload=hardware_payload, locale=locale)
342
+ return {
343
+ "ok": True,
344
+ "catalog_root": str(self.catalog_root),
345
+ "device_profile": catalog["device_profile"],
346
+ "device_capacity": catalog["device_capacity"],
347
+ "device_summary": catalog["device_summary"],
348
+ "invalid_entries": catalog["invalid_entries"],
349
+ "filter_options": catalog["filter_options"],
350
+ "shelves": catalog["shelves"],
351
+ "items": catalog["items"],
352
+ "total": len(catalog["items"]),
353
+ }
354
+
355
+ def get_entry(self, entry_id: str, *, hardware_payload: dict[str, Any] | None = None, locale: str = "en") -> dict[str, Any]:
356
+ normalized_id = self._normalize_identifier(entry_id, fallback="")
357
+ if not normalized_id:
358
+ raise FileNotFoundError("Benchmark id is required.")
359
+ catalog = self._scan_catalog(hardware_payload=hardware_payload, locale=locale)
360
+ entry_path = self._find_entry_path(normalized_id, locale=locale)
361
+ raw_entry = self._load_entry_file(entry_path, include_raw_payload=True)
362
+ for item in catalog["items"]:
363
+ if str(item.get("id") or "") == normalized_id:
364
+ detail = dict(item)
365
+ install_state = self.install_state(normalized_id)
366
+ detail["install_state"] = install_state
367
+ detail["raw_payload"] = raw_entry.get("raw_payload")
368
+ detail["setup_prompt_preview"] = self.prompt_builder.build_setup_prompt(
369
+ entry=detail,
370
+ hardware_payload=hardware_payload,
371
+ benchmark_local_path=str(install_state.get("local_path") or ""),
372
+ locale=locale,
373
+ )
374
+ return {
375
+ "ok": True,
376
+ "device_profile": catalog["device_profile"],
377
+ "device_summary": catalog["device_summary"],
378
+ "entry": detail,
379
+ }
380
+ raise FileNotFoundError(f"Unknown BenchStore entry `{normalized_id}`.")
381
+
382
+ def _scan_catalog(self, *, hardware_payload: dict[str, Any] | None = None, locale: str = "en") -> dict[str, Any]:
383
+ items: list[dict[str, Any]] = []
384
+ invalid_entries: list[dict[str, str]] = []
385
+ by_id: dict[str, dict[str, Any]] = {}
386
+ device_profile = self._device_profile(hardware_payload)
387
+ device_capacity = self._device_capacity_profile(device_profile)
388
+ device_summary = (
389
+ str(hardware_payload.get("prompt_hardware_summary") or "").strip()
390
+ if isinstance(hardware_payload, dict)
391
+ else ""
392
+ ) or _device_summary_from_profile(device_profile)
393
+
394
+ if self.catalog_root.exists():
395
+ for path in self._catalog_entry_paths(locale=locale):
396
+ try:
397
+ entry = self._load_entry_file(path)
398
+ except ValueError as exc:
399
+ invalid_entries.append(
400
+ {
401
+ "source_file": str(path.relative_to(self.repo_root)),
402
+ "message": str(exc),
403
+ }
404
+ )
405
+ continue
406
+ if entry["id"] in by_id:
407
+ invalid_entries.append(
408
+ {
409
+ "source_file": str(path.relative_to(self.repo_root)),
410
+ "message": f"Duplicate benchmark id `{entry['id']}`.",
411
+ }
412
+ )
413
+ continue
414
+ if hardware_payload is not None:
415
+ entry["compatibility"] = self._compatibility(entry=entry, device_profile=device_profile, device_summary=device_summary)
416
+ entry["recommendation"] = self._recommendation_profile(
417
+ entry=entry,
418
+ device_profile=device_profile,
419
+ device_capacity=device_capacity,
420
+ compatibility=entry["compatibility"],
421
+ )
422
+ entry["install_state"] = self.install_state(entry["id"])
423
+ by_id[entry["id"]] = entry
424
+ items.append(entry)
425
+
426
+ items.sort(key=self._entry_sort_key)
427
+ filter_options = self._filter_options(items)
428
+ shelves = self._shelves(items)
429
+ return {
430
+ "items": items,
431
+ "invalid_entries": invalid_entries,
432
+ "device_profile": device_profile,
433
+ "device_capacity": device_capacity,
434
+ "device_summary": device_summary,
435
+ "filter_options": filter_options,
436
+ "shelves": shelves,
437
+ }
438
+
439
def _catalog_entry_paths(self, *, locale: str = "en") -> list[Path]:
    """List the catalog files to load for ``locale``, in sorted order.

    Every ``*.yaml`` under the catalog root that is not itself a ``.zh.yaml``
    file is a base entry. For the ``zh`` locale, a sibling ``<stem>.zh.yaml``
    replaces its base file when it exists.
    """
    prefer_zh = _normalize_catalog_locale(locale) == "zh"
    base_files = [
        candidate
        for candidate in self.catalog_root.rglob("*.yaml")
        if not candidate.name.endswith(".zh.yaml")
    ]
    base_files.sort()
    selected: list[Path] = []
    for base in base_files:
        chosen = base
        if prefer_zh:
            localized = base.with_name(f"{base.stem}.zh.yaml")
            if localized.exists():
                chosen = localized
        selected.append(chosen)
    return selected
455
+
456
def _load_entry_file(self, path: Path, *, include_raw_payload: bool = False) -> dict[str, Any]:
    """Parse one catalog YAML file into a normalized BenchStore entry.

    Args:
        path: Catalog file to read. Its stem is the fallback entry id and its
            location relative to ``self.repo_root`` is stored as
            ``source_file``.
        include_raw_payload: When true, a sanitized copy of the parsed YAML
            is attached under ``raw_payload``.

    Returns:
        The normalized entry, including a ``search_text`` value produced by
        ``self._search_text``.

    Raises:
        ValueError: If the document is not a mapping, has no usable ``name``
            or id, or contains a field a normalizer rejects. Integer fields
            that cannot be converted (for example an infinite ``year``) also
            raise ``ValueError`` so callers that catch it can report the file
            as invalid.
    """
    payload = read_yaml(path, {})
    if not isinstance(payload, dict):
        raise ValueError("BenchStore entry must be a YAML object.")

    name = _optional_str(payload.get("name"))
    if not name:
        raise ValueError("BenchStore entry requires non-empty `name`.")

    entry_id = self._normalize_identifier(payload.get("id"), fallback=path.stem)
    if not entry_id:
        raise ValueError("BenchStore entry id could not be derived.")

    def _section(key: str) -> dict[str, Any]:
        # Optional sub-objects degrade to an empty mapping when absent or mistyped.
        candidate = payload.get(key)
        return candidate if isinstance(candidate, dict) else {}

    def _to_int(number: float, raw: Any) -> int:
        # int(inf) raises OverflowError; surface it as the ValueError callers expect.
        try:
            return int(number)
        except (OverflowError, ValueError) as exc:
            raise ValueError(f"Expected integer, got {raw!r}.") from exc

    def _optional_int(raw: Any) -> int | None:
        number = _optional_number(raw)
        return None if number is None else _to_int(number, raw)

    paper_raw = _section("paper")
    download_raw = _section("download")
    resources_raw = _section("resources")
    commercial_raw = _section("commercial")
    display_raw = _section("display")
    official_links_raw = _section("official_links")
    discovery_raw = _section("discovery")
    # These three are handed over as-is: their normalizers do their own type
    # validation and raise on a non-object value.
    dataset_download_raw = payload.get("dataset_download")
    credential_requirements_raw = payload.get("credential_requirements")
    environment_raw = payload.get("environment")

    official_links = {
        "homepage": _optional_str(official_links_raw.get("homepage")),
        "github": _optional_str(official_links_raw.get("github")),
        "docs": _optional_str(official_links_raw.get("docs")),
    }
    if not any(official_links.values()):
        official_links = {}

    discovery = {
        "collection": _optional_str(discovery_raw.get("collection")),
        "collection_priority": _optional_int(discovery_raw.get("collection_priority")),
        "recommendation_weight": _optional_int(discovery_raw.get("recommendation_weight")),
        "featured": _optional_bool(discovery_raw.get("featured")),
        "featured_reason": _optional_str(discovery_raw.get("featured_reason")),
    }
    # `featured: false` alone does not count as discovery metadata.
    if not (
        discovery["collection"]
        or discovery["collection_priority"] is not None
        or discovery["recommendation_weight"] is not None
        or discovery["featured"] is True
        or discovery["featured_reason"]
    ):
        discovery = {}

    image_path = _optional_str(payload.get("image_path"))
    schema_raw = payload.get("schema_version")
    # A missing or zero schema version defaults to 1.
    schema_version = _to_int(_optional_number(schema_raw) or 1, schema_raw)

    entry = {
        "schema_version": schema_version,
        "id": entry_id,
        "name": name,
        "version": _optional_str(payload.get("version")),
        "one_line": _optional_str(payload.get("one_line")),
        "task_description": _optional_str(payload.get("task_description")),
        "homepage": _optional_str(payload.get("homepage")),
        "official_links": official_links,
        "capability_tags": _normalize_string_list(payload.get("capability_tags"), field_name="capability_tags"),
        "aisb_direction": _optional_str(payload.get("aisb_direction")),
        "track_fit": _normalize_string_list(payload.get("track_fit"), field_name="track_fit"),
        "task_mode": _optional_str(payload.get("task_mode")),
        "requires_execution": _optional_bool(payload.get("requires_execution")),
        "requires_paper": _optional_bool(payload.get("requires_paper")),
        "integrity_level": _optional_str(payload.get("integrity_level")),
        "snapshot_status": _optional_str(payload.get("snapshot_status")),
        "support_level": _optional_str(payload.get("support_level")),
        "primary_outputs": _normalize_string_list(payload.get("primary_outputs"), field_name="primary_outputs"),
        "discovery": discovery,
        "launch_profiles": _normalize_launch_profiles(payload.get("launch_profiles")),
        "cost_band": _optional_str(payload.get("cost_band")),
        "time_band": _optional_str(payload.get("time_band")),
        "difficulty": _optional_str(payload.get("difficulty")),
        "data_access": _optional_str(payload.get("data_access")),
        "risk_flags": _normalize_string_list(payload.get("risk_flags"), field_name="risk_flags"),
        "risk_notes": _normalize_string_list(payload.get("risk_notes"), field_name="risk_notes"),
        "recommended_when": _optional_str(payload.get("recommended_when")),
        "not_recommended_when": _optional_str(payload.get("not_recommended_when")),
        "paper": {
            "title": _optional_str(paper_raw.get("title")),
            "venue": _optional_str(paper_raw.get("venue")),
            "year": _optional_int(paper_raw.get("year")),
            "url": _optional_str(paper_raw.get("url")),
        },
        "download": {
            "url": _optional_str(download_raw.get("url")),
            "archive_type": _optional_str(download_raw.get("archive_type")),
            "local_dir_name": _optional_str(download_raw.get("local_dir_name")),
            "sha256": _optional_str(download_raw.get("sha256")),
            "size_bytes": _optional_int(download_raw.get("size_bytes")),
            "provider": _optional_str(download_raw.get("provider")),
            "repo": _optional_str(download_raw.get("repo")),
            "tag": _optional_str(download_raw.get("tag")),
            "asset_name": _optional_str(download_raw.get("asset_name")),
        },
        "dataset_download": _normalize_dataset_download_spec(dataset_download_raw),
        "credential_requirements": _normalize_credential_requirements_spec(credential_requirements_raw),
        "resources": {
            "minimum": _normalize_resource_spec(resources_raw.get("minimum"), field_name="resources.minimum"),
            "recommended": _normalize_resource_spec(resources_raw.get("recommended"), field_name="resources.recommended"),
        },
        "environment": _normalize_environment_spec(environment_raw),
        "commercial": {
            "annual_fee": commercial_raw.get("annual_fee"),
        },
        "display": {
            "palette_seed": _optional_str(display_raw.get("palette_seed")),
            "art_style": _optional_str(display_raw.get("art_style")),
            "accent_priority": _optional_str(display_raw.get("accent_priority")),
            "placement": _optional_str(display_raw.get("placement")),
            "card_size": _optional_str(display_raw.get("card_size")),
            "badge": _optional_str(display_raw.get("badge")),
        },
        "image_path": image_path,
        "image_url": f"/api/benchstore/entries/{entry_id}/image" if image_path else None,
        "source_file": str(path.relative_to(self.repo_root)),
    }
    if include_raw_payload:
        entry["raw_payload"] = _sanitize_catalog_value(payload)
    entry["search_text"] = self._search_text(entry, raw_payload=payload)
    return entry
578
+
579
def install_state(self, entry_id: str) -> dict[str, Any]:
    """Return the stored install record for an entry, or a placeholder.

    ``{"status": "not_installed"}`` is returned when the id normalizes to an
    empty string or no dict-shaped record can be read. A record whose
    ``local_path`` no longer exists is returned as a copy with ``status``
    set to ``"missing"``; otherwise the record is returned unchanged.
    """
    normalized_id = self._normalize_identifier(entry_id, fallback="")
    record = read_json(self.install_record_path(normalized_id), default=None) if normalized_id else None
    if not isinstance(record, dict):
        return {"status": "not_installed"}
    recorded_path = str(record.get("local_path") or "").strip()
    if not recorded_path or Path(recorded_path).exists():
        return record
    return {**record, "status": "missing"}
593
+
594
def install_record_path(self, entry_id: str) -> Path:
    """Return the JSON install-record path for ``entry_id``.

    The file is named ``<normalized id>.json`` inside
    ``self.install_records_root``.
    """
    record_name = "{}.json".format(self._normalize_identifier(entry_id, fallback=""))
    return self.install_records_root / record_name
597
+
598
def entry_install_dir(self, entry: dict[str, Any]) -> Path:
    """Return the directory an entry is installed into.

    The directory name is ``download.local_dir_name`` when set, otherwise the
    normalized entry id (falling back to the entry name, then ``bench``).

    Raises:
        ValueError: If the resulting path is not strictly inside
            ``self.install_root`` (for example ``..``, an absolute path or an
            empty name).
    """
    download = entry.get("download") if isinstance(entry.get("download"), dict) else {}
    preferred_name = str(download.get("local_dir_name") or "").strip()
    base_name = preferred_name or self._normalize_identifier(entry.get("id"), fallback=str(entry.get("name") or "bench"))
    install_root = self.install_root
    target = install_root / base_name
    # The installer deletes and replaces this directory, so a catalog-supplied
    # name must never resolve to the install root itself or anything outside it.
    if install_root.resolve() not in target.resolve().parents:
        raise ValueError(
            f"BenchStore install directory `{base_name}` must stay inside the install root."
        )
    return target
603
+
604
+ @staticmethod
605
+ def _local_reference_paths(local_root: Path | None) -> dict[str, Any]:
606
+ if local_root is None or not local_root.exists():
607
+ return {
608
+ "benchmark_root": None,
609
+ "latex_markdown_path": None,
610
+ "dataset_paths": [],
611
+ }
612
+ latex_path: Path | None = None
613
+ direct_latex = local_root / "latex.md"
614
+ if direct_latex.exists() and direct_latex.is_file():
615
+ latex_path = direct_latex
616
+ else:
617
+ for candidate in sorted(local_root.rglob("latex.md")):
618
+ if candidate.is_file():
619
+ latex_path = candidate
620
+ break
621
+ dataset_paths: list[str] = []
622
+ seen_paths: set[str] = set()
623
+ for relative in ("dataset", "datasets", "data", "bench_data", "corpus", "inputs"):
624
+ candidate = local_root / relative
625
+ if not candidate.exists():
626
+ continue
627
+ normalized = str(candidate)
628
+ if normalized in seen_paths:
629
+ continue
630
+ seen_paths.add(normalized)
631
+ dataset_paths.append(normalized)
632
+ return {
633
+ "benchmark_root": str(local_root),
634
+ "latex_markdown_path": str(latex_path) if latex_path is not None else None,
635
+ "dataset_paths": dataset_paths,
636
+ }
637
+
638
def infer_archive_type(self, entry: dict[str, Any]) -> str:
    """Return the archive type to use for an entry's download.

    An explicit ``download.archive_type`` wins (stripped and lowercased).
    Otherwise the type is inferred from the URL suffix: ``.tar.gz``/``.tgz``
    map to ``"tar.gz"``, ``.tar`` to ``"tar"``, and anything else to
    ``"zip"``.
    """
    download = entry.get("download") if isinstance(entry.get("download"), dict) else {}
    explicit = str(download.get("archive_type") or "").strip().lower()
    if explicit:
        return explicit
    url = str(download.get("url") or "").strip().lower()
    # Look at the path first so a query string or fragment (signed URLs,
    # `?download=1`) does not hide the real file suffix; then fall back to the
    # whole URL for links that carry the file name in the query instead.
    path_only = url.split("#", 1)[0].split("?", 1)[0]
    suffix_types = (
        (".tar.gz", "tar.gz"),
        (".tgz", "tar.gz"),
        (".tar", "tar"),
        (".zip", "zip"),
    )
    for candidate in (path_only, url):
        for suffix, archive_type in suffix_types:
            if candidate.endswith(suffix):
                return archive_type
    return "zip"
649
+
650
def write_install_record(self, entry_id: str, payload: dict[str, Any]) -> dict[str, Any]:
    """Write ``payload`` to the entry's install-record file and return it unchanged."""
    destination = self.install_record_path(entry_id)
    write_json(destination, payload)
    return payload
653
+
654
def run_install_task(
    self,
    *,
    entry_id: str,
    reporter: Any,
    task_id: str,
) -> dict[str, Any]:
    """Download, verify, extract and register a benchmark archive.

    Args:
        entry_id: Catalog id of the benchmark to install.
        reporter: Progress sink; its ``start``, ``progress`` and ``complete``
            methods are called as the task advances.
        task_id: Identifier used to name the downloaded archive and the
            temporary extraction directory, and stored in the install record.

    Returns:
        The install record written on success (``status`` is ``installed``).

    Raises:
        ValueError: If the entry has no ``download.url``, the download is
            shorter than the advertised ``Content-Length``, or the archive
            does not match the declared size or SHA-256.
        Exception: Any other failure during download, extraction or install
            is re-raised after a ``failed`` install record has been written.
    """
    entry = self._find_entry(entry_id)
    download = entry.get("download") if isinstance(entry.get("download"), dict) else {}
    url = str(download.get("url") or "").strip()
    if not url:
        raise ValueError(f"BenchStore entry `{entry_id}` does not define `download.url`.")

    archive_type = self.infer_archive_type(entry)
    expected_sha256 = str(download.get("sha256") or "").strip().lower() or None
    expected_size_bytes = int(download.get("size_bytes")) if download.get("size_bytes") is not None else None
    install_dir = self.entry_install_dir(entry)
    temp_extract_root = self.install_root / f".extract-{install_dir.name}-{task_id}"
    archive_suffix = ".zip" if archive_type == "zip" else ".tar.gz" if archive_type == "tar.gz" else ".tar"
    archive_path = self.downloads_root / f"{task_id}{archive_suffix}"
    initial_metadata = {
        "entry_id": entry["id"],
        "entry_name": entry["name"],
        "download_url": url,
        "archive_type": archive_type,
        "install_dir": str(install_dir),
        "bytes_downloaded": 0,
        "bytes_total": None,
        "speed_bytes_per_sec": None,
        "eta_seconds": None,
        "expected_sha256": expected_sha256,
        "expected_size_bytes": expected_size_bytes,
    }
    reporter.start(total=None, current_step="download", message=f"Downloading `{entry['name']}`.")
    reporter.progress(current_step="download", message=f"Downloading `{entry['name']}`.", metadata=initial_metadata)

    self.write_install_record(
        entry["id"],
        {
            "entry_id": entry["id"],
            "status": "installing",
            "task_id": task_id,
            "local_path": str(install_dir),
            "download_url": url,
            "archive_type": archive_type,
            "updated_at": utc_now(),
        },
    )

    bytes_downloaded = 0
    bytes_total: int | None = None
    sha256 = hashlib.sha256()
    request = Request(url, headers={"User-Agent": "DeepScientist-BenchStore/1.0"})
    try:
        with urlopen(request, timeout=30.0) as response:
            raw_length = response.headers.get("Content-Length")
            if raw_length and raw_length.isdigit():
                bytes_total = int(raw_length)
            ensure_dir(archive_path.parent)
            with archive_path.open("wb") as handle:
                started_at = time.monotonic()
                while True:
                    chunk = response.read(1024 * 256)
                    if not chunk:
                        break
                    handle.write(chunk)
                    sha256.update(chunk)
                    bytes_downloaded += len(chunk)
                    # Clamp elapsed time so the first chunk cannot divide by zero.
                    elapsed = max(time.monotonic() - started_at, 0.001)
                    speed = bytes_downloaded / elapsed
                    eta = ((bytes_total - bytes_downloaded) / speed) if bytes_total and speed > 0 else None
                    reporter.progress(
                        current=bytes_downloaded,
                        total=bytes_total,
                        current_step="download",
                        message=f"Downloading `{entry['name']}`.",
                        metadata={
                            **initial_metadata,
                            "bytes_downloaded": bytes_downloaded,
                            "bytes_total": bytes_total,
                            "speed_bytes_per_sec": round(speed, 2),
                            "eta_seconds": round(eta, 2) if eta is not None else None,
                        },
                    )

        # Chunked reads simply return b"" when the connection drops early, so a
        # short body must be detected explicitly; without a declared size or
        # checksum it would otherwise be installed as if it were complete.
        if bytes_total is not None and bytes_downloaded != bytes_total:
            raise ValueError(
                f"Download of `{entry['id']}` was truncated: expected {bytes_total} bytes, got {bytes_downloaded} bytes."
            )

        if temp_extract_root.exists():
            shutil.rmtree(temp_extract_root)
        ensure_dir(temp_extract_root)
        actual_sha256 = sha256.hexdigest()
        if expected_size_bytes is not None and bytes_downloaded != expected_size_bytes:
            raise ValueError(f"Downloaded archive size mismatch for `{entry['id']}`: expected {expected_size_bytes} bytes, got {bytes_downloaded} bytes.")
        reporter.progress(
            current=bytes_total or bytes_downloaded,
            total=bytes_total or bytes_downloaded or None,
            current_step="verify",
            message=f"Verifying SHA-256 for `{entry['name']}`.",
            metadata={
                **initial_metadata,
                "bytes_downloaded": bytes_downloaded,
                "bytes_total": bytes_total,
                "archive_path": str(archive_path),
                "archive_sha256": actual_sha256,
                "expected_sha256": expected_sha256,
            },
        )
        # expected_sha256 is already lowercased and hexdigest() is lowercase.
        if expected_sha256 and actual_sha256 != expected_sha256:
            raise ValueError(f"SHA-256 mismatch for `{entry['id']}`: expected {expected_sha256}, got {actual_sha256}.")
        reporter.progress(
            current=bytes_total or bytes_downloaded,
            total=bytes_total or bytes_downloaded or None,
            current_step="extract",
            message=f"Extracting `{entry['name']}`.",
            metadata={
                **initial_metadata,
                "bytes_downloaded": bytes_downloaded,
                "bytes_total": bytes_total,
                "archive_path": str(archive_path),
            },
        )
        self._extract_archive(archive_path=archive_path, archive_type=archive_type, extract_root=temp_extract_root)
        install_source = self._resolved_install_source(temp_extract_root)
        # Replace any previous installation with the freshly extracted tree.
        if install_dir.exists():
            shutil.rmtree(install_dir)
        ensure_dir(install_dir.parent)
        install_source.replace(install_dir)
        if temp_extract_root.exists() and temp_extract_root != install_dir:
            shutil.rmtree(temp_extract_root, ignore_errors=True)

        record = self.write_install_record(
            entry["id"],
            {
                "entry_id": entry["id"],
                "entry_name": entry["name"],
                "status": "installed",
                "task_id": task_id,
                "local_path": str(install_dir),
                "download_url": url,
                "archive_type": archive_type,
                "archive_path": str(archive_path),
                "archive_sha256": actual_sha256,
                "expected_sha256": expected_sha256,
                "bytes_downloaded": bytes_downloaded,
                "bytes_total": bytes_total,
                "installed_at": utc_now(),
                "updated_at": utc_now(),
            },
        )
        reporter.complete(
            message=f"Installed `{entry['name']}`.",
            result_path=str(self.install_record_path(entry["id"])),
            data={"install_record": record},
        )
        return record
    except Exception as exc:
        # Persist the failure (with its reason) before propagating it.
        self.write_install_record(
            entry["id"],
            {
                "entry_id": entry["id"],
                "entry_name": entry["name"],
                "status": "failed",
                "task_id": task_id,
                "local_path": str(install_dir),
                "download_url": url,
                "archive_type": archive_type,
                "error": str(exc),
                "updated_at": utc_now(),
            },
        )
        raise
    finally:
        if temp_extract_root.exists():
            shutil.rmtree(temp_extract_root, ignore_errors=True)
825
+
826
+ def _find_entry(self, entry_id: str, *, locale: str = "en") -> dict[str, Any]:
827
+ normalized_id = self._normalize_identifier(entry_id, fallback="")
828
+ if not normalized_id:
829
+ raise FileNotFoundError("Benchmark id is required.")
830
+ path = self._find_entry_path(normalized_id, locale=locale)
831
+ return self._load_entry_file(path, include_raw_payload=True)
832
+
833
+ def _find_entry_path(self, entry_id: str, *, locale: str = "en") -> Path:
834
+ normalized_id = self._normalize_identifier(entry_id, fallback="")
835
+ if not normalized_id:
836
+ raise FileNotFoundError("Benchmark id is required.")
837
+ for path in self._catalog_entry_paths(locale=locale):
838
+ try:
839
+ entry = self._load_entry_file(path)
840
+ except ValueError:
841
+ continue
842
+ if str(entry.get("id") or "") == normalized_id:
843
+ return path
844
+ raise FileNotFoundError(f"Unknown BenchStore entry `{normalized_id}`.")
845
+
846
def entry_image_asset_path(self, entry_id: str, *, locale: str = "en") -> Path:
    """Return the on-disk image file declared by a catalog entry.

    Raises:
        FileNotFoundError: If the entry is unknown, declares no
            ``image_path``, or the resolved asset is missing or not a regular
            file.
    """
    entry_file = self._find_entry_path(entry_id, locale=locale)
    declared = _optional_str(self._load_entry_file(entry_file).get("image_path"))
    if not declared:
        raise FileNotFoundError(f"BenchStore entry `{entry_id}` does not define `image_path`.")
    asset = self._resolve_entry_asset_path(entry_file, declared)
    if asset is not None and asset.exists() and asset.is_file():
        return asset
    raise FileNotFoundError(f"BenchStore image asset for `{entry_id}` was not found.")
856
+
857
+ def build_setup_packet(
858
+ self,
859
+ *,
860
+ entry_id: str,
861
+ hardware_payload: dict[str, Any] | None = None,
862
+ locale: str = "zh",
863
+ ) -> dict[str, Any]:
864
+ is_zh = str(locale or "").strip().lower().startswith("zh")
865
+ entry = self._find_entry(entry_id, locale=locale)
866
+ install_state = self.install_state(entry["id"])
867
+ device_profile = self._device_profile(hardware_payload)
868
+ device_summary = (
869
+ str(hardware_payload.get("prompt_hardware_summary") or "").strip()
870
+ if isinstance(hardware_payload, dict)
871
+ else ""
872
+ ) or _device_summary_from_profile(device_profile)
873
+ compatibility = self._compatibility(entry=entry, device_profile=device_profile, device_summary=device_summary)
874
+ local_path = str(install_state.get("local_path") or "").strip() or None
875
+ if bool(entry.get("requires_execution")) and install_state.get("status") != "installed":
876
+ raise ValueError("This benchmark must be installed locally before launch.")
877
+ local_refs = self._local_reference_paths(Path(local_path) if local_path else None)
878
+ local_dataset_paths = [str(item).strip() for item in (local_refs.get("dataset_paths") or []) if str(item).strip()]
879
+ latex_markdown_path = str(local_refs.get("latex_markdown_path") or "").strip() or None
880
+
881
+ paper = entry.get("paper") if isinstance(entry.get("paper"), dict) else {}
882
+ dataset_download = entry.get("dataset_download") if isinstance(entry.get("dataset_download"), dict) else {}
883
+ credential_requirements = entry.get("credential_requirements") if isinstance(entry.get("credential_requirements"), dict) else {}
884
+ project_title = f"{entry['name']} 全自动研究" if is_zh else f"{entry['name']} Autonomous Research"
885
+ one_line = str(entry.get("one_line") or "").strip()
886
+ task_description = str(entry.get("task_description") or "").strip()
887
+ venue = str(paper.get("venue") or "").strip()
888
+ year = paper.get("year")
889
+ requires_paper = bool(entry.get("requires_paper")) if entry.get("requires_paper") is not None else True
890
+ recommendation_tier = str(compatibility.get("recommendation_tier") or "").strip() or "unknown"
891
+ unknown_text = "未知" if is_zh else "unknown"
892
+ not_available_text = "不可用" if is_zh else "not available"
893
+ none_text = "无" if is_zh else "none"
894
+ unspecified_text = "未说明" if is_zh else "unspecified"
895
+ localized_device_fit = (
896
+ {
897
+ "recommended": "推荐配置",
898
+ "minimum": "满足最低配置",
899
+ "unsupported": "低于最低配置",
900
+ "unknown": "未知",
901
+ }.get(recommendation_tier, recommendation_tier)
902
+ if is_zh
903
+ else recommendation_tier
904
+ )
905
+
906
+ benchmark_summary = task_description or one_line or (
907
+ f"忠实运行 benchmark `{entry['name']}`。"
908
+ if is_zh
909
+ else f"Run the benchmark `{entry['name']}` faithfully."
910
+ )
911
+ benchmark_goal = (
912
+ (
913
+ f"{benchmark_summary}\n\n"
914
+ "核心研究目标:把 baseline 视为可信起点,而不是终点。先建立 faithful 且可比较的 baseline,"
915
+ "再围绕该任务持续迭代具有新颖性的改进方向,争取稳健超越强基线 / SoTA;"
916
+ "当主结果足够稳定后,继续进入分析实验,并在证据足够时进入文献搜索、图片制作和论文撰写协作。"
917
+ )
918
+ if is_zh
919
+ else (
920
+ f"{benchmark_summary}\n\n"
921
+ "Main research target: treat the baseline as the credible starting point rather than the endpoint. "
922
+ "First establish a faithful, comparable baseline; then iteratively develop and test novel improvement directions "
923
+ "for this task until the result robustly surpasses strong baselines / SoTA; once the main gain is stable, "
924
+ "continue into analysis experiments and, when justified, into literature search, figure making, and paper-writing collaboration."
925
+ )
926
+ )
927
+ fit_lines = compatibility.get("recommended_reasons") or compatibility.get("minimum_reasons") or []
928
+ constraints = [
929
+ (
930
+ f"- 基准本地路径: {local_path or not_available_text}"
931
+ if is_zh
932
+ else f"- benchmark_local_path: {local_path or not_available_text}"
933
+ ),
934
+ (
935
+ f"- 设备摘要: {device_summary}"
936
+ if is_zh
937
+ else f"- device_summary: {device_summary}"
938
+ ),
939
+ (
940
+ f"- 设备适配: {localized_device_fit or unknown_text}"
941
+ if is_zh
942
+ else f"- device_fit: {localized_device_fit or unknown_text}"
943
+ ),
944
+ (
945
+ "- 设备边界规则: 不要围绕当前本机有效设备边界之外的算力做计划。"
946
+ if is_zh
947
+ else "- device_boundary_rule: do not plan around compute outside the current effective local device boundary."
948
+ ),
949
+ ]
950
+ if fit_lines:
951
+ constraints.append("- 设备兼容性说明:" if is_zh else "- compatibility_notes:")
952
+ constraints.extend([f" - {item}" for item in fit_lines[:6]])
953
+ if local_dataset_paths:
954
+ constraints.append("- 本地数据路径:" if is_zh else "- local_dataset_paths:")
955
+ constraints.extend([f" - {item}" for item in local_dataset_paths[:8]])
956
+ if latex_markdown_path:
957
+ constraints.append(
958
+ f"- LaTeX Markdown 路径: {latex_markdown_path}"
959
+ if is_zh
960
+ else f"- latex_markdown_path: {latex_markdown_path}"
961
+ )
962
+ credential_mode = str(credential_requirements.get("mode") or "").strip()
963
+ credential_items = [str(item).strip() for item in (credential_requirements.get("items") or []) if str(item).strip()]
964
+ if credential_mode or credential_items:
965
+ constraints.append(
966
+ (
967
+ f"- 凭证要求: 模式={credential_mode or unspecified_text};项目={', '.join(credential_items) or none_text}"
968
+ if is_zh
969
+ else f"- credential_requirements: mode={credential_mode or unspecified_text} items={', '.join(credential_items) or none_text}"
970
+ )
971
+ )
972
+ resources = entry.get("resources") if isinstance(entry.get("resources"), dict) else {}
973
+ minimum_resources = resources.get("minimum") if isinstance(resources.get("minimum"), dict) else {}
974
+ recommended_resources = resources.get("recommended") if isinstance(resources.get("recommended"), dict) else {}
975
+ if minimum_resources:
976
+ constraints.append(
977
+ f"- 最低资源需求: {json.dumps(minimum_resources, ensure_ascii=False)}"
978
+ if is_zh
979
+ else f"- minimum_resources: {json.dumps(minimum_resources, ensure_ascii=False)}"
980
+ )
981
+ if recommended_resources:
982
+ constraints.append(
983
+ f"- 推荐资源需求: {json.dumps(recommended_resources, ensure_ascii=False)}"
984
+ if is_zh
985
+ else f"- recommended_resources: {json.dumps(recommended_resources, ensure_ascii=False)}"
986
+ )
987
+ if recommendation_tier == "unsupported":
988
+ constraints.append(
989
+ "- 启动提醒: 当前设备低于 benchmark 的最低目标,但仍允许启动。"
990
+ if is_zh
991
+ else "- launch_warning: current device is below the benchmark minimum target, but launch remains allowed."
992
+ )
993
+ constraints.append(
994
+ "- 启动提醒规则: 如果本地算力成为瓶颈,应在 benchmark 边界内保守降级,而不是假设额外硬件。"
995
+ if is_zh
996
+ else "- launch_warning_rule: if local compute becomes the bottleneck, stay within the benchmark scope and degrade gracefully instead of expanding hardware assumptions."
997
+ )
998
+ if venue:
999
+ constraints.append(f"- 论文场地: {venue}" if is_zh else f"- paper_venue: {venue}")
1000
+ if year:
1001
+ constraints.append(f"- 论文年份: {year}" if is_zh else f"- paper_year: {year}")
1002
+ if requires_paper:
1003
+ constraints.append(
1004
+ "- 交付规则: 除非用户后续主动收窄范围,否则论文级交付仍然在 scope 内。"
1005
+ if is_zh
1006
+ else "- delivery_rule: paper-facing output remains in scope unless the user later narrows scope."
1007
+ )
1008
+ else:
1009
+ constraints.append(
1010
+ "- 交付规则: 当前以 benchmark 结果优先,不默认进入论文打包。"
1011
+ if is_zh
1012
+ else "- delivery_rule: optimize for the strongest justified benchmark result rather than paper-first packaging."
1013
+ )
1014
+
1015
+ objectives = (
1016
+ [
1017
+ "1. 先建立一个与 benchmark 保持一致、可比较、可复用的 baseline。",
1018
+ "2. baseline 完成后,自主进入多轮优化与性能增强,而不是停留在复现本身。",
1019
+ "3. 以具备新颖性的方向稳健超越强基线 / SoTA,而不是接受一次性的小幅提升。",
1020
+ "4. 主结果稳定后,继续做分析实验,补充 ablation、robustness、failure analysis 等证据。",
1021
+ "5. 如果论文级交付仍在范围内,再进入文献搜索、图片制作、论文撰写等协作阶段。",
1022
+ "6. 整个过程都要尽量保持 benchmark faithful,并且受当前设备边界约束。",
1023
+ ]
1024
+ if is_zh
1025
+ else [
1026
+ "1. First establish a faithful, comparable, reusable baseline for the benchmark.",
1027
+ "2. After the baseline is credible, move into repeated optimization and performance improvement rather than stopping at reproduction.",
1028
+ "3. Robustly surpass strong baselines / SoTA through a method direction with clear novelty, rather than accepting a one-off minor gain.",
1029
+ "4. Once the main result is stable, continue into analysis experiments such as ablations, robustness checks, and failure analysis.",
1030
+ "5. If paper-facing delivery remains in scope, continue into literature search, figure making, and paper-writing collaboration.",
1031
+ "6. Keep the whole plan benchmark-faithful and inside the current device boundary.",
1032
+ ]
1033
+ )
1034
+ if requires_paper:
1035
+ objectives.append("7. 保持论文级交付仍然在范围内。" if is_zh else "7. Keep paper-facing delivery in scope.")
1036
+ else:
1037
+ objectives.append(
1038
+ "7. 当前以结果优先,不默认进入论文写作。"
1039
+ if is_zh
1040
+ else "7. Optimize for benchmark results first without defaulting into paper writing."
1041
+ )
1042
+ if recommendation_tier == "unsupported":
1043
+ objectives.append(
1044
+ "8. 允许直接启动,但要先识别当前设备不足会影响哪些环节,并优先选择在本机可落地的 faithful 路径。"
1045
+ if is_zh
1046
+ else "8. Launch is still allowed, but first identify which steps are limited by the current device and prefer a faithful path that can actually run locally."
1047
+ )
1048
+ if credential_items:
1049
+ objectives.append(
1050
+ "9. 启动前确认可用的 API Key / 资源凭证,并根据可用资源收窄执行路线。"
1051
+ if is_zh
1052
+ else "9. Confirm available API keys / resource credentials before launch and narrow the execution path accordingly."
1053
+ )
1054
+
1055
+ baseline_url_lines: list[str] = []
1056
+ download_url = str(entry.get("download", {}).get("url") or "").strip() if isinstance(entry.get("download"), dict) else ""
1057
+ if download_url:
1058
+ baseline_url_lines.append(download_url)
1059
+ if local_path:
1060
+ baseline_url_lines.append(local_path)
1061
+ baseline_url_lines.extend(local_dataset_paths)
1062
+
1063
+ paper_url_lines: list[str] = []
1064
+ paper_url = str(paper.get("url") or "").strip()
1065
+ if paper_url:
1066
+ paper_url_lines.append(paper_url)
1067
+ if latex_markdown_path:
1068
+ paper_url_lines.append(latex_markdown_path)
1069
+
1070
+ suggested_form = {
1071
+ "title": project_title,
1072
+ "goal": benchmark_goal,
1073
+ "baseline_id": "",
1074
+ "baseline_variant_id": "",
1075
+ "baseline_source_mode": "auto",
1076
+ "execution_start_mode": "execute_immediately",
1077
+ "baseline_acceptance_target": "comparison_ready",
1078
+ "baseline_urls": "\n".join(baseline_url_lines),
1079
+ "paper_urls": "\n".join(paper_url_lines),
1080
+ "runtime_constraints": "\n".join(constraints),
1081
+ "objectives": "\n".join(objectives),
1082
+ "need_research_paper": requires_paper,
1083
+ "research_intensity": "balanced",
1084
+ "decision_policy": "autonomous",
1085
+ "launch_mode": "standard",
1086
+ "standard_profile": "canonical_research_graph",
1087
+ "custom_profile": "freeform",
1088
+ "review_followup_policy": "audit_only",
1089
+ "baseline_execution_policy": "auto",
1090
+ "manuscript_edit_mode": "none",
1091
+ "entry_state_summary": "\n".join(
1092
+ [
1093
+ item
1094
+ for item in [
1095
+ (
1096
+ f"基准本地路径: {local_path}"
1097
+ if is_zh and local_path
1098
+ else f"benchmark_local_path: {local_path}"
1099
+ if local_path
1100
+ else ""
1101
+ ),
1102
+ (
1103
+ f"本地数据路径: {', '.join(local_dataset_paths)}"
1104
+ if is_zh and local_dataset_paths
1105
+ else f"local_dataset_paths: {', '.join(local_dataset_paths)}"
1106
+ if local_dataset_paths
1107
+ else ""
1108
+ ),
1109
+ (
1110
+ f"LaTeX Markdown 路径: {latex_markdown_path}"
1111
+ if is_zh and latex_markdown_path
1112
+ else f"latex_markdown_path: {latex_markdown_path}"
1113
+ if latex_markdown_path
1114
+ else ""
1115
+ ),
1116
+ ]
1117
+ if item
1118
+ ]
1119
+ ),
1120
+ "review_summary": "",
1121
+ "review_materials": "",
1122
+ "custom_brief": (
1123
+ (
1124
+ f"基准来源: {entry['name']} ({entry['id']})。"
1125
+ f"本地路径: {local_path or unknown_text}。"
1126
+ f"设备适配: {localized_device_fit or unknown_text}。"
1127
+ f"LaTeX 路径: {latex_markdown_path or none_text}。"
1128
+ "请把这次任务当作完整研究,而不是 baseline-only 复现。"
1129
+ "baseline 只是可信起点;完成后应继续自主优化,并围绕具有新颖性的方向稳健超越强基线 / SoTA;"
1130
+ "主结果稳定后继续进入分析实验,再在需要时进入文献、图片和论文协作。"
1131
+ "需要和用户确认:这次任务是否真的是 baseline-only,是否要求新颖性,以及超越后是否继续做分析实验和论文协作。"
1132
+ f"是否需要用户确认凭证/资源: {', '.join(credential_items) if credential_items else '可能仅剩少量运行时 / API 细节'}。"
1133
+ )
1134
+ if is_zh
1135
+ else (
1136
+ f"Benchmark source: {entry['name']} ({entry['id']}). "
1137
+ f"Local path: {local_path or unknown_text}. "
1138
+ f"Device fit: {localized_device_fit or unknown_text}. "
1139
+ f"Latex path: {latex_markdown_path or none_text}. "
1140
+ "Treat this as a full research task rather than a baseline-only reproduction task. "
1141
+ "The baseline is only the credible starting point; after that the system should continue autonomous optimization, "
1142
+ "push toward robust gains beyond strong baselines / SoTA through a novel method direction, "
1143
+ "then continue into analysis experiments and, when needed, into literature, figures, and paper-writing collaboration. "
1144
+ "Confirm with the user whether this is really baseline-only or full research, whether novelty is required, and whether post-win analysis and paper-facing collaboration should remain in scope. "
1145
+ f"Need user confirmation for credentials/resources: {', '.join(credential_items) if credential_items else 'maybe runtime/API specifics only'}."
1146
+ )
1147
+ ),
1148
+ "user_language": "zh" if is_zh else "en",
1149
+ }
1150
+
1151
+ startup_instruction = "\n".join(
1152
+ [
1153
+ "BenchStore 全自动启动" if is_zh else "BenchStore Autonomous Launch",
1154
+ f"- 基准 ID: {entry['id']}" if is_zh else f"- benchmark_id: {entry['id']}",
1155
+ f"- 基准名称: {entry['name']}" if is_zh else f"- benchmark_name: {entry['name']}",
1156
+ (
1157
+ f"- 基准本地路径: {local_path or unknown_text}"
1158
+ if is_zh
1159
+ else f"- benchmark_local_path: {local_path or unknown_text}"
1160
+ ),
1161
+ (
1162
+ f"- 设备适配: {localized_device_fit or unknown_text}"
1163
+ if is_zh
1164
+ else f"- device_fit: {localized_device_fit or unknown_text}"
1165
+ ),
1166
+ "",
1167
+ "核心 benchmark 目标" if is_zh else "Primary Benchmark Goal",
1168
+ benchmark_goal,
1169
+ "",
1170
+ "运行约束" if is_zh else "Operational Constraints",
1171
+ "\n".join(constraints),
1172
+ "",
1173
+ "Setup Agent 指引" if is_zh else "Setup Agent Guidance",
1174
+ self.prompt_builder.build_setup_prompt(
1175
+ entry=entry,
1176
+ hardware_payload=hardware_payload,
1177
+ benchmark_local_path=local_path,
1178
+ locale=locale,
1179
+ ),
1180
+ ]
1181
+ ).strip()
1182
+
1183
+ accent_color = "clay" if compatibility.get("recommended_ok") else "mist"
1184
+ launch_payload = {
1185
+ "title": project_title,
1186
+ "goal": startup_instruction,
1187
+ "initial_message": startup_instruction,
1188
+ "startup_contract": {
1189
+ "schema_version": 1,
1190
+ "workspace_mode": "autonomous",
1191
+ "launch_mode": "custom",
1192
+ "custom_profile": "freeform",
1193
+ "decision_policy": "autonomous",
1194
+ "need_research_paper": requires_paper,
1195
+ "project_display": {
1196
+ "template": "experiment",
1197
+ "accent_color": accent_color,
1198
+ "background_style": "cloud",
1199
+ },
1200
+ "benchstore_context": {
1201
+ "schema_version": 1,
1202
+ "entry_id": entry.get("id"),
1203
+ "entry_name": entry.get("name"),
1204
+ "one_line": entry.get("one_line"),
1205
+ "task_description": entry.get("task_description"),
1206
+ "homepage": entry.get("homepage"),
1207
+ "official_links": entry.get("official_links") or {},
1208
+ "discovery": entry.get("discovery") or {},
1209
+ "paper": paper,
1210
+ "capability_tags": entry.get("capability_tags") or [],
1211
+ "track_fit": entry.get("track_fit") or [],
1212
+ "task_mode": entry.get("task_mode"),
1213
+ "requires_execution": entry.get("requires_execution"),
1214
+ "requires_paper": entry.get("requires_paper"),
1215
+ "snapshot_status": entry.get("snapshot_status"),
1216
+ "support_level": entry.get("support_level"),
1217
+ "primary_outputs": entry.get("primary_outputs") or [],
1218
+ "launch_profiles": entry.get("launch_profiles") or [],
1219
+ "resources": entry.get("resources") or {},
1220
+ "environment": entry.get("environment") or {},
1221
+ "image_path": entry.get("image_path"),
1222
+ "image_url": entry.get("image_url"),
1223
+ "recommended_when": entry.get("recommended_when"),
1224
+ "not_recommended_when": entry.get("not_recommended_when"),
1225
+ "download": entry.get("download") or {},
1226
+ "dataset_download": dataset_download,
1227
+ "credential_requirements": credential_requirements,
1228
+ "risk_flags": entry.get("risk_flags") or [],
1229
+ "risk_notes": entry.get("risk_notes") or [],
1230
+ "integrity_level": entry.get("integrity_level"),
1231
+ "version": entry.get("version"),
1232
+ "commercial": entry.get("commercial") or {},
1233
+ "display": entry.get("display") or {},
1234
+ "compatibility": compatibility,
1235
+ "benchmark_local_path": local_path,
1236
+ "local_dataset_paths": local_dataset_paths,
1237
+ "latex_markdown_path": latex_markdown_path,
1238
+ "setup_agent_label": f"BenchStore Setup Agent · {self._default_runner_label()}",
1239
+ "catalog_source_file": entry.get("source_file"),
1240
+ "raw_payload": entry.get("raw_payload") or {},
1241
+ },
1242
+ },
1243
+ }
1244
+ return {
1245
+ "entry_id": entry["id"],
1246
+ "assistant_label": f"BenchStore Setup Agent · {self._default_runner_label()}",
1247
+ "project_title": project_title,
1248
+ "benchmark_local_path": local_path,
1249
+ "local_dataset_paths": local_dataset_paths,
1250
+ "latex_markdown_path": latex_markdown_path,
1251
+ "device_summary": device_summary,
1252
+ "device_fit": compatibility.get("recommendation_tier"),
1253
+ "requires_paper": requires_paper,
1254
+ "benchmark_goal": benchmark_goal,
1255
+ "constraints": constraints,
1256
+ "suggested_form": suggested_form,
1257
+ "startup_instruction": startup_instruction,
1258
+ "launch_payload": launch_payload,
1259
+ }
1260
+
1261
def _extract_archive(self, *, archive_path: Path, archive_type: str, extract_root: Path) -> None:
    """Extract ``archive_path`` into ``extract_root`` without leaving that directory.

    Every member name is normalised (backslashes to ``/``, leading ``/``
    stripped) and passed to ``resolve_within`` before anything is written.
    NOTE(review): ``resolve_within`` is defined elsewhere; it is presumed to
    raise when the name escapes ``extract_root`` -- confirm.

    Args:
        archive_path: Archive file to read.
        archive_type: ``"zip"``, ``"tar.gz"`` or ``"tar"``.
        extract_root: Destination directory.

    Raises:
        ValueError: For an unsupported ``archive_type`` or a tar member that
            is rejected as unsafe (link, device node, absolute name, or a
            member refused by the ``data`` extraction filter).
    """
    if archive_type == "zip":
        with zipfile.ZipFile(archive_path) as archive:
            for member in archive.infolist():
                member_name = str(member.filename or "").replace("\\", "/").lstrip("/")
                if not member_name:
                    continue
                resolve_within(extract_root, member_name)
                archive.extract(member, path=extract_root)
        return
    if archive_type in {"tar.gz", "tar"}:
        mode = "r:gz" if archive_type == "tar.gz" else "r:"
        with tarfile.open(archive_path, mode) as archive:
            # Only members whose normalised name was vetted are extracted.
            # A bare extractall() would use the raw member names again, so an
            # absolute name or a link member could still write outside
            # extract_root.
            vetted_members = []
            for member in archive.getmembers():
                member_name = str(member.name or "").replace("\\", "/").lstrip("/")
                if not member_name:
                    continue
                resolve_within(extract_root, member_name)
                vetted_members.append(member)
            if hasattr(tarfile, "data_filter"):
                # The "data" filter strips leading slashes and refuses links
                # that point outside the destination as well as device nodes.
                try:
                    archive.extractall(path=extract_root, members=vetted_members, filter="data")
                except tarfile.FilterError as exc:
                    raise ValueError(f"Unsafe member in archive `{archive_path}`: {exc}") from exc
            else:
                # Interpreters without extraction filters: reject the same
                # classes of member explicitly before extracting.
                for member in vetted_members:
                    if member.issym() or member.islnk() or member.isdev() or str(member.name).startswith("/"):
                        raise ValueError(f"Unsafe member in archive `{archive_path}`: {member.name}")
                archive.extractall(path=extract_root, members=vetted_members)
        return
    raise ValueError(f"Unsupported archive type `{archive_type}`.")
1282
+
1283
+ @staticmethod
1284
+ def _resolved_install_source(extract_root: Path) -> Path:
1285
+ children = [
1286
+ item
1287
+ for item in extract_root.iterdir()
1288
+ if item.name not in {"__MACOSX"} and not item.name.startswith(".")
1289
+ ]
1290
+ directories = [item for item in children if item.is_dir()]
1291
+ files = [item for item in children if item.is_file()]
1292
+ if len(directories) == 1 and not files:
1293
+ return directories[0]
1294
+ return extract_root
1295
+
1296
def _normalize_identifier(self, value: Any, *, fallback: str) -> str:
    """Return an entry identifier derived from ``value`` (or ``fallback``).

    The first non-empty candidate among ``value``, ``fallback`` and a slug of
    ``fallback`` (``"bench"`` when that is empty) is used. A candidate that
    already matches ``_ENTRY_ID_PATTERN`` is returned unchanged; anything
    else is slugified with dashes replaced by underscores. An empty
    candidate yields ``""``.
    """
    candidate = _optional_str(value)
    if not candidate:
        candidate = _optional_str(fallback)
    if not candidate:
        candidate = slugify(fallback or "bench")
    identifier = str(candidate).strip()
    if identifier and not _ENTRY_ID_PATTERN.match(identifier):
        identifier = slugify(identifier, default="bench").replace("-", "_")
    return identifier
1305
+
1306
+ @staticmethod
1307
+ def _search_text(entry: dict[str, Any], *, raw_payload: dict[str, Any] | None = None) -> str:
1308
+ paper = entry.get("paper") if isinstance(entry.get("paper"), dict) else {}
1309
+ environment = entry.get("environment") if isinstance(entry.get("environment"), dict) else {}
1310
+ dataset_download = entry.get("dataset_download") if isinstance(entry.get("dataset_download"), dict) else {}
1311
+ credential_requirements = entry.get("credential_requirements") if isinstance(entry.get("credential_requirements"), dict) else {}
1312
+ parts = [
1313
+ entry.get("id"),
1314
+ entry.get("name"),
1315
+ entry.get("one_line"),
1316
+ entry.get("task_description"),
1317
+ entry.get("aisb_direction"),
1318
+ entry.get("task_mode"),
1319
+ entry.get("difficulty"),
1320
+ entry.get("time_band"),
1321
+ entry.get("cost_band"),
1322
+ entry.get("data_access"),
1323
+ entry.get("integrity_level"),
1324
+ entry.get("snapshot_status"),
1325
+ entry.get("support_level"),
1326
+ entry.get("recommended_when"),
1327
+ entry.get("not_recommended_when"),
1328
+ paper.get("title"),
1329
+ paper.get("venue"),
1330
+ paper.get("url"),
1331
+ environment.get("python"),
1332
+ environment.get("cuda"),
1333
+ environment.get("pytorch"),
1334
+ environment.get("flash_attn"),
1335
+ dataset_download.get("primary_method"),
1336
+ credential_requirements.get("mode"),
1337
+ ]
1338
+ parts.extend(entry.get("capability_tags") or [])
1339
+ parts.extend(entry.get("track_fit") or [])
1340
+ parts.extend(entry.get("primary_outputs") or [])
1341
+ for profile in entry.get("launch_profiles") or []:
1342
+ if isinstance(profile, dict):
1343
+ parts.extend([profile.get("id"), profile.get("label"), profile.get("description")])
1344
+ parts.extend(entry.get("risk_flags") or [])
1345
+ parts.extend(entry.get("risk_notes") or [])
1346
+ parts.extend(environment.get("key_packages") or [])
1347
+ parts.extend(environment.get("notes") or [])
1348
+ parts.extend(dataset_download.get("notes") or [])
1349
+ parts.extend(credential_requirements.get("items") or [])
1350
+ parts.extend(credential_requirements.get("notes") or [])
1351
+ for source in dataset_download.get("sources") or []:
1352
+ if not isinstance(source, dict):
1353
+ continue
1354
+ parts.extend([source.get("kind"), source.get("url"), source.get("access"), source.get("note")])
1355
+ if isinstance(raw_payload, dict):
1356
+ parts.extend(_collect_search_values(raw_payload))
1357
+ return " ".join(str(item).strip().lower() for item in parts if str(item or "").strip())
1358
+
1359
+ @staticmethod
1360
+ def _has_risk_markers(entry: dict[str, Any]) -> bool:
1361
+ risk_flags = entry.get("risk_flags") if isinstance(entry.get("risk_flags"), list) else []
1362
+ risk_notes = entry.get("risk_notes") if isinstance(entry.get("risk_notes"), list) else []
1363
+ return bool(risk_flags or risk_notes)
1364
+
1365
+ def _resolve_entry_asset_path(self, catalog_path: Path, asset_path: str) -> Path | None:
1366
+ candidate = Path(asset_path)
1367
+ resolved = candidate.resolve() if candidate.is_absolute() else (catalog_path.parent / candidate).resolve()
1368
+ try:
1369
+ resolved.relative_to(self.workspace_root.resolve())
1370
+ except ValueError:
1371
+ return None
1372
+ return resolved
1373
+
1374
+ @staticmethod
1375
+ def _entry_sort_key(entry: dict[str, Any]) -> tuple[int, int, float, float, str]:
1376
+ compatibility = entry.get("compatibility") if isinstance(entry.get("compatibility"), dict) else {}
1377
+ recommendation = entry.get("recommendation") if isinstance(entry.get("recommendation"), dict) else {}
1378
+ recommended_ok = 1 if compatibility.get("recommended_ok") else 0
1379
+ minimum_ok = 1 if compatibility.get("minimum_ok") else 0
1380
+ has_risk_markers = 1 if (entry.get("risk_flags") or entry.get("risk_notes")) else 0
1381
+ score = float(recommendation.get("score") or compatibility.get("score") or 0.0)
1382
+ affinity = float(recommendation.get("affinity_score") or 0.0)
1383
+ name = str(entry.get("name") or "").lower()
1384
+ return (has_risk_markers, -recommended_ok, -minimum_ok, -score, -affinity, name)
1385
+
1386
@staticmethod
def _device_profile(hardware_payload: dict[str, Any] | None) -> dict[str, Any]:
    """Reduce a hardware payload to the numbers used for compatibility checks.

    Args:
        hardware_payload: Optional dict with ``system`` (``cpu``, ``memory``,
            ``disks``, ``gpus``) and ``preferences`` (``gpu_selection_mode``,
            ``effective_gpu_ids``). Missing or malformed parts are treated as
            empty.

    Returns:
        A dict with ``cpu_cores``, ``ram_gb``, ``disk_gb`` (``free_gb`` of the
        first listed disk), ``gpu_count`` (float) and ``gpu_vram_gb`` (largest
        ``memory_total_gb`` among the effective GPUs, or ``None``).
    """
    payload = hardware_payload if isinstance(hardware_payload, dict) else {}
    system = payload.get("system") if isinstance(payload.get("system"), dict) else {}
    preferences = payload.get("preferences") if isinstance(payload.get("preferences"), dict) else {}
    cpu = system.get("cpu") if isinstance(system.get("cpu"), dict) else {}
    memory = system.get("memory") if isinstance(system.get("memory"), dict) else {}
    disks = system.get("disks") if isinstance(system.get("disks"), list) else []
    gpus = [item for item in (system.get("gpus") or []) if isinstance(item, dict)]
    selection_mode = str(preferences.get("gpu_selection_mode") or "all").strip().lower()
    if selection_mode == "selected":
        # Build the lookup set once instead of once per GPU.
        selected_ids = {
            str(item).strip()
            for item in (preferences.get("effective_gpu_ids") or [])
            if str(item).strip()
        }

        def gpu_id_text(gpu: dict[str, Any]) -> str:
            # Compare against None rather than truthiness so that an integer
            # id of 0 becomes "0" and can still match a selected id.
            gpu_id = gpu.get("gpu_id")
            return "" if gpu_id is None else str(gpu_id).strip()

        effective_gpus = [gpu for gpu in gpus if gpu_id_text(gpu) in selected_ids]
    else:
        effective_gpus = gpus
    disk_free_gb = None
    if disks:
        first_disk = disks[0] if isinstance(disks[0], dict) else {}
        disk_free_gb = _optional_number(first_disk.get("free_gb"))
    gpu_vram_gb = None
    if effective_gpus:
        gpu_vram_values = [_optional_number(item.get("memory_total_gb")) for item in effective_gpus]
        gpu_vram_values = [item for item in gpu_vram_values if item is not None]
        gpu_vram_gb = max(gpu_vram_values) if gpu_vram_values else None
    return {
        "cpu_cores": _optional_number(cpu.get("logical_cores")),
        "ram_gb": _optional_number(memory.get("total_gb")),
        "disk_gb": disk_free_gb,
        "gpu_count": float(len(effective_gpus)),
        "gpu_vram_gb": gpu_vram_gb,
    }
1417
+
1418
+ @staticmethod
1419
+ def _device_capacity_profile(device_profile: dict[str, Any]) -> dict[str, Any]:
1420
+ cpu = float(device_profile.get("cpu_cores") or 0.0)
1421
+ ram = float(device_profile.get("ram_gb") or 0.0)
1422
+ disk = float(device_profile.get("disk_gb") or 0.0)
1423
+ gpu_count = float(device_profile.get("gpu_count") or 0.0)
1424
+ gpu_vram = float(device_profile.get("gpu_vram_gb") or 0.0)
1425
+ score = (
1426
+ min(cpu / 16.0, 1.6) * 0.20
1427
+ + min(ram / 32.0, 1.8) * 0.24
1428
+ + min(disk / 120.0, 1.5) * 0.10
1429
+ + min(gpu_count / 1.0, 2.0) * 0.18
1430
+ + min(gpu_vram / 16.0, 2.0) * 0.28
1431
+ )
1432
+ if gpu_count <= 0 and gpu_vram <= 0:
1433
+ capacity_class = "low"
1434
+ elif score < 0.90:
1435
+ capacity_class = "low"
1436
+ elif score < 1.45:
1437
+ capacity_class = "medium"
1438
+ else:
1439
+ capacity_class = "high"
1440
+ return {
1441
+ "score": round(score * 100.0, 2),
1442
+ "capacity_class": capacity_class,
1443
+ }
1444
+
1445
def _compatibility(self, *, entry: dict[str, Any], device_profile: dict[str, Any], device_summary: str) -> dict[str, Any]:
    """Compare an entry's resource requirements with the device profile.

    The ``minimum`` and ``recommended`` requirement dicts are each evaluated
    with ``self._evaluate_requirement``. The score weights recommended
    coverage at 70 and minimum coverage at 30, and the tier is
    ``"recommended"``, ``"minimum"`` or ``"unsupported"`` depending on which
    evaluation passes.
    """
    resources = entry.get("resources")
    if not isinstance(resources, dict):
        resources = {}

    def requirement(level: str) -> dict[str, Any]:
        spec = resources.get(level)
        return spec if isinstance(spec, dict) else {}

    minimum_eval = self._evaluate_requirement(requirement("minimum"), device_profile)
    recommended_eval = self._evaluate_requirement(requirement("recommended"), device_profile)

    if recommended_eval["ok"]:
        tier = "recommended"
    elif minimum_eval["ok"]:
        tier = "minimum"
    else:
        tier = "unsupported"

    return {
        "minimum_ok": minimum_eval["ok"],
        "recommended_ok": recommended_eval["ok"],
        "minimum_reasons": minimum_eval["reasons"],
        "recommended_reasons": recommended_eval["reasons"],
        "score": round((recommended_eval["coverage"] * 70.0) + (minimum_eval["coverage"] * 30.0), 2),
        "recommendation_tier": tier,
        "device_summary": device_summary,
        "resource_confidence": _resource_confidence(resources),
    }
1468
+
1469
def _recommendation_profile(
    self,
    *,
    entry: dict[str, Any],
    device_profile: dict[str, Any],
    device_capacity: dict[str, Any],
    compatibility: dict[str, Any],
) -> dict[str, Any]:
    """Compute the recommendation score, shelf bucket and reasons for an entry.

    Starting from ``compatibility["score"]``, the score is adjusted for the
    hardware tier, local install state, cost / difficulty / time band
    (weighted differently per device capacity class), snapshot status,
    support level, resource confidence and risk markers. ``affinity`` tracks
    a subset of those adjustments separately.

    Args:
        entry: Catalog entry being scored.
        device_profile: Device profile; not read in this method.
        device_capacity: Dict providing ``capacity_class``.
        compatibility: Result dict providing ``score``, ``recommended_ok``,
            ``minimum_ok`` and ``resource_confidence``.

    Returns:
        A dict with ``score`` (clamped to 0..140), ``affinity_score``,
        ``capacity_class``, ``shelf_bucket``, ``reasons`` (first five only),
        ``cost_rank``, ``difficulty_rank`` and ``time_upper_hours``.
    """
    capacity_class = str(device_capacity.get("capacity_class") or "medium")
    score = float(compatibility.get("score") or 0.0)
    reasons: list[str] = []
    affinity = 0.0

    # Hardware tier: bonus for meeting a target, penalty when below minimum.
    if compatibility.get("recommended_ok"):
        score += 32.0
        reasons.append("Meets the recommended hardware target.")
    elif compatibility.get("minimum_ok"):
        score += 14.0
        reasons.append("Meets the minimum hardware target.")
    else:
        score -= 35.0
        reasons.append("Current device is below the benchmark minimum target.")

    install_status = str((entry.get("install_state") or {}).get("status") or "").strip().lower()
    has_risk_markers = self._has_risk_markers(entry)
    if install_status == "installed":
        score += 12.0
        affinity += 12.0
        reasons.append("Already installed locally.")

    # Rank lookups yield None for unknown bands; those adjustments are skipped.
    cost_rank = _COST_BAND_RANK.get(str(entry.get("cost_band") or "").strip().lower())
    difficulty_rank = _DIFFICULTY_RANK.get(str(entry.get("difficulty") or "").strip().lower())
    time_upper_hours = _time_band_upper_hours(str(entry.get("time_band") or "").strip())
    requires_execution = bool(entry.get("requires_execution")) if entry.get("requires_execution") is not None else False
    requires_paper = bool(entry.get("requires_paper")) if entry.get("requires_paper") is not None else False

    # Capacity-specific weighting: the same delta is added to score and affinity.
    if capacity_class == "low":
        if cost_rank is not None:
            delta = {0: 10.0, 1: 8.0, 2: 2.0, 3: -6.0, 4: -10.0}.get(cost_rank, 0.0)
            score += delta
            affinity += delta
        if difficulty_rank is not None:
            delta = {0: 10.0, 1: 4.0, 2: -5.0, 3: -9.0}.get(difficulty_rank, 0.0)
            score += delta
            affinity += delta
        if time_upper_hours is not None:
            delta = 8.0 if time_upper_hours <= 2.0 else 3.0 if time_upper_hours <= 6.0 else -5.0
            score += delta
            affinity += delta
        if not requires_execution:
            score += 4.0
            affinity += 4.0
        if requires_paper:
            score -= 2.0
            affinity -= 2.0
    elif capacity_class == "medium":
        if cost_rank is not None:
            delta = {0: 4.0, 1: 5.0, 2: 3.0, 3: -2.0, 4: -6.0}.get(cost_rank, 0.0)
            score += delta
            affinity += delta
        if difficulty_rank is not None:
            delta = {0: 3.0, 1: 5.0, 2: 1.0, 3: -4.0}.get(difficulty_rank, 0.0)
            score += delta
            affinity += delta
        if time_upper_hours is not None:
            delta = 5.0 if time_upper_hours <= 4.0 else 2.0 if time_upper_hours <= 12.0 else -3.0
            score += delta
            affinity += delta
    else:
        # Any other capacity class (including "high").
        if cost_rank is not None:
            delta = {0: 1.0, 1: 2.0, 2: 4.0, 3: 5.0, 4: 2.0}.get(cost_rank, 0.0)
            score += delta
            affinity += delta
        if difficulty_rank is not None:
            delta = {0: 1.0, 1: 3.0, 2: 5.0, 3: 5.0}.get(difficulty_rank, 0.0)
            score += delta
            affinity += delta
        if time_upper_hours is not None:
            delta = 1.0 if time_upper_hours <= 2.0 else 3.0 if time_upper_hours <= 12.0 else 5.0
            score += delta
            affinity += delta
        if requires_execution:
            score += 2.0
            affinity += 2.0

    snapshot_status = str(entry.get("snapshot_status") or "").strip().lower()
    support_level = str(entry.get("support_level") or "").strip().lower()
    snapshot_delta = {
        "runnable": 10.0,
        "runnable_not_verified": 5.0,
        "partial": -6.0,
        "restore_needed": -18.0,
        "external_eval_required": -12.0,
        "data_only": -20.0,
    }.get(snapshot_status, 0.0)
    support_delta = {
        "turnkey": 8.0,
        "advanced": 3.0,
        "recovery": -12.0,
    }.get(support_level, 0.0)
    score += snapshot_delta + support_delta
    # Affinity only receives the positive part of these two deltas.
    affinity += max(snapshot_delta, 0.0) + max(support_delta, 0.0)
    if snapshot_status == "runnable":
        reasons.append("Current snapshot is marked runnable.")
    elif snapshot_status == "partial":
        reasons.append("Current snapshot is only partially runnable.")
    elif snapshot_status == "restore_needed":
        reasons.append("Current snapshot still needs restoration before dependable execution.")
    elif snapshot_status == "external_eval_required":
        reasons.append("Current snapshot still depends on an external evaluation route.")
    elif snapshot_status == "data_only":
        reasons.append("Current snapshot is data-only and not directly executable yet.")
    if support_level == "turnkey":
        reasons.append("This benchmark is packaged as a turnkey route.")
    elif support_level == "recovery":
        reasons.append("This benchmark is still in recovery mode rather than ready-to-run mode.")

    confidence = str(compatibility.get("resource_confidence") or "none").strip().lower() or "none"
    if confidence == "full":
        score += 6.0
    elif confidence == "partial":
        score += 3.0
    else:
        score -= 2.0
        reasons.append("Structured hardware requirements are incomplete, so recommendation confidence is lower.")

    if capacity_class == "low" and compatibility.get("recommended_ok"):
        reasons.append("This benchmark is one of the stronger fits for a modest local machine.")
    elif compatibility.get("recommended_ok"):
        reasons.append("This benchmark is a strong match for the current machine.")
    elif compatibility.get("minimum_ok"):
        reasons.append("This benchmark should run, but the safer choice may be a lighter option.")

    # Shelf bucket precedence: risk markers, then installed, then hardware fit.
    shelf_bucket = "needs_stronger_device"
    if has_risk_markers:
        score -= 40.0
        affinity -= 20.0
        reasons.append("Risk-marked benchmarks are excluded from BenchStore recommendations.")
        shelf_bucket = "risk_flagged"
    elif install_status == "installed" and compatibility.get("minimum_ok"):
        shelf_bucket = "installed"
    elif compatibility.get("recommended_ok"):
        shelf_bucket = "best_match"
    elif compatibility.get("minimum_ok"):
        shelf_bucket = "runnable"

    # NOTE(review): reasons are cut to the first five, so later ones (such as
    # the risk-marker reason appended above) can be dropped -- confirm intended.
    return {
        "score": round(max(0.0, min(140.0, score)), 2),
        "affinity_score": round(affinity, 2),
        "capacity_class": capacity_class,
        "shelf_bucket": shelf_bucket,
        "reasons": reasons[:5],
        "cost_rank": cost_rank,
        "difficulty_rank": difficulty_rank,
        "time_upper_hours": time_upper_hours,
    }
1625
+
1626
@staticmethod
def _filter_options(items: list[dict[str, Any]]) -> dict[str, list[str]]:
    """Collect the distinct filter values that occur across ``items``.

    Args:
        items: Catalog entries as plain dicts. Missing or falsy fields are
            ignored for the text facets; ``None`` is ignored for the
            boolean facets.

    Returns:
        A mapping from facet name to the sorted list of distinct values
        seen. Text facets hold stripped strings, ``track_fit`` flattens the
        per-item lists, and the two ``requires_*`` facets hold the strings
        ``"true"`` / ``"false"``.
    """

    def text_facet(field: str) -> list[str]:
        # Distinct, non-empty, whitespace-stripped string values of `field`.
        seen: set[str] = set()
        for entry in items:
            label = str(entry.get(field) or "").strip()
            if label:
                seen.add(label)
        return sorted(seen)

    def flag_facet(field: str) -> list[str]:
        # Truthiness of `field` rendered as "true"/"false"; None means the
        # item does not declare the flag and contributes nothing.
        seen: set[str] = set()
        for entry in items:
            raw = entry.get(field)
            if raw is not None:
                seen.add("true" if raw else "false")
        return sorted(seen)

    # `track_fit` is a list per item, so it is flattened before de-duplication.
    tracks: set[str] = set()
    for entry in items:
        for raw in entry.get("track_fit") or []:
            label = str(raw).strip()
            if label:
                tracks.add(label)

    options: dict[str, list[str]] = {
        field: text_facet(field)
        for field in (
            "aisb_direction",
            "task_mode",
            "cost_band",
            "difficulty",
            "data_access",
        )
    }
    options["track_fit"] = sorted(tracks)
    options["requires_execution"] = flag_facet("requires_execution")
    options["requires_paper"] = flag_facet("requires_paper")
    return options
1668
+
1669
@staticmethod
def _shelves(items: list[dict[str, Any]]) -> dict[str, list[str]]:
    """Group item ids by the shelf bucket stored in their recommendation.

    Args:
        items: Catalog entries as dicts; each may carry a ``recommendation``
            dict with a ``shelf_bucket`` string.

    Returns:
        A mapping with the keys ``best_match_ids``, ``runnable_ids``,
        ``installed_ids`` and ``needs_stronger_device_ids``. Each list keeps
        the input order and holds at most eight ids. Items whose bucket is
        not one of those four are left out; a missing id is reported as an
        empty string.
    """
    shelf_limit = 8
    shelved: dict[str, list[str]] = {
        "best_match": [],
        "runnable": [],
        "installed": [],
        "needs_stronger_device": [],
    }
    for entry in items:
        recommendation = entry.get("recommendation") or {}
        bucket = str(recommendation.get("shelf_bucket") or "").strip()
        members = shelved.get(bucket)
        # Unknown buckets are skipped; full shelves stop accepting entries.
        if members is None or len(members) >= shelf_limit:
            continue
        members.append(str(entry.get("id") or ""))
    return {f"{bucket}_ids": members for bucket, members in shelved.items()}
1680
+
1681
@staticmethod
def _evaluate_requirement(requirement: dict[str, Any], device_profile: dict[str, Any]) -> dict[str, Any]:
    """Compare a structured resource requirement with a device profile.

    Every ``(field_name, label)`` pair in ``_RESOURCE_FIELDS`` is looked up
    in ``requirement``; fields whose value ``_optional_number`` turns into
    ``None`` are not counted.

    Args:
        requirement: Required amounts keyed by resource field name.
        device_profile: Available amounts keyed by the same field names.

    Returns:
        A dict with ``ok`` (every counted field is satisfied), ``coverage``
        (satisfied fields divided by counted fields) and ``reasons`` (one
        message per counted field). When nothing is counted the result is
        ``ok=True`` with ``coverage=1.0`` and a single explanatory reason.
    """

    def unconstrained() -> dict[str, Any]:
        # Fresh dict per call so callers can mutate the result safely.
        return {
            "ok": True,
            "coverage": 1.0,
            "reasons": ["No structured requirement was provided."],
        }

    if not requirement:
        return unconstrained()

    notes: list[str] = []
    checked = 0
    satisfied = 0
    for field_name, label in _RESOURCE_FIELDS:
        needed = _optional_number(requirement.get(field_name))
        if needed is None:
            continue
        checked += 1
        on_hand = _optional_number(device_profile.get(field_name))
        if on_hand is None:
            # An unknown amount counts as considered but not satisfied.
            notes.append(f"{label}: unavailable on this machine summary, need {needed:g}.")
            continue
        if on_hand >= needed:
            satisfied += 1
        # The message is the same whether or not the requirement is met.
        notes.append(f"{label}: {on_hand:g} available, need {needed:g}.")

    if checked == 0:
        return unconstrained()
    return {
        "ok": satisfied == checked,
        "coverage": satisfied / checked,
        "reasons": notes,
    }