wormclaude 1.0.119 → 1.0.121

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (232) hide show
  1. package/dist/theme.js +1 -1
  2. package/dist/tui.js +6 -1
  3. package/package.json +1 -1
  4. package/skills/build-mcp-app/SKILL.md +0 -393
  5. package/skills/build-mcp-app/references/abuse-protection.md +0 -60
  6. package/skills/build-mcp-app/references/apps-sdk-messages.md +0 -227
  7. package/skills/build-mcp-app/references/directory-checklist.md +0 -18
  8. package/skills/build-mcp-app/references/iframe-sandbox.md +0 -164
  9. package/skills/build-mcp-app/references/payload-budgeting.md +0 -54
  10. package/skills/build-mcp-app/references/widget-templates.md +0 -249
  11. package/skills/build-mcp-server/SKILL.md +0 -222
  12. package/skills/build-mcp-server/references/auth.md +0 -108
  13. package/skills/build-mcp-server/references/deploy-cloudflare-workers.md +0 -106
  14. package/skills/build-mcp-server/references/elicitation.md +0 -129
  15. package/skills/build-mcp-server/references/remote-http-scaffold.md +0 -211
  16. package/skills/build-mcp-server/references/resources-and-prompts.md +0 -122
  17. package/skills/build-mcp-server/references/server-capabilities.md +0 -164
  18. package/skills/build-mcp-server/references/tool-design.md +0 -189
  19. package/skills/build-mcp-server/references/versions.md +0 -25
  20. package/skills/build-mcpb/SKILL.md +0 -200
  21. package/skills/build-mcpb/references/local-security.md +0 -149
  22. package/skills/build-mcpb/references/manifest-schema.md +0 -156
  23. package/skills/docx/script/__init__.py +0 -1
  24. package/skills/docx/script/accept_chages.py +0 -135
  25. package/skills/docx/script/comment.py +0 -318
  26. package/skills/docx/script/office/helpers/__init__.py +0 -0
  27. package/skills/docx/script/office/helpers/merge_runs.py +0 -199
  28. package/skills/docx/script/office/helpers/simplify_redlines.py +0 -197
  29. package/skills/docx/script/office/pack.py +0 -159
  30. package/skills/docx/script/office/schemas/ISO-IEC29500-4_2016/dml-chart.xsd +0 -1499
  31. package/skills/docx/script/office/schemas/ISO-IEC29500-4_2016/dml-chartDrawing.xsd +0 -146
  32. package/skills/docx/script/office/schemas/ISO-IEC29500-4_2016/dml-diagram.xsd +0 -1085
  33. package/skills/docx/script/office/schemas/ISO-IEC29500-4_2016/dml-lockedCanvas.xsd +0 -11
  34. package/skills/docx/script/office/schemas/ISO-IEC29500-4_2016/dml-main.xsd +0 -3081
  35. package/skills/docx/script/office/schemas/ISO-IEC29500-4_2016/dml-picture.xsd +0 -23
  36. package/skills/docx/script/office/schemas/ISO-IEC29500-4_2016/dml-spreadsheetDrawing.xsd +0 -185
  37. package/skills/docx/script/office/schemas/ISO-IEC29500-4_2016/dml-wordprocessingDrawing.xsd +0 -287
  38. package/skills/docx/script/office/schemas/ISO-IEC29500-4_2016/pml.xsd +0 -1676
  39. package/skills/docx/script/office/schemas/ISO-IEC29500-4_2016/shared-additionalCharacteristics.xsd +0 -28
  40. package/skills/docx/script/office/schemas/ISO-IEC29500-4_2016/shared-bibliography.xsd +0 -144
  41. package/skills/docx/script/office/schemas/ISO-IEC29500-4_2016/shared-commonSimpleTypes.xsd +0 -174
  42. package/skills/docx/script/office/schemas/ISO-IEC29500-4_2016/shared-customXmlDataProperties.xsd +0 -25
  43. package/skills/docx/script/office/schemas/ISO-IEC29500-4_2016/shared-customXmlSchemaProperties.xsd +0 -18
  44. package/skills/docx/script/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesCustom.xsd +0 -59
  45. package/skills/docx/script/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesExtended.xsd +0 -56
  46. package/skills/docx/script/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesVariantTypes.xsd +0 -195
  47. package/skills/docx/script/office/schemas/ISO-IEC29500-4_2016/shared-math.xsd +0 -582
  48. package/skills/docx/script/office/schemas/ISO-IEC29500-4_2016/shared-relationshipReference.xsd +0 -25
  49. package/skills/docx/script/office/schemas/ISO-IEC29500-4_2016/sml.xsd +0 -4439
  50. package/skills/docx/script/office/schemas/ISO-IEC29500-4_2016/vml-main.xsd +0 -570
  51. package/skills/docx/script/office/schemas/ISO-IEC29500-4_2016/vml-officeDrawing.xsd +0 -509
  52. package/skills/docx/script/office/schemas/ISO-IEC29500-4_2016/vml-presentationDrawing.xsd +0 -12
  53. package/skills/docx/script/office/schemas/ISO-IEC29500-4_2016/vml-spreadsheetDrawing.xsd +0 -108
  54. package/skills/docx/script/office/schemas/ISO-IEC29500-4_2016/vml-wordprocessingDrawing.xsd +0 -96
  55. package/skills/docx/script/office/schemas/ISO-IEC29500-4_2016/wml.xsd +0 -3646
  56. package/skills/docx/script/office/schemas/ISO-IEC29500-4_2016/xml.xsd +0 -116
  57. package/skills/docx/script/office/schemas/ecma/fouth-edition/opc-contentTypes.xsd +0 -42
  58. package/skills/docx/script/office/schemas/ecma/fouth-edition/opc-coreProperties.xsd +0 -50
  59. package/skills/docx/script/office/schemas/ecma/fouth-edition/opc-digSig.xsd +0 -49
  60. package/skills/docx/script/office/schemas/ecma/fouth-edition/opc-relationships.xsd +0 -33
  61. package/skills/docx/script/office/schemas/mce/mc.xsd +0 -75
  62. package/skills/docx/script/office/schemas/microsoft/wml-2010.xsd +0 -560
  63. package/skills/docx/script/office/schemas/microsoft/wml-2012.xsd +0 -67
  64. package/skills/docx/script/office/schemas/microsoft/wml-2018.xsd +0 -14
  65. package/skills/docx/script/office/schemas/microsoft/wml-cex-2018.xsd +0 -20
  66. package/skills/docx/script/office/schemas/microsoft/wml-cid-2016.xsd +0 -13
  67. package/skills/docx/script/office/schemas/microsoft/wml-sdtdatahash-2020.xsd +0 -4
  68. package/skills/docx/script/office/schemas/microsoft/wml-symex-2015.xsd +0 -8
  69. package/skills/docx/script/office/soffice.py +0 -183
  70. package/skills/docx/script/office/unpack.py +0 -132
  71. package/skills/docx/script/office/validate.py +0 -117
  72. package/skills/docx/script/office/validators/__init__.py +0 -15
  73. package/skills/docx/script/office/validators/base.py +0 -851
  74. package/skills/docx/script/office/validators/docx.py +0 -446
  75. package/skills/docx/script/office/validators/pptx.py +0 -275
  76. package/skills/docx/script/office/validators/redlining.py +0 -247
  77. package/skills/docx/script/templates/comments.xml +0 -3
  78. package/skills/docx/script/templates/commentsExtended.xml +0 -3
  79. package/skills/docx/script/templates/commentsExtensible.xml +0 -3
  80. package/skills/docx/script/templates/commentsIds.xml +0 -3
  81. package/skills/docx/script/templates/people.xml +0 -3
  82. package/skills/docx/skill.md +0 -593
  83. package/skills/explain.md +0 -14
  84. package/skills/frontend-design/SKILL.md +0 -42
  85. package/skills/pdf/FORMS.md +0 -294
  86. package/skills/pdf/REFERENCE.md +0 -612
  87. package/skills/pdf/SKILL.md +0 -314
  88. package/skills/pdf/scripts/check_bounding_boxes.py +0 -65
  89. package/skills/pdf/scripts/check_fillable_fields.py +0 -11
  90. package/skills/pdf/scripts/convert_pdf_to_images.py +0 -33
  91. package/skills/pdf/scripts/create_validation_image.py +0 -37
  92. package/skills/pdf/scripts/extract_form_field_info.py +0 -122
  93. package/skills/pdf/scripts/extract_form_structure.py +0 -115
  94. package/skills/pdf/scripts/fill_fillable_fields.py +0 -98
  95. package/skills/pdf/scripts/fill_pdf_form_with_annotations.py +0 -107
  96. package/skills/playground/SKILL.md +0 -77
  97. package/skills/playground/templates/code-map.md +0 -158
  98. package/skills/playground/templates/concept-map.md +0 -73
  99. package/skills/playground/templates/data-explorer.md +0 -67
  100. package/skills/playground/templates/design-playground.md +0 -67
  101. package/skills/playground/templates/diff-review.md +0 -179
  102. package/skills/playground/templates/document-critique.md +0 -171
  103. package/skills/pptx/SKILL.md +0 -230
  104. package/skills/pptx/editing.md +0 -205
  105. package/skills/pptx/pptxgenjs.md +0 -437
  106. package/skills/pptx/scripts/__init__.py +0 -0
  107. package/skills/pptx/scripts/add_slide.py +0 -195
  108. package/skills/pptx/scripts/clean.py +0 -286
  109. package/skills/pptx/scripts/office/helpers/__init__.py +0 -0
  110. package/skills/pptx/scripts/office/helpers/merge_runs.py +0 -199
  111. package/skills/pptx/scripts/office/helpers/simplify_redlines.py +0 -197
  112. package/skills/pptx/scripts/office/pack.py +0 -159
  113. package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chart.xsd +0 -1499
  114. package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chartDrawing.xsd +0 -146
  115. package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-diagram.xsd +0 -1085
  116. package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-lockedCanvas.xsd +0 -11
  117. package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-main.xsd +0 -3081
  118. package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-picture.xsd +0 -23
  119. package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-spreadsheetDrawing.xsd +0 -185
  120. package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-wordprocessingDrawing.xsd +0 -287
  121. package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/pml.xsd +0 -1676
  122. package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-additionalCharacteristics.xsd +0 -28
  123. package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-bibliography.xsd +0 -144
  124. package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-commonSimpleTypes.xsd +0 -174
  125. package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlDataProperties.xsd +0 -25
  126. package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlSchemaProperties.xsd +0 -18
  127. package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesCustom.xsd +0 -59
  128. package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesExtended.xsd +0 -56
  129. package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesVariantTypes.xsd +0 -195
  130. package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-math.xsd +0 -582
  131. package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-relationshipReference.xsd +0 -25
  132. package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/sml.xsd +0 -4439
  133. package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-main.xsd +0 -570
  134. package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-officeDrawing.xsd +0 -509
  135. package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-presentationDrawing.xsd +0 -12
  136. package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-spreadsheetDrawing.xsd +0 -108
  137. package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-wordprocessingDrawing.xsd +0 -96
  138. package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/wml.xsd +0 -3646
  139. package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/xml.xsd +0 -116
  140. package/skills/pptx/scripts/office/schemas/ecma/fouth-edition/opc-contentTypes.xsd +0 -42
  141. package/skills/pptx/scripts/office/schemas/ecma/fouth-edition/opc-coreProperties.xsd +0 -50
  142. package/skills/pptx/scripts/office/schemas/ecma/fouth-edition/opc-digSig.xsd +0 -49
  143. package/skills/pptx/scripts/office/schemas/ecma/fouth-edition/opc-relationships.xsd +0 -33
  144. package/skills/pptx/scripts/office/schemas/mce/mc.xsd +0 -75
  145. package/skills/pptx/scripts/office/schemas/microsoft/wml-2010.xsd +0 -560
  146. package/skills/pptx/scripts/office/schemas/microsoft/wml-2012.xsd +0 -67
  147. package/skills/pptx/scripts/office/schemas/microsoft/wml-2018.xsd +0 -14
  148. package/skills/pptx/scripts/office/schemas/microsoft/wml-cex-2018.xsd +0 -20
  149. package/skills/pptx/scripts/office/schemas/microsoft/wml-cid-2016.xsd +0 -13
  150. package/skills/pptx/scripts/office/schemas/microsoft/wml-sdtdatahash-2020.xsd +0 -4
  151. package/skills/pptx/scripts/office/schemas/microsoft/wml-symex-2015.xsd +0 -8
  152. package/skills/pptx/scripts/office/soffice.py +0 -183
  153. package/skills/pptx/scripts/office/unpack.py +0 -132
  154. package/skills/pptx/scripts/office/validate.py +0 -117
  155. package/skills/pptx/scripts/office/validators/__init__.py +0 -15
  156. package/skills/pptx/scripts/office/validators/base.py +0 -851
  157. package/skills/pptx/scripts/office/validators/docx.py +0 -446
  158. package/skills/pptx/scripts/office/validators/pptx.py +0 -275
  159. package/skills/pptx/scripts/office/validators/redlining.py +0 -247
  160. package/skills/pptx/scripts/thumbnail.py +0 -289
  161. package/skills/recon.md +0 -16
  162. package/skills/security-audit/SKILL.md +0 -26
  163. package/skills/talent-creator/SKILL.md +0 -486
  164. package/skills/talent-creator/agents/analyzer.md +0 -274
  165. package/skills/talent-creator/agents/comparator.md +0 -202
  166. package/skills/talent-creator/agents/grader.md +0 -223
  167. package/skills/talent-creator/assets/eval_review.html +0 -146
  168. package/skills/talent-creator/eval-viewer/generate_review.py +0 -471
  169. package/skills/talent-creator/eval-viewer/viewer.html +0 -1325
  170. package/skills/talent-creator/references/schemas.md +0 -430
  171. package/skills/talent-creator/scripts/__init__.py +0 -0
  172. package/skills/talent-creator/scripts/aggregate_benchmark.py +0 -401
  173. package/skills/talent-creator/scripts/generate_report.py +0 -326
  174. package/skills/talent-creator/scripts/improve_description.py +0 -247
  175. package/skills/talent-creator/scripts/package_skill.py +0 -136
  176. package/skills/talent-creator/scripts/quick_validate.py +0 -146
  177. package/skills/talent-creator/scripts/run_eval.py +0 -310
  178. package/skills/talent-creator/scripts/run_loop.py +0 -328
  179. package/skills/talent-creator/scripts/utils.py +0 -47
  180. package/skills/xlsx/SKILL.md +0 -300
  181. package/skills/xlsx/scripts/office/helpers/__init__.py +0 -0
  182. package/skills/xlsx/scripts/office/helpers/merge_runs.py +0 -199
  183. package/skills/xlsx/scripts/office/helpers/simplify_redlines.py +0 -197
  184. package/skills/xlsx/scripts/office/pack.py +0 -159
  185. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chart.xsd +0 -1499
  186. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chartDrawing.xsd +0 -146
  187. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-diagram.xsd +0 -1085
  188. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-lockedCanvas.xsd +0 -11
  189. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-main.xsd +0 -3081
  190. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-picture.xsd +0 -23
  191. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-spreadsheetDrawing.xsd +0 -185
  192. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-wordprocessingDrawing.xsd +0 -287
  193. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/pml.xsd +0 -1676
  194. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-additionalCharacteristics.xsd +0 -28
  195. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-bibliography.xsd +0 -144
  196. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-commonSimpleTypes.xsd +0 -174
  197. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlDataProperties.xsd +0 -25
  198. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlSchemaProperties.xsd +0 -18
  199. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesCustom.xsd +0 -59
  200. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesExtended.xsd +0 -56
  201. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesVariantTypes.xsd +0 -195
  202. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-math.xsd +0 -582
  203. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-relationshipReference.xsd +0 -25
  204. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/sml.xsd +0 -4439
  205. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-main.xsd +0 -570
  206. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-officeDrawing.xsd +0 -509
  207. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-presentationDrawing.xsd +0 -12
  208. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-spreadsheetDrawing.xsd +0 -108
  209. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-wordprocessingDrawing.xsd +0 -96
  210. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/wml.xsd +0 -3646
  211. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/xml.xsd +0 -116
  212. package/skills/xlsx/scripts/office/schemas/ecma/fouth-edition/opc-contentTypes.xsd +0 -42
  213. package/skills/xlsx/scripts/office/schemas/ecma/fouth-edition/opc-coreProperties.xsd +0 -50
  214. package/skills/xlsx/scripts/office/schemas/ecma/fouth-edition/opc-digSig.xsd +0 -49
  215. package/skills/xlsx/scripts/office/schemas/ecma/fouth-edition/opc-relationships.xsd +0 -33
  216. package/skills/xlsx/scripts/office/schemas/mce/mc.xsd +0 -75
  217. package/skills/xlsx/scripts/office/schemas/microsoft/wml-2010.xsd +0 -560
  218. package/skills/xlsx/scripts/office/schemas/microsoft/wml-2012.xsd +0 -67
  219. package/skills/xlsx/scripts/office/schemas/microsoft/wml-2018.xsd +0 -14
  220. package/skills/xlsx/scripts/office/schemas/microsoft/wml-cex-2018.xsd +0 -20
  221. package/skills/xlsx/scripts/office/schemas/microsoft/wml-cid-2016.xsd +0 -13
  222. package/skills/xlsx/scripts/office/schemas/microsoft/wml-sdtdatahash-2020.xsd +0 -4
  223. package/skills/xlsx/scripts/office/schemas/microsoft/wml-symex-2015.xsd +0 -8
  224. package/skills/xlsx/scripts/office/soffice.py +0 -183
  225. package/skills/xlsx/scripts/office/unpack.py +0 -132
  226. package/skills/xlsx/scripts/office/validate.py +0 -117
  227. package/skills/xlsx/scripts/office/validators/__init__.py +0 -15
  228. package/skills/xlsx/scripts/office/validators/base.py +0 -851
  229. package/skills/xlsx/scripts/office/validators/docx.py +0 -446
  230. package/skills/xlsx/scripts/office/validators/pptx.py +0 -275
  231. package/skills/xlsx/scripts/office/validators/redlining.py +0 -247
  232. package/skills/xlsx/scripts/recalc.py +0 -184
@@ -1,1325 +0,0 @@
1
- <!DOCTYPE html>
2
- <html lang="en">
3
- <head>
4
- <meta charset="UTF-8">
5
- <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
- <title>Eval Review</title>
7
- <link rel="preconnect" href="https://fonts.googleapis.com">
8
- <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
9
- <link href="https://fonts.googleapis.com/css2?family=Poppins:wght@500;600&family=Lora:wght@400;500&display=swap" rel="stylesheet">
10
- <script src="https://cdn.sheetjs.com/xlsx-0.20.3/package/dist/xlsx.full.min.js" integrity="sha384-EnyY0/GSHQGSxSgMwaIPzSESbqoOLSexfnSMN2AP+39Ckmn92stwABZynq1JyzdT" crossorigin="anonymous"></script>
11
- <style>
12
- :root {
13
- --bg: #faf9f5;
14
- --surface: #ffffff;
15
- --border: #e8e6dc;
16
- --text: #141413;
17
- --text-muted: #b0aea5;
18
- --accent: #d97757;
19
- --accent-hover: #c4613f;
20
- --green: #788c5d;
21
- --green-bg: #eef2e8;
22
- --red: #c44;
23
- --red-bg: #fceaea;
24
- --header-bg: #141413;
25
- --header-text: #faf9f5;
26
- --radius: 6px;
27
- }
28
-
29
- * { box-sizing: border-box; margin: 0; padding: 0; }
30
-
31
- body {
32
- font-family: 'Lora', Georgia, serif;
33
- background: var(--bg);
34
- color: var(--text);
35
- height: 100vh;
36
- display: flex;
37
- flex-direction: column;
38
- }
39
-
40
- /* ---- Header ---- */
41
- .header {
42
- background: var(--header-bg);
43
- color: var(--header-text);
44
- padding: 1rem 2rem;
45
- display: flex;
46
- justify-content: space-between;
47
- align-items: center;
48
- flex-shrink: 0;
49
- }
50
- .header h1 {
51
- font-family: 'Poppins', sans-serif;
52
- font-size: 1.25rem;
53
- font-weight: 600;
54
- }
55
- .header .instructions {
56
- font-size: 0.8rem;
57
- opacity: 0.7;
58
- margin-top: 0.25rem;
59
- }
60
- .header .progress {
61
- font-size: 0.875rem;
62
- opacity: 0.8;
63
- text-align: right;
64
- }
65
-
66
- /* ---- Main content ---- */
67
- .main {
68
- flex: 1;
69
- overflow-y: auto;
70
- padding: 1.5rem 2rem;
71
- display: flex;
72
- flex-direction: column;
73
- gap: 1.25rem;
74
- }
75
-
76
- /* ---- Sections ---- */
77
- .section {
78
- background: var(--surface);
79
- border: 1px solid var(--border);
80
- border-radius: var(--radius);
81
- flex-shrink: 0;
82
- }
83
- .section-header {
84
- font-family: 'Poppins', sans-serif;
85
- padding: 0.75rem 1rem;
86
- font-size: 0.75rem;
87
- font-weight: 500;
88
- text-transform: uppercase;
89
- letter-spacing: 0.05em;
90
- color: var(--text-muted);
91
- border-bottom: 1px solid var(--border);
92
- background: var(--bg);
93
- }
94
- .section-body {
95
- padding: 1rem;
96
- }
97
-
98
- /* ---- Config badge ---- */
99
- .config-badge {
100
- display: inline-block;
101
- padding: 0.2rem 0.625rem;
102
- border-radius: 9999px;
103
- font-family: 'Poppins', sans-serif;
104
- font-size: 0.6875rem;
105
- font-weight: 600;
106
- text-transform: uppercase;
107
- letter-spacing: 0.03em;
108
- margin-left: 0.75rem;
109
- vertical-align: middle;
110
- }
111
- .config-badge.config-primary {
112
- background: rgba(33, 150, 243, 0.12);
113
- color: #1976d2;
114
- }
115
- .config-badge.config-baseline {
116
- background: rgba(255, 193, 7, 0.15);
117
- color: #f57f17;
118
- }
119
-
120
- /* ---- Prompt ---- */
121
- .prompt-text {
122
- white-space: pre-wrap;
123
- font-size: 0.9375rem;
124
- line-height: 1.6;
125
- }
126
-
127
- /* ---- Outputs ---- */
128
- .output-file {
129
- border: 1px solid var(--border);
130
- border-radius: var(--radius);
131
- overflow: hidden;
132
- }
133
- .output-file + .output-file {
134
- margin-top: 1rem;
135
- }
136
- .output-file-header {
137
- padding: 0.5rem 0.75rem;
138
- font-size: 0.8rem;
139
- font-weight: 600;
140
- color: var(--text-muted);
141
- background: var(--bg);
142
- border-bottom: 1px solid var(--border);
143
- font-family: 'SF Mono', SFMono-Regular, Consolas, 'Liberation Mono', Menlo, monospace;
144
- display: flex;
145
- justify-content: space-between;
146
- align-items: center;
147
- }
148
- .output-file-header .dl-btn {
149
- font-size: 0.7rem;
150
- color: var(--accent);
151
- text-decoration: none;
152
- cursor: pointer;
153
- font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', sans-serif;
154
- font-weight: 500;
155
- opacity: 0.8;
156
- }
157
- .output-file-header .dl-btn:hover {
158
- opacity: 1;
159
- text-decoration: underline;
160
- }
161
- .output-file-content {
162
- padding: 0.75rem;
163
- overflow-x: auto;
164
- }
165
- .output-file-content pre {
166
- font-size: 0.8125rem;
167
- line-height: 1.5;
168
- white-space: pre-wrap;
169
- word-break: break-word;
170
- font-family: 'SF Mono', SFMono-Regular, Consolas, 'Liberation Mono', Menlo, monospace;
171
- }
172
- .output-file-content img {
173
- max-width: 100%;
174
- height: auto;
175
- border-radius: 4px;
176
- }
177
- .output-file-content iframe {
178
- width: 100%;
179
- height: 600px;
180
- border: none;
181
- }
182
- .output-file-content table {
183
- border-collapse: collapse;
184
- font-size: 0.8125rem;
185
- width: 100%;
186
- }
187
- .output-file-content table td,
188
- .output-file-content table th {
189
- border: 1px solid var(--border);
190
- padding: 0.375rem 0.5rem;
191
- text-align: left;
192
- }
193
- .output-file-content table th {
194
- background: var(--bg);
195
- font-weight: 600;
196
- }
197
- .output-file-content .download-link {
198
- display: inline-flex;
199
- align-items: center;
200
- gap: 0.5rem;
201
- padding: 0.5rem 1rem;
202
- background: var(--bg);
203
- border: 1px solid var(--border);
204
- border-radius: 4px;
205
- color: var(--accent);
206
- text-decoration: none;
207
- font-size: 0.875rem;
208
- cursor: pointer;
209
- }
210
- .output-file-content .download-link:hover {
211
- background: var(--border);
212
- }
213
- .empty-state {
214
- color: var(--text-muted);
215
- font-style: italic;
216
- padding: 2rem;
217
- text-align: center;
218
- }
219
-
220
- /* ---- Feedback ---- */
221
- .prev-feedback {
222
- background: var(--bg);
223
- border: 1px solid var(--border);
224
- border-radius: 4px;
225
- padding: 0.625rem 0.75rem;
226
- margin-top: 0.75rem;
227
- font-size: 0.8125rem;
228
- color: var(--text-muted);
229
- line-height: 1.5;
230
- }
231
- .prev-feedback-label {
232
- font-size: 0.7rem;
233
- font-weight: 600;
234
- text-transform: uppercase;
235
- letter-spacing: 0.04em;
236
- margin-bottom: 0.25rem;
237
- color: var(--text-muted);
238
- }
239
- .feedback-textarea {
240
- width: 100%;
241
- min-height: 100px;
242
- padding: 0.75rem;
243
- border: 1px solid var(--border);
244
- border-radius: 4px;
245
- font-family: inherit;
246
- font-size: 0.9375rem;
247
- line-height: 1.5;
248
- resize: vertical;
249
- color: var(--text);
250
- }
251
- .feedback-textarea:focus {
252
- outline: none;
253
- border-color: var(--accent);
254
- box-shadow: 0 0 0 3px rgba(37, 99, 235, 0.1);
255
- }
256
- .feedback-status {
257
- font-size: 0.75rem;
258
- color: var(--text-muted);
259
- margin-top: 0.5rem;
260
- min-height: 1.1em;
261
- }
262
-
263
- /* ---- Grades (collapsible) ---- */
264
- .grades-toggle {
265
- display: flex;
266
- align-items: center;
267
- cursor: pointer;
268
- user-select: none;
269
- }
270
- .grades-toggle:hover {
271
- color: var(--accent);
272
- }
273
- .grades-toggle .arrow {
274
- margin-right: 0.5rem;
275
- transition: transform 0.15s;
276
- font-size: 0.75rem;
277
- }
278
- .grades-toggle .arrow.open {
279
- transform: rotate(90deg);
280
- }
281
- .grades-content {
282
- display: none;
283
- margin-top: 0.75rem;
284
- }
285
- .grades-content.open {
286
- display: block;
287
- }
288
- .grades-summary {
289
- font-size: 0.875rem;
290
- margin-bottom: 0.75rem;
291
- display: flex;
292
- align-items: center;
293
- gap: 0.5rem;
294
- }
295
- .grade-badge {
296
- display: inline-block;
297
- padding: 0.125rem 0.5rem;
298
- border-radius: 9999px;
299
- font-size: 0.75rem;
300
- font-weight: 600;
301
- }
302
- .grade-pass { background: var(--green-bg); color: var(--green); }
303
- .grade-fail { background: var(--red-bg); color: var(--red); }
304
- .assertion-list {
305
- list-style: none;
306
- }
307
- .assertion-item {
308
- padding: 0.625rem 0;
309
- border-bottom: 1px solid var(--border);
310
- font-size: 0.8125rem;
311
- }
312
- .assertion-item:last-child { border-bottom: none; }
313
- .assertion-status {
314
- font-weight: 600;
315
- margin-right: 0.5rem;
316
- }
317
- .assertion-status.pass { color: var(--green); }
318
- .assertion-status.fail { color: var(--red); }
319
- .assertion-evidence {
320
- color: var(--text-muted);
321
- font-size: 0.75rem;
322
- margin-top: 0.25rem;
323
- padding-left: 1.5rem;
324
- }
325
-
326
- /* ---- View tabs ---- */
327
- .view-tabs {
328
- display: flex;
329
- gap: 0;
330
- padding: 0 2rem;
331
- background: var(--bg);
332
- border-bottom: 1px solid var(--border);
333
- flex-shrink: 0;
334
- }
335
- .view-tab {
336
- font-family: 'Poppins', sans-serif;
337
- padding: 0.625rem 1.25rem;
338
- font-size: 0.8125rem;
339
- font-weight: 500;
340
- cursor: pointer;
341
- border: none;
342
- background: none;
343
- color: var(--text-muted);
344
- border-bottom: 2px solid transparent;
345
- transition: all 0.15s;
346
- }
347
- .view-tab:hover { color: var(--text); }
348
- .view-tab.active {
349
- color: var(--accent);
350
- border-bottom-color: var(--accent);
351
- }
352
- .view-panel { display: none; }
353
- .view-panel.active { display: flex; flex-direction: column; flex: 1; overflow: hidden; }
354
-
355
- /* ---- Benchmark view ---- */
356
- .benchmark-view {
357
- padding: 1.5rem 2rem;
358
- overflow-y: auto;
359
- flex: 1;
360
- }
361
- .benchmark-table {
362
- border-collapse: collapse;
363
- background: var(--surface);
364
- border: 1px solid var(--border);
365
- border-radius: var(--radius);
366
- font-size: 0.8125rem;
367
- width: 100%;
368
- margin-bottom: 1.5rem;
369
- }
370
- .benchmark-table th, .benchmark-table td {
371
- padding: 0.625rem 0.75rem;
372
- text-align: left;
373
- border: 1px solid var(--border);
374
- }
375
- .benchmark-table th {
376
- font-family: 'Poppins', sans-serif;
377
- background: var(--header-bg);
378
- color: var(--header-text);
379
- font-weight: 500;
380
- font-size: 0.75rem;
381
- text-transform: uppercase;
382
- letter-spacing: 0.04em;
383
- }
384
- .benchmark-table tr:hover { background: var(--bg); }
385
- .benchmark-table tr.benchmark-row-with { background: rgba(33, 150, 243, 0.06); }
386
- .benchmark-table tr.benchmark-row-without { background: rgba(255, 193, 7, 0.06); }
387
- .benchmark-table tr.benchmark-row-with:hover { background: rgba(33, 150, 243, 0.12); }
388
- .benchmark-table tr.benchmark-row-without:hover { background: rgba(255, 193, 7, 0.12); }
389
- .benchmark-table tr.benchmark-row-avg { font-weight: 600; border-top: 2px solid var(--border); }
390
- .benchmark-table tr.benchmark-row-avg.benchmark-row-with { background: rgba(33, 150, 243, 0.12); }
391
- .benchmark-table tr.benchmark-row-avg.benchmark-row-without { background: rgba(255, 193, 7, 0.12); }
392
- .benchmark-delta-positive { color: var(--green); font-weight: 600; }
393
- .benchmark-delta-negative { color: var(--red); font-weight: 600; }
394
- .benchmark-notes {
395
- background: var(--surface);
396
- border: 1px solid var(--border);
397
- border-radius: var(--radius);
398
- padding: 1rem;
399
- }
400
- .benchmark-notes h3 {
401
- font-family: 'Poppins', sans-serif;
402
- font-size: 0.875rem;
403
- margin-bottom: 0.75rem;
404
- }
405
- .benchmark-notes ul {
406
- list-style: disc;
407
- padding-left: 1.25rem;
408
- }
409
- .benchmark-notes li {
410
- font-size: 0.8125rem;
411
- line-height: 1.6;
412
- margin-bottom: 0.375rem;
413
- }
414
- .benchmark-empty {
415
- color: var(--text-muted);
416
- font-style: italic;
417
- text-align: center;
418
- padding: 3rem;
419
- }
420
-
421
- /* ---- Navigation ---- */
422
- .nav {
423
- display: flex;
424
- justify-content: space-between;
425
- align-items: center;
426
- padding: 1rem 2rem;
427
- border-top: 1px solid var(--border);
428
- background: var(--surface);
429
- flex-shrink: 0;
430
- }
431
- .nav-btn {
432
- font-family: 'Poppins', sans-serif;
433
- padding: 0.5rem 1.25rem;
434
- border: 1px solid var(--border);
435
- border-radius: var(--radius);
436
- background: var(--surface);
437
- cursor: pointer;
438
- font-size: 0.875rem;
439
- font-weight: 500;
440
- color: var(--text);
441
- transition: all 0.15s;
442
- }
443
- .nav-btn:hover:not(:disabled) {
444
- background: var(--bg);
445
- border-color: var(--text-muted);
446
- }
447
- .nav-btn:disabled {
448
- opacity: 0.4;
449
- cursor: not-allowed;
450
- }
451
- .done-btn {
452
- font-family: 'Poppins', sans-serif;
453
- padding: 0.5rem 1.5rem;
454
- border: 1px solid var(--border);
455
- border-radius: var(--radius);
456
- background: var(--surface);
457
- color: var(--text);
458
- cursor: pointer;
459
- font-size: 0.875rem;
460
- font-weight: 500;
461
- transition: all 0.15s;
462
- }
463
- .done-btn:hover {
464
- background: var(--bg);
465
- border-color: var(--text-muted);
466
- }
467
- .done-btn.ready {
468
- border: none;
469
- background: var(--accent);
470
- color: white;
471
- font-weight: 600;
472
- }
473
- .done-btn.ready:hover {
474
- background: var(--accent-hover);
475
- }
476
- /* ---- Done overlay ---- */
477
- .done-overlay {
478
- display: none;
479
- position: fixed;
480
- inset: 0;
481
- background: rgba(0, 0, 0, 0.5);
482
- z-index: 100;
483
- justify-content: center;
484
- align-items: center;
485
- }
486
- .done-overlay.visible {
487
- display: flex;
488
- }
489
- .done-card {
490
- background: var(--surface);
491
- border-radius: 12px;
492
- padding: 2rem 3rem;
493
- text-align: center;
494
- box-shadow: 0 20px 60px rgba(0, 0, 0, 0.3);
495
- max-width: 500px;
496
- }
497
- .done-card h2 {
498
- font-size: 1.5rem;
499
- margin-bottom: 0.5rem;
500
- }
501
- .done-card p {
502
- color: var(--text-muted);
503
- margin-bottom: 1.5rem;
504
- line-height: 1.5;
505
- }
506
- .done-card .btn-row {
507
- display: flex;
508
- gap: 0.5rem;
509
- justify-content: center;
510
- }
511
- .done-card button {
512
- padding: 0.5rem 1.25rem;
513
- border: 1px solid var(--border);
514
- border-radius: var(--radius);
515
- background: var(--surface);
516
- cursor: pointer;
517
- font-size: 0.875rem;
518
- }
519
- .done-card button:hover {
520
- background: var(--bg);
521
- }
522
- /* ---- Toast ---- */
523
- .toast {
524
- position: fixed;
525
- bottom: 5rem;
526
- left: 50%;
527
- transform: translateX(-50%);
528
- background: var(--header-bg);
529
- color: var(--header-text);
530
- padding: 0.625rem 1.25rem;
531
- border-radius: var(--radius);
532
- font-size: 0.875rem;
533
- opacity: 0;
534
- transition: opacity 0.3s;
535
- pointer-events: none;
536
- z-index: 200;
537
- }
538
- .toast.visible {
539
- opacity: 1;
540
- }
541
- </style>
542
- </head>
543
- <body>
544
- <div id="app" style="height:100vh; display:flex; flex-direction:column;">
545
- <div class="header">
546
- <div>
547
- <h1>Eval Review: <span id="skill-name"></span></h1>
548
- <div class="instructions">Review each output and leave feedback below. Navigate with arrow keys or buttons. When done, copy feedback and paste into Claude Code.</div>
549
- </div>
550
- <div class="progress" id="progress"></div>
551
- </div>
552
-
553
- <!-- View tabs (only shown when benchmark data exists) -->
554
- <div class="view-tabs" id="view-tabs" style="display:none;">
555
- <button class="view-tab active" onclick="switchView('outputs')">Outputs</button>
556
- <button class="view-tab" onclick="switchView('benchmark')">Benchmark</button>
557
- </div>
558
-
559
- <!-- Outputs panel (qualitative review) -->
560
- <div class="view-panel active" id="panel-outputs">
561
- <div class="main">
562
- <!-- Prompt -->
563
- <div class="section">
564
- <div class="section-header">Prompt <span class="config-badge" id="config-badge" style="display:none;"></span></div>
565
- <div class="section-body">
566
- <div class="prompt-text" id="prompt-text"></div>
567
- </div>
568
- </div>
569
-
570
- <!-- Outputs -->
571
- <div class="section">
572
- <div class="section-header">Output</div>
573
- <div class="section-body" id="outputs-body">
574
- <div class="empty-state">No output files found</div>
575
- </div>
576
- </div>
577
-
578
- <!-- Previous Output (collapsible) -->
579
- <div class="section" id="prev-outputs-section" style="display:none;">
580
- <div class="section-header">
581
- <div class="grades-toggle" onclick="togglePrevOutputs()">
582
- <span class="arrow" id="prev-outputs-arrow">&#9654;</span>
583
- Previous Output
584
- </div>
585
- </div>
586
- <div class="grades-content" id="prev-outputs-content"></div>
587
- </div>
588
-
589
- <!-- Grades (collapsible) -->
590
- <div class="section" id="grades-section" style="display:none;">
591
- <div class="section-header">
592
- <div class="grades-toggle" onclick="toggleGrades()">
593
- <span class="arrow" id="grades-arrow">&#9654;</span>
594
- Formal Grades
595
- </div>
596
- </div>
597
- <div class="grades-content" id="grades-content"></div>
598
- </div>
599
-
600
- <!-- Feedback -->
601
- <div class="section">
602
- <div class="section-header">Your Feedback</div>
603
- <div class="section-body">
604
- <textarea
605
- class="feedback-textarea"
606
- id="feedback"
607
- placeholder="What do you think of this output? Any issues, suggestions, or things that look great?"
608
- ></textarea>
609
- <div class="feedback-status" id="feedback-status"></div>
610
- <div class="prev-feedback" id="prev-feedback" style="display:none;">
611
- <div class="prev-feedback-label">Previous feedback</div>
612
- <div id="prev-feedback-text"></div>
613
- </div>
614
- </div>
615
- </div>
616
- </div>
617
-
618
- <div class="nav" id="outputs-nav">
619
- <button class="nav-btn" id="prev-btn" onclick="navigate(-1)">&#8592; Previous</button>
620
- <button class="done-btn" id="done-btn" onclick="showDoneDialog()">Submit All Reviews</button>
621
- <button class="nav-btn" id="next-btn" onclick="navigate(1)">Next &#8594;</button>
622
- </div>
623
- </div><!-- end panel-outputs -->
624
-
625
- <!-- Benchmark panel (quantitative stats) -->
626
- <div class="view-panel" id="panel-benchmark">
627
- <div class="benchmark-view" id="benchmark-content">
628
- <div class="benchmark-empty">No benchmark data available. Run a benchmark to see quantitative results here.</div>
629
- </div>
630
- </div>
631
- </div>
632
-
633
- <!-- Done overlay -->
634
- <div class="done-overlay" id="done-overlay">
635
- <div class="done-card">
636
- <h2>Review Complete</h2>
637
- <p>Your feedback has been saved. Go back to your Claude Code session and tell Claude you're done reviewing.</p>
638
- <div class="btn-row">
639
- <button onclick="closeDoneDialog()">OK</button>
640
- </div>
641
- </div>
642
- </div>
643
-
644
- <!-- Toast -->
645
- <div class="toast" id="toast"></div>
646
-
647
- <script>
648
- // ---- Embedded data (injected by generate_review.py) ----
649
- /*__EMBEDDED_DATA__*/
650
-
651
- // ---- State ----
652
// Feedback text entered so far, keyed by run id. Only mutated, never
// reassigned, hence const.
const feedbackMap = {};
// Index into EMBEDDED_DATA.runs of the run currently displayed.
let currentIndex = 0;
// Indices of the runs that have been displayed at least once.
const visitedRuns = new Set();
655
-
656
- // ---- Init ----
657
/**
 * Bootstraps the review page: optionally restores saved feedback from the
 * server, shows the first run, and wires debounced auto-save on the
 * feedback textarea.
 *
 * @returns {Promise<void>}
 */
async function init() {
  // Load saved feedback from the server, but only if this isn't a fresh
  // iteration (indicated by previous feedback/outputs being present). In
  // that case the feedback.json on disk is stale from the prior iteration
  // and must not pre-fill the textareas.
  const hasPrevious =
    Object.keys(EMBEDDED_DATA.previous_feedback || {}).length > 0 ||
    Object.keys(EMBEDDED_DATA.previous_outputs || {}).length > 0;

  if (!hasPrevious) {
    try {
      const resp = await fetch("/api/feedback");
      // fetch() resolves on HTTP error statuses too, so check explicitly
      // instead of trying to parse an error page as JSON.
      if (resp.ok) {
        const data = await resp.json();
        if (data && Array.isArray(data.reviews)) {
          for (const review of data.reviews) {
            // Only strings are stored: saveCurrentFeedback() calls
            // .trim() on every stored value.
            if (review && typeof review.feedback === "string") {
              feedbackMap[review.run_id] = review.feedback;
            }
          }
        }
      }
    } catch { /* first run, no feedback yet */ }
  }

  document.getElementById("skill-name").textContent = EMBEDDED_DATA.skill_name;
  showRun(0);

  // Wire up feedback auto-save: persist 800 ms after the last keystroke.
  const textarea = document.getElementById("feedback");
  let saveTimeout = null;
  textarea.addEventListener("input", () => {
    clearTimeout(saveTimeout);
    document.getElementById("feedback-status").textContent = "";
    saveTimeout = setTimeout(() => saveCurrentFeedback(), 800);
  });
}
686
-
687
- // ---- Navigation ----
688
/**
 * Moves to the run `delta` positions away from the current one. Saves the
 * current feedback first; does nothing when the target index is outside
 * EMBEDDED_DATA.runs.
 *
 * @param {number} delta - Offset to apply to the current index.
 */
function navigate(delta) {
  const target = currentIndex + delta;
  const outOfRange = target < 0 || target >= EMBEDDED_DATA.runs.length;
  if (outOfRange) return;

  saveCurrentFeedback();
  showRun(target);
}
695
-
696
/**
 * Disables the Previous button on the first run and the Next button on the
 * last run; enables them otherwise.
 */
function updateNavButtons() {
  const lastIndex = EMBEDDED_DATA.runs.length - 1;
  const atStart = currentIndex === 0;
  document.getElementById("prev-btn").disabled = atStart;
  const atEnd = currentIndex === lastIndex;
  document.getElementById("next-btn").disabled = atEnd;
}
701
-
702
- // ---- Show a run ----
703
/**
 * Displays the run at `index`: progress label, prompt, config badge,
 * outputs, previous outputs, grades, previous feedback and the feedback
 * textarea. Also records the run as visited and promotes the done button
 * once every run has been visited.
 *
 * @param {number} index - Position of the run in EMBEDDED_DATA.runs.
 */
function showRun(index) {
  currentIndex = index;
  const allRuns = EMBEDDED_DATA.runs;
  const run = allRuns[index];
  const byId = (id) => document.getElementById(id);

  // Progress and prompt
  byId("progress").textContent = `${index + 1} of ${allRuns.length}`;
  byId("prompt-text").textContent = run.prompt;

  // Config badge, derived from the configuration name embedded in the run id
  const badge = byId("config-badge");
  const matched = run.id.match(/(with_skill|without_skill|new_skill|old_skill)/);
  if (!matched) {
    badge.style.display = "none";
  } else {
    const config = matched[1];
    const variant =
      config === "without_skill" || config === "old_skill"
        ? "config-baseline"
        : "config-primary";
    badge.textContent = config.replace(/_/g, " ");
    badge.className = "config-badge " + variant;
    badge.style.display = "inline-block";
  }

  // Outputs, previous outputs, grades
  renderOutputs(run);
  renderPrevOutputs(run);
  renderGrades(run);

  // Previous feedback box is only shown when there is something to show
  const previous = (EMBEDDED_DATA.previous_feedback || {})[run.id];
  const previousBox = byId("prev-feedback");
  if (previous) {
    byId("prev-feedback-text").textContent = previous;
    previousBox.style.display = "block";
  } else {
    previousBox.style.display = "none";
  }

  // Feedback textarea reflects whatever was typed for this run so far
  byId("feedback").value = feedbackMap[run.id] || "";
  byId("feedback-status").textContent = "";

  updateNavButtons();

  // Track visited runs and promote the done button when all were visited
  visitedRuns.add(index);
  const doneButton = byId("done-btn");
  const everythingVisited = visitedRuns.size >= EMBEDDED_DATA.runs.length;
  if (everythingVisited) {
    doneButton.classList.add("ready");
  }

  // Scroll main content to top
  document.querySelector(".main").scrollTop = 0;
}
762
-
763
- // ---- Render outputs ----
764
/**
 * Rebuilds the "Output" section for a run: one card per output file with a
 * header (name + download link) and a preview chosen by `file.type`.
 *
 * @param {object} run - Run entry; `run.outputs` is the list of files.
 */
function renderOutputs(run) {
  const body = document.getElementById("outputs-body");
  body.innerHTML = "";

  const files = run.outputs || [];
  if (files.length === 0) {
    body.innerHTML = '<div class="empty-state">No output files</div>';
    return;
  }

  // Small element factory to keep the card-building code compact.
  const make = (tag, className) => {
    const el = document.createElement(tag);
    if (className) el.className = className;
    return el;
  };

  for (const file of files) {
    const card = make("div", "output-file");

    // Header is always present, regardless of file type.
    const header = make("div", "output-file-header");
    const label = make("span");
    label.textContent = file.name;
    header.appendChild(label);
    const download = make("a", "dl-btn");
    download.textContent = "Download";
    download.download = file.name;
    download.href = getDownloadUri(file);
    header.appendChild(download);
    card.appendChild(header);

    const preview = make("div", "output-file-content");
    switch (file.type) {
      case "text": {
        const pre = make("pre");
        pre.textContent = file.content;
        preview.appendChild(pre);
        break;
      }
      case "image": {
        const img = make("img");
        img.src = file.data_uri;
        img.alt = file.name;
        preview.appendChild(img);
        break;
      }
      case "pdf": {
        const frame = make("iframe");
        frame.src = file.data_uri;
        preview.appendChild(frame);
        break;
      }
      case "xlsx":
        renderXlsx(preview, file.data_b64);
        break;
      case "binary": {
        const link = make("a", "download-link");
        link.href = file.data_uri;
        link.download = file.name;
        link.textContent = "Download " + file.name;
        preview.appendChild(link);
        break;
      }
      case "error": {
        const pre = make("pre");
        pre.textContent = file.content;
        pre.style.color = "var(--red)";
        preview.appendChild(pre);
        break;
      }
      default:
        // Unknown types get a header but no preview.
        break;
    }

    card.appendChild(preview);
    body.appendChild(card);
  }
}
828
-
829
- // ---- XLSX rendering via SheetJS ----
830
/**
 * Renders a base64-encoded workbook into `container` as HTML tables using
 * the global XLSX (SheetJS) object. A "Sheet: name" label precedes each
 * table when the workbook has more than one sheet. On any failure the
 * container's text is replaced with an error message.
 *
 * @param {HTMLElement} container - Element that receives the tables.
 * @param {string} b64Data - Base64-encoded workbook bytes.
 */
function renderXlsx(container, b64Data) {
  try {
    const binary = atob(b64Data);
    const bytes = new Uint8Array(binary.length);
    for (let pos = 0; pos < binary.length; pos++) {
      bytes[pos] = binary.charCodeAt(pos);
    }
    const workbook = XLSX.read(bytes, { type: "array" });

    for (const sheetName of workbook.SheetNames) {
      const sheet = workbook.Sheets[sheetName];

      if (workbook.SheetNames.length > 1) {
        const label = document.createElement("div");
        label.style.cssText =
          "font-weight:600; font-size:0.8rem; color:#b0aea5; margin-top:0.5rem; margin-bottom:0.25rem;";
        label.textContent = "Sheet: " + sheetName;
        container.appendChild(label);
      }

      const holder = document.createElement("div");
      holder.innerHTML = XLSX.utils.sheet_to_html(sheet, { editable: false });
      container.appendChild(holder);
    }
  } catch (err) {
    container.textContent = "Error rendering spreadsheet: " + err.message;
  }
}
856
-
857
- // ---- Grades ----
858
/**
 * Renders the collapsible "Formal Grades" section for a run, or hides the
 * section when the run has no grading data. The section is always reset to
 * its collapsed state.
 *
 * @param {object} run - Run entry; `run.grading` may hold `summary`
 *   (pass_rate, passed, failed, total) and `expectations` (text, passed,
 *   evidence).
 */
function renderGrades(run) {
  const section = document.getElementById("grades-section");
  const content = document.getElementById("grades-content");

  if (!run.grading) {
    section.style.display = "none";
    return;
  }

  const grading = run.grading;
  section.style.display = "block";
  // Reset to collapsed
  content.classList.remove("open");
  document.getElementById("grades-arrow").classList.remove("open");

  const summary = grading.summary || {};
  const expectations = grading.expectations || [];
  const hasRate = summary.pass_rate != null;

  // Summary badge. A missing pass rate is shown as "?" with neutral
  // styling rather than being styled as a failure.
  const passRate = hasRate ? Math.round(summary.pass_rate * 100) + "%" : "?";
  let badgeClass = "";
  if (hasRate) {
    badgeClass =
      summary.pass_rate >= 0.8 ? "grade-pass" : summary.pass_rate >= 0.5 ? "" : "grade-fail";
  }

  // Counts come from embedded data, so escape them like any other value
  // that is concatenated into markup.
  const count = (value) => escapeHtml(String(value || 0));

  let html = '<div style="padding: 1rem;">';
  html += '<div class="grades-summary">';
  html += '<span class="grade-badge ' + badgeClass + '">' + escapeHtml(passRate) + '</span>';
  html += '<span>' + count(summary.passed) + ' passed, ' + count(summary.failed) +
    ' failed of ' + count(summary.total) + '</span>';
  html += '</div>';

  // Assertions list
  html += '<ul class="assertion-list">';
  for (const exp of expectations) {
    const statusClass = exp.passed ? "pass" : "fail";
    const statusIcon = exp.passed ? "\u2713" : "\u2717";
    html += '<li class="assertion-item">';
    html += '<span class="assertion-status ' + statusClass + '">' + statusIcon + '</span>';
    html += '<span>' + escapeHtml(exp.text) + '</span>';
    if (exp.evidence) {
      html += '<div class="assertion-evidence">' + escapeHtml(exp.evidence) + '</div>';
    }
    html += '</li>';
  }
  html += '</ul>';

  html += '</div>';
  content.innerHTML = html;
}
906
-
907
/** Expands or collapses the "Formal Grades" section and flips its arrow. */
function toggleGrades() {
  for (const id of ["grades-content", "grades-arrow"]) {
    document.getElementById(id).classList.toggle("open");
  }
}
913
-
914
- // ---- Previous outputs (collapsible) ----
915
/**
 * Renders the collapsible "Previous Output" section for a run from
 * EMBEDDED_DATA.previous_outputs, or hides it when there is nothing to
 * show. The section is always reset to its collapsed state.
 *
 * File previews follow the same type handling as the main output list,
 * including the "error" type, so a previous output that failed to load
 * shows its message instead of an empty box.
 *
 * @param {object} run - Run entry; `run.id` selects the previous outputs.
 */
function renderPrevOutputs(run) {
  const section = document.getElementById("prev-outputs-section");
  const content = document.getElementById("prev-outputs-content");
  const prevOutputs = (EMBEDDED_DATA.previous_outputs || {})[run.id];

  if (!prevOutputs || prevOutputs.length === 0) {
    section.style.display = "none";
    return;
  }

  section.style.display = "block";
  // Reset to collapsed
  content.classList.remove("open");
  document.getElementById("prev-outputs-arrow").classList.remove("open");

  // Render the files into the content area
  content.innerHTML = "";
  const wrapper = document.createElement("div");
  wrapper.style.padding = "1rem";

  for (const file of prevOutputs) {
    const fileDiv = document.createElement("div");
    fileDiv.className = "output-file";

    const header = document.createElement("div");
    header.className = "output-file-header";
    const nameSpan = document.createElement("span");
    nameSpan.textContent = file.name;
    header.appendChild(nameSpan);
    const dlBtn = document.createElement("a");
    dlBtn.className = "dl-btn";
    dlBtn.textContent = "Download";
    dlBtn.download = file.name;
    dlBtn.href = getDownloadUri(file);
    header.appendChild(dlBtn);
    fileDiv.appendChild(header);

    const fc = document.createElement("div");
    fc.className = "output-file-content";

    if (file.type === "text") {
      const pre = document.createElement("pre");
      pre.textContent = file.content;
      fc.appendChild(pre);
    } else if (file.type === "image") {
      const img = document.createElement("img");
      img.src = file.data_uri;
      img.alt = file.name;
      fc.appendChild(img);
    } else if (file.type === "pdf") {
      const iframe = document.createElement("iframe");
      iframe.src = file.data_uri;
      fc.appendChild(iframe);
    } else if (file.type === "xlsx") {
      renderXlsx(fc, file.data_b64);
    } else if (file.type === "binary") {
      const a = document.createElement("a");
      a.className = "download-link";
      a.href = file.data_uri;
      a.download = file.name;
      a.textContent = "Download " + file.name;
      fc.appendChild(a);
    } else if (file.type === "error") {
      // Same presentation as in the main output list.
      const pre = document.createElement("pre");
      pre.textContent = file.content;
      pre.style.color = "var(--red)";
      fc.appendChild(pre);
    }

    fileDiv.appendChild(fc);
    wrapper.appendChild(fileDiv);
  }

  content.appendChild(wrapper);
}
985
-
986
/** Expands or collapses the "Previous Output" section and flips its arrow. */
function togglePrevOutputs() {
  for (const id of ["prev-outputs-content", "prev-outputs-arrow"]) {
    document.getElementById(id).classList.toggle("open");
  }
}
992
-
993
- // ---- Feedback (saved to server -> feedback.json) ----
994
/**
 * Stores the textarea content for the current run in feedbackMap and POSTs
 * all non-empty feedback to /api/feedback with status "in_progress".
 *
 * The status label reads "Saved" only when the server answered with a
 * success status. A network failure or an HTTP error response (for example
 * a static file host rejecting the POST) shows "Will download on submit".
 */
function saveCurrentFeedback() {
  const run = EMBEDDED_DATA.runs[currentIndex];
  const text = document.getElementById("feedback").value;

  if (text.trim() === "") {
    delete feedbackMap[run.id];
  } else {
    feedbackMap[run.id] = text;
  }

  // Build reviews array from map; one timestamp for the whole save.
  const timestamp = new Date().toISOString();
  const reviews = [];
  for (const [run_id, feedback] of Object.entries(feedbackMap)) {
    if (feedback.trim()) {
      reviews.push({ run_id, feedback, timestamp });
    }
  }

  fetch("/api/feedback", {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({ reviews, status: "in_progress" }),
  }).then((resp) => {
    // fetch() resolves on 4xx/5xx as well, so an error status must be
    // routed to the failure path explicitly.
    if (!resp.ok) throw new Error("HTTP " + resp.status);
    document.getElementById("feedback-status").textContent = "Saved";
  }).catch(() => {
    // Static mode or server unavailable: no-op on auto-save,
    // feedback will be downloaded on final submit
    document.getElementById("feedback-status").textContent = "Will download on submit";
  });
}
1024
-
1025
- // ---- Done ----
1026
/**
 * Submits the complete review: POSTs one entry per run (empty feedback
 * included) with status "complete", then shows the done overlay.
 *
 * If the POST fails, including an HTTP error status from a host that does
 * not implement the endpoint, the payload is offered as a feedback.json
 * download instead, so the review is never silently lost.
 */
function showDoneDialog() {
  // Save current textarea to feedbackMap (but don't POST yet)
  const run = EMBEDDED_DATA.runs[currentIndex];
  const text = document.getElementById("feedback").value;
  if (text.trim() === "") {
    delete feedbackMap[run.id];
  } else {
    feedbackMap[run.id] = text;
  }

  // POST once with status: complete. Include ALL runs so the model can
  // distinguish "no feedback" (looks good) from "not reviewed".
  const ts = new Date().toISOString();
  const reviews = EMBEDDED_DATA.runs.map((r) => ({
    run_id: r.id,
    feedback: feedbackMap[r.id] || "",
    timestamp: ts,
  }));
  const payload = JSON.stringify({ reviews, status: "complete" }, null, 2);

  fetch("/api/feedback", {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: payload,
  }).then((resp) => {
    // fetch() resolves on 4xx/5xx; treat those as "server not available"
    // so the download fallback below runs.
    if (!resp.ok) throw new Error("HTTP " + resp.status);
    document.getElementById("done-overlay").classList.add("visible");
  }).catch(() => {
    // Server not available (static mode): download as file
    const blob = new Blob([payload], { type: "application/json" });
    const url = URL.createObjectURL(blob);
    const a = document.createElement("a");
    a.href = url;
    a.download = "feedback.json";
    a.click();
    // Revoke on a later tick so the browser has started the download
    // before the object URL becomes invalid.
    setTimeout(() => URL.revokeObjectURL(url), 0);
    document.getElementById("done-overlay").classList.add("visible");
  });
}
1062
-
1063
/**
 * Closes the done overlay. Re-saving first sends the feedback again with
 * status "in_progress", undoing the "complete" status set on submit.
 */
function closeDoneDialog() {
  saveCurrentFeedback();
  const overlay = document.getElementById("done-overlay");
  overlay.classList.remove("visible");
}
1068
-
1069
- // ---- Toast ----
1070
/**
 * Shows a transient message in the toast element for two seconds.
 * A pending hide timer from an earlier call is cancelled, so a new toast
 * always stays visible for its full duration.
 *
 * @param {string} message - Text to display.
 */
function showToast(message) {
  const toast = document.getElementById("toast");
  toast.textContent = message;
  toast.classList.add("visible");
  // The timer handle lives on the function itself so no extra module-level
  // state is needed.
  clearTimeout(showToast.hideTimer);
  showToast.hideTimer = setTimeout(() => toast.classList.remove("visible"), 2000);
}
1076
-
1077
- // ---- Keyboard nav ----
1078
// Arrow keys move between runs: Left/Up go back, Right/Down go forward.
// Keys pressed while typing in a textarea are left alone.
document.addEventListener("keydown", (event) => {
  if (event.target.tagName === "TEXTAREA") return;

  const stepByKey = new Map([
    ["ArrowLeft", -1],
    ["ArrowUp", -1],
    ["ArrowRight", 1],
    ["ArrowDown", 1],
  ]);
  if (!stepByKey.has(event.key)) return;

  event.preventDefault();
  navigate(stepByKey.get(event.key));
});
1090
-
1091
- // ---- Util ----
1092
/**
 * Returns a URI suitable for a download link for `file`.
 *
 * Preference order: an existing `data_uri`, then base64 data wrapped as
 * application/octet-stream, then inline text content. A text file without
 * content yields "#" instead of a data URI holding the literal string
 * "undefined".
 *
 * @param {object} file - Output file entry (data_uri, data_b64, type, content).
 * @returns {string} A data URI, or "#" when nothing can be downloaded.
 */
function getDownloadUri(file) {
  if (file.data_uri) return file.data_uri;
  if (file.data_b64) return "data:application/octet-stream;base64," + file.data_b64;
  if (file.type === "text" && file.content != null) {
    return "data:text/plain;charset=utf-8," + encodeURIComponent(file.content);
  }
  return "#";
}
1098
-
1099
/**
 * Escapes a value for insertion into HTML, both as element content and
 * inside a quoted attribute value.
 *
 * Quotes are escaped as well as &, < and >, because callers concatenate the
 * result into attributes such as title="…". null and undefined become the
 * empty string; other values are converted with String().
 *
 * @param {*} text - Value to escape.
 * @returns {string} The escaped text.
 */
function escapeHtml(text) {
  const entities = {
    "&": "&amp;",
    "<": "&lt;",
    ">": "&gt;",
    '"': "&quot;",
    "'": "&#39;",
    "\u00A0": "&nbsp;",
  };
  const raw = text == null ? "" : String(text);
  return raw.replace(/[&<>"'\u00A0]/g, (ch) => entities[ch]);
}
1104
-
1105
- // ---- View switching ----
1106
/**
 * Activates the tab and panel belonging to `view` and deactivates all
 * others.
 *
 * @param {string} view - View name, e.g. "outputs" or "benchmark".
 */
function switchView(view) {
  for (const tab of document.querySelectorAll(".view-tab")) {
    tab.classList.remove("active");
  }
  for (const panel of document.querySelectorAll(".view-panel")) {
    panel.classList.remove("active");
  }

  // The tab is located through its inline onclick attribute.
  const activeTab = document.querySelector(`[onclick="switchView('${view}')"]`);
  activeTab.classList.add("active");
  const activePanel = document.getElementById("panel-" + view);
  activePanel.classList.add("active");
}
1112
-
1113
- // ---- Benchmark rendering ----
1114
/**
 * Renders the "Benchmark" panel from EMBEDDED_DATA.benchmark and reveals
 * the view tabs. Does nothing when no benchmark data is embedded.
 *
 * Output consists of a header line, a summary table (pass rate, time,
 * tokens per configuration plus delta), a per-eval breakdown with per-run
 * and average rows, a per-assertion pass/fail matrix, and analysis notes.
 *
 * Every value taken from the embedded data is escaped before it is
 * concatenated into markup, and numeric formatting tolerates missing or
 * non-numeric fields by printing "—".
 */
function renderBenchmark() {
  const data = EMBEDDED_DATA.benchmark;
  if (!data) return;

  // Show the tabs
  document.getElementById("view-tabs").style.display = "flex";

  const container = document.getElementById("benchmark-content");
  const summary = data.run_summary || {};
  const metadata = data.metadata || {};
  const notes = data.notes || [];

  // Escaping helpers. escAttr additionally neutralises quotes so the value
  // is safe inside a quoted attribute even if escapeHtml leaves them as-is.
  const esc = (value) => escapeHtml(value == null ? "" : String(value));
  const escAttr = (value) => esc(value).replace(/"/g, "&quot;").replace(/'/g, "&#39;");
  // "with_skill" -> "With Skill"
  const toLabel = (name) =>
    String(name).replace(/_/g, " ").replace(/\b\w/g, (c) => c.toUpperCase());
  const sum = (values) => values.reduce((total, v) => total + v, 0);

  function fmtStat(stat, pct) {
    if (!stat || typeof stat.mean !== "number") return "—";
    const suffix = pct ? "%" : "";
    const scale = pct ? 100 : 1;
    const digits = pct ? 0 : 1;
    const m = (stat.mean * scale).toFixed(digits) + suffix;
    // Without a numeric stddev only the mean is shown.
    if (typeof stat.stddev !== "number") return m;
    return m + " ± " + (stat.stddev * scale).toFixed(digits) + suffix;
  }

  function deltaClass(val) {
    if (!val) return "";
    const n = parseFloat(val);
    if (n > 0) return "benchmark-delta-positive";
    if (n < 0) return "benchmark-delta-negative";
    return "";
  }

  let html = "";

  // Header
  html += "<h2 style='font-family: Poppins, sans-serif; margin-bottom: 0.5rem;'>Benchmark Results</h2>";
  html += "<p style='color: var(--text-muted); font-size: 0.875rem; margin-bottom: 1.25rem;'>";
  if (metadata.skill_name) html += "<strong>" + esc(metadata.skill_name) + "</strong> &mdash; ";
  if (metadata.timestamp) html += esc(metadata.timestamp) + " &mdash; ";
  if (Array.isArray(metadata.evals_run)) html += "Evals: " + esc(metadata.evals_run.join(", ")) + " &mdash; ";
  html += esc(metadata.runs_per_configuration || "?") + " runs per configuration";
  html += "</p>";

  // Summary table
  html += '<table class="benchmark-table">';

  // Discover config names dynamically (everything except "delta")
  const configs = Object.keys(summary).filter((k) => k !== "delta");
  const configA = configs[0] || "config_a";
  const configB = configs[1] || "config_b";
  const statsA = summary[configA] || {};
  const statsB = summary[configB] || {};
  const delta = summary.delta || {};

  html += "<thead><tr><th>Metric</th><th>" + esc(toLabel(configA)) + "</th><th>" + esc(toLabel(configB)) + "</th><th>Delta</th></tr></thead>";
  html += "<tbody>";

  html += "<tr><td><strong>Pass Rate</strong></td>";
  html += "<td>" + fmtStat(statsA.pass_rate, true) + "</td>";
  html += "<td>" + fmtStat(statsB.pass_rate, true) + "</td>";
  html += '<td class="' + deltaClass(delta.pass_rate) + '">' + esc(delta.pass_rate || "—") + "</td></tr>";

  // Time (only show row if data exists)
  if (statsA.time_seconds || statsB.time_seconds) {
    html += "<tr><td><strong>Time (s)</strong></td>";
    html += "<td>" + fmtStat(statsA.time_seconds, false) + "</td>";
    html += "<td>" + fmtStat(statsB.time_seconds, false) + "</td>";
    html += '<td class="' + deltaClass(delta.time_seconds) + '">' + esc(delta.time_seconds ? delta.time_seconds + "s" : "—") + "</td></tr>";
  }

  // Tokens (only show row if data exists)
  if (statsA.tokens || statsB.tokens) {
    html += "<tr><td><strong>Tokens</strong></td>";
    html += "<td>" + fmtStat(statsA.tokens, false) + "</td>";
    html += "<td>" + fmtStat(statsB.tokens, false) + "</td>";
    html += '<td class="' + deltaClass(delta.tokens) + '">' + esc(delta.tokens || "—") + "</td></tr>";
  }

  html += "</tbody></table>";

  // Per-eval breakdown (if runs data available)
  const runs = data.runs || [];
  if (runs.length > 0) {
    // Numeric ids sort numerically; anything else falls back to a
    // numeric-aware string comparison so the comparator never yields NaN.
    const byEvalId = (x, y) =>
      typeof x === "number" && typeof y === "number"
        ? x - y
        : String(x).localeCompare(String(y), undefined, { numeric: true });
    const evalIds = [...new Set(runs.map((r) => r.eval_id))].sort(byEvalId);

    html += "<h3 style='font-family: Poppins, sans-serif; margin-bottom: 0.75rem;'>Per-Eval Breakdown</h3>";

    const hasTime = runs.some((r) => r.result && r.result.time_seconds != null);
    const hasErrors = runs.some((r) => r.result && r.result.errors > 0);

    for (const evalId of evalIds) {
      const evalRuns = runs.filter((r) => r.eval_id === evalId);
      const evalName = evalRuns[0] && evalRuns[0].eval_name ? evalRuns[0].eval_name : "Eval " + evalId;

      html += "<h4 style='font-family: Poppins, sans-serif; margin: 1rem 0 0.5rem; color: var(--text);'>" + esc(evalName) + "</h4>";
      html += '<table class="benchmark-table">';
      html += "<thead><tr><th>Config</th><th>Run</th><th>Pass Rate</th>";
      if (hasTime) html += "<th>Time (s)</th>";
      if (hasErrors) html += "<th>Crashes During Execution</th>";
      html += "</tr></thead>";
      html += "<tbody>";

      // Group by config and render with average rows
      const configGroups = [...new Set(evalRuns.map((r) => r.configuration))];
      for (let ci = 0; ci < configGroups.length; ci++) {
        const config = configGroups[ci];
        const configRuns = evalRuns.filter((r) => r.configuration === config);
        if (configRuns.length === 0) continue;

        // The first configuration encountered gets the "with" styling.
        const rowClass = ci === 0 ? "benchmark-row-with" : "benchmark-row-without";
        const configLabel = esc(toLabel(config));

        for (const run of configRuns) {
          const r = run.result || {};
          const prClass = r.pass_rate >= 0.8 ? "benchmark-delta-positive" : r.pass_rate < 0.5 ? "benchmark-delta-negative" : "";
          html += '<tr class="' + rowClass + '">';
          html += "<td>" + configLabel + "</td>";
          html += "<td>" + esc(run.run_number) + "</td>";
          html += '<td class="' + prClass + '">' + ((r.pass_rate || 0) * 100).toFixed(0) + "% (" + esc(r.passed || 0) + "/" + esc(r.total || 0) + ")</td>";
          if (hasTime) html += "<td>" + (typeof r.time_seconds === "number" ? r.time_seconds.toFixed(1) : "—") + "</td>";
          if (hasErrors) html += "<td>" + esc(r.errors || 0) + "</td>";
          html += "</tr>";
        }

        // Average row
        const rates = configRuns.map((r) => (r.result || {}).pass_rate || 0);
        const avgRate = sum(rates) / rates.length;
        const avgPrClass = avgRate >= 0.8 ? "benchmark-delta-positive" : avgRate < 0.5 ? "benchmark-delta-negative" : "";
        html += '<tr class="benchmark-row-avg ' + rowClass + '">';
        html += "<td>" + configLabel + "</td>";
        html += "<td>Avg</td>";
        html += '<td class="' + avgPrClass + '">' + (avgRate * 100).toFixed(0) + "%</td>";
        if (hasTime) {
          const times = configRuns
            .map((r) => (r.result || {}).time_seconds)
            .filter((t) => typeof t === "number");
          html += "<td>" + (times.length ? (sum(times) / times.length).toFixed(1) : "—") + "</td>";
        }
        if (hasErrors) html += "<td></td>";
        html += "</tr>";
      }
      html += "</tbody></table>";

      // Per-assertion detail for this eval
      const runsWithExpectations = new Map();
      for (const config of configGroups) {
        runsWithExpectations.set(
          config,
          evalRuns.filter((r) => r.configuration === config && r.expectations && r.expectations.length > 0)
        );
      }
      const hasAnyExpectations = [...runsWithExpectations.values()].some((group) => group.length > 0);
      if (hasAnyExpectations) {
        // Collect all unique assertion texts across all configs, in
        // first-seen order.
        const allAssertions = [];
        const seen = new Set();
        for (const config of configGroups) {
          for (const run of runsWithExpectations.get(config)) {
            for (const exp of run.expectations || []) {
              if (!seen.has(exp.text)) {
                seen.add(exp.text);
                allAssertions.push(exp.text);
              }
            }
          }
        }

        html += '<table class="benchmark-table" style="margin-top: 0.5rem;">';
        html += "<thead><tr><th>Assertion</th>";
        for (const config of configGroups) {
          html += "<th>" + esc(toLabel(config)) + "</th>";
        }
        html += "</tr></thead><tbody>";

        for (const assertionText of allAssertions) {
          html += "<tr><td>" + esc(assertionText) + "</td>";

          for (const config of configGroups) {
            html += "<td>";
            for (const run of runsWithExpectations.get(config)) {
              const exp = (run.expectations || []).find((e) => e.text === assertionText);
              if (exp) {
                const cls = exp.passed ? "benchmark-delta-positive" : "benchmark-delta-negative";
                const icon = exp.passed ? "\u2713" : "\u2717";
                const tooltip = "Run " + run.run_number + ": " + (exp.evidence || "");
                html += '<span class="' + cls + '" title="' + escAttr(tooltip) + '">' + icon + "</span> ";
              } else {
                html += "— ";
              }
            }
            html += "</td>";
          }
          html += "</tr>";
        }
        html += "</tbody></table>";
      }
    }
  }

  // Notes
  if (notes.length > 0) {
    html += '<div class="benchmark-notes">';
    html += "<h3>Analysis Notes</h3>";
    html += "<ul>";
    for (const note of notes) {
      html += "<li>" + esc(note) + "</li>";
    }
    html += "</ul></div>";
  }

  container.innerHTML = html;
}
1319
-
1320
- // ---- Start ----
1321
// init() is async: attach a rejection handler so a startup failure is
// reported in the console instead of surfacing as an unhandled rejection.
init().catch((err) => {
  console.error("Eval review viewer failed to initialise:", err);
});
renderBenchmark();
1323
- </script>
1324
- </body>
1325
- </html>