wormclaude 1.0.74 → 1.0.76

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (228) hide show
  1. package/dist/theme.js +4 -4
  2. package/package.json +2 -2
  3. package/skills/build-mcp-app/SKILL.md +393 -0
  4. package/skills/build-mcp-app/references/abuse-protection.md +60 -0
  5. package/skills/build-mcp-app/references/apps-sdk-messages.md +227 -0
  6. package/skills/build-mcp-app/references/directory-checklist.md +18 -0
  7. package/skills/build-mcp-app/references/iframe-sandbox.md +164 -0
  8. package/skills/build-mcp-app/references/payload-budgeting.md +54 -0
  9. package/skills/build-mcp-app/references/widget-templates.md +249 -0
  10. package/skills/build-mcp-server/SKILL.md +222 -0
  11. package/skills/build-mcp-server/references/auth.md +108 -0
  12. package/skills/build-mcp-server/references/deploy-cloudflare-workers.md +106 -0
  13. package/skills/build-mcp-server/references/elicitation.md +129 -0
  14. package/skills/build-mcp-server/references/remote-http-scaffold.md +211 -0
  15. package/skills/build-mcp-server/references/resources-and-prompts.md +122 -0
  16. package/skills/build-mcp-server/references/server-capabilities.md +164 -0
  17. package/skills/build-mcp-server/references/tool-design.md +189 -0
  18. package/skills/build-mcp-server/references/versions.md +25 -0
  19. package/skills/build-mcpb/SKILL.md +200 -0
  20. package/skills/build-mcpb/references/local-security.md +149 -0
  21. package/skills/build-mcpb/references/manifest-schema.md +156 -0
  22. package/skills/docx/script/__init__.py +1 -0
  23. package/skills/docx/script/accept_chages.py +135 -0
  24. package/skills/docx/script/comment.py +318 -0
  25. package/skills/docx/script/office/helpers/__init__.py +0 -0
  26. package/skills/docx/script/office/helpers/merge_runs.py +199 -0
  27. package/skills/docx/script/office/helpers/simplify_redlines.py +197 -0
  28. package/skills/docx/script/office/pack.py +159 -0
  29. package/skills/docx/script/office/schemas/ISO-IEC29500-4_2016/dml-chart.xsd +1499 -0
  30. package/skills/docx/script/office/schemas/ISO-IEC29500-4_2016/dml-chartDrawing.xsd +146 -0
  31. package/skills/docx/script/office/schemas/ISO-IEC29500-4_2016/dml-diagram.xsd +1085 -0
  32. package/skills/docx/script/office/schemas/ISO-IEC29500-4_2016/dml-lockedCanvas.xsd +11 -0
  33. package/skills/docx/script/office/schemas/ISO-IEC29500-4_2016/dml-main.xsd +3081 -0
  34. package/skills/docx/script/office/schemas/ISO-IEC29500-4_2016/dml-picture.xsd +23 -0
  35. package/skills/docx/script/office/schemas/ISO-IEC29500-4_2016/dml-spreadsheetDrawing.xsd +185 -0
  36. package/skills/docx/script/office/schemas/ISO-IEC29500-4_2016/dml-wordprocessingDrawing.xsd +287 -0
  37. package/skills/docx/script/office/schemas/ISO-IEC29500-4_2016/pml.xsd +1676 -0
  38. package/skills/docx/script/office/schemas/ISO-IEC29500-4_2016/shared-additionalCharacteristics.xsd +28 -0
  39. package/skills/docx/script/office/schemas/ISO-IEC29500-4_2016/shared-bibliography.xsd +144 -0
  40. package/skills/docx/script/office/schemas/ISO-IEC29500-4_2016/shared-commonSimpleTypes.xsd +174 -0
  41. package/skills/docx/script/office/schemas/ISO-IEC29500-4_2016/shared-customXmlDataProperties.xsd +25 -0
  42. package/skills/docx/script/office/schemas/ISO-IEC29500-4_2016/shared-customXmlSchemaProperties.xsd +18 -0
  43. package/skills/docx/script/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesCustom.xsd +59 -0
  44. package/skills/docx/script/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesExtended.xsd +56 -0
  45. package/skills/docx/script/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesVariantTypes.xsd +195 -0
  46. package/skills/docx/script/office/schemas/ISO-IEC29500-4_2016/shared-math.xsd +582 -0
  47. package/skills/docx/script/office/schemas/ISO-IEC29500-4_2016/shared-relationshipReference.xsd +25 -0
  48. package/skills/docx/script/office/schemas/ISO-IEC29500-4_2016/sml.xsd +4439 -0
  49. package/skills/docx/script/office/schemas/ISO-IEC29500-4_2016/vml-main.xsd +570 -0
  50. package/skills/docx/script/office/schemas/ISO-IEC29500-4_2016/vml-officeDrawing.xsd +509 -0
  51. package/skills/docx/script/office/schemas/ISO-IEC29500-4_2016/vml-presentationDrawing.xsd +12 -0
  52. package/skills/docx/script/office/schemas/ISO-IEC29500-4_2016/vml-spreadsheetDrawing.xsd +108 -0
  53. package/skills/docx/script/office/schemas/ISO-IEC29500-4_2016/vml-wordprocessingDrawing.xsd +96 -0
  54. package/skills/docx/script/office/schemas/ISO-IEC29500-4_2016/wml.xsd +3646 -0
  55. package/skills/docx/script/office/schemas/ISO-IEC29500-4_2016/xml.xsd +116 -0
  56. package/skills/docx/script/office/schemas/ecma/fouth-edition/opc-contentTypes.xsd +42 -0
  57. package/skills/docx/script/office/schemas/ecma/fouth-edition/opc-coreProperties.xsd +50 -0
  58. package/skills/docx/script/office/schemas/ecma/fouth-edition/opc-digSig.xsd +49 -0
  59. package/skills/docx/script/office/schemas/ecma/fouth-edition/opc-relationships.xsd +33 -0
  60. package/skills/docx/script/office/schemas/mce/mc.xsd +75 -0
  61. package/skills/docx/script/office/schemas/microsoft/wml-2010.xsd +560 -0
  62. package/skills/docx/script/office/schemas/microsoft/wml-2012.xsd +67 -0
  63. package/skills/docx/script/office/schemas/microsoft/wml-2018.xsd +14 -0
  64. package/skills/docx/script/office/schemas/microsoft/wml-cex-2018.xsd +20 -0
  65. package/skills/docx/script/office/schemas/microsoft/wml-cid-2016.xsd +13 -0
  66. package/skills/docx/script/office/schemas/microsoft/wml-sdtdatahash-2020.xsd +4 -0
  67. package/skills/docx/script/office/schemas/microsoft/wml-symex-2015.xsd +8 -0
  68. package/skills/docx/script/office/soffice.py +183 -0
  69. package/skills/docx/script/office/unpack.py +132 -0
  70. package/skills/docx/script/office/validate.py +117 -0
  71. package/skills/docx/script/office/validators/__init__.py +15 -0
  72. package/skills/docx/script/office/validators/base.py +851 -0
  73. package/skills/docx/script/office/validators/docx.py +446 -0
  74. package/skills/docx/script/office/validators/pptx.py +275 -0
  75. package/skills/docx/script/office/validators/redlining.py +247 -0
  76. package/skills/docx/script/templates/comments.xml +3 -0
  77. package/skills/docx/script/templates/commentsExtended.xml +3 -0
  78. package/skills/docx/script/templates/commentsExtensible.xml +3 -0
  79. package/skills/docx/script/templates/commentsIds.xml +3 -0
  80. package/skills/docx/script/templates/people.xml +3 -0
  81. package/skills/docx/skill.md +593 -0
  82. package/skills/frontend-design/SKILL.md +42 -0
  83. package/skills/pdf/FORMS.md +294 -0
  84. package/skills/pdf/REFERENCE.md +612 -0
  85. package/skills/pdf/SKILL.md +314 -0
  86. package/skills/pdf/scripts/check_bounding_boxes.py +65 -0
  87. package/skills/pdf/scripts/check_fillable_fields.py +11 -0
  88. package/skills/pdf/scripts/convert_pdf_to_images.py +33 -0
  89. package/skills/pdf/scripts/create_validation_image.py +37 -0
  90. package/skills/pdf/scripts/extract_form_field_info.py +122 -0
  91. package/skills/pdf/scripts/extract_form_structure.py +115 -0
  92. package/skills/pdf/scripts/fill_fillable_fields.py +98 -0
  93. package/skills/pdf/scripts/fill_pdf_form_with_annotations.py +107 -0
  94. package/skills/playground/SKILL.md +77 -0
  95. package/skills/playground/templates/code-map.md +158 -0
  96. package/skills/playground/templates/concept-map.md +73 -0
  97. package/skills/playground/templates/data-explorer.md +67 -0
  98. package/skills/playground/templates/design-playground.md +67 -0
  99. package/skills/playground/templates/diff-review.md +179 -0
  100. package/skills/playground/templates/document-critique.md +171 -0
  101. package/skills/pptx/SKILL.md +230 -0
  102. package/skills/pptx/editing.md +205 -0
  103. package/skills/pptx/pptxgenjs.md +437 -0
  104. package/skills/pptx/scripts/__init__.py +0 -0
  105. package/skills/pptx/scripts/add_slide.py +195 -0
  106. package/skills/pptx/scripts/clean.py +286 -0
  107. package/skills/pptx/scripts/office/helpers/__init__.py +0 -0
  108. package/skills/pptx/scripts/office/helpers/merge_runs.py +199 -0
  109. package/skills/pptx/scripts/office/helpers/simplify_redlines.py +197 -0
  110. package/skills/pptx/scripts/office/pack.py +159 -0
  111. package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chart.xsd +1499 -0
  112. package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chartDrawing.xsd +146 -0
  113. package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-diagram.xsd +1085 -0
  114. package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-lockedCanvas.xsd +11 -0
  115. package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-main.xsd +3081 -0
  116. package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-picture.xsd +23 -0
  117. package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-spreadsheetDrawing.xsd +185 -0
  118. package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-wordprocessingDrawing.xsd +287 -0
  119. package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/pml.xsd +1676 -0
  120. package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-additionalCharacteristics.xsd +28 -0
  121. package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-bibliography.xsd +144 -0
  122. package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-commonSimpleTypes.xsd +174 -0
  123. package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlDataProperties.xsd +25 -0
  124. package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlSchemaProperties.xsd +18 -0
  125. package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesCustom.xsd +59 -0
  126. package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesExtended.xsd +56 -0
  127. package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesVariantTypes.xsd +195 -0
  128. package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-math.xsd +582 -0
  129. package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-relationshipReference.xsd +25 -0
  130. package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/sml.xsd +4439 -0
  131. package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-main.xsd +570 -0
  132. package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-officeDrawing.xsd +509 -0
  133. package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-presentationDrawing.xsd +12 -0
  134. package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-spreadsheetDrawing.xsd +108 -0
  135. package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-wordprocessingDrawing.xsd +96 -0
  136. package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/wml.xsd +3646 -0
  137. package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/xml.xsd +116 -0
  138. package/skills/pptx/scripts/office/schemas/ecma/fouth-edition/opc-contentTypes.xsd +42 -0
  139. package/skills/pptx/scripts/office/schemas/ecma/fouth-edition/opc-coreProperties.xsd +50 -0
  140. package/skills/pptx/scripts/office/schemas/ecma/fouth-edition/opc-digSig.xsd +49 -0
  141. package/skills/pptx/scripts/office/schemas/ecma/fouth-edition/opc-relationships.xsd +33 -0
  142. package/skills/pptx/scripts/office/schemas/mce/mc.xsd +75 -0
  143. package/skills/pptx/scripts/office/schemas/microsoft/wml-2010.xsd +560 -0
  144. package/skills/pptx/scripts/office/schemas/microsoft/wml-2012.xsd +67 -0
  145. package/skills/pptx/scripts/office/schemas/microsoft/wml-2018.xsd +14 -0
  146. package/skills/pptx/scripts/office/schemas/microsoft/wml-cex-2018.xsd +20 -0
  147. package/skills/pptx/scripts/office/schemas/microsoft/wml-cid-2016.xsd +13 -0
  148. package/skills/pptx/scripts/office/schemas/microsoft/wml-sdtdatahash-2020.xsd +4 -0
  149. package/skills/pptx/scripts/office/schemas/microsoft/wml-symex-2015.xsd +8 -0
  150. package/skills/pptx/scripts/office/soffice.py +183 -0
  151. package/skills/pptx/scripts/office/unpack.py +132 -0
  152. package/skills/pptx/scripts/office/validate.py +117 -0
  153. package/skills/pptx/scripts/office/validators/__init__.py +15 -0
  154. package/skills/pptx/scripts/office/validators/base.py +851 -0
  155. package/skills/pptx/scripts/office/validators/docx.py +446 -0
  156. package/skills/pptx/scripts/office/validators/pptx.py +275 -0
  157. package/skills/pptx/scripts/office/validators/redlining.py +247 -0
  158. package/skills/pptx/scripts/thumbnail.py +289 -0
  159. package/skills/talent-creator/SKILL.md +486 -0
  160. package/skills/talent-creator/agents/analyzer.md +274 -0
  161. package/skills/talent-creator/agents/comparator.md +202 -0
  162. package/skills/talent-creator/agents/grader.md +223 -0
  163. package/skills/talent-creator/assets/eval_review.html +146 -0
  164. package/skills/talent-creator/eval-viewer/generate_review.py +471 -0
  165. package/skills/talent-creator/eval-viewer/viewer.html +1325 -0
  166. package/skills/talent-creator/references/schemas.md +430 -0
  167. package/skills/talent-creator/scripts/__init__.py +0 -0
  168. package/skills/talent-creator/scripts/aggregate_benchmark.py +401 -0
  169. package/skills/talent-creator/scripts/generate_report.py +326 -0
  170. package/skills/talent-creator/scripts/improve_description.py +247 -0
  171. package/skills/talent-creator/scripts/package_skill.py +136 -0
  172. package/skills/talent-creator/scripts/quick_validate.py +146 -0
  173. package/skills/talent-creator/scripts/run_eval.py +310 -0
  174. package/skills/talent-creator/scripts/run_loop.py +328 -0
  175. package/skills/talent-creator/scripts/utils.py +47 -0
  176. package/skills/xlsx/SKILL.md +300 -0
  177. package/skills/xlsx/scripts/office/helpers/__init__.py +0 -0
  178. package/skills/xlsx/scripts/office/helpers/merge_runs.py +199 -0
  179. package/skills/xlsx/scripts/office/helpers/simplify_redlines.py +197 -0
  180. package/skills/xlsx/scripts/office/pack.py +159 -0
  181. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chart.xsd +1499 -0
  182. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chartDrawing.xsd +146 -0
  183. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-diagram.xsd +1085 -0
  184. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-lockedCanvas.xsd +11 -0
  185. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-main.xsd +3081 -0
  186. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-picture.xsd +23 -0
  187. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-spreadsheetDrawing.xsd +185 -0
  188. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-wordprocessingDrawing.xsd +287 -0
  189. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/pml.xsd +1676 -0
  190. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-additionalCharacteristics.xsd +28 -0
  191. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-bibliography.xsd +144 -0
  192. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-commonSimpleTypes.xsd +174 -0
  193. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlDataProperties.xsd +25 -0
  194. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlSchemaProperties.xsd +18 -0
  195. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesCustom.xsd +59 -0
  196. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesExtended.xsd +56 -0
  197. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesVariantTypes.xsd +195 -0
  198. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-math.xsd +582 -0
  199. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-relationshipReference.xsd +25 -0
  200. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/sml.xsd +4439 -0
  201. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-main.xsd +570 -0
  202. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-officeDrawing.xsd +509 -0
  203. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-presentationDrawing.xsd +12 -0
  204. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-spreadsheetDrawing.xsd +108 -0
  205. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-wordprocessingDrawing.xsd +96 -0
  206. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/wml.xsd +3646 -0
  207. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/xml.xsd +116 -0
  208. package/skills/xlsx/scripts/office/schemas/ecma/fouth-edition/opc-contentTypes.xsd +42 -0
  209. package/skills/xlsx/scripts/office/schemas/ecma/fouth-edition/opc-coreProperties.xsd +50 -0
  210. package/skills/xlsx/scripts/office/schemas/ecma/fouth-edition/opc-digSig.xsd +49 -0
  211. package/skills/xlsx/scripts/office/schemas/ecma/fouth-edition/opc-relationships.xsd +33 -0
  212. package/skills/xlsx/scripts/office/schemas/mce/mc.xsd +75 -0
  213. package/skills/xlsx/scripts/office/schemas/microsoft/wml-2010.xsd +560 -0
  214. package/skills/xlsx/scripts/office/schemas/microsoft/wml-2012.xsd +67 -0
  215. package/skills/xlsx/scripts/office/schemas/microsoft/wml-2018.xsd +14 -0
  216. package/skills/xlsx/scripts/office/schemas/microsoft/wml-cex-2018.xsd +20 -0
  217. package/skills/xlsx/scripts/office/schemas/microsoft/wml-cid-2016.xsd +13 -0
  218. package/skills/xlsx/scripts/office/schemas/microsoft/wml-sdtdatahash-2020.xsd +4 -0
  219. package/skills/xlsx/scripts/office/schemas/microsoft/wml-symex-2015.xsd +8 -0
  220. package/skills/xlsx/scripts/office/soffice.py +183 -0
  221. package/skills/xlsx/scripts/office/unpack.py +132 -0
  222. package/skills/xlsx/scripts/office/validate.py +117 -0
  223. package/skills/xlsx/scripts/office/validators/__init__.py +15 -0
  224. package/skills/xlsx/scripts/office/validators/base.py +851 -0
  225. package/skills/xlsx/scripts/office/validators/docx.py +446 -0
  226. package/skills/xlsx/scripts/office/validators/pptx.py +275 -0
  227. package/skills/xlsx/scripts/office/validators/redlining.py +247 -0
  228. package/skills/xlsx/scripts/recalc.py +184 -0
@@ -0,0 +1,430 @@
1
+ # JSON Schemas
2
+
3
+ This document lays out the JSON schemas that Talent-Creator uses.
4
+
5
+ ---
6
+
7
+ ## evals.json
8
+
9
+ Defines the Inspections (evals) for a skill. Lives at `evals/evals.json` inside the skill directory.
10
+
11
+ ```json
12
+ {
13
+ "skill_name": "example-skill",
14
+ "evals": [
15
+ {
16
+ "id": 1,
17
+ "prompt": "User's example prompt",
18
+ "expected_output": "Description of expected result",
19
+ "files": ["evals/files/sample1.pdf"],
20
+ "expectations": [
21
+ "The output includes X",
22
+ "The skill used script Y"
23
+ ]
24
+ }
25
+ ]
26
+ }
27
+ ```
28
+
29
+ **Fields:**
30
+ - `skill_name`: Name that matches the skill's frontmatter
31
+ - `evals[].id`: Unique integer identifier
32
+ - `evals[].prompt`: The task to run
33
+ - `evals[].expected_output`: Plain-language description of what success looks like
34
+ - `evals[].files`: Optional list of input file paths (relative to the skill root)
35
+ - `evals[].expectations`: List of checkable statements
36
+
37
+ ---
38
+
39
+ ## history.json
40
+
41
+ Tracks version progression in Improve mode. Lives at the workspace root.
42
+
43
+ ```json
44
+ {
45
+ "started_at": "2026-01-15T10:30:00Z",
46
+ "skill_name": "pdf",
47
+ "current_best": "v2",
48
+ "iterations": [
49
+ {
50
+ "version": "v0",
51
+ "parent": null,
52
+ "expectation_pass_rate": 0.65,
53
+ "grading_result": "baseline",
54
+ "is_current_best": false
55
+ },
56
+ {
57
+ "version": "v1",
58
+ "parent": "v0",
59
+ "expectation_pass_rate": 0.75,
60
+ "grading_result": "won",
61
+ "is_current_best": false
62
+ },
63
+ {
64
+ "version": "v2",
65
+ "parent": "v1",
66
+ "expectation_pass_rate": 0.85,
67
+ "grading_result": "won",
68
+ "is_current_best": true
69
+ }
70
+ ]
71
+ }
72
+ ```
73
+
74
+ **Fields:**
75
+ - `started_at`: ISO timestamp marking when improvement kicked off
76
+ - `skill_name`: Name of the skill being improved
77
+ - `current_best`: Version identifier of the top performer
78
+ - `iterations[].version`: Version identifier (v0, v1, ...)
79
+ - `iterations[].parent`: The version this one was derived from
80
+ - `iterations[].expectation_pass_rate`: Pass rate coming out of grading
81
+ - `iterations[].grading_result`: "baseline", "won", "lost", or "tie"
82
+ - `iterations[].is_current_best`: Whether this is the reigning best version
83
+
84
+ ---
85
+
86
+ ## grading.json
87
+
88
+ The grader agent's output. Lives at `<run-dir>/grading.json`.
89
+
90
+ ```json
91
+ {
92
+ "expectations": [
93
+ {
94
+ "text": "The output includes the name 'John Smith'",
95
+ "passed": true,
96
+ "evidence": "Found in transcript Step 3: 'Extracted names: John Smith, Sarah Johnson'"
97
+ },
98
+ {
99
+ "text": "The spreadsheet has a SUM formula in cell B10",
100
+ "passed": false,
101
+ "evidence": "No spreadsheet was created. The output was a text file."
102
+ }
103
+ ],
104
+ "summary": {
105
+ "passed": 2,
106
+ "failed": 1,
107
+ "total": 3,
108
+ "pass_rate": 0.67
109
+ },
110
+ "execution_metrics": {
111
+ "tool_calls": {
112
+ "Read": 5,
113
+ "Write": 2,
114
+ "Bash": 8
115
+ },
116
+ "total_tool_calls": 15,
117
+ "total_steps": 6,
118
+ "errors_encountered": 0,
119
+ "output_chars": 12450,
120
+ "transcript_chars": 3200
121
+ },
122
+ "timing": {
123
+ "executor_duration_seconds": 165.0,
124
+ "grader_duration_seconds": 26.0,
125
+ "total_duration_seconds": 191.0
126
+ },
127
+ "claims": [
128
+ {
129
+ "claim": "The form has 12 fillable fields",
130
+ "type": "factual",
131
+ "verified": true,
132
+ "evidence": "Counted 12 fields in field_info.json"
133
+ }
134
+ ],
135
+ "user_notes_summary": {
136
+ "uncertainties": ["Used 2023 data, may be stale"],
137
+ "needs_review": [],
138
+ "workarounds": ["Fell back to text overlay for non-fillable fields"]
139
+ },
140
+ "eval_feedback": {
141
+ "suggestions": [
142
+ {
143
+ "assertion": "The output includes the name 'John Smith'",
144
+ "reason": "A hallucinated document that mentions the name would also pass"
145
+ }
146
+ ],
147
+ "overall": "Assertions check presence but not correctness."
148
+ }
149
+ }
150
+ ```
151
+
152
+ **Fields:**
153
+ - `expectations[]`: Graded expectations along with their evidence
154
+ - `summary`: Aggregate pass/fail counts
155
+ - `execution_metrics`: Tool usage and output size (from the executor's metrics.json)
156
+ - `timing`: Wall clock timing (from timing.json)
157
+ - `claims`: Claims pulled from the output and verified
158
+ - `user_notes_summary`: Issues the executor flagged
159
+ - `eval_feedback`: (optional) Improvement suggestions for the Inspections, present only when the grader turns up issues worth raising
160
+
161
+ ---
162
+
163
+ ## metrics.json
164
+
165
+ The executor agent's output. Lives at `<run-dir>/outputs/metrics.json`.
166
+
167
+ ```json
168
+ {
169
+ "tool_calls": {
170
+ "Read": 5,
171
+ "Write": 2,
172
+ "Bash": 8,
173
+ "Edit": 1,
174
+ "Glob": 2,
175
+ "Grep": 0
176
+ },
177
+ "total_tool_calls": 18,
178
+ "total_steps": 6,
179
+ "files_created": ["filled_form.pdf", "field_values.json"],
180
+ "errors_encountered": 0,
181
+ "output_chars": 12450,
182
+ "transcript_chars": 3200
183
+ }
184
+ ```
185
+
186
+ **Fields:**
187
+ - `tool_calls`: Count per tool type
188
+ - `total_tool_calls`: Sum of every tool call
189
+ - `total_steps`: Number of major execution steps
190
+ - `files_created`: List of the output files created
191
+ - `errors_encountered`: Number of errors hit during execution
192
+ - `output_chars`: Total character count of the output files
193
+ - `transcript_chars`: Character count of the transcript
194
+
195
+ ---
196
+
197
+ ## timing.json
198
+
199
+ Wall clock timing for a single run. Lives at `<run-dir>/timing.json`.
200
+
201
+ **How to capture:** As a subagent task wraps up, the task notification carries `total_tokens` and `duration_ms`. Save them right away — they live nowhere else and can't be recovered later.
202
+
203
+ ```json
204
+ {
205
+ "total_tokens": 84852,
206
+ "duration_ms": 23332,
207
+ "total_duration_seconds": 23.3,
208
+ "executor_start": "2026-01-15T10:30:00Z",
209
+ "executor_end": "2026-01-15T10:32:45Z",
210
+ "executor_duration_seconds": 165.0,
211
+ "grader_start": "2026-01-15T10:32:46Z",
212
+ "grader_end": "2026-01-15T10:33:12Z",
213
+ "grader_duration_seconds": 26.0
214
+ }
215
+ ```
216
+
217
+ ---
218
+
219
+ ## benchmark.json
220
+
221
+ The output of Inspection mode. Lives at `benchmarks/<timestamp>/benchmark.json`.
222
+
223
+ ```json
224
+ {
225
+ "metadata": {
226
+ "skill_name": "pdf",
227
+ "skill_path": "/path/to/pdf",
228
+ "executor_model": "wormclaude v1",
229
+ "analyzer_model": "most-capable-model",
230
+ "timestamp": "2026-01-15T10:30:00Z",
231
+ "evals_run": [1, 2, 3],
232
+ "runs_per_configuration": 3
233
+ },
234
+
235
+ "runs": [
236
+ {
237
+ "eval_id": 1,
238
+ "eval_name": "Ocean",
239
+ "configuration": "with_skill",
240
+ "run_number": 1,
241
+ "result": {
242
+ "pass_rate": 0.85,
243
+ "passed": 6,
244
+ "failed": 1,
245
+ "total": 7,
246
+ "time_seconds": 42.5,
247
+ "tokens": 3800,
248
+ "tool_calls": 18,
249
+ "errors": 0
250
+ },
251
+ "expectations": [
252
+ {"text": "...", "passed": true, "evidence": "..."}
253
+ ],
254
+ "notes": [
255
+ "Used 2023 data, may be stale",
256
+ "Fell back to text overlay for non-fillable fields"
257
+ ]
258
+ }
259
+ ],
260
+
261
+ "run_summary": {
262
+ "with_skill": {
263
+ "pass_rate": {"mean": 0.85, "stddev": 0.05, "min": 0.80, "max": 0.90},
264
+ "time_seconds": {"mean": 45.0, "stddev": 12.0, "min": 32.0, "max": 58.0},
265
+ "tokens": {"mean": 3800, "stddev": 400, "min": 3200, "max": 4100}
266
+ },
267
+ "without_skill": {
268
+ "pass_rate": {"mean": 0.35, "stddev": 0.08, "min": 0.28, "max": 0.45},
269
+ "time_seconds": {"mean": 32.0, "stddev": 8.0, "min": 24.0, "max": 42.0},
270
+ "tokens": {"mean": 2100, "stddev": 300, "min": 1800, "max": 2500}
271
+ },
272
+ "delta": {
273
+ "pass_rate": "+0.50",
274
+ "time_seconds": "+13.0",
275
+ "tokens": "+1700"
276
+ }
277
+ },
278
+
279
+ "notes": [
280
+ "Assertion 'Output is a PDF file' passes 100% in both configurations - may not differentiate skill value",
281
+ "Eval 3 shows high variance (50% ± 40%) - may be flaky or model-dependent",
282
+ "Without-skill runs consistently fail on table extraction expectations",
283
+ "Skill adds 13s average execution time but improves pass rate by 50%"
284
+ ]
285
+ }
286
+ ```
287
+
288
+ **Fields:**
289
+ - `metadata`: Details about the Inspection run
290
+ - `skill_name`: Name of the skill
291
+ - `timestamp`: When the Inspection ran
292
+ - `evals_run`: List of Inspection names or IDs
293
+ - `runs_per_configuration`: Runs per config (e.g. 3)
294
+ - `runs[]`: Individual run results
295
+ - `eval_id`: Numeric Inspection identifier
296
+ - `eval_name`: Human-readable Inspection name (used as the section header in the viewer)
297
+ - `configuration`: Must be `"with_skill"` or `"without_skill"` (the viewer keys on this exact string for grouping and color coding)
298
+ - `run_number`: Integer run number (1, 2, 3...)
299
+ - `result`: Nested object holding `pass_rate`, `passed`, `failed`, `total`, `time_seconds`, `tokens`, `tool_calls`, `errors`
300
+ - `run_summary`: Statistical aggregates per configuration
301
+ - `with_skill` / `without_skill`: Each holds `pass_rate`, `time_seconds`, `tokens` objects with `mean`, `stddev`, `min`, and `max` fields
302
+ - `delta`: Difference strings like `"+0.50"`, `"+13.0"`, `"+1700"`
303
+ - `notes`: Freeform observations from the analyzer
304
+
305
+ **Important:** The viewer reads these field names verbatim. Writing `config` instead of `configuration`, or hoisting `pass_rate` to the top level of a run rather than nesting it under `result`, leaves the viewer showing empty/zero values. Always check against this schema when you build benchmark.json by hand.
306
+
307
+ ---
308
+
309
+ ## comparison.json
310
+
311
+ The blind comparator's output. Lives at `<grading-dir>/comparison-N.json`.
312
+
313
+ ```json
314
+ {
315
+ "winner": "A",
316
+ "reasoning": "Output A provides a complete solution with proper formatting and all required fields. Output B is missing the date field and has formatting inconsistencies.",
317
+ "rubric": {
318
+ "A": {
319
+ "content": {
320
+ "correctness": 5,
321
+ "completeness": 5,
322
+ "accuracy": 4
323
+ },
324
+ "structure": {
325
+ "organization": 4,
326
+ "formatting": 5,
327
+ "usability": 4
328
+ },
329
+ "content_score": 4.7,
330
+ "structure_score": 4.3,
331
+ "overall_score": 9.0
332
+ },
333
+ "B": {
334
+ "content": {
335
+ "correctness": 3,
336
+ "completeness": 2,
337
+ "accuracy": 3
338
+ },
339
+ "structure": {
340
+ "organization": 3,
341
+ "formatting": 2,
342
+ "usability": 3
343
+ },
344
+ "content_score": 2.7,
345
+ "structure_score": 2.7,
346
+ "overall_score": 5.4
347
+ }
348
+ },
349
+ "output_quality": {
350
+ "A": {
351
+ "score": 9,
352
+ "strengths": ["Complete solution", "Well-formatted", "All fields present"],
353
+ "weaknesses": ["Minor style inconsistency in header"]
354
+ },
355
+ "B": {
356
+ "score": 5,
357
+ "strengths": ["Readable output", "Correct basic structure"],
358
+ "weaknesses": ["Missing date field", "Formatting inconsistencies", "Partial data extraction"]
359
+ }
360
+ },
361
+ "expectation_results": {
362
+ "A": {
363
+ "passed": 4,
364
+ "total": 5,
365
+ "pass_rate": 0.80,
366
+ "details": [
367
+ {"text": "Output includes name", "passed": true}
368
+ ]
369
+ },
370
+ "B": {
371
+ "passed": 3,
372
+ "total": 5,
373
+ "pass_rate": 0.60,
374
+ "details": [
375
+ {"text": "Output includes name", "passed": true}
376
+ ]
377
+ }
378
+ }
379
+ }
380
+ ```
381
+
382
+ ---
383
+
384
+ ## analysis.json
385
+
386
+ The post-hoc analyzer's output. Lives at `<grading-dir>/analysis.json`.
387
+
388
+ ```json
389
+ {
390
+ "comparison_summary": {
391
+ "winner": "A",
392
+ "winner_skill": "path/to/winner/skill",
393
+ "loser_skill": "path/to/loser/skill",
394
+ "comparator_reasoning": "Brief summary of why comparator chose winner"
395
+ },
396
+ "winner_strengths": [
397
+ "Clear step-by-step instructions for handling multi-page documents",
398
+ "Included validation script that caught formatting errors"
399
+ ],
400
+ "loser_weaknesses": [
401
+ "Vague instruction 'process the document appropriately' led to inconsistent behavior",
402
+ "No script for validation, agent had to improvise"
403
+ ],
404
+ "instruction_following": {
405
+ "winner": {
406
+ "score": 9,
407
+ "issues": ["Minor: skipped optional logging step"]
408
+ },
409
+ "loser": {
410
+ "score": 6,
411
+ "issues": [
412
+ "Did not use the skill's formatting template",
413
+ "Invented own approach instead of following step 3"
414
+ ]
415
+ }
416
+ },
417
+ "improvement_suggestions": [
418
+ {
419
+ "priority": "high",
420
+ "category": "instructions",
421
+ "suggestion": "Replace 'process the document appropriately' with explicit steps",
422
+ "expected_impact": "Would eliminate ambiguity that caused inconsistent behavior"
423
+ }
424
+ ],
425
+ "transcript_insights": {
426
+ "winner_execution_pattern": "Read skill -> Followed 5-step process -> Used validation script",
427
+ "loser_execution_pattern": "Read skill -> Unclear on approach -> Tried 3 different methods"
428
+ }
429
+ }
430
+ ```
File without changes