wormclaude 1.0.74 → 1.0.75
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/theme.js +3 -3
- package/package.json +2 -2
- package/skills/build-mcp-app/SKILL.md +393 -0
- package/skills/build-mcp-app/references/abuse-protection.md +60 -0
- package/skills/build-mcp-app/references/apps-sdk-messages.md +227 -0
- package/skills/build-mcp-app/references/directory-checklist.md +18 -0
- package/skills/build-mcp-app/references/iframe-sandbox.md +164 -0
- package/skills/build-mcp-app/references/payload-budgeting.md +54 -0
- package/skills/build-mcp-app/references/widget-templates.md +249 -0
- package/skills/build-mcp-server/SKILL.md +222 -0
- package/skills/build-mcp-server/references/auth.md +108 -0
- package/skills/build-mcp-server/references/deploy-cloudflare-workers.md +106 -0
- package/skills/build-mcp-server/references/elicitation.md +129 -0
- package/skills/build-mcp-server/references/remote-http-scaffold.md +211 -0
- package/skills/build-mcp-server/references/resources-and-prompts.md +122 -0
- package/skills/build-mcp-server/references/server-capabilities.md +164 -0
- package/skills/build-mcp-server/references/tool-design.md +189 -0
- package/skills/build-mcp-server/references/versions.md +25 -0
- package/skills/build-mcpb/SKILL.md +200 -0
- package/skills/build-mcpb/references/local-security.md +149 -0
- package/skills/build-mcpb/references/manifest-schema.md +156 -0
- package/skills/docx/script/__init__.py +1 -0
- package/skills/docx/script/accept_chages.py +135 -0
- package/skills/docx/script/comment.py +318 -0
- package/skills/docx/script/office/helpers/__init__.py +0 -0
- package/skills/docx/script/office/helpers/merge_runs.py +199 -0
- package/skills/docx/script/office/helpers/simplify_redlines.py +197 -0
- package/skills/docx/script/office/pack.py +159 -0
- package/skills/docx/script/office/schemas/ISO-IEC29500-4_2016/dml-chart.xsd +1499 -0
- package/skills/docx/script/office/schemas/ISO-IEC29500-4_2016/dml-chartDrawing.xsd +146 -0
- package/skills/docx/script/office/schemas/ISO-IEC29500-4_2016/dml-diagram.xsd +1085 -0
- package/skills/docx/script/office/schemas/ISO-IEC29500-4_2016/dml-lockedCanvas.xsd +11 -0
- package/skills/docx/script/office/schemas/ISO-IEC29500-4_2016/dml-main.xsd +3081 -0
- package/skills/docx/script/office/schemas/ISO-IEC29500-4_2016/dml-picture.xsd +23 -0
- package/skills/docx/script/office/schemas/ISO-IEC29500-4_2016/dml-spreadsheetDrawing.xsd +185 -0
- package/skills/docx/script/office/schemas/ISO-IEC29500-4_2016/dml-wordprocessingDrawing.xsd +287 -0
- package/skills/docx/script/office/schemas/ISO-IEC29500-4_2016/pml.xsd +1676 -0
- package/skills/docx/script/office/schemas/ISO-IEC29500-4_2016/shared-additionalCharacteristics.xsd +28 -0
- package/skills/docx/script/office/schemas/ISO-IEC29500-4_2016/shared-bibliography.xsd +144 -0
- package/skills/docx/script/office/schemas/ISO-IEC29500-4_2016/shared-commonSimpleTypes.xsd +174 -0
- package/skills/docx/script/office/schemas/ISO-IEC29500-4_2016/shared-customXmlDataProperties.xsd +25 -0
- package/skills/docx/script/office/schemas/ISO-IEC29500-4_2016/shared-customXmlSchemaProperties.xsd +18 -0
- package/skills/docx/script/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesCustom.xsd +59 -0
- package/skills/docx/script/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesExtended.xsd +56 -0
- package/skills/docx/script/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesVariantTypes.xsd +195 -0
- package/skills/docx/script/office/schemas/ISO-IEC29500-4_2016/shared-math.xsd +582 -0
- package/skills/docx/script/office/schemas/ISO-IEC29500-4_2016/shared-relationshipReference.xsd +25 -0
- package/skills/docx/script/office/schemas/ISO-IEC29500-4_2016/sml.xsd +4439 -0
- package/skills/docx/script/office/schemas/ISO-IEC29500-4_2016/vml-main.xsd +570 -0
- package/skills/docx/script/office/schemas/ISO-IEC29500-4_2016/vml-officeDrawing.xsd +509 -0
- package/skills/docx/script/office/schemas/ISO-IEC29500-4_2016/vml-presentationDrawing.xsd +12 -0
- package/skills/docx/script/office/schemas/ISO-IEC29500-4_2016/vml-spreadsheetDrawing.xsd +108 -0
- package/skills/docx/script/office/schemas/ISO-IEC29500-4_2016/vml-wordprocessingDrawing.xsd +96 -0
- package/skills/docx/script/office/schemas/ISO-IEC29500-4_2016/wml.xsd +3646 -0
- package/skills/docx/script/office/schemas/ISO-IEC29500-4_2016/xml.xsd +116 -0
- package/skills/docx/script/office/schemas/ecma/fouth-edition/opc-contentTypes.xsd +42 -0
- package/skills/docx/script/office/schemas/ecma/fouth-edition/opc-coreProperties.xsd +50 -0
- package/skills/docx/script/office/schemas/ecma/fouth-edition/opc-digSig.xsd +49 -0
- package/skills/docx/script/office/schemas/ecma/fouth-edition/opc-relationships.xsd +33 -0
- package/skills/docx/script/office/schemas/mce/mc.xsd +75 -0
- package/skills/docx/script/office/schemas/microsoft/wml-2010.xsd +560 -0
- package/skills/docx/script/office/schemas/microsoft/wml-2012.xsd +67 -0
- package/skills/docx/script/office/schemas/microsoft/wml-2018.xsd +14 -0
- package/skills/docx/script/office/schemas/microsoft/wml-cex-2018.xsd +20 -0
- package/skills/docx/script/office/schemas/microsoft/wml-cid-2016.xsd +13 -0
- package/skills/docx/script/office/schemas/microsoft/wml-sdtdatahash-2020.xsd +4 -0
- package/skills/docx/script/office/schemas/microsoft/wml-symex-2015.xsd +8 -0
- package/skills/docx/script/office/soffice.py +183 -0
- package/skills/docx/script/office/unpack.py +132 -0
- package/skills/docx/script/office/validate.py +117 -0
- package/skills/docx/script/office/validators/__init__.py +15 -0
- package/skills/docx/script/office/validators/base.py +851 -0
- package/skills/docx/script/office/validators/docx.py +446 -0
- package/skills/docx/script/office/validators/pptx.py +275 -0
- package/skills/docx/script/office/validators/redlining.py +247 -0
- package/skills/docx/script/templates/comments.xml +3 -0
- package/skills/docx/script/templates/commentsExtended.xml +3 -0
- package/skills/docx/script/templates/commentsExtensible.xml +3 -0
- package/skills/docx/script/templates/commentsIds.xml +3 -0
- package/skills/docx/script/templates/people.xml +3 -0
- package/skills/docx/skill.md +593 -0
- package/skills/frontend-design/SKILL.md +42 -0
- package/skills/pdf/FORMS.md +294 -0
- package/skills/pdf/REFERENCE.md +612 -0
- package/skills/pdf/SKILL.md +314 -0
- package/skills/pdf/scripts/check_bounding_boxes.py +65 -0
- package/skills/pdf/scripts/check_fillable_fields.py +11 -0
- package/skills/pdf/scripts/convert_pdf_to_images.py +33 -0
- package/skills/pdf/scripts/create_validation_image.py +37 -0
- package/skills/pdf/scripts/extract_form_field_info.py +122 -0
- package/skills/pdf/scripts/extract_form_structure.py +115 -0
- package/skills/pdf/scripts/fill_fillable_fields.py +98 -0
- package/skills/pdf/scripts/fill_pdf_form_with_annotations.py +107 -0
- package/skills/playground/SKILL.md +77 -0
- package/skills/playground/templates/code-map.md +158 -0
- package/skills/playground/templates/concept-map.md +73 -0
- package/skills/playground/templates/data-explorer.md +67 -0
- package/skills/playground/templates/design-playground.md +67 -0
- package/skills/playground/templates/diff-review.md +179 -0
- package/skills/playground/templates/document-critique.md +171 -0
- package/skills/pptx/SKILL.md +230 -0
- package/skills/pptx/editing.md +205 -0
- package/skills/pptx/pptxgenjs.md +437 -0
- package/skills/pptx/scripts/__init__.py +0 -0
- package/skills/pptx/scripts/add_slide.py +195 -0
- package/skills/pptx/scripts/clean.py +286 -0
- package/skills/pptx/scripts/office/helpers/__init__.py +0 -0
- package/skills/pptx/scripts/office/helpers/merge_runs.py +199 -0
- package/skills/pptx/scripts/office/helpers/simplify_redlines.py +197 -0
- package/skills/pptx/scripts/office/pack.py +159 -0
- package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chart.xsd +1499 -0
- package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chartDrawing.xsd +146 -0
- package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-diagram.xsd +1085 -0
- package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-lockedCanvas.xsd +11 -0
- package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-main.xsd +3081 -0
- package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-picture.xsd +23 -0
- package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-spreadsheetDrawing.xsd +185 -0
- package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-wordprocessingDrawing.xsd +287 -0
- package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/pml.xsd +1676 -0
- package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-additionalCharacteristics.xsd +28 -0
- package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-bibliography.xsd +144 -0
- package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-commonSimpleTypes.xsd +174 -0
- package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlDataProperties.xsd +25 -0
- package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlSchemaProperties.xsd +18 -0
- package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesCustom.xsd +59 -0
- package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesExtended.xsd +56 -0
- package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesVariantTypes.xsd +195 -0
- package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-math.xsd +582 -0
- package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-relationshipReference.xsd +25 -0
- package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/sml.xsd +4439 -0
- package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-main.xsd +570 -0
- package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-officeDrawing.xsd +509 -0
- package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-presentationDrawing.xsd +12 -0
- package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-spreadsheetDrawing.xsd +108 -0
- package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-wordprocessingDrawing.xsd +96 -0
- package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/wml.xsd +3646 -0
- package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/xml.xsd +116 -0
- package/skills/pptx/scripts/office/schemas/ecma/fouth-edition/opc-contentTypes.xsd +42 -0
- package/skills/pptx/scripts/office/schemas/ecma/fouth-edition/opc-coreProperties.xsd +50 -0
- package/skills/pptx/scripts/office/schemas/ecma/fouth-edition/opc-digSig.xsd +49 -0
- package/skills/pptx/scripts/office/schemas/ecma/fouth-edition/opc-relationships.xsd +33 -0
- package/skills/pptx/scripts/office/schemas/mce/mc.xsd +75 -0
- package/skills/pptx/scripts/office/schemas/microsoft/wml-2010.xsd +560 -0
- package/skills/pptx/scripts/office/schemas/microsoft/wml-2012.xsd +67 -0
- package/skills/pptx/scripts/office/schemas/microsoft/wml-2018.xsd +14 -0
- package/skills/pptx/scripts/office/schemas/microsoft/wml-cex-2018.xsd +20 -0
- package/skills/pptx/scripts/office/schemas/microsoft/wml-cid-2016.xsd +13 -0
- package/skills/pptx/scripts/office/schemas/microsoft/wml-sdtdatahash-2020.xsd +4 -0
- package/skills/pptx/scripts/office/schemas/microsoft/wml-symex-2015.xsd +8 -0
- package/skills/pptx/scripts/office/soffice.py +183 -0
- package/skills/pptx/scripts/office/unpack.py +132 -0
- package/skills/pptx/scripts/office/validate.py +117 -0
- package/skills/pptx/scripts/office/validators/__init__.py +15 -0
- package/skills/pptx/scripts/office/validators/base.py +851 -0
- package/skills/pptx/scripts/office/validators/docx.py +446 -0
- package/skills/pptx/scripts/office/validators/pptx.py +275 -0
- package/skills/pptx/scripts/office/validators/redlining.py +247 -0
- package/skills/pptx/scripts/thumbnail.py +289 -0
- package/skills/talent-creator/SKILL.md +486 -0
- package/skills/talent-creator/agents/analyzer.md +274 -0
- package/skills/talent-creator/agents/comparator.md +202 -0
- package/skills/talent-creator/agents/grader.md +223 -0
- package/skills/talent-creator/assets/eval_review.html +146 -0
- package/skills/talent-creator/eval-viewer/generate_review.py +471 -0
- package/skills/talent-creator/eval-viewer/viewer.html +1325 -0
- package/skills/talent-creator/references/schemas.md +430 -0
- package/skills/talent-creator/scripts/__init__.py +0 -0
- package/skills/talent-creator/scripts/aggregate_benchmark.py +401 -0
- package/skills/talent-creator/scripts/generate_report.py +326 -0
- package/skills/talent-creator/scripts/improve_description.py +247 -0
- package/skills/talent-creator/scripts/package_skill.py +136 -0
- package/skills/talent-creator/scripts/quick_validate.py +146 -0
- package/skills/talent-creator/scripts/run_eval.py +310 -0
- package/skills/talent-creator/scripts/run_loop.py +328 -0
- package/skills/talent-creator/scripts/utils.py +47 -0
- package/skills/xlsx/SKILL.md +300 -0
- package/skills/xlsx/scripts/office/helpers/__init__.py +0 -0
- package/skills/xlsx/scripts/office/helpers/merge_runs.py +199 -0
- package/skills/xlsx/scripts/office/helpers/simplify_redlines.py +197 -0
- package/skills/xlsx/scripts/office/pack.py +159 -0
- package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chart.xsd +1499 -0
- package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chartDrawing.xsd +146 -0
- package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-diagram.xsd +1085 -0
- package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-lockedCanvas.xsd +11 -0
- package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-main.xsd +3081 -0
- package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-picture.xsd +23 -0
- package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-spreadsheetDrawing.xsd +185 -0
- package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-wordprocessingDrawing.xsd +287 -0
- package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/pml.xsd +1676 -0
- package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-additionalCharacteristics.xsd +28 -0
- package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-bibliography.xsd +144 -0
- package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-commonSimpleTypes.xsd +174 -0
- package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlDataProperties.xsd +25 -0
- package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlSchemaProperties.xsd +18 -0
- package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesCustom.xsd +59 -0
- package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesExtended.xsd +56 -0
- package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesVariantTypes.xsd +195 -0
- package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-math.xsd +582 -0
- package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-relationshipReference.xsd +25 -0
- package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/sml.xsd +4439 -0
- package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-main.xsd +570 -0
- package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-officeDrawing.xsd +509 -0
- package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-presentationDrawing.xsd +12 -0
- package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-spreadsheetDrawing.xsd +108 -0
- package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-wordprocessingDrawing.xsd +96 -0
- package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/wml.xsd +3646 -0
- package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/xml.xsd +116 -0
- package/skills/xlsx/scripts/office/schemas/ecma/fouth-edition/opc-contentTypes.xsd +42 -0
- package/skills/xlsx/scripts/office/schemas/ecma/fouth-edition/opc-coreProperties.xsd +50 -0
- package/skills/xlsx/scripts/office/schemas/ecma/fouth-edition/opc-digSig.xsd +49 -0
- package/skills/xlsx/scripts/office/schemas/ecma/fouth-edition/opc-relationships.xsd +33 -0
- package/skills/xlsx/scripts/office/schemas/mce/mc.xsd +75 -0
- package/skills/xlsx/scripts/office/schemas/microsoft/wml-2010.xsd +560 -0
- package/skills/xlsx/scripts/office/schemas/microsoft/wml-2012.xsd +67 -0
- package/skills/xlsx/scripts/office/schemas/microsoft/wml-2018.xsd +14 -0
- package/skills/xlsx/scripts/office/schemas/microsoft/wml-cex-2018.xsd +20 -0
- package/skills/xlsx/scripts/office/schemas/microsoft/wml-cid-2016.xsd +13 -0
- package/skills/xlsx/scripts/office/schemas/microsoft/wml-sdtdatahash-2020.xsd +4 -0
- package/skills/xlsx/scripts/office/schemas/microsoft/wml-symex-2015.xsd +8 -0
- package/skills/xlsx/scripts/office/soffice.py +183 -0
- package/skills/xlsx/scripts/office/unpack.py +132 -0
- package/skills/xlsx/scripts/office/validate.py +117 -0
- package/skills/xlsx/scripts/office/validators/__init__.py +15 -0
- package/skills/xlsx/scripts/office/validators/base.py +851 -0
- package/skills/xlsx/scripts/office/validators/docx.py +446 -0
- package/skills/xlsx/scripts/office/validators/pptx.py +275 -0
- package/skills/xlsx/scripts/office/validators/redlining.py +247 -0
- package/skills/xlsx/scripts/recalc.py +184 -0
|
@@ -0,0 +1,430 @@
|
|
|
1
|
+
# JSON Schemas
|
|
2
|
+
|
|
3
|
+
This document lays out the JSON schemas that Talent-Creator uses.
|
|
4
|
+
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
## evals.json
|
|
8
|
+
|
|
9
|
+
Defines the Inspections (evals) for a skill. Lives at `evals/evals.json` inside the skill directory.
|
|
10
|
+
|
|
11
|
+
```json
|
|
12
|
+
{
|
|
13
|
+
"skill_name": "example-skill",
|
|
14
|
+
"evals": [
|
|
15
|
+
{
|
|
16
|
+
"id": 1,
|
|
17
|
+
"prompt": "User's example prompt",
|
|
18
|
+
"expected_output": "Description of expected result",
|
|
19
|
+
"files": ["evals/files/sample1.pdf"],
|
|
20
|
+
"expectations": [
|
|
21
|
+
"The output includes X",
|
|
22
|
+
"The skill used script Y"
|
|
23
|
+
]
|
|
24
|
+
}
|
|
25
|
+
]
|
|
26
|
+
}
|
|
27
|
+
```
|
|
28
|
+
|
|
29
|
+
**Fields:**
|
|
30
|
+
- `skill_name`: Name that matches the skill's frontmatter
|
|
31
|
+
- `evals[].id`: Unique integer identifier
|
|
32
|
+
- `evals[].prompt`: The task to run
|
|
33
|
+
- `evals[].expected_output`: Plain-language description of what success looks like
|
|
34
|
+
- `evals[].files`: Optional list of input file paths (relative to the skill root)
|
|
35
|
+
- `evals[].expectations`: List of checkable statements
|
|
36
|
+
|
|
37
|
+
---
|
|
38
|
+
|
|
39
|
+
## history.json
|
|
40
|
+
|
|
41
|
+
Tracks how versions progress in Improve mode. Lives at the workspace root.
|
|
42
|
+
|
|
43
|
+
```json
|
|
44
|
+
{
|
|
45
|
+
"started_at": "2026-01-15T10:30:00Z",
|
|
46
|
+
"skill_name": "pdf",
|
|
47
|
+
"current_best": "v2",
|
|
48
|
+
"iterations": [
|
|
49
|
+
{
|
|
50
|
+
"version": "v0",
|
|
51
|
+
"parent": null,
|
|
52
|
+
"expectation_pass_rate": 0.65,
|
|
53
|
+
"grading_result": "baseline",
|
|
54
|
+
"is_current_best": false
|
|
55
|
+
},
|
|
56
|
+
{
|
|
57
|
+
"version": "v1",
|
|
58
|
+
"parent": "v0",
|
|
59
|
+
"expectation_pass_rate": 0.75,
|
|
60
|
+
"grading_result": "won",
|
|
61
|
+
"is_current_best": false
|
|
62
|
+
},
|
|
63
|
+
{
|
|
64
|
+
"version": "v2",
|
|
65
|
+
"parent": "v1",
|
|
66
|
+
"expectation_pass_rate": 0.85,
|
|
67
|
+
"grading_result": "won",
|
|
68
|
+
"is_current_best": true
|
|
69
|
+
}
|
|
70
|
+
]
|
|
71
|
+
}
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
**Fields:**
|
|
75
|
+
- `started_at`: ISO timestamp marking when improvement kicked off
|
|
76
|
+
- `skill_name`: Name of the skill being improved
|
|
77
|
+
- `current_best`: Version identifier of the top performer
|
|
78
|
+
- `iterations[].version`: Version identifier (v0, v1, ...)
|
|
79
|
+
- `iterations[].parent`: The version this one was derived from
|
|
80
|
+
- `iterations[].expectation_pass_rate`: Pass rate coming out of grading
|
|
81
|
+
- `iterations[].grading_result`: "baseline", "won", "lost", or "tie"
|
|
82
|
+
- `iterations[].is_current_best`: Whether this is the reigning best version
|
|
83
|
+
|
|
84
|
+
---
|
|
85
|
+
|
|
86
|
+
## grading.json
|
|
87
|
+
|
|
88
|
+
The grader agent's output. Lives at `<run-dir>/grading.json`.
|
|
89
|
+
|
|
90
|
+
```json
|
|
91
|
+
{
|
|
92
|
+
"expectations": [
|
|
93
|
+
{
|
|
94
|
+
"text": "The output includes the name 'John Smith'",
|
|
95
|
+
"passed": true,
|
|
96
|
+
"evidence": "Found in transcript Step 3: 'Extracted names: John Smith, Sarah Johnson'"
|
|
97
|
+
},
|
|
98
|
+
{
|
|
99
|
+
"text": "The spreadsheet has a SUM formula in cell B10",
|
|
100
|
+
"passed": false,
|
|
101
|
+
"evidence": "No spreadsheet was created. The output was a text file."
|
|
102
|
+
}
|
|
103
|
+
],
|
|
104
|
+
"summary": {
|
|
105
|
+
"passed": 2,
|
|
106
|
+
"failed": 1,
|
|
107
|
+
"total": 3,
|
|
108
|
+
"pass_rate": 0.67
|
|
109
|
+
},
|
|
110
|
+
"execution_metrics": {
|
|
111
|
+
"tool_calls": {
|
|
112
|
+
"Read": 5,
|
|
113
|
+
"Write": 2,
|
|
114
|
+
"Bash": 8
|
|
115
|
+
},
|
|
116
|
+
"total_tool_calls": 15,
|
|
117
|
+
"total_steps": 6,
|
|
118
|
+
"errors_encountered": 0,
|
|
119
|
+
"output_chars": 12450,
|
|
120
|
+
"transcript_chars": 3200
|
|
121
|
+
},
|
|
122
|
+
"timing": {
|
|
123
|
+
"executor_duration_seconds": 165.0,
|
|
124
|
+
"grader_duration_seconds": 26.0,
|
|
125
|
+
"total_duration_seconds": 191.0
|
|
126
|
+
},
|
|
127
|
+
"claims": [
|
|
128
|
+
{
|
|
129
|
+
"claim": "The form has 12 fillable fields",
|
|
130
|
+
"type": "factual",
|
|
131
|
+
"verified": true,
|
|
132
|
+
"evidence": "Counted 12 fields in field_info.json"
|
|
133
|
+
}
|
|
134
|
+
],
|
|
135
|
+
"user_notes_summary": {
|
|
136
|
+
"uncertainties": ["Used 2023 data, may be stale"],
|
|
137
|
+
"needs_review": [],
|
|
138
|
+
"workarounds": ["Fell back to text overlay for non-fillable fields"]
|
|
139
|
+
},
|
|
140
|
+
"eval_feedback": {
|
|
141
|
+
"suggestions": [
|
|
142
|
+
{
|
|
143
|
+
"assertion": "The output includes the name 'John Smith'",
|
|
144
|
+
"reason": "A hallucinated document that mentions the name would also pass"
|
|
145
|
+
}
|
|
146
|
+
],
|
|
147
|
+
"overall": "Assertions check presence but not correctness."
|
|
148
|
+
}
|
|
149
|
+
}
|
|
150
|
+
```
|
|
151
|
+
|
|
152
|
+
**Fields:**
|
|
153
|
+
- `expectations[]`: Graded expectations along with their evidence
|
|
154
|
+
- `summary`: Aggregate pass/fail counts
|
|
155
|
+
- `execution_metrics`: Tool usage and output size (from the executor's metrics.json)
|
|
156
|
+
- `timing`: Wall clock timing (from timing.json)
|
|
157
|
+
- `claims`: Claims pulled from the output and verified
|
|
158
|
+
- `user_notes_summary`: Issues the executor flagged
|
|
159
|
+
- `eval_feedback`: (optional) Improvement suggestions for the Inspections, present only when the grader turns up issues worth raising
|
|
160
|
+
|
|
161
|
+
---
|
|
162
|
+
|
|
163
|
+
## metrics.json
|
|
164
|
+
|
|
165
|
+
The executor agent's output. Lives at `<run-dir>/outputs/metrics.json`.
|
|
166
|
+
|
|
167
|
+
```json
|
|
168
|
+
{
|
|
169
|
+
"tool_calls": {
|
|
170
|
+
"Read": 5,
|
|
171
|
+
"Write": 2,
|
|
172
|
+
"Bash": 8,
|
|
173
|
+
"Edit": 1,
|
|
174
|
+
"Glob": 2,
|
|
175
|
+
"Grep": 0
|
|
176
|
+
},
|
|
177
|
+
"total_tool_calls": 18,
|
|
178
|
+
"total_steps": 6,
|
|
179
|
+
"files_created": ["filled_form.pdf", "field_values.json"],
|
|
180
|
+
"errors_encountered": 0,
|
|
181
|
+
"output_chars": 12450,
|
|
182
|
+
"transcript_chars": 3200
|
|
183
|
+
}
|
|
184
|
+
```
|
|
185
|
+
|
|
186
|
+
**Fields:**
|
|
187
|
+
- `tool_calls`: Count per tool type
|
|
188
|
+
- `total_tool_calls`: Sum of every tool call
|
|
189
|
+
- `total_steps`: Number of major execution steps
|
|
190
|
+
- `files_created`: List of the output files created
|
|
191
|
+
- `errors_encountered`: Number of errors hit during execution
|
|
192
|
+
- `output_chars`: Total character count of the output files
|
|
193
|
+
- `transcript_chars`: Character count of the transcript
|
|
194
|
+
|
|
195
|
+
---
|
|
196
|
+
|
|
197
|
+
## timing.json
|
|
198
|
+
|
|
199
|
+
Wall clock timing for a single run. Lives at `<run-dir>/timing.json`.
|
|
200
|
+
|
|
201
|
+
**How to capture:** As a subagent task wraps up, the task notification carries `total_tokens` and `duration_ms`. Save them right away — they live nowhere else and can't be recovered later.
|
|
202
|
+
|
|
203
|
+
```json
|
|
204
|
+
{
|
|
205
|
+
"total_tokens": 84852,
|
|
206
|
+
"duration_ms": 23332,
|
|
207
|
+
"total_duration_seconds": 23.3,
|
|
208
|
+
"executor_start": "2026-01-15T10:30:00Z",
|
|
209
|
+
"executor_end": "2026-01-15T10:32:45Z",
|
|
210
|
+
"executor_duration_seconds": 165.0,
|
|
211
|
+
"grader_start": "2026-01-15T10:32:46Z",
|
|
212
|
+
"grader_end": "2026-01-15T10:33:12Z",
|
|
213
|
+
"grader_duration_seconds": 26.0
|
|
214
|
+
}
|
|
215
|
+
```
|
|
216
|
+
|
|
217
|
+
---
|
|
218
|
+
|
|
219
|
+
## benchmark.json
|
|
220
|
+
|
|
221
|
+
The output of Inspection mode. Lives at `benchmarks/<timestamp>/benchmark.json`.
|
|
222
|
+
|
|
223
|
+
```json
|
|
224
|
+
{
|
|
225
|
+
"metadata": {
|
|
226
|
+
"skill_name": "pdf",
|
|
227
|
+
"skill_path": "/path/to/pdf",
|
|
228
|
+
"executor_model": "wormclaude v1",
|
|
229
|
+
"analyzer_model": "most-capable-model",
|
|
230
|
+
"timestamp": "2026-01-15T10:30:00Z",
|
|
231
|
+
"evals_run": [1, 2, 3],
|
|
232
|
+
"runs_per_configuration": 3
|
|
233
|
+
},
|
|
234
|
+
|
|
235
|
+
"runs": [
|
|
236
|
+
{
|
|
237
|
+
"eval_id": 1,
|
|
238
|
+
"eval_name": "Ocean",
|
|
239
|
+
"configuration": "with_skill",
|
|
240
|
+
"run_number": 1,
|
|
241
|
+
"result": {
|
|
242
|
+
"pass_rate": 0.85,
|
|
243
|
+
"passed": 6,
|
|
244
|
+
"failed": 1,
|
|
245
|
+
"total": 7,
|
|
246
|
+
"time_seconds": 42.5,
|
|
247
|
+
"tokens": 3800,
|
|
248
|
+
"tool_calls": 18,
|
|
249
|
+
"errors": 0
|
|
250
|
+
},
|
|
251
|
+
"expectations": [
|
|
252
|
+
{"text": "...", "passed": true, "evidence": "..."}
|
|
253
|
+
],
|
|
254
|
+
"notes": [
|
|
255
|
+
"Used 2023 data, may be stale",
|
|
256
|
+
"Fell back to text overlay for non-fillable fields"
|
|
257
|
+
]
|
|
258
|
+
}
|
|
259
|
+
],
|
|
260
|
+
|
|
261
|
+
"run_summary": {
|
|
262
|
+
"with_skill": {
|
|
263
|
+
"pass_rate": {"mean": 0.85, "stddev": 0.05, "min": 0.80, "max": 0.90},
|
|
264
|
+
"time_seconds": {"mean": 45.0, "stddev": 12.0, "min": 32.0, "max": 58.0},
|
|
265
|
+
"tokens": {"mean": 3800, "stddev": 400, "min": 3200, "max": 4100}
|
|
266
|
+
},
|
|
267
|
+
"without_skill": {
|
|
268
|
+
"pass_rate": {"mean": 0.35, "stddev": 0.08, "min": 0.28, "max": 0.45},
|
|
269
|
+
"time_seconds": {"mean": 32.0, "stddev": 8.0, "min": 24.0, "max": 42.0},
|
|
270
|
+
"tokens": {"mean": 2100, "stddev": 300, "min": 1800, "max": 2500}
|
|
271
|
+
},
|
|
272
|
+
"delta": {
|
|
273
|
+
"pass_rate": "+0.50",
|
|
274
|
+
"time_seconds": "+13.0",
|
|
275
|
+
"tokens": "+1700"
|
|
276
|
+
}
|
|
277
|
+
},
|
|
278
|
+
|
|
279
|
+
"notes": [
|
|
280
|
+
"Assertion 'Output is a PDF file' passes 100% in both configurations - may not differentiate skill value",
|
|
281
|
+
"Eval 3 shows high variance (50% ± 40%) - may be flaky or model-dependent",
|
|
282
|
+
"Without-skill runs consistently fail on table extraction expectations",
|
|
283
|
+
"Skill adds 13s average execution time but improves pass rate by 50%"
|
|
284
|
+
]
|
|
285
|
+
}
|
|
286
|
+
```
|
|
287
|
+
|
|
288
|
+
**Fields:**
|
|
289
|
+
- `metadata`: Details about the Inspection run
|
|
290
|
+
- `skill_name`: Name of the skill
|
|
291
|
+
- `timestamp`: When the Inspection ran
|
|
292
|
+
- `evals_run`: List of Inspection names or IDs
|
|
293
|
+
- `runs_per_configuration`: Runs per config (e.g. 3)
|
|
294
|
+
- `runs[]`: Individual run results
|
|
295
|
+
- `eval_id`: Numeric Inspection identifier
|
|
296
|
+
- `eval_name`: Human-readable Inspection name (used as the section header in the viewer)
|
|
297
|
+
- `configuration`: Must be `"with_skill"` or `"without_skill"` (the viewer keys on this exact string for grouping and color coding)
|
|
298
|
+
- `run_number`: Integer run number (1, 2, 3...)
|
|
299
|
+
- `result`: Nested object holding `pass_rate`, `passed`, `failed`, `total`, `time_seconds`, `tokens`, `tool_calls`, `errors`
|
|
300
|
+
- `run_summary`: Statistical aggregates per configuration
|
|
301
|
+
- `with_skill` / `without_skill`: Each holds `pass_rate`, `time_seconds`, `tokens` objects with `mean`, `stddev`, `min`, and `max` fields
|
|
302
|
+
- `delta`: Difference strings like `"+0.50"`, `"+13.0"`, `"+1700"`
|
|
303
|
+
- `notes`: Freeform observations from the analyzer
|
|
304
|
+
|
|
305
|
+
**Important:** The viewer reads these field names verbatim. Writing `config` instead of `configuration`, or hoisting `pass_rate` to the top level of a run rather than nesting it under `result`, leaves the viewer showing empty/zero values. Always check against this schema when you build benchmark.json by hand.
|
|
306
|
+
|
|
307
|
+
---
|
|
308
|
+
|
|
309
|
+
## comparison.json
|
|
310
|
+
|
|
311
|
+
The blind comparator's output. Lives at `<grading-dir>/comparison-N.json`.
|
|
312
|
+
|
|
313
|
+
```json
|
|
314
|
+
{
|
|
315
|
+
"winner": "A",
|
|
316
|
+
"reasoning": "Output A provides a complete solution with proper formatting and all required fields. Output B is missing the date field and has formatting inconsistencies.",
|
|
317
|
+
"rubric": {
|
|
318
|
+
"A": {
|
|
319
|
+
"content": {
|
|
320
|
+
"correctness": 5,
|
|
321
|
+
"completeness": 5,
|
|
322
|
+
"accuracy": 4
|
|
323
|
+
},
|
|
324
|
+
"structure": {
|
|
325
|
+
"organization": 4,
|
|
326
|
+
"formatting": 5,
|
|
327
|
+
"usability": 4
|
|
328
|
+
},
|
|
329
|
+
"content_score": 4.7,
|
|
330
|
+
"structure_score": 4.3,
|
|
331
|
+
"overall_score": 9.0
|
|
332
|
+
},
|
|
333
|
+
"B": {
|
|
334
|
+
"content": {
|
|
335
|
+
"correctness": 3,
|
|
336
|
+
"completeness": 2,
|
|
337
|
+
"accuracy": 3
|
|
338
|
+
},
|
|
339
|
+
"structure": {
|
|
340
|
+
"organization": 3,
|
|
341
|
+
"formatting": 2,
|
|
342
|
+
"usability": 3
|
|
343
|
+
},
|
|
344
|
+
"content_score": 2.7,
|
|
345
|
+
"structure_score": 2.7,
|
|
346
|
+
"overall_score": 5.4
|
|
347
|
+
}
|
|
348
|
+
},
|
|
349
|
+
"output_quality": {
|
|
350
|
+
"A": {
|
|
351
|
+
"score": 9,
|
|
352
|
+
"strengths": ["Complete solution", "Well-formatted", "All fields present"],
|
|
353
|
+
"weaknesses": ["Minor style inconsistency in header"]
|
|
354
|
+
},
|
|
355
|
+
"B": {
|
|
356
|
+
"score": 5,
|
|
357
|
+
"strengths": ["Readable output", "Correct basic structure"],
|
|
358
|
+
"weaknesses": ["Missing date field", "Formatting inconsistencies", "Partial data extraction"]
|
|
359
|
+
}
|
|
360
|
+
},
|
|
361
|
+
"expectation_results": {
|
|
362
|
+
"A": {
|
|
363
|
+
"passed": 4,
|
|
364
|
+
"total": 5,
|
|
365
|
+
"pass_rate": 0.80,
|
|
366
|
+
"details": [
|
|
367
|
+
{"text": "Output includes name", "passed": true}
|
|
368
|
+
]
|
|
369
|
+
},
|
|
370
|
+
"B": {
|
|
371
|
+
"passed": 3,
|
|
372
|
+
"total": 5,
|
|
373
|
+
"pass_rate": 0.60,
|
|
374
|
+
"details": [
|
|
375
|
+
{"text": "Output includes name", "passed": true}
|
|
376
|
+
]
|
|
377
|
+
}
|
|
378
|
+
}
|
|
379
|
+
}
|
|
380
|
+
```
|
|
381
|
+
|
|
382
|
+
---
|
|
383
|
+
|
|
384
|
+
## analysis.json
|
|
385
|
+
|
|
386
|
+
The post-hoc analyzer's output. Lives at `<grading-dir>/analysis.json`.
|
|
387
|
+
|
|
388
|
+
```json
|
|
389
|
+
{
|
|
390
|
+
"comparison_summary": {
|
|
391
|
+
"winner": "A",
|
|
392
|
+
"winner_skill": "path/to/winner/skill",
|
|
393
|
+
"loser_skill": "path/to/loser/skill",
|
|
394
|
+
"comparator_reasoning": "Brief summary of why comparator chose winner"
|
|
395
|
+
},
|
|
396
|
+
"winner_strengths": [
|
|
397
|
+
"Clear step-by-step instructions for handling multi-page documents",
|
|
398
|
+
"Included validation script that caught formatting errors"
|
|
399
|
+
],
|
|
400
|
+
"loser_weaknesses": [
|
|
401
|
+
"Vague instruction 'process the document appropriately' led to inconsistent behavior",
|
|
402
|
+
"No script for validation, agent had to improvise"
|
|
403
|
+
],
|
|
404
|
+
"instruction_following": {
|
|
405
|
+
"winner": {
|
|
406
|
+
"score": 9,
|
|
407
|
+
"issues": ["Minor: skipped optional logging step"]
|
|
408
|
+
},
|
|
409
|
+
"loser": {
|
|
410
|
+
"score": 6,
|
|
411
|
+
"issues": [
|
|
412
|
+
"Did not use the skill's formatting template",
|
|
413
|
+
"Invented own approach instead of following step 3"
|
|
414
|
+
]
|
|
415
|
+
}
|
|
416
|
+
},
|
|
417
|
+
"improvement_suggestions": [
|
|
418
|
+
{
|
|
419
|
+
"priority": "high",
|
|
420
|
+
"category": "instructions",
|
|
421
|
+
"suggestion": "Replace 'process the document appropriately' with explicit steps",
|
|
422
|
+
"expected_impact": "Would eliminate ambiguity that caused inconsistent behavior"
|
|
423
|
+
}
|
|
424
|
+
],
|
|
425
|
+
"transcript_insights": {
|
|
426
|
+
"winner_execution_pattern": "Read skill -> Followed 5-step process -> Used validation script",
|
|
427
|
+
"loser_execution_pattern": "Read skill -> Unclear on approach -> Tried 3 different methods"
|
|
428
|
+
}
|
|
429
|
+
}
|
|
430
|
+
```
|
|
File without changes
|