wormclaude 1.0.73 → 1.0.75
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/theme.js +4 -4
- package/dist/tools.js +19 -0
- package/package.json +2 -2
- package/skills/build-mcp-app/SKILL.md +393 -0
- package/skills/build-mcp-app/references/abuse-protection.md +60 -0
- package/skills/build-mcp-app/references/apps-sdk-messages.md +227 -0
- package/skills/build-mcp-app/references/directory-checklist.md +18 -0
- package/skills/build-mcp-app/references/iframe-sandbox.md +164 -0
- package/skills/build-mcp-app/references/payload-budgeting.md +54 -0
- package/skills/build-mcp-app/references/widget-templates.md +249 -0
- package/skills/build-mcp-server/SKILL.md +222 -0
- package/skills/build-mcp-server/references/auth.md +108 -0
- package/skills/build-mcp-server/references/deploy-cloudflare-workers.md +106 -0
- package/skills/build-mcp-server/references/elicitation.md +129 -0
- package/skills/build-mcp-server/references/remote-http-scaffold.md +211 -0
- package/skills/build-mcp-server/references/resources-and-prompts.md +122 -0
- package/skills/build-mcp-server/references/server-capabilities.md +164 -0
- package/skills/build-mcp-server/references/tool-design.md +189 -0
- package/skills/build-mcp-server/references/versions.md +25 -0
- package/skills/build-mcpb/SKILL.md +200 -0
- package/skills/build-mcpb/references/local-security.md +149 -0
- package/skills/build-mcpb/references/manifest-schema.md +156 -0
- package/skills/docx/script/__init__.py +1 -0
- package/skills/docx/script/accept_chages.py +135 -0
- package/skills/docx/script/comment.py +318 -0
- package/skills/docx/script/office/helpers/__init__.py +0 -0
- package/skills/docx/script/office/helpers/merge_runs.py +199 -0
- package/skills/docx/script/office/helpers/simplify_redlines.py +197 -0
- package/skills/docx/script/office/pack.py +159 -0
- package/skills/docx/script/office/schemas/ISO-IEC29500-4_2016/dml-chart.xsd +1499 -0
- package/skills/docx/script/office/schemas/ISO-IEC29500-4_2016/dml-chartDrawing.xsd +146 -0
- package/skills/docx/script/office/schemas/ISO-IEC29500-4_2016/dml-diagram.xsd +1085 -0
- package/skills/docx/script/office/schemas/ISO-IEC29500-4_2016/dml-lockedCanvas.xsd +11 -0
- package/skills/docx/script/office/schemas/ISO-IEC29500-4_2016/dml-main.xsd +3081 -0
- package/skills/docx/script/office/schemas/ISO-IEC29500-4_2016/dml-picture.xsd +23 -0
- package/skills/docx/script/office/schemas/ISO-IEC29500-4_2016/dml-spreadsheetDrawing.xsd +185 -0
- package/skills/docx/script/office/schemas/ISO-IEC29500-4_2016/dml-wordprocessingDrawing.xsd +287 -0
- package/skills/docx/script/office/schemas/ISO-IEC29500-4_2016/pml.xsd +1676 -0
- package/skills/docx/script/office/schemas/ISO-IEC29500-4_2016/shared-additionalCharacteristics.xsd +28 -0
- package/skills/docx/script/office/schemas/ISO-IEC29500-4_2016/shared-bibliography.xsd +144 -0
- package/skills/docx/script/office/schemas/ISO-IEC29500-4_2016/shared-commonSimpleTypes.xsd +174 -0
- package/skills/docx/script/office/schemas/ISO-IEC29500-4_2016/shared-customXmlDataProperties.xsd +25 -0
- package/skills/docx/script/office/schemas/ISO-IEC29500-4_2016/shared-customXmlSchemaProperties.xsd +18 -0
- package/skills/docx/script/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesCustom.xsd +59 -0
- package/skills/docx/script/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesExtended.xsd +56 -0
- package/skills/docx/script/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesVariantTypes.xsd +195 -0
- package/skills/docx/script/office/schemas/ISO-IEC29500-4_2016/shared-math.xsd +582 -0
- package/skills/docx/script/office/schemas/ISO-IEC29500-4_2016/shared-relationshipReference.xsd +25 -0
- package/skills/docx/script/office/schemas/ISO-IEC29500-4_2016/sml.xsd +4439 -0
- package/skills/docx/script/office/schemas/ISO-IEC29500-4_2016/vml-main.xsd +570 -0
- package/skills/docx/script/office/schemas/ISO-IEC29500-4_2016/vml-officeDrawing.xsd +509 -0
- package/skills/docx/script/office/schemas/ISO-IEC29500-4_2016/vml-presentationDrawing.xsd +12 -0
- package/skills/docx/script/office/schemas/ISO-IEC29500-4_2016/vml-spreadsheetDrawing.xsd +108 -0
- package/skills/docx/script/office/schemas/ISO-IEC29500-4_2016/vml-wordprocessingDrawing.xsd +96 -0
- package/skills/docx/script/office/schemas/ISO-IEC29500-4_2016/wml.xsd +3646 -0
- package/skills/docx/script/office/schemas/ISO-IEC29500-4_2016/xml.xsd +116 -0
- package/skills/docx/script/office/schemas/ecma/fouth-edition/opc-contentTypes.xsd +42 -0
- package/skills/docx/script/office/schemas/ecma/fouth-edition/opc-coreProperties.xsd +50 -0
- package/skills/docx/script/office/schemas/ecma/fouth-edition/opc-digSig.xsd +49 -0
- package/skills/docx/script/office/schemas/ecma/fouth-edition/opc-relationships.xsd +33 -0
- package/skills/docx/script/office/schemas/mce/mc.xsd +75 -0
- package/skills/docx/script/office/schemas/microsoft/wml-2010.xsd +560 -0
- package/skills/docx/script/office/schemas/microsoft/wml-2012.xsd +67 -0
- package/skills/docx/script/office/schemas/microsoft/wml-2018.xsd +14 -0
- package/skills/docx/script/office/schemas/microsoft/wml-cex-2018.xsd +20 -0
- package/skills/docx/script/office/schemas/microsoft/wml-cid-2016.xsd +13 -0
- package/skills/docx/script/office/schemas/microsoft/wml-sdtdatahash-2020.xsd +4 -0
- package/skills/docx/script/office/schemas/microsoft/wml-symex-2015.xsd +8 -0
- package/skills/docx/script/office/soffice.py +183 -0
- package/skills/docx/script/office/unpack.py +132 -0
- package/skills/docx/script/office/validate.py +117 -0
- package/skills/docx/script/office/validators/__init__.py +15 -0
- package/skills/docx/script/office/validators/base.py +851 -0
- package/skills/docx/script/office/validators/docx.py +446 -0
- package/skills/docx/script/office/validators/pptx.py +275 -0
- package/skills/docx/script/office/validators/redlining.py +247 -0
- package/skills/docx/script/templates/comments.xml +3 -0
- package/skills/docx/script/templates/commentsExtended.xml +3 -0
- package/skills/docx/script/templates/commentsExtensible.xml +3 -0
- package/skills/docx/script/templates/commentsIds.xml +3 -0
- package/skills/docx/script/templates/people.xml +3 -0
- package/skills/docx/skill.md +593 -0
- package/skills/frontend-design/SKILL.md +42 -0
- package/skills/pdf/FORMS.md +294 -0
- package/skills/pdf/REFERENCE.md +612 -0
- package/skills/pdf/SKILL.md +314 -0
- package/skills/pdf/scripts/check_bounding_boxes.py +65 -0
- package/skills/pdf/scripts/check_fillable_fields.py +11 -0
- package/skills/pdf/scripts/convert_pdf_to_images.py +33 -0
- package/skills/pdf/scripts/create_validation_image.py +37 -0
- package/skills/pdf/scripts/extract_form_field_info.py +122 -0
- package/skills/pdf/scripts/extract_form_structure.py +115 -0
- package/skills/pdf/scripts/fill_fillable_fields.py +98 -0
- package/skills/pdf/scripts/fill_pdf_form_with_annotations.py +107 -0
- package/skills/playground/SKILL.md +77 -0
- package/skills/playground/templates/code-map.md +158 -0
- package/skills/playground/templates/concept-map.md +73 -0
- package/skills/playground/templates/data-explorer.md +67 -0
- package/skills/playground/templates/design-playground.md +67 -0
- package/skills/playground/templates/diff-review.md +179 -0
- package/skills/playground/templates/document-critique.md +171 -0
- package/skills/pptx/SKILL.md +230 -0
- package/skills/pptx/editing.md +205 -0
- package/skills/pptx/pptxgenjs.md +437 -0
- package/skills/pptx/scripts/__init__.py +0 -0
- package/skills/pptx/scripts/add_slide.py +195 -0
- package/skills/pptx/scripts/clean.py +286 -0
- package/skills/pptx/scripts/office/helpers/__init__.py +0 -0
- package/skills/pptx/scripts/office/helpers/merge_runs.py +199 -0
- package/skills/pptx/scripts/office/helpers/simplify_redlines.py +197 -0
- package/skills/pptx/scripts/office/pack.py +159 -0
- package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chart.xsd +1499 -0
- package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chartDrawing.xsd +146 -0
- package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-diagram.xsd +1085 -0
- package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-lockedCanvas.xsd +11 -0
- package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-main.xsd +3081 -0
- package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-picture.xsd +23 -0
- package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-spreadsheetDrawing.xsd +185 -0
- package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-wordprocessingDrawing.xsd +287 -0
- package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/pml.xsd +1676 -0
- package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-additionalCharacteristics.xsd +28 -0
- package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-bibliography.xsd +144 -0
- package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-commonSimpleTypes.xsd +174 -0
- package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlDataProperties.xsd +25 -0
- package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlSchemaProperties.xsd +18 -0
- package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesCustom.xsd +59 -0
- package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesExtended.xsd +56 -0
- package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesVariantTypes.xsd +195 -0
- package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-math.xsd +582 -0
- package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-relationshipReference.xsd +25 -0
- package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/sml.xsd +4439 -0
- package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-main.xsd +570 -0
- package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-officeDrawing.xsd +509 -0
- package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-presentationDrawing.xsd +12 -0
- package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-spreadsheetDrawing.xsd +108 -0
- package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-wordprocessingDrawing.xsd +96 -0
- package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/wml.xsd +3646 -0
- package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/xml.xsd +116 -0
- package/skills/pptx/scripts/office/schemas/ecma/fouth-edition/opc-contentTypes.xsd +42 -0
- package/skills/pptx/scripts/office/schemas/ecma/fouth-edition/opc-coreProperties.xsd +50 -0
- package/skills/pptx/scripts/office/schemas/ecma/fouth-edition/opc-digSig.xsd +49 -0
- package/skills/pptx/scripts/office/schemas/ecma/fouth-edition/opc-relationships.xsd +33 -0
- package/skills/pptx/scripts/office/schemas/mce/mc.xsd +75 -0
- package/skills/pptx/scripts/office/schemas/microsoft/wml-2010.xsd +560 -0
- package/skills/pptx/scripts/office/schemas/microsoft/wml-2012.xsd +67 -0
- package/skills/pptx/scripts/office/schemas/microsoft/wml-2018.xsd +14 -0
- package/skills/pptx/scripts/office/schemas/microsoft/wml-cex-2018.xsd +20 -0
- package/skills/pptx/scripts/office/schemas/microsoft/wml-cid-2016.xsd +13 -0
- package/skills/pptx/scripts/office/schemas/microsoft/wml-sdtdatahash-2020.xsd +4 -0
- package/skills/pptx/scripts/office/schemas/microsoft/wml-symex-2015.xsd +8 -0
- package/skills/pptx/scripts/office/soffice.py +183 -0
- package/skills/pptx/scripts/office/unpack.py +132 -0
- package/skills/pptx/scripts/office/validate.py +117 -0
- package/skills/pptx/scripts/office/validators/__init__.py +15 -0
- package/skills/pptx/scripts/office/validators/base.py +851 -0
- package/skills/pptx/scripts/office/validators/docx.py +446 -0
- package/skills/pptx/scripts/office/validators/pptx.py +275 -0
- package/skills/pptx/scripts/office/validators/redlining.py +247 -0
- package/skills/pptx/scripts/thumbnail.py +289 -0
- package/skills/talent-creator/SKILL.md +486 -0
- package/skills/talent-creator/agents/analyzer.md +274 -0
- package/skills/talent-creator/agents/comparator.md +202 -0
- package/skills/talent-creator/agents/grader.md +223 -0
- package/skills/talent-creator/assets/eval_review.html +146 -0
- package/skills/talent-creator/eval-viewer/generate_review.py +471 -0
- package/skills/talent-creator/eval-viewer/viewer.html +1325 -0
- package/skills/talent-creator/references/schemas.md +430 -0
- package/skills/talent-creator/scripts/__init__.py +0 -0
- package/skills/talent-creator/scripts/aggregate_benchmark.py +401 -0
- package/skills/talent-creator/scripts/generate_report.py +326 -0
- package/skills/talent-creator/scripts/improve_description.py +247 -0
- package/skills/talent-creator/scripts/package_skill.py +136 -0
- package/skills/talent-creator/scripts/quick_validate.py +146 -0
- package/skills/talent-creator/scripts/run_eval.py +310 -0
- package/skills/talent-creator/scripts/run_loop.py +328 -0
- package/skills/talent-creator/scripts/utils.py +47 -0
- package/skills/xlsx/SKILL.md +300 -0
- package/skills/xlsx/scripts/office/helpers/__init__.py +0 -0
- package/skills/xlsx/scripts/office/helpers/merge_runs.py +199 -0
- package/skills/xlsx/scripts/office/helpers/simplify_redlines.py +197 -0
- package/skills/xlsx/scripts/office/pack.py +159 -0
- package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chart.xsd +1499 -0
- package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chartDrawing.xsd +146 -0
- package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-diagram.xsd +1085 -0
- package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-lockedCanvas.xsd +11 -0
- package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-main.xsd +3081 -0
- package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-picture.xsd +23 -0
- package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-spreadsheetDrawing.xsd +185 -0
- package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-wordprocessingDrawing.xsd +287 -0
- package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/pml.xsd +1676 -0
- package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-additionalCharacteristics.xsd +28 -0
- package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-bibliography.xsd +144 -0
- package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-commonSimpleTypes.xsd +174 -0
- package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlDataProperties.xsd +25 -0
- package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlSchemaProperties.xsd +18 -0
- package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesCustom.xsd +59 -0
- package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesExtended.xsd +56 -0
- package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesVariantTypes.xsd +195 -0
- package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-math.xsd +582 -0
- package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-relationshipReference.xsd +25 -0
- package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/sml.xsd +4439 -0
- package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-main.xsd +570 -0
- package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-officeDrawing.xsd +509 -0
- package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-presentationDrawing.xsd +12 -0
- package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-spreadsheetDrawing.xsd +108 -0
- package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-wordprocessingDrawing.xsd +96 -0
- package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/wml.xsd +3646 -0
- package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/xml.xsd +116 -0
- package/skills/xlsx/scripts/office/schemas/ecma/fouth-edition/opc-contentTypes.xsd +42 -0
- package/skills/xlsx/scripts/office/schemas/ecma/fouth-edition/opc-coreProperties.xsd +50 -0
- package/skills/xlsx/scripts/office/schemas/ecma/fouth-edition/opc-digSig.xsd +49 -0
- package/skills/xlsx/scripts/office/schemas/ecma/fouth-edition/opc-relationships.xsd +33 -0
- package/skills/xlsx/scripts/office/schemas/mce/mc.xsd +75 -0
- package/skills/xlsx/scripts/office/schemas/microsoft/wml-2010.xsd +560 -0
- package/skills/xlsx/scripts/office/schemas/microsoft/wml-2012.xsd +67 -0
- package/skills/xlsx/scripts/office/schemas/microsoft/wml-2018.xsd +14 -0
- package/skills/xlsx/scripts/office/schemas/microsoft/wml-cex-2018.xsd +20 -0
- package/skills/xlsx/scripts/office/schemas/microsoft/wml-cid-2016.xsd +13 -0
- package/skills/xlsx/scripts/office/schemas/microsoft/wml-sdtdatahash-2020.xsd +4 -0
- package/skills/xlsx/scripts/office/schemas/microsoft/wml-symex-2015.xsd +8 -0
- package/skills/xlsx/scripts/office/soffice.py +183 -0
- package/skills/xlsx/scripts/office/unpack.py +132 -0
- package/skills/xlsx/scripts/office/validate.py +117 -0
- package/skills/xlsx/scripts/office/validators/__init__.py +15 -0
- package/skills/xlsx/scripts/office/validators/base.py +851 -0
- package/skills/xlsx/scripts/office/validators/docx.py +446 -0
- package/skills/xlsx/scripts/office/validators/pptx.py +275 -0
- package/skills/xlsx/scripts/office/validators/redlining.py +247 -0
- package/skills/xlsx/scripts/recalc.py +184 -0
|
@@ -0,0 +1,328 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Run the eval + improve loop until all pass or max iterations reached.
|
|
3
|
+
|
|
4
|
+
Combines run_eval.py and improve_description.py in a loop, tracking history
|
|
5
|
+
and returning the best description found. Supports train/test split to prevent
|
|
6
|
+
overfitting.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import argparse
|
|
10
|
+
import json
|
|
11
|
+
import random
|
|
12
|
+
import sys
|
|
13
|
+
import tempfile
|
|
14
|
+
import time
|
|
15
|
+
import webbrowser
|
|
16
|
+
from pathlib import Path
|
|
17
|
+
|
|
18
|
+
from scripts.generate_report import generate_html
|
|
19
|
+
from scripts.improve_description import improve_description
|
|
20
|
+
from scripts.run_eval import find_project_root, run_eval
|
|
21
|
+
from scripts.utils import parse_skill_md
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def split_eval_set(eval_set: list[dict], holdout: float, seed: int = 42) -> tuple[list[dict], list[dict]]:
    """Split eval set into train and test sets, stratified by should_trigger.

    The entries are partitioned by the truthiness of their ``"should_trigger"``
    key, each partition is shuffled deterministically, and the first
    ``max(1, int(len(partition) * holdout))`` entries of each partition are
    held out for testing.

    Args:
        eval_set: Eval entries; each must have a ``"should_trigger"`` key.
        holdout: Fraction of each partition to hold out for testing.
        seed: Seed for the shuffle, so the same input always splits the same way.

    Returns:
        ``(train_set, test_set)``. Each list holds the should-trigger entries
        first, followed by the should-not-trigger entries.
    """
    # Use a private generator instead of random.seed(): it yields the same
    # shuffle order for a given seed but does not reset the process-wide RNG
    # that other code may be relying on.
    rng = random.Random(seed)

    # Separate by should_trigger
    trigger = [e for e in eval_set if e["should_trigger"]]
    no_trigger = [e for e in eval_set if not e["should_trigger"]]

    # Shuffle each group (trigger first, so the stream is consumed in a fixed order)
    rng.shuffle(trigger)
    rng.shuffle(no_trigger)

    # At least one entry per group goes to the test set; slicing an empty
    # group simply yields nothing.
    n_trigger_test = max(1, int(len(trigger) * holdout))
    n_no_trigger_test = max(1, int(len(no_trigger) * holdout))

    test_set = trigger[:n_trigger_test] + no_trigger[:n_no_trigger_test]
    train_set = trigger[n_trigger_test:] + no_trigger[n_no_trigger_test:]

    return train_set, test_set
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def _print_eval_stats(label: str, results: list[dict], elapsed: float) -> None:
    """Print aggregate and per-query trigger statistics for one result list to stderr.

    Each result is read for its "should_trigger", "triggers", "runs", "pass"
    and "query" keys. Counts are per run, not per query.
    """
    pos = [r for r in results if r["should_trigger"]]
    neg = [r for r in results if not r["should_trigger"]]
    tp = sum(r["triggers"] for r in pos)
    pos_runs = sum(r["runs"] for r in pos)
    fn = pos_runs - tp
    fp = sum(r["triggers"] for r in neg)
    neg_runs = sum(r["runs"] for r in neg)
    tn = neg_runs - fp
    total = tp + tn + fp + fn
    # With nothing to measure, precision/recall are reported as perfect and accuracy as zero.
    precision = tp / (tp + fp) if (tp + fp) > 0 else 1.0
    recall = tp / (tp + fn) if (tp + fn) > 0 else 1.0
    accuracy = (tp + tn) / total if total > 0 else 0.0
    print(f"{label}: {tp+tn}/{total} correct, precision={precision:.0%} recall={recall:.0%} accuracy={accuracy:.0%} ({elapsed:.1f}s)", file=sys.stderr)
    for r in results:
        status = "PASS" if r["pass"] else "FAIL"
        rate_str = f"{r['triggers']}/{r['runs']}"
        print(f" [{status}] rate={rate_str} expected={r['should_trigger']}: {r['query'][:60]}", file=sys.stderr)


def run_loop(
    eval_set: list[dict],
    skill_path: Path,
    description_override: str | None,
    num_workers: int,
    timeout: int,
    max_iterations: int,
    runs_per_query: int,
    trigger_threshold: float,
    holdout: float,
    model: str,
    verbose: bool,
    live_report_path: Path | None = None,
    log_dir: Path | None = None,
) -> dict:
    """Run the eval + improvement loop.

    Each iteration evaluates the current description on the train and test
    queries in a single ``run_eval`` batch, records the outcome in the history,
    and, unless every train query passed or the iteration budget is spent,
    asks ``improve_description`` for a new description based on the train
    results only.

    Args:
        eval_set: Eval entries; each is read for its "query" and "should_trigger" keys.
        skill_path: Skill directory, passed to ``parse_skill_md``.
        description_override: Starting description; when falsy, the one parsed
            from the skill is used.
        num_workers: Forwarded to ``run_eval``.
        timeout: Forwarded to ``run_eval``.
        max_iterations: Maximum number of evaluation rounds; must be at least 1.
        runs_per_query: Forwarded to ``run_eval``.
        trigger_threshold: Forwarded to ``run_eval``.
        holdout: Fraction of the eval set held out as a test set; values <= 0
            disable the split and use every entry for training.
        model: Forwarded to both ``run_eval`` and ``improve_description``.
        verbose: When true, progress is printed to stderr.
        live_report_path: If given, an auto-refreshing HTML report is rewritten
            there after every evaluation.
        log_dir: Forwarded to ``improve_description``.

    Returns:
        A dict with "exit_reason", "original_description", "best_description",
        "best_score", "best_train_score", "best_test_score",
        "final_description", "iterations_run", "holdout", "train_size",
        "test_size" and "history".

    Raises:
        ValueError: If ``max_iterations`` is less than 1.
    """
    # Without at least one round the history stays empty and picking the best
    # entry would fail with an opaque "max() arg is an empty sequence".
    if max_iterations < 1:
        raise ValueError(f"max_iterations must be at least 1, got {max_iterations}")

    project_root = find_project_root()
    name, original_description, content = parse_skill_md(skill_path)
    current_description = description_override or original_description

    # Split into train/test if holdout > 0
    if holdout > 0:
        train_set, test_set = split_eval_set(eval_set, holdout)
        if verbose:
            print(f"Split: {len(train_set)} train, {len(test_set)} test (holdout={holdout})", file=sys.stderr)
    else:
        train_set = eval_set
        test_set = []

    # Results are attributed to train or test by query text; the train set
    # never changes, so build the lookup once.
    train_queries_set = {q["query"] for q in train_set}

    history = []
    exit_reason = "unknown"

    for iteration in range(1, max_iterations + 1):
        if verbose:
            print(f"\n{'='*60}", file=sys.stderr)
            print(f"Iteration {iteration}/{max_iterations}", file=sys.stderr)
            print(f"Description: {current_description}", file=sys.stderr)
            print(f"{'='*60}", file=sys.stderr)

        # Evaluate train + test together in one batch for parallelism
        all_queries = train_set + test_set
        t0 = time.time()
        all_results = run_eval(
            eval_set=all_queries,
            skill_name=name,
            description=current_description,
            num_workers=num_workers,
            timeout=timeout,
            project_root=project_root,
            runs_per_query=runs_per_query,
            trigger_threshold=trigger_threshold,
            model=model,
        )
        eval_elapsed = time.time() - t0

        # Split results back into train/test by matching queries
        train_result_list = [r for r in all_results["results"] if r["query"] in train_queries_set]
        test_result_list = [r for r in all_results["results"] if r["query"] not in train_queries_set]

        train_passed = sum(1 for r in train_result_list if r["pass"])
        train_total = len(train_result_list)
        train_summary = {"passed": train_passed, "failed": train_total - train_passed, "total": train_total}
        train_results = {"results": train_result_list, "summary": train_summary}

        if test_set:
            test_passed = sum(1 for r in test_result_list if r["pass"])
            test_total = len(test_result_list)
            test_summary = {"passed": test_passed, "failed": test_total - test_passed, "total": test_total}
            test_results = {"results": test_result_list, "summary": test_summary}
        else:
            test_results = None
            test_summary = None

        history.append({
            "iteration": iteration,
            "description": current_description,
            "train_passed": train_summary["passed"],
            "train_failed": train_summary["failed"],
            "train_total": train_summary["total"],
            "train_results": train_results["results"],
            "test_passed": test_summary["passed"] if test_summary else None,
            "test_failed": test_summary["failed"] if test_summary else None,
            "test_total": test_summary["total"] if test_summary else None,
            "test_results": test_results["results"] if test_results else None,
            # For backward compat with report generator
            "passed": train_summary["passed"],
            "failed": train_summary["failed"],
            "total": train_summary["total"],
            "results": train_results["results"],
        })

        # Write live report if path provided
        if live_report_path:
            partial_output = {
                "original_description": original_description,
                "best_description": current_description,
                "best_score": "in progress",
                "iterations_run": len(history),
                "holdout": holdout,
                "train_size": len(train_set),
                "test_size": len(test_set),
                "history": history,
            }
            live_report_path.write_text(generate_html(partial_output, auto_refresh=True, skill_name=name))

        if verbose:
            _print_eval_stats("Train", train_results["results"], eval_elapsed)
            if test_summary:
                # Train and test ran in the same batch, so there is no separate test timing.
                _print_eval_stats("Test ", test_results["results"], 0)

        if train_summary["failed"] == 0:
            exit_reason = f"all_passed (iteration {iteration})"
            if verbose:
                print(f"\nAll train queries passed on iteration {iteration}!", file=sys.stderr)
            break

        if iteration == max_iterations:
            exit_reason = f"max_iterations ({max_iterations})"
            if verbose:
                print(f"\nMax iterations reached ({max_iterations}).", file=sys.stderr)
            break

        # Improve the description based on train results
        if verbose:
            print("\nImproving description...", file=sys.stderr)

        t0 = time.time()
        # Strip test scores from history so improvement model can't see them
        blinded_history = [
            {k: v for k, v in h.items() if not k.startswith("test_")}
            for h in history
        ]
        new_description = improve_description(
            skill_name=name,
            skill_content=content,
            current_description=current_description,
            eval_results=train_results,
            history=blinded_history,
            model=model,
            log_dir=log_dir,
            iteration=iteration,
        )
        improve_elapsed = time.time() - t0

        if verbose:
            print(f"Proposed ({improve_elapsed:.1f}s): {new_description}", file=sys.stderr)

        current_description = new_description

    # Find the best iteration by TEST score (or train if no test set).
    # max() keeps the earliest iteration among ties.
    if test_set:
        best = max(history, key=lambda h: h["test_passed"] or 0)
        best_score = f"{best['test_passed']}/{best['test_total']}"
    else:
        best = max(history, key=lambda h: h["train_passed"])
        best_score = f"{best['train_passed']}/{best['train_total']}"

    if verbose:
        print(f"\nExit reason: {exit_reason}", file=sys.stderr)
        print(f"Best score: {best_score} (iteration {best['iteration']})", file=sys.stderr)

    return {
        "exit_reason": exit_reason,
        "original_description": original_description,
        "best_description": best["description"],
        "best_score": best_score,
        "best_train_score": f"{best['train_passed']}/{best['train_total']}",
        "best_test_score": f"{best['test_passed']}/{best['test_total']}" if test_set else None,
        "final_description": current_description,
        "iterations_run": len(history),
        "holdout": holdout,
        "train_size": len(train_set),
        "test_size": len(test_set),
        "history": history,
    }
|
|
242
|
+
|
|
243
|
+
|
|
244
|
+
def main():
    """Command-line entry point for the eval + improve loop.

    Parses the arguments, checks that the skill directory contains a SKILL.md,
    optionally opens a live HTML report in the browser, runs ``run_loop`` and
    then prints the result as JSON on stdout. When ``--results-dir`` is given,
    the JSON and the final report are also saved to a timestamped
    subdirectory of it.
    """
    cli = argparse.ArgumentParser(description="Run eval + improve loop")
    cli.add_argument("--eval-set", required=True, help="Path to eval set JSON file")
    cli.add_argument("--skill-path", required=True, help="Path to skill directory")
    cli.add_argument("--description", default=None, help="Override starting description")
    cli.add_argument("--num-workers", type=int, default=10, help="Number of parallel workers")
    cli.add_argument("--timeout", type=int, default=30, help="Timeout per query in seconds")
    cli.add_argument("--max-iterations", type=int, default=5, help="Max improvement iterations")
    cli.add_argument("--runs-per-query", type=int, default=3, help="Number of runs per query")
    cli.add_argument("--trigger-threshold", type=float, default=0.5, help="Trigger rate threshold")
    cli.add_argument("--holdout", type=float, default=0.4, help="Fraction of eval set to hold out for testing (0 to disable)")
    cli.add_argument("--model", required=True, help="Model for improvement")
    cli.add_argument("--verbose", action="store_true", help="Print progress to stderr")
    cli.add_argument("--report", default="auto", help="Generate HTML report at this path (default: 'auto' for temp file, 'none' to disable)")
    cli.add_argument("--results-dir", default=None, help="Save all outputs (results.json, report.html, log.txt) to a timestamped subdirectory here")
    args = cli.parse_args()

    eval_set = json.loads(Path(args.eval_set).read_text())
    skill_path = Path(args.skill_path)

    skill_file = skill_path / "SKILL.md"
    if not skill_file.exists():
        print(f"Error: No SKILL.md found at {skill_path}", file=sys.stderr)
        sys.exit(1)

    # Only the skill name is needed here; it labels the HTML reports.
    name = parse_skill_md(skill_path)[0]

    # Live report: disabled by default, switched on unless --report is "none".
    live_report_path = None
    if args.report != "none":
        if args.report == "auto":
            stamp = time.strftime("%Y%m%d_%H%M%S")
            report_name = f"skill_description_report_{skill_path.name}_{stamp}.html"
            live_report_path = Path(tempfile.gettempdir()) / report_name
        else:
            live_report_path = Path(args.report)
        # Write a placeholder and open it right away so the user can watch progress.
        live_report_path.write_text("<html><body><h1>Starting optimization loop...</h1><meta http-equiv='refresh' content='5'></body></html>")
        webbrowser.open(str(live_report_path))

    # The output directory is created before the loop runs so logs can be written into it.
    results_dir = None
    log_dir = None
    if args.results_dir:
        results_dir = Path(args.results_dir) / time.strftime("%Y-%m-%d_%H%M%S")
        results_dir.mkdir(parents=True, exist_ok=True)
        log_dir = results_dir / "logs"

    output = run_loop(
        eval_set=eval_set,
        skill_path=skill_path,
        description_override=args.description,
        num_workers=args.num_workers,
        timeout=args.timeout,
        max_iterations=args.max_iterations,
        runs_per_query=args.runs_per_query,
        trigger_threshold=args.trigger_threshold,
        holdout=args.holdout,
        model=args.model,
        verbose=args.verbose,
        live_report_path=live_report_path,
        log_dir=log_dir,
    )

    # The JSON result always goes to stdout, and to results.json when saving.
    serialized = json.dumps(output, indent=2)
    print(serialized)
    if results_dir is not None:
        (results_dir / "results.json").write_text(serialized)

    # Replace the live report with the final one (no auto-refresh) and keep a
    # copy next to the other saved results.
    if live_report_path is not None:
        live_report_path.write_text(generate_html(output, auto_refresh=False, skill_name=name))
        print(f"\nReport: {live_report_path}", file=sys.stderr)
        if results_dir is not None:
            (results_dir / "report.html").write_text(generate_html(output, auto_refresh=False, skill_name=name))

    if results_dir is not None:
        print(f"Results saved to: {results_dir}", file=sys.stderr)


if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
"""Shared utilities for skill-creator scripts."""
|
|
2
|
+
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def parse_skill_md(skill_path: Path) -> tuple[str, str, str]:
    """Parse a SKILL.md file, returning (name, description, full_content).

    Only the top-level ``name:`` and ``description:`` keys of the YAML
    frontmatter are extracted; this is a deliberately small line-based
    parser, not a full YAML implementation.

    Args:
        skill_path: Directory that contains the ``SKILL.md`` file.

    Returns:
        A ``(name, description, full_content)`` tuple. ``name`` and
        ``description`` are empty strings when the key is absent, and
        ``full_content`` is the whole file, frontmatter included.

    Raises:
        ValueError: If the first line is not ``---`` or no closing ``---``
            line follows it.
    """
    # SKILL.md files routinely contain non-ASCII text, so decode explicitly
    # instead of relying on the platform's default encoding.
    content = (skill_path / "SKILL.md").read_text(encoding="utf-8")
    lines = content.split("\n")

    if lines[0].strip() != "---":
        raise ValueError("SKILL.md missing frontmatter (no opening ---)")

    # Index (within ``lines``) of the first closing delimiter after line 0.
    end_idx = next(
        (i for i, line in enumerate(lines[1:], start=1) if line.strip() == "---"),
        None,
    )
    if end_idx is None:
        raise ValueError("SKILL.md missing frontmatter (no closing ---)")

    # YAML block-scalar headers: folded/literal with strip, clip or keep chomping.
    block_indicators = (">", "|", ">-", "|-", ">+", "|+")

    name = ""
    description = ""
    frontmatter_lines = lines[1:end_idx]
    i = 0
    while i < len(frontmatter_lines):
        line = frontmatter_lines[i]
        if line.startswith("name:"):
            name = line[len("name:"):].strip().strip('"').strip("'")
        elif line.startswith("description:"):
            value = line[len("description:"):].strip()
            if value in block_indicators:
                # Collect the indented body of the block scalar. Blank lines
                # belong to the scalar, so they are skipped rather than
                # treated as the end of the description.
                continuation_lines: list[str] = []
                i += 1
                while i < len(frontmatter_lines):
                    candidate = frontmatter_lines[i]
                    text = candidate.strip()
                    if text and not candidate.startswith((" ", "\t")):
                        break  # next top-level key
                    if text:
                        continuation_lines.append(text)
                    i += 1
                description = " ".join(continuation_lines)
                # ``i`` already points at the next unprocessed line.
                continue
            description = value.strip('"').strip("'")
        i += 1

    return name, description, content
|
|
@@ -0,0 +1,300 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: xlsx
|
|
3
|
+
description: "Reach for this skill whenever a spreadsheet is the main thing being consumed or produced. That covers any request to: open, inspect, modify, or repair an existing .xlsx, .xlsm, .csv, or .tsv (such as inserting columns, building formulas, applying formatting, adding charts, or tidying messy data); build a brand-new spreadsheet either blank or from another data source; or translate one tabular format into another. It applies especially when someone points at a spreadsheet by name or path — even loosely (\"that xlsx in my downloads\") — and wants it acted on or generated. It also covers turning disordered tabular files (broken rows, headers in the wrong place, garbage data) into clean spreadsheets. The end result has to be a spreadsheet file. Skip it when the real deliverable is a Word document, an HTML report, a standalone Python script, a database pipeline, or a Google Sheets API integration, even when tables are part of the work."
|
|
4
|
+
license: WormClaude
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
# Requirements for Outputs
|
|
8
|
+
|
|
9
|
+
## All Excel files
|
|
10
|
+
|
|
11
|
+
### Professional Font
|
|
12
|
+
- Stick to one clean, business-appropriate typeface (Arial, Times New Roman, and the like) across every deliverable, unless the user tells you otherwise
|
|
13
|
+
|
|
14
|
+
### Zero Formula Errors
|
|
15
|
+
- Ship every Excel model entirely free of formula errors (#REF!, #DIV/0!, #VALUE!, #N/A, #NAME?)
|
|
16
|
+
|
|
17
|
+
### Preserve Existing Templates (when updating templates)
|
|
18
|
+
- When editing a file, examine its current format, style, and conventions and replicate them precisely
|
|
19
|
+
- Don't force your own standardized look onto a file that already has its own patterns
|
|
20
|
+
- Whatever conventions the template already uses take precedence over the guidance here
|
|
21
|
+
|
|
22
|
+
## Financial models
|
|
23
|
+
|
|
24
|
+
### Color Coding Standards
|
|
25
|
+
Apply the conventions below unless the user or an existing template specifies otherwise.
|
|
26
|
+
|
|
27
|
+
#### Industry-Standard Color Conventions
|
|
28
|
+
- **Blue text (RGB: 0,0,255)**: Hardcoded inputs and the figures users tweak when running scenarios
|
|
29
|
+
- **Black text (RGB: 0,0,0)**: Every formula and calculation
|
|
30
|
+
- **Green text (RGB: 0,128,0)**: References that pull from another sheet in the same workbook
|
|
31
|
+
- **Red text (RGB: 255,0,0)**: Links reaching out to separate files
|
|
32
|
+
- **Yellow background (RGB: 255,255,0)**: Critical assumptions that need review or cells awaiting an update
|
|
33
|
+
|
|
34
|
+
### Number Formatting Standards
|
|
35
|
+
|
|
36
|
+
#### Required Format Rules
|
|
37
|
+
- **Years**: Render them as text so they read "2024" rather than "2,024"
|
|
38
|
+
- **Currency**: Apply the $#,##0 format and always state the units in the header ("Revenue ($mm)")
|
|
39
|
+
- **Zeros**: Lean on number formatting so zeros display as "-", percentages included (e.g., "$#,##0;($#,##0);-")
|
|
40
|
+
- **Percentages**: Use 0.0% (a single decimal) by default
|
|
41
|
+
- **Multiples**: Show valuation multiples like EV/EBITDA and P/E as 0.0x
|
|
42
|
+
- **Negative numbers**: Wrap them in parentheses (123) instead of writing -123
|
|
43
|
+
|
|
44
|
+
### Formula Construction Rules
|
|
45
|
+
|
|
46
|
+
#### Assumptions Placement
|
|
47
|
+
- Keep every assumption (growth rates, margins, multiples, and so on) in its own dedicated cell
|
|
48
|
+
- Point formulas at cell references rather than burying literal values inside them
|
|
49
|
+
- Example: Use =B5*(1+$B$6) instead of =B5*1.05
|
|
50
|
+
|
|
51
|
+
#### Formula Error Prevention
|
|
52
|
+
- Confirm each cell reference is pointing where it should
|
|
53
|
+
- Watch for off-by-one mistakes when defining ranges
|
|
54
|
+
- Keep formulas uniform across every projection period
|
|
55
|
+
- Stress-test with edge cases such as zeros and negative figures
|
|
56
|
+
- Make sure no accidental circular references have crept in
|
|
57
|
+
|
|
58
|
+
#### Documentation Requirements for Hardcodes
|
|
59
|
+
- Document the source of every hardcoded value in a cell comment or in the adjacent cell (when the table ends there). Format: "Source: [System/Document], [Date], [Specific Reference], [URL if applicable]"
|
|
60
|
+
- Examples:
|
|
61
|
+
- "Source: Company 10-K, FY2024, Page 45, Revenue Note, [SEC EDGAR URL]"
|
|
62
|
+
- "Source: Company 10-Q, Q2 2025, Exhibit 99.1, [SEC EDGAR URL]"
|
|
63
|
+
- "Source: Bloomberg Terminal, 8/15/2025, AAPL US Equity"
|
|
64
|
+
- "Source: FactSet, 8/20/2025, Consensus Estimates Screen"
|
|
65
|
+
|
|
66
|
+
# XLSX creation, editing, and analysis
|
|
67
|
+
|
|
68
|
+
## Overview
|
|
69
|
+
|
|
70
|
+
Users will sometimes want you to build, modify, or examine what's inside an .xlsx file. Depending on the job, you have a range of tools and workflows to draw on.
|
|
71
|
+
|
|
72
|
+
## Important Requirements
|
|
73
|
+
|
|
74
|
+
**LibreOffice Required for Formula Recalculation**: Assume LibreOffice is present so the `scripts/recalc.py` script can recompute formula results. On its first run the script sets LibreOffice up on its own — even in sandboxed setups where Unix sockets are locked down, which `scripts/office/soffice.py` takes care of
|
|
75
|
+
|
|
76
|
+
## Reading and analyzing data
|
|
77
|
+
|
|
78
|
+
### Quick text dump
|
|
79
|
+
```bash
|
|
80
|
+
# Tab-separated rows under `## Sheet:` headers
|
|
81
|
+
extract-text file.xlsx | head -100
|
|
82
|
+
# .xlsm: same zip structure, override the extension
|
|
83
|
+
extract-text --format xlsx file.xlsm | head -100
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
### Data analysis with pandas
|
|
87
|
+
When you need to analyze, visualize, or run everyday operations on data, reach for **pandas** and its strong data-wrangling toolkit:
|
|
88
|
+
|
|
89
|
+
```python
|
|
90
|
+
import pandas as pd
|
|
91
|
+
|
|
92
|
+
# Read Excel
|
|
93
|
+
df = pd.read_excel('file.xlsx') # Default: first sheet
|
|
94
|
+
all_sheets = pd.read_excel('file.xlsx', sheet_name=None) # All sheets as dict
|
|
95
|
+
|
|
96
|
+
# Analyze
|
|
97
|
+
df.head() # Preview data
|
|
98
|
+
df.info() # Column info
|
|
99
|
+
df.describe() # Statistics
|
|
100
|
+
|
|
101
|
+
# Write Excel
|
|
102
|
+
df.to_excel('output.xlsx', index=False)
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
## Excel File Workflows
|
|
106
|
+
|
|
107
|
+
## CRITICAL: Use Formulas, Not Hardcoded Values
|
|
108
|
+
|
|
109
|
+
**Compute things with Excel formulas rather than working out the numbers in Python and pasting fixed results.** That keeps the spreadsheet live and easy to update.
|
|
110
|
+
|
|
111
|
+
### ❌ WRONG - Hardcoding Calculated Values
|
|
112
|
+
```python
|
|
113
|
+
# Bad: Calculating in Python and hardcoding result
|
|
114
|
+
total = df['Sales'].sum()
|
|
115
|
+
sheet['B10'] = total # Hardcodes 5000
|
|
116
|
+
|
|
117
|
+
# Bad: Computing growth rate in Python
|
|
118
|
+
growth = (df.iloc[-1]['Revenue'] - df.iloc[0]['Revenue']) / df.iloc[0]['Revenue']
|
|
119
|
+
sheet['C5'] = growth # Hardcodes 0.15
|
|
120
|
+
|
|
121
|
+
# Bad: Python calculation for average
|
|
122
|
+
avg = sum(values) / len(values)
|
|
123
|
+
sheet['D20'] = avg # Hardcodes 42.5
|
|
124
|
+
```
|
|
125
|
+
|
|
126
|
+
### ✅ CORRECT - Using Excel Formulas
|
|
127
|
+
```python
|
|
128
|
+
# Good: Let Excel calculate the sum
|
|
129
|
+
sheet['B10'] = '=SUM(B2:B9)'
|
|
130
|
+
|
|
131
|
+
# Good: Growth rate as Excel formula
|
|
132
|
+
sheet['C5'] = '=(C4-C2)/C2'
|
|
133
|
+
|
|
134
|
+
# Good: Average using Excel function
|
|
135
|
+
sheet['D20'] = '=AVERAGE(D2:D19)'
|
|
136
|
+
```
|
|
137
|
+
|
|
138
|
+
This holds for every kind of calculation — totals, percentages, ratios, differences, and the rest. The spreadsheet must be able to recompute itself whenever the underlying data shifts.
|
|
139
|
+
|
|
140
|
+
## Common Workflow
|
|
141
|
+
1. **Choose tool**: pandas for data, openpyxl for formulas/formatting
|
|
142
|
+
2. **Create/Load**: Start a new workbook or open an existing file
|
|
143
|
+
3. **Modify**: Add or change data, formulas, and formatting
|
|
144
|
+
4. **Save**: Write to file
|
|
145
|
+
5. **Recalculate formulas (MANDATORY IF USING FORMULAS)**: Use the scripts/recalc.py script
|
|
146
|
+
```bash
|
|
147
|
+
python scripts/recalc.py output.xlsx
|
|
148
|
+
```
|
|
149
|
+
6. **Verify and fix any errors**:
|
|
150
|
+
- The script hands back JSON describing any errors
|
|
151
|
+
- When `status` reads `errors_found`, look at `error_summary` to see which error types occurred and where
|
|
152
|
+
- Correct what it flags, then run the recalculation again
|
|
153
|
+
- Typical errors you'll need to resolve:
|
|
154
|
+
- `#REF!`: Invalid cell references
|
|
155
|
+
- `#DIV/0!`: Division by zero
|
|
156
|
+
- `#VALUE!`: Wrong data type in formula
|
|
157
|
+
- `#NAME?`: Unrecognized formula name
|
|
158
|
+
|
|
159
|
+
### Creating new Excel files
|
|
160
|
+
|
|
161
|
+
```python
|
|
162
|
+
# Using openpyxl for formulas and formatting
|
|
163
|
+
from openpyxl import Workbook
|
|
164
|
+
from openpyxl.styles import Font, PatternFill, Alignment
|
|
165
|
+
|
|
166
|
+
wb = Workbook()
|
|
167
|
+
sheet = wb.active
|
|
168
|
+
|
|
169
|
+
# Add data
|
|
170
|
+
sheet['A1'] = 'Hello'
|
|
171
|
+
sheet['B1'] = 'World'
|
|
172
|
+
sheet.append(['Row', 'of', 'data'])
|
|
173
|
+
|
|
174
|
+
# Add formula
|
|
175
|
+
sheet['B2'] = '=SUM(A1:A10)'
|
|
176
|
+
|
|
177
|
+
# Formatting
|
|
178
|
+
sheet['A1'].font = Font(bold=True, color='FF0000')
|
|
179
|
+
sheet['A1'].fill = PatternFill('solid', start_color='FFFF00')
|
|
180
|
+
sheet['A1'].alignment = Alignment(horizontal='center')
|
|
181
|
+
|
|
182
|
+
# Column width
|
|
183
|
+
sheet.column_dimensions['A'].width = 20
|
|
184
|
+
|
|
185
|
+
wb.save('output.xlsx')
|
|
186
|
+
```
|
|
187
|
+
|
|
188
|
+
### Editing existing Excel files
|
|
189
|
+
|
|
190
|
+
```python
|
|
191
|
+
# Using openpyxl to preserve formulas and formatting
|
|
192
|
+
from openpyxl import load_workbook
|
|
193
|
+
|
|
194
|
+
# Load existing file
|
|
195
|
+
wb = load_workbook('existing.xlsx')
|
|
196
|
+
sheet = wb.active # or wb['SheetName'] for specific sheet
|
|
197
|
+
|
|
198
|
+
# Working with multiple sheets
|
|
199
|
+
for sheet_name in wb.sheetnames:
|
|
200
|
+
sheet = wb[sheet_name]
|
|
201
|
+
print(f"Sheet: {sheet_name}")
|
|
202
|
+
|
|
203
|
+
# Modify cells
|
|
204
|
+
sheet['A1'] = 'New Value'
|
|
205
|
+
sheet.insert_rows(2) # Insert row at position 2
|
|
206
|
+
sheet.delete_cols(3) # Delete column 3
|
|
207
|
+
|
|
208
|
+
# Add new sheet
|
|
209
|
+
new_sheet = wb.create_sheet('NewSheet')
|
|
210
|
+
new_sheet['A1'] = 'Data'
|
|
211
|
+
|
|
212
|
+
wb.save('modified.xlsx')
|
|
213
|
+
```
|
|
214
|
+
|
|
215
|
+
## Recalculating formulas
|
|
216
|
+
|
|
217
|
+
When openpyxl writes or edits an Excel file, it stores formulas as text but leaves their computed results empty. Run the bundled `scripts/recalc.py` script to evaluate them:
|
|
218
|
+
|
|
219
|
+
```bash
|
|
220
|
+
python scripts/recalc.py <excel_file> [timeout_seconds]
|
|
221
|
+
```
|
|
222
|
+
|
|
223
|
+
Example:
|
|
224
|
+
```bash
|
|
225
|
+
python scripts/recalc.py output.xlsx 30
|
|
226
|
+
```
|
|
227
|
+
|
|
228
|
+
What the script does:
|
|
229
|
+
- Installs the LibreOffice macro by itself the first time it runs
|
|
230
|
+
- Re-evaluates every formula across all sheets
|
|
231
|
+
- Sweeps each cell looking for Excel errors (#REF!, #DIV/0!, and so on)
|
|
232
|
+
- Emits JSON spelling out where errors are and how many there are
|
|
233
|
+
- Runs on Linux as well as macOS
|
|
234
|
+
|
|
235
|
+
## Formula Verification Checklist
|
|
236
|
+
|
|
237
|
+
Fast checks to confirm your formulas behave:
|
|
238
|
+
|
|
239
|
+
### Essential Verification
|
|
240
|
+
- [ ] **Test 2-3 sample references**: Make sure they return the right values before you build out the whole model
|
|
241
|
+
- [ ] **Column mapping**: Double-check the Excel columns line up (e.g., column 64 = BL, not BK)
|
|
242
|
+
- [ ] **Row offset**: Keep in mind Excel rows start at 1 (DataFrame row 5 = Excel row 6)
|
|
243
|
+
|
|
244
|
+
### Common Pitfalls
|
|
245
|
+
- [ ] **NaN handling**: Screen for null values with `pd.notna()`
|
|
246
|
+
- [ ] **Far-right columns**: FY data frequently sits out in columns 50+
|
|
247
|
+
- [ ] **Multiple matches**: Look through every occurrence, not only the first one
|
|
248
|
+
- [ ] **Division by zero**: Inspect denominators before you put a `/` in a formula (#DIV/0!)
|
|
249
|
+
- [ ] **Wrong references**: Make sure each cell reference lands on the cell you meant (#REF!)
|
|
250
|
+
- [ ] **Cross-sheet references**: Link sheets using the right syntax (Sheet1!A1)
|
|
251
|
+
|
|
252
|
+
### Formula Testing Strategy
|
|
253
|
+
- [ ] **Start small**: Try formulas on a handful of cells before rolling them out widely
|
|
254
|
+
- [ ] **Verify dependencies**: Confirm every cell a formula leans on actually exists
|
|
255
|
+
- [ ] **Test edge cases**: Cover zero, negative, and very large values
|
|
256
|
+
|
|
257
|
+
### Interpreting scripts/recalc.py Output
|
|
258
|
+
The script gives back JSON laying out the errors:
|
|
259
|
+
```json
|
|
260
|
+
{
|
|
261
|
+
"status": "success", // or "errors_found"
|
|
262
|
+
"total_errors": 0, // Total error count
|
|
263
|
+
"total_formulas": 42, // Number of formulas in file
|
|
264
|
+
"error_summary": { // Only present if errors found
|
|
265
|
+
"#REF!": {
|
|
266
|
+
"count": 2,
|
|
267
|
+
"locations": ["Sheet1!B5", "Sheet1!C10"]
|
|
268
|
+
}
|
|
269
|
+
}
|
|
270
|
+
}
|
|
271
|
+
```
|
|
272
|
+
|
|
273
|
+
## Best Practices
|
|
274
|
+
|
|
275
|
+
### Library Selection
|
|
276
|
+
- **pandas**: The go-to for analysis, large-scale operations, and straightforward data export
|
|
277
|
+
- **openpyxl**: The go-to for intricate formatting, formulas, and Excel-only capabilities
|
|
278
|
+
|
|
279
|
+
### Working with openpyxl
|
|
280
|
+
- Cell indices count from 1 (row=1, column=1 means cell A1)
|
|
281
|
+
- Pass `data_only=True` to read the computed values: `load_workbook('file.xlsx', data_only=True)`
|
|
282
|
+
- **Warning**: Opening with `data_only=True` and then saving overwrites formulas with their values for good
|
|
283
|
+
- For big files, read with `read_only=True` or write with `write_only=True`
|
|
284
|
+
- Formulas survive but aren't evaluated — run scripts/recalc.py to refresh the values
|
|
285
|
+
|
|
286
|
+
### Working with pandas
|
|
287
|
+
- Pin down data types so nothing gets misinferred: `pd.read_excel('file.xlsx', dtype={'id': str})`
|
|
288
|
+
- With large files, pull only the columns you need: `pd.read_excel('file.xlsx', usecols=['A', 'C', 'E'])`
|
|
289
|
+
- Treat dates correctly: `pd.read_excel('file.xlsx', parse_dates=['date_column'])`
|
|
290
|
+
|
|
291
|
+
## Code Style Guidelines
|
|
292
|
+
**IMPORTANT**: When you write Python for Excel work:
|
|
293
|
+
- Keep the code lean and to the point, skipping needless comments
|
|
294
|
+
- Steer clear of long-winded variable names and repetitive steps
|
|
295
|
+
- Leave out print statements that serve no purpose
|
|
296
|
+
|
|
297
|
+
**For Excel files themselves**:
|
|
298
|
+
- Attach comments to cells holding tricky formulas or notable assumptions
|
|
299
|
+
- Record where hardcoded values came from
|
|
300
|
+
- Leave notes explaining the main calculations and model sections
|
|
File without changes
|