@panda-agent/panda-cli 0.1.28 → 0.1.30

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (167) hide show
  1. package/dist/panda-cli-ink.bundle.mjs +267 -258
  2. package/package.json +6 -4
  3. package/skills/.gitkeep +0 -0
  4. package/skills/README.md +13 -0
  5. package/skills/docx/.skill-metadata.yaml +173 -0
  6. package/skills/docx/LICENSE.txt +30 -0
  7. package/skills/docx/SKILL.md +589 -0
  8. package/skills/docx/scripts/__init__.py +1 -0
  9. package/skills/docx/scripts/accept_changes.py +206 -0
  10. package/skills/docx/scripts/comment.py +442 -0
  11. package/skills/docx/scripts/office/helpers/__init__.py +1 -0
  12. package/skills/docx/scripts/office/helpers/merge_runs.py +190 -0
  13. package/skills/docx/scripts/office/helpers/simplify_redlines.py +185 -0
  14. package/skills/docx/scripts/office/pack.py +167 -0
  15. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chart.xsd +1499 -0
  16. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chartDrawing.xsd +146 -0
  17. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-diagram.xsd +1085 -0
  18. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-lockedCanvas.xsd +11 -0
  19. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-main.xsd +3081 -0
  20. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-picture.xsd +23 -0
  21. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-spreadsheetDrawing.xsd +185 -0
  22. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-wordprocessingDrawing.xsd +287 -0
  23. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/pml.xsd +1676 -0
  24. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-additionalCharacteristics.xsd +28 -0
  25. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-bibliography.xsd +144 -0
  26. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-commonSimpleTypes.xsd +174 -0
  27. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlDataProperties.xsd +25 -0
  28. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlSchemaProperties.xsd +18 -0
  29. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesCustom.xsd +59 -0
  30. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesExtended.xsd +56 -0
  31. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesVariantTypes.xsd +195 -0
  32. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-math.xsd +582 -0
  33. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-relationshipReference.xsd +25 -0
  34. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/sml.xsd +4439 -0
  35. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-main.xsd +570 -0
  36. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-officeDrawing.xsd +509 -0
  37. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-presentationDrawing.xsd +12 -0
  38. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-spreadsheetDrawing.xsd +108 -0
  39. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-wordprocessingDrawing.xsd +96 -0
  40. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/wml.xsd +3646 -0
  41. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/xml.xsd +116 -0
  42. package/skills/docx/scripts/office/schemas/ecma/fouth-edition/opc-contentTypes.xsd +42 -0
  43. package/skills/docx/scripts/office/schemas/ecma/fouth-edition/opc-coreProperties.xsd +50 -0
  44. package/skills/docx/scripts/office/schemas/ecma/fouth-edition/opc-digSig.xsd +49 -0
  45. package/skills/docx/scripts/office/schemas/ecma/fouth-edition/opc-relationships.xsd +33 -0
  46. package/skills/docx/scripts/office/schemas/mce/mc.xsd +75 -0
  47. package/skills/docx/scripts/office/schemas/microsoft/wml-2010.xsd +560 -0
  48. package/skills/docx/scripts/office/schemas/microsoft/wml-2012.xsd +67 -0
  49. package/skills/docx/scripts/office/schemas/microsoft/wml-2018.xsd +14 -0
  50. package/skills/docx/scripts/office/schemas/microsoft/wml-cex-2018.xsd +20 -0
  51. package/skills/docx/scripts/office/schemas/microsoft/wml-cid-2016.xsd +13 -0
  52. package/skills/docx/scripts/office/schemas/microsoft/wml-sdtdatahash-2020.xsd +4 -0
  53. package/skills/docx/scripts/office/schemas/microsoft/wml-symex-2015.xsd +8 -0
  54. package/skills/docx/scripts/office/soffice.py +194 -0
  55. package/skills/docx/scripts/office/unpack.py +145 -0
  56. package/skills/docx/scripts/office/validate.py +114 -0
  57. package/skills/docx/scripts/office/validators/__init__.py +16 -0
  58. package/skills/docx/scripts/office/validators/base.py +733 -0
  59. package/skills/docx/scripts/office/validators/docx.py +354 -0
  60. package/skills/docx/scripts/office/validators/pptx.py +230 -0
  61. package/skills/docx/scripts/office/validators/redlining.py +212 -0
  62. package/skills/docx/scripts/templates/comments.xml +3 -0
  63. package/skills/docx/scripts/templates/commentsExtended.xml +3 -0
  64. package/skills/docx/scripts/templates/commentsExtensible.xml +3 -0
  65. package/skills/docx/scripts/templates/commentsIds.xml +3 -0
  66. package/skills/docx/scripts/templates/people.xml +3 -0
  67. package/skills/frontend-design/LICENSE.txt +177 -0
  68. package/skills/frontend-design/SKILL.md +42 -0
  69. package/skills/pdf/.skill-metadata.yaml +273 -0
  70. package/skills/pdf/LICENSE.txt +30 -0
  71. package/skills/pdf/SKILL.md +324 -0
  72. package/skills/pdf/advanced-reference.md +609 -0
  73. package/skills/pdf/form-filling-guide.md +318 -0
  74. package/skills/pdf/forms.md +294 -0
  75. package/skills/pdf/reference.md +612 -0
  76. package/skills/pdf/scripts/check_bounding_boxes.py +198 -0
  77. package/skills/pdf/scripts/check_fillable_fields.py +64 -0
  78. package/skills/pdf/scripts/convert_pdf_to_images.py +102 -0
  79. package/skills/pdf/scripts/create_validation_image.py +125 -0
  80. package/skills/pdf/scripts/extract_form_field_info.py +220 -0
  81. package/skills/pdf/scripts/extract_form_structure.py +202 -0
  82. package/skills/pdf/scripts/fill_fillable_fields.py +205 -0
  83. package/skills/pdf/scripts/fill_pdf_form_with_annotations.py +193 -0
  84. package/skills/pptx-generator/SKILL.md +204 -0
  85. package/skills/pptx-generator/assets/styles/business.json +8 -0
  86. package/skills/pptx-generator/assets/styles/minimal.json +8 -0
  87. package/skills/pptx-generator/assets/styles/modern.json +8 -0
  88. package/skills/pptx-generator/assets/templates/ppt_data_template.json +40 -0
  89. package/skills/pptx-generator/references/collaboration_guide.md +381 -0
  90. package/skills/pptx-generator/references/json_format_spec.md +215 -0
  91. package/skills/pptx-generator/references/layout_guide.md +290 -0
  92. package/skills/pptx-generator/scripts/json_validator.py +194 -0
  93. package/skills/pptx-generator/scripts/pptx_builder.py +340 -0
  94. package/skills/pptx-generator/scripts/pptx_validator.py +162 -0
  95. package/skills/skill-creator/LICENSE.txt +202 -0
  96. package/skills/skill-creator/SKILL.md +479 -0
  97. package/skills/skill-creator/agents/analyzer.md +274 -0
  98. package/skills/skill-creator/agents/comparator.md +202 -0
  99. package/skills/skill-creator/agents/grader.md +223 -0
  100. package/skills/skill-creator/assets/eval_review.html +146 -0
  101. package/skills/skill-creator/eval-viewer/generate_review.py +471 -0
  102. package/skills/skill-creator/eval-viewer/viewer.html +1325 -0
  103. package/skills/skill-creator/references/schemas.md +430 -0
  104. package/skills/skill-creator/scripts/__init__.py +0 -0
  105. package/skills/skill-creator/scripts/aggregate_benchmark.py +401 -0
  106. package/skills/skill-creator/scripts/generate_report.py +326 -0
  107. package/skills/skill-creator/scripts/improve_description.py +248 -0
  108. package/skills/skill-creator/scripts/package_skill.py +136 -0
  109. package/skills/skill-creator/scripts/quick_validate.py +103 -0
  110. package/skills/skill-creator/scripts/run_eval.py +310 -0
  111. package/skills/skill-creator/scripts/run_loop.py +332 -0
  112. package/skills/skill-creator/scripts/utils.py +47 -0
  113. package/skills/xlsx/.skill-metadata.yaml +185 -0
  114. package/skills/xlsx/LICENSE.txt +30 -0
  115. package/skills/xlsx/SKILL.md +233 -0
  116. package/skills/xlsx/scripts/office/helpers/__init__.py +1 -0
  117. package/skills/xlsx/scripts/office/helpers/merge_runs.py +226 -0
  118. package/skills/xlsx/scripts/office/helpers/simplify_redlines.py +198 -0
  119. package/skills/xlsx/scripts/office/pack.py +162 -0
  120. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chart.xsd +1499 -0
  121. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chartDrawing.xsd +146 -0
  122. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-diagram.xsd +1085 -0
  123. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-lockedCanvas.xsd +11 -0
  124. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-main.xsd +3081 -0
  125. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-picture.xsd +23 -0
  126. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-spreadsheetDrawing.xsd +185 -0
  127. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-wordprocessingDrawing.xsd +287 -0
  128. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/pml.xsd +1676 -0
  129. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-additionalCharacteristics.xsd +28 -0
  130. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-bibliography.xsd +144 -0
  131. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-commonSimpleTypes.xsd +174 -0
  132. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlDataProperties.xsd +25 -0
  133. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlSchemaProperties.xsd +18 -0
  134. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesCustom.xsd +59 -0
  135. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesExtended.xsd +56 -0
  136. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesVariantTypes.xsd +195 -0
  137. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-math.xsd +582 -0
  138. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-relationshipReference.xsd +25 -0
  139. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/sml.xsd +4439 -0
  140. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-main.xsd +570 -0
  141. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-officeDrawing.xsd +509 -0
  142. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-presentationDrawing.xsd +12 -0
  143. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-spreadsheetDrawing.xsd +108 -0
  144. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-wordprocessingDrawing.xsd +96 -0
  145. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/wml.xsd +3646 -0
  146. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/xml.xsd +116 -0
  147. package/skills/xlsx/scripts/office/schemas/ecma/fouth-edition/opc-contentTypes.xsd +42 -0
  148. package/skills/xlsx/scripts/office/schemas/ecma/fouth-edition/opc-coreProperties.xsd +50 -0
  149. package/skills/xlsx/scripts/office/schemas/ecma/fouth-edition/opc-digSig.xsd +49 -0
  150. package/skills/xlsx/scripts/office/schemas/ecma/fouth-edition/opc-relationships.xsd +33 -0
  151. package/skills/xlsx/scripts/office/schemas/mce/mc.xsd +75 -0
  152. package/skills/xlsx/scripts/office/schemas/microsoft/wml-2010.xsd +560 -0
  153. package/skills/xlsx/scripts/office/schemas/microsoft/wml-2012.xsd +67 -0
  154. package/skills/xlsx/scripts/office/schemas/microsoft/wml-2018.xsd +14 -0
  155. package/skills/xlsx/scripts/office/schemas/microsoft/wml-cex-2018.xsd +20 -0
  156. package/skills/xlsx/scripts/office/schemas/microsoft/wml-cid-2016.xsd +13 -0
  157. package/skills/xlsx/scripts/office/schemas/microsoft/wml-sdtdatahash-2020.xsd +4 -0
  158. package/skills/xlsx/scripts/office/schemas/microsoft/wml-symex-2015.xsd +8 -0
  159. package/skills/xlsx/scripts/office/soffice.py +185 -0
  160. package/skills/xlsx/scripts/office/unpack.py +146 -0
  161. package/skills/xlsx/scripts/office/validate.py +108 -0
  162. package/skills/xlsx/scripts/office/validators/__init__.py +13 -0
  163. package/skills/xlsx/scripts/office/validators/base.py +800 -0
  164. package/skills/xlsx/scripts/office/validators/docx.py +383 -0
  165. package/skills/xlsx/scripts/office/validators/pptx.py +250 -0
  166. package/skills/xlsx/scripts/office/validators/redlining.py +229 -0
  167. package/skills/xlsx/scripts/recalc.py +296 -0
@@ -0,0 +1,332 @@
1
+ #!/usr/bin/env python3
2
+ """Run the eval + improve loop until all pass or max iterations reached.
3
+
4
+ Combines run_eval.py and improve_description.py in a loop, tracking history
5
+ and returning the best description found. Supports train/test split to prevent
6
+ overfitting.
7
+ """
8
+
9
+ import argparse
10
+ import json
11
+ import random
12
+ import sys
13
+ import tempfile
14
+ import time
15
+ import webbrowser
16
+ from pathlib import Path
17
+
18
+ import anthropic
19
+
20
+ from scripts.generate_report import generate_html
21
+ from scripts.improve_description import improve_description
22
+ from scripts.run_eval import find_project_root, run_eval
23
+ from scripts.utils import parse_skill_md
24
+
25
+
26
def split_eval_set(eval_set: list[dict], holdout: float, seed: int = 42) -> tuple[list[dict], list[dict]]:
    """Split eval set into train and test sets, stratified by should_trigger.

    Args:
        eval_set: Eval entries; each must carry a "should_trigger" key.
        holdout: Fraction of each stratum (trigger / no-trigger) to hold out.
        seed: Seed for the shuffle, so the same input always splits the same way.

    Returns:
        (train_set, test_set). Each non-empty stratum contributes at least one
        entry to the test set, even when ``len(stratum) * holdout`` rounds to 0.
    """
    # A private generator seeded with `seed` produces the same shuffles as
    # seeding the module-level generator, but does not reset the global
    # random state that other code in the process may rely on.
    rng = random.Random(seed)

    # Separate by should_trigger
    trigger = [e for e in eval_set if e["should_trigger"]]
    no_trigger = [e for e in eval_set if not e["should_trigger"]]

    # Shuffle each group (trigger first, to keep the draw order stable)
    rng.shuffle(trigger)
    rng.shuffle(no_trigger)

    # Calculate split points
    n_trigger_test = max(1, int(len(trigger) * holdout))
    n_no_trigger_test = max(1, int(len(no_trigger) * holdout))

    # Split
    test_set = trigger[:n_trigger_test] + no_trigger[:n_no_trigger_test]
    train_set = trigger[n_trigger_test:] + no_trigger[n_no_trigger_test:]

    return train_set, test_set
47
+
48
+
49
+ def run_loop(
50
+ eval_set: list[dict],
51
+ skill_path: Path,
52
+ description_override: str | None,
53
+ num_workers: int,
54
+ timeout: int,
55
+ max_iterations: int,
56
+ runs_per_query: int,
57
+ trigger_threshold: float,
58
+ holdout: float,
59
+ model: str,
60
+ verbose: bool,
61
+ live_report_path: Path | None = None,
62
+ log_dir: Path | None = None,
63
+ ) -> dict:
64
+ """Run the eval + improvement loop."""
65
+ project_root = find_project_root()
66
+ name, original_description, content = parse_skill_md(skill_path)
67
+ current_description = description_override or original_description
68
+
69
+ # Split into train/test if holdout > 0
70
+ if holdout > 0:
71
+ train_set, test_set = split_eval_set(eval_set, holdout)
72
+ if verbose:
73
+ print(f"Split: {len(train_set)} train, {len(test_set)} test (holdout={holdout})", file=sys.stderr)
74
+ else:
75
+ train_set = eval_set
76
+ test_set = []
77
+
78
+ client = anthropic.Anthropic()
79
+ history = []
80
+ exit_reason = "unknown"
81
+
82
+ for iteration in range(1, max_iterations + 1):
83
+ if verbose:
84
+ print(f"\n{'='*60}", file=sys.stderr)
85
+ print(f"Iteration {iteration}/{max_iterations}", file=sys.stderr)
86
+ print(f"Description: {current_description}", file=sys.stderr)
87
+ print(f"{'='*60}", file=sys.stderr)
88
+
89
+ # Evaluate train + test together in one batch for parallelism
90
+ all_queries = train_set + test_set
91
+ t0 = time.time()
92
+ all_results = run_eval(
93
+ eval_set=all_queries,
94
+ skill_name=name,
95
+ description=current_description,
96
+ num_workers=num_workers,
97
+ timeout=timeout,
98
+ project_root=project_root,
99
+ runs_per_query=runs_per_query,
100
+ trigger_threshold=trigger_threshold,
101
+ model=model,
102
+ )
103
+ eval_elapsed = time.time() - t0
104
+
105
+ # Split results back into train/test by matching queries
106
+ train_queries_set = {q["query"] for q in train_set}
107
+ train_result_list = [r for r in all_results["results"] if r["query"] in train_queries_set]
108
+ test_result_list = [r for r in all_results["results"] if r["query"] not in train_queries_set]
109
+
110
+ train_passed = sum(1 for r in train_result_list if r["pass"])
111
+ train_total = len(train_result_list)
112
+ train_summary = {"passed": train_passed, "failed": train_total - train_passed, "total": train_total}
113
+ train_results = {"results": train_result_list, "summary": train_summary}
114
+
115
+ if test_set:
116
+ test_passed = sum(1 for r in test_result_list if r["pass"])
117
+ test_total = len(test_result_list)
118
+ test_summary = {"passed": test_passed, "failed": test_total - test_passed, "total": test_total}
119
+ test_results = {"results": test_result_list, "summary": test_summary}
120
+ else:
121
+ test_results = None
122
+ test_summary = None
123
+
124
+ history.append({
125
+ "iteration": iteration,
126
+ "description": current_description,
127
+ "train_passed": train_summary["passed"],
128
+ "train_failed": train_summary["failed"],
129
+ "train_total": train_summary["total"],
130
+ "train_results": train_results["results"],
131
+ "test_passed": test_summary["passed"] if test_summary else None,
132
+ "test_failed": test_summary["failed"] if test_summary else None,
133
+ "test_total": test_summary["total"] if test_summary else None,
134
+ "test_results": test_results["results"] if test_results else None,
135
+ # For backward compat with report generator
136
+ "passed": train_summary["passed"],
137
+ "failed": train_summary["failed"],
138
+ "total": train_summary["total"],
139
+ "results": train_results["results"],
140
+ })
141
+
142
+ # Write live report if path provided
143
+ if live_report_path:
144
+ partial_output = {
145
+ "original_description": original_description,
146
+ "best_description": current_description,
147
+ "best_score": "in progress",
148
+ "iterations_run": len(history),
149
+ "holdout": holdout,
150
+ "train_size": len(train_set),
151
+ "test_size": len(test_set),
152
+ "history": history,
153
+ }
154
+ live_report_path.write_text(generate_html(partial_output, auto_refresh=True, skill_name=name))
155
+
156
+ if verbose:
157
+ def print_eval_stats(label, results, elapsed):
158
+ pos = [r for r in results if r["should_trigger"]]
159
+ neg = [r for r in results if not r["should_trigger"]]
160
+ tp = sum(r["triggers"] for r in pos)
161
+ pos_runs = sum(r["runs"] for r in pos)
162
+ fn = pos_runs - tp
163
+ fp = sum(r["triggers"] for r in neg)
164
+ neg_runs = sum(r["runs"] for r in neg)
165
+ tn = neg_runs - fp
166
+ total = tp + tn + fp + fn
167
+ precision = tp / (tp + fp) if (tp + fp) > 0 else 1.0
168
+ recall = tp / (tp + fn) if (tp + fn) > 0 else 1.0
169
+ accuracy = (tp + tn) / total if total > 0 else 0.0
170
+ print(f"{label}: {tp+tn}/{total} correct, precision={precision:.0%} recall={recall:.0%} accuracy={accuracy:.0%} ({elapsed:.1f}s)", file=sys.stderr)
171
+ for r in results:
172
+ status = "PASS" if r["pass"] else "FAIL"
173
+ rate_str = f"{r['triggers']}/{r['runs']}"
174
+ print(f" [{status}] rate={rate_str} expected={r['should_trigger']}: {r['query'][:60]}", file=sys.stderr)
175
+
176
+ print_eval_stats("Train", train_results["results"], eval_elapsed)
177
+ if test_summary:
178
+ print_eval_stats("Test ", test_results["results"], 0)
179
+
180
+ if train_summary["failed"] == 0:
181
+ exit_reason = f"all_passed (iteration {iteration})"
182
+ if verbose:
183
+ print(f"\nAll train queries passed on iteration {iteration}!", file=sys.stderr)
184
+ break
185
+
186
+ if iteration == max_iterations:
187
+ exit_reason = f"max_iterations ({max_iterations})"
188
+ if verbose:
189
+ print(f"\nMax iterations reached ({max_iterations}).", file=sys.stderr)
190
+ break
191
+
192
+ # Improve the description based on train results
193
+ if verbose:
194
+ print(f"\nImproving description...", file=sys.stderr)
195
+
196
+ t0 = time.time()
197
+ # Strip test scores from history so improvement model can't see them
198
+ blinded_history = [
199
+ {k: v for k, v in h.items() if not k.startswith("test_")}
200
+ for h in history
201
+ ]
202
+ new_description = improve_description(
203
+ client=client,
204
+ skill_name=name,
205
+ skill_content=content,
206
+ current_description=current_description,
207
+ eval_results=train_results,
208
+ history=blinded_history,
209
+ model=model,
210
+ log_dir=log_dir,
211
+ iteration=iteration,
212
+ )
213
+ improve_elapsed = time.time() - t0
214
+
215
+ if verbose:
216
+ print(f"Proposed ({improve_elapsed:.1f}s): {new_description}", file=sys.stderr)
217
+
218
+ current_description = new_description
219
+
220
+ # Find the best iteration by TEST score (or train if no test set)
221
+ if test_set:
222
+ best = max(history, key=lambda h: h["test_passed"] or 0)
223
+ best_score = f"{best['test_passed']}/{best['test_total']}"
224
+ else:
225
+ best = max(history, key=lambda h: h["train_passed"])
226
+ best_score = f"{best['train_passed']}/{best['train_total']}"
227
+
228
+ if verbose:
229
+ print(f"\nExit reason: {exit_reason}", file=sys.stderr)
230
+ print(f"Best score: {best_score} (iteration {best['iteration']})", file=sys.stderr)
231
+
232
+ return {
233
+ "exit_reason": exit_reason,
234
+ "original_description": original_description,
235
+ "best_description": best["description"],
236
+ "best_score": best_score,
237
+ "best_train_score": f"{best['train_passed']}/{best['train_total']}",
238
+ "best_test_score": f"{best['test_passed']}/{best['test_total']}" if test_set else None,
239
+ "final_description": current_description,
240
+ "iterations_run": len(history),
241
+ "holdout": holdout,
242
+ "train_size": len(train_set),
243
+ "test_size": len(test_set),
244
+ "history": history,
245
+ }
246
+
247
+
248
def main():
    """Command-line entry point: parse arguments, run the loop, save outputs.

    Prints the JSON result to stdout. Depending on the flags it also writes an
    HTML report (opened in the browser at start and refreshed while running)
    and a timestamped results directory containing results.json, report.html
    and a logs/ subdirectory path handed to `run_loop`.
    """
    parser = argparse.ArgumentParser(description="Run eval + improve loop")
    parser.add_argument("--eval-set", required=True, help="Path to eval set JSON file")
    parser.add_argument("--skill-path", required=True, help="Path to skill directory")
    parser.add_argument("--description", default=None, help="Override starting description")
    parser.add_argument("--num-workers", type=int, default=10, help="Number of parallel workers")
    parser.add_argument("--timeout", type=int, default=30, help="Timeout per query in seconds")
    parser.add_argument("--max-iterations", type=int, default=5, help="Max improvement iterations")
    parser.add_argument("--runs-per-query", type=int, default=3, help="Number of runs per query")
    parser.add_argument("--trigger-threshold", type=float, default=0.5, help="Trigger rate threshold")
    parser.add_argument("--holdout", type=float, default=0.4, help="Fraction of eval set to hold out for testing (0 to disable)")
    parser.add_argument("--model", required=True, help="Model for improvement")
    parser.add_argument("--verbose", action="store_true", help="Print progress to stderr")
    parser.add_argument("--report", default="auto", help="Generate HTML report at this path (default: 'auto' for temp file, 'none' to disable)")
    parser.add_argument("--results-dir", default=None, help="Save all outputs (results.json, report.html, log.txt) to a timestamped subdirectory here")
    args = parser.parse_args()

    # JSON is UTF-8 by specification; do not depend on the platform locale
    # (non-ASCII queries would otherwise fail to decode on e.g. cp1252 systems).
    eval_set = json.loads(Path(args.eval_set).read_text(encoding="utf-8"))
    skill_path = Path(args.skill_path)

    if not (skill_path / "SKILL.md").exists():
        print(f"Error: No SKILL.md found at {skill_path}", file=sys.stderr)
        sys.exit(1)

    name, _, _ = parse_skill_md(skill_path)

    # Set up live report path
    if args.report != "none":
        if args.report == "auto":
            timestamp = time.strftime("%Y%m%d_%H%M%S")
            live_report_path = Path(tempfile.gettempdir()) / f"skill_description_report_{skill_path.name}_{timestamp}.html"
        else:
            live_report_path = Path(args.report)
        # Open the report immediately so the user can watch
        live_report_path.write_text("<html><body><h1>Starting optimization loop...</h1><meta http-equiv='refresh' content='5'></body></html>")
        webbrowser.open(str(live_report_path))
    else:
        live_report_path = None

    # Determine output directory (create before run_loop so logs can be written)
    if args.results_dir:
        timestamp = time.strftime("%Y-%m-%d_%H%M%S")
        results_dir = Path(args.results_dir) / timestamp
        results_dir.mkdir(parents=True, exist_ok=True)
    else:
        results_dir = None

    log_dir = results_dir / "logs" if results_dir else None

    output = run_loop(
        eval_set=eval_set,
        skill_path=skill_path,
        description_override=args.description,
        num_workers=args.num_workers,
        timeout=args.timeout,
        max_iterations=args.max_iterations,
        runs_per_query=args.runs_per_query,
        trigger_threshold=args.trigger_threshold,
        holdout=args.holdout,
        model=args.model,
        verbose=args.verbose,
        live_report_path=live_report_path,
        log_dir=log_dir,
    )

    # Save JSON output
    json_output = json.dumps(output, indent=2)
    print(json_output)
    if results_dir:
        (results_dir / "results.json").write_text(json_output)

    # Write final HTML report (without auto-refresh). Render it once and reuse
    # the same text for the copy kept in the results directory.
    if live_report_path:
        final_html = generate_html(output, auto_refresh=False, skill_name=name)
        live_report_path.write_text(final_html)
        print(f"\nReport: {live_report_path}", file=sys.stderr)
        if results_dir:
            (results_dir / "report.html").write_text(final_html)

    if results_dir:
        print(f"Results saved to: {results_dir}", file=sys.stderr)
330
+
331
+ if __name__ == "__main__":
332
+ main()
@@ -0,0 +1,47 @@
1
+ """Shared utilities for skill-creator scripts."""
2
+
3
+ from pathlib import Path
4
+
5
+
6
+
7
def parse_skill_md(skill_path: Path) -> tuple[str, str, str]:
    """Parse a SKILL.md file, returning (name, description, full_content).

    Only the ``name:`` and ``description:`` keys of the leading ``---``
    frontmatter block are read; this is a minimal line scanner, not a YAML
    parser. A description introduced by a block-scalar header (``>``, ``|``,
    optionally followed by ``-`` or ``+``) is collected from the following
    indented lines and joined with single spaces.

    Args:
        skill_path: Directory that contains SKILL.md.

    Returns:
        (name, description, content), where content is the whole file text.
        name and description are empty strings when their key is absent.

    Raises:
        ValueError: If the opening or closing ``---`` line is missing.
    """
    # Read as UTF-8 explicitly so non-ASCII skill text does not depend on the
    # platform's default locale encoding.
    content = (skill_path / "SKILL.md").read_text(encoding="utf-8")
    lines = content.split("\n")

    if lines[0].strip() != "---":
        raise ValueError("SKILL.md missing frontmatter (no opening ---)")

    end_idx = next(
        (idx for idx, text in enumerate(lines[1:], start=1) if text.strip() == "---"),
        None,
    )
    if end_idx is None:
        raise ValueError("SKILL.md missing frontmatter (no closing ---)")

    name = ""
    description = ""
    frontmatter_lines = lines[1:end_idx]
    i = 0
    while i < len(frontmatter_lines):
        line = frontmatter_lines[i]
        if line.startswith("name:"):
            name = line[len("name:"):].strip().strip('"').strip("'")
        elif line.startswith("description:"):
            value = line[len("description:"):].strip()
            # Handle YAML block-scalar headers, including the "keep" (+)
            # chomping indicator alongside "strip" (-)
            if value in (">", "|", ">-", "|-", ">+", "|+"):
                continuation_lines: list[str] = []
                i += 1
                while i < len(frontmatter_lines):
                    candidate = frontmatter_lines[i]
                    text = candidate.strip()
                    # The scalar ends at the first non-blank line that is not
                    # indented; blank lines inside it are skipped, not treated
                    # as the end of the description.
                    if text and not candidate.startswith((" ", "\t")):
                        break
                    if text:
                        continuation_lines.append(text)
                    i += 1
                description = " ".join(continuation_lines)
                # `i` already points at the next unprocessed line
                continue
            description = value.strip('"').strip("'")
        i += 1

    return name, description, content
@@ -0,0 +1,185 @@
1
+ examples:
2
+ - id: create-spreadsheet
3
+ title:
4
+ zh: 创建电子表格
5
+ en: Create Spreadsheet
6
+ description:
7
+ zh: 创建包含数据分析和图表的新 Excel 文件
8
+ en: Create a new Excel file containing data analysis and charts
9
+ prompt:
10
+ zh: |-
11
+ 请帮我创建一个专业的 Excel 电子表格,用于分析销售数据:
12
+
13
+ 数据内容:
14
+ - 月份:1-12月的销售数据
15
+ - 产品类别:A、B、C三类产品
16
+ - 销售额:每月各类产品的销售额
17
+ - 成本:对应的成本数据
18
+ - 利润:自动计算(销售额-成本)
19
+
20
+ 要求:
21
+ 1. 创建多个工作表:原始数据、汇总统计、图表分析
22
+ 2. 使用公式自动计算总计、平均值、增长率
23
+ 3. 创建柱状图、折线图展示趋势
24
+ 4. 应用条件格式突出显示关键数据
25
+ 5. 添加数据验证确保输入正确
26
+
27
+ 请生成完整的 Excel 文件。
28
+ en: |-
29
+ Please help me create a professional Excel spreadsheet for sales data analysis:
30
+
31
+ Data content:
32
+ - Months: Sales data for 1-12 months
33
+ - Product categories: Three product types A, B, C
34
+ - Sales revenue: Monthly sales revenue for each product type
35
+ - Costs: Corresponding cost data
36
+ - Profit: Automatically calculated (revenue - cost)
37
+
38
+ Requirements:
39
+ 1. Create multiple worksheets: raw data, summary statistics, chart analysis
40
+ 2. Use formulas to automatically calculate totals, averages, growth rates
41
+ 3. Create bar charts and line charts to show trends
42
+ 4. Apply conditional formatting to highlight key data
43
+ 5. Add data validation to ensure correct input
44
+
45
+ Please generate the complete Excel file.
46
+ - id: data-analysis
47
+ title:
48
+ zh: 数据分析
49
+ en: Data Analysis
50
+ description:
51
+ zh: 对现有 Excel 文件进行深入数据分析和处理
52
+ en: Perform in-depth data analysis and processing on existing Excel files
53
+ prompt:
54
+ zh: |-
55
+ 请帮我分析这个 Excel 文件中的数据:
56
+
57
+ 文件路径:{{Excel文件路径}}
58
+
59
+ 分析要求:
60
+ 1. 数据清洗:去除重复项、处理空值、统一格式
61
+ 2. 统计分析:
62
+ - 计算各列的基本统计量(均值、中位数、标准差等)
63
+ - 识别异常值和离群点
64
+ - 分析数据分布特征
65
+ 3. 趋势分析:识别数据变化趋势和周期性
66
+ 4. 相关性分析:找出变量间的相关关系
67
+ 5. 生成分析报告:包含关键发现和可视化图表
68
+
69
+ 输出:修改后的 Excel 文件 + 分析报告(Markdown格式)
70
+ en: |-
71
+ Please help me analyze the data in this Excel file:
72
+
73
+ File path: {{Excel file path}}
74
+
75
+ Analysis requirements:
76
+ 1. Data cleaning: Remove duplicates, handle null values, standardize formats
77
+ 2. Statistical analysis:
78
+ - Calculate basic statistics for each column (mean, median, standard deviation, etc.)
79
+ - Identify outliers and anomalies
80
+ - Analyze data distribution characteristics
81
+ 3. Trend analysis: Identify data change trends and periodicity
82
+ 4. Correlation analysis: Find correlations between variables
83
+ 5. Generate analysis report: Include key findings and visual charts
84
+
85
+ Output: Modified Excel file + Analysis report (Markdown format)
86
+ - id: pivot-table
87
+ title:
88
+ zh: 数据透视表
89
+ en: Pivot Table
90
+ description:
91
+ zh: 创建和优化数据透视表进行多维度数据分析
92
+ en: Create and optimize pivot tables for multi-dimensional data analysis
93
+ prompt:
94
+ zh: |-
95
+ 我需要为这个销售数据创建数据透视表:
96
+
97
+ 数据源:{{销售数据Excel文件}}
98
+ 分析维度:
99
+ - 按地区分析销售额
100
+ - 按产品类别分析利润
101
+ - 按时间段分析趋势
102
+ - 按销售人员分析业绩
103
+
104
+ 具体要求:
105
+ 1. 创建多个数据透视表,每个关注不同的分析角度
106
+ 2. 添加计算字段(如利润率、同比增长率)
107
+ 3. 创建切片器便于交互式筛选
108
+ 4. 设计仪表板视图整合关键指标
109
+ 5. 应用合适的数字格式和样式
110
+
111
+ 请构建完整的分析工作簿。
112
+ en: |-
113
+ I need to create pivot tables for this sales data:
114
+
115
+ Data source: {{Sales data Excel file}}
116
+ Analysis dimensions:
117
+ - Analyze sales by region
118
+ - Analyze profit by product category
119
+ - Analyze trends by time period
120
+ - Analyze performance by salesperson
121
+
122
+ Specific requirements:
123
+ 1. Create multiple pivot tables, each focusing on different analysis angles
124
+ 2. Add calculated fields (such as profit margin, year-over-year growth rate)
125
+ 3. Create slicers for interactive filtering
126
+ 4. Design dashboard view integrating key metrics
127
+ 5. Apply appropriate number formatting and styling
128
+
129
+ Please build a complete analysis workbook.
130
+ - id: formula-automation
131
+ title:
132
+ zh: 公式自动化
133
+ en: Formula Automation
134
+ description:
135
+ zh: 批量应用复杂公式和函数自动化处理数据
136
+ en: Batch apply complex formulas and functions to automate data processing
137
+ prompt:
138
+ zh: |-
139
+ 请帮我在这个财务报表中设置自动化计算:
140
+
141
+ 工作簿:{{财务数据.xlsx}}
142
+
143
+ 需要实现的自动化功能:
144
+ 1. 财务比率计算:
145
+ - 流动比率 = 流动资产 / 流动负债
146
+ - 资产负债率 = 总负债 / 总资产
147
+ - 净利润率 = 净利润 / 营业收入
148
+ 2. 时间序列分析:
149
+ - 月度环比增长率
150
+ - 年度同比增长率
151
+ - 移动平均值计算
152
+ 3. 条件逻辑处理:
153
+ - 根据数值范围自动分级
154
+ - 异常值标记和预警
155
+ - 动态评分系统
156
+ 4. 数据查找和引用:
157
+ - VLOOKUP/HLOOKUP 查找关联数据
158
+ - INDEX+MATCH 组合查询
159
+ - INDIRECT 动态引用
160
+
161
+ 要求:所有公式应具有良好的可读性和可维护性。
162
+ en: |-
163
+ Please help me set up automated calculations in this financial statement:
164
+
165
+ Workbook: {{Financial data.xlsx}}
166
+
167
+ Automated functions to implement:
168
+ 1. Financial ratio calculations:
169
+ - Current ratio = Current assets / Current liabilities
170
+ - Debt-to-assets ratio = Total liabilities / Total assets
171
+ - Net profit margin = Net profit / Operating revenue
172
+ 2. Time series analysis:
173
+ - Month-over-month growth rate
174
+ - Annual year-over-year growth rate
175
+ - Moving average calculation
176
+ 3. Conditional logic processing:
177
+ - Automatic grading based on numerical ranges
178
+ - Outlier marking and alerts
179
+ - Dynamic scoring system
180
+ 4. Data lookup and reference:
181
+ - VLOOKUP/HLOOKUP for associated data lookup
182
+ - INDEX+MATCH combination query
183
+ - INDIRECT dynamic reference
184
+
185
+ Requirement: All formulas should have good readability and maintainability.
@@ -0,0 +1,30 @@
1
+ © 2025 Anthropic, PBC. All rights reserved.
2
+
3
+ LICENSE: Use of these materials (including all code, prompts, assets, files,
4
+ and other components of this Skill) is governed by your agreement with
5
+ Anthropic regarding use of Anthropic's services. If no separate agreement
6
+ exists, use is governed by Anthropic's Consumer Terms of Service or
7
+ Commercial Terms of Service, as applicable:
8
+ https://www.anthropic.com/legal/consumer-terms
9
+ https://www.anthropic.com/legal/commercial-terms
10
+ Your applicable agreement is referred to as the "Agreement." "Services" are
11
+ as defined in the Agreement.
12
+
13
+ ADDITIONAL RESTRICTIONS: Notwithstanding anything in the Agreement to the
14
+ contrary, users may not:
15
+
16
+ - Extract these materials from the Services or retain copies of these
17
+ materials outside the Services
18
+ - Reproduce or copy these materials, except for temporary copies created
19
+ automatically during authorized use of the Services
20
+ - Create derivative works based on these materials
21
+ - Distribute, sublicense, or transfer these materials to any third party
22
+ - Make, offer to sell, sell, or import any inventions embodied in these
23
+ materials
24
+ - Reverse engineer, decompile, or disassemble these materials
25
+
26
+ The receipt, viewing, or possession of these materials does not convey or
27
+ imply any license or right beyond those expressly granted above.
28
+
29
+ Anthropic retains all right, title, and interest in these materials,
30
+ including all copyrights, patents, and other intellectual property rights.