npm - @intentsolutionsio/skill-creator - Versions diffs - 5.0.0 → 5.0.3 - Mend

@intentsolutionsio/skill-creator 5.0.0 → 5.0.3

This diff shows the content of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.

Files changed (33)

package/skills/skill-creator/scripts/improve_description.py CHANGED

@@ -41,9 +41,7 @@ def _call_claude(prompt: str, model: str | None, timeout: int = 300) -> str:
         timeout=timeout,
     )
     if result.returncode != 0:
-        raise RuntimeError(
-            f"claude -p exited {result.returncode}\nstderr: {result.stderr}"
-        )
+        raise RuntimeError(f"claude -p exited {result.returncode}\nstderr: {result.stderr}")
     return result.stdout
@@ -59,14 +57,8 @@ def improve_description(
     iteration: int | None = None,
 ) -> str:
     """Call Claude to improve the description based on eval results."""
-    failed_triggers = [
-        r for r in eval_results["results"]
-        if r["should_trigger"] and not r["pass"]
-    ]
-    false_triggers = [
-        r for r in eval_results["results"]
-        if not r["should_trigger"] and not r["pass"]
-    ]
+    failed_triggers = [r for r in eval_results["results"] if r["should_trigger"] and not r["pass"]]
+    false_triggers = [r for r in eval_results["results"] if not r["should_trigger"] and not r["pass"]]
     # Build scores summary
     train_score = f"{eval_results['summary']['passed']}/{eval_results['summary']['total']}"
@@ -104,9 +96,11 @@ Current scores ({scores_summary}):
         prompt += "PREVIOUS ATTEMPTS (do NOT repeat these — try something structurally different):\n\n"
         for h in history:
             train_s = f"{h.get('train_passed', h.get('passed', 0))}/{h.get('train_total', h.get('total', 0))}"
-            test_s = f"{h.get('test_passed', '?')}/{h.get('test_total', '?')}" if h.get('test_passed') is not None else None
+            test_s = (
+                f"{h.get('test_passed', '?')}/{h.get('test_total', '?')}" if h.get("test_passed") is not None else None
+            )
             score_str = f"train={train_s}" + (f", test={test_s}" if test_s else "")
-            prompt += f'<attempt {score_str}>\n'
+            prompt += f"<attempt {score_str}>\n"
             prompt += f'Description: "{h["description"]}"\n'
             if "results" in h:
                 prompt += "Train results:\n"
@@ -114,7 +108,7 @@ Current scores ({scores_summary}):
                     status = "PASS" if r["pass"] else "FAIL"
                     prompt += f'  [{status}] "{r["query"][:80]}" (triggered {r["triggers"]}/{r["runs"]})\n'
             if h.get("note"):
-                prompt += f'Note: {h["note"]}\n'
+                prompt += f"Note: {h['note']}\n"
             prompt += "</attempt>\n\n"
     prompt += f"""</scores_summary>
@@ -232,13 +226,16 @@ def main():
     # Output as JSON with both the new description and updated history
     output = {
         "description": new_description,
-        "history": history + [{
-            "description": current_description,
-            "passed": eval_results["summary"]["passed"],
-            "failed": eval_results["summary"]["failed"],
-            "total": eval_results["summary"]["total"],
-            "results": eval_results["results"],
-        }],
+        "history": history
+        + [
+            {
+                "description": current_description,
+                "passed": eval_results["summary"]["passed"],
+                "failed": eval_results["summary"]["failed"],
+                "total": eval_results["summary"]["total"],
+                "results": eval_results["results"],
+            }
+        ],
     }
     print(json.dumps(output, indent=2))

package/skills/skill-creator/scripts/package_skill.py CHANGED

@@ -88,9 +88,9 @@ def package_skill(skill_path, output_dir=None):
     # Create the .skill file (zip format)
     try:
-        with zipfile.ZipFile(skill_filename, 'w', zipfile.ZIP_DEFLATED) as zipf:
+        with zipfile.ZipFile(skill_filename, "w", zipfile.ZIP_DEFLATED) as zipf:
             # Walk through the skill directory, excluding build artifacts
-            for file_path in skill_path.rglob('*'):
+            for file_path in skill_path.rglob("*"):
                 if not file_path.is_file():
                     continue
                 arcname = file_path.relative_to(skill_path.parent)

package/skills/skill-creator/scripts/quick_validate.py CHANGED

@@ -4,27 +4,27 @@ Quick validation script for skills - minimal version
 """
 import sys
-import os
 import re
 import yaml
 from pathlib import Path
 def validate_skill(skill_path):
     """Basic validation of a skill"""
     skill_path = Path(skill_path)
     # Check SKILL.md exists
-    skill_md = skill_path / 'SKILL.md'
+    skill_md = skill_path / "SKILL.md"
     if not skill_md.exists():
         return False, "SKILL.md not found"
     # Read and validate frontmatter
     content = skill_md.read_text()
-    if not content.startswith('---'):
+    if not content.startswith("---"):
         return False, "No YAML frontmatter found"
     # Extract frontmatter
-    match = re.match(r'^---\n(.*?)\n---', content, re.DOTALL)
+    match = re.match(r"^---\n(.*?)\n---", content, re.DOTALL)
     if not match:
         return False, "Invalid frontmatter format"
@@ -39,7 +39,7 @@ def validate_skill(skill_path):
         return False, f"Invalid YAML in frontmatter: {e}"
     # Define allowed properties
-    ALLOWED_PROPERTIES = {'name', 'description', 'license', 'allowed-tools', 'metadata', 'compatibility'}
+    ALLOWED_PROPERTIES = {"name", "description", "license", "allowed-tools", "metadata", "compatibility"}
     # Check for unexpected properties (excluding nested keys under metadata)
     unexpected_keys = set(frontmatter.keys()) - ALLOWED_PROPERTIES
@@ -50,41 +50,41 @@ def validate_skill(skill_path):
         )
     # Check required fields
-    if 'name' not in frontmatter:
+    if "name" not in frontmatter:
         return False, "Missing 'name' in frontmatter"
-    if 'description' not in frontmatter:
+    if "description" not in frontmatter:
         return False, "Missing 'description' in frontmatter"
     # Extract name for validation
-    name = frontmatter.get('name', '')
+    name = frontmatter.get("name", "")
     if not isinstance(name, str):
         return False, f"Name must be a string, got {type(name).__name__}"
     name = name.strip()
     if name:
         # Check naming convention (kebab-case: lowercase with hyphens)
-        if not re.match(r'^[a-z0-9-]+$', name):
+        if not re.match(r"^[a-z0-9-]+$", name):
             return False, f"Name '{name}' should be kebab-case (lowercase letters, digits, and hyphens only)"
-        if name.startswith('-') or name.endswith('-') or '--' in name:
+        if name.startswith("-") or name.endswith("-") or "--" in name:
             return False, f"Name '{name}' cannot start/end with hyphen or contain consecutive hyphens"
         # Check name length (max 64 characters per spec)
         if len(name) > 64:
             return False, f"Name is too long ({len(name)} characters). Maximum is 64 characters."
     # Extract and validate description
-    description = frontmatter.get('description', '')
+    description = frontmatter.get("description", "")
     if not isinstance(description, str):
         return False, f"Description must be a string, got {type(description).__name__}"
     description = description.strip()
     if description:
         # Check for angle brackets
-        if '<' in description or '>' in description:
+        if "<" in description or ">" in description:
             return False, "Description cannot contain angle brackets (< or >)"
         # Check description length (max 1024 characters per spec)
         if len(description) > 1024:
             return False, f"Description is too long ({len(description)} characters). Maximum is 1024 characters."
     # Validate compatibility field if present (optional)
-    compatibility = frontmatter.get('compatibility', '')
+    compatibility = frontmatter.get("compatibility", "")
     if compatibility:
         if not isinstance(compatibility, str):
             return False, f"Compatibility must be a string, got {type(compatibility).__name__}"
@@ -93,11 +93,12 @@ def validate_skill(skill_path):
     return True, "Skill is valid!"
 if __name__ == "__main__":
     if len(sys.argv) != 2:
         print("Usage: python quick_validate.py <skill_directory>")
         sys.exit(1)
     valid, message = validate_skill(sys.argv[1])
     print(message)
-    sys.exit(0 if valid else 1)
+    sys.exit(0 if valid else 1)

package/skills/skill-creator/scripts/run_eval.py CHANGED

@@ -101,8 +101,10 @@ def run_single_query(
         cmd = [
             "claude",
-            "-p", query,
-            "--output-format", "stream-json",
+            "-p",
+            query,
+            "--output-format",
+            "stream-json",
             "--verbose",
             "--include-partial-messages",
         ]
@@ -265,14 +267,16 @@ def run_eval(
             did_pass = trigger_rate >= trigger_threshold
         else:
             did_pass = trigger_rate < trigger_threshold
-        results.append({
-            "query": query,
-            "should_trigger": should_trigger,
-            "trigger_rate": trigger_rate,
-            "triggers": sum(triggers),
-            "runs": len(triggers),
-            "pass": did_pass,
-        })
+        results.append(
+            {
+                "query": query,
+                "should_trigger": should_trigger,
+                "trigger_rate": trigger_rate,
+                "triggers": sum(triggers),
+                "runs": len(triggers),
+                "pass": did_pass,
+            }
+        )
     passed = sum(1 for r in results if r["pass"])
     total = len(results)

package/skills/skill-creator/scripts/run_loop.py CHANGED

@@ -78,10 +78,10 @@ def run_loop(
     for iteration in range(1, max_iterations + 1):
         if verbose:
-            print(f"\n{'='*60}", file=sys.stderr)
+            print(f"\n{'=' * 60}", file=sys.stderr)
             print(f"Iteration {iteration}/{max_iterations}", file=sys.stderr)
             print(f"Description: {current_description}", file=sys.stderr)
-            print(f"{'='*60}", file=sys.stderr)
+            print(f"{'=' * 60}", file=sys.stderr)
         # Evaluate train + test together in one batch for parallelism
         all_queries = train_set + test_set
@@ -119,23 +119,25 @@ def run_loop(
             test_results = None
             test_summary = None
-        history.append({
-            "iteration": iteration,
-            "description": current_description,
-            "train_passed": train_summary["passed"],
-            "train_failed": train_summary["failed"],
-            "train_total": train_summary["total"],
-            "train_results": train_results["results"],
-            "test_passed": test_summary["passed"] if test_summary else None,
-            "test_failed": test_summary["failed"] if test_summary else None,
-            "test_total": test_summary["total"] if test_summary else None,
-            "test_results": test_results["results"] if test_results else None,
-            # For backward compat with report generator
-            "passed": train_summary["passed"],
-            "failed": train_summary["failed"],
-            "total": train_summary["total"],
-            "results": train_results["results"],
-        })
+        history.append(
+            {
+                "iteration": iteration,
+                "description": current_description,
+                "train_passed": train_summary["passed"],
+                "train_failed": train_summary["failed"],
+                "train_total": train_summary["total"],
+                "train_results": train_results["results"],
+                "test_passed": test_summary["passed"] if test_summary else None,
+                "test_failed": test_summary["failed"] if test_summary else None,
+                "test_total": test_summary["total"] if test_summary else None,
+                "test_results": test_results["results"] if test_results else None,
+                # For backward compat with report generator
+                "passed": train_summary["passed"],
+                "failed": train_summary["failed"],
+                "total": train_summary["total"],
+                "results": train_results["results"],
+            }
+        )
         # Write live report if path provided
         if live_report_path:
@@ -152,6 +154,7 @@ def run_loop(
             live_report_path.write_text(generate_html(partial_output, auto_refresh=True, skill_name=name))
         if verbose:
             def print_eval_stats(label, results, elapsed):
                 pos = [r for r in results if r["should_trigger"]]
                 neg = [r for r in results if not r["should_trigger"]]
@@ -165,11 +168,17 @@ def run_loop(
                 precision = tp / (tp + fp) if (tp + fp) > 0 else 1.0
                 recall = tp / (tp + fn) if (tp + fn) > 0 else 1.0
                 accuracy = (tp + tn) / total if total > 0 else 0.0
-                print(f"{label}: {tp+tn}/{total} correct, precision={precision:.0%} recall={recall:.0%} accuracy={accuracy:.0%} ({elapsed:.1f}s)", file=sys.stderr)
+                print(
+                    f"{label}: {tp + tn}/{total} correct, precision={precision:.0%} recall={recall:.0%} accuracy={accuracy:.0%} ({elapsed:.1f}s)",
+                    file=sys.stderr,
+                )
                 for r in results:
                     status = "PASS" if r["pass"] else "FAIL"
                     rate_str = f"{r['triggers']}/{r['runs']}"
-                    print(f"  [{status}] rate={rate_str} expected={r['should_trigger']}: {r['query'][:60]}", file=sys.stderr)
+                    print(
+                        f"  [{status}] rate={rate_str} expected={r['should_trigger']}: {r['query'][:60]}",
+                        file=sys.stderr,
+                    )
             print_eval_stats("Train", train_results["results"], eval_elapsed)
             if test_summary:
@@ -189,14 +198,11 @@ def run_loop(
         # Improve the description based on train results
         if verbose:
-            print(f"\nImproving description...", file=sys.stderr)
+            print("\nImproving description...", file=sys.stderr)
         t0 = time.time()
         # Strip test scores from history so improvement model can't see them
-        blinded_history = [
-            {k: v for k, v in h.items() if not k.startswith("test_")}
-            for h in history
-        ]
+        blinded_history = [{k: v for k, v in h.items() if not k.startswith("test_")} for h in history]
         new_description = improve_description(
             skill_name=name,
             skill_content=content,
@@ -252,11 +258,21 @@ def main():
     parser.add_argument("--max-iterations", type=int, default=5, help="Max improvement iterations")
     parser.add_argument("--runs-per-query", type=int, default=3, help="Number of runs per query")
     parser.add_argument("--trigger-threshold", type=float, default=0.5, help="Trigger rate threshold")
-    parser.add_argument("--holdout", type=float, default=0.4, help="Fraction of eval set to hold out for testing (0 to disable)")
+    parser.add_argument(
+        "--holdout", type=float, default=0.4, help="Fraction of eval set to hold out for testing (0 to disable)"
+    )
     parser.add_argument("--model", required=True, help="Model for improvement")
     parser.add_argument("--verbose", action="store_true", help="Print progress to stderr")
-    parser.add_argument("--report", default="auto", help="Generate HTML report at this path (default: 'auto' for temp file, 'none' to disable)")
-    parser.add_argument("--results-dir", default=None, help="Save all outputs (results.json, report.html, log.txt) to a timestamped subdirectory here")
+    parser.add_argument(
+        "--report",
+        default="auto",
+        help="Generate HTML report at this path (default: 'auto' for temp file, 'none' to disable)",
+    )
+    parser.add_argument(
+        "--results-dir",
+        default=None,
+        help="Save all outputs (results.json, report.html, log.txt) to a timestamped subdirectory here",
+    )
     args = parser.parse_args()
     eval_set = json.loads(Path(args.eval_set).read_text())
@@ -272,11 +288,15 @@ def main():
     if args.report != "none":
         if args.report == "auto":
             timestamp = time.strftime("%Y%m%d_%H%M%S")
-            live_report_path = Path(tempfile.gettempdir()) / f"skill_description_report_{skill_path.name}_{timestamp}.html"
+            live_report_path = (
+                Path(tempfile.gettempdir()) / f"skill_description_report_{skill_path.name}_{timestamp}.html"
+            )
         else:
             live_report_path = Path(args.report)
         # Open the report immediately so the user can watch
-        live_report_path.write_text("<html><body><h1>Starting optimization loop...</h1><meta http-equiv='refresh' content='5'></body></html>")
+        live_report_path.write_text(
+            "<html><body><h1>Starting optimization loop...</h1><meta http-equiv='refresh' content='5'></body></html>"
+        )
         webbrowser.open(str(live_report_path))
     else:
         live_report_path = None

package/skills/skill-creator/scripts/utils.py CHANGED

@@ -3,7 +3,6 @@
 from pathlib import Path
 def parse_skill_md(skill_path: Path) -> tuple[str, str, str]:
     """Parse a SKILL.md file, returning (name, description, full_content)."""
     content = (skill_path / "SKILL.md").read_text()
@@ -28,14 +27,16 @@ def parse_skill_md(skill_path: Path) -> tuple[str, str, str]:
     while i < len(frontmatter_lines):
         line = frontmatter_lines[i]
         if line.startswith("name:"):
-            name = line[len("name:"):].strip().strip('"').strip("'")
+            name = line[len("name:") :].strip().strip('"').strip("'")
         elif line.startswith("description:"):
-            value = line[len("description:"):].strip()
+            value = line[len("description:") :].strip()
             # Handle YAML multiline indicators (>, |, >-, |-)
             if value in (">", "|", ">-", "|-"):
                 continuation_lines: list[str] = []
                 i += 1
-                while i < len(frontmatter_lines) and (frontmatter_lines[i].startswith("  ") or frontmatter_lines[i].startswith("\t")):
+                while i < len(frontmatter_lines) and (
+                    frontmatter_lines[i].startswith("  ") or frontmatter_lines[i].startswith("\t")
+                ):
                     continuation_lines.append(frontmatter_lines[i].strip())
                     i += 1
                 description = " ".join(continuation_lines)

package/skills/skill-creator/templates/agent-template.md CHANGED

@@ -76,6 +76,7 @@ You receive these parameters in your prompt:
 ## When Activated
 You activate when:
 - {{ACTIVATION_CONDITION_1}}
 - {{ACTIVATION_CONDITION_2}}
 - {{ACTIVATION_CONDITION_3}}
@@ -89,11 +90,13 @@ You activate when:
 ## Success Criteria
 Good output includes:
 - {{QUALITY_MARKER_1}}
 - {{QUALITY_MARKER_2}}
 - {{QUALITY_MARKER_3}}
 Poor output is:
 - {{ANTI_PATTERN_1}}
 - {{ANTI_PATTERN_2}}
 - {{ANTI_PATTERN_3}}

package/skills/skill-creator/templates/skill-template.md CHANGED

@@ -68,11 +68,13 @@ model: inherit
 ### {{EXAMPLE_1_TITLE}}
 **Input:**
 ```
 {{EXAMPLE_1_INPUT}}
 ```
 **Output:**
 ```
 {{EXAMPLE_1_OUTPUT}}
 ```
@@ -80,11 +82,13 @@ model: inherit
 ### {{EXAMPLE_2_TITLE}}
 **Input:**
 ```
 {{EXAMPLE_2_INPUT}}
 ```
 **Output:**
 ```
 {{EXAMPLE_2_OUTPUT}}
 ```