npm - @einja/dev-cli - Versions diffs - 0.1.40 → 0.1.41 - Mend

@einja/dev-cli 0.1.40 → 0.1.41

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (183) hide show

package/presets/default/.claude/skills/einja-skill-creator/scripts/generate_report.py ADDED Viewed

@@ -0,0 +1,265 @@
+#!/usr/bin/env python3
+"""run_loop.pyの出力からHTMLレポートを生成。
+run_loop.pyのJSON出力を受け取り、各descriptionの試行結果を
+チェック/xで表示するHTMLレポートを生成する。
+トレーニングとテストのクエリを区別して表示。
+"""
+import argparse
+import html
+import json
+import sys
+from pathlib import Path
+def _best_iteration_index(history: list[dict]) -> int:
+    """Return the index of the best iteration in ``history`` (0 when it is empty).
+    Iterations are ranked by ``test_passed`` first and by ``train_passed``
+    (falling back to ``passed``) second; the earliest iteration wins ties.
+    A ``test_passed`` that is missing or explicitly ``None`` ranks as -1, so
+    entries without test results are compared instead of raising TypeError.
+    """
+    best_idx = 0
+    best_test = -1
+    best_train = -1
+    for i, h in enumerate(history):
+        t_passed = h.get("test_passed")
+        if t_passed is None:
+            t_passed = -1
+        tr_passed = h.get("train_passed", h.get("passed", 0))
+        if t_passed > best_test or (t_passed == best_test and tr_passed > best_train):
+            best_test = t_passed
+            best_train = tr_passed
+            best_idx = i
+    return best_idx
+def _score_class(passed, total) -> str:
+    """Map a passed/total pair to the CSS modifier class used by score cells."""
+    if total <= 0:
+        return "bad"
+    ratio = passed / total
+    if ratio >= 1.0:
+        return "perfect"
+    if ratio >= 0.8:
+        return "good"
+    if ratio >= 0.5:
+        return "mid"
+    return "bad"
+def _result_cells(queries: list[dict], results: list[dict]) -> list[str]:
+    """Build one ``<td>`` per query column from a single iteration's results.
+    A query with no matching (or falsy) result entry renders as a ``-`` cell.
+    """
+    by_query = {r["query"]: r for r in results}
+    cells = []
+    for q in queries:
+        r = by_query.get(q["query"])
+        if r:
+            css = "pass" if r["pass"] else "fail"
+            symbol = "&#10003;" if r["pass"] else "&#10007;"
+            rate = f'{r["triggers"]}/{r["runs"]}'
+            cells.append(f'            <td class="{css}" title="rate={rate}">{symbol}</td>\n')
+        else:
+            cells.append('            <td>-</td>\n')
+    return cells
+def generate_html(data: dict, auto_refresh: bool = False, skill_name: str = "") -> str:
+    """Generate an HTML report from the loop output data.
+    Args:
+        data: Parsed loop output. Reads the ``history`` list (one dict per
+            iteration) and the optional ``holdout`` count.
+        auto_refresh: When True, a 5-second meta refresh tag is added.
+        skill_name: Optional name shown (HTML-escaped) as a title prefix.
+    Returns:
+        The complete HTML document as a string.
+    """
+    history = data.get("history", [])
+    holdout = data.get("holdout", 0)
+    title_prefix = html.escape(skill_name + " — ") if skill_name else ""
+    # The query columns (with their should_trigger flag) are taken from the
+    # first iteration only.
+    train_queries: list[dict] = []
+    test_queries: list[dict] = []
+    if history:
+        first = history[0]
+        for r in first.get("train_results", first.get("results", [])):
+            train_queries.append({"query": r["query"], "should_trigger": r.get("should_trigger", True)})
+        for r in first.get("test_results") or []:
+            test_queries.append({"query": r["query"], "should_trigger": r.get("should_trigger", True)})
+    # Computed once and shared by the summary cards and the BEST row marker.
+    best_idx = _best_iteration_index(history)
+    refresh_tag = '    <meta http-equiv="refresh" content="5">\n' if auto_refresh else ""
+    html_parts = []
+    html_parts.append(f"""<!DOCTYPE html>
+<html lang="ja">
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+{refresh_tag}    <title>{title_prefix}スキルDescription最適化</title>
+    <style>
+        * {{ margin: 0; padding: 0; box-sizing: border-box; }}
+        body {{ font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif; background: #0a0a0a; color: #e0e0e0; padding: 20px; }}
+        h1 {{ font-size: 1.4em; margin-bottom: 4px; color: #fff; }}
+        .explainer {{ color: #888; font-size: 0.85em; margin-bottom: 16px; line-height: 1.4; }}
+        .summary {{ display: flex; gap: 24px; margin-bottom: 16px; flex-wrap: wrap; }}
+        .summary-card {{ background: #1a1a1a; border: 1px solid #333; border-radius: 8px; padding: 12px 16px; min-width: 120px; }}
+        .summary-card .label {{ font-size: 0.75em; color: #888; text-transform: uppercase; letter-spacing: 0.05em; }}
+        .summary-card .value {{ font-size: 1.5em; font-weight: 600; margin-top: 2px; }}
+        .legend {{ font-size: 0.8em; color: #888; margin-bottom: 12px; }}
+        .legend span {{ margin-right: 16px; }}
+        .table-container {{ overflow-x: auto; }}
+        table {{ border-collapse: collapse; font-size: 0.8em; width: 100%; }}
+        th, td {{ border: 1px solid #333; padding: 6px 8px; text-align: center; }}
+        th {{ background: #1a1a1a; color: #ccc; font-weight: 600; position: sticky; top: 0; z-index: 2; }}
+        th.query-header {{ writing-mode: vertical-rl; text-orientation: mixed; max-width: 30px; height: 180px; font-weight: 400; font-size: 0.85em; white-space: nowrap; overflow: hidden; text-overflow: ellipsis; }}
+        th.query-header.negative {{ color: #ff6b6b; }}
+        th.section-header {{ background: #222; color: #aaa; font-size: 0.7em; text-transform: uppercase; letter-spacing: 0.1em; }}
+        td.desc {{ text-align: left; max-width: 300px; font-size: 0.85em; white-space: nowrap; overflow: hidden; text-overflow: ellipsis; }}
+        td.desc:hover {{ white-space: normal; overflow: visible; position: relative; z-index: 10; background: #1a1a1a; }}
+        td.pass {{ background: rgba(34, 197, 94, 0.15); color: #22c55e; }}
+        td.fail {{ background: rgba(239, 68, 68, 0.15); color: #ef4444; }}
+        td.score {{ font-weight: 600; }}
+        td.score.perfect {{ color: #22c55e; }}
+        td.score.good {{ color: #86efac; }}
+        td.score.mid {{ color: #fbbf24; }}
+        td.score.bad {{ color: #ef4444; }}
+        tr.best-row {{ background: rgba(34, 197, 94, 0.05); }}
+        tr.best-row td {{ border-color: #22c55e44; }}
+        .best-badge {{ background: #22c55e; color: #000; font-size: 0.7em; padding: 1px 6px; border-radius: 3px; font-weight: 700; margin-left: 4px; }}
+    </style>
+</head>
+<body>
+    <h1>{title_prefix}スキルDescription最適化</h1>
+    <p class="explainer">
+        各行はdescriptionの1イテレーションです。各列はクエリで、セルはそのdescriptionで
+        スキルがトリガーされたかどうかを示します。赤い列ヘッダーはトリガーすべきでないクエリです。
+""")
+    if holdout:
+        html_parts.append(f'        トレーニング/テスト分割: テスト用に{holdout}クエリをホールドアウト。\n')
+    html_parts.append('    </p>\n')
+    # Summary cards: first iteration vs. best iteration.
+    if history:
+        original = history[0]
+        best = history[best_idx]
+        orig_train = f"{original.get('train_passed', original.get('passed', 0))}/{original.get('train_total', original.get('total', 0))}"
+        best_train_str = f"{best.get('train_passed', best.get('passed', 0))}/{best.get('train_total', best.get('total', 0))}"
+        html_parts.append('    <div class="summary">\n')
+        html_parts.append(f'        <div class="summary-card"><div class="label">オリジナル (トレーニング)</div><div class="value">{orig_train}</div></div>\n')
+        if best.get("test_passed") is not None:
+            best_test_str = f"{best.get('test_passed', '?')}/{best.get('test_total', '?')}"
+            html_parts.append(f'        <div class="summary-card"><div class="label">最良スコア (テスト)</div><div class="value">{best_test_str}</div></div>\n')
+        html_parts.append(f'        <div class="summary-card"><div class="label">最良スコア (トレーニング)</div><div class="value">{best_train_str}</div></div>\n')
+        html_parts.append(f'        <div class="summary-card"><div class="label">イテレーション</div><div class="value">{len(history)}</div></div>\n')
+        html_parts.append('    </div>\n')
+    # Legend
+    html_parts.append('    <div class="legend">\n')
+    html_parts.append('        <span>クエリカラム: 通常=トリガーすべき、<span style="color:#ff6b6b">赤</span>=トリガーすべきでない</span>\n')
+    html_parts.append('    </div>\n')
+    # Table
+    html_parts.append('    <div class="table-container">\n    <table>\n')
+    # Header row: section headers span the query columns of each split.
+    html_parts.append('        <tr>\n')
+    html_parts.append('            <th>回</th>\n')
+    html_parts.append('            <th>Description</th>\n')
+    if train_queries:
+        html_parts.append(f'            <th class="section-header" colspan="{len(train_queries)}">トレーニング</th>\n')
+    if test_queries:
+        html_parts.append(f'            <th class="section-header" colspan="{len(test_queries)}">テスト</th>\n')
+    html_parts.append('            <th>トレーニング</th>\n')
+    if test_queries:
+        html_parts.append('            <th>テスト</th>\n')
+    html_parts.append('        </tr>\n')
+    # Query header row: train columns first, then test columns.
+    html_parts.append('        <tr>\n')
+    html_parts.append('            <th></th>\n')
+    html_parts.append('            <th></th>\n')
+    for q in train_queries + test_queries:
+        css_class = "query-header negative" if not q["should_trigger"] else "query-header"
+        html_parts.append(f'            <th class="{css_class}" title="{html.escape(q["query"])}">{html.escape(q["query"][:60])}</th>\n')
+    html_parts.append('            <th></th>\n')
+    if test_queries:
+        html_parts.append('            <th></th>\n')
+    html_parts.append('        </tr>\n')
+    # Data rows, one per iteration.
+    for i, h in enumerate(history):
+        is_best = i == best_idx
+        row_class = ' class="best-row"' if is_best else ""
+        html_parts.append(f'        <tr{row_class}>\n')
+        # Iteration number
+        badge = ' <span class="best-badge">BEST</span>' if is_best else ""
+        html_parts.append(f'            <td>{i}{badge}</td>\n')
+        # Description
+        desc = html.escape(h.get("description", ""))
+        html_parts.append(f'            <td class="desc" title="{desc}">{desc}</td>\n')
+        # Per-query train results, then test results
+        html_parts.extend(_result_cells(train_queries, h.get("train_results", h.get("results", []))))
+        html_parts.extend(_result_cells(test_queries, h.get("test_results", [])))
+        # Train score
+        train_passed = h.get("train_passed", h.get("passed", 0))
+        train_total = h.get("train_total", h.get("total", 0))
+        html_parts.append(f'            <td class="score {_score_class(train_passed, train_total)}">{train_passed}/{train_total}</td>\n')
+        # Test score (the column exists only when there are test queries)
+        if test_queries:
+            test_passed = h.get("test_passed")
+            test_total = h.get("test_total")
+            if test_passed is not None and test_total is not None and test_total > 0:
+                html_parts.append(f'            <td class="score {_score_class(test_passed, test_total)}">{test_passed}/{test_total}</td>\n')
+            else:
+                html_parts.append('            <td>-</td>\n')
+        html_parts.append('        </tr>\n')
+    html_parts.append('    </table>\n    </div>\n')
+    html_parts.append('</body>\n</html>\n')
+    return "".join(html_parts)
+def main():
+    """CLI entry point: read the loop's JSON output and write the HTML report.
+    Both files are handled as UTF-8. The generated page declares
+    ``charset="UTF-8"`` and contains non-ASCII text, so relying on the
+    platform's locale encoding could produce a mis-encoded report.
+    """
+    parser = argparse.ArgumentParser(description="run_loop.pyの出力からHTMLレポートを生成")
+    parser.add_argument("--input", required=True, help="run_loop.pyのJSON出力へのパス")
+    parser.add_argument("--output", required=True, help="HTMLレポートの出力先パス")
+    parser.add_argument("--auto-refresh", action="store_true", help="5秒ごとの自動リフレッシュを有効化（ライブモニタリング用）")
+    parser.add_argument("--skill-name", default="", help="レポートタイトルに表示するスキル名")
+    args = parser.parse_args()
+    data = json.loads(Path(args.input).read_text(encoding="utf-8"))
+    html_content = generate_html(data, auto_refresh=args.auto_refresh, skill_name=args.skill_name)
+    Path(args.output).write_text(html_content, encoding="utf-8")
+    # The confirmation message goes to stderr.
+    print(f"レポートを生成しました: {args.output}", file=sys.stderr)
+if __name__ == "__main__":
+    main()

package/presets/default/.claude/skills/einja-skill-creator/scripts/improve_description.py ADDED Viewed

@@ -0,0 +1,252 @@
+#!/usr/bin/env python3
+"""評価結果に基づいてスキルdescriptionを改善。
+run_eval.pyからの評価結果を受け取り、extended thinkingを使用した
+Claudeでdescriptionを改善する。
+"""
+import argparse
+import json
+import re
+import sys
+from pathlib import Path
+import anthropic
+try:
+    from scripts.utils import parse_skill_md
+except ImportError:
+    from utils import parse_skill_md
+def _split_response(response) -> tuple[str, str]:
+    """Return ``(thinking, text)`` taken from the content blocks of a response.
+    When several blocks share a type, the last one wins.
+    """
+    thinking_text = ""
+    text = ""
+    for block in response.content:
+        if block.type == "thinking":
+            thinking_text = block.thinking
+        elif block.type == "text":
+            text = block.text
+    return thinking_text, text
+def _parse_new_description(text: str) -> str:
+    """Extract the ``<new_description>`` payload, or fall back to the whole text.
+    Surrounding whitespace and double quotes are stripped in both cases.
+    """
+    match = re.search(r"<new_description>(.*?)</new_description>", text, re.DOTALL)
+    body = match.group(1) if match else text
+    return body.strip().strip('"')
+def improve_description(
+    client: anthropic.Anthropic,
+    skill_name: str,
+    skill_content: str,
+    current_description: str,
+    eval_results: dict,
+    history: list[dict],
+    model: str,
+    test_results: dict | None = None,
+    log_dir: Path | None = None,
+    iteration: int | None = None,
+) -> str:
+    """Call Claude to produce an improved description from evaluation results.
+    Args:
+        client: Client used for ``messages.create``.
+        skill_name: Skill name interpolated into the prompt.
+        skill_content: Skill body included in the prompt as context.
+        current_description: Description that produced ``eval_results``.
+        eval_results: Dict with a ``results`` list and a ``summary`` holding
+            ``passed``/``total``.
+        history: Earlier attempts, listed in the prompt so they are not repeated.
+        model: Model identifier passed to the API.
+        test_results: Optional held-out results; only its summary score is used.
+        log_dir: When set, a JSON transcript of the exchange is written there.
+        iteration: Iteration number used in the transcript and its file name.
+    Returns:
+        The new description. If the first answer exceeds 1024 characters the
+        model is asked once to shorten it and that second answer is returned.
+    """
+    failed_triggers = [
+        r for r in eval_results["results"]
+        if r["should_trigger"] and not r["pass"]
+    ]
+    false_triggers = [
+        r for r in eval_results["results"]
+        if not r["should_trigger"] and not r["pass"]
+    ]
+    # Build the score summary
+    train_score = f"{eval_results['summary']['passed']}/{eval_results['summary']['total']}"
+    if test_results:
+        test_score = f"{test_results['summary']['passed']}/{test_results['summary']['total']}"
+        scores_summary = f"Train: {train_score}, Test: {test_score}"
+    else:
+        scores_summary = f"Train: {train_score}"
+    # NOTE: the prompt sent to the Claude API is deliberately kept in English.
+    prompt = f"""You are optimizing a skill description for a Claude Code skill called "{skill_name}". A "skill" is sort of like a prompt, but with progressive disclosure -- there's a title and description that Claude sees when deciding whether to use the skill, and then if it does use the skill, it reads the .md file which has lots more details and potentially links to other resources in the skill folder like helper files and scripts and additional documentation or examples.
+The description appears in Claude's "available_skills" list. When a user sends a query, Claude decides whether to invoke the skill based solely on the title and on this description. Your goal is to write a description that triggers for relevant queries, and doesn't trigger for irrelevant ones.
+Here's the current description:
+<current_description>
+"{current_description}"
+</current_description>
+Current scores ({scores_summary}):
+<scores_summary>
+"""
+    if failed_triggers:
+        prompt += "FAILED TO TRIGGER (should have triggered but didn't):\n"
+        for r in failed_triggers:
+            prompt += f'  - "{r["query"]}" (triggered {r["triggers"]}/{r["runs"]} times)\n'
+        prompt += "\n"
+    if false_triggers:
+        prompt += "FALSE TRIGGERS (triggered but shouldn't have):\n"
+        for r in false_triggers:
+            prompt += f'  - "{r["query"]}" (triggered {r["triggers"]}/{r["runs"]} times)\n'
+        prompt += "\n"
+    if history:
+        prompt += "PREVIOUS ATTEMPTS (do NOT repeat these — try something structurally different):\n\n"
+        for h in history:
+            train_s = f"{h.get('train_passed', h.get('passed', 0))}/{h.get('train_total', h.get('total', 0))}"
+            test_s = f"{h.get('test_passed', '?')}/{h.get('test_total', '?')}" if h.get('test_passed') is not None else None
+            score_str = f"train={train_s}" + (f", test={test_s}" if test_s else "")
+            prompt += f'<attempt {score_str}>\n'
+            prompt += f'Description: "{h["description"]}"\n'
+            if "results" in h:
+                prompt += "Train results:\n"
+                for r in h["results"]:
+                    status = "PASS" if r["pass"] else "FAIL"
+                    prompt += f'  [{status}] "{r["query"][:80]}" (triggered {r["triggers"]}/{r["runs"]})\n'
+            if h.get("note"):
+                prompt += f'Note: {h["note"]}\n'
+            prompt += "</attempt>\n\n"
+    prompt += f"""</scores_summary>
+Skill content (for context on what the skill does):
+<skill_content>
+{skill_content}
+</skill_content>
+Based on the failures, write a new and improved description that is more likely to trigger correctly. When I say "based on the failures", it's a bit of a tricky line to walk because we don't want to overfit to the specific cases you're seeing. So what I DON'T want you to do is produce an ever-expanding list of specific queries that this skill should or shouldn't trigger for. Instead, try to generalize from the failures to broader categories of user intent and situations where this skill would be useful or not useful. The reason for this is twofold:
+1. Avoid overfitting
+2. The list might get loooong and it's injected into ALL queries and there might be a lot of skills, so we don't want to blow too much space on any given description.
+Concretely, your description should not be more than about 100-200 words, even if that comes at the cost of accuracy.
+Here are some tips that we've found to work well in writing these descriptions:
+- The skill should be phrased in the imperative -- "Use this skill for" rather than "this skill does"
+- The skill description should focus on the user's intent, what they are trying to achieve, vs. the implementation details of how the skill works.
+- The description competes with other skills for Claude's attention — make it distinctive and immediately recognizable.
+- If you're getting lots of failures after repeated attempts, change things up. Try different sentence structures or wordings.
+I'd encourage you to be creative and mix up the style in different iterations since you'll have multiple opportunities to try different approaches and we'll just grab the highest-scoring one at the end.
+Please respond with only the new description text in <new_description> tags, nothing else."""
+    response = client.messages.create(
+        model=model,
+        max_tokens=16000,
+        thinking={
+            "type": "enabled",
+            "budget_tokens": 10000,
+        },
+        messages=[{"role": "user", "content": prompt}],
+    )
+    # Pull the thinking and text out of the response, then parse the tags.
+    thinking_text, text = _split_response(response)
+    description = _parse_new_description(text)
+    # Transcript written to the log directory below
+    transcript: dict = {
+        "iteration": iteration,
+        "prompt": prompt,
+        "thinking": thinking_text,
+        "response": text,
+        "parsed_description": description,
+        "char_count": len(description),
+        "over_limit": len(description) > 1024,
+    }
+    # Over 1024 characters: ask the model once to shorten its own answer.
+    if len(description) > 1024:
+        shorten_prompt = f"Your description is {len(description)} characters, which exceeds the hard 1024 character limit. Please rewrite it to be under 1024 characters while preserving the most important trigger words and intent coverage. Respond with only the new description in <new_description> tags."
+        shorten_response = client.messages.create(
+            model=model,
+            max_tokens=16000,
+            thinking={
+                "type": "enabled",
+                "budget_tokens": 10000,
+            },
+            messages=[
+                {"role": "user", "content": prompt},
+                {"role": "assistant", "content": text},
+                {"role": "user", "content": shorten_prompt},
+            ],
+        )
+        shorten_thinking, shorten_text = _split_response(shorten_response)
+        shortened = _parse_new_description(shorten_text)
+        transcript["rewrite_prompt"] = shorten_prompt
+        transcript["rewrite_thinking"] = shorten_thinking
+        transcript["rewrite_response"] = shorten_text
+        transcript["rewrite_description"] = shortened
+        transcript["rewrite_char_count"] = len(shortened)
+        # The shortened text is used as-is; its length is not checked again.
+        description = shortened
+    transcript["final_description"] = description
+    if log_dir:
+        log_dir.mkdir(parents=True, exist_ok=True)
+        # Only a missing iteration is labelled "unknown"; 0 is a valid number.
+        iter_label = "unknown" if iteration is None else iteration
+        log_file = log_dir / f"improve_iter_{iter_label}.json"
+        log_file.write_text(json.dumps(transcript, indent=2), encoding="utf-8")
+    return description
+def main():
+    """CLI entry point: request one improved description and print it as JSON.
+    Reads the evaluation results (and an optional history file), calls
+    ``improve_description``, then prints the new description together with
+    the history extended by the attempt that was just evaluated.
+    """
+    cli = argparse.ArgumentParser(description="評価結果に基づいてスキルdescriptionを改善")
+    cli.add_argument("--eval-results", required=True, help="評価結果JSONへのパス（run_eval.pyの出力）")
+    cli.add_argument("--skill-path", required=True, help="スキルディレクトリへのパス")
+    cli.add_argument("--history", default=None, help="履歴JSONへのパス（過去の試行）")
+    cli.add_argument("--model", required=True, help="改善に使用するモデル")
+    cli.add_argument("--verbose", action="store_true", help="thinkingをstderrに出力")
+    opts = cli.parse_args()
+    skill_dir = Path(opts.skill_path)
+    skill_md = skill_dir / "SKILL.md"
+    if not skill_md.exists():
+        # Exit before reading any input if the skill directory has no SKILL.md.
+        print(f"エラー: {skill_dir} にSKILL.mdが見つかりません", file=sys.stderr)
+        sys.exit(1)
+    eval_results = json.loads(Path(opts.eval_results).read_text())
+    history = json.loads(Path(opts.history).read_text()) if opts.history else []
+    name, _, content = parse_skill_md(skill_dir)
+    current_description = eval_results["description"]
+    if opts.verbose:
+        print(f"現在: {current_description}", file=sys.stderr)
+        print(f"スコア: {eval_results['summary']['passed']}/{eval_results['summary']['total']}", file=sys.stderr)
+    new_description = improve_description(
+        client=anthropic.Anthropic(),
+        skill_name=name,
+        skill_content=content,
+        current_description=current_description,
+        eval_results=eval_results,
+        history=history,
+        model=opts.model,
+    )
+    if opts.verbose:
+        print(f"改善後: {new_description}", file=sys.stderr)
+    # Record the description that was just evaluated as the newest history entry.
+    evaluated_attempt = {
+        "description": current_description,
+        "passed": eval_results["summary"]["passed"],
+        "failed": eval_results["summary"]["failed"],
+        "total": eval_results["summary"]["total"],
+        "results": eval_results["results"],
+    }
+    output = {
+        "description": new_description,
+        "history": history + [evaluated_attempt],
+    }
+    print(json.dumps(output, indent=2))
+if __name__ == "__main__":
+    main()

package/presets/default/.claude/skills/einja-skill-creator/scripts/init_skill.py CHANGED Viewed

@@ -103,30 +103,24 @@ Claudeの処理と思考を導くためにコンテキストに読み込まれ
 **不要なディレクトリは削除できます。** すべてのSkillが3種類のリソースすべてを必要とするわけではありません。
-## einja固有の注意事項
+<!-- @einja:excluded:start -->
+## プロジェクト固有セクションの記入
-### マネージドセクション
+SKILL.md等のmdファイルの末尾には以下を記入する:
-一部のドキュメントでマネージドセクションを使用する場合：
+<!-- @einja:project-private:start id="unique-id" -->
+<!-- プロジェクト固有の情報を記入 -->
+<!-- @einja:project-private:end -->
+<!-- @einja:excluded:end -->
-```markdown
-<!-- @einja:managed:start -->
-この内容は`einja sync`で上書きされる
-<!-- @einja:managed:end -->
-<!-- @einja:seed:start id="unique-id" -->
-プロジェクト固有の内容をここに追記
-<!-- @einja:seed:end -->
-```
-### ビルドシステム連携
-einja management templateでは、`.claude/skills/einja-*/` 内のファイルはビルド時に自動的に `presets/default/` にコピーされます。
+---
-### 関連Skill
+<!-- @einja:project-private:start id="{skill_name}-project" -->
+## プロジェクト固有の設定
-- [einja-output-format](../einja-output-format/SKILL.md) - サブエージェント出力形式
-- [einja-coding-standards](../einja-coding-standards/SKILL.md) - コーディング規約
+<!-- このセクションはプロジェクト固有の内容を追記する場所です -->
+<!-- einja syncで上書きされません -->
+<!-- @einja:project-private:end -->
 """
 EXAMPLE_SCRIPT = '''#!/usr/bin/env python3

package/presets/default/.claude/skills/einja-skill-creator/scripts/package_skill.py CHANGED Viewed

@@ -10,10 +10,36 @@ Skillパッケージャー - Skillフォルダの配布可能な.skillファイ
     python package_skill.py .claude/skills/einja-my-skill ./dist
 """
+import fnmatch
 import sys
 import zipfile
 from pathlib import Path
-from quick_validate import validate_skill
+try:
+    from scripts.quick_validate import validate_skill
+except ImportError:
+    from quick_validate import validate_skill
+# Patterns excluded when packaging
+EXCLUDE_DIRS = {"__pycache__", "node_modules"}
+EXCLUDE_GLOBS = {"*.pyc"}
+EXCLUDE_FILES = {".DS_Store"}
+# Directories excluded only when directly under the Skill root
+ROOT_EXCLUDE_DIRS = {"evals"}
+def should_exclude(rel_path: Path) -> bool:
+    """Return True when ``rel_path`` must be left out of the package.
+    ``rel_path`` is relative to the Skill folder's parent, so its first
+    component is the Skill folder name and the second component, if any,
+    is the entry directly under the Skill root.
+    """
+    segments = rel_path.parts
+    # An excluded directory name anywhere in the path rules the file out.
+    if not EXCLUDE_DIRS.isdisjoint(segments):
+        return True
+    # Root-only exclusions look at the component right after the Skill folder.
+    if len(segments) >= 2 and segments[1] in ROOT_EXCLUDE_DIRS:
+        return True
+    filename = rel_path.name
+    if filename in EXCLUDE_FILES:
+        return True
+    for pattern in EXCLUDE_GLOBS:
+        if fnmatch.fnmatch(filename, pattern):
+            return True
+    return False
 def package_skill(skill_path, output_dir=None):
@@ -66,13 +92,16 @@ def package_skill(skill_path, output_dir=None):
     # .skillファイル（zip形式）を作成
     try:
         with zipfile.ZipFile(skill_filename, 'w', zipfile.ZIP_DEFLATED) as zipf:
-            # Skillディレクトリを走査
+            # Skillディレクトリを走査し、ビルドアーティファクトを除外
             for file_path in skill_path.rglob('*'):
-                if file_path.is_file():
-                    # zip内の相対パスを計算
-                    arcname = file_path.relative_to(skill_path.parent)
-                    zipf.write(file_path, arcname)
-                    print(f"  追加: {arcname}")
+                if not file_path.is_file():
+                    continue
+                arcname = file_path.relative_to(skill_path.parent)
+                if should_exclude(arcname):
+                    print(f"  スキップ: {arcname}")
+                    continue
+                zipf.write(file_path, arcname)
+                print(f"  追加: {arcname}")
         print(f"\n✅ Skillを正常にパッケージ化しました: {skill_filename}")
         return skill_filename