@einja/dev-cli 0.1.40 → 0.1.44

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (207) hide show
  1. package/README.md +89 -1
  2. package/dist/cli.js +1 -0
  3. package/dist/cli.js.map +1 -1
  4. package/dist/commands/init.d.ts.map +1 -1
  5. package/dist/commands/init.js +71 -1
  6. package/dist/commands/init.js.map +1 -1
  7. package/dist/commands/list.js.map +1 -1
  8. package/dist/commands/sync.d.ts.map +1 -1
  9. package/dist/commands/sync.js +187 -13
  10. package/dist/commands/sync.js.map +1 -1
  11. package/dist/commands/task-loop/lib/github-client.test.js.map +1 -1
  12. package/dist/commands/task-loop/lib/vibe-kanban-rest-client.js +2 -2
  13. package/dist/commands/task-loop/lib/vibe-kanban-rest-client.js.map +1 -1
  14. package/dist/lib/dependency-checker.d.ts.map +1 -1
  15. package/dist/lib/merger.d.ts +12 -0
  16. package/dist/lib/merger.d.ts.map +1 -1
  17. package/dist/lib/merger.js +28 -0
  18. package/dist/lib/merger.js.map +1 -1
  19. package/dist/lib/preset-update/cli-repo-detector.d.ts.map +1 -1
  20. package/dist/lib/preset-update/file-copier.d.ts.map +1 -1
  21. package/dist/lib/preset-update/file-copier.js +3 -3
  22. package/dist/lib/preset-update/file-copier.js.map +1 -1
  23. package/dist/lib/preset-update/preset-finder.d.ts.map +1 -1
  24. package/dist/lib/preset.d.ts.map +1 -1
  25. package/dist/lib/sync/category-validator.d.ts +1 -1
  26. package/dist/lib/sync/category-validator.d.ts.map +1 -1
  27. package/dist/lib/sync/category-validator.js +2 -1
  28. package/dist/lib/sync/category-validator.js.map +1 -1
  29. package/dist/lib/sync/category-validator.test.js +3 -1
  30. package/dist/lib/sync/category-validator.test.js.map +1 -1
  31. package/dist/lib/sync/conflict-reporter.d.ts.map +1 -1
  32. package/dist/lib/sync/diff-engine.d.ts.map +1 -1
  33. package/dist/lib/sync/file-filter.d.ts.map +1 -1
  34. package/dist/lib/sync/file-filter.js +1 -0
  35. package/dist/lib/sync/file-filter.js.map +1 -1
  36. package/dist/lib/sync/integration.test.js +255 -69
  37. package/dist/lib/sync/integration.test.js.map +1 -1
  38. package/dist/lib/sync/json-processor.d.ts +4 -4
  39. package/dist/lib/sync/json-processor.d.ts.map +1 -1
  40. package/dist/lib/sync/json-processor.js +11 -11
  41. package/dist/lib/sync/json-processor.js.map +1 -1
  42. package/dist/lib/sync/marker-processor.d.ts +60 -8
  43. package/dist/lib/sync/marker-processor.d.ts.map +1 -1
  44. package/dist/lib/sync/marker-processor.js +117 -26
  45. package/dist/lib/sync/marker-processor.js.map +1 -1
  46. package/dist/lib/sync/marker-processor.test.js +261 -40
  47. package/dist/lib/sync/marker-processor.test.js.map +1 -1
  48. package/dist/lib/sync/metadata-manager.d.ts +4 -0
  49. package/dist/lib/sync/metadata-manager.d.ts.map +1 -1
  50. package/dist/lib/sync/metadata-manager.js +15 -0
  51. package/dist/lib/sync/metadata-manager.js.map +1 -1
  52. package/dist/lib/sync/metadata-manager.test.js +69 -0
  53. package/dist/lib/sync/metadata-manager.test.js.map +1 -1
  54. package/dist/lib/sync/orphan-cleaner.d.ts +29 -0
  55. package/dist/lib/sync/orphan-cleaner.d.ts.map +1 -0
  56. package/dist/lib/sync/orphan-cleaner.js +80 -0
  57. package/dist/lib/sync/orphan-cleaner.js.map +1 -0
  58. package/dist/lib/sync/orphan-cleaner.test.d.ts +2 -0
  59. package/dist/lib/sync/orphan-cleaner.test.d.ts.map +1 -0
  60. package/dist/lib/sync/orphan-cleaner.test.js +169 -0
  61. package/dist/lib/sync/orphan-cleaner.test.js.map +1 -0
  62. package/dist/lib/sync/project-private-synchronizer.d.ts +52 -0
  63. package/dist/lib/sync/project-private-synchronizer.d.ts.map +1 -0
  64. package/dist/lib/sync/project-private-synchronizer.js +110 -0
  65. package/dist/lib/sync/project-private-synchronizer.js.map +1 -0
  66. package/dist/lib/sync/project-private-synchronizer.test.d.ts +2 -0
  67. package/dist/lib/sync/project-private-synchronizer.test.d.ts.map +1 -0
  68. package/dist/lib/sync/project-private-synchronizer.test.js +348 -0
  69. package/dist/lib/sync/project-private-synchronizer.test.js.map +1 -0
  70. package/dist/types/index.d.ts +1 -0
  71. package/dist/types/index.d.ts.map +1 -1
  72. package/dist/types/sync.d.ts +36 -6
  73. package/dist/types/sync.d.ts.map +1 -1
  74. package/dist/types/sync.js +2 -2
  75. package/dist/types/sync.js.map +1 -1
  76. package/package.json +5 -4
  77. package/presets/default/.claude/agents/einja/Explore.md +140 -0
  78. package/presets/default/.claude/agents/einja/backend-architect.md +21 -1
  79. package/presets/default/.claude/agents/einja/codex-agent.md +5 -1
  80. package/presets/default/.claude/agents/einja/design-engineer.md +5 -1
  81. package/presets/default/.claude/agents/einja/docs/docs-updater.md +7 -93
  82. package/presets/default/.claude/agents/einja/frontend-architect.md +21 -1
  83. package/presets/default/.claude/agents/einja/frontend-coder.md +5 -1
  84. package/presets/default/.claude/agents/einja/{specs/spec-design-generator.md → issue-specs/design-generator.md} +16 -8
  85. package/presets/default/.claude/agents/einja/{specs/spec-qa-generator.md → issue-specs/qa-generator.md} +10 -4
  86. package/presets/default/.claude/agents/einja/{specs/spec-requirements-generator.md → issue-specs/requirements-generator.md} +9 -6
  87. package/presets/default/.claude/agents/einja/{specs/spec-tasks-generator.md → issue-specs/tasks-generator.md} +19 -16
  88. package/presets/default/.claude/agents/einja/{specs/spec-tasks-validator.md → issue-specs/tasks-validator.md} +13 -9
  89. package/presets/default/.claude/agents/einja/issue-specs/ui-design-generator.md +114 -0
  90. package/presets/default/.claude/agents/einja/task/task-executer.md +64 -116
  91. package/presets/default/.claude/agents/einja/task/task-modification-analyzer.md +6 -2
  92. package/presets/default/.claude/agents/einja/task/task-qa.md +7 -3
  93. package/presets/default/.claude/agents/einja/task/task-reviewer.md +17 -1
  94. package/presets/default/.claude/commands/einja/einja-sync.md +124 -45
  95. package/presets/default/.claude/commands/einja/frontend-implement.md +3 -1
  96. package/presets/default/.claude/commands/einja/issue-exec.md +413 -0
  97. package/presets/default/.claude/commands/einja/start-dev.md +4 -0
  98. package/presets/default/.claude/commands/einja/sync-cursor-commands.md +10 -6
  99. package/presets/default/.claude/commands/einja/{update-docs-by-task-specs.md → update-docs-by-issue-specs.md} +61 -57
  100. package/presets/default/.claude/hooks/einja/plan-mode-skill-loader.sh +27 -0
  101. package/presets/default/.claude/settings.json +29 -5
  102. package/presets/default/.claude/skills/{einja-general-context-loader → _einja-general-context-loader}/SKILL.md +6 -2
  103. package/presets/default/.claude/skills/{einja-output-format → _einja-output-format}/SKILL.md +5 -1
  104. package/presets/default/.claude/skills/_einja-project-overview/SKILL.md +29 -0
  105. package/presets/default/.claude/skills/{einja-spec-context-loader → _einja-spec-context-loader}/SKILL.md +9 -5
  106. package/presets/default/.claude/skills/einja-coding-standards/references/testing-strategy.md +899 -0
  107. package/presets/default/.claude/skills/einja-conflict-resolver/SKILL.md +5 -1
  108. package/presets/default/.claude/skills/einja-create-pr/SKILL.md +138 -0
  109. package/presets/default/.claude/skills/einja-infra-maintenance/SKILL.md +779 -0
  110. package/presets/default/.claude/{commands/einja/spec-create.md → skills/einja-issue-spec-create/SKILL.md} +60 -23
  111. package/presets/default/.claude/skills/einja-issue-spec-generator/SKILL.md +105 -0
  112. package/presets/default/.claude/skills/einja-issue-spec-generator/references/format-rules.md +35 -0
  113. package/presets/default/.claude/skills/einja-issue-spec-validator/SKILL.md +130 -0
  114. package/presets/default/.claude/skills/einja-issue-spec-validator/references/validation-rules.md +52 -0
  115. package/presets/default/.claude/skills/einja-npm-release/SKILL.md +242 -0
  116. package/presets/default/.claude/skills/einja-skill-creator/SKILL.md +311 -263
  117. package/presets/default/.claude/skills/einja-skill-creator/agents/analyzer.md +274 -0
  118. package/presets/default/.claude/skills/einja-skill-creator/agents/comparator.md +202 -0
  119. package/presets/default/.claude/skills/einja-skill-creator/agents/grader.md +195 -0
  120. package/presets/default/.claude/skills/einja-skill-creator/assets/eval_review.html +146 -0
  121. package/presets/default/.claude/skills/einja-skill-creator/eval-viewer/generate_review.py +471 -0
  122. package/presets/default/.claude/skills/einja-skill-creator/eval-viewer/viewer.html +1325 -0
  123. package/presets/default/.claude/skills/einja-skill-creator/references/schemas.md +430 -0
  124. package/presets/default/.claude/skills/einja-skill-creator/scripts/aggregate_benchmark.py +401 -0
  125. package/presets/default/.claude/skills/einja-skill-creator/scripts/compare_runs.py +154 -0
  126. package/presets/default/.claude/skills/einja-skill-creator/scripts/generate_report.py +272 -0
  127. package/presets/default/.claude/skills/einja-skill-creator/scripts/improve_description.py +247 -0
  128. package/presets/default/.claude/skills/einja-skill-creator/scripts/init_skill.py +13 -19
  129. package/presets/default/.claude/skills/einja-skill-creator/scripts/package_skill.py +36 -7
  130. package/presets/default/.claude/skills/einja-skill-creator/scripts/run_eval.py +310 -0
  131. package/presets/default/.claude/skills/einja-skill-creator/scripts/run_loop.py +375 -0
  132. package/presets/default/.claude/skills/einja-skill-creator/scripts/utils.py +48 -0
  133. package/presets/default/.claude/skills/einja-skill-first/SKILL.md +265 -0
  134. package/presets/default/.claude/skills/einja-subagent-question-protocol/SKILL.md +98 -0
  135. package/presets/default/.claude/skills/einja-task-commit/SKILL.md +11 -7
  136. package/presets/default/.claude/{commands/einja/task-exec.md → skills/einja-task-exec/SKILL.md} +106 -89
  137. package/presets/default/.claude/skills/einja-task-qa/SKILL.md +8 -4
  138. package/presets/default/.claude/skills/einja-task-qa/references/troubleshooting.md +1 -1
  139. package/presets/default/.claude/skills/einja-task-qa/references/usage-patterns.md +2 -2
  140. package/presets/default/.claude/skills/einja-team-exec/SKILL.md +165 -0
  141. package/presets/default/.envrc +5 -0
  142. package/presets/default/.mcp.json +2 -12
  143. package/presets/default/CLAUDE.md.template +45 -8
  144. package/presets/default/docs/einja/example/specs/issues/issue999-example-task/tasks.md +1 -1
  145. package/presets/default/docs/einja/instructions/deployment-setup.md +4 -9
  146. package/presets/default/docs/einja/instructions/environment-setup.md +3 -8
  147. package/presets/default/docs/einja/instructions/issue-exec-workflow.md +276 -0
  148. package/presets/default/docs/einja/instructions/local-server-environment-and-worktree.md +71 -9
  149. package/presets/default/docs/einja/instructions/neon-cli-reference.md +3 -8
  150. package/presets/default/docs/einja/instructions/setup-flow.md +279 -0
  151. package/presets/default/docs/einja/instructions/task-execute.md +63 -68
  152. package/presets/default/docs/einja/instructions/vercel-cli-reference.md +17 -10
  153. package/presets/default/docs/einja/steering/README.md +11 -11
  154. package/presets/default/docs/einja/steering/acceptance-criteria-and-qa-guide.md +4 -9
  155. package/presets/default/docs/einja/steering/architecture.md +3 -8
  156. package/presets/default/docs/einja/steering/branch-strategy.md +63 -70
  157. package/presets/default/docs/einja/steering/commit-rules.md +3 -8
  158. package/presets/default/docs/einja/steering/db-schema-design.md +3 -8
  159. package/presets/default/docs/einja/steering/development/api-development.md +3 -8
  160. package/presets/default/docs/einja/steering/development/backend-architecture.md +3 -8
  161. package/presets/default/docs/einja/steering/development/coding-standards.md +723 -0
  162. package/presets/default/docs/einja/steering/development/component-design.md +502 -0
  163. package/presets/default/docs/einja/steering/development/database-guidelines.md +2 -2
  164. package/presets/default/docs/einja/steering/development/frontend-development.md +3 -8
  165. package/presets/default/docs/einja/steering/development/playwright-guidelines.md +59 -0
  166. package/presets/default/docs/einja/steering/development/review-guidelines.md +3 -8
  167. package/presets/default/docs/einja/steering/development/testing-strategy.md +3 -8
  168. package/presets/default/docs/einja/steering/development-workflow.md +155 -140
  169. package/presets/default/docs/einja/steering/infrastructure/deployment.md +156 -55
  170. package/presets/default/docs/einja/steering/infrastructure/environment-variables.md +4 -8
  171. package/presets/default/docs/einja/steering/product.md +3 -8
  172. package/presets/default/docs/einja/steering/task-management.md +22 -110
  173. package/presets/default/scripts/ensure-serena.sh +75 -0
  174. package/presets/default/scripts/env-rotate-secrets.ts +396 -0
  175. package/presets/default/scripts/env-show.ts +130 -0
  176. package/presets/default/scripts/env.ts +479 -0
  177. package/presets/default/scripts/init-github.ts +363 -0
  178. package/presets/default/scripts/init.sh +98 -0
  179. package/presets/default/scripts/lib/env-common.ts +108 -0
  180. package/presets/default/scripts/lib/worktree-config.ts +64 -0
  181. package/presets/default/scripts/setup-dev.ts +655 -0
  182. package/presets/default/scripts/stop-serena.sh +25 -0
  183. package/presets/default/scripts/worktree/dev.ts +872 -0
  184. package/dist/lib/sync/seed-synchronizer.d.ts +0 -27
  185. package/dist/lib/sync/seed-synchronizer.d.ts.map +0 -1
  186. package/dist/lib/sync/seed-synchronizer.js +0 -72
  187. package/dist/lib/sync/seed-synchronizer.js.map +0 -1
  188. package/dist/lib/sync/seed-synchronizer.test.d.ts +0 -2
  189. package/dist/lib/sync/seed-synchronizer.test.d.ts.map +0 -1
  190. package/dist/lib/sync/seed-synchronizer.test.js +0 -147
  191. package/dist/lib/sync/seed-synchronizer.test.js.map +0 -1
  192. package/presets/default/.claude/agents/einja/git/conflict-resolver.md +0 -148
  193. package/presets/default/.claude/hooks/einja/validate-git-commit.sh +0 -239
  194. package/presets/default/.claude/skills/einja-api-development/SKILL.md +0 -14
  195. package/presets/default/.claude/skills/einja-backend-architecture/SKILL.md +0 -18
  196. package/presets/default/.claude/skills/einja-coding-standards/SKILL.md +0 -132
  197. package/presets/default/.claude/skills/einja-coding-standards/references/import-conventions.md +0 -69
  198. package/presets/default/.claude/skills/einja-coding-standards/references/naming-conventions.md +0 -107
  199. package/presets/default/.claude/skills/einja-coding-standards/references/prohibited-patterns.md +0 -169
  200. package/presets/default/.claude/skills/einja-coding-standards/references/typescript-rules.md +0 -247
  201. package/presets/default/.claude/skills/einja-component-design/SKILL.md +0 -109
  202. package/presets/default/.claude/skills/einja-component-design/references/directory-structure.md +0 -117
  203. package/presets/default/.claude/skills/einja-component-design/references/props-patterns.md +0 -159
  204. package/presets/default/.claude/skills/einja-component-design/references/styling-guide.md +0 -122
  205. package/presets/default/.claude/skills/einja-frontend-development/SKILL.md +0 -14
  206. package/presets/default/.claude/skills/einja-project-overview/SKILL.md +0 -35
  207. package/presets/default/docs/einja/instructions/task-vibe-kanban-loop.md +0 -565
@@ -0,0 +1,272 @@
1
+ #!/usr/bin/env python3
2
+ """run_loop.pyの出力からHTMLレポートを生成。
3
+
4
+ run_loop.pyのJSON出力を受け取り、各descriptionの試行結果を
5
+ チェック/xで表示するHTMLレポートを生成する。
6
+ トレーニングとテストのクエリを区別して表示。
7
+ """
8
+
9
+ import argparse
10
+ import html
11
+ import json
12
+ import sys
13
+ from pathlib import Path
14
+
15
+
16
+ def generate_html(data: dict, auto_refresh: bool = False, skill_name: str = "") -> str:
17
+ """ループ出力データからHTMLレポートを生成。auto_refreshがTrueの場合、メタリフレッシュタグを追加。"""
18
+ history = data.get("history", [])
19
+ holdout = data.get("holdout", 0)
20
+ title_prefix = html.escape(skill_name + " — ") if skill_name else ""
21
+
22
+ # トレーニングとテストの全ユニーククエリを取得(should_trigger情報付き)
23
+ train_queries: list[dict] = []
24
+ test_queries: list[dict] = []
25
+ if history:
26
+ for r in history[0].get("train_results", history[0].get("results", [])):
27
+ train_queries.append({"query": r["query"], "should_trigger": r.get("should_trigger", True)})
28
+ if history[0].get("test_results"):
29
+ for r in history[0].get("test_results", []):
30
+ test_queries.append({"query": r["query"], "should_trigger": r.get("should_trigger", True)})
31
+
32
+ refresh_tag = ' <meta http-equiv="refresh" content="5">\n' if auto_refresh else ""
33
+
34
+ html_parts = []
35
+ html_parts.append(f"""<!DOCTYPE html>
36
+ <html lang="ja">
37
+ <head>
38
+ <meta charset="UTF-8">
39
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
40
+ {refresh_tag} <title>{title_prefix}スキルDescription最適化</title>
41
+ <style>
42
+ * {{ margin: 0; padding: 0; box-sizing: border-box; }}
43
+ body {{ font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif; background: #0a0a0a; color: #e0e0e0; padding: 20px; }}
44
+ h1 {{ font-size: 1.4em; margin-bottom: 4px; color: #fff; }}
45
+ .explainer {{ color: #888; font-size: 0.85em; margin-bottom: 16px; line-height: 1.4; }}
46
+ .summary {{ display: flex; gap: 24px; margin-bottom: 16px; flex-wrap: wrap; }}
47
+ .summary-card {{ background: #1a1a1a; border: 1px solid #333; border-radius: 8px; padding: 12px 16px; min-width: 120px; }}
48
+ .summary-card .label {{ font-size: 0.75em; color: #888; text-transform: uppercase; letter-spacing: 0.05em; }}
49
+ .summary-card .value {{ font-size: 1.5em; font-weight: 600; margin-top: 2px; }}
50
+ .legend {{ font-size: 0.8em; color: #888; margin-bottom: 12px; }}
51
+ .legend span {{ margin-right: 16px; }}
52
+ .table-container {{ overflow-x: auto; }}
53
+ table {{ border-collapse: collapse; font-size: 0.8em; width: 100%; }}
54
+ th, td {{ border: 1px solid #333; padding: 6px 8px; text-align: center; }}
55
+ th {{ background: #1a1a1a; color: #ccc; font-weight: 600; position: sticky; top: 0; z-index: 2; }}
56
+ th.query-header {{ writing-mode: vertical-rl; text-orientation: mixed; max-width: 30px; height: 180px; font-weight: 400; font-size: 0.85em; white-space: nowrap; overflow: hidden; text-overflow: ellipsis; }}
57
+ th.query-header.negative {{ color: #ff6b6b; }}
58
+ th.section-header {{ background: #222; color: #aaa; font-size: 0.7em; text-transform: uppercase; letter-spacing: 0.1em; }}
59
+ td.desc {{ text-align: left; max-width: 300px; font-size: 0.85em; white-space: nowrap; overflow: hidden; text-overflow: ellipsis; }}
60
+ td.desc:hover {{ white-space: normal; overflow: visible; position: relative; z-index: 10; background: #1a1a1a; }}
61
+ td.pass {{ background: rgba(34, 197, 94, 0.15); color: #22c55e; }}
62
+ td.fail {{ background: rgba(239, 68, 68, 0.15); color: #ef4444; }}
63
+ td.score {{ font-weight: 600; }}
64
+ td.score.perfect {{ color: #22c55e; }}
65
+ td.score.good {{ color: #86efac; }}
66
+ td.score.mid {{ color: #fbbf24; }}
67
+ td.score.bad {{ color: #ef4444; }}
68
+ tr.best-row {{ background: rgba(34, 197, 94, 0.05); }}
69
+ tr.best-row td {{ border-color: #22c55e44; }}
70
+ .best-badge {{ background: #22c55e; color: #000; font-size: 0.7em; padding: 1px 6px; border-radius: 3px; font-weight: 700; margin-left: 4px; }}
71
+ </style>
72
+ </head>
73
+ <body>
74
+ <h1>{title_prefix}スキルDescription最適化</h1>
75
+ <p class="explainer">
76
+ 各行はdescriptionの1イテレーションです。各列はクエリで、セルはそのdescriptionで
77
+ スキルがトリガーされたかどうかを示します。赤い列ヘッダーはトリガーすべきでないクエリです。
78
+ """)
79
+
80
+ if holdout:
81
+ html_parts.append(f' トレーニング/テスト分割: テスト用に{holdout}クエリをホールドアウト。\n')
82
+
83
+ html_parts.append(' </p>\n')
84
+
85
+ # サマリーカード
86
+ if history:
87
+ # 最良のイテレーションを見つける(テスト > トレーニングで優先)
88
+ best_idx = 0
89
+ best_test = -1
90
+ best_train = -1
91
+ for i, h in enumerate(history):
92
+ t_passed = h.get("test_passed", -1)
93
+ tr_passed = h.get("train_passed", h.get("passed", 0))
94
+ if t_passed > best_test or (t_passed == best_test and tr_passed > best_train):
95
+ best_test = t_passed
96
+ best_train = tr_passed
97
+ best_idx = i
98
+
99
+ original = history[0] if history else {}
100
+ best = history[best_idx] if history else {}
101
+
102
+ orig_train = f"{original.get('train_passed', original.get('passed', 0))}/{original.get('train_total', original.get('total', 0))}"
103
+ best_train_str = f"{best.get('train_passed', best.get('passed', 0))}/{best.get('train_total', best.get('total', 0))}"
104
+
105
+ html_parts.append(' <div class="summary">\n')
106
+ html_parts.append(f' <div class="summary-card"><div class="label">オリジナル (トレーニング)</div><div class="value">{orig_train}</div></div>\n')
107
+
108
+ if best.get("test_passed") is not None:
109
+ best_test_str = f"{best.get('test_passed', '?')}/{best.get('test_total', '?')}"
110
+ html_parts.append(f' <div class="summary-card"><div class="label">最良スコア (テスト)</div><div class="value">{best_test_str}</div></div>\n')
111
+ html_parts.append(f' <div class="summary-card"><div class="label">最良スコア (トレーニング)</div><div class="value">{best_train_str}</div></div>\n')
112
+ html_parts.append(f' <div class="summary-card"><div class="label">イテレーション</div><div class="value">{len(history)}</div></div>\n')
113
+ html_parts.append(' </div>\n')
114
+
115
+ # レジェンド
116
+ html_parts.append(' <div class="legend">\n')
117
+ html_parts.append(' <span>クエリカラム: 通常=トリガーすべき、<span style="color:#ff6b6b">赤</span>=トリガーすべきでない</span>\n')
118
+ html_parts.append(' </div>\n')
119
+
120
+ # テーブル
121
+ html_parts.append(' <div class="table-container">\n <table>\n')
122
+
123
+ # ヘッダー行
124
+ html_parts.append(' <tr>\n')
125
+ html_parts.append(' <th>回</th>\n')
126
+ html_parts.append(' <th>Description</th>\n')
127
+
128
+ if train_queries:
129
+ html_parts.append(f' <th class="section-header" colspan="{len(train_queries)}">トレーニング</th>\n')
130
+ if test_queries:
131
+ html_parts.append(f' <th class="section-header" colspan="{len(test_queries)}">テスト</th>\n')
132
+
133
+ html_parts.append(' <th>トレーニング</th>\n')
134
+ if test_queries:
135
+ html_parts.append(' <th>テスト</th>\n')
136
+ html_parts.append(' </tr>\n')
137
+
138
+ # クエリヘッダー行
139
+ html_parts.append(' <tr>\n')
140
+ html_parts.append(' <th></th>\n')
141
+ html_parts.append(' <th></th>\n')
142
+
143
+ for q in train_queries:
144
+ css_class = "query-header negative" if not q["should_trigger"] else "query-header"
145
+ html_parts.append(f' <th class="{css_class}" title="{html.escape(q["query"])}">{html.escape(q["query"][:60])}</th>\n')
146
+ for q in test_queries:
147
+ css_class = "query-header negative" if not q["should_trigger"] else "query-header"
148
+ html_parts.append(f' <th class="{css_class}" title="{html.escape(q["query"])}">{html.escape(q["query"][:60])}</th>\n')
149
+
150
+ html_parts.append(' <th></th>\n')
151
+ if test_queries:
152
+ html_parts.append(' <th></th>\n')
153
+ html_parts.append(' </tr>\n')
154
+
155
+ # 最良のイテレーションを見つける
156
+ best_idx = 0
157
+ if history:
158
+ best_test_score = -1
159
+ best_train_score = -1
160
+ for i, h in enumerate(history):
161
+ t_passed = h.get("test_passed", -1)
162
+ tr_passed = h.get("train_passed", h.get("passed", 0))
163
+ if t_passed > best_test_score or (t_passed == best_test_score and tr_passed > best_train_score):
164
+ best_test_score = t_passed
165
+ best_train_score = tr_passed
166
+ best_idx = i
167
+
168
+ # データ行
169
+ for i, h in enumerate(history):
170
+ row_class = ' class="best-row"' if i == best_idx else ""
171
+ html_parts.append(f' <tr{row_class}>\n')
172
+
173
+ # イテレーション番号
174
+ badge = ' <span class="best-badge">BEST</span>' if i == best_idx else ""
175
+ html_parts.append(f' <td>{i}{badge}</td>\n')
176
+
177
+ # Description
178
+ desc = html.escape(h.get("description", ""))
179
+ html_parts.append(f' <td class="desc" title="{desc}">{desc}</td>\n')
180
+
181
+ # トレーニング結果
182
+ train_results = h.get("train_results", h.get("results", []))
183
+ result_map = {r["query"]: r for r in train_results}
184
+ for q in train_queries:
185
+ r = result_map.get(q["query"])
186
+ if r:
187
+ css = "pass" if r["pass"] else "fail"
188
+ symbol = "&#10003;" if r["pass"] else "&#10007;"
189
+ rate = f'{r["triggers"]}/{r["runs"]}'
190
+ html_parts.append(f' <td class="{css}" title="rate={rate}">{symbol}</td>\n')
191
+ else:
192
+ html_parts.append(' <td>-</td>\n')
193
+
194
+ # テスト結果
195
+ test_results = h.get("test_results", [])
196
+ test_result_map = {r["query"]: r for r in test_results}
197
+ for q in test_queries:
198
+ r = test_result_map.get(q["query"])
199
+ if r:
200
+ css = "pass" if r["pass"] else "fail"
201
+ symbol = "&#10003;" if r["pass"] else "&#10007;"
202
+ rate = f'{r["triggers"]}/{r["runs"]}'
203
+ html_parts.append(f' <td class="{css}" title="rate={rate}">{symbol}</td>\n')
204
+ else:
205
+ html_parts.append(' <td>-</td>\n')
206
+
207
+ # トレーニングスコア
208
+ train_passed = h.get("train_passed", h.get("passed", 0))
209
+ train_total = h.get("train_total", h.get("total", 0))
210
+ if train_total > 0:
211
+ ratio = train_passed / train_total
212
+ if ratio >= 1.0:
213
+ score_class = "perfect"
214
+ elif ratio >= 0.8:
215
+ score_class = "good"
216
+ elif ratio >= 0.5:
217
+ score_class = "mid"
218
+ else:
219
+ score_class = "bad"
220
+ else:
221
+ score_class = "bad"
222
+ html_parts.append(f' <td class="score {score_class}">{train_passed}/{train_total}</td>\n')
223
+
224
+ # テストスコア
225
+ if test_queries:
226
+ test_passed = h.get("test_passed")
227
+ test_total = h.get("test_total")
228
+ if test_passed is not None and test_total is not None and test_total > 0:
229
+ ratio = test_passed / test_total
230
+ if ratio >= 1.0:
231
+ score_class = "perfect"
232
+ elif ratio >= 0.8:
233
+ score_class = "good"
234
+ elif ratio >= 0.5:
235
+ score_class = "mid"
236
+ else:
237
+ score_class = "bad"
238
+ html_parts.append(f' <td class="score {score_class}">{test_passed}/{test_total}</td>\n')
239
+ else:
240
+ html_parts.append(' <td>-</td>\n')
241
+
242
+ html_parts.append(' </tr>\n')
243
+
244
+ html_parts.append(' </table>\n </div>\n')
245
+ html_parts.append('</body>\n</html>\n')
246
+
247
+ return "".join(html_parts)
248
+
249
+
250
def main():
    """Command-line entry point: read loop output JSON and emit the HTML report.

    The positional ``input`` argument is a path to the JSON file, or ``-`` to
    read JSON from stdin. With ``-o/--output`` the report is written to that
    path and a confirmation is printed to stderr; otherwise the report is
    printed to stdout. ``--skill-name`` is passed through to ``generate_html``.
    """
    parser = argparse.ArgumentParser(description="run_loop.pyの出力からHTMLレポートを生成")
    parser.add_argument("input", help="run_loop.pyのJSON出力へのパス('-'でstdinから読み込み)")
    parser.add_argument("-o", "--output", default=None, help="HTMLレポートの出力先パス(未指定時はstdout)")
    parser.add_argument("--skill-name", default="", help="レポートタイトルに表示するスキル名")
    args = parser.parse_args()

    if args.input == "-":
        data = json.load(sys.stdin)
    else:
        # Read as UTF-8 explicitly rather than relying on the locale default.
        data = json.loads(Path(args.input).read_text(encoding="utf-8"))

    html_content = generate_html(data, skill_name=args.skill_name)

    if args.output:
        # The document declares charset UTF-8 and contains non-ASCII text, so
        # the file must be written as UTF-8 regardless of the locale encoding.
        Path(args.output).write_text(html_content, encoding="utf-8")
        print(f"レポートを生成しました: {args.output}", file=sys.stderr)
    else:
        print(html_content)


if __name__ == "__main__":
    main()
@@ -0,0 +1,247 @@
1
+ #!/usr/bin/env python3
2
+ """評価結果に基づいてスキルdescriptionを改善。
3
+
4
+ run_eval.pyからの評価結果を受け取り、extended thinkingを使用した
5
+ Claudeでdescriptionを改善する。
6
+ """
7
+
8
+ import argparse
9
+ import json
10
+ import re
11
+ import sys
12
+ from pathlib import Path
13
+
14
+ import anthropic
15
+
16
+ try:
17
+ from scripts.utils import parse_skill_md
18
+ except ImportError:
19
+ from utils import parse_skill_md
20
+
21
+
22
+ def improve_description(
23
+ client: anthropic.Anthropic,
24
+ skill_name: str,
25
+ skill_content: str,
26
+ current_description: str,
27
+ eval_results: dict,
28
+ history: list[dict],
29
+ model: str,
30
+ log_dir: Path | None = None,
31
+ iteration: int | None = None,
32
+ ) -> str:
33
+ """評価結果に基づいてClaudeを呼び出しdescriptionを改善する。"""
34
+ failed_triggers = [
35
+ r for r in eval_results["results"]
36
+ if r["should_trigger"] and not r["pass"]
37
+ ]
38
+ false_triggers = [
39
+ r for r in eval_results["results"]
40
+ if not r["should_trigger"] and not r["pass"]
41
+ ]
42
+
43
+ # スコアサマリーの構築(テストスコアはblinded_historyで隠蔽されるためトレーニングスコアのみ表示)
44
+ train_score = f"{eval_results['summary']['passed']}/{eval_results['summary']['total']}"
45
+ scores_summary = f"Train: {train_score}"
46
+
47
+ # NOTE: Claude APIへのプロンプトは精度維持のため英語のまま
48
+ prompt = f"""You are optimizing a skill description for a Claude Code skill called "{skill_name}". A "skill" is sort of like a prompt, but with progressive disclosure -- there's a title and description that Claude sees when deciding whether to use the skill, and then if it does use the skill, it reads the .md file which has lots more details and potentially links to other resources in the skill folder like helper files and scripts and additional documentation or examples.
49
+
50
+ The description appears in Claude's "available_skills" list. When a user sends a query, Claude decides whether to invoke the skill based solely on the title and on this description. Your goal is to write a description that triggers for relevant queries, and doesn't trigger for irrelevant ones.
51
+
52
+ Here's the current description:
53
+ <current_description>
54
+ "{current_description}"
55
+ </current_description>
56
+
57
+ Current scores ({scores_summary}):
58
+ <scores_summary>
59
+ """
60
+ if failed_triggers:
61
+ prompt += "FAILED TO TRIGGER (should have triggered but didn't):\n"
62
+ for r in failed_triggers:
63
+ prompt += f' - "{r["query"]}" (triggered {r["triggers"]}/{r["runs"]} times)\n'
64
+ prompt += "\n"
65
+
66
+ if false_triggers:
67
+ prompt += "FALSE TRIGGERS (triggered but shouldn't have):\n"
68
+ for r in false_triggers:
69
+ prompt += f' - "{r["query"]}" (triggered {r["triggers"]}/{r["runs"]} times)\n'
70
+ prompt += "\n"
71
+
72
+ if history:
73
+ prompt += "PREVIOUS ATTEMPTS (do NOT repeat these — try something structurally different):\n\n"
74
+ for h in history:
75
+ train_s = f"{h.get('train_passed', h.get('passed', 0))}/{h.get('train_total', h.get('total', 0))}"
76
+ test_s = f"{h.get('test_passed', '?')}/{h.get('test_total', '?')}" if h.get('test_passed') is not None else None
77
+ score_str = f"train={train_s}" + (f", test={test_s}" if test_s else "")
78
+ prompt += f'<attempt {score_str}>\n'
79
+ prompt += f'Description: "{h["description"]}"\n'
80
+ if "results" in h:
81
+ prompt += "Train results:\n"
82
+ for r in h["results"]:
83
+ status = "PASS" if r["pass"] else "FAIL"
84
+ prompt += f' [{status}] "{r["query"][:80]}" (triggered {r["triggers"]}/{r["runs"]})\n'
85
+ if h.get("note"):
86
+ prompt += f'Note: {h["note"]}\n'
87
+ prompt += "</attempt>\n\n"
88
+
89
+ prompt += f"""</scores_summary>
90
+
91
+ Skill content (for context on what the skill does):
92
+ <skill_content>
93
+ {skill_content}
94
+ </skill_content>
95
+
96
+ Based on the failures, write a new and improved description that is more likely to trigger correctly. When I say "based on the failures", it's a bit of a tricky line to walk because we don't want to overfit to the specific cases you're seeing. So what I DON'T want you to do is produce an ever-expanding list of specific queries that this skill should or shouldn't trigger for. Instead, try to generalize from the failures to broader categories of user intent and situations where this skill would be useful or not useful. The reason for this is twofold:
97
+
98
+ 1. Avoid overfitting
99
+ 2. The list might get loooong and it's injected into ALL queries and there might be a lot of skills, so we don't want to blow too much space on any given description.
100
+
101
+ Concretely, your description should not be more than about 100-200 words, even if that comes at the cost of accuracy.
102
+
103
+ Here are some tips that we've found to work well in writing these descriptions:
104
+ - The skill should be phrased in the imperative -- "Use this skill for" rather than "this skill does"
105
+ - The skill description should focus on the user's intent, what they are trying to achieve, vs. the implementation details of how the skill works.
106
+ - The description competes with other skills for Claude's attention — make it distinctive and immediately recognizable.
107
+ - If you're getting lots of failures after repeated attempts, change things up. Try different sentence structures or wordings.
108
+
109
+ I'd encourage you to be creative and mix up the style in different iterations since you'll have multiple opportunities to try different approaches and we'll just grab the highest-scoring one at the end.
110
+
111
+ Please respond with only the new description text in <new_description> tags, nothing else."""
112
+
113
+ response = client.messages.create(
114
+ model=model,
115
+ max_tokens=16000,
116
+ thinking={
117
+ "type": "enabled",
118
+ "budget_tokens": 10000,
119
+ },
120
+ messages=[{"role": "user", "content": prompt}],
121
+ )
122
+
123
+ # レスポンスからthinkingとtextを抽出
124
+ thinking_text = ""
125
+ text = ""
126
+ for block in response.content:
127
+ if block.type == "thinking":
128
+ thinking_text = block.thinking
129
+ elif block.type == "text":
130
+ text = block.text
131
+
132
+ # <new_description>タグをパース
133
+ match = re.search(r"<new_description>(.*?)</new_description>", text, re.DOTALL)
134
+ description = match.group(1).strip().strip('"') if match else text.strip().strip('"')
135
+
136
+ # トランスクリプトのログ
137
+ transcript: dict = {
138
+ "iteration": iteration,
139
+ "prompt": prompt,
140
+ "thinking": thinking_text,
141
+ "response": text,
142
+ "parsed_description": description,
143
+ "char_count": len(description),
144
+ "over_limit": len(description) > 1024,
145
+ }
146
+
147
+ # 1024文字超過時、モデルに短縮を依頼
148
+ if len(description) > 1024:
149
+ shorten_prompt = f"Your description is {len(description)} characters, which exceeds the hard 1024 character limit. Please rewrite it to be under 1024 characters while preserving the most important trigger words and intent coverage. Respond with only the new description in <new_description> tags."
150
+ shorten_response = client.messages.create(
151
+ model=model,
152
+ max_tokens=16000,
153
+ thinking={
154
+ "type": "enabled",
155
+ "budget_tokens": 10000,
156
+ },
157
+ messages=[
158
+ {"role": "user", "content": prompt},
159
+ {"role": "assistant", "content": text},
160
+ {"role": "user", "content": shorten_prompt},
161
+ ],
162
+ )
163
+
164
+ shorten_thinking = ""
165
+ shorten_text = ""
166
+ for block in shorten_response.content:
167
+ if block.type == "thinking":
168
+ shorten_thinking = block.thinking
169
+ elif block.type == "text":
170
+ shorten_text = block.text
171
+
172
+ match = re.search(r"<new_description>(.*?)</new_description>", shorten_text, re.DOTALL)
173
+ shortened = match.group(1).strip().strip('"') if match else shorten_text.strip().strip('"')
174
+
175
+ transcript["rewrite_prompt"] = shorten_prompt
176
+ transcript["rewrite_thinking"] = shorten_thinking
177
+ transcript["rewrite_response"] = shorten_text
178
+ transcript["rewrite_description"] = shortened
179
+ transcript["rewrite_char_count"] = len(shortened)
180
+ description = shortened
181
+
182
+ transcript["final_description"] = description
183
+
184
+ if log_dir:
185
+ log_dir.mkdir(parents=True, exist_ok=True)
186
+ log_file = log_dir / f"improve_iter_{iteration or 'unknown'}.json"
187
+ log_file.write_text(json.dumps(transcript, indent=2))
188
+
189
+ return description
190
+
191
+
192
def main():
    """Command-line entry point: improve a skill description from eval results.

    Loads the eval-results JSON (and optional history JSON), parses the skill
    at ``--skill-path``, asks the model for a new description via
    ``improve_description`` and prints a JSON object with the new
    ``description`` and the ``history`` extended by the current attempt.
    Exits with status 1 when the skill directory has no SKILL.md.
    """
    parser = argparse.ArgumentParser(description="評価結果に基づいてスキルdescriptionを改善")
    parser.add_argument("--eval-results", required=True, help="評価結果JSONへのパス(run_eval.pyの出力)")
    parser.add_argument("--skill-path", required=True, help="スキルディレクトリへのパス")
    parser.add_argument("--history", default=None, help="履歴JSONへのパス(過去の試行)")
    parser.add_argument("--model", required=True, help="改善に使用するモデル")
    parser.add_argument("--verbose", action="store_true", help="thinkingをstderrに出力")
    args = parser.parse_args()

    skill_path = Path(args.skill_path)
    if not (skill_path / "SKILL.md").exists():
        print(f"エラー: {skill_path} にSKILL.mdが見つかりません", file=sys.stderr)
        sys.exit(1)

    # JSON is UTF-8 by specification; decode explicitly instead of relying on
    # the locale default, which breaks on non-ASCII text under e.g. cp932.
    eval_results = json.loads(Path(args.eval_results).read_text(encoding="utf-8"))
    history = []
    if args.history:
        history = json.loads(Path(args.history).read_text(encoding="utf-8"))

    name, _, content = parse_skill_md(skill_path)
    # The description being improved is the one the evaluation was run with.
    current_description = eval_results["description"]

    if args.verbose:
        print(f"現在: {current_description}", file=sys.stderr)
        print(f"スコア: {eval_results['summary']['passed']}/{eval_results['summary']['total']}", file=sys.stderr)

    client = anthropic.Anthropic()
    new_description = improve_description(
        client=client,
        skill_name=name,
        skill_content=content,
        current_description=current_description,
        eval_results=eval_results,
        history=history,
        model=args.model,
    )

    if args.verbose:
        print(f"改善後: {new_description}", file=sys.stderr)

    # Emit the new description together with the updated history as JSON.
    output = {
        "description": new_description,
        "history": history + [{
            "description": current_description,
            "passed": eval_results["summary"]["passed"],
            "failed": eval_results["summary"]["failed"],
            "total": eval_results["summary"]["total"],
            "results": eval_results["results"],
        }],
    }
    print(json.dumps(output, indent=2))
244
+
245
+
246
+ if __name__ == "__main__":
247
+ main()
@@ -103,30 +103,24 @@ Claudeの処理と思考を導くためにコンテキストに読み込まれ
103
103
 
104
104
  **不要なディレクトリは削除できます。** すべてのSkillが3種類のリソースすべてを必要とするわけではありません。
105
105
 
106
- ## einja固有の注意事項
106
+ <!-- @einja:excluded:start -->
107
+ ## プロジェクト固有セクションの記入
107
108
 
108
- ### マネージドセクション
109
+ SKILL.md等のmdファイルの末尾には以下を記入する:
109
110
 
110
- 一部のドキュメントでマネージドセクションを使用する場合:
111
+ <!-- @einja:project-private:start id="unique-id" -->
112
+ <!-- プロジェクト固有の情報を記入 -->
113
+ <!-- @einja:project-private:end -->
114
+ <!-- @einja:excluded:end -->
111
115
 
112
- ```markdown
113
- <!-- @einja:managed:start -->
114
- この内容は`einja sync`で上書きされる
115
- <!-- @einja:managed:end -->
116
-
117
- <!-- @einja:seed:start id="unique-id" -->
118
- プロジェクト固有の内容をここに追記
119
- <!-- @einja:seed:end -->
120
- ```
121
-
122
- ### ビルドシステム連携
123
-
124
- einja management templateでは、`.claude/skills/einja-*/` 内のファイルはビルド時に自動的に `presets/default/` にコピーされます。
116
+ ---
125
117
 
126
- ### 関連Skill
118
+ <!-- @einja:project-private:start id="{skill_name}-project" -->
119
+ ## プロジェクト固有の設定
127
120
 
128
- - [einja-output-format](../einja-output-format/SKILL.md) - サブエージェント出力形式
129
- - [einja-coding-standards](../einja-coding-standards/SKILL.md) - コーディング規約
121
+ <!-- このセクションはプロジェクト固有の内容を追記する場所です -->
122
+ <!-- einja syncで上書きされません -->
123
+ <!-- @einja:project-private:end -->
130
124
  """
131
125
 
132
126
  EXAMPLE_SCRIPT = '''#!/usr/bin/env python3
@@ -10,10 +10,36 @@ Skillパッケージャー - Skillフォルダの配布可能な.skillファイ
10
10
  python package_skill.py .claude/skills/einja-my-skill ./dist
11
11
  """
12
12
 
13
+ import fnmatch
13
14
  import sys
14
15
  import zipfile
15
16
  from pathlib import Path
16
- from quick_validate import validate_skill
17
+ try:
18
+ from scripts.quick_validate import validate_skill
19
+ except ImportError:
20
+ from quick_validate import validate_skill
21
+
22
# Patterns that are left out of the packaged archive.
EXCLUDE_DIRS = {"__pycache__", "node_modules"}
EXCLUDE_GLOBS = {"*.pyc"}
EXCLUDE_FILES = {".DS_Store"}
# Directories excluded only when they sit directly under the Skill root.
ROOT_EXCLUDE_DIRS = {"evals"}


def should_exclude(rel_path: Path) -> bool:
    """Return True when ``rel_path`` must not be added to the package.

    ``rel_path`` is relative to the parent of the Skill directory, so its
    first component is the Skill folder name and the second one (if any) is
    the first entry below the Skill root.
    """
    components = rel_path.parts

    # Any path component matching an excluded directory name rules the path out.
    for component in components:
        if component in EXCLUDE_DIRS:
            return True

    # Root-only exclusions apply solely to the entry right under the Skill folder.
    if len(components) > 1 and components[1] in ROOT_EXCLUDE_DIRS:
        return True

    filename = rel_path.name
    if filename in EXCLUDE_FILES:
        return True

    for pattern in EXCLUDE_GLOBS:
        if fnmatch.fnmatch(filename, pattern):
            return True
    return False
17
43
 
18
44
 
19
45
  def package_skill(skill_path, output_dir=None):
@@ -66,13 +92,16 @@ def package_skill(skill_path, output_dir=None):
66
92
  # .skillファイル(zip形式)を作成
67
93
  try:
68
94
  with zipfile.ZipFile(skill_filename, 'w', zipfile.ZIP_DEFLATED) as zipf:
69
- # Skillディレクトリを走査
95
+ # Skillディレクトリを走査し、ビルドアーティファクトを除外
70
96
  for file_path in skill_path.rglob('*'):
71
- if file_path.is_file():
72
- # zip内の相対パスを計算
73
- arcname = file_path.relative_to(skill_path.parent)
74
- zipf.write(file_path, arcname)
75
- print(f" 追加: {arcname}")
97
+ if not file_path.is_file():
98
+ continue
99
+ arcname = file_path.relative_to(skill_path.parent)
100
+ if should_exclude(arcname):
101
+ print(f" スキップ: {arcname}")
102
+ continue
103
+ zipf.write(file_path, arcname)
104
+ print(f" 追加: {arcname}")
76
105
 
77
106
  print(f"\n✅ Skillを正常にパッケージ化しました: {skill_filename}")
78
107
  return skill_filename