npm - @laitszkin/apollo-toolkit - Versions diffs - 3.11.7 → 3.12.0 - Mend

@laitszkin/apollo-toolkit 3.11.7 → 3.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (62)

package/CHANGELOG.md +33 -0
package/README.md +0 -2
package/align-project-documents/SKILL.md +20 -69
package/align-project-documents/references/templates/standardized-docs-template.md +1 -1
package/analyse-app-logs/scripts/__pycache__/filter_logs_by_time.cpython-312.pyc +0 -0
package/analyse-app-logs/scripts/__pycache__/log_cli_utils.cpython-312.pyc +0 -0
package/analyse-app-logs/scripts/__pycache__/search_logs.cpython-312.pyc +0 -0
package/archive-specs/SKILL.md +18 -70
package/commit-and-push/SKILL.md +24 -52
package/develop-new-features/SKILL.md +15 -60
package/discover-edge-cases/SKILL.md +16 -75
package/discover-security-issues/SKILL.md +49 -83
package/docs-to-voice/SKILL.md +3 -30
package/docs-to-voice/scripts/__pycache__/docs_to_voice.cpython-312.pyc +0 -0
package/enhance-existing-features/SKILL.md +36 -57
package/generate-spec/SKILL.md +51 -130
package/generate-spec/references/templates/coordination.md +0 -1
package/generate-spec/scripts/__pycache__/create-specscpython-312.pyc +0 -0
package/implement-specs/SKILL.md +27 -62
package/implement-specs-with-subagents/SKILL.md +28 -71
package/implement-specs-with-worktree/SKILL.md +38 -62
package/init-project-html/SKILL.md +27 -137
package/init-project-html/lib/atlas/cli.js +897 -43
package/init-project-html/scripts/architecture.js +4 -25
package/katex/scripts/__pycache__/render_katex.cpython-312.pyc +0 -0
package/lib/cli.js +166 -20
package/lib/tool-runner.js +418 -2
package/maintain-project-constraints/SKILL.md +24 -78
package/maintain-project-constraints/references/constraint-file-reference.md +58 -0
package/merge-changes-from-local-branches/SKILL.md +36 -99
package/open-github-issue/SKILL.md +7 -98
package/open-github-issue/scripts/__pycache__/open_github_issue.cpython-312.pyc +0 -0
package/optimise-skill/SKILL.md +7 -6
package/optimise-skill/references/definition.md +38 -0
package/optimise-skill/references/example_skill.md +7 -6
package/package.json +1 -1
package/read-github-issue/SKILL.md +6 -46
package/read-github-issue/scripts/__pycache__/find_issues.cpython-312.pyc +0 -0
package/read-github-issue/scripts/__pycache__/read_issue.cpython-312.pyc +0 -0
package/resolve-review-comments/SKILL.md +4 -26
package/resolve-review-comments/scripts/__pycache__/review_threads.cpython-312.pyc +0 -0
package/review-change-set/SKILL.md +41 -91
package/review-codebases/SKILL.md +42 -99
package/review-spec-related-changes/SKILL.md +42 -77
package/scripts/validate_openai_agent_config.py +16 -1
package/scripts/validate_skill_frontmatter.py +16 -1
package/solve-issues-found-during-review/SKILL.md +38 -66
package/spec-to-project-html/SKILL.md +27 -88
package/submission-readiness-check/SKILL.md +35 -55
package/systematic-debug/SKILL.md +37 -53
package/test-case-strategy/SKILL.md +38 -85
package/text-to-short-video/scripts/__pycache__/enforce_video_aspect_ratio.cpython-312.pyc +0 -0
package/update-project-html/SKILL.md +25 -110
package/version-release/SKILL.md +39 -74
package/weekly-financial-event-report/scripts/extract_pdf_text_pdfkit.swift +32 -4
package/archive-specs/references/templates/architecture.md +0 -21
package/archive-specs/references/templates/docs-index.md +0 -39
package/archive-specs/references/templates/features.md +0 -25
package/archive-specs/references/templates/principles.md +0 -28
package/maintain-skill-catalog/README.md +0 -18
package/maintain-skill-catalog/SKILL.md +0 -72
package/maintain-skill-catalog/agents/openai.yaml +0 -4

package/resolve-review-comments/SKILL.md CHANGED Viewed

@@ -40,21 +40,12 @@ description: Read GitHub pull request review comments, analyze each thread, deci
 - If user provides PR number, use it directly.
 - If user does not provide PR number, infer from current branch context.
-```bash
-apltk review-threads list --repo <owner>/<repo> --pr <number>
-apltk review-threads list
-```
+- Use `apltk review-threads --help` as the live command reference for PR inference and explicit repo selection.
 ## 2) Read unresolved review threads
 Use table view for quick scan, then JSON when you need full details.
-```bash
-apltk review-threads list --pr <number> --state unresolved --output table
-apltk review-threads list --pr <number> --state unresolved --output json > /tmp/pr_threads.json
-```
 The JSON output contains `thread_id`, `path`, `line`, and comment bodies for decision and resolution.
 ## 3) Decide adopt vs reject
@@ -91,16 +82,7 @@ Track adopted thread IDs in a JSON file:
 ## 7) Resolve addressed threads
 Resolve only threads you actually addressed in code.
-```bash
-apltk review-threads resolve --pr <number> --thread-id-file adopted_threads.json
-```
-Optional preview without mutating GitHub state:
-```bash
-apltk review-threads resolve --pr <number> --thread-id-file adopted_threads.json --dry-run
-```
+- Use `apltk review-threads --help` as the live command reference for listing, resolving, dry-run behavior, and thread-id file input.
 ## 8) Handle non-adopted comments
@@ -108,10 +90,6 @@ apltk review-threads resolve --pr <number> --thread-id-file adopted_threads.json
 - Reply with a concise technical reason and, if needed, a proposed follow-up.
 - Never resolve rejected or unhandled feedback threads.
-## CLI
-### `apltk review-threads`
+## CLI reference
-- `list`: fetch PR review threads via GitHub GraphQL (`gh api graphql`), supports repo/PR inference.
-- `resolve`: resolve selected review threads by thread IDs.
-- Supports thread IDs from flags, JSON files, or `--all-unresolved`.
+Use `apltk review-threads --help` as the authoritative command reference. This skill preserves the adopt/reject workflow and submission rules, not the flag catalog.

package/resolve-review-comments/scripts/__pycache__/review_threads.cpython-312.pyc CHANGED Viewed

Binary file

package/review-change-set/SKILL.md CHANGED Viewed

@@ -1,96 +1,46 @@
 ---
 name: review-change-set
 description: >-
-  Unbiased **git diff** review: architecture (boundaries, duplication, ownership) then simplification (real deletes/flattening, not churn)—discard conversation bias. For diffs spanning multiple files/modules, **prefer dispatching one read-only subagent per scope cluster** (each owns its files end-to-end), then aggregate confirmed findings on the main agent without re-reading delegated files.
-  Use for pre-commit/pre-PR review, refactor/abstraction second opinion, “review my branch” **STOP** greenfield feature design from scratch—use planning skills… BAD style-only nits… GOOD evidence + named abstraction target…
+  面向當前 `git diff` 的只讀審查技能。先從架構與模組邊界切入，再檢查是否存在真正能降低複雜度的簡化機會；忽略對話偏見，不做 style-only 評論。當變更跨多個檔案或模組時，優先按 scope cluster 派發只讀 subagent，各自讀完整個責任區後回傳結構化發現，由主代理統一去重與彙總。
 ---
-# Review Change Set
-## Dependencies
-- Required: none.
-- Conditional: none.
-- Optional: none.
-- Fallback: If a downstream consumer (e.g. `commit-and-push`) needs additional safety checks (security, edge cases), invoke those skills explicitly there — this skill no longer chains them automatically.
-## Non-negotiables
-- Read the **full** active change set (staged **and** unstaged when both exist—label which finding hits which).
-- **MUST** discard authorship bias; burden of proof on the code.
-- Prefer **architecture** and **maintainability** over style-only.
-- Abstraction only when it cuts duplication, clarifies ownership, or stabilizes boundaries.
-- Simplification only when behavior-preserving and genuinely simpler—**MUST NOT** shuffle complexity.
-- For non-trivial diffs (multiple files, multiple modules, or large per-file changes), **SHOULD** dispatch **read-only subagents** — one per coherent scope cluster (e.g. one feature/package, one layer, or one logical concern). Each subagent reads its assigned files end-to-end and returns ONLY structured findings (architecture + simplification with `path:line` evidence). The main agent **MUST NOT** re-read delegated files; it aggregates confirmed findings, deduplicates cross-cluster overlaps, and writes the final report. Tiny one-file diffs may be reviewed inline without subagents.
-- **MUST NOT** fabricate findings the diff does not actually contain; subagent reports stay confined to architecture and simplification.
-## Standards (summary)
-- **Evidence**: Full diff + minimum context reads to understand behavior; subagent reports cite `path:line` per finding.
-- **Execution**: Git state → baseline → (subagent fan-out for multi-cluster diffs OR inline read for tiny diffs) → architecture → simplification → aggregate → report.
-- **Quality**: Actionable, outsider perspective; subagent fan-out keeps each context window small; no duplicated findings across clusters.
-- **Output**: Scope, architecture findings, simplification findings, residual uncertainty.
-## Workflow
-**Chain-of-thought:** **`Pause →`** after each block—no verdicts from partial file reads.
-### 1) Inspect git state
-- `git status -sb`, `git diff --stat`, `git diff --cached --stat`; cover staged vs unstaged explicitly.
-- No diff → `No active git change set to review` and stop.
-   - **Pause →** Am I about to review **only** unstaged while staged also ships?
-### 2) Plan the read pattern
-- **Tiny diff** (one file or a handful of small hunks in one module) → read inline; skip subagents.
-- **Multi-file or multi-module diff** → cluster the changed files into coherent scopes (one feature/package, one layer, one logical concern). Dispatch **one read-only subagent per cluster**. Hand each subagent: the cluster file list, its slice of `git diff`/`git diff --cached`, and the structured-report contract below. The main agent **MUST NOT** read the delegated files itself.
-> **Cluster `<scope>` subagent contract**
-> - Read every file in this cluster E2E plus the minimum callers/callees/config needed to interpret behavior.
-> - Discard authorship bias; behavior comes from code/tests/config, not chat memory.
-> - Return ONLY a structured findings block:
->   - `Architecture`: list of `{ title, evidence (path:line), abstraction target, why current shape is weaker }`.
->   - `Simplification`: list of `{ title, evidence (path:line), candidate change, behavior-preserving benefit }`.
->   - `Outbound concerns`: any boundary issue that touches another cluster (so the orchestrator can deduplicate).
-> - No source dumps, no opinions about tasks outside this cluster, no security/edge-case fabrication.
-   - **Pause →** Did I cluster correctly so that one subagent owns each coherent boundary, or am I about to split a duplicated helper across two subagents and miss the dedup signal?
-### 3) Baseline (inline path) or aggregate (subagent path)
-- Inline: read every changed file E2E; pull in minimal callers/callees/config to interpret moves and interfaces.
-- Subagent: wait for **every** cluster subagent to return. Aggregate their findings, then deduplicate any architecture finding two clusters reported via their `Outbound concerns` block.
-- Behavior from **code, tests, config, execution**—not from chat memory.
-   - **Pause →** Can I quote **one concrete behavior** change this diff introduces—not intent?
-### 4) Architecture first
-Flag only if evidence-backed: duplicated workflows, cross-layer leakage, wrong helper ownership, repeated condition trees, unstable interfaces.
-Each finding **MUST** name abstraction target **and** why current shape is weaker.
-   - **Pause →** Is this “different style” or a real **boundary** problem?
-### 5) Simplification second
-Redundant branches/wrappers, deep nesting, duplicated validation, oversize functions, dead compat—**only** if it truly reduces complexity.
-   - **Pause →** Would this refactor just **move** lines between files?
-### 6) Report
-1. **Scope** — staged/unstaged; extra context paths read; cluster list when subagents were used.
-2. **Architecture** — title, evidence (`path:line`), candidate, why weaker.
-3. **Simplification** — title, evidence, candidate, benefit.
-4. **Residual uncertainty** — hypotheses / follow-up checks.
-If nothing actionable: `No actionable abstraction or simplification finding identified`.
-## Sample hints
-- **Staged only**: User ran `git add -p` → findings tagged **staged** vs **unstaged** separately.
-- **Rename-heavy**: Read old→new path mapping before calling “duplication.”
-- **Tiny diff**: One-file guard clause → architecture section may be empty; reviewed inline without subagents.
-- **Wide refactor across packages**: cluster by package; one read-only subagent per package; dedupe duplicated-helper findings on the main agent.
-## References
-- Downstream consumers (e.g. `commit-and-push`, `version-release`, `review-spec-related-changes`) decide independently when to add security or edge-case passes; this skill no longer wires them.
+## 目標
+輸出一份針對活躍變更集的審查報告，聚焦真正有價值的架構問題與可維護性簡化機會，而不是風格雜訊。報告需要標明審查範圍、staged/unstaged 狀態、已確認的架構問題、已確認的簡化建議，以及剩餘不確定性。
+## 驗收條件
+- 已完整覆蓋本次活躍變更集；若 staged 與 unstaged 同時存在，必須明確區分哪些發現命中哪一部分。
+- 每個發現都基於完整 diff 與最小必要上下文，並附帶 `path:line` 證據；不得只靠對話記憶、作者意圖或主觀風格偏好下結論。
+- 架構與責任邊界問題優先於 style-only 建議；抽象必須能消除重複、釐清歸屬或穩定邊界。
+- 簡化建議必須是行為等價且真正降低複雜度的改動，不能只是把複雜度搬到別處。
+- 對於跨多檔、多模組或大變更，應按 coherent scope cluster 使用只讀 subagent；主代理只負責聚合、去重與整理，不重讀已委派檔案。
+- 若沒有可操作發現，最終需明確輸出 `No actionable abstraction or simplification finding identified`。
+## 工作流程
+1. 檢查 git 狀態。
+   - 先確認 `git status`、staged diff 與 unstaged diff，避免只審其中一半。
+   - 若目前沒有活躍變更集，直接輸出 `No active git change set to review`。
+2. 決定閱讀策略。
+   - 單檔或很小的同模組變更可直接 inline 審查。
+   - 多檔、多模組或跨層變更需先按功能、套件、層級或邏輯關切點切成 scope cluster，並優先以一個只讀 subagent 對應一個 cluster。
+3. 建立基線並收斂上下文。
+   - Inline 路徑下，讀完整個變更檔案與最小必要的 caller、callee、配置。
+   - Subagent 路徑下，等待所有 cluster 結果返回，再根據共享邊界與 outbound concerns 做去重。
+   - 所有判斷都必須回到代碼、測試、配置與可觀察行為本身。
+4. 先做架構審查。
+   - 只報告有明確證據支撐的問題，例如跨層洩漏、重複工作流、錯誤 helper 歸屬、重複條件樹、脆弱介面或責任模糊。
+   - 每條架構發現都要指出候選抽象或責任落點，以及當前寫法為何更弱。
+5. 再做簡化審查。
+   - 只在確定不改變行為的前提下，指出多餘分支、包裝層、深巢狀、重複驗證、過大函式或歷史相容殘骸。
+   - 若建議只是把複雜度移位，則不應作為有效簡化發現。
+6. 輸出報告。
+   - 先交代審查範圍、staged/unstaged 區分、補讀的上下文與 cluster 情況。
+   - 再依序輸出架構發現、簡化發現與剩餘不確定性。
+## 使用範例
+- 「幫我 review 這次 branch 的變更」-> 先完整覆蓋 staged 與 unstaged diff，再按架構問題與簡化機會輸出報告。
+- 「我想知道這次重構有沒有真正變簡單」-> 聚焦行為等價的簡化空間，而不是風格偏好。
+- 「這次改動跨很多 package，請不要漏看邊界」-> 先按 package 或邏輯責任分 cluster，用只讀 subagent 分治後再由主代理去重彙總。
+- 「如果沒有值得改的地方，直接講沒有」-> 若無可操作發現，明確輸出 `No actionable abstraction or simplification finding identified`。
+## 參考資料索引
+- 下游工作流如 `commit-and-push`、`version-release`、`review-spec-related-changes` 會自行決定是否追加安全或邊界情況審查；本技能只負責變更集的架構與簡化分析。

package/review-codebases/SKILL.md CHANGED Viewed

@@ -1,103 +1,46 @@
 ---
 name: review-codebases
-description: Repository-wide code review workflow that requires reading the full codebase before judging, prioritizes architecture findings over implementation details, and publishes one GitHub issue per confirmed finding through open-github-issue. Use when users ask for a code review, repository audit, architecture review, maintainability review, or complete codebase inspection.
+description: >-
+  面向整個倉庫的只讀代碼審查技能。要求先讀完整個人類編寫的repo，再按「架構 → 代碼品質 → 邊界情況」的優先順序做判斷；一旦在更高層級發現已確認問題，就停止下探。若需要對外追蹤，為每個已確認且非重複的發現建立 GitHub issue，否則至少輸出可直接發布的草稿內容。
 ---
-# Review Codebases
-## Dependencies
-- Required: none.
-- Conditional: `read-github-issue` when issue publication requires duplicate checks; `open-github-issue` when confirmed findings should be tracked as GitHub issues.
-- Optional: none.
-- Fallback: If publication is needed and `open-github-issue` is unavailable, return draft issue bodies instead of inventing another publisher.
-## Standards
-- Evidence: Read the full human-authored repository before judging and cite concrete files for every finding.
-- Execution: Review architecture first, code quality second, and edge cases last, stopping when a higher-priority tier has confirmed findings.
-- Quality: Prefer root-cause findings over scattered symptoms, merge duplicates, and keep hypotheses out of published results.
-- Output: Return coverage, review tier reached, confirmed findings, publication status, and deferred lower-tier follow-up.
-## Core rules
-- Read the full repository before judging any design or implementation choice.
-- Inspect every human-authored file that affects behavior: source code, tests, build scripts, configuration, migrations, and key docs.
-- For generated, vendored, or snapshot files, verify what they are first; exclude them from deep review only when that status is clear, and list those exclusions explicitly.
-- Do not speculate. Every finding must cite concrete files and causal reasoning.
-- Prefer root-cause findings over scattered symptoms or style nits.
-- Merge duplicate symptoms into one finding when they come from the same underlying issue.
-## Workflow
-1. Map the repository
-   - List top-level directories and identify entrypoints, domain modules, test suites, configuration, scripts, and generated/vendor areas.
-   - Record any files or folders excluded from deep review and why.
-2. Read the whole codebase
-   - Read all relevant human-authored files end to end.
-   - Build a repository-wide model of boundaries, data flow, ownership, invariants, and failure handling.
-3. Review architecture first
-   - Check module boundaries, layering, hidden coupling, circular dependencies, duplicated workflows, leaky abstractions, ownership confusion, and inconsistent domain models.
-   - If any confirmed architecture findings exist, stop the lower-level review and report only architecture findings.
-4. Review code quality second
-   - Run this step only when there are no architecture findings.
-   - Check readability, duplication, dead code, error handling, unsafe state changes, unclear contracts, missing tests around critical logic, and maintainability risks.
-   - If any confirmed code-quality findings exist, stop before edge-case review and report only these findings.
-5. Review edge cases last
-   - Run this step only when there are no architecture or code-quality findings.
-   - Check null or empty inputs, boundary values, partial failures, retries, concurrency, ordering assumptions, idempotency, and invalid state transitions.
-6. Check for duplicate issues before publication
-   - When findings will be published, use `read-github-issue` to search the target repository for open and recently closed issues that match the same module, failure mode, or architectural boundary.
-   - Treat an existing issue as a duplicate when the underlying root cause, affected boundary, and requested outcome materially overlap, even if the wording differs.
-   - If a duplicate exists, cite it in the final report and do not publish a new issue for that finding.
-7. Publish each confirmed non-duplicate finding
-   - Invoke `open-github-issue` once per finding.
-   - Use a tier-specific title prefix:
-     - `[Architecture] <short finding>`
-     - `[Code Quality] <short finding>`
-     - `[Edge Case] <short finding>`
-   - Pass these fields to the dependency skill:
-     - `title`
-     - `problem-description`: symptom, impact, and repository evidence
-     - `suspected-cause`: file references, causal chain, and confidence
-     - `reproduction`: concrete trigger or conditions when known; otherwise leave empty
-     - `repo`: target repository in `owner/repo` format when known
-   - If invoking the publisher CLI directly, pass finding details through `apltk open-github-issue --payload-file <json>` or `@file` inputs rather than inline shell arguments so code snippets and backticks survive unchanged.
-## Evidence standard
-Each finding must include:
-- affected files or modules
-- why the current design or code is problematic
-- impact on correctness, maintenance, performance, or future change safety
-- confidence level with a short reason
-If evidence is incomplete, keep it as a hypothesis and do not publish a GitHub issue for it.
-## Output format
-Use this structure in responses:
-1. Codebase coverage
-   - reviewed areas
-   - explicit exclusions
-2. Review tier reached
-   - architecture / code quality / edge cases
-   - why lower tiers were skipped, if applicable
-3. Confirmed findings
-   - title
-   - affected files
-   - evidence and reasoning
-   - impact
-   - confidence
-4. GitHub issue publication status
-   - duplicate-check status and any matching existing issue URLs
-   - publication mode (`gh-cli` / `github-token` / `draft-only`)
-   - created issue URL or draft output per finding
-5. Deferred follow-up
-   - list lower-tier checks that were intentionally not performed because a higher-tier issue already exists
-## Resources
-- Dependency skill: `open-github-issue` for deterministic GitHub issue publication with auth fallback and README language detection.
+## 目標
+輸出一份面向整個倉庫的審查報告，先交付最有價值的根因級問題，而不是零散表象。報告需要說明覆蓋範圍、實際審查到的層級、已確認發現、是否已檢查重複 issue、是否已發布 issue，以及因高優先級問題而延後的後續檢查。
+## 驗收條件
+- 在做任何設計或實作判斷前，已完整閱讀所有會影響行為的人類編寫檔案，包括原始碼、測試、配置、建置腳本、遷移與關鍵文檔。
+- 對生成檔、vendor 檔與 snapshot 檔先確認其性質；只有在性質明確時才可排除深讀，且必須在報告中明確列出排除項與原因。
+- 每個發現都附帶具體檔案與因果說明，不做無證據推測；同一根因導致的多個症狀必須合併成一條發現。
+- 審查順序固定為「架構 → 代碼品質 → 邊界情況」；若在較高層級已確認問題，必須停止更低層級審查並在報告中說明原因。
+- 若需要發布 issue，必須先做重複檢查；每個已確認且非重複的發現都要有發布結果，若無法發布則提供可直接使用的草稿內容。
+- 最終交付物必須包含覆蓋範圍、審查層級、已確認發現、issue 發布狀態與延後檢查項。
+## 工作流程
+1. 映射倉庫。
+   - 先識別頂層目錄、入口點、核心模組、測試區、配置、腳本與疑似生成/vendor 區域。
+   - 同時記錄哪些內容會被排除深讀，以及排除理由。
+2. 完整閱讀repo。
+   - 逐個讀完所有相關的人類編寫檔案。
+   - 建立全倉視角的模組邊界、資料流、責任歸屬、不變式與失敗處理模型。
+3. 先做架構審查。
+   - 優先尋找模組邊界錯位、跨層洩漏、循環依賴、重複工作流、抽象滲漏、責任混亂與領域模型不一致。
+   - 若已確認任何架構問題，直接停止後續較低層級審查，專注輸出架構發現。
+4. 若架構層沒有問題，再做代碼品質審查。
+   - 檢查可讀性、重複代碼、死碼、錯誤處理、危險狀態變更、契約不清與關鍵邏輯缺少測試保護等問題。
+   - 若已確認任何代碼品質問題，停止邊界情況審查。
+5. 僅在前兩層都沒有問題時，才審查邊界情況。
+   - 檢查空值、邊界值、部分失敗、重試、併發、順序假設、冪等性與非法狀態轉移。
+6. 需要發布時，先做重複檢查再逐條發布。
+   - 使用 `read-github-issue` 檢查是否已有相同根因、相同邊界或相同結果導向的 open / recently closed issue。
+   - 對每個已確認且非重複的發現，使用 `open-github-issue` 發布；若發布依賴不可用，改為返回對應 issue 草稿。
+   - issue 標題前綴遵循審查層級，例如 `[Architecture]`、`[Code Quality]`、`[Edge Case]`。
+## 使用範例
+- 「幫我做一次整倉 code review」-> 先讀完整個人類編寫的repo，再按架構、代碼品質、邊界情況的順序輸出審查報告。
+- 「幫我做 maintainability audit，找到最值得開 issue 的問題」-> 聚焦根因級問題，必要時先做 duplicate check，再為每個非重複發現準備或發布 issue。
+- 「請做 architecture review，不要陷入 style nit」-> 先完成全倉閱讀，只報告有明確邊界與責任證據的架構問題。
+- 「如果 issue 發布不了，也要把內容整理好」-> 仍然完成重複檢查，並輸出可直接發布的 issue 草稿，而不是臨時改用其他未定義發佈方式。
+## 參考資料索引
+- `read-github-issue`：在發布前檢查目標倉庫中是否已存在相同根因的 issue，避免重複建單。
+- `open-github-issue`：為每個已確認且非重複的發現建立 GitHub issue；若不可用，則返回 issue 草稿內容。

package/review-spec-related-changes/SKILL.md CHANGED Viewed

@@ -1,82 +1,47 @@
 ---
 name: review-spec-related-changes
 description: >-
-  Read-only spec compliance vs governing docs/plans: score every business requirement Met/Partial/Not-met from code/tests/commands—checked `tasks.md` boxes never count; ambiguity between two plan roots halts execution; on code-bearing diffs run **`review-change-set`**, **`discover-edge-cases`**, **`discover-security-issues`** on the same scope afterward — **prefer one read-only subagent per secondary skill in parallel** so the main agent aggregates without re-reading. Independent requirement clusters may also be scored by parallel read-only subagents.
-  Use for “does this PR satisfy coordination.md + spec.md R2?” or a user-pinned `{change}` folder.
-  Do not mutate repos, bury missing goals, skip the tertiary bundle on code-bearing diffs, or rest on intent without evidence; FORBIDDEN to lead with refactor comments while R1 is failing. GOOD: pair every Not-met cite with a `spec.md` ref + concrete path:test gap.
+  面向spec合規性的只讀審查技能。先唯一確定本次變更受哪一套 `docs/plans/...` 規劃文件約束，再按業務目標逐條判定 `Met`、`Partially met`、`Not met` 或 `Deferred/N/A`；`tasks.md` 的勾選不算證據。若範圍涉及代碼實作，完成業務結論後還必須對同一範圍追加 `review-change-set`、`discover-edge-cases` 與 `discover-security-issues`，並保持固定報告順序。
 ---
-# Review Spec Related Changes
-## Dependencies
-- Required: `review-change-set`, `discover-edge-cases`, and `discover-security-issues` whenever the scope includes **code-affecting** implementation to assess.
-- Conditional: none.
-- Optional: none.
-- Fallback: If any required dependency is unavailable for a **code-affecting** review, **MUST** stop and report the gap. **MUST NOT** emit a “full pass” verdict without those three passes when code is in scope.
-## Non-negotiables
-- **MUST NOT** edit implementation code, tests, or planning docs during this skill (read-only review).
-- **MUST NOT** archive specs, commit, push, tag, or release from this skill.
-- **MUST** resolve which spec set governs the change **before** concluding; if multiple candidates fit equally and cannot be disambiguated from repo evidence, **MUST** stop and report ambiguity—**MUST NOT** guess.
-- **MUST** classify each business goal / acceptance item as `Met`, `Partially met`, `Not met`, or `Deferred/N/A` **only** using verifiable evidence (code, tests, commands, traces)—checked boxes in `tasks.md` are **not** proof by themselves.
-- **MUST** treat **unmet or partially met required business goals** as **highest severity**. **MUST NOT** let edge-case, security, or style findings **outrank** those gaps in the reported order or implied priority.
-- **MUST** finish the business-goal verdict **before** invoking secondary skills; **MUST** still run `review-change-set`, `discover-edge-cases`, and `discover-security-issues` on the **same** implementation scope when code is involved (after step 1 verdict is written).
-- **SHOULD** parallelize the secondary reviews when code is involved by dispatching **one read-only subagent per secondary skill** (one for `review-change-set`, one for `discover-edge-cases`, one for `discover-security-issues`). Each subagent receives the same diff scope, follows that skill’s own SKILL.md, and returns ONLY its structured findings; the main agent aggregates them in the fixed report order without re-running the underlying analysis. Single-file or trivial diffs may be reviewed inline without subagents.
-- For business-goal scoring itself, **MAY** parallelize by dispatching one read-only subagent per **independent requirement cluster** (requirements that share no owned paths and no shared spec section), each returning Met/Partial/Not-met with cited evidence. The main agent owns final ordering, severity, and any cross-requirement reconciliation. Tightly coupled requirements stay on the main agent.
-- **MUST NOT** rest conclusions on author intent, branch names, or chat memory unless **repository evidence** agrees.
-- **MUST NOT** let subagents mutate repositories, archive specs, or commit — every dispatched subagent runs in **read-only** mode.
-- Prefer **fewer confirmed findings** over broad speculation; unproven items belong under **Residual uncertainty**, not as faux defects.
-## Standards (summary)
-- **Evidence**: Full read of governing docs + minimal code/diff context + spec-named verification commands when safe to run.
-- **Execution**: Scope resolution → business compliance (optionally fanned out by independent requirement cluster) → parallel secondary reviews via per-skill read-only subagents → aggregated ordered report.
-- **Quality**: Business-first severity; secondary findings separated unless they also block an acceptance criterion; subagent reports stay strictly within their assigned skill’s scope.
-- **Output**: Ordered list: business gaps → edge cases → security → code review → passing summary → residual uncertainty.
-## Scope resolution
-**Chain-of-thought:** Before **`Workflow`**, answer **`Pause →`** for the governing spec you will use; equally plausible paths without disambiguation ⇒ **stop**.
-**User-named path** — Read `spec.md`, `tasks.md`, `checklist.md`, `contract.md`, `design.md`, and batch `coordination.md` when present unless the user narrowed the list. Treat as authoritative unless the user pointed at a **newer** superseding plan. Map implementation via tasks, owned paths, diff, branch, commits.
-   - **Pause →** Did the user give a **filesystem path** or only a nickname that could map to multiple `docs/plans/...` trees?
-   - **Pause →** For each major business verdict later, what **exact** spec heading or requirement ID will I cite?
-**User did not name a spec** — Inspect `git status -sb`, `git diff --name-only`, `git diff --cached --name-only`; search `docs/plans/`, `docs/archive/plans/`, or repo-documented plan dirs. If no plan file moved, infer from recent commits and plausible plan dirs; **if still ambiguous, stop** (see Non-negotiables).
-   - **Pause →** What **three** independent clues tie this implementation to **one** plan—not chat memory alone?
-   - **Pause →** If I withheld the conversation transcript, could another reviewer replicate my chosen spec folder from repo evidence?
-## Workflow
-**Chain-of-thought:** Answer **`Pause →`** after **each** step before moving down the list or calling dependent skills.
-1. **Spec baseline** — Read governing docs end-to-end. Extract goals, acceptance criteria, non-goals, deferrals, required verifications. Build a compact claim list provable from repo evidence. Keep “what the product must do” separate from “how clean the code is.”
-   - **Pause →** Which items are **mandatory** acceptance vs explicitly **out of scope** or deferred?
-   - **Pause →** What observable failure would make me change a `Met` to `Not met` for the top risk requirement?
-2. **Implementation evidence** — Read the relevant diff/staged/commits/files. Trace the minimum code path to validate claims. Run spec-named checks when available and safe. When the spec contains **independent requirement clusters** (disjoint owned paths, disjoint spec sections), **MAY** dispatch one read-only subagent per cluster to score Met/Partial/Not-met with cited evidence; the main agent reconciles cross-requirement effects and owns the final verdict ordering. Tightly coupled requirements stay on the main agent.
-   - **Pause →** What is the **smallest** code path I have not yet read that could still falsify a `Met`?
-   - **Pause →** Am I about to score `Met` from **intent** or from **tests/commands** I actually ran or inspected?
-   - **Pause →** If I clustered, is every cluster genuinely disjoint, or am I about to lose a cross-requirement contradiction by running them in isolation?
-3. **Business-goal verdict first** — Emit every `Not met` / `Partially met` **before** secondary findings, with **exact** spec cites and code/test evidence. While required goals stay failed, **MUST NOT** frame archival, release, or “done” narratives that imply compliance.
-   - **Pause →** If I sorted findings by “interesting” instead of business impact, which line would unfairly rise above a missing goal?
-   - **Pause →** For each `Partially met`, what single **missing proof** (test, wire-up, error path) am I naming explicitly?
-4. **Secondary reviews (code-affecting)** — On the same scope: `review-change-set` (architecture/simplification), `discover-edge-cases` (reproducible edge/observability risks), `discover-security-issues` (reproducible security). **Prefer dispatching one read-only subagent per skill in parallel** so each runs in its own context with the full SKILL.md it owns; hand each subagent the same diff scope and the structured-report contract from that skill. The main agent waits for every subagent to return, then keeps outputs labeled so they do not read as business-goal substitutions. Trivial single-file diffs may run inline without subagents. The main agent **MUST NOT** re-run a secondary skill it already delegated, and subagents **MUST NOT** mutate the repository.
-   - **Pause →** Does this secondary finding **force** a business re-score—if yes, did I revise step 3 before publishing?
-   - **Pause →** Is the diff scope identical to what I used for business mapping (no silent file creep)?
-   - **Pause →** Did every dispatched secondary subagent return — or am I about to publish from partial summaries?
-5. **Final report (fixed order)** — (1) Business-goal failures (always top severity for required gaps). (2) Edge-case. (3) Security. (4) Code-review / maintainability. (5) Passing evidence for goals confirmed `Met`. (6) Residual uncertainty (skipped commands, unmapped spec text, unverifiable externals). If nothing actionable: state that explicitly **and** still cite docs and evidence reviewed.
-   - **Pause →** What commands or spec paragraphs remain **unverified**—are they all listed under residual uncertainty?
-## Sample hints
-- **Business claim record**:
-  - `R3.2 refresh token rotation` → `Not met` — spec `spec.md` §3.2 requires one-time use; `src/auth/refresh.rs:40` still accepts same jti on replay; test `refresh_replay` not present.
-- **Wrong ordering** — starting with “nice simplification in `foo.ts`” while `R1.0` is unmet: **wrong**; lead with the `R1.0` gap.
-- **Tasks checked but behavior missing** — `tasks.md` shows `[x] implement rate limit` but no call site in `src/api/*` and no test: verdict stays **`Not met` / `Partially met`**, not `Met`.
-- **Ambiguous scope** — two directories `docs/plans/2026-05-01/foo/` and `…/batch-a/foo/` both plausible; **stop** with “need user to name path” instead of picking one.
+## 目標
+輸出一份只讀的spec合規審查報告，先回答「這次變更是否真的滿足規劃中的業務要求」，再補充邊界、安全與代碼審查發現。報告需要對每條關鍵需求給出可追溯的狀態判定、證據來源、缺口說明、通過證據與剩餘不確定性；本技能不負責修改代碼或更新規劃文件。
+## 驗收條件
+- 在給出任何合規結論前，已唯一確定 governing spec；若存在兩個同樣合理的規劃根目錄且無法由倉庫證據消歧，必須停止並報告歧義，不能猜測。
+- 每條業務目標或驗收項只能根據可驗證證據判定為 `Met`、`Partially met`、`Not met` 或 `Deferred/N/A`；證據可來自代碼、測試、命令、日誌或追蹤，`tasks.md` 勾選本身不算證據。
+- 未滿足或部分滿足的必選業務要求永遠是最高嚴重度，報告排序中不得被安全、邊界或可維護性意見壓過。
+- 若範圍涉及代碼實作，完成業務結論後必須在同一範圍追加 `review-change-set`、`discover-edge-cases` 與 `discover-security-issues`；任一依賴不可用時，不得輸出「完整通過」結論。
+- 全流程保持只讀：不得修改實作、測試、spec、archive、commit、push、tag 或 release。
+- 最終交付物固定按以下順序輸出：業務缺口 → 邊界情況 → 安全問題 → 代碼審查問題 → 已滿足要求的通過證據 → 剩餘不確定性。
+## 工作流程
+1. 確定 governing spec。
+   - 若使用者已指定具體路徑，直接以該規劃目錄為準，閱讀 `spec.md`、`tasks.md`、`checklist.md`、`contract.md`、`design.md`，以及存在時的 `coordination.md`。
+   - 若使用者未指定，需根據 `git status`、diff、相關路徑、近期提交與計劃目錄證據回推出唯一 spec；若仍有歧義，立即停止。
+2. 建立spec基線。
+   - 從 governing spec 中抽取業務目標、驗收條件、非目標、已明示的延期項與要求執行的驗證。
+   - 將「產品必須做到什麼」和「代碼寫得是否漂亮」明確分開。
+3. 蒐集實作證據並先做業務判定。
+   - 閱讀最小必要代碼路徑、相關 diff、提交與測試，必要時執行 spec 點名且安全可跑的驗證命令。
+   - 對每條需求給出 `Met`、`Partially met`、`Not met` 或 `Deferred/N/A`，並附上精確 spec 引用與代碼/測試證據。
+   - 若 spec 中存在完全獨立的需求群組，可用只讀 subagent 並行評分；但最終排序、嚴重度與跨需求衝突整理由主代理負責。
+4. 對代碼範圍追加次級審查。
+   - 若審查對象包含代碼實作，必須在同一範圍追加 `review-change-set`、`discover-edge-cases` 與 `discover-security-issues`。
+   - 優先以一個只讀 subagent 對應一個次級技能並行執行，主代理只負責聚合結果，不重跑已委派分析。
+   - 若次級審查結果反過來影響某條業務需求的判定，必須先回寫業務結論，再輸出最終報告。
+5. 生成固定順序的最終報告。
+   - 先列出 `Not met` 與 `Partially met` 的業務缺口，再依序列出邊界情況、安全問題、代碼審查問題。
+   - 接著補上所有已確認 `Met` 的通過證據。
+   - 最後列出未驗證命令、無法映射的 spec 段落、外部依賴不可驗證處與其他剩餘不確定性。
+## 使用範例
+- 「這個 PR 有沒有滿足 `coordination.md` 和 `spec.md`？」-> 先唯一確定 governing spec，再按業務要求逐條打分，最後補上同範圍的邊界、安全與變更集審查。
+- 「請檢查 `docs/plans/2026-05-01/foo/` 對應的實作是否完成」-> 直接以指定目錄為準做只讀合規審查，不依賴聊天上下文猜測。
+- 「`tasks.md` 都打勾了，應該算完成吧？」-> 不能直接採信，仍需用代碼、測試、命令或追蹤證據重新驗證每條要求。
+- 「先跟我講哪個 requirement 沒達成，再看程式碼好不好」-> 報告必須先輸出業務缺口，不能先用重構或風格意見掩蓋未滿足的spec要求。
+## 參考資料索引
+- `review-change-set`：在相同代碼範圍上補做架構與簡化審查，避免將spec合規與代碼品質混為一談。
+- `discover-edge-cases`：在相同範圍上補做可重現邊界情況審查，補齊失敗路徑、併發與可觀測性風險。
+- `discover-security-issues`：在相同範圍上補做可重現安全審查，補齊攻擊面、授權與資料外洩風險。

package/scripts/validate_openai_agent_config.py CHANGED Viewed

@@ -3,6 +3,7 @@
 from __future__ import annotations
+import argparse
 import re
 import sys
 from pathlib import Path
@@ -21,6 +22,11 @@ INTERFACE_ALLOWED_KEYS = {
 }
 HEX_COLOR_PATTERN = re.compile(r"^#[0-9A-Fa-f]{6}$")
+HELP_EPILOG = """Examples:
+  apltk validate-openai-agent-config
+    Result: prints either a pass summary or one error per invalid agents/openai.yaml file.
+"""
 def repo_root() -> Path:
     return Path(__file__).resolve().parent.parent
@@ -169,7 +175,16 @@ def validate_skill(skill_dir: Path) -> list[str]:
     return errors
-def main() -> int:
+def build_parser() -> argparse.ArgumentParser:
+    return argparse.ArgumentParser(
+        description="Validate agents/openai.yaml for all top-level skills.",
+        epilog=HELP_EPILOG,
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+    )
+def main(argv: list[str] | None = None) -> int:
+    build_parser().parse_args(argv)
     root = repo_root()
     skill_dirs = iter_skill_dirs(root)
     if not skill_dirs:

package/scripts/validate_skill_frontmatter.py CHANGED Viewed

@@ -3,6 +3,7 @@
 from __future__ import annotations
+import argparse
 import re
 import sys
 from pathlib import Path
@@ -13,6 +14,11 @@ NAME_PATTERN = re.compile(r"^[a-z0-9]+(?:-[a-z0-9]+)*$")
 REQUIRED_KEYS = {"name", "description"}
 MAX_DESCRIPTION_LENGTH = 1024
+HELP_EPILOG = """Examples:
+  apltk validate-skill-frontmatter
+    Result: prints either a pass summary or one error per invalid top-level SKILL.md frontmatter file.
+"""
 def repo_root() -> Path:
     return Path(__file__).resolve().parent.parent
@@ -91,7 +97,16 @@ def validate_skill(skill_dir: Path) -> list[str]:
     return errors
-def main() -> int:
+def build_parser() -> argparse.ArgumentParser:
+    return argparse.ArgumentParser(
+        description="Validate SKILL.md frontmatter for all top-level skills.",
+        epilog=HELP_EPILOG,
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+    )
+def main(argv: list[str] | None = None) -> int:
+    build_parser().parse_args(argv)
     root = repo_root()
     skill_dirs = iter_skill_dirs(root)
     if not skill_dirs: