devlyn-cli 1.14.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (148)
  1. package/AGENTS.md +104 -0
  2. package/CLAUDE.md +112 -119
  3. package/README.md +43 -125
  4. package/benchmark/auto-resolve/BENCHMARK-DESIGN.md +272 -0
  5. package/benchmark/auto-resolve/README.md +114 -0
  6. package/benchmark/auto-resolve/RUBRIC.md +162 -0
  7. package/benchmark/auto-resolve/fixtures/F1-cli-trivial-flag/NOTES.md +30 -0
  8. package/benchmark/auto-resolve/fixtures/F1-cli-trivial-flag/expected.json +68 -0
  9. package/benchmark/auto-resolve/fixtures/F1-cli-trivial-flag/metadata.json +10 -0
  10. package/benchmark/auto-resolve/fixtures/F1-cli-trivial-flag/setup.sh +4 -0
  11. package/benchmark/auto-resolve/fixtures/F1-cli-trivial-flag/spec.md +45 -0
  12. package/benchmark/auto-resolve/fixtures/F1-cli-trivial-flag/task.txt +8 -0
  13. package/benchmark/auto-resolve/fixtures/F2-cli-medium-subcommand/NOTES.md +54 -0
  14. package/benchmark/auto-resolve/fixtures/F2-cli-medium-subcommand/expected-pair-plan-registry.json +170 -0
  15. package/benchmark/auto-resolve/fixtures/F2-cli-medium-subcommand/expected.json +84 -0
  16. package/benchmark/auto-resolve/fixtures/F2-cli-medium-subcommand/metadata.json +21 -0
  17. package/benchmark/auto-resolve/fixtures/F2-cli-medium-subcommand/pair-plan.sample-fail.json +214 -0
  18. package/benchmark/auto-resolve/fixtures/F2-cli-medium-subcommand/pair-plan.sample-pass.json +223 -0
  19. package/benchmark/auto-resolve/fixtures/F2-cli-medium-subcommand/setup.sh +5 -0
  20. package/benchmark/auto-resolve/fixtures/F2-cli-medium-subcommand/spec.md +56 -0
  21. package/benchmark/auto-resolve/fixtures/F2-cli-medium-subcommand/task.txt +14 -0
  22. package/benchmark/auto-resolve/fixtures/F3-backend-contract-risk/NOTES.md +28 -0
  23. package/benchmark/auto-resolve/fixtures/F3-backend-contract-risk/expected-pair-plan-registry.json +162 -0
  24. package/benchmark/auto-resolve/fixtures/F3-backend-contract-risk/expected.json +65 -0
  25. package/benchmark/auto-resolve/fixtures/F3-backend-contract-risk/metadata.json +19 -0
  26. package/benchmark/auto-resolve/fixtures/F3-backend-contract-risk/setup.sh +4 -0
  27. package/benchmark/auto-resolve/fixtures/F3-backend-contract-risk/spec.md +56 -0
  28. package/benchmark/auto-resolve/fixtures/F3-backend-contract-risk/task.txt +9 -0
  29. package/benchmark/auto-resolve/fixtures/F4-web-browser-design/NOTES.md +40 -0
  30. package/benchmark/auto-resolve/fixtures/F4-web-browser-design/expected.json +57 -0
  31. package/benchmark/auto-resolve/fixtures/F4-web-browser-design/metadata.json +10 -0
  32. package/benchmark/auto-resolve/fixtures/F4-web-browser-design/setup.sh +6 -0
  33. package/benchmark/auto-resolve/fixtures/F4-web-browser-design/spec.md +49 -0
  34. package/benchmark/auto-resolve/fixtures/F4-web-browser-design/task.txt +9 -0
  35. package/benchmark/auto-resolve/fixtures/F5-fix-loop-red-green/NOTES.md +38 -0
  36. package/benchmark/auto-resolve/fixtures/F5-fix-loop-red-green/expected.json +65 -0
  37. package/benchmark/auto-resolve/fixtures/F5-fix-loop-red-green/metadata.json +10 -0
  38. package/benchmark/auto-resolve/fixtures/F5-fix-loop-red-green/setup.sh +55 -0
  39. package/benchmark/auto-resolve/fixtures/F5-fix-loop-red-green/spec.md +49 -0
  40. package/benchmark/auto-resolve/fixtures/F5-fix-loop-red-green/task.txt +7 -0
  41. package/benchmark/auto-resolve/fixtures/F6-dep-audit-native-module/NOTES.md +38 -0
  42. package/benchmark/auto-resolve/fixtures/F6-dep-audit-native-module/expected.json +77 -0
  43. package/benchmark/auto-resolve/fixtures/F6-dep-audit-native-module/metadata.json +10 -0
  44. package/benchmark/auto-resolve/fixtures/F6-dep-audit-native-module/setup.sh +4 -0
  45. package/benchmark/auto-resolve/fixtures/F6-dep-audit-native-module/spec.md +49 -0
  46. package/benchmark/auto-resolve/fixtures/F6-dep-audit-native-module/task.txt +10 -0
  47. package/benchmark/auto-resolve/fixtures/F7-out-of-scope-trap/NOTES.md +50 -0
  48. package/benchmark/auto-resolve/fixtures/F7-out-of-scope-trap/expected.json +76 -0
  49. package/benchmark/auto-resolve/fixtures/F7-out-of-scope-trap/metadata.json +10 -0
  50. package/benchmark/auto-resolve/fixtures/F7-out-of-scope-trap/setup.sh +36 -0
  51. package/benchmark/auto-resolve/fixtures/F7-out-of-scope-trap/spec.md +46 -0
  52. package/benchmark/auto-resolve/fixtures/F7-out-of-scope-trap/task.txt +7 -0
  53. package/benchmark/auto-resolve/fixtures/F8-known-limit-ambiguous/NOTES.md +50 -0
  54. package/benchmark/auto-resolve/fixtures/F8-known-limit-ambiguous/expected.json +63 -0
  55. package/benchmark/auto-resolve/fixtures/F8-known-limit-ambiguous/metadata.json +10 -0
  56. package/benchmark/auto-resolve/fixtures/F8-known-limit-ambiguous/setup.sh +4 -0
  57. package/benchmark/auto-resolve/fixtures/F8-known-limit-ambiguous/spec.md +48 -0
  58. package/benchmark/auto-resolve/fixtures/F8-known-limit-ambiguous/task.txt +1 -0
  59. package/benchmark/auto-resolve/fixtures/F9-e2e-ideate-to-resolve/NOTES.md +93 -0
  60. package/benchmark/auto-resolve/fixtures/F9-e2e-ideate-to-resolve/expected.json +74 -0
  61. package/benchmark/auto-resolve/fixtures/F9-e2e-ideate-to-resolve/metadata.json +10 -0
  62. package/benchmark/auto-resolve/fixtures/F9-e2e-ideate-to-resolve/setup.sh +28 -0
  63. package/benchmark/auto-resolve/fixtures/F9-e2e-ideate-to-resolve/spec.md +62 -0
  64. package/benchmark/auto-resolve/fixtures/F9-e2e-ideate-to-resolve/task.txt +5 -0
  65. package/benchmark/auto-resolve/fixtures/SCHEMA.md +130 -0
  66. package/benchmark/auto-resolve/fixtures/test-repo/README.md +27 -0
  67. package/benchmark/auto-resolve/fixtures/test-repo/bin/cli.js +63 -0
  68. package/benchmark/auto-resolve/fixtures/test-repo/package-lock.json +823 -0
  69. package/benchmark/auto-resolve/fixtures/test-repo/package.json +22 -0
  70. package/benchmark/auto-resolve/fixtures/test-repo/playwright.config.js +17 -0
  71. package/benchmark/auto-resolve/fixtures/test-repo/server/index.js +37 -0
  72. package/benchmark/auto-resolve/fixtures/test-repo/tests/cli.test.js +25 -0
  73. package/benchmark/auto-resolve/fixtures/test-repo/tests/server.test.js +58 -0
  74. package/benchmark/auto-resolve/fixtures/test-repo/web/index.html +37 -0
  75. package/benchmark/auto-resolve/scripts/build-pair-eligible-manifest.py +174 -0
  76. package/benchmark/auto-resolve/scripts/check-f9-artifacts.py +256 -0
  77. package/benchmark/auto-resolve/scripts/compile-report.py +331 -0
  78. package/benchmark/auto-resolve/scripts/iter-0033c-compare.py +552 -0
  79. package/benchmark/auto-resolve/scripts/judge-opus-pass.sh +430 -0
  80. package/benchmark/auto-resolve/scripts/judge.sh +359 -0
  81. package/benchmark/auto-resolve/scripts/oracle-scope-tier-a.py +260 -0
  82. package/benchmark/auto-resolve/scripts/oracle-scope-tier-b.py +274 -0
  83. package/benchmark/auto-resolve/scripts/oracle-test-fidelity.py +328 -0
  84. package/benchmark/auto-resolve/scripts/pair-plan-idgen.py +401 -0
  85. package/benchmark/auto-resolve/scripts/pair-plan-lint.py +468 -0
  86. package/benchmark/auto-resolve/scripts/run-fixture.sh +691 -0
  87. package/benchmark/auto-resolve/scripts/run-iter-0033c.sh +234 -0
  88. package/benchmark/auto-resolve/scripts/run-suite.sh +214 -0
  89. package/benchmark/auto-resolve/scripts/ship-gate.py +222 -0
  90. package/bin/devlyn.js +129 -17
  91. package/config/skills/_shared/adapters/README.md +64 -0
  92. package/config/skills/_shared/adapters/gpt-5-5.md +29 -0
  93. package/config/skills/_shared/adapters/opus-4-7.md +29 -0
  94. package/config/skills/_shared/archive_run.py +130 -0
  95. package/config/skills/_shared/codex-config.md +54 -0
  96. package/config/skills/_shared/codex-monitored.sh +141 -0
  97. package/config/skills/_shared/engine-preflight.md +35 -0
  98. package/config/skills/_shared/expected.schema.json +93 -0
  99. package/config/skills/_shared/pair-plan-schema.md +298 -0
  100. package/config/skills/_shared/runtime-principles.md +110 -0
  101. package/config/skills/_shared/spec-verify-check.py +519 -0
  102. package/config/skills/devlyn:ideate/SKILL.md +99 -481
  103. package/config/skills/devlyn:ideate/references/elicitation.md +97 -0
  104. package/config/skills/devlyn:ideate/references/from-spec-mode.md +54 -0
  105. package/config/skills/devlyn:ideate/references/project-mode.md +76 -0
  106. package/config/skills/devlyn:ideate/references/spec-template.md +102 -0
  107. package/config/skills/devlyn:resolve/SKILL.md +172 -184
  108. package/config/skills/devlyn:resolve/references/free-form-mode.md +68 -0
  109. package/config/skills/devlyn:resolve/references/phases/build-gate.md +45 -0
  110. package/config/skills/devlyn:resolve/references/phases/cleanup.md +39 -0
  111. package/config/skills/devlyn:resolve/references/phases/implement.md +42 -0
  112. package/config/skills/devlyn:resolve/references/phases/plan.md +42 -0
  113. package/config/skills/devlyn:resolve/references/phases/verify.md +69 -0
  114. package/config/skills/devlyn:resolve/references/state-schema.md +106 -0
  115. package/{config/skills → optional-skills}/devlyn:design-system/SKILL.md +1 -0
  116. package/optional-skills/devlyn:reap/SKILL.md +105 -0
  117. package/optional-skills/devlyn:reap/scripts/reap.sh +129 -0
  118. package/optional-skills/devlyn:reap/scripts/scan.sh +116 -0
  119. package/{config/skills → optional-skills}/devlyn:team-design-ui/SKILL.md +5 -0
  120. package/package.json +16 -2
  121. package/scripts/lint-skills.sh +431 -0
  122. package/config/skills/devlyn:auto-resolve/SKILL.md +0 -602
  123. package/config/skills/devlyn:auto-resolve/references/build-gate.md +0 -116
  124. package/config/skills/devlyn:auto-resolve/references/engine-routing.md +0 -204
  125. package/config/skills/devlyn:browser-validate/SKILL.md +0 -164
  126. package/config/skills/devlyn:browser-validate/references/flow-testing.md +0 -118
  127. package/config/skills/devlyn:browser-validate/references/tier1-chrome.md +0 -137
  128. package/config/skills/devlyn:browser-validate/references/tier2-playwright.md +0 -195
  129. package/config/skills/devlyn:browser-validate/references/tier3-curl.md +0 -57
  130. package/config/skills/devlyn:clean/SKILL.md +0 -285
  131. package/config/skills/devlyn:design-ui/SKILL.md +0 -351
  132. package/config/skills/devlyn:discover-product/SKILL.md +0 -124
  133. package/config/skills/devlyn:evaluate/SKILL.md +0 -564
  134. package/config/skills/devlyn:feature-spec/SKILL.md +0 -630
  135. package/config/skills/devlyn:ideate/references/challenge-rubric.md +0 -122
  136. package/config/skills/devlyn:ideate/references/templates/item-spec.md +0 -90
  137. package/config/skills/devlyn:implement-ui/SKILL.md +0 -466
  138. package/config/skills/devlyn:preflight/SKILL.md +0 -370
  139. package/config/skills/devlyn:preflight/references/auditors/browser-auditor.md +0 -32
  140. package/config/skills/devlyn:preflight/references/auditors/code-auditor.md +0 -90
  141. package/config/skills/devlyn:preflight/references/auditors/docs-auditor.md +0 -38
  142. package/config/skills/devlyn:product-spec/SKILL.md +0 -603
  143. package/config/skills/devlyn:recommend-features/SKILL.md +0 -286
  144. package/config/skills/devlyn:review/SKILL.md +0 -161
  145. package/config/skills/devlyn:team-resolve/SKILL.md +0 -631
  146. package/config/skills/devlyn:team-review/SKILL.md +0 -493
  147. package/config/skills/devlyn:update-docs/SKILL.md +0 -463
  148. package/config/skills/workflow-routing/SKILL.md +0 -73
@@ -0,0 +1,42 @@
1
+ # PHASE 1 — PLAN (canonical body)
2
+
3
+ The per-engine adapter header from `_shared/adapters/<model>.md` is prepended at runtime. This file is engine-agnostic.
4
+
5
+ <role>
6
+ You translate a spec or generated criteria into a concrete plan: the file list to touch, the risks the implementation must navigate, and a verbatim restatement of what acceptance requires. The plan is the contract IMPLEMENT executes against.
7
+ </role>
8
+
9
+ <input>
10
+ - Source: `state.source.spec_path` (real spec) or `state.source.criteria_path` (`.devlyn/criteria.generated.md`), both read from `pipeline.state.json`.
11
+ - Codebase at `state.base_ref.sha`.
12
+ - For free-form mode: also `state.complexity` (trivial / medium / large) — informs depth.
13
+ </input>
14
+
15
+ <output>
16
+ Write `.devlyn/plan.md` with three sections:
17
+
18
+ 1. **Files to touch** — explicit list. Each entry: path, change type (`new` / `edit` / `delete`), one-line rationale tied to a specific Requirement.
19
+ 2. **Risks** — out-of-scope expansions to refuse, ambiguous spec sections to interpret strictly, known failure modes for this language/framework.
20
+ 3. **Acceptance restatement** — verbatim copy of the spec's `## Verification` block (or generated criteria's equivalent). The plan is wrong if any verification command later fails because of a planning oversight.
21
+
22
+ Also update `pipeline.state.json:phases.plan.{verdict, completed_at, duration_ms}`. Verdict: `PASS` if plan is shippable; `BLOCKED` if spec is internally contradictory or cannot be planned without violating constraints.
23
+ </output>
24
+
25
+ <quality_bar>
26
+ - Scope first, then implementation. Decide what files to touch before deciding how to implement. Files not in the list are off-limits to IMPLEMENT.
27
+ - Tooling artifacts and reporter output are not deliverables unless the spec lists them. Plan to configure tools to emit to gitignored paths.
28
+ - Existing tests are contract. Plan to extend them; do not plan to remove or weaken them.
29
+ - Spec frontmatter is read-only to PLAN and IMPLEMENT. The DOCS-style status flip happens in CLEANUP under a tight allowlist.
30
+ - If a Requirement says "match the literal output X", restate the literal in the plan. Paraphrasing the contract here propagates into IMPLEMENT.
31
+ </quality_bar>
32
+
33
+ <runtime_principles>
34
+ Read `_shared/runtime-principles.md` (Subtractive-first / Goal-locked / No-workaround / Evidence). Codex-routed phases receive the contract excerpt inlined:
35
+
36
+ - Subtractive-first: prefer trimming an existing helper to introducing a new one. Pure-addition needs a cited prior failure mode or an explicit spec/user requirement.
37
+ - Goal-locked: refuse "while I'm here" cleanups, speculative robustness, mid-flight re-scoping. Single test before any deviation: "did the user ask for this OR does the stated goal strictly require it?"
38
+ - No-workaround: no `any`, no `@ts-ignore`, no silent `catch`, no hardcoded values, no helper scripts that bypass root cause.
39
+ - Evidence: every claim cites file:line opened at planning time. Vague claims excluded.
40
+ </runtime_principles>
41
+
42
+ The task is: [orchestrator pastes the task description and spec context here]
@@ -0,0 +1,69 @@
1
+ # PHASE 5 — VERIFY (canonical body, fresh subagent context)
2
+
3
+ Per-engine adapter header is prepended at runtime. **You are spawned with empty conversation context.** No carry-over from PLAN / IMPLEMENT / BUILD_GATE / CLEANUP. This is the structural guarantee of independence — the prompt body reinforces it but the spawn is what makes it real.
4
+
5
+ <role>
6
+ Independent quality layer. You answer one question: did the diff deliver what the spec said it would, with no scope creep, no quality regression, and no constraint violation? You produce findings only — you have no code-mutation tools.
7
+ </role>
8
+
9
+ <input>
10
+ - `spec.md` (or `.devlyn/criteria.generated.md` for free-form mode) — the contract.
11
+ - `spec.expected.json` — the mechanical acceptance contract per `_shared/expected.schema.json`.
12
+ - The cumulative diff against `state.base_ref.sha`.
13
+ - The spec hash (`state.source.spec_sha256`) — re-read the spec from disk and confirm the hash matches; if it does not, write `state.phases.verify.verdict: "BLOCKED"` with reason `spec_sha256_mismatch` and stop.
14
+
15
+ You do NOT receive: PLAN, IMPLEMENT's reasoning, BUILD_GATE's findings, CLEANUP's allowlist negotiations. Reading those would compromise independence.
16
+ </input>
17
+
18
+ <sub_phases>
19
+
20
+ ### MECHANICAL (deterministic)
21
+
22
+ Re-run the mechanical checks fresh, independent of BUILD_GATE's earlier run:
23
+
24
+ 1. `python3 .claude/skills/_shared/spec-verify-check.py` against the post-CLEANUP code.
25
+ 2. Re-scan `spec.expected.json.forbidden_patterns` against the diff (Python re.search; honor each pattern's `files` allowlist).
26
+ 3. Confirm `required_files` exist post-diff; confirm `forbidden_files` do not appear in the diff.
27
+ 4. Confirm `max_deps_added` is not exceeded (`git diff -- package.json` for Node; equivalent for other ecosystems).
28
+
29
+ Emit findings to `.devlyn/verify-mechanical.findings.jsonl`. Each match = one finding. Severity from the pattern's `severity` field (disqualifier → CRITICAL, warning → MEDIUM).
30
+
31
+ ### JUDGE (fresh-context grading)
32
+
33
+ Grade the diff against the spec on rubric axes:
34
+
35
+ - **Spec compliance** — did every Requirement get an `evidence` record pointing at code that satisfies it?
36
+ - **Scope** — does the diff touch only files PLAN listed (or the cleanup allowlist)? Out-of-scope file = HIGH finding `scope.out-of-scope-violation`.
37
+ - **Quality** — does the implementation follow the framework's idiomatic patterns, or are there hand-rolled helpers replacing standard primitives? `design.unidiomatic-pattern` MEDIUM if so.
38
+ - **Consistency** — internal style (naming, error shape, module boundaries) consistent with the surrounding code.
39
+
40
+ For each finding, write file:line evidence. Do not paraphrase code; quote it.
41
+
42
+ **Coverage check**: before declaring done, confirm you have evidence for every spec axis. If you could not exercise an axis (the spec asks for behavior X but the diff does not touch the code that produces X), set `state.verify.coverage_failed: true` and surface the missing-evidence finding rather than passing on assumption.
43
+
44
+ **Anti-self-filter rule**: report every finding you observe, including ones you consider low-severity or low-confidence. Tag each with `confidence: high|medium|low` and let the harness's downstream filter rank them. Filtering at this stage suppresses recall.
45
+
46
+ ### Pair-mode (when triggered by orchestrator)
47
+
48
+ When the orchestrator spawns a second VERIFY agent with the OTHER engine's adapter, both judgments are merged:
49
+ - Any HIGH/CRITICAL finding either model surfaces is verdict-binding.
50
+ - Lower-severity disagreements are logged but do not change the verdict.
51
+ - The orchestrator handles merge; you only emit your own findings.
52
+
53
+ </sub_phases>
54
+
55
+ <output>
56
+ - `.devlyn/verify-mechanical.findings.jsonl` — MECHANICAL findings.
57
+ - `.devlyn/verify.findings.jsonl` — JUDGE findings.
58
+ - `state.phases.verify.{verdict, completed_at, duration_ms, sub_verdicts: {mechanical, judge}, artifacts}`. Verdict: WORSE of the two sub-verdicts. `PASS` requires zero CRITICAL/HIGH findings AND coverage met.
59
+ </output>
60
+
61
+ <quality_bar>
62
+ - Independence is structural (fresh context) and behavioral (no code mutation). Both must hold.
63
+ - Quote, do not paraphrase. Findings without quoted file:line evidence are excluded.
64
+ - Coverage > confidence. Missing-evidence findings outrank a confident "looks fine."
65
+ </quality_bar>
66
+
67
+ <runtime_principles>
68
+ Read `_shared/runtime-principles.md`. VERIFY's discipline is "the spec is the contract, the diff is the evidence, the verdict is the comparison."
69
+ </runtime_principles>
@@ -0,0 +1,106 @@
1
+ # pipeline.state.json schema
2
+
3
+ Single authoritative verdict source for `/devlyn:resolve`. The orchestrator branches on `state.phases.<name>.verdict` directly — never parses `.devlyn/*.findings.jsonl` for routing. Living document; bump `version` on a breaking change.
4
+
5
+ ## Top-level shape
6
+
7
+ ```json
8
+ {
9
+ "version": "2.0",
10
+ "run_id": "rs-<UTC-timestamp>-<12-hex>",
11
+ "started_at": "2026-04-30T12:00:00Z",
12
+ "engine": "claude",
13
+ "mode": "spec",
14
+ "complexity": null,
15
+ "base_ref": { "branch": "main", "sha": "abc123..." },
16
+ "rounds": { "max_rounds": 4, "global": 0 },
17
+ "bypasses": [],
18
+ "implement_passed_sha": null,
19
+ "source": {
20
+ "type": "spec",
21
+ "spec_path": "docs/roadmap/phase-1/X.md",
22
+ "spec_sha256": "...",
23
+ "criteria_path": null,
24
+ "criteria_sha256": null
25
+ },
26
+ "criteria": [
27
+ { "id": "C1", "ref": "spec://requirements/0", "status": "pending", "evidence": [], "failed_by_finding_ids": [] }
28
+ ],
29
+ "phases": {
30
+ "plan": null,
31
+ "implement": null,
32
+ "build_gate": null,
33
+ "cleanup": null,
34
+ "verify": null,
35
+ "final_report": null
36
+ },
37
+ "verify": { "coverage_failed": false }
38
+ }
39
+ ```
40
+
41
+ ## Field rules
42
+
43
+ - **version** — string. Bump major on a breaking schema change.
44
+ - **mode** — `"free-form" | "spec" | "verify-only"`.
45
+ - **complexity** — `null | "trivial" | "medium" | "large"`. Free-form mode populates this; spec/verify-only mode leaves it null.
46
+ - **engine** — `"claude" | "codex" | "auto"` initially; rewritten by engine-preflight if a downgrade fired.
47
+ - **rounds.global** — incremented every fix-loop pass (BUILD_GATE → fix-loop OR VERIFY → fix-loop).
48
+ - **bypasses** — array of phase names from `--bypass`. Valid: `"build-gate" | "cleanup"`. PLAN, IMPLEMENT, VERIFY are non-bypassable (orchestrator rejects at parse time).
49
+ - **implement_passed_sha** — captured at end of PHASE 2; null until then. Activates the post-implement invariant for CLEANUP and VERIFY.
50
+ - **criteria** — generated from spec's `## Requirements` checklist (one per `- [ ]`). `status: pending → implemented` is the legal transition. `failed_by_finding_ids` populates when VERIFY surfaces a finding tied to a criterion.
51
+ - **verify.coverage_failed** — set by VERIFY's JUDGE sub-phase when a spec axis could not be exercised against the diff. Triggers pair-mode escalation when set.
52
+
53
+ ## Per-phase shape
54
+
55
+ Each entry under `phases.<name>` (for `plan`, `implement`, `build_gate`, `cleanup`, `verify`, `final_report`):
56
+
57
+ ```json
58
+ {
59
+ "started_at": "2026-04-30T12:00:01Z",
60
+ "completed_at": "2026-04-30T12:00:30Z",
61
+ "duration_ms": 29000,
62
+ "round": 0,
63
+ "triggered_by": null,
64
+ "verdict": "PASS",
65
+ "engine": "claude",
66
+ "model": "claude-opus-4-7",
67
+ "pre_sha": null,
68
+ "artifacts": { "findings_file": null, "log_file": null },
69
+ "sub_verdicts": null
70
+ }
71
+ ```
72
+
73
+ - `verdict` — `"PASS" | "PASS_WITH_ISSUES" | "FAIL" | "NEEDS_WORK" | "BLOCKED"`. PHASE 6 (FINAL_REPORT) writes its own verdict per the terminal-verdict precedence.
74
+ - `triggered_by` — null on first run; one of `"build_gate" | "verify"` when the phase is a fix-loop respawn.
75
+ - `pre_sha` — captured by orchestrator before CLEANUP and (if needed) other allowlist-enforced phases. Used to validate the post-spawn diff.
76
+ - `sub_verdicts` — only populated for VERIFY: `{ "mechanical": "PASS|FAIL", "judge": "PASS|...", "pair_judge": "PASS|..." | null }`.
77
+
78
+ ## Write protocol
79
+
80
+ 1. **Before each phase spawn**: orchestrator writes `phases.<name>.{started_at, round, triggered_by}` and (when applicable) `pre_sha`.
81
+ 2. **After each agent returns**: orchestrator validates `verdict`, `completed_at`, `duration_ms`, `artifacts` are populated. Missing fields → orchestrator fills from observable state. Branching on a null verdict is undefined behavior.
82
+ 3. **Before archive** (PHASE 6 step 3): `phases.final_report.verdict` must be non-null. Archive prune skips runs whose final_report verdict is null (treated as in-flight).
83
+
84
+ ## Terminal verdict (PHASE 6)
85
+
86
+ Precedence:
87
+
88
+ 1. `phases.<any>.verdict == "BLOCKED"` → terminal `BLOCKED:<reason>`.
89
+ 2. `phases.verify.verdict == "NEEDS_WORK"` after fix-loop exhaustion → terminal `NEEDS_WORK`.
90
+ 3. `phases.verify.verdict == "PASS_WITH_ISSUES"` → terminal `PASS_WITH_ISSUES`.
91
+ 4. `phases.verify.verdict == "PASS"` → terminal `PASS`.
92
+ 5. Verify-only mode: terminal = `phases.verify.verdict` directly (PHASES 1–4 are skipped).
93
+
94
+ ## Final-report shape
95
+
96
+ Header: `run_id | engine | mode | complexity | verdict | wall_time_s`.
97
+
98
+ Per-phase summary table: `phase | verdict | duration_ms | round | triggered_by | findings_count`.
99
+
100
+ Findings table (post-IMPLEMENT phases only — they are findings-only): each finding's `severity | rule_id | file:line | message | confidence`.
101
+
102
+ Follow-up notes: any `--continue-on-large` assumptions, any silent fallbacks (engine downgrade), any `state.verify.coverage_failed` axes.
103
+
104
+ ## Archive contract
105
+
106
+ PHASE 6 step 4 moves `.devlyn/*` (excluding `.devlyn/runs/`) into `.devlyn/runs/<run_id>/`. The `.devlyn/runs/` directory keeps the last 10 completed runs (sorted by `started_at`). Best-effort prune; archive failure does not change the run's verdict.
@@ -1,4 +1,5 @@
1
1
  ---
2
+ name: devlyn:design-system
2
3
  description: Extract all design values from selected style for exact reproduction
3
4
  argument-hint: <style-number> [platform] (e.g., "3", "3 flutter", "style 2 web")
4
5
  allowed-tools: Bash(ls:*), Bash(cat:*), Bash(grep:*), View, Edit, Write
@@ -0,0 +1,105 @@
1
+ ---
2
+ name: devlyn:reap
3
+ description: Safely count and kill orphaned child processes (PPID=1) left behind by Claude Code MCP plugins, Superset terminal tabs, and codex wrappers. Use this whenever the user says "too many processes", "can't open terminals", "pty/process limit", "hundreds of bun/codex/workerd piling up", "clean up orphans", "reap processes", or reports new terminals failing to spawn on macOS. Also use proactively after long Claude sessions to prevent hitting kern.maxprocperuid or kern.tty.ptmx_max limits. ONLY touches a conservative whitelist of known leaks — never guesses on unknown processes.
4
+ allowed-tools: Read, Bash(ps:*), Bash(lsof:*), Bash(pgrep:*), Bash(awk:*), Bash(id:*), Bash(sysctl:*), Bash(bash:*)
5
+ argument-hint: [scan | kill | kill --force | kill --include workerd | kill --only telegram-bun]
6
+ ---
7
+
8
+ <role>
9
+ You are a process-hygiene janitor for macOS. Your job is to find leaked orphan processes (PPID=1, user-owned) that accumulate from buggy tools — MCP plugins that don't reap children on stdin EOF, terminal apps that don't SIGTERM process groups on tab close, codex wrappers that leave `tail -F` behind — and let the user remove them safely.
10
+
11
+ Your operating principle: **the user's trust costs more than one missed cleanup.** If a process doesn't match a verified whitelist entry, leave it alone and report it as UNKNOWN so the user can decide. Never guess.
12
+ </role>
13
+
14
+ <user_input>
15
+ $ARGUMENTS
16
+ </user_input>
17
+
18
+ <process>
19
+
20
+ ## Phase 1: Parse intent
21
+
22
+ Look at `$ARGUMENTS` and classify:
23
+
24
+ | Input | Mode |
25
+ |---|---|
26
+ | empty, `scan`, `status`, `count`, `list`, or anything non-imperative | **SCAN only** (default) |
27
+ | starts with `kill`, `reap`, `clean`, `prune`, `죽여`, `정리` | **KILL** mode |
28
+
29
+ In KILL mode, also parse:
30
+ - `--force` → SIGKILL instead of SIGTERM
31
+ - `--include workerd` → extend the default whitelist with the workerd-dev category
32
+ - `--only <category>` → restrict to a single category
33
+ - `--dry-run` → list kills but don't send signals
34
+
35
+ If the user's intent is ambiguous (e.g., they say "지워줘" ("delete it") but didn't specify force or include), **default to SCAN first**, show the result, and then ask whether to proceed with kill. Never escalate to `--force` without an explicit request.
36
+
37
+ ## Phase 2: SCAN
38
+
39
+ Always run scan first — even in KILL mode — so the user sees what is about to happen.
40
+
41
+ Run the bundled scanner. The skill is installed at `~/.claude/skills/devlyn:reap/`:
42
+
43
+ ```bash
44
+ bash ~/.claude/skills/devlyn:reap/scripts/scan.sh
45
+ ```
46
+
47
+ Report the output verbatim to the user. Then add your own 2-line summary:
48
+
49
+ - total orphan count across whitelist categories
50
+ - any UNKNOWN_ORPHANS that the user might want to investigate manually
51
+
52
+ Also surface the macOS limits for context, only once per session:
53
+
54
+ ```bash
55
+ sysctl kern.maxprocperuid kern.tty.ptmx_max 2>/dev/null
56
+ ```
57
+
58
+ ## Phase 3: KILL (only when requested)
59
+
60
+ Run the reap script with the parsed flags:
61
+
62
+ ```bash
63
+ bash ~/.claude/skills/devlyn:reap/scripts/reap.sh [flags]
64
+ ```
65
+
66
+ Show the output verbatim. The script re-verifies `PPID==1 && user==current` for every PID right before signaling — a process that was legitimately adopted since the scan will be skipped, not killed.
67
+
68
+ After kill, re-run scan to confirm the counts dropped. If any whitelisted PIDs are still present 2 seconds after SIGTERM, mention that `--force` (SIGKILL) is available.
69
+
70
+ ## Phase 4: Recommend (only if signals of chronic leak)
71
+
72
+ If `telegram-bun` count > 10 OR oldest whitelisted orphan > 24h, tell the user this is a recurring leak and suggest one of:
73
+
74
+ 1. **Patch the telegram plugin** — add `process.stdin.on('end', () => process.exit(0))` to `server.ts` so the child dies when Claude Code exits.
75
+ 2. **Schedule this skill** — run `/devlyn:reap kill` periodically (e.g., via the `/loop` skill or a launchd agent).
76
+ 3. **Update Superset** — newer versions may SIGTERM process groups on tab close.
77
+
78
+ Do NOT apply these automatically. Recommend and let the user choose.
79
+
80
+ </process>
81
+
82
+ <safety>
83
+
84
+ ## Never-touch rules
85
+
86
+ - **NEVER kill** a process whose command does not match a whitelist category in `scan.sh`. Unknown = informational only.
87
+ - **NEVER kill** anything where `ps -o ppid=` returns something other than `1` at signal time.
88
+ - **NEVER kill** processes owned by another user (the scripts check `id -un`).
89
+ - **NEVER use** `killall`, `pkill -9`, or wildcard `kill $(pgrep ...)` in this skill. Always iterate PIDs individually with per-PID re-verification.
90
+ - **NEVER suggest** `sudo` escalation — this is a user-scope cleanup tool.
91
+
92
+ ## Whitelist definitions
93
+
94
+ These are the ONLY categories reap.sh will touch:
95
+
96
+ | Category | Match | Why safe |
97
+ |---|---|---|
98
+ | `telegram-bun` | `bun server.ts` **AND** cwd contains `/plugins/cache/claude-plugins-official/telegram/` | Telegram MCP plugin leaks one per Claude session. Verified by cwd, not just cmdline. |
99
+ | `superset-codex-bash` | `/bin/bash .*/.superset/bin/codex` with PPID=1 | `.superset/bin/codex` wrapper exits without killing its tail child; bash copies left behind. |
100
+ | `superset-codex-tail` | `tail -F .*superset-codex-session-*.jsonl` with PPID=1 | Log tail from the same wrapper, always safe to stop. |
101
+ | `workerd` (opt-in) | `@cloudflare/workerd-darwin-*/bin/workerd serve ` with PPID=1 | moonmaker-engine dev server that survives tab close. Opt-in because the user may have an active dev session. |
102
+
103
+ If the user asks to add a new category, **edit scan.sh and reap.sh together** — both must know the same pattern so scan never promises a cleanup that reap won't deliver.
104
+
105
+ </safety>
@@ -0,0 +1,129 @@
1
+ #!/usr/bin/env bash
2
+ # devlyn:reap — kill orphan processes from safe whitelist categories.
3
+ # Verifies PPID==1 and user-ownership AGAIN at kill time to avoid racing a
4
+ # legitimately-reparented process. Unknown orphans are never killed.
5
+ #
6
+ # Usage:
7
+ # reap.sh # default categories, SIGTERM
8
+ # reap.sh --force # SIGKILL instead of SIGTERM
9
+ # reap.sh --include workerd # add workerd-dev to the default set
10
+ # reap.sh --only telegram-bun # restrict to a single category
11
+ # reap.sh --dry-run # print what WOULD be killed, kill nothing
12
+
13
+ set -u
14
+ LC_ALL=C
15
+ export LC_ALL
16
+
17
+ ME="$(id -un)"
18
+ SIGNAL="TERM"
19
+ DRY=0
20
+ INCLUDE=""
21
+ ONLY=""
22
+
23
+ while [ $# -gt 0 ]; do
24
+ case "$1" in
25
+ --force) SIGNAL="KILL" ;;
26
+ --dry-run) DRY=1 ;;
27
+ --include) shift; INCLUDE="${INCLUDE},${1?--include requires a category name}" ;; # leading comma is intentional: appended to DEFAULT_CATEGORIES later
28
+ --only) shift; ONLY="${1?--only requires a category name}" ;; # fail with a clear message instead of a bare "unbound variable"
29
+ -h|--help)
30
+ sed -n '2,11p' "$0"; exit 0 ;; # print only the header comment block (file lines 2-11), not the code below it
31
+ *)
32
+ printf 'unknown flag: %s\n' "$1" >&2; exit 2 ;;
33
+ esac
34
+ shift
35
+ done
36
+
37
+ DEFAULT_CATEGORIES="telegram-bun,superset-codex-bash,superset-codex-tail"
38
+ if [ -n "$ONLY" ]; then
39
+ CATEGORIES="$ONLY"
40
+ else
41
+ CATEGORIES="${DEFAULT_CATEGORIES}${INCLUDE}"
42
+ fi
43
+
44
+ SNAPSHOT="$(ps -eo pid=,ppid=,user=,etime=,command= 2>/dev/null | awk -v me="$ME" '$2==1 && $3==me')"
45
+
46
+ collect_pids() {
47
+ local category="$1"
48
+ case "$category" in
49
+ telegram-bun)
50
+ # cwd-verified — same logic as scan.sh
51
+ printf '%s\n' "$SNAPSHOT" \
52
+ | grep -E '/bun[^ ]* server\.ts( |$)' \
53
+ | awk '{print $1}' \
54
+ | while read -r pid; do
55
+ cwd="$(lsof -a -d cwd -p "$pid" 2>/dev/null | awk 'NR==2 {for(i=9;i<=NF;i++) printf "%s ", $i; print ""}')"
56
+ case "$cwd" in
57
+ *"/plugins/cache/claude-plugins-official/telegram/"*) printf '%s\n' "$pid" ;;
58
+ esac
59
+ done
60
+ ;;
61
+ superset-codex-bash)
62
+ printf '%s\n' "$SNAPSHOT" | grep -E '/bin/bash .*/\.superset/bin/codex( |$)' | awk '{print $1}' ;;
63
+ superset-codex-tail)
64
+ printf '%s\n' "$SNAPSHOT" | grep -E 'tail .*superset-codex-session-.*\.jsonl' | awk '{print $1}' ;;
65
+ workerd)
66
+ printf '%s\n' "$SNAPSHOT" | grep -E '@cloudflare/workerd-darwin-[^/]+/bin/workerd serve ' | awk '{print $1}' ;;
67
+ *)
68
+ printf 'unknown category: %s\n' "$category" >&2
69
+ return 1 ;;
70
+ esac
71
+ }
72
+
73
+ TOTAL_KILLED=0
74
+ TOTAL_SKIPPED=0
75
+
76
+ # Split the comma-separated category list without letting IFS leak into the
77
+ # inner loop that iterates newline-separated PIDs.
78
+ CATS_ARR=()
79
+ OLD_IFS="$IFS"
80
+ IFS=,
81
+ for c in $CATEGORIES; do
82
+ [ -n "$c" ] && CATS_ARR+=("$c")
83
+ done
84
+ IFS="$OLD_IFS"
85
+
86
+ for cat in "${CATS_ARR[@]}"; do
87
+ pids="$(collect_pids "$cat")" || continue
88
+ if [ -z "$pids" ]; then
89
+ printf '[%s] nothing to kill\n' "$cat"
90
+ continue
91
+ fi
92
+ while IFS= read -r pid; do
93
+ [ -z "$pid" ] && continue
94
+ # Re-verify right before killing. Any of these mean "don't touch":
95
+ # - process already gone
96
+ # - PPID is no longer 1 (got adopted by a real parent — not our target)
97
+ # - owner changed (extremely unlikely but cheap to check)
98
+ live_info="$(ps -o ppid=,user= -p "$pid" 2>/dev/null)"
99
+ if [ -z "$live_info" ]; then
100
+ printf '[%s] %s skipped (already exited)\n' "$cat" "$pid"
101
+ TOTAL_SKIPPED=$((TOTAL_SKIPPED+1))
102
+ continue
103
+ fi
104
+ live_ppid="$(printf '%s' "$live_info" | awk '{print $1}')"
105
+ live_user="$(printf '%s' "$live_info" | awk '{print $2}')"
106
+ if [ "$live_ppid" != "1" ] || [ "$live_user" != "$ME" ]; then
107
+ printf '[%s] %s skipped (ppid=%s user=%s — no longer orphan)\n' "$cat" "$pid" "$live_ppid" "$live_user"
108
+ TOTAL_SKIPPED=$((TOTAL_SKIPPED+1))
109
+ continue
110
+ fi
111
+ if [ "$DRY" -eq 1 ]; then
112
+ printf '[%s] %s would SIG%s\n' "$cat" "$pid" "$SIGNAL"
113
+ else
114
+ if kill -s "$SIGNAL" "$pid" 2>/dev/null; then
115
+ printf '[%s] %s SIG%s sent\n' "$cat" "$pid" "$SIGNAL"
116
+ TOTAL_KILLED=$((TOTAL_KILLED+1))
117
+ else
118
+ printf '[%s] %s kill failed\n' "$cat" "$pid"
119
+ TOTAL_SKIPPED=$((TOTAL_SKIPPED+1))
120
+ fi
121
+ fi
122
+ done <<< "$pids"
123
+ done
124
+
125
+ if [ "$DRY" -eq 1 ]; then
126
+ printf '\ndry-run complete.\n'
127
+ else
128
+ printf '\ndone. killed=%s skipped=%s\n' "$TOTAL_KILLED" "$TOTAL_SKIPPED"
129
+ fi
@@ -0,0 +1,116 @@
1
+ #!/usr/bin/env bash
2
+ # devlyn:reap — scan orphan processes by safe-to-kill category.
3
+ # Read-only. Never kills anything. Always exits 0 on success.
4
+ #
5
+ # Output format: one TSV line per category with
6
+ # CATEGORY COUNT OLDEST_ETIME PIDS NOTE
7
+ # Followed by an "UNKNOWN_ORPHANS" line reporting non-system orphans we
8
+ # deliberately left out of the whitelist — these will NOT be touched by reap.sh.
9
+
10
+ set -u
11
+ LC_ALL=C
12
+ export LC_ALL
13
+
14
+ # PPID=1 user-owned processes. Column layout: PID PPID USER ETIME COMMAND...
15
+ ME="$(id -un)"
16
+ SNAPSHOT="$(ps -eo pid=,ppid=,user=,etime=,command= 2>/dev/null | awk -v me="$ME" '$2==1 && $3==me')"
17
+
18
+ # -----------------------------------------------------------------------------
19
+ # Category matchers (grep -E patterns). These target processes that are KNOWN
20
+ # to leak from specific tools that do not reap their children on exit.
21
+ # Conservative by design — if unsure, leave it UNKNOWN.
22
+ # -----------------------------------------------------------------------------
23
+ match_telegram_bun() { grep -E '/bun[^ ]* server\.ts( |$)'; }
24
+ match_superset_codex_sh() { grep -E '/bin/bash .*/\.superset/bin/codex( |$)'; }
25
+ match_superset_codex_tl() { grep -E 'tail .*superset-codex-session-.*\.jsonl'; }
26
+ match_workerd_dev() { grep -E '@cloudflare/workerd-darwin-[^/]+/bin/workerd serve '; }
27
+
28
+ emit() { # usage: <ps rows on stdin> | emit NAME NOTE — prints one TSV summary row (name, count, oldest etime, pids, note)
29
+ local name="$1"; shift
30
+ local note="$1"; shift
31
+ local lines; lines="$(cat)"
32
+ local count; count="$(printf '%s\n' "$lines" | grep -c . || true)"
33
+ if [ "${count:-0}" -eq 0 ]; then
34
+ printf '%-24s\t0\t-\t-\t%s\n' "$name" "$note"
35
+ return
36
+ fi
37
+ local pids oldest
38
+ # ps column order is: pid ppid user etime command... etime is [[dd-]hh:]mm:ss, so compare it as seconds — a text sort ranks "59:59" above "01-00:00:00".
39
+ pids="$(printf '%s\n' "$lines" | awk '{print $1}' | paste -sd, -)"
40
+ oldest="$(printf '%s\n' "$lines" | awk 'NF>=4 {k=split($4,a,"-"); d=(k==2)?a[1]:0; n=split(a[k],t,":"); s=d*86400; for(i=1;i<=n;i++) s+=t[i]*60^(n-i); if (best=="" || s>max) {max=s; best=$4}} END {print best}')"
41
+ printf '%-24s\t%s\t%s\t%s\t%s\n' "$name" "$count" "$oldest" "$pids" "$note"
42
+ }
43
+
44
+ printf 'CATEGORY \tCOUNT\tOLDEST\tPIDS\tNOTE\n'
45
+
46
+ # Verify the bun server belongs to the telegram plugin before classifying it.
47
+ # cwd is the reliable signal; command line alone is ambiguous.
48
+ TELEGRAM_PIDS=""
49
+ if [ -n "$SNAPSHOT" ]; then
50
+ BUN_CANDIDATES="$(printf '%s\n' "$SNAPSHOT" | match_telegram_bun | awk '{print $1}')"
51
+ for pid in $BUN_CANDIDATES; do
52
+ cwd="$(lsof -a -d cwd -p "$pid" 2>/dev/null | awk 'NR==2 {for(i=9;i<=NF;i++) printf "%s ", $i; print ""}')"
53
+ case "$cwd" in
54
+ *"/plugins/cache/claude-plugins-official/telegram/"*)
55
+ TELEGRAM_PIDS="${TELEGRAM_PIDS}${pid}
56
+ " ;;
57
+ esac
58
+ done
59
+ fi
60
+
61
+ if [ -n "$TELEGRAM_PIDS" ]; then
62
+ # Reconstruct rows for accurate ETIME/command display.
63
+ printf '%s' "$TELEGRAM_PIDS" | grep -v '^$' | while read -r pid; do
64
+ printf '%s\n' "$SNAPSHOT" | awk -v p="$pid" '$1==p'
65
+ done | emit "telegram-bun" "cwd=.../telegram/ plugin — safe"
66
+ else
67
+ printf '' | emit "telegram-bun" "cwd=.../telegram/ plugin — safe"
68
+ fi
69
+
70
+ printf '%s\n' "$SNAPSHOT" | match_superset_codex_sh | emit \
71
+ "superset-codex-bash" ".superset/bin/codex wrapper leak — safe"
72
+ printf '%s\n' "$SNAPSHOT" | match_superset_codex_tl | emit \
73
+ "superset-codex-tail" "superset-codex-session-*.jsonl tail — safe"
74
+ printf '%s\n' "$SNAPSHOT" | match_workerd_dev | emit \
75
+ "workerd-dev" "cloudflare dev server — opt-in (include=workerd)"
76
+
77
+ # -----------------------------------------------------------------------------
78
+ # UNKNOWN_ORPHANS: everything else that is PPID=1 and user-owned. Informational
79
+ # only. These will NOT be killed without a human explicitly extending the
80
+ # whitelist. macOS system helpers (launchd, /usr/libexec/**, Application
81
+ # bundles, Electron helpers, etc.) are filtered out — they're not orphans in
82
+ # the leak sense, they legitimately run under launchd.
83
+ # -----------------------------------------------------------------------------
84
+ SYSTEM_FILTER='(^|/)(launchd|aslmanager|cloudphotod|automountd|autofsd|usernotificationsd|voicebankingd|veraport)( |$)|^/System/|^/usr/libexec/|^/usr/sbin/|^/Library/Apple|^/Library/Developer/PrivateFrameworks/CoreSimulator|^/Library/PrivilegedHelperTools/|^/Applications/|CoreSimulator|raonsecure|TEK_|ChatGPTHelper|FigmaAgent|figma_agent|iniLINE|CrossEX|com\.apple\.|Superset Helper|Electron Framework|QuickLookUIService|SandboxHelper|MTLCompilerService|extensionkitservice|ssh-agent|Squirrel|app-server-broker\.mjs'
85
+
86
+ UNKNOWN="$(printf '%s\n' "$SNAPSHOT" \
87
+ | awk '{printf "%s\t", $1; for(i=5;i<=NF;i++) printf "%s ", $i; print ""}' \
88
+ | SYSTEM_FILTER="$SYSTEM_FILTER" awk -F '\t' '$2 !~ ENVIRON["SYSTEM_FILTER"]')" # test the filter against the command column only — on a full ps row (which starts with the PID) its ^-anchored patterns can never match
89
+
90
+ # Strip already-whitelisted categories from the UNKNOWN set so we don't
91
+ # double-count them.
92
+ WHITELIST_PIDS="$( {
93
+ printf '%s' "$TELEGRAM_PIDS"
94
+ printf '%s\n' "$SNAPSHOT" | match_superset_codex_sh | awk '{print $1}'
95
+ printf '%s\n' "$SNAPSHOT" | match_superset_codex_tl | awk '{print $1}'
96
+ printf '%s\n' "$SNAPSHOT" | match_workerd_dev | awk '{print $1}'
97
+ } | grep -v '^$' | sort -u)"
98
+
99
+ printf '\nUNKNOWN_ORPHANS (informational — NOT killed by reap.sh):\n'
100
+ if [ -z "$UNKNOWN" ]; then
101
+ printf ' (none)\n'
102
+ else
103
+ # awk can't take a multi-line string via -v (literal newlines are rejected),
104
+ # so pass the whitelist as a temp file instead.
105
+ WL_TMP="$(mktemp -t devlyn-reap-wl)"
106
+ # shellcheck disable=SC2064
107
+ trap "rm -f '$WL_TMP'" EXIT
108
+ printf '%s\n' "$WHITELIST_PIDS" > "$WL_TMP"
109
+ printf '%s\n' "$UNKNOWN" | awk -v wlf="$WL_TMP" '
110
+ BEGIN {
111
+ while ((getline line < wlf) > 0) if (line != "") wh[line]=1
112
+ close(wlf)
113
+ }
114
+ { if (!($1 in wh)) print " " $0 }
115
+ '
116
+ fi
@@ -1,3 +1,8 @@
1
+ ---
2
+ name: devlyn:team-design-ui
3
+ description: Assemble a world-class design team to generate 5 radically distinct, portfolio-worthy UI style explorations. Like /devlyn:design-ui but powered by a full team of design specialists.
4
+ ---
5
+
1
6
  Assemble a world-class design team to generate 5 radically distinct, portfolio-worthy UI style explorations. Like `/devlyn:design-ui` but powered by a full team of design specialists — Creative Director, Product Designer, Visual Designer, Interaction Designer, and Accessibility Designer — who collaborate to produce 5 stunning HTML design samples that go far beyond what a single designer could achieve.
2
7
 
3
8
  This is design exploration only. After the user picks a style:
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "devlyn-cli",
3
- "version": "1.14.0",
3
+ "version": "2.0.0",
4
4
  "description": "AI development toolkit for Claude Code — ideate, auto-resolve, and ship with context engineering and agent orchestration",
5
5
  "homepage": "https://github.com/fysoul17/devlyn-cli#readme",
6
6
  "bin": {
@@ -13,9 +13,23 @@
13
13
  "!config/skills/preflight-workspace/**",
14
14
  "!config/skills/devlyn:ideate-workspace",
15
15
  "!config/skills/devlyn:ideate-workspace/**",
16
+ "!config/skills/devlyn:auto-resolve-workspace",
17
+ "!config/skills/devlyn:auto-resolve-workspace/**",
18
+ "!config/skills/roadmap-archival-workspace",
19
+ "!config/skills/roadmap-archival-workspace/**",
16
20
  "agents-config",
17
21
  "optional-skills",
18
- "CLAUDE.md"
22
+ "benchmark/auto-resolve/BENCHMARK-DESIGN.md",
23
+ "benchmark/auto-resolve/README.md",
24
+ "benchmark/auto-resolve/RUBRIC.md",
25
+ "benchmark/auto-resolve/fixtures/SCHEMA.md",
26
+ "benchmark/auto-resolve/fixtures/F*/**",
27
+ "benchmark/auto-resolve/fixtures/test-repo/**",
28
+ "!benchmark/auto-resolve/fixtures/test-repo/node_modules/**",
29
+ "benchmark/auto-resolve/scripts/**",
30
+ "scripts/lint-skills.sh",
31
+ "CLAUDE.md",
32
+ "AGENTS.md"
19
33
  ],
20
34
  "keywords": [
21
35
  "claude",