@kody-ade/kody-engine 0.2.46 → 0.2.48

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,76 @@
1
+ {
2
+ "name": "orchestrator-plan-build-review",
3
+ "describe": "Deterministic orchestrator for the plan → build → review (→ fix on concerns/fail) flow. No agent — the postflight entries ARE the transition table, evaluated top-to-bottom via runWhen.",
4
+ "inputs": [
5
+ {
6
+ "name": "issue",
7
+ "flag": "--issue",
8
+ "type": "int",
9
+ "required": true,
10
+ "describe": "GitHub issue number to drive the flow on."
11
+ },
12
+ {
13
+ "name": "flow",
14
+ "flag": "--flow",
15
+ "type": "string",
16
+ "required": false,
17
+ "describe": "Flow name (cosmetic — recorded in state.flow.name)."
18
+ }
19
+ ],
20
+ "claudeCode": {
21
+ "model": "inherit",
22
+ "permissionMode": "default",
23
+ "maxTurns": 0,
24
+ "maxThinkingTokens": null,
25
+ "systemPromptAppend": null,
26
+ "tools": [],
27
+ "hooks": [],
28
+ "skills": [],
29
+ "commands": [],
30
+ "subagents": [],
31
+ "plugins": [],
32
+ "mcpServers": []
33
+ },
34
+ "cliTools": [],
35
+ "scripts": {
36
+ "preflight": [
37
+ { "script": "loadIssueContext" },
38
+ { "script": "loadTaskState" },
39
+ { "script": "skipAgent" }
40
+ ],
41
+ "postflight": [
42
+ { "script": "startFlow", "with": { "entry": "plan", "target": "issue" },
43
+ "runWhen": { "data.taskState.core.lastOutcome": null } },
44
+
45
+ { "script": "dispatch", "with": { "next": "run", "target": "issue" },
46
+ "runWhen": { "data.taskState.core.lastOutcome.type": "PLAN_COMPLETED" } },
47
+
48
+ { "script": "dispatch", "with": { "next": "review", "target": "pr" },
49
+ "runWhen": { "data.taskState.core.lastOutcome.type": "RUN_COMPLETED" } },
50
+
51
+ { "script": "finishFlow", "with": { "reason": "review-passed" },
52
+ "runWhen": { "data.taskState.core.lastOutcome.type": "REVIEW_PASS" } },
53
+
54
+ { "script": "dispatch", "with": { "next": "fix", "target": "pr" },
55
+ "runWhen": { "data.taskState.core.lastOutcome.type": ["REVIEW_CONCERNS", "REVIEW_FAIL"] } },
56
+
57
+ { "script": "finishFlow", "with": { "reason": "review-failed" },
58
+ "runWhen": { "data.taskState.core.lastOutcome.type": "REVIEW_FAILED" } },
59
+
60
+ { "script": "finishFlow", "with": { "reason": "fix-applied" },
61
+ "runWhen": { "data.taskState.core.lastOutcome.type": "FIX_COMPLETED" } },
62
+
63
+ { "script": "finishFlow", "with": { "reason": "aborted" },
64
+ "runWhen": { "data.taskState.core.lastOutcome.type": ["PLAN_FAILED", "RUN_FAILED", "FIX_FAILED", "AGENT_NOT_RUN"] } },
65
+
66
+ { "script": "persistFlowState" }
67
+ ]
68
+ },
69
+ "output": {
70
+ "actionTypes": [
71
+ "FLOW_STARTED",
72
+ "FLOW_COMPLETED",
73
+ "FLOW_ABORTED"
74
+ ]
75
+ }
76
+ }
@@ -0,0 +1,7 @@
1
+ <!--
2
+ This file exists only because the executor's profile loader expects a
3
+ prompt.md sibling. The orchestrator-plan-build-review executable runs
4
+ with maxTurns: 0 and a `skipAgent` preflight, so this prompt is never
5
+ actually delivered to Claude. The transition logic lives entirely in
6
+ profile.json's postflight entries.
7
+ -->
@@ -0,0 +1,75 @@
1
+ {
2
+ "name": "ui-review",
3
+ "describe": "UI/UX review of an open PR: browses the running preview with Playwright, compares behavior to diff intent, posts one structured review comment. Read-only on the repo (no commits); writes only throwaway Playwright specs and screenshots under .kody2/ui-review/.",
4
+ "kind": "oneshot",
5
+ "inputs": [
6
+ {
7
+ "name": "pr",
8
+ "flag": "--pr",
9
+ "type": "int",
10
+ "required": true,
11
+ "describe": "GitHub PR number to review."
12
+ },
13
+ {
14
+ "name": "previewUrl",
15
+ "flag": "--preview-url",
16
+ "type": "string",
17
+ "required": false,
18
+ "describe": "Base URL the agent should browse. Falls back to $PREVIEW_URL, then http://localhost:3000."
19
+ }
20
+ ],
21
+ "claudeCode": {
22
+ "model": "inherit",
23
+ "permissionMode": "acceptEdits",
24
+ "maxTurns": null,
25
+ "maxThinkingTokens": null,
26
+ "systemPromptAppend": null,
27
+ "tools": [
28
+ "Read",
29
+ "Grep",
30
+ "Glob",
31
+ "Bash",
32
+ "Write",
33
+ "Edit"
34
+ ],
35
+ "hooks": [],
36
+ "skills": [],
37
+ "commands": [],
38
+ "subagents": [],
39
+ "plugins": [],
40
+ "mcpServers": []
41
+ },
42
+ "cliTools": [
43
+ {
44
+ "name": "playwright",
45
+ "install": {
46
+ "required": false,
47
+ "checkCommand": "npx --no-install playwright --version",
48
+ "installCommand": "npx --yes playwright install --with-deps chromium"
49
+ },
50
+ "verify": "npx --no-install playwright --version",
51
+ "usage": "Use `npx playwright test <file>` to run a Playwright spec. Write ad-hoc specs under `.kody2/ui-review/*.spec.ts`. If `npx playwright test` errors with `Cannot find package '@playwright/test'`, install it once with `npm install --no-save @playwright/test` (or the repo's package-manager equivalent that leaves the manifest and lockfile untouched) before retrying — the `playwright` browser binaries are already set up by preflight, but the per-repo test framework may not be. Prefer `page.goto(process.env.UI_REVIEW_BASE_URL)` — the base URL is injected as `UI_REVIEW_BASE_URL` at run time. Capture screenshots with `await page.screenshot({ path: '.kody2/ui-review/<name>.png' })` and reference those paths in your final review.",
52
+ "allowedUses": [
53
+ "test",
54
+ "--version"
55
+ ]
56
+ }
57
+ ],
58
+ "inputArtifacts": [],
59
+ "outputArtifacts": [],
60
+ "scripts": {
61
+ "preflight": [
62
+ { "script": "reviewFlow" },
63
+ { "script": "loadTaskState" },
64
+ { "script": "loadConventions" },
65
+ { "script": "discoverQaContext" },
66
+ { "script": "loadQaGuide" },
67
+ { "script": "resolvePreviewUrl" },
68
+ { "script": "composePrompt" }
69
+ ],
70
+ "postflight": [
71
+ { "script": "postReviewResult" },
72
+ { "script": "saveTaskState" }
73
+ ]
74
+ }
75
+ }
@@ -0,0 +1,103 @@
1
+ You are Kody, a senior UI/UX reviewer. Review PR #{{pr.number}} by reading the diff AND browsing the running app with Playwright. Post ONE structured review comment. Do NOT edit any tracked source files. Do NOT run any `git` or `gh` commands.
2
+
3
+ You MAY write throwaway Playwright specs and screenshots under `.kody2/ui-review/` — that directory is ignored by the repo.
4
+
5
+ # PR #{{pr.number}}: {{pr.title}}
6
+
7
+ Base: {{pr.baseRefName}} ← Head: {{pr.headRefName}}
8
+
9
+ {{pr.body}}
10
+
11
+ # Preview URL
12
+
13
+ `{{previewUrl}}` (resolved from: {{previewUrlSource}})
14
+
15
+ Before you do anything else, run:
16
+
17
+ ```bash
18
+ curl -sS -o /dev/null -w "%{http_code}\n" --max-time 10 "{{previewUrl}}"
19
+ ```
20
+
21
+ If the response is not 2xx or 3xx (curl prints `000` when the connection itself fails or times out), the preview is unreachable. In that case, SKIP browsing, note the failure in your review under "Gaps", and base your verdict on the diff alone.
22
+
23
+ # QA context (auto-discovered from the repo)
24
+
25
+ ```
26
+ {{qaContext}}
27
+ ```
28
+
29
+ # QA guide (committed in the repo — authoritative over the auto-discovery above)
30
+
31
+ {{qaGuide}}
32
+
33
+ # Diff
34
+
35
+ ```diff
36
+ {{prDiff}}
37
+ ```
38
+
39
+ {{conventionsBlock}}
40
+
41
+ {{toolsUsage}}
42
+
43
+ # What to do
44
+
45
+ 1. **Identify UI-affecting changes.** Read the diff. Which pages / components / forms / styles did this PR change? Which user-visible behavior should be verified in the browser? If the diff has no UI surface (pure backend, pure config, pure tests), say so and produce a diff-only review — do not spin up Playwright for nothing.
46
+
47
+ 2. **Plan the browse session.** For each UI-affecting change, pick 1–3 routes from the QA context that exercise it. If the change requires an authenticated role, grab credentials from the QA guide above. If no credentials are available for a role the change depends on, note that as a gap and browse only public pages.
48
+
49
+ 3. **Write a Playwright spec.** Create exactly one spec file at `.kody2/ui-review/browse.spec.ts`. Use `process.env.UI_REVIEW_BASE_URL` as the base URL. For each route you plan to check, write a test that:
50
+ - navigates there,
51
+ - performs the minimum interaction to exercise the change (click, submit, fill),
52
+ - takes a screenshot at `.kody2/ui-review/<slug>.png`,
53
+ - asserts at least one piece of visible content so the test fails loudly on a blank / error page.
54
+
55
+ Include a `playwright.config.ts` at `.kody2/ui-review/playwright.config.ts` only if you need custom config; otherwise rely on defaults (headless chromium).
56
+
57
+ 4. **Run it.** Invoke:
58
+
59
+ ```bash
60
+ UI_REVIEW_BASE_URL="{{previewUrl}}" npx playwright test .kody2/ui-review/browse.spec.ts --reporter=line
61
+ ```
62
+
63
+ Capture both stdout and exit code. If Playwright is not installed, the executor will have tried to install it in preflight — if it still fails, report the install error and fall back to a diff-only review.
64
+
65
+ 5. **Inspect screenshots.** Use the Read tool on each `.png` under `.kody2/ui-review/` so the visual state is in your context. Note anything that looks broken, empty, misaligned, or inconsistent with the diff's intent.
66
+
67
+ 6. **Write the review.** Your FINAL MESSAGE must be the markdown review comment — no preamble, no DONE / COMMIT_MSG markers. The entire final message is posted verbatim to the PR.
68
+
69
+ # Required output format
70
+
71
+ ```
72
+ ## Verdict: PASS | CONCERNS | FAIL
73
+
74
+ _UI review by kody2 — browsed {{previewUrl}}_
75
+
76
+ ### Summary
77
+ <2-3 sentences: what this PR changes in the UI, and whether the running app matches that intent>
78
+
79
+ ### What I browsed
80
+ - `<route>` — <what was checked, with screenshot path>
81
+ - ... (omit this section entirely if the diff had no UI surface)
82
+
83
+ ### UI findings
84
+ - <bullet — cite file:line for code issues; cite route + screenshot for visual issues; say "None." if truly none>
85
+
86
+ ### Code findings
87
+ - <bullets from reading the diff — correctness, a11y, performance, component structure; say "None." if none>
88
+
89
+ ### Gaps
90
+ - <anything you could NOT verify (missing creds, unreachable page, preview down) and why — say "None." if you verified everything relevant>
91
+
92
+ ### Bottom line
93
+ <one sentence>
94
+ ```
95
+
96
+ # Rules
97
+
98
+ - No commits. No `git` / `gh` invocations. No edits to files outside `.kody2/ui-review/`.
99
+ - Verdict **FAIL** only for clear visual regressions, broken flows, or correctness/accessibility issues that block merge.
100
+ - Verdict **CONCERNS** for clarity/polish/edge-case gaps that shouldn't block.
101
+ - Verdict **PASS** when the PR's UI changes work as intended and nothing obvious is broken.
102
+ - If the preview URL is unreachable, the verdict (PASS / CONCERNS / FAIL) must be based on the diff alone, and the "Gaps" section must call that out.
103
+ - Be specific: every finding gets a route + screenshot reference, or a file:line reference. No generic advice.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@kody-ade/kody-engine",
3
- "version": "0.2.46",
3
+ "version": "0.2.48",
4
4
  "description": "kody2 — autonomous development engine. Single-session Claude Code agent behind a generic executor + declarative executable profiles.",
5
5
  "license": "MIT",
6
6
  "type": "module",
@@ -1,67 +0,0 @@
1
- {
2
- "name": "orchestrator",
3
- "describe": "Drive a chain of kody2 executables by posting @kody2 subcommand comments and polling the task-state comment. Atomic \u2014 one orchestrator run fires one flow.",
4
- "inputs": [
5
- {
6
- "name": "issue",
7
- "flag": "--issue",
8
- "type": "int",
9
- "required": true,
10
- "describe": "GitHub issue number to drive."
11
- },
12
- {
13
- "name": "flow",
14
- "flag": "--flow",
15
- "type": "string",
16
- "required": false,
17
- "describe": "Named flow to run (e.g. 'plan-then-build'). Defaults to plan-then-build."
18
- }
19
- ],
20
- "claudeCode": {
21
- "model": "inherit",
22
- "permissionMode": "default",
23
- "maxTurns": null,
24
- "systemPromptAppend": null,
25
- "tools": [
26
- "Bash",
27
- "Read"
28
- ],
29
- "hooks": [],
30
- "skills": [],
31
- "commands": [],
32
- "subagents": [],
33
- "plugins": [],
34
- "mcpServers": []
35
- },
36
- "cliTools": [],
37
- "scripts": {
38
- "preflight": [
39
- {
40
- "script": "loadIssueContext"
41
- },
42
- {
43
- "script": "loadTaskState"
44
- },
45
- {
46
- "script": "composePrompt"
47
- }
48
- ],
49
- "postflight": [
50
- {
51
- "script": "parseAgentResult"
52
- },
53
- {
54
- "script": "writeRunSummary"
55
- },
56
- {
57
- "script": "saveTaskState"
58
- }
59
- ]
60
- },
61
- "output": {
62
- "actionTypes": [
63
- "ORCHESTRATION_COMPLETED",
64
- "ORCHESTRATION_FAILED"
65
- ]
66
- }
67
- }
@@ -1,56 +0,0 @@
1
- You are the **kody2 orchestrator** for issue #{{issue.number}} on {{repoOwner}}/{{repoName}}.
2
-
3
- Your job: drive a 2-step flow **plan → build** by posting `@kody2 <subcommand>` comments on the issue and watching the state-comment for completion signals. You do NOT edit files. You do NOT run git. You use `gh` (via Bash) only to post comments and read the state-comment.
4
-
5
- ---
6
-
7
- # Issue #{{issue.number}}: {{issue.title}}
8
-
9
- {{issue.body}}
10
-
11
- # Required flow (plan-then-build)
12
-
13
- 1. **Kick off plan.** Post an issue comment with EXACTLY this body:
14
- ```
15
- @kody2 plan
16
- ```
17
- Use: `gh issue comment {{issue.number}} --body "@kody2 plan"` (in the cwd).
18
- 2. **Wait for plan to complete.** Poll the issue's state-comment every ~30s. The state-comment is the one whose body starts with `<!-- kody2:state:v1:begin -->`. Fetch it with:
19
- ```
20
- gh api repos/{{repoOwner}}/{{repoName}}/issues/{{issue.number}}/comments --paginate --jq '.[] | select(.body | contains("kody2:state:v1:begin")) | .body'
21
- ```
22
- Parse the JSON block inside the sentinels. Look for `core.lastOutcome.type == "PLAN_COMPLETED"`.
23
- If `core.lastOutcome.type == "PLAN_FAILED"` OR if 10 minutes pass without completion → abort with:
24
- ```
25
- FAILED: plan did not complete (<reason from state or "timeout">)
26
- ```
27
- 3. **Kick off build.** Post:
28
- ```
29
- @kody2 build
30
- ```
31
- Same `gh issue comment` command.
32
- 4. **Wait for build to complete.** Same poll technique. Look for `core.lastOutcome.type == "RUN_COMPLETED"` (build's success marker) or `RUN_FAILED`. If `RUN_FAILED` or 30 minutes pass → abort with `FAILED: build did not complete (...)`.
33
- 5. **Emit final summary.**
34
-
35
- # Required final output
36
-
37
- On success:
38
-
39
- ```
40
- DONE
41
- COMMIT_MSG: chore(orchestrator): plan-then-build for #{{issue.number}}
42
- PR_SUMMARY:
43
- - Posted `@kody2 plan` and observed PLAN_COMPLETED.
44
- - Posted `@kody2 build` and observed RUN_COMPLETED.
45
- - Final PR: <prUrl from state>
46
- ```
47
-
48
- On failure, a single line: `FAILED: <concrete reason>`.
49
-
50
- # Rules
51
-
52
- - NEVER edit files. Read-only flow.
53
- - NEVER run git. Only `gh` via Bash for comment posting and state polling.
54
- - Between polls, sleep ~30 seconds. Do NOT poll faster than once every 30 seconds.
55
- - Hard cap: 40 turns total across the whole flow. If you're approaching the cap, fail early with `FAILED: turn budget exhausted`.
56
- - If you post an `@kody2` comment and the state-comment does NOT update within the poll window, the child executable likely didn't run — check the GitHub Actions runs tab URL via `gh run list --limit 5 --json conclusion,status,url` to diagnose, then fail with a concrete reason.