@williambeto/ai-workflow 2.1.0 → 2.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +20 -11
- package/dist-assets/docs/visual-validation-guide.md +76 -0
- package/package.json +2 -1
- package/src/cli.js +19 -1
- package/src/commands/execute.js +172 -0
- package/src/commands/run.js +1 -0
- package/src/core/execution-planner.js +59 -0
- package/src/core/gates/branch-gate.js +37 -4
- package/src/core/handoff/handoff-engine.js +39 -13
- package/src/core/request-classifier.js +58 -0
- package/src/core/runtime/opencode-adapter.js +94 -0
- package/src/core/templates.js +3 -0
- package/src/core/workflow-state-machine.js +46 -0
package/README.md
CHANGED
|
@@ -3,25 +3,33 @@
|
|
|
3
3
|
[](LICENSE)
|
|
4
4
|
[](https://www.npmjs.com/package/@williambeto/ai-workflow)
|
|
5
5
|
|
|
6
|
-
AI Workflow Kit is an
|
|
6
|
+
The AI Workflow Kit is an npm/CLI package that installs a suite of agents, commands, skills, policies, and templates to guide a coding agent inside a project.
|
|
7
7
|
|
|
8
|
-
|
|
8
|
+
The central promise is:
|
|
9
9
|
|
|
10
|
-
|
|
10
|
+
> "Transform a natural language request into a proportionate, safe, and verifiable software delivery."
|
|
11
11
|
|
|
12
|
-
|
|
13
|
-
npm install -g @williambeto/ai-workflow@latest
|
|
14
|
-
```
|
|
12
|
+
The workflow the product guarantees is:
|
|
15
13
|
|
|
16
|
-
|
|
14
|
+
1. **Request**: Understand the actual request.
|
|
15
|
+
2. **Planning**: Select the execution mode and workflow profile.
|
|
16
|
+
3. **Branch Gate**: Never implement directly on main/master.
|
|
17
|
+
4. **Delegation**: Atlas routes; Astra implements; Sage validates when appropriate; Phoenix remediates within a defined limit.
|
|
18
|
+
5. **Implementation**: Deliver useful code, not just reports.
|
|
19
|
+
6. **Validation**: Run observed validations and relevant tests, blocking false success.
|
|
20
|
+
7. **Evidence**: Summarize changes, validations, and limitations.
|
|
17
21
|
|
|
18
|
-
|
|
22
|
+
In practice, the kit is neither a frontend framework nor a UI library. It is an operational layer for software agents: it installs instructions and guardrails so that the agent can work more effectively inside the user's repository.
|
|
23
|
+
|
|
24
|
+
## Installation
|
|
25
|
+
|
|
26
|
+
### Current public release
|
|
19
27
|
|
|
20
28
|
```bash
|
|
21
|
-
npm install -g @williambeto/ai-workflow@
|
|
29
|
+
npm install -g @williambeto/ai-workflow@latest
|
|
22
30
|
```
|
|
23
31
|
|
|
24
|
-
|
|
32
|
+
Use `latest` for the supported public release. Release notes record exact historical versions when needed.
|
|
25
33
|
|
|
26
34
|
## Quick start
|
|
27
35
|
|
|
@@ -30,9 +38,10 @@ mkdir workflow-test && cd workflow-test
|
|
|
30
38
|
npm init -y
|
|
31
39
|
ai-workflow init --yes
|
|
32
40
|
ai-workflow doctor
|
|
41
|
+
ai-workflow execute "Atlas, list all agents, subagents, skills, and commands in the project. Present them in a table with their name, description, and usage examples."
|
|
33
42
|
```
|
|
34
43
|
|
|
35
|
-
|
|
44
|
+
The CLI receives the natural-language request, classifies the intent, plans the workflow, safely switches to or creates a branch, delegates coding tasks to the OpenCode runtime adapter, validates the changes, and runs bounded remediation if necessary before outputting the handoff summary.
|
|
36
45
|
|
|
37
46
|
## Modes and profiles
|
|
38
47
|
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
# Visual E2E Validation & Semantic UX Quality Guide
|
|
2
|
+
|
|
3
|
+
This guide explains how to extend the **AI Workflow Kit** to automate visual regression testing and UX aesthetic audits using Playwright and multimodal vision LLMs.
|
|
4
|
+
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
## 1. Automated Visual E2E Validation
|
|
8
|
+
|
|
9
|
+
By default, the kit flags UI changes without screenshots as `PASS_WITH_NOTES`. To turn this into a strict, blocking quality gate, you can set up automated visual regression tests.
|
|
10
|
+
|
|
11
|
+
### Step 1: Install Playwright
|
|
12
|
+
Install Playwright in the consumer project:
|
|
13
|
+
```bash
|
|
14
|
+
npm install -D @playwright/test
|
|
15
|
+
npx playwright install
|
|
16
|
+
```
|
|
17
|
+
|
|
18
|
+
### Step 2: Configure Visual Regression Specs
|
|
19
|
+
Create a Playwright visual spec (e.g., `tests/visual/landing-page.spec.js`):
|
|
20
|
+
```javascript
|
|
21
|
+
import { test, expect } from '@playwright/test';
|
|
22
|
+
|
|
23
|
+
test('landing page visual regression', async ({ page }) => {
|
|
24
|
+
// 1. Navigate to the local server
|
|
25
|
+
await page.goto('http://localhost:5173');
|
|
26
|
+
|
|
27
|
+
// 2. Capture and compare screenshots (Desktop & Mobile viewports)
|
|
28
|
+
await expect(page).toHaveScreenshot('landing-page-desktop.png', {
|
|
29
|
+
fullPage: true,
|
|
30
|
+
maxDiffPixels: 50 // Tolerable pixel variance
|
|
31
|
+
});
|
|
32
|
+
|
|
33
|
+
await page.setViewportSize({ width: 375, height: 812 });
|
|
34
|
+
await expect(page).toHaveScreenshot('landing-page-mobile.png', {
|
|
35
|
+
fullPage: true
|
|
36
|
+
});
|
|
37
|
+
});
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
### Step 3: Link to the Quality Gate
|
|
41
|
+
Add a `test:visual` script to your `package.json`:
|
|
42
|
+
```json
|
|
43
|
+
{
|
|
44
|
+
"scripts": {
|
|
45
|
+
"test:visual": "playwright test tests/visual"
|
|
46
|
+
}
|
|
47
|
+
}
|
|
48
|
+
```
|
|
49
|
+
During the `QualityGuard` verification phase, the framework runs all test scripts. If a layout shift or visual bug occurs, Playwright will fail, and the kit will transition to `BLOCKED`, preventing implementation changes from being approved.
|
|
50
|
+
|
|
51
|
+
---
|
|
52
|
+
|
|
53
|
+
## 2. Semantic and Aesthetic Auditing (Vision LLM)
|
|
54
|
+
|
|
55
|
+
Subjective quality (color harmony, readable contrast, visual hierarchy) cannot be checked by raw pixel diffs. Multimodal vision models (like Gemini Pro or Claude 3.5 Sonnet) can act as automated **UX Auditors**.
|
|
56
|
+
|
|
57
|
+
### Architecture Workflow
|
|
58
|
+
```text
|
|
59
|
+
[Astra (Builder)]
|
|
60
|
+
└── Generates Page HTML/CSS
|
|
61
|
+
[Playwright Runner]
|
|
62
|
+
└── Launches headless browser & takes screenshots
|
|
63
|
+
[Sage (UX Auditor)]
|
|
64
|
+
└── Sends screenshots to Vision LLM with Design Checklist
|
|
65
|
+
└── Returns PASS or FAIL_QUALITY_GATE with layout feedback
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
### Prompt-Based UX Auditor Checklist
|
|
69
|
+
You can configure a specialized `ux-auditor` agent or load a skill containing these rules:
|
|
70
|
+
|
|
71
|
+
1. **Hierarchy & Composition**: The first fold must look like a cohesive layout. Check that headings, buttons, and graphics do not overlap or look cluttered.
|
|
72
|
+
2. **Color Contrast & Accessibility**: Ensure contrast between text and background is high. Avoid raw, bright primary colors (e.g., plain red/blue/green) that have not been deliberately styled.
|
|
73
|
+
3. **Typography**: Typography must use a modern system (e.g., Google Fonts) instead of browser defaults. Line heights should be balanced.
|
|
74
|
+
4. **Responsiveness**: Verify that the mobile render does not cut off text or overflow horizontally.
|
|
75
|
+
|
|
76
|
+
If the Vision LLM finds aesthetic defects, it outputs a `FAIL_QUALITY_GATE` status along with coordinates or descriptions of the design bugs, which are then passed to the `HealerEngine` for remediation.
|
package/package.json
CHANGED
|
@@ -1,8 +1,9 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@williambeto/ai-workflow",
|
|
3
|
-
"version": "2.
|
|
3
|
+
"version": "2.2.0",
|
|
4
4
|
"description": "AI Workflow Kit — OpenCode-first software delivery workflow with agents, commands, skills, validation, and evidence",
|
|
5
5
|
"license": "MIT",
|
|
6
|
+
"author": "José Willams",
|
|
6
7
|
"type": "module",
|
|
7
8
|
"private": false,
|
|
8
9
|
"engines": {
|
package/src/cli.js
CHANGED
|
@@ -3,17 +3,20 @@ import { runInit } from "./commands/init.js";
|
|
|
3
3
|
import { runDoctor } from "./commands/doctor.js";
|
|
4
4
|
import { runCollectEvidence } from "./commands/collect-evidence.js";
|
|
5
5
|
import { runMasterOrchestrator } from "./commands/run.js";
|
|
6
|
+
import { runExecute } from "./commands/execute.js";
|
|
6
7
|
|
|
7
8
|
function printHelp() {
|
|
8
9
|
console.log(`ai-workflow
|
|
9
10
|
|
|
10
11
|
Usage:
|
|
12
|
+
ai-workflow execute "<request>" [--task=<slug>] [--request="<request>"]
|
|
11
13
|
ai-workflow run --spec-path=<path>
|
|
12
14
|
ai-workflow init [--yes] [--force] [--dry-run] [--no-install] [--no-overwrite] [--gemini] [--claude] [--codex] [--profile=<profile>]
|
|
13
15
|
ai-workflow collect-evidence [--task=<slug>] [--mode=<quick|standard|full>] [--dry-run]
|
|
14
16
|
ai-workflow doctor
|
|
15
17
|
|
|
16
18
|
Commands:
|
|
19
|
+
execute Orchestrate execution of a natural request through the state machine
|
|
17
20
|
run Proportionate orchestrator with branch safety, validation, and bounded remediation
|
|
18
21
|
init Install AI workflow defaults (OpenCode). --profile=standard (default) or --profile=full (adds examples)
|
|
19
22
|
collect-evidence Run observed project validation; persists EVIDENCE.json only for full mode
|
|
@@ -26,6 +29,7 @@ function parseFlags(args) {
|
|
|
26
29
|
const profileEqArg = args.find((arg) => arg.startsWith("--profile="));
|
|
27
30
|
const taskArg = args.find((arg) => arg.startsWith("--task="));
|
|
28
31
|
const modeArg = args.find((arg) => arg.startsWith("--mode="));
|
|
32
|
+
const requestArg = args.find((arg) => arg.startsWith("--request="));
|
|
29
33
|
const profileIdx = args.indexOf("--profile");
|
|
30
34
|
const profileVal = profileEqArg
|
|
31
35
|
? profileEqArg.replace("--profile=", "")
|
|
@@ -46,7 +50,8 @@ function parseFlags(args) {
|
|
|
46
50
|
specPath: specPathArg ? specPathArg.replace("--spec-path=", "") : undefined,
|
|
47
51
|
profile: profileVal || undefined,
|
|
48
52
|
taskSlug: taskArg ? taskArg.replace("--task=", "") : undefined,
|
|
49
|
-
mode: modeArg ? modeArg.replace("--mode=", "") : undefined
|
|
53
|
+
mode: modeArg ? modeArg.replace("--mode=", "") : undefined,
|
|
54
|
+
request: requestArg ? requestArg.replace("--request=", "") : undefined
|
|
50
55
|
};
|
|
51
56
|
}
|
|
52
57
|
|
|
@@ -64,6 +69,19 @@ export async function runCli(args) {
|
|
|
64
69
|
return;
|
|
65
70
|
}
|
|
66
71
|
|
|
72
|
+
if (command === "execute") {
|
|
73
|
+
const flags = parseFlags(args.slice(1));
|
|
74
|
+
const positionals = args.slice(1).filter((arg) => !arg.startsWith("-"));
|
|
75
|
+
const request = flags.request || positionals.join(" ");
|
|
76
|
+
await runExecute({
|
|
77
|
+
cwd: process.cwd(),
|
|
78
|
+
naturalRequest: request,
|
|
79
|
+
override: args.includes("--override") ? "authorized-override-token" : undefined,
|
|
80
|
+
taskSlug: flags.taskSlug
|
|
81
|
+
});
|
|
82
|
+
return;
|
|
83
|
+
}
|
|
84
|
+
|
|
67
85
|
if (command === "run") {
|
|
68
86
|
await runMasterOrchestrator({
|
|
69
87
|
cwd: process.cwd(),
|
|
@@ -0,0 +1,172 @@
|
|
|
1
|
+
import { RequestClassifier } from "../core/request-classifier.js";
|
|
2
|
+
import { ExecutionPlanner } from "../core/execution-planner.js";
|
|
3
|
+
import { WorkflowStateMachine } from "../core/workflow-state-machine.js";
|
|
4
|
+
import { BranchGate } from "../core/gates/branch-gate.js";
|
|
5
|
+
import { OpenCodeAdapter } from "../core/runtime/opencode-adapter.js";
|
|
6
|
+
import { runCollectEvidence } from "./collect-evidence.js";
|
|
7
|
+
import { QualityGuard } from "../core/validation/quality-guard.js";
|
|
8
|
+
import { HandoffEngine } from "../core/handoff/handoff-engine.js";
|
|
9
|
+
import { HealerEngine } from "../core/healing/healer-engine.js";
|
|
10
|
+
import { createCliRemediationExecutor } from "../core/healing/cli-remediation-executor.js";
|
|
11
|
+
import { isRecoverableGateFailure, isTerminalFailure } from "../core/statuses.js";
|
|
12
|
+
import fs from "node:fs/promises";
|
|
13
|
+
import path from "node:path";
|
|
14
|
+
|
|
15
|
+
function slugify(text) {
|
|
16
|
+
return text
|
|
17
|
+
.toLowerCase()
|
|
18
|
+
.replace(/[^a-z0-9]+/g, "-")
|
|
19
|
+
.replace(/^-+|-+$/g, "")
|
|
20
|
+
.slice(0, 32) || "task";
|
|
21
|
+
}
|
|
22
|
+
|
|
23
|
+
function combineValidation(evidence, quality) {
|
|
24
|
+
const statuses = [evidence.internalStatus, quality.overallStatus];
|
|
25
|
+
let overallStatus = "PASS";
|
|
26
|
+
if (statuses.includes("BLOCKED")) overallStatus = "BLOCKED";
|
|
27
|
+
else if (statuses.includes("FAIL")) overallStatus = "FAIL";
|
|
28
|
+
else if (statuses.includes("FAIL_DELEGATION_GATE")) overallStatus = "FAIL_DELEGATION_GATE";
|
|
29
|
+
else if (statuses.includes("FAIL_QUALITY_GATE")) overallStatus = "FAIL_QUALITY_GATE";
|
|
30
|
+
else if (statuses.includes("PASS_WITH_NOTES")) overallStatus = "PASS_WITH_NOTES";
|
|
31
|
+
return { overallStatus, evidence, quality };
|
|
32
|
+
}
|
|
33
|
+
|
|
34
|
+
/**
|
|
35
|
+
* runExecute - Coordinates natural request execution.
|
|
36
|
+
*/
|
|
37
|
+
export async function runExecute({ cwd, naturalRequest, override, taskSlug: taskSlugOverride }) {
|
|
38
|
+
if (!naturalRequest || !naturalRequest.trim()) {
|
|
39
|
+
throw new Error("Missing request. Please provide a natural request string via positional arguments or --request flag.");
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
const taskSlug = taskSlugOverride || slugify(naturalRequest);
|
|
43
|
+
console.log(`\n[AI WORKFLOW] Executing natural request intake [slug: ${taskSlug}]...\n`);
|
|
44
|
+
|
|
45
|
+
const stateMachine = new WorkflowStateMachine();
|
|
46
|
+
|
|
47
|
+
// 1. Classification
|
|
48
|
+
const classifier = new RequestClassifier();
|
|
49
|
+
const classification = classifier.classify(naturalRequest);
|
|
50
|
+
console.log(`[CLASSIFIED] Intent: ${classification.intent}, Mode: ${classification.mode}, Profile: ${classification.profile}`);
|
|
51
|
+
stateMachine.transitionTo("CLASSIFIED");
|
|
52
|
+
|
|
53
|
+
// 2. Planning
|
|
54
|
+
const planner = new ExecutionPlanner({ cwd });
|
|
55
|
+
const plan = planner.plan(classification, taskSlug);
|
|
56
|
+
console.log(`[PLANNED] Owner: ${plan.owner}, Remediation limit: ${plan.remediationLimit}`);
|
|
57
|
+
stateMachine.transitionTo("PLANNED");
|
|
58
|
+
|
|
59
|
+
// 3. Branch Gate
|
|
60
|
+
const branchGate = new BranchGate({ memoryDir: path.join(cwd, ".ai-workflow"), cwd });
|
|
61
|
+
const gateResult = branchGate.check(override, {
|
|
62
|
+
autoRecover: true,
|
|
63
|
+
taskSlug,
|
|
64
|
+
readOnly: !plan.branchNeeded
|
|
65
|
+
});
|
|
66
|
+
|
|
67
|
+
if (gateResult.blocked) {
|
|
68
|
+
stateMachine.transitionTo("BLOCKED");
|
|
69
|
+
throw new Error(`[GATE BLOCKED] ${gateResult.reason}`);
|
|
70
|
+
}
|
|
71
|
+
console.log(`[PASS] Branch Gate: ${gateResult.recovered ? `${gateResult.branchBefore} -> ${gateResult.branch}` : `${gateResult.branch} is authorized`}.`);
|
|
72
|
+
stateMachine.transitionTo("BRANCH_READY");
|
|
73
|
+
|
|
74
|
+
// 4. Spec template creation (for full/deep mode)
|
|
75
|
+
if (plan.specPath) {
|
|
76
|
+
const fullSpecPath = path.join(cwd, plan.specPath);
|
|
77
|
+
const specExists = await fs.access(fullSpecPath).then(() => true).catch(() => false);
|
|
78
|
+
if (!specExists) {
|
|
79
|
+
await fs.mkdir(path.dirname(fullSpecPath), { recursive: true });
|
|
80
|
+
const specTemplatePath = path.join(cwd, ".ai-workflow/templates/specs/standard.md");
|
|
81
|
+
const templateContent = await fs.readFile(specTemplatePath, "utf8").catch(() => {
|
|
82
|
+
return `# [STANDARD] Specification: ${classification.request}\n\n## Metadata\n\n- ID: SPEC-${taskSlug}\n- Author: Spec-Engineer\n- Status: DRAFT\n- Date: ${new Date().toISOString().split("T")[0]}\n\n## Functional Requirements\n\n- ${classification.request}\n\n## Technical Implementation Plan\n\n### Files to Create/Modify\n\n- \`src/index.js\`\n\n## Acceptance Criteria\n\n- [ ] Implemented successfully\n\n## Testing Strategy\n\n- [ ] Behavior tests pass`;
|
|
83
|
+
});
|
|
84
|
+
await fs.writeFile(fullSpecPath, templateContent);
|
|
85
|
+
console.log(`[EXECUTE] Created DRAFT specification template at: ${plan.specPath}`);
|
|
86
|
+
}
|
|
87
|
+
}
|
|
88
|
+
|
|
89
|
+
// 5. Delegation
|
|
90
|
+
stateMachine.transitionTo("DELEGATED");
|
|
91
|
+
stateMachine.transitionTo("IMPLEMENTING");
|
|
92
|
+
|
|
93
|
+
const adapter = new OpenCodeAdapter({ cwd });
|
|
94
|
+
let promptMsg = classification.request;
|
|
95
|
+
if (plan.specPath) {
|
|
96
|
+
promptMsg = `Please review and fill in the specification file at: ${plan.specPath}. Make sure to change the Status field to 'APPROVED' in the Metadata section, and then implement the behavior described.`;
|
|
97
|
+
}
|
|
98
|
+
|
|
99
|
+
const runResult = await adapter.execute(promptMsg, {
|
|
100
|
+
agent: plan.owner,
|
|
101
|
+
dangerouslySkipPermissions: true
|
|
102
|
+
});
|
|
103
|
+
|
|
104
|
+
if (!runResult.success) {
|
|
105
|
+
console.error(`[EXECUTE] OpenCode adapter execution reported failure.`);
|
|
106
|
+
}
|
|
107
|
+
stateMachine.transitionTo("IMPLEMENTED");
|
|
108
|
+
|
|
109
|
+
// 6. Validation
|
|
110
|
+
stateMachine.transitionTo("VALIDATING");
|
|
111
|
+
const validateWorkflow = async () => {
|
|
112
|
+
const evidence = await runCollectEvidence({
|
|
113
|
+
cwd,
|
|
114
|
+
exitOnError: false,
|
|
115
|
+
taskSlug,
|
|
116
|
+
mode: plan.mode,
|
|
117
|
+
profile: plan.profile,
|
|
118
|
+
branchRecovery: gateResult.recovered ? `${gateResult.branchBefore} -> ${gateResult.branch}` : "NOT_REQUIRED"
|
|
119
|
+
});
|
|
120
|
+
const quality = await new QualityGuard({ cwd, taskSlug, mode: plan.mode }).verify();
|
|
121
|
+
return combineValidation(evidence, quality);
|
|
122
|
+
};
|
|
123
|
+
|
|
124
|
+
let result = await validateWorkflow();
|
|
125
|
+
|
|
126
|
+
// 7. Bounded Remediation
|
|
127
|
+
if (isRecoverableGateFailure(result.overallStatus)) {
|
|
128
|
+
stateMachine.transitionTo("REMEDIATING");
|
|
129
|
+
console.log(`\n[REMEDIATION REQUIRED] ${result.overallStatus}. Starting bounded ${plan.mode} remediation.`);
|
|
130
|
+
const executor = createCliRemediationExecutor(cwd);
|
|
131
|
+
const healer = new HealerEngine({ cwd, mode: plan.mode, taskSlug });
|
|
132
|
+
|
|
133
|
+
result = await healer.run({
|
|
134
|
+
initialResult: result,
|
|
135
|
+
validate: async () => {
|
|
136
|
+
stateMachine.transitionTo("REVALIDATING");
|
|
137
|
+
const res = await validateWorkflow();
|
|
138
|
+
return res;
|
|
139
|
+
},
|
|
140
|
+
remediate: executor
|
|
141
|
+
});
|
|
142
|
+
}
|
|
143
|
+
|
|
144
|
+
if (isTerminalFailure(result.overallStatus)) {
|
|
145
|
+
stateMachine.transitionTo("BLOCKED");
|
|
146
|
+
throw new Error(`[WORKFLOW BLOCKED] ${result.overallStatus}: Validation could not be resolved safely.`);
|
|
147
|
+
}
|
|
148
|
+
|
|
149
|
+
// 8. Finalizer
|
|
150
|
+
const finalState = result.overallStatus === "PASS"
|
|
151
|
+
? "COMPLETED"
|
|
152
|
+
: result.overallStatus === "PASS_WITH_NOTES"
|
|
153
|
+
? "COMPLETED_WITH_NOTES"
|
|
154
|
+
: "BLOCKED";
|
|
155
|
+
|
|
156
|
+
stateMachine.transitionTo(finalState);
|
|
157
|
+
|
|
158
|
+
console.log("\n--- Final Handoff ---");
|
|
159
|
+
const handoffEngine = new HandoffEngine({ cwd });
|
|
160
|
+
const handoffPath = await handoffEngine.generate({
|
|
161
|
+
taskId: taskSlug,
|
|
162
|
+
status: finalState,
|
|
163
|
+
specPaths: plan.specPath ? [plan.specPath] : [],
|
|
164
|
+
evidence: result.evidence,
|
|
165
|
+
nextActions: `Profile ${plan.profile}. Observed execution completed with status ${finalState}.`
|
|
166
|
+
});
|
|
167
|
+
|
|
168
|
+
console.log(`\n[AI WORKFLOW COMPLETE] ${finalState}`);
|
|
169
|
+
console.log(`Handoff Packet: ${path.relative(cwd, handoffPath)}\n`);
|
|
170
|
+
|
|
171
|
+
return { ...result, stateHistory: stateMachine.getHistory() };
|
|
172
|
+
}
|
package/src/commands/run.js
CHANGED
|
@@ -99,6 +99,7 @@ export async function runMasterOrchestrator({ cwd, specPath, override, remediati
|
|
|
99
99
|
taskId: path.basename(specPath, ".md"),
|
|
100
100
|
status: result.overallStatus,
|
|
101
101
|
specPaths: [specPath],
|
|
102
|
+
evidence: result.evidence,
|
|
102
103
|
nextActions: `Profile ${workflowProfile}. Observed validation completed. Ready for review or explicitly approved release actions.`
|
|
103
104
|
});
|
|
104
105
|
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
import path from "node:path";
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* ExecutionPlanner - Formulates a minimal operational plan based on classification.
|
|
5
|
+
*/
|
|
6
|
+
export class ExecutionPlanner {
|
|
7
|
+
constructor({ cwd = process.cwd() } = {}) {
|
|
8
|
+
this.cwd = cwd;
|
|
9
|
+
}
|
|
10
|
+
|
|
11
|
+
/**
|
|
12
|
+
* Generates an execution plan.
|
|
13
|
+
* @param {Object} classification - The output of RequestClassifier.
|
|
14
|
+
* @param {string} taskSlug - Descriptive slug for the task.
|
|
15
|
+
* @returns {Object} Operational plan contract.
|
|
16
|
+
*/
|
|
17
|
+
plan(classification, taskSlug = "task") {
|
|
18
|
+
const remediationLimit = classification.mode === "full" ? 3 : classification.mode === "quick" ? 1 : 2;
|
|
19
|
+
const branchNeeded = classification.intent === "write";
|
|
20
|
+
const specPath = classification.specNeeded
|
|
21
|
+
? path.join("docs/workflows", taskSlug, "spec.md")
|
|
22
|
+
: null;
|
|
23
|
+
|
|
24
|
+
// Build default expected validations based on the resolved profile
|
|
25
|
+
const validationsExpected = [];
|
|
26
|
+
if (classification.validationNeeded) {
|
|
27
|
+
validationsExpected.push("test");
|
|
28
|
+
if (classification.profile.startsWith("frontend")) {
|
|
29
|
+
validationsExpected.push("lint", "build");
|
|
30
|
+
} else if (classification.profile === "backend-api") {
|
|
31
|
+
validationsExpected.push("lint");
|
|
32
|
+
}
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
const restrictions = [
|
|
36
|
+
"Never commit or push directly to main/master.",
|
|
37
|
+
"Always execute behavior tests for new or modified behavior."
|
|
38
|
+
];
|
|
39
|
+
|
|
40
|
+
if (classification.risk === "high") {
|
|
41
|
+
restrictions.push("Require independent validation review.");
|
|
42
|
+
}
|
|
43
|
+
|
|
44
|
+
return {
|
|
45
|
+
objective: classification.request,
|
|
46
|
+
scope: classification.intent === "read-only" ? "Analysis and verification" : "Implementation of requested behavior",
|
|
47
|
+
restrictions,
|
|
48
|
+
owner: classification.owner,
|
|
49
|
+
skills: [...classification.skills],
|
|
50
|
+
branchNeeded,
|
|
51
|
+
branchName: branchNeeded ? `feat/${taskSlug}` : null,
|
|
52
|
+
validationsExpected,
|
|
53
|
+
remediationLimit,
|
|
54
|
+
specPath,
|
|
55
|
+
mode: classification.mode,
|
|
56
|
+
profile: classification.profile
|
|
57
|
+
};
|
|
58
|
+
}
|
|
59
|
+
}
|
|
@@ -63,13 +63,44 @@ export class BranchGate {
|
|
|
63
63
|
return candidate;
|
|
64
64
|
}
|
|
65
65
|
|
|
66
|
+
getCurrentBranch() {
|
|
67
|
+
try {
|
|
68
|
+
return this.run("git branch --show-current") || this.run("git symbolic-ref --short HEAD") || "unknown";
|
|
69
|
+
} catch {
|
|
70
|
+
try {
|
|
71
|
+
return this.run("git symbolic-ref --short HEAD") || "unknown";
|
|
72
|
+
} catch {
|
|
73
|
+
return "unknown";
|
|
74
|
+
}
|
|
75
|
+
}
|
|
76
|
+
}
|
|
77
|
+
|
|
66
78
|
/**
|
|
67
79
|
* @param {string} override
|
|
68
|
-
* @param {{autoRecover?: boolean, taskSlug?: string}} options
|
|
80
|
+
* @param {{autoRecover?: boolean, taskSlug?: string, readOnly?: boolean}} options
|
|
69
81
|
*/
|
|
70
|
-
check(override = "", { autoRecover = false, taskSlug = "implementation" } = {}) {
|
|
82
|
+
check(override = "", { autoRecover = false, taskSlug = "implementation", readOnly = false } = {}) {
|
|
83
|
+
if (readOnly) {
|
|
84
|
+
const currentBranch = this.getCurrentBranch();
|
|
85
|
+
return { blocked: false, branch: currentBranch, recovered: false, readOnly: true };
|
|
86
|
+
}
|
|
87
|
+
|
|
71
88
|
try {
|
|
72
|
-
|
|
89
|
+
// Enforce fail-closed: verify inside worktree first
|
|
90
|
+
try {
|
|
91
|
+
this.run("git rev-parse --is-inside-work-tree");
|
|
92
|
+
} catch (e) {
|
|
93
|
+
const reason = "Not inside a Git repository or Git is unavailable. Implementation work is blocked.";
|
|
94
|
+
this.log(`BLOCKED: ${reason}`);
|
|
95
|
+
return { blocked: true, branch: "unknown", reason };
|
|
96
|
+
}
|
|
97
|
+
|
|
98
|
+
const currentBranch = this.getCurrentBranch();
|
|
99
|
+
if (currentBranch === "unknown") {
|
|
100
|
+
const reason = "Could not determine current Git branch name. Implementation work is blocked.";
|
|
101
|
+
this.log(`BLOCKED: ${reason}`);
|
|
102
|
+
return { blocked: true, branch: "unknown", reason };
|
|
103
|
+
}
|
|
73
104
|
const isProtected = this.protectedBranches.includes(currentBranch);
|
|
74
105
|
if (!isProtected) return { blocked: false, branch: currentBranch, recovered: false };
|
|
75
106
|
|
|
@@ -107,7 +138,9 @@ export class BranchGate {
|
|
|
107
138
|
reason: `${reason} Enable safe auto-recovery or use AI_OVERRIDE with a concrete justification.`
|
|
108
139
|
};
|
|
109
140
|
} catch (error) {
|
|
110
|
-
|
|
141
|
+
const reason = `Git command failure on branch gate check: ${error.message}`;
|
|
142
|
+
this.log(`BLOCKED: ${reason}`);
|
|
143
|
+
return { blocked: true, branch: "unknown", error: error.message, reason, recovered: false };
|
|
111
144
|
}
|
|
112
145
|
}
|
|
113
146
|
}
|
|
@@ -10,12 +10,12 @@ export class HandoffEngine {
|
|
|
10
10
|
this.cwd = cwd;
|
|
11
11
|
}
|
|
12
12
|
|
|
13
|
-
|
|
13
|
+
/**
|
|
14
14
|
* Generates a handoff packet.
|
|
15
15
|
* @param {Object} data - Context data.
|
|
16
16
|
* @returns {Promise<string>} The generated packet content.
|
|
17
17
|
*/
|
|
18
|
-
async generate({ taskId, author, status, specPaths = [], nextActions }) {
|
|
18
|
+
async generate({ taskId, author, status, specPaths = [], nextActions, evidence = null }) {
|
|
19
19
|
const timestamp = new Date().toISOString();
|
|
20
20
|
|
|
21
21
|
// 1. Collect Specs content
|
|
@@ -33,27 +33,53 @@ export class HandoffEngine {
|
|
|
33
33
|
// 2. Capture Git Diff
|
|
34
34
|
let gitDiff = "No changes detected.";
|
|
35
35
|
try {
|
|
36
|
-
gitDiff = execSync("git diff HEAD", { cwd: this.cwd, encoding: "utf8" }).trim() || "No changes detected.";
|
|
36
|
+
gitDiff = execSync("git diff HEAD", { cwd: this.cwd, encoding: "utf8", stdio: ["ignore", "pipe", "ignore"] }).trim() || "No changes detected.";
|
|
37
37
|
} catch (err) {
|
|
38
38
|
gitDiff = `[Error capturing diff: ${err.message}]`;
|
|
39
39
|
}
|
|
40
40
|
|
|
41
|
-
// 3. Read EVIDENCE.json
|
|
41
|
+
// 3. Read EVIDENCE.json or use provided in-memory evidence
|
|
42
42
|
let evidenceJson = "{}";
|
|
43
43
|
let evidenceSummary = "No evidence found.";
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
const content = await fs.readFile(evidencePath, "utf8");
|
|
47
|
-
const data = JSON.parse(content);
|
|
44
|
+
if (evidence) {
|
|
45
|
+
const data = typeof evidence === "string" ? JSON.parse(evidence) : evidence;
|
|
48
46
|
evidenceJson = JSON.stringify(data, null, 2);
|
|
49
|
-
evidenceSummary = `Status: ${data.status}, Commands: ${data.commands?.length || 0}`;
|
|
50
|
-
}
|
|
51
|
-
|
|
47
|
+
evidenceSummary = `Status: ${data.status || data.internalStatus || "unknown"}, Commands: ${data.commands?.length || 0}`;
|
|
48
|
+
} else {
|
|
49
|
+
const evidencePath = path.join(this.cwd, "EVIDENCE.json");
|
|
50
|
+
try {
|
|
51
|
+
const content = await fs.readFile(evidencePath, "utf8");
|
|
52
|
+
const data = JSON.parse(content);
|
|
53
|
+
evidenceJson = JSON.stringify(data, null, 2);
|
|
54
|
+
evidenceSummary = `Status: ${data.status}, Commands: ${data.commands?.length || 0}`;
|
|
55
|
+
} catch (err) {
|
|
56
|
+
// Optional: don't fail if evidence is missing
|
|
57
|
+
}
|
|
52
58
|
}
|
|
53
59
|
|
|
54
60
|
// 4. Populate Template
|
|
55
|
-
const
|
|
56
|
-
|
|
61
|
+
const possibleTemplatePaths = [
|
|
62
|
+
path.join(this.cwd, ".ai-workflow/templates/HANDOFF.template.md"),
|
|
63
|
+
path.join(this.cwd, ".ai-workflow/harness/handoffs/HANDOFF.template.md"),
|
|
64
|
+
path.join(import.meta.dirname, "../../../dist-assets/templates/HANDOFF.template.md"),
|
|
65
|
+
path.join(import.meta.dirname, "../../../assets/templates/HANDOFF.template.md")
|
|
66
|
+
];
|
|
67
|
+
|
|
68
|
+
let template = null;
|
|
69
|
+
let lastError = null;
|
|
70
|
+
|
|
71
|
+
for (const tPath of possibleTemplatePaths) {
|
|
72
|
+
try {
|
|
73
|
+
template = await fs.readFile(tPath, "utf8");
|
|
74
|
+
break;
|
|
75
|
+
} catch (err) {
|
|
76
|
+
lastError = err;
|
|
77
|
+
}
|
|
78
|
+
}
|
|
79
|
+
|
|
80
|
+
if (!template) {
|
|
81
|
+
throw new Error(`CRITICAL FAILURE: Could not locate HANDOFF.template.md. Tried paths:\n${possibleTemplatePaths.join("\n")}\nLast error: ${lastError?.message}`);
|
|
82
|
+
}
|
|
57
83
|
|
|
58
84
|
const packet = template
|
|
59
85
|
.replace("${TASK_ID}", taskId || "unknown")
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
import { resolveWorkflowProfile, getWorkflowProfile } from "./workflow-profiles.js";
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* RequestClassifier - Classifies natural requests into structured, testable contracts.
|
|
5
|
+
*/
|
|
6
|
+
export class RequestClassifier {
|
|
7
|
+
/**
|
|
8
|
+
* Classifies a natural request.
|
|
9
|
+
* @param {string} request - The natural language request string.
|
|
10
|
+
* @returns {Object} Structured classification contract.
|
|
11
|
+
*/
|
|
12
|
+
classify(request = "") {
|
|
13
|
+
const text = String(request).trim();
|
|
14
|
+
if (!text) {
|
|
15
|
+
throw new Error("Cannot classify an empty request.");
|
|
16
|
+
}
|
|
17
|
+
|
|
18
|
+
// 1. Resolve Profile
|
|
19
|
+
const profile = resolveWorkflowProfile({ request: text });
|
|
20
|
+
const profileDef = getWorkflowProfile(profile);
|
|
21
|
+
|
|
22
|
+
// 2. Classify Intent
|
|
23
|
+
const readOnlyPatterns = /\b(analyze|check|inspect|review|find|search|show|list|read|view|verify)\b/i;
|
|
24
|
+
const isReadOnly = readOnlyPatterns.test(text);
|
|
25
|
+
const intent = isReadOnly ? "read-only" : "write";
|
|
26
|
+
|
|
27
|
+
// 3. Classify Mode
|
|
28
|
+
let mode = "standard";
|
|
29
|
+
if (/\b(deep|architectural|migration|major|full|\[deep\])\b/i.test(text)) {
|
|
30
|
+
mode = "full";
|
|
31
|
+
} else if (/\b(tiny|small|quick|simple|only\s+update|typo|comment|\[tiny\])\b/i.test(text)) {
|
|
32
|
+
mode = "quick";
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
// 4. Classify Risk
|
|
36
|
+
let risk = "medium";
|
|
37
|
+
if (mode === "full" || profile === "security-review") {
|
|
38
|
+
risk = "high";
|
|
39
|
+
} else if (mode === "quick" || intent === "read-only") {
|
|
40
|
+
risk = "low";
|
|
41
|
+
}
|
|
42
|
+
|
|
43
|
+
const specNeeded = mode === "full" && intent === "write";
|
|
44
|
+
const validationNeeded = intent === "write";
|
|
45
|
+
|
|
46
|
+
return {
|
|
47
|
+
request: text,
|
|
48
|
+
profile,
|
|
49
|
+
owner: profileDef.owner,
|
|
50
|
+
intent,
|
|
51
|
+
mode,
|
|
52
|
+
risk,
|
|
53
|
+
specNeeded,
|
|
54
|
+
validationNeeded,
|
|
55
|
+
skills: [...profileDef.skills]
|
|
56
|
+
};
|
|
57
|
+
}
|
|
58
|
+
}
|
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
import { spawn, execSync } from "node:child_process";
|
|
2
|
+
import readline from "node:readline";
|
|
3
|
+
|
|
4
|
+
/**
 * OpenCodeAdapter - Real runtime integration with the OpenCode CLI.
 *
 * Launches `opencode run <message> --format json`, streams the agent's text
 * output to stdout as it arrives, and records the shell commands the agent
 * reports having run.
 */
export class OpenCodeAdapter {
  /**
   * @param {Object} [options]
   * @param {string} [options.cwd] - Working directory for the child process (defaults to process.cwd()).
   * @param {string} [options.command] - Executable to launch (defaults to "opencode").
   */
  constructor({ cwd = process.cwd(), command = "opencode" } = {}) {
    this.cwd = cwd;
    this.command = command;
  }

  /**
   * Runs opencode with a prompt and options.
   *
   * The returned promise never rejects: launch failures and non-zero exits are
   * reported through `success: false`.
   *
   * @param {string} message - The message prompt. Passed as a single argument; it is never interpreted by a POSIX shell.
   * @param {Object} options - CLI options (e.g. agent, model, dangerouslySkipPermissions).
   * @returns {Promise<{success: boolean, commandsRun: string[], eventCount: number, error?: string}>}
   */
  async execute(message, { agent = null, model = null, dangerouslySkipPermissions = false } = {}) {
    return new Promise((resolve) => {
      const args = ["run", message, "--format", "json"];
      if (agent) {
        args.push("--agent", agent);
      }
      if (model) {
        args.push("--model", model);
      }
      if (dangerouslySkipPermissions) {
        args.push("--dangerously-skip-permissions");
      }

      console.log(`[RUNTIME] Delegating to OpenCode: ${this.command} ${args.map(a => a.includes(" ") ? `"${a}"` : a).join(" ")}`);

      // The message is free-form user text. Handing it to a shell unescaped lets
      // quotes, `;`, `$(...)` etc. break the command or run arbitrary code, so on
      // POSIX the binary is spawned directly and every argument arrives verbatim.
      // On Windows a shell is kept (npm-installed CLIs are typically `.cmd` shims
      // that cannot be spawned directly), with each argument quoted.
      const onWindows = process.platform === "win32";
      const child = spawn(this.command, onWindows ? args.map(quoteWindowsArg) : args, {
        cwd: this.cwd,
        shell: onWindows,
        stdio: ["ignore", "pipe", "inherit"] // Inherit stderr to show warnings directly
      });

      const rl = readline.createInterface({
        input: child.stdout,
        terminal: false
      });

      const commandsRun = [];
      let eventCount = 0;

      // A failed launch can emit both "error" and "close"; only the first one
      // is allowed to settle the promise and log.
      let settled = false;
      const settle = (result) => {
        settled = true;
        resolve(result);
      };

      rl.on("line", (line) => {
        const trimmed = line.trim();
        if (!trimmed) return;

        try {
          const event = JSON.parse(trimmed);
          eventCount++;

          // 1. Stream agent output text in real-time to user
          if (event.type === "text" && event.part?.text) {
            process.stdout.write(event.part.text);
          }

          // 2. Capture tool executions/commands
          if (event.type === "step_start" && event.part?.toolCalls) {
            for (const call of event.part.toolCalls) {
              if (call.name === "run_command" && call.args?.CommandLine) {
                commandsRun.push(call.args.CommandLine);
              }
            }
          }
        } catch {
          // If a line is not valid JSON (e.g. starting/finishing logs), print it directly
          console.log(trimmed);
        }
      });

      child.on("close", (code) => {
        if (settled) return;
        console.log("\n[RUNTIME] OpenCode finished execution.");
        settle({
          success: code === 0,
          commandsRun,
          eventCount
        });
      });

      child.on("error", (err) => {
        if (settled) return;
        console.error(`\n[RUNTIME] Failed to launch opencode: ${err.message}`);
        settle({
          success: false,
          commandsRun,
          eventCount: 0,
          error: err.message
        });
      });
    });
  }
}

/**
 * Quotes one argument for a Windows command line: wraps it in double quotes,
 * escapes embedded double quotes, and doubles backslashes that precede a quote.
 * Best effort only: cmd.exe still expands `%VAR%` inside double quotes.
 *
 * @param {string} arg - Raw argument value.
 * @returns {string} The quoted argument.
 */
function quoteWindowsArg(arg) {
  const escaped = String(arg)
    .replace(/(\\*)"/g, '$1$1\\"')
    .replace(/(\\+)$/, "$1$1");
  return `"${escaped}"`;
}
|
package/src/core/templates.js
CHANGED
|
@@ -102,6 +102,9 @@ function buildRuntimeFiles({ includeFormalEvidence = false } = {}) {
|
|
|
102
102
|
const dpContent = readPackageFile("dist-assets/docs/design-patterns-policy.md");
|
|
103
103
|
if (dpContent !== null) files["docs/design-patterns-policy.md"] = dpContent;
|
|
104
104
|
|
|
105
|
+
const visualValidationGuide = readPackageFile("dist-assets/docs/visual-validation-guide.md");
|
|
106
|
+
if (visualValidationGuide !== null) files["docs/visual-validation-guide.md"] = visualValidationGuide;
|
|
107
|
+
|
|
105
108
|
const governancePolicies = discoverPackageFiles("dist-assets/docs/policies");
|
|
106
109
|
for (const [relPath, content] of Object.entries(governancePolicies)) {
|
|
107
110
|
const targetPath = relPath.replace(/^dist-assets\//, "");
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
/**
 * WorkflowStateMachine - guards the lifecycle of a single AI Workflow run.
 *
 * A run starts in RECEIVED and may only move along the edges listed in
 * `validTransitions`. COMPLETED, COMPLETED_WITH_NOTES and BLOCKED have no
 * outgoing edges. Every state entered is recorded with an ISO-8601 timestamp.
 */
export class WorkflowStateMachine {
  constructor() {
    const initialState = "RECEIVED";
    this.state = initialState;
    this.history = [{ state: initialState, timestamp: new Date().toISOString() }];

    // Linear part of the flow: each step leads to the next one or to BLOCKED.
    const pipeline = [
      "RECEIVED",
      "CLASSIFIED",
      "PLANNED",
      "BRANCH_READY",
      "DELEGATED",
      "IMPLEMENTING",
      "IMPLEMENTED",
      "VALIDATING"
    ];
    // Outcomes shared by the first validation and every re-validation.
    const verdicts = ["COMPLETED", "COMPLETED_WITH_NOTES", "REMEDIATING", "BLOCKED"];

    const table = {};
    for (let i = 0; i < pipeline.length - 1; i += 1) {
      table[pipeline[i]] = [pipeline[i + 1], "BLOCKED"];
    }
    table.VALIDATING = [...verdicts];
    table.REMEDIATING = ["REVALIDATING", "BLOCKED"];
    table.REVALIDATING = [...verdicts];
    table.COMPLETED = [];
    table.COMPLETED_WITH_NOTES = [];
    table.BLOCKED = [];

    this.validTransitions = table;
  }

  /**
   * Moves the machine to `newState` and appends the move to the history.
   * @param {string} newState - The state to transition to.
   * @throws {Error} When `newState` is not reachable from the current state.
   */
  transitionTo(newState) {
    const current = this.state;
    const reachable = this.validTransitions[current];
    const permitted = Boolean(reachable) && reachable.includes(newState);

    if (!permitted) {
      throw new Error(`Invalid state transition: ${current} -> ${newState}`);
    }

    this.state = newState;
    this.history.push({ state: newState, timestamp: new Date().toISOString() });
  }

  /**
   * @returns {string} The state the machine is currently in.
   */
  getCurrentState() {
    return this.state;
  }

  /**
   * @returns {Array<{state: string, timestamp: string}>} A shallow copy of the
   * recorded history, oldest entry first.
   */
  getHistory() {
    return Array.from(this.history);
  }
}
|