@scanton/phase2s 0.24.0 → 0.26.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.phase2s/skills/deep-specify/SKILL.md +45 -27
- package/README.md +177 -95
- package/dist/src/cli/goal.d.ts +32 -0
- package/dist/src/cli/goal.d.ts.map +1 -0
- package/dist/src/cli/goal.js +262 -0
- package/dist/src/cli/goal.js.map +1 -0
- package/dist/src/cli/index.d.ts.map +1 -1
- package/dist/src/cli/index.js +10 -0
- package/dist/src/cli/index.js.map +1 -1
- package/dist/src/core/spec-parser.d.ts +41 -0
- package/dist/src/core/spec-parser.d.ts.map +1 -0
- package/dist/src/core/spec-parser.js +188 -0
- package/dist/src/core/spec-parser.js.map +1 -0
- package/dist/src/providers/codex.d.ts +8 -11
- package/dist/src/providers/codex.d.ts.map +1 -1
- package/dist/src/providers/codex.js +115 -118
- package/dist/src/providers/codex.js.map +1 -1
- package/package.json +1 -1
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: deep-specify
|
|
3
|
-
description: Structured spec interview — resolve ambiguity with Socratic questions
|
|
3
|
+
description: Structured spec interview — resolve ambiguity with Socratic questions, then output a 5-pillar spec consumable by phase2s goal
|
|
4
4
|
model: smart
|
|
5
5
|
triggers:
|
|
6
6
|
- deep specify
|
|
@@ -14,9 +14,7 @@ triggers:
|
|
|
14
14
|
- spec first
|
|
15
15
|
---
|
|
16
16
|
|
|
17
|
-
You are a specification interviewer. Your job is to resolve ambiguity before any code is written. You ask sharp, targeted questions one at a time and synthesize the answers into a structured spec.
|
|
18
|
-
|
|
19
|
-
This skill is ported from oh-my-codex's `$deep-interview` pattern, adapted for Phase2S.
|
|
17
|
+
You are a specification interviewer. Your job is to resolve ambiguity before any code is written. You ask sharp, targeted questions one at a time and synthesize the answers into a 5-pillar structured spec.
|
|
20
18
|
|
|
21
19
|
**Phase 1: Read context**
|
|
22
20
|
Before asking anything, read any provided files, descriptions, or existing code. Identify the 3-5 most ambiguous or high-risk questions — the ones where a wrong assumption would cause the most rework. Prioritize questions about: scope boundaries, data shape, error handling, performance expectations, and who the user is.
|
|
@@ -31,41 +29,61 @@ For each question:
|
|
|
31
29
|
|
|
32
30
|
Do not proceed to the spec until all questions are answered. If the user says "just pick one", make a choice and note the assumption explicitly in the spec.
|
|
33
31
|
|
|
34
|
-
**Phase 3: Synthesize**
|
|
35
|
-
After all answers, write a structured spec:
|
|
32
|
+
**Phase 3: Synthesize — 5-pillar spec format**
|
|
36
33
|
|
|
37
|
-
|
|
38
|
-
|
|
34
|
+
After all answers, write a spec in this exact format and save it to `.phase2s/specs/YYYY-MM-DD-HH-MM-<slug>.md`. Create the directory if it does not exist.
|
|
35
|
+
|
|
36
|
+
```markdown
|
|
37
|
+
# Spec: {{title}}
|
|
38
|
+
|
|
39
|
+
Generated: {{date}}
|
|
40
|
+
Spec ID: {{slug}}
|
|
39
41
|
|
|
40
|
-
|
|
41
|
-
|
|
42
|
+
## Problem Statement
|
|
43
|
+
{{self_contained_context — what are we building, why, for whom, and what problem does it solve. 2-4 sentences. Complete enough that someone who wasn't in this conversation can understand it.}}
|
|
42
44
|
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
45
|
+
## Acceptance Criteria
|
|
46
|
+
1. {{criterion — independently testable, specific, not vague}}
|
|
47
|
+
2. {{criterion}}
|
|
48
|
+
3. {{criterion}}
|
|
47
49
|
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
-
|
|
51
|
-
|
|
50
|
+
## Constraint Architecture
|
|
51
|
+
**Must Do:** {{hard requirements — things that are non-negotiable}}
|
|
52
|
+
**Cannot Do:** {{explicit non-goals and off-limits approaches}}
|
|
53
|
+
**Should Prefer:** {{style, architectural, or implementation preferences}}
|
|
54
|
+
**Should Escalate:** {{situations where the executor should stop and ask the user}}
|
|
52
55
|
|
|
53
|
-
|
|
54
|
-
|
|
56
|
+
## Decomposition
|
|
57
|
+
### Sub-task 1: {{name}}
|
|
58
|
+
- **Input:** {{what this sub-task receives or reads}}
|
|
59
|
+
- **Output:** {{what this sub-task produces or modifies}}
|
|
60
|
+
- **Success criteria:** {{how to know this sub-task is done}}
|
|
55
61
|
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
-
|
|
59
|
-
-
|
|
62
|
+
### Sub-task 2: {{name}}
|
|
63
|
+
- **Input:** {{input}}
|
|
64
|
+
- **Output:** {{output}}
|
|
65
|
+
- **Success criteria:** {{success criteria}}
|
|
66
|
+
|
|
67
|
+
(repeat for each sub-task, ordered by dependency)
|
|
68
|
+
|
|
69
|
+
## Evaluation Design
|
|
70
|
+
| Test Case | Input | Expected Output |
|
|
71
|
+
|-----------|-------|-----------------|
|
|
72
|
+
| {{test name}} | {{input or scenario}} | {{expected result}} |
|
|
73
|
+
|
|
74
|
+
## Eval Command
|
|
75
|
+
{{command to run to verify that the implementation satisfies the spec, e.g. "npm test" or "npm test -- --grep 'rate limiting'"}}
|
|
60
76
|
```
|
|
61
77
|
|
|
62
|
-
|
|
78
|
+
**Deriving eval design:** If the user says "just use npm test" or doesn't provide specific test cases, derive the eval design from the acceptance criteria — write one test case per criterion that describes what a passing test would look like. Do not force the user to enumerate test cases manually if they have a test suite.
|
|
79
|
+
|
|
80
|
+
**Decomposition guidance:** Break the work into 2-6 sub-tasks, each representing a distinct, independently implementable unit of work. Order them by dependency (sub-task 2 can depend on sub-task 1 being done). Each sub-task should take roughly 15-45 minutes of focused implementation work.
|
|
63
81
|
|
|
64
82
|
**Gate:**
|
|
65
83
|
End every session with:
|
|
66
84
|
```
|
|
67
|
-
SPEC READY: .phase2s/specs/YYYY-MM-DD-<slug>.md
|
|
68
|
-
NEXT: run
|
|
85
|
+
SPEC READY: .phase2s/specs/YYYY-MM-DD-HH-MM-<slug>.md
|
|
86
|
+
NEXT: run `phase2s goal .phase2s/specs/YYYY-MM-DD-HH-MM-<slug>.md` to execute autonomously
|
|
69
87
|
```
|
|
70
88
|
|
|
71
89
|
If the user provides context (file paths, a description, a task), read it before asking questions.
|
package/README.md
CHANGED
|
@@ -1,21 +1,12 @@
|
|
|
1
1
|
# Phase2S
|
|
2
2
|
|
|
3
|
-
Phase2S
|
|
3
|
+
Three things Phase2S does that most AI coding tools don't:
|
|
4
4
|
|
|
5
|
-
|
|
5
|
+
1. **Runs on your ChatGPT subscription** — If you pay for ChatGPT Plus or Pro, you can use that same subscription to power a full coding assistant in your terminal. No API key, no per-token billing. The $20/month you're already paying starts pulling its weight.
|
|
6
6
|
|
|
7
|
-
|
|
8
|
-
you > /review src/core/agent.ts
|
|
9
|
-
assistant > Reviewing src/core/agent.ts...
|
|
10
|
-
|
|
11
|
-
CRIT: The `maxTurns` check runs after tool execution, not before.
|
|
12
|
-
An LLM that loops tool calls can exceed the limit by one turn.
|
|
7
|
+
2. **Gives Claude Code a second opinion** — If you use Claude Code as your daily driver, Phase2S plugs in as an MCP server and gives Claude a tool that runs your plan through GPT — a completely different model with completely different training. Before Claude executes anything big, it gets challenged by a second AI that has no stake in agreeing with it.
|
|
13
8
|
|
|
14
|
-
|
|
15
|
-
Callers that mutate it will corrupt the conversation state.
|
|
16
|
-
|
|
17
|
-
NIT: Inline comment on line 47 is stale — describes old batch behavior.
|
|
18
|
-
```
|
|
9
|
+
3. **Executes specs autonomously** — Write a spec describing what you want built and how you'll know it's done. Run `phase2s goal your-spec.md`. Phase2S breaks it into sub-tasks, implements each one, runs your tests, checks whether the results match your criteria, and retries with failure analysis if anything falls short. You come back when it's done.
|
|
19
10
|
|
|
20
11
|
---
|
|
21
12
|
|
|
@@ -23,9 +14,7 @@ assistant > Reviewing src/core/agent.ts...
|
|
|
23
14
|
|
|
24
15
|
Requires [Node.js](https://nodejs.org) >= 20.
|
|
25
16
|
|
|
26
|
-
**
|
|
27
|
-
|
|
28
|
-
No API key, no per-token billing. All 29 skills work.
|
|
17
|
+
**If you have ChatGPT Plus or Pro (recommended)**
|
|
29
18
|
|
|
30
19
|
```bash
|
|
31
20
|
npm install -g @openai/codex @scanton/phase2s
|
|
@@ -33,9 +22,9 @@ codex auth
|
|
|
33
22
|
phase2s
|
|
34
23
|
```
|
|
35
24
|
|
|
36
|
-
|
|
25
|
+
`codex auth` opens a browser window and logs into your ChatGPT account. You do it once. After that, `phase2s` uses your subscription automatically — no API keys, no credits to manage.
|
|
37
26
|
|
|
38
|
-
|
|
27
|
+
**If you have an OpenAI API key**
|
|
39
28
|
|
|
40
29
|
```bash
|
|
41
30
|
npm install -g @scanton/phase2s
|
|
@@ -44,9 +33,7 @@ export PHASE2S_PROVIDER=openai-api
|
|
|
44
33
|
phase2s
|
|
45
34
|
```
|
|
46
35
|
|
|
47
|
-
**
|
|
48
|
-
|
|
49
|
-
Run all 29 skills on Claude 3.5 Sonnet (or any Anthropic model).
|
|
36
|
+
**If you have an Anthropic API key**
|
|
50
37
|
|
|
51
38
|
```bash
|
|
52
39
|
npm install -g @scanton/phase2s
|
|
@@ -55,9 +42,7 @@ export PHASE2S_PROVIDER=anthropic
|
|
|
55
42
|
phase2s
|
|
56
43
|
```
|
|
57
44
|
|
|
58
|
-
**
|
|
59
|
-
|
|
60
|
-
No API keys. Runs entirely on your machine after the initial model pull.
|
|
45
|
+
**If you want to run everything locally (free, private, offline after the initial model pull)**
|
|
61
46
|
|
|
62
47
|
```bash
|
|
63
48
|
npm install -g @scanton/phase2s
|
|
@@ -68,110 +53,207 @@ phase2s
|
|
|
68
53
|
|
|
69
54
|
---
|
|
70
55
|
|
|
71
|
-
##
|
|
56
|
+
## Feature 1: Your ChatGPT subscription, in your terminal
|
|
72
57
|
|
|
73
|
-
|
|
58
|
+
Most people who pay for ChatGPT Plus use it by opening a browser tab and typing. Phase2S turns it into a programmable coding tool you can use from the command line, from scripts, and from inside Claude Code.
|
|
74
59
|
|
|
75
60
|
```
|
|
76
|
-
you > /review src/core/
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
61
|
+
you > /review src/core/auth.ts
|
|
62
|
+
|
|
63
|
+
CRIT: session.destroy() is called without await on line 83.
|
|
64
|
+
If it rejects (Redis timeout, etc.), the error is silently dropped
|
|
65
|
+
and the response goes out before the session is actually cleared.
|
|
66
|
+
|
|
67
|
+
WARN: The JWT expiry check on line 47 uses Date.now() directly.
|
|
68
|
+
Clock skew between your server and the token issuer can cause
|
|
69
|
+
valid tokens to fail. Use a small leeway (±30s) instead.
|
|
70
|
+
|
|
71
|
+
NIT: The error message on line 91 says "auth failed" but should say
|
|
72
|
+
"session_expired" to match the error codes in api-errors.ts.
|
|
81
73
|
```
|
|
82
74
|
|
|
83
|
-
|
|
75
|
+
```
|
|
76
|
+
you > /satori add rate limiting to the API
|
|
84
77
|
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
78
|
+
-- Attempt 1 --
|
|
79
|
+
[creates src/utils/rate-limiter.ts]
|
|
80
|
+
[creates src/middleware/rate-limit.ts]
|
|
81
|
+
[registers middleware in app.ts]
|
|
82
|
+
[writes tests in test/middleware/rate-limit.test.ts]
|
|
83
|
+
npm test: FAIL — bucket not clearing on window expiry
|
|
84
|
+
|
|
85
|
+
-- Attempt 2 --
|
|
86
|
+
[fixes resetAt logic in RateLimiter.check()]
|
|
87
|
+
npm test: PASS (23 tests)
|
|
88
|
+
|
|
89
|
+
Done in 2 attempts.
|
|
88
90
|
```
|
|
89
91
|
|
|
90
|
-
|
|
92
|
+
The 29 built-in skills cover the full development loop: specify, plan, implement, test, review, debug, ship, deploy. All of them run on your subscription.
|
|
91
93
|
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
94
|
+
[Full skill list →](docs/skills.md)
|
|
95
|
+
|
|
96
|
+
---
|
|
97
|
+
|
|
98
|
+
## Feature 2: Claude Code + Phase2S adversarial review
|
|
99
|
+
|
|
100
|
+
If you use Claude Code, here's the problem: one model reviewing its own work has blind spots. Claude agrees with Claude. The same training data, the same biases, the same failure modes.
|
|
101
|
+
|
|
102
|
+
Phase2S solves this by plugging into Claude Code as an MCP server. When Claude is about to execute a plan, it can call Phase2S — which runs the same plan through GPT using your ChatGPT subscription — and get back a structured challenge:
|
|
103
|
+
|
|
104
|
+
```
|
|
105
|
+
VERDICT: CHALLENGED
|
|
106
|
+
STRONGEST_CONCERN: The token bucket resets per-request rather than per-window.
|
|
107
|
+
OBJECTIONS:
|
|
108
|
+
1. RateLimiter.check() increments the counter and checks it in the same call.
|
|
109
|
+
When the window expires, the bucket resets on the next request — meaning
|
|
110
|
+
a client can always make exactly one request immediately after the window
|
|
111
|
+
closes, even if they were throttled. The reset should happen on a fixed
|
|
112
|
+
schedule, not lazily on first request.
|
|
113
|
+
2. The middleware is registered after the auth middleware in app.ts line 34.
|
|
114
|
+
Unauthenticated requests bypass rate limiting entirely.
|
|
115
|
+
APPROVE_IF: Fix the window reset logic and move middleware before auth.
|
|
95
116
|
```
|
|
96
117
|
|
|
97
|
-
|
|
118
|
+
Claude gets specific, falsifiable objections from a model that wasn't involved in writing the plan. You see the verdict. You decide whether to proceed.
|
|
119
|
+
|
|
120
|
+
**Setup takes about 2 minutes.** [Step-by-step guide →](docs/claude-code.md)
|
|
121
|
+
|
|
122
|
+
---
|
|
123
|
+
|
|
124
|
+
## Feature 3: The dark factory
|
|
125
|
+
|
|
126
|
+
Write a spec. Run one command. Come back when it's done.
|
|
127
|
+
|
|
128
|
+
`phase2s goal` reads your spec, breaks it into sub-tasks, implements each one through the `/satori` skill (which runs implement → test → retry until green), runs your eval command, checks whether your acceptance criteria actually passed, and if they didn't — analyzes what broke, figures out which sub-tasks need to be re-run, and tries again with that failure context.
|
|
129
|
+
|
|
130
|
+
It keeps going until all criteria pass or it runs out of attempts.
|
|
98
131
|
|
|
99
132
|
```bash
|
|
100
|
-
|
|
133
|
+
# Write the spec interactively
|
|
134
|
+
phase2s
|
|
135
|
+
you > /deep-specify add pagination to the search endpoint
|
|
136
|
+
|
|
137
|
+
# Execute it autonomously
|
|
138
|
+
phase2s goal .phase2s/specs/2026-04-04-11-00-pagination.md
|
|
101
139
|
```
|
|
102
140
|
|
|
103
|
-
|
|
141
|
+
```
|
|
142
|
+
Goal executor: Pagination for search endpoint
|
|
143
|
+
Sub-tasks: 3 | Eval: npm test | Max attempts: 3
|
|
144
|
+
|
|
145
|
+
=== Attempt 1/3 ===
|
|
146
|
+
Running sub-task: Cursor-based pagination logic
|
|
147
|
+
[satori: implement → test → retry until green]
|
|
148
|
+
|
|
149
|
+
Running sub-task: API response format update
|
|
150
|
+
[satori: implement → test → retry until green]
|
|
151
|
+
|
|
152
|
+
Running sub-task: Frontend page controls
|
|
153
|
+
[satori: implement → test → retry until green]
|
|
154
|
+
|
|
155
|
+
Running evaluation: npm test
|
|
104
156
|
|
|
105
|
-
|
|
157
|
+
Acceptance criteria:
|
|
158
|
+
✗ Returns correct next_cursor on paginated results
|
|
159
|
+
✓ Returns 20 items per page by default
|
|
160
|
+
✓ next_cursor is null on last page
|
|
106
161
|
|
|
107
|
-
|
|
162
|
+
Retrying 1 sub-task(s): Cursor-based pagination logic
|
|
108
163
|
|
|
109
|
-
|
|
110
|
-
-
|
|
111
|
-
|
|
112
|
-
- `/debug` — reproduce, isolate, fix, and verify a bug end-to-end.
|
|
113
|
-
- `/remember` — save project conventions to persistent memory. Injected into every future session automatically.
|
|
114
|
-
- `/skill` — create a new `/command` from inside Phase2S. Three questions, no YAML editing.
|
|
115
|
-
- `/land-and-deploy` — push, open a PR, merge it, wait for CI, confirm the land. Picks up where `/ship` leaves off.
|
|
164
|
+
=== Attempt 2/3 ===
|
|
165
|
+
Running sub-task: Cursor-based pagination logic
|
|
166
|
+
[satori: implement → test → retry until green]
|
|
116
167
|
|
|
117
|
-
|
|
168
|
+
Running evaluation: npm test
|
|
169
|
+
|
|
170
|
+
Acceptance criteria:
|
|
171
|
+
✓ Returns correct next_cursor on paginated results
|
|
172
|
+
✓ Returns 20 items per page by default
|
|
173
|
+
✓ next_cursor is null on last page
|
|
174
|
+
|
|
175
|
+
✓ All acceptance criteria met after 2 attempt(s).
|
|
176
|
+
```
|
|
177
|
+
|
|
178
|
+
This uses your ChatGPT subscription for all the implementation work. No API key needed.
|
|
179
|
+
|
|
180
|
+
[Full dark factory guide →](docs/dark-factory.md)
|
|
181
|
+
|
|
182
|
+
---
|
|
183
|
+
|
|
184
|
+
## All 29 skills
|
|
185
|
+
|
|
186
|
+
```
|
|
187
|
+
you > /review src/auth.ts — code review: CRIT / WARN / NIT
|
|
188
|
+
you > /diff — review all uncommitted changes
|
|
189
|
+
you > /satori add pagination — implement + test + retry until green
|
|
190
|
+
you > /deep-specify add OAuth — spec interview → 5-pillar spec file
|
|
191
|
+
you > /consensus-plan add OAuth — planner + architect + critic passes
|
|
192
|
+
you > /debug logout fails — reproduce, isolate, fix, verify
|
|
193
|
+
you > /investigate why 500s — evidence trail to root cause
|
|
194
|
+
you > /health — code quality score (tests, types, lint)
|
|
195
|
+
you > /audit — secrets scan, dependency CVEs, injection
|
|
196
|
+
you > /ship — diff review + commit message
|
|
197
|
+
you > /land-and-deploy — push, PR, CI wait, merge
|
|
198
|
+
you > /remember — save a project convention to memory
|
|
199
|
+
you > /retro — weekly velocity and pattern analysis
|
|
200
|
+
```
|
|
118
201
|
|
|
119
202
|
```bash
|
|
120
|
-
phase2s skills
|
|
203
|
+
phase2s skills # full list with model tier badges
|
|
204
|
+
phase2s skills --json # machine-readable for scripts
|
|
121
205
|
```
|
|
122
206
|
|
|
207
|
+
[Skills reference →](docs/skills.md)
|
|
208
|
+
|
|
123
209
|
---
|
|
124
210
|
|
|
125
211
|
## Docs
|
|
126
212
|
|
|
127
|
-
- [
|
|
128
|
-
- [
|
|
129
|
-
- [
|
|
130
|
-
- [
|
|
131
|
-
- [
|
|
132
|
-
- [
|
|
133
|
-
- [
|
|
134
|
-
- [
|
|
135
|
-
- [
|
|
213
|
+
- [Getting started](docs/getting-started.md) — first install, first session, all four provider options
|
|
214
|
+
- [Dark factory](docs/dark-factory.md) — write a spec, run `phase2s goal`, get a feature
|
|
215
|
+
- [Claude Code integration](docs/claude-code.md) — MCP setup, adversarial review, CLAUDE.md routing rules
|
|
216
|
+
- [Skills reference](docs/skills.md) — all 29 skills with examples
|
|
217
|
+
- [Workflows](docs/workflows.md) — real development sessions end to end
|
|
218
|
+
- [Memory and persistence](docs/memory.md) — session resume, `/remember`, what gets saved
|
|
219
|
+
- [Writing custom skills](docs/writing-skills.md) — create your own `/commands`
|
|
220
|
+
- [GitHub Action](docs/github-action.md) — `uses: scanton/phase2s@v1` for CI (requires API key)
|
|
221
|
+
- [Advanced](docs/advanced.md) — streaming, model routing, tool allow/deny
|
|
222
|
+
- [Configuration](docs/configuration.md) — `.phase2s.yaml` and environment variables
|
|
136
223
|
|
|
137
224
|
---
|
|
138
225
|
|
|
139
226
|
## Roadmap
|
|
140
227
|
|
|
141
|
-
- [x] Codex CLI provider (
|
|
228
|
+
- [x] Codex CLI provider (ChatGPT subscription, no API key required)
|
|
142
229
|
- [x] 29 built-in skills across 6 categories
|
|
143
|
-
- [x]
|
|
144
|
-
- [x]
|
|
145
|
-
- [x]
|
|
146
|
-
- [x]
|
|
147
|
-
- [x]
|
|
148
|
-
- [x]
|
|
149
|
-
- [x]
|
|
150
|
-
- [x]
|
|
151
|
-
- [x]
|
|
152
|
-
- [x]
|
|
153
|
-
- [x]
|
|
154
|
-
- [x]
|
|
155
|
-
- [x]
|
|
156
|
-
- [x]
|
|
157
|
-
- [x]
|
|
158
|
-
- [x] `/
|
|
159
|
-
- [x]
|
|
160
|
-
- [x]
|
|
161
|
-
- [x]
|
|
162
|
-
- [x]
|
|
163
|
-
- [x]
|
|
164
|
-
- [x]
|
|
165
|
-
- [x]
|
|
166
|
-
- [x]
|
|
167
|
-
- [x]
|
|
168
|
-
- [x]
|
|
169
|
-
- [x]
|
|
170
|
-
- [x] Shell completion — `eval "$(phase2s completion bash)"` for tab-complete in bash/zsh
|
|
171
|
-
- [x] Tool allow/deny — `tools:` and `deny:` in `.phase2s.yaml` restrict agent tool access
|
|
172
|
-
- [x] Headless browser tool — navigate, click, type, screenshot, evaluate JS via Playwright (opt-in: `browser: true`)
|
|
173
|
-
- [x] GitHub Action — `uses: scanton/phase2s@v1` runs any skill in CI, posts results as PR comments and Step Summaries
|
|
174
|
-
- [ ] Real Codex streaming (JSONL stdout parsing)
|
|
230
|
+
- [x] File sandbox: tools reject paths outside project directory, including symlink escapes
|
|
231
|
+
- [x] 389 tests covering all tools, core modules, agent integration, and goal executor
|
|
232
|
+
- [x] CI: runs `npm test` on every push and PR
|
|
233
|
+
- [x] OpenAI API provider with live tool calling
|
|
234
|
+
- [x] Anthropic API provider — Claude 3.5 Sonnet and family
|
|
235
|
+
- [x] Ollama provider — local models, offline, no API keys
|
|
236
|
+
- [x] Streaming output
|
|
237
|
+
- [x] Session persistence — auto-save + `--resume`
|
|
238
|
+
- [x] Model-per-skill routing — `fast_model` / `smart_model` tiers
|
|
239
|
+
- [x] Satori persistent execution — retry loop with shell verification
|
|
240
|
+
- [x] Consensus planning — planner + architect + critic
|
|
241
|
+
- [x] Claude Code MCP integration — all skills as Claude Code tools
|
|
242
|
+
- [x] `/adversarial` — cross-model adversarial review
|
|
243
|
+
- [x] Persistent memory — `/remember` + auto-inject into sessions
|
|
244
|
+
- [x] `/skill` — create new skills from inside Phase2S
|
|
245
|
+
- [x] `/land-and-deploy` — push, PR, CI wait, merge
|
|
246
|
+
- [x] Model tier badges in `phase2s skills` output
|
|
247
|
+
- [x] `--dry-run` for skill routing preview
|
|
248
|
+
- [x] Typed input hints in REPL
|
|
249
|
+
- [x] `phase2s skills --json`
|
|
250
|
+
- [x] Shell completion — `eval "$(phase2s completion bash)"`
|
|
251
|
+
- [x] Tool allow/deny in `.phase2s.yaml`
|
|
252
|
+
- [x] Headless browser tool via Playwright
|
|
253
|
+
- [x] GitHub Action — `uses: scanton/phase2s@v1`
|
|
254
|
+
- [x] `phase2s goal <spec.md>` — dark factory: spec in, feature out
|
|
255
|
+
- [x] 5-pillar spec format — `/deep-specify` output feeds directly into `phase2s goal`
|
|
256
|
+
- [x] Real Codex streaming (JSONL stdout parsing) — step-by-step feedback for multi-step tasks
|
|
175
257
|
|
|
176
258
|
---
|
|
177
259
|
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Goal executor — "dark factory" mode.
|
|
3
|
+
*
|
|
4
|
+
* Reads a 5-pillar spec file, breaks it into sub-tasks, runs each through
|
|
5
|
+
* /satori (implement + test + retry), checks acceptance criteria against eval
|
|
6
|
+
* output, retries failed sub-tasks with failure context, and loops until all
|
|
7
|
+
* criteria pass or max attempts are exhausted.
|
|
8
|
+
*
|
|
9
|
+
* Usage: phase2s goal <spec-file> [--max-attempts <n>]
|
|
10
|
+
*/
|
|
11
|
+
import { Agent } from "../core/agent.js";
|
|
12
|
+
import { type Spec, type SubTask } from "../core/spec-parser.js";
|
|
13
|
+
/**
 * Options accepted by `runGoal`.
 *
 * `maxAttempts` is declared as a string, not a number.
 * NOTE(review): presumably the raw value of the `--max-attempts <n>` CLI flag
 * shown in the file header; confirm where it is parsed into a number.
 */
export interface GoalOptions {
|
|
14
|
+
maxAttempts?: string;
|
|
15
|
+
}
|
|
16
|
+
/**
 * Value that `runGoal` resolves with: an overall `success` flag, an
 * `attempts` number, and a string-keyed pass/fail map.
 *
 * NOTE(review): `attempts` is presumably the number of attempts used, and the
 * keys of `criteriaResults` are presumably the acceptance-criterion strings;
 * neither is visible in this declaration file, so confirm against goal.ts.
 */
export interface GoalResult {
|
|
17
|
+
success: boolean;
|
|
18
|
+
attempts: number;
|
|
19
|
+
criteriaResults: Record<string, boolean>;
|
|
20
|
+
}
|
|
21
|
+
/**
 * Takes the path of a spec file plus optional `GoalOptions` and resolves with
 * a `GoalResult`.
 *
 * NOTE(review): presumably the entry point behind
 * `phase2s goal <spec-file> [--max-attempts <n>]` described in this file's
 * header comment; only the declaration is visible here, so confirm in goal.ts.
 */
export declare function runGoal(specFile: string, options?: GoalOptions): Promise<GoalResult>;
|
|
22
|
+
/**
 * Synchronously produces a string from one sub-task, the spec's `constraints`,
 * and an optional `failureContext` string.
 *
 * NOTE(review): presumably the context/prompt text handed to the /satori run
 * for that sub-task, with `failureContext` supplied on retries; confirm in
 * goal.ts.
 */
export declare function buildSatoriContext(subtask: SubTask, constraints: Spec["constraints"], failureContext?: string): string;
|
|
23
|
+
/**
 * Given a list of criteria, the eval output text, and an `Agent`, resolves
 * with a string-keyed boolean map (the same type as
 * `GoalResult.criteriaResults`).
 *
 * NOTE(review): presumably one entry per criterion, true when that criterion
 * passed, with the judgement delegated to `agent`; confirm in goal.ts.
 */
export declare function checkCriteria(criteria: string[], evalOutput: string, agent: Agent): Promise<Record<string, boolean>>;
|
|
24
|
+
/**
 * Given the failing criteria, the eval output text, the parsed `Spec`, and an
 * `Agent`, resolves with a single string.
 *
 * NOTE(review): presumably the failure-analysis text that is passed on as
 * `failureContext` to later steps; confirm in goal.ts.
 */
export declare function analyzeFailures(failing: string[], evalOutput: string, spec: Spec, agent: Agent): Promise<string>;
|
|
25
|
+
/**
 * Given the failing criteria, the spec's sub-task list (`decomposition`), a
 * failure-context string, and an `Agent`, resolves with an array of
 * `SubTask`s.
 *
 * NOTE(review): presumably the subset of `decomposition` that should be re-run
 * on the next attempt; confirm in goal.ts.
 */
export declare function identifyFailedSubtasks(failing: string[], decomposition: SubTask[], failureContext: string, agent: Agent): Promise<SubTask[]>;
|
|
26
|
+
/**
|
|
27
|
+
* Run a shell command, capturing stdout + stderr.
|
|
28
|
+
* NEVER throws on non-zero exit — non-zero exit = test failures = valid output.
|
|
29
|
+
* On timeout, returns a timeout message + any partial output captured.
|
|
30
|
+
*/
|
|
31
|
+
// `timeoutMs` is optional; its default value is not visible in this declaration file.
export declare function runCommand(cmd: string, timeoutMs?: number): Promise<string>;
|
|
32
|
+
//# sourceMappingURL=goal.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"goal.d.ts","sourceRoot":"","sources":["../../../src/cli/goal.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AAKH,OAAO,EAAE,KAAK,EAAE,MAAM,kBAAkB,CAAC;AAGzC,OAAO,EAAa,KAAK,IAAI,EAAE,KAAK,OAAO,EAAE,MAAM,wBAAwB,CAAC;AAI5E,MAAM,WAAW,WAAW;IAC1B,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;AAED,MAAM,WAAW,UAAU;IACzB,OAAO,EAAE,OAAO,CAAC;IACjB,QAAQ,EAAE,MAAM,CAAC;IACjB,eAAe,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;CAC1C;AAED,wBAAsB,OAAO,CAAC,QAAQ,EAAE,MAAM,EAAE,OAAO,GAAE,WAAgB,GAAG,OAAO,CAAC,UAAU,CAAC,CAiH9F;AAMD,wBAAgB,kBAAkB,CAChC,OAAO,EAAE,OAAO,EAChB,WAAW,EAAE,IAAI,CAAC,aAAa,CAAC,EAChC,cAAc,CAAC,EAAE,MAAM,GACtB,MAAM,CAwBR;AAED,wBAAsB,aAAa,CACjC,QAAQ,EAAE,MAAM,EAAE,EAClB,UAAU,EAAE,MAAM,EAClB,KAAK,EAAE,KAAK,GACX,OAAO,CAAC,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC,CAwClC;AAeD,wBAAsB,eAAe,CACnC,OAAO,EAAE,MAAM,EAAE,EACjB,UAAU,EAAE,MAAM,EAClB,IAAI,EAAE,IAAI,EACV,KAAK,EAAE,KAAK,GACX,OAAO,CAAC,MAAM,CAAC,CAejB;AAED,wBAAsB,sBAAsB,CAC1C,OAAO,EAAE,MAAM,EAAE,EACjB,aAAa,EAAE,OAAO,EAAE,EACxB,cAAc,EAAE,MAAM,EACtB,KAAK,EAAE,KAAK,GACX,OAAO,CAAC,OAAO,EAAE,CAAC,CA8BpB;AAED;;;;GAIG;AACH,wBAAsB,UAAU,CAAC,GAAG,EAAE,MAAM,EAAE,SAAS,SAAU,GAAG,OAAO,CAAC,MAAM,CAAC,CAyBlF"}
|