gm-codex 2.0.888 → 2.0.890
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.codex-plugin/plugin.json +1 -1
- package/bin/plugkit.sha256 +6 -6
- package/bin/plugkit.version +1 -1
- package/bin/rtk.sha256 +2 -2
- package/gm.json +2 -2
- package/package.json +1 -1
- package/plugin.json +1 -1
- package/skills/browser/SKILL.md +16 -15
- package/skills/code-search/SKILL.md +13 -15
- package/skills/create-lang-plugin/SKILL.md +22 -26
- package/skills/gm/SKILL.md +31 -105
- package/skills/gm-complete/SKILL.md +46 -71
- package/skills/gm-emit/SKILL.md +40 -65
- package/skills/gm-execute/SKILL.md +35 -104
- package/skills/governance/SKILL.md +24 -23
- package/skills/pages/SKILL.md +42 -92
- package/skills/planning/SKILL.md +40 -153
- package/skills/research/SKILL.md +8 -14
- package/skills/ssh/SKILL.md +15 -9
- package/skills/textprocessing/SKILL.md +17 -25
- package/skills/update-docs/SKILL.md +15 -24
package/skills/pages/SKILL.md
CHANGED
|
@@ -3,41 +3,37 @@ name: pages
|
|
|
3
3
|
description: Scaffold and maintain a GitHub Pages site. Buildless in browser (webjsx + rippleui via CDN), flatspace for content aggregation built during GH Actions. Use when user wants to create or update a GH Pages site.
|
|
4
4
|
---
|
|
5
5
|
|
|
6
|
-
# Pages — GitHub Pages
|
|
6
|
+
# Pages — GitHub Pages site scaffolder
|
|
7
7
|
|
|
8
|
-
Scaffold a complete GH Pages site
|
|
9
|
-
|
|
10
|
-
**Follow full gm skill chain: planning → gm-execute → gm-emit → gm-complete → update-docs.**
|
|
8
|
+
Scaffold a complete GH Pages site with no local build step. Content via flatspace flat-file CMS, UI via webjsx + rippleui CDN, GH Actions builds and deploys. Follow the full chain: `planning → gm-execute → gm-emit → gm-complete → update-docs`.
|
|
11
9
|
|
|
12
10
|
## Stack
|
|
13
11
|
|
|
14
12
|
| Layer | Tool | How |
|
|
15
|
-
|
|
13
|
+
|---|---|---|
|
|
16
14
|
| UI rendering | [webjsx](https://webjsx.org) | ES module via importmap, `applyDiff` for DOM updates |
|
|
17
15
|
| Styling | [rippleui](https://ripple-ui.com) | CDN `<link>` — Tailwind-based component classes |
|
|
18
16
|
| Content CMS | [flatspace](https://npmjs.com/package/flatspace) | Aggregates `content/` → `docs/data/*.json` at build time |
|
|
19
17
|
| Build | GH Actions | `npx flatspace` runs in CI, commits output to `docs/` |
|
|
20
|
-
| Hosting | GitHub Pages | Source
|
|
18
|
+
| Hosting | GitHub Pages | Source set to "GitHub Actions" |
|
|
21
19
|
|
|
22
|
-
##
|
|
20
|
+
## Layout
|
|
23
21
|
|
|
24
22
|
```
|
|
25
23
|
<project>/
|
|
26
|
-
content/
|
|
27
|
-
pages/
|
|
28
|
-
posts/
|
|
29
|
-
data/
|
|
30
|
-
docs/
|
|
31
|
-
index.html
|
|
32
|
-
app.js
|
|
33
|
-
data/
|
|
34
|
-
.github/
|
|
35
|
-
|
|
36
|
-
pages.yml # Build + deploy workflow
|
|
37
|
-
flatspace.config.js # flatspace aggregation config
|
|
24
|
+
content/
|
|
25
|
+
pages/
|
|
26
|
+
posts/
|
|
27
|
+
data/
|
|
28
|
+
docs/
|
|
29
|
+
index.html # committed, never regenerated
|
|
30
|
+
app.js # committed
|
|
31
|
+
data/ # flatspace output, gitignored
|
|
32
|
+
.github/workflows/pages.yml
|
|
33
|
+
flatspace.config.js
|
|
38
34
|
```
|
|
39
35
|
|
|
40
|
-
## index.html
|
|
36
|
+
## index.html
|
|
41
37
|
|
|
42
38
|
```html
|
|
43
39
|
<!DOCTYPE html>
|
|
@@ -63,28 +59,19 @@ Scaffold a complete GH Pages site: **no local build step**, content managed via
|
|
|
63
59
|
</html>
|
|
64
60
|
```
|
|
65
61
|
|
|
66
|
-
## app.js
|
|
62
|
+
## app.js
|
|
67
63
|
|
|
68
64
|
```js
|
|
69
65
|
import { applyDiff } from 'webjsx';
|
|
70
66
|
|
|
71
67
|
const h = (tag, props, ...children) => ({ tag, props: props || {}, children });
|
|
72
|
-
|
|
73
68
|
const state = { page: null, data: {} };
|
|
74
69
|
|
|
75
|
-
async function loadData(path) {
|
|
76
|
-
|
|
77
|
-
return res.json();
|
|
78
|
-
}
|
|
79
|
-
|
|
80
|
-
function render() {
|
|
81
|
-
applyDiff(document.getElementById('root'), App(state));
|
|
82
|
-
}
|
|
70
|
+
async function loadData(path) { return (await fetch(path)).json(); }
|
|
71
|
+
function render() { applyDiff(document.getElementById('root'), App(state)); }
|
|
83
72
|
|
|
84
73
|
function App(s) {
|
|
85
|
-
if (!s.page) return h('div', { class: 'flex justify-center p-8' },
|
|
86
|
-
h('span', { class: 'spinner' })
|
|
87
|
-
);
|
|
74
|
+
if (!s.page) return h('div', { class: 'flex justify-center p-8' }, h('span', { class: 'spinner' }));
|
|
88
75
|
return h('div', { class: 'max-w-4xl mx-auto p-4' },
|
|
89
76
|
h('nav', { class: 'navbar bg-backgroundSecondary mb-6' },
|
|
90
77
|
h('span', { class: 'navbar-brand text-xl font-bold' }, s.page.title)
|
|
@@ -100,11 +87,7 @@ function Section(section) {
|
|
|
100
87
|
);
|
|
101
88
|
}
|
|
102
89
|
|
|
103
|
-
async function main() {
|
|
104
|
-
state.page = await loadData('./data/index.json');
|
|
105
|
-
render();
|
|
106
|
-
}
|
|
107
|
-
|
|
90
|
+
async function main() { state.page = await loadData('./data/index.json'); render(); }
|
|
108
91
|
main();
|
|
109
92
|
```
|
|
110
93
|
|
|
@@ -122,11 +105,10 @@ module.exports = {
|
|
|
122
105
|
};
|
|
123
106
|
```
|
|
124
107
|
|
|
125
|
-
##
|
|
108
|
+
## pages.yml
|
|
126
109
|
|
|
127
110
|
```yaml
|
|
128
111
|
name: Deploy GitHub Pages
|
|
129
|
-
|
|
130
112
|
on:
|
|
131
113
|
push:
|
|
132
114
|
branches: [main]
|
|
@@ -142,14 +124,10 @@ jobs:
|
|
|
142
124
|
runs-on: ubuntu-latest
|
|
143
125
|
steps:
|
|
144
126
|
- uses: actions/checkout@v4
|
|
145
|
-
|
|
146
127
|
- uses: actions/setup-node@v4
|
|
147
|
-
with:
|
|
148
|
-
node-version: '20'
|
|
149
|
-
|
|
128
|
+
with: { node-version: '20' }
|
|
150
129
|
- name: Build content with flatspace
|
|
151
130
|
run: npx flatspace
|
|
152
|
-
|
|
153
131
|
- name: Commit built data
|
|
154
132
|
run: |
|
|
155
133
|
git config user.name "github-actions[bot]"
|
|
@@ -157,11 +135,8 @@ jobs:
|
|
|
157
135
|
git add docs/data/
|
|
158
136
|
git diff --staged --quiet || git commit -m "chore: build content [skip ci]"
|
|
159
137
|
git push
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
uses: actions/upload-pages-artifact@v3
|
|
163
|
-
with:
|
|
164
|
-
path: docs/
|
|
138
|
+
- uses: actions/upload-pages-artifact@v3
|
|
139
|
+
with: { path: docs/ }
|
|
165
140
|
|
|
166
141
|
deploy:
|
|
167
142
|
needs: build
|
|
@@ -174,26 +149,14 @@ jobs:
|
|
|
174
149
|
uses: actions/deploy-pages@v4
|
|
175
150
|
```
|
|
176
151
|
|
|
177
|
-
##
|
|
152
|
+
## Scaffold sequence
|
|
178
153
|
|
|
179
|
-
|
|
154
|
+
Read existing `docs/` and `content/` if present — never clobber existing content. Create the directory structure. Write `docs/index.html`, `docs/app.js`, `flatspace.config.js`, `.github/workflows/pages.yml`, `content/pages/index.md` with minimal frontmatter (`title`, `sections` array). Add `docs/data/` to `.gitignore`. Verify GH Pages setting is "GitHub Actions" in repo Settings — remind the user if you can't verify.
|
|
180
155
|
|
|
181
|
-
|
|
182
|
-
2. **Create** directory structure above
|
|
183
|
-
3. **Write** `docs/index.html` with correct site title
|
|
184
|
-
4. **Write** `docs/app.js` with webjsx app skeleton
|
|
185
|
-
5. **Write** `flatspace.config.js`
|
|
186
|
-
6. **Write** `.github/workflows/pages.yml`
|
|
187
|
-
7. **Write** `content/pages/index.md` with minimal frontmatter (`title`, `sections` array)
|
|
188
|
-
8. **Add** `docs/data/` to `.gitignore` (built by CI, not committed by humans)
|
|
189
|
-
9. **Verify** GH Pages setting is "GitHub Actions" in repo Settings — remind user if can't verify
|
|
190
|
-
|
|
191
|
-
## rippleui Component Classes Quick Reference
|
|
192
|
-
|
|
193
|
-
Use these directly in JSX className strings — no config needed:
|
|
156
|
+
## rippleui classes
|
|
194
157
|
|
|
195
158
|
| Component | Class |
|
|
196
|
-
|
|
159
|
+
|---|---|
|
|
197
160
|
| Button | `btn btn-primary`, `btn btn-secondary`, `btn btn-ghost` |
|
|
198
161
|
| Card | `card p-4` |
|
|
199
162
|
| Input | `input input-primary` |
|
|
@@ -203,30 +166,18 @@ Use these directly in JSX className strings — no config needed:
|
|
|
203
166
|
| Spinner | `spinner` |
|
|
204
167
|
| Divider | `divider` |
|
|
205
168
|
|
|
206
|
-
Background
|
|
207
|
-
|
|
208
|
-
**CSS variable warning**: rippleui color vars (e.g. `--gray-2`) are raw space-separated RGB tuples — not valid CSS colors. Never use them in `rgb()` directly from JS. Use the component classes instead.
|
|
209
|
-
|
|
210
|
-
## webjsx Patterns
|
|
211
|
-
|
|
212
|
-
**No JSX transpile needed** — use `h()` factory or import from CDN with importmap and write JSX in `.jsx` files served directly (Chrome supports importmap natively).
|
|
213
|
-
|
|
214
|
-
For `.js` files without transpile, use the `h` factory pattern shown above.
|
|
169
|
+
Background: `bg-backgroundPrimary`, `bg-backgroundSecondary`. Text: `text-content1`, `text-content2`. rippleui CSS color vars (e.g. `--gray-2`) are raw space-separated RGB tuples — invalid in `rgb()` directly. Use the component classes instead.
|
|
215
170
|
|
|
216
|
-
|
|
217
|
-
```js
|
|
218
|
-
/** @jsxImportSource webjsx */
|
|
219
|
-
import { applyDiff } from 'webjsx';
|
|
220
|
-
```
|
|
221
|
-
Only works if server sets correct MIME type for `.jsx` — GH Pages does not. Use `.js` + `h()` factory.
|
|
171
|
+
## webjsx
|
|
222
172
|
|
|
223
|
-
|
|
173
|
+
No JSX transpile needed. Use the `h()` factory in `.js` files served directly. `.jsx` with native importmap requires the server to set the correct MIME type, which GH Pages does not — stay in `.js` + `h()`.
|
|
224
174
|
|
|
225
|
-
|
|
175
|
+
`applyDiff(domNode, vnodeOrArray)` — never pass a string. State updates mutate `state` and call `render()`; no reactive system.
|
|
226
176
|
|
|
227
|
-
## Content
|
|
177
|
+
## Content format
|
|
228
178
|
|
|
229
179
|
Markdown with YAML frontmatter:
|
|
180
|
+
|
|
230
181
|
```markdown
|
|
231
182
|
---
|
|
232
183
|
title: Home
|
|
@@ -240,19 +191,18 @@ sections:
|
|
|
240
191
|
Full markdown body here.
|
|
241
192
|
```
|
|
242
193
|
|
|
243
|
-
|
|
194
|
+
Output `docs/data/pages/index.json`:
|
|
195
|
+
|
|
244
196
|
```json
|
|
245
197
|
{ "title": "Home", "sections": [...], "body": "<p>Full markdown body here.</p>", "slug": "index" }
|
|
246
198
|
```
|
|
247
199
|
|
|
248
|
-
##
|
|
249
|
-
|
|
250
|
-
**GH Pages must be set to "GitHub Actions"** in Settings → Pages. "Deploy from branch" ignores the deploy-pages action entirely.
|
|
200
|
+
## Gotchas
|
|
251
201
|
|
|
252
|
-
|
|
202
|
+
GH Pages must be set to "GitHub Actions" in Settings → Pages. "Deploy from branch" ignores the deploy-pages action.
|
|
253
203
|
|
|
254
|
-
|
|
204
|
+
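`docs/data/` is gitignored; `docs/index.html` and `docs/app.js` are not — they are the committed source files. Because `docs/data/` is ignored, the workflow's `git add docs/data/` step must use `git add -f` (or be dropped, since the Pages artifact is uploaded straight from `docs/`); a plain `git add` of an ignored path exits non-zero and fails the step.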
|
|
255
205
|
|
|
256
|
-
|
|
206
|
+
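`npx flatspace` cold-start is ~10s on first CI run; subsequent runs are faster only when `actions/setup-node` caching is enabled (`cache: npm`, which needs a committed lockfile) — the `with: { node-version: '20' }` shown in `pages.yml` does not enable it.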
|
|
257
207
|
|
|
258
|
-
|
|
208
|
+
Pin the webjsx CDN version in importmap (e.g. `@0.0.42`) — `@latest` breaks silently on upstream updates.
|
package/skills/planning/SKILL.md
CHANGED
|
@@ -4,131 +4,40 @@ description: State machine orchestrator. Mutable discovery, PRD construction, an
|
|
|
4
4
|
allowed-tools: Skill, Bash, Write, Read, Agent
|
|
5
5
|
---
|
|
6
6
|
|
|
7
|
-
# Planning —
|
|
7
|
+
# Planning — PLAN phase
|
|
8
8
|
|
|
9
|
-
|
|
9
|
+
Translate the request into `.gm/prd.yml` and hand to `gm-execute`. Re-enter on any new unknown in any phase.
|
|
10
10
|
|
|
11
|
-
|
|
11
|
+
Cross-cutting dispositions (autonomy, fix-on-sight, nothing-fake, browser-witness, scope, recall, memorize) live in `gm` SKILL.md; this skill only carries what is unique to PLAN.
|
|
12
12
|
|
|
13
|
-
|
|
13
|
+
## Transitions
|
|
14
14
|
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
Triggers: matches prior topic | "have we seen this" | designing where prior decision likely exists | quirk feels familiar | sub-task in known project.
|
|
21
|
-
|
|
22
|
-
Hits = weak_prior; witness via EXECUTE before adopting. Skip recall only on brand-new project / trivially-bounded edit / surgical user instruction.
|
|
23
|
-
|
|
24
|
-
## MEMORIZE — HARD RULE
|
|
25
|
-
|
|
26
|
-
Every unknown→known = same-turn memorize. Background, parallel, never batched.
|
|
27
|
-
|
|
28
|
-
```
|
|
29
|
-
Agent(subagent_type='gm:memorize', model='haiku', run_in_background=true, prompt='## CONTEXT TO MEMORIZE\n<fact>')
|
|
30
|
-
```
|
|
31
|
-
|
|
32
|
-
Triggers: exec output answers prior unknown | code read confirms/refutes | CI log reveals root cause | user states preference/constraint | fix worked non-obviously | env quirk observed.
|
|
33
|
-
|
|
34
|
-
N facts → N parallel Agent calls in ONE message.
|
|
35
|
-
|
|
36
|
-
## STATE MACHINE
|
|
37
|
-
|
|
38
|
-
**FORWARD**: PLAN → `gm-execute` | EXECUTE → `gm-emit` | EMIT → `gm-complete` | VERIFY .prd remains → `gm-execute` | VERIFY .prd empty+pushed → `update-docs`
|
|
39
|
-
|
|
40
|
-
**REGRESSIONS**: new unknown anywhere → `planning` | EXECUTE unresolvable 2 passes → `planning` | EMIT logic error → `gm-execute` | VERIFY broken output → `gm-emit` | VERIFY logic wrong → `gm-execute`
|
|
41
|
-
|
|
42
|
-
Runs until: .gm/prd.yml empty AND git clean AND all pushes confirmed AND CI green.
|
|
43
|
-
|
|
44
|
-
## AUTONOMY — HARD RULE
|
|
45
|
-
|
|
46
|
-
**USER REQUEST = AUTHORIZATION.** The user's message asking for X is the green light. PLAN's job is to translate that ask into a PRD and start — not to re-confirm. "Want me to do X?", "should I take shape A or B?", "this is multi-repo work, OK to proceed?" after the user said "do X" are all forced closure. When the user surfaces a tradeoff (deep vs light, single-file vs cross-repo), pick the read that matches the obvious meaning of the request — "deeply integrate" means deep, "all platforms" means all — declare the choice in one line ("going with A because Y") and execute. Multi-repo scope, build cost, CI duration, binary-size impact, and "this will take a while" are never grounds to re-confirm. The user already knows; that's why they asked.
|
|
47
|
-
|
|
48
|
-
PRD written → execute to COMPLETE without asking the user. Doubts that arise during execution are resolved by witnessed probe, by recall, or by re-reading the PRD — never by asking. Any question whose answer is reachable from the agent's tools belongs to the agent, not the user.
|
|
49
|
-
|
|
50
|
-
Asking is last-resort: destructive-irreversible without PRD coverage, OR user intent irrecoverable from PRD/memory/code/web. Channel: `exec:pause` (renames `prd.yml` → `prd.paused.yml`; question in header). In-conversation asking is last-resort beneath last-resort.
|
|
51
|
-
|
|
52
|
-
**WEB-SEARCH BEFORE PAUSE — HARD RULE.** Before `exec:pause` for any blocking question whose answer could plausibly exist on the public web — missing artifact, unknown library/API, prebuilt binary, version compatibility, build recipe, upstream status — fire `WebSearch` and at least one `WebFetch` first. Only after the web pass returns empty (or returns options the agent then witnesses and rejects) is `exec:pause` legitimate. Pausing on a question the web could have answered is forced closure dressed up as humility — fix on sight by re-entering planning, web-searching, and resuming. The only questions that genuinely require user-ask are ones the public web cannot answer: private credentials, intent/preference between viable options the agent has *already surfaced*, destructive-irreversible authorization.
|
|
53
|
-
|
|
54
|
-
**Cannot stop while**: `.gm/prd.yml` has items | git uncommitted | git unpushed.
|
|
55
|
-
|
|
56
|
-
**MAXIMIZE THE SCOPE — HARD RULE.** When the request admits a deeper, broader, or more thorough shape that is reachable from this session, plan it that way. The internal "is this too much work?" question is the agent's, not the user's — and the answer is yes when the larger shape matches the spirit of the ask. "Deeply integrate", "do it right", "all of them", "every X", "across the whole Y", "keep going", and silence between options all read as *go wider*. Default to the maximum scope the obvious read of the request supports; declare the read in one PRD line so the user can interrupt; execute. A PRD that ships a smaller-than-necessary slice while a larger reachable shape exists is the same failure as distributed refusal. The bar for *not* expanding is the same destructive-irreversible-and-not-covered bar that gates asking; below that bar, larger always beats smaller.
|
|
57
|
-
|
|
58
|
-
## MAXIMAL COVER — HARD RULE
|
|
59
|
-
|
|
60
|
-
When scope exceeds reach, expand the cover. Don't refuse. Don't ship one slice with the rest abandoned as "follow-up" — that's distributed refusal: the same failure dressed up as triage.
|
|
61
|
-
|
|
62
|
-
**Required move when scope exceeds reach**: construct a *covering family* — every bounded subset of the request that is witnessable from this session — and write the family into the PRD as separate items, with the dependency graph explicit so independent members parallelize. Execute every member.
|
|
63
|
-
|
|
64
|
-
**Residuals the agent judges within the spirit of the original ask AND completable from this session are self-authorized — expand the PRD with them and execute, do not stop to ask.** The judgment is the agent's honest read of what the user probably wanted, paired with reachability from this session. Only residuals genuinely outside the original ask, or genuinely unreachable from this session, are name-and-stop. When expanding under self-authorization, the agent declares its judgment in the response ("treating X as in-scope because Y") so the user can correct mid-chain. Silent expansion without the declaration is the failure mode this rule guards against.
|
|
65
|
-
|
|
66
|
-
Enforcement is on what is delivered, not on which words appear. Before closing the turn, check that committed work + named out-of-spirit residuals = witnessable closure of the request. Gap = cover not yet maximal → re-enter PLAN to expand.
|
|
67
|
-
|
|
68
|
-
## FIX ON SIGHT — HARD RULE
|
|
69
|
-
|
|
70
|
-
Every issue surfaced during planning, execution, or verification is fixed in-band, the same session, at root cause. A known-bad signal carried past the moment of detection — by deferral, suppression, silencing, skipping, or "next time" narration — is a small forced closure.
|
|
71
|
-
|
|
72
|
-
Surface → diagnose → fix → re-witness → continue. New unknown surfaced by the fix → regress here. Genuinely out-of-scope-irreversible → the residual goes into `.gm/prd.yml` *before* moving on; narration is not a substitute for an item.
|
|
15
|
+
- PLAN done → `gm-execute`
|
|
16
|
+
- New unknown anywhere in chain → re-enter PLAN
|
|
17
|
+
- EXECUTE unresolvable after 2 passes → PLAN
|
|
18
|
+
- VERIFY: `.prd` empty + git clean + pushed → `update-docs`; else → `gm-execute`
|
|
73
19
|
|
|
74
|
-
|
|
20
|
+
Cannot stop while `.gm/prd.yml` has items, git is dirty, or commits are unpushed.
|
|
75
21
|
|
|
76
|
-
|
|
22
|
+
## Orient
|
|
77
23
|
|
|
78
|
-
|
|
24
|
+
Open every plan with one parallel pack of `exec:recall` + `exec:codesearch` against the request's nouns. Hits land as `weak_prior`; misses confirm the unknown is fresh. The pack runs in one message.
|
|
79
25
|
|
|
80
|
-
##
|
|
26
|
+
## Mutable discovery
|
|
81
27
|
|
|
82
|
-
|
|
28
|
+
For each aspect of the work, ask: what do I not know, what could go wrong, what depends on what, what am I assuming. Unwitnessed assumptions are mutables.
|
|
83
29
|
|
|
84
|
-
|
|
30
|
+
Fault surfaces to scan: file existence, API shape, data format, dep versions, runtime behavior, env differences, error conditions, concurrency, integration seams, backwards compat, rollback paths, CI correctness.
|
|
85
31
|
|
|
86
|
-
|
|
32
|
+
Tag every item with a route family (grounding | reasoning | state | execution | observability | boundary | representation) and cross-reference the 16-failure taxonomy. `governance` skill holds the table.
|
|
87
33
|
|
|
88
|
-
|
|
34
|
+
`existingImpl=UNKNOWN` is the default; resolve via `exec:codesearch` before adding the item. An existing concern routes to consolidation, not addition.
|
|
89
35
|
|
|
90
|
-
Plan
|
|
36
|
+
Plan exits when zero new unknowns surfaced last pass AND every item has acceptance criteria AND deps are mapped.
|
|
91
37
|
|
|
92
|
-
|
|
38
|
+
## .prd format
|
|
93
39
|
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
The fake-detection test is behavioral: would the code, executed against the inputs it claims to accept, produce the outputs it claims to produce? If the answer requires "after we fill in the body" or "once X is wired up", the plan item is open, not done.
|
|
97
|
-
|
|
98
|
-
## ORIENT — HARD RULE
|
|
99
|
-
|
|
100
|
-
Open every plan with a parallel pack of `exec:recall` and `exec:codesearch` against the request's nouns. Hits land as `weak_prior`; misses confirm the unknown is fresh. The pack runs in one message — never serially. The agent that skips orient pays the same cost in fresh probes a turn later, plus the price of disagreeing with its own prior witness.
|
|
101
|
-
|
|
102
|
-
## PRD — HARD RULE
|
|
103
|
-
|
|
104
|
-
`./.gm/prd.yml` is the authorization. It is written before EXECUTE fires for any task that touches more than one line in one file. The cost of writing it equals the cost of skipping it; what the file buys is durable trace, resumability, and the cover-maximality check.
|
|
105
|
-
|
|
106
|
-
## PLAN PHASE — MUTABLE DISCOVERY
|
|
107
|
-
|
|
108
|
-
For every aspect: what do I not know (UNKNOWN) | what could go wrong (failure mode) | what depends on what (blocking/blockedBy) | what assumptions am I making (unwitnessed hypothesis = mutable).
|
|
109
|
-
|
|
110
|
-
Fault surfaces: file existence | API shape | data format | dep versions | runtime behavior | env differences | error conditions | concurrency | integration seams | backwards compat | rollback paths | CI correctness.
|
|
111
|
-
|
|
112
|
-
**Route family** (governance): tag every item — grounding|reasoning|state|execution|observability|boundary|representation.
|
|
113
|
-
|
|
114
|
-
**Failure-mode mapping**: cross-reference 16-failure taxonomy.
|
|
115
|
-
|
|
116
|
-
**MANDATORY CODEBASE SCAN**: `existingImpl=UNKNOWN` for every item. Resolve via exec:codesearch before adding. Existing concern → consolidation, not addition.
|
|
117
|
-
|
|
118
|
-
**EXIT PLAN**: zero new unknowns last pass AND every item has acceptance criteria AND deps mapped → launch subagents or invoke `gm-execute`.
|
|
119
|
-
|
|
120
|
-
## OBSERVABILITY — MANDATORY EVERY PASS
|
|
121
|
-
|
|
122
|
-
Server: every subsystem exposes `/debug/<subsystem>`. Structured logs `{subsystem, severity, ts}`.
|
|
123
|
-
Client: `window.__debug` live registry; modules register on mount.
|
|
124
|
-
|
|
125
|
-
`console.log` ≠ observability. Discovery of gap → add .prd item immediately, never deferred.
|
|
126
|
-
|
|
127
|
-
**No parallel observability surfaces.** `window.__debug` is THE in-page registry; `test.js` at project root is the sole out-of-page test asset. Any new file whose purpose is to exercise, smoke-test, demo, or sandbox in-page behavior outside that registry fights the discipline — extend the registry instead.
|
|
128
|
-
|
|
129
|
-
## .PRD FORMAT
|
|
130
|
-
|
|
131
|
-
Path: `./.gm/prd.yml`. Write via `exec:nodejs` + `fs.writeFileSync`. Delete when empty.
|
|
40
|
+
Path: `./.gm/prd.yml`. Write via `exec:nodejs` + `fs.writeFileSync`. Delete the file when empty.
|
|
132
41
|
|
|
133
42
|
```yaml
|
|
134
43
|
- id: kebab-id
|
|
@@ -150,64 +59,42 @@ Path: `./.gm/prd.yml`. Write via `exec:nodejs` + `fs.writeFileSync`. Delete when
|
|
|
150
59
|
- failure mode
|
|
151
60
|
```
|
|
152
61
|
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
Status: pending → in_progress → completed (remove). Effort: small <15min | medium <45min | large >1h.
|
|
156
|
-
|
|
157
|
-
## PARALLEL SUBAGENT LAUNCH
|
|
158
|
-
|
|
159
|
-
After .prd written, ≤3 parallel `gm:gm` subagents for independent items in ONE message. Browser tasks serialize.
|
|
160
|
-
|
|
161
|
-
`Agent(subagent_type="gm:gm", prompt="Work on .prd item: <id>. .prd path: <path>. Item: <full YAML>.")`
|
|
162
|
-
|
|
163
|
-
Not parallelizable → invoke `gm-execute` directly.
|
|
62
|
+
`load` is consequence-if-wrong: 0.9 = headline collapses, 0.7 = sub-argument rebuilt, 0.4 = local patch, 0.1 = nothing breaks. Verification budget = `load × (1 − tier_confidence)`. λ>0.75 must reach witnessed before EMIT.
|
|
164
63
|
|
|
165
|
-
|
|
64
|
+
`status`: pending → in_progress → completed (then remove). `effort`: small <15min | medium <45min | large >1h.
|
|
166
65
|
|
|
167
|
-
|
|
66
|
+
## Parallel subagent launch
|
|
168
67
|
|
|
169
|
-
|
|
68
|
+
After `.prd` is written, up to 3 parallel `gm:gm` subagents for independent items in one message. Browser tasks serialize.
|
|
170
69
|
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
`exec:codesearch` only — Glob/Grep/Find/Explore hook-blocked. Start 2 words → change/add one per pass → minimum 4 attempts before concluding absent.
|
|
176
|
-
|
|
177
|
-
Pack runs: Promise.allSettled for parallel. Each idea own try/catch. Under 12s per call.
|
|
178
|
-
|
|
179
|
-
## DEV WORKFLOW
|
|
180
|
-
|
|
181
|
-
No comments. No scattered test files. 200-line limit per file. Fail loud. No duplication. Scan before edit. AGENTS.md via memorize agent only. CHANGELOG.md append per commit.
|
|
70
|
+
```
|
|
71
|
+
Agent(subagent_type="gm:gm", prompt="Work on .prd item: <id>. .prd path: <path>. Item: <full YAML>.")
|
|
72
|
+
```
|
|
182
73
|
|
|
183
|
-
|
|
74
|
+
Items not parallelizable → invoke `gm-execute` directly.
|
|
184
75
|
|
|
185
|
-
##
|
|
76
|
+
## Observability gates in the plan
|
|
186
77
|
|
|
187
|
-
|
|
78
|
+
Server: every subsystem exposes `/debug/<subsystem>`; structured logs `{subsystem, severity, ts}`. Client: `window.__debug` live registry; modules register on mount. `console.log` is not observability. Discovery of a gap during PLAN adds a `.prd` item the same pass — never deferred.
|
|
188
79
|
|
|
189
|
-
|
|
80
|
+
`window.__debug` is THE in-page registry; `test.js` at project root is the sole out-of-page test asset. Any new file whose purpose is to exercise, smoke-test, demo, or sandbox in-page behavior outside that registry fights the discipline — extend the registry instead.
|
|
190
81
|
|
|
191
|
-
|
|
82
|
+
## Test discipline encoded in the plan
|
|
192
83
|
|
|
193
|
-
|
|
194
|
-
- The agent-machine tool-loop test exercises bash dispatch → no separate bash test needed beyond a smoke-call inside the tools+toolsets group.
|
|
195
|
-
- The dashboard test asserts the API surface AND that the registry has ≥N tools → covers tool registration coverage.
|
|
196
|
-
- The plugins+memory group exercises observability metrics + achievements → no need for a separate plugins-extra group.
|
|
197
|
-
- The gateway test exercising one platform plus a platform-stub-shape loop covers all 18 adapters in one group.
|
|
84
|
+
One `test.js` at project root, 200-line hard cap, real data, real system. No fixtures, mocks, or scattered tests. A second test runner under any name in any directory is a smuggled parallel surface.
|
|
198
85
|
|
|
199
|
-
|
|
86
|
+
The 200 lines are a *budget* for maximum surface coverage, not a target. Subsystems get one combined group each — names joined with `+` (`home+config+skin`, `mcp+swe+distributions+account+credpool`). When a new subsystem's failure mode overlaps an existing group's side-effects, fold the assertion in rather than creating a new group. When `wc -l test.js` reports more than 200 lines, the discipline is *merge groups + drop redundancy*, never split.
|
|
200
87
|
|
|
201
|
-
|
|
88
|
+
## Execution norms encoded in the plan
|
|
202
89
|
|
|
203
|
-
|
|
90
|
+
`exec:<lang>` only via Bash; file I/O via `exec:nodejs` + `fs`; git directly in Bash; never `Bash(node/npm/npx/bun)`. Paths in `exec:nodejs` are platform-literal — use `os.tmpdir()` and `path.join`, reserve `/tmp/...` for `exec:bash` heredocs. Every `exec:<lang>` and `exec:bash` call passes `--timeout-ms <ms>`; on timeout, partial output is preserved and the runner emits `[exec timed out after Nms; partial output above]` — re-issue with a higher budget rather than retrying blindly.
|
|
204
91
|
|
|
205
|
-
|
|
92
|
+
`exec:codesearch` only — Grep/Glob/Find/Explore are hook-blocked. Start two words, change/add one per pass, minimum four attempts before concluding absent.
|
|
206
93
|
|
|
207
|
-
|
|
94
|
+
Pack runs use `Promise.allSettled`, each idea its own try/catch, under 12s per call.
|
|
208
95
|
|
|
209
|
-
##
|
|
96
|
+
## Dev workflow encoded in the plan
|
|
210
97
|
|
|
211
|
-
|
|
98
|
+
No comments. 200-line per-file cap. Fail loud. No duplication. Scan before edit. AGENTS.md edits route through the memorize sub-agent only. CHANGELOG.md gets one entry per commit.
|
|
212
99
|
|
|
213
|
-
|
|
100
|
+
Minimal-code process, stop at the first that resolves: native → library → structure (map / pipeline) → write.
|
package/skills/research/SKILL.md
CHANGED
|
@@ -6,21 +6,17 @@ allowed-tools: Skill, Bash, Agent, WebFetch, WebSearch, Read, Write
|
|
|
6
6
|
|
|
7
7
|
# Research
|
|
8
8
|
|
|
9
|
-
Lead orchestrates. Workers fetch. Findings converge. The lead never reads pages — workers do.
|
|
9
|
+
Lead orchestrates. Workers fetch. Findings converge on disk. The lead never reads pages — workers do.
|
|
10
10
|
|
|
11
|
-
|
|
11
|
+
Effort matches stakes. A single fact is one short fetch. A vendor comparison is a handful of workers, each owning one vendor. A landscape survey is ten or more, each owning one axis. Spending a fan-out on a fact wastes tokens; spending a fact-fetch on a landscape under-delivers.
|
|
12
12
|
|
|
13
|
-
Breadth first, depth on demand. Open with a wide sweep that maps the terrain
|
|
14
|
-
|
|
15
|
-
Effort matches stakes. A single fact warrants one short fetch. A vendor comparison warrants a handful of workers, each owning one vendor. A landscape survey warrants ten or more, each owning one axis. Spending a fan-out on a fact wastes tokens; spending a fact-fetch on a landscape under-delivers.
|
|
16
|
-
|
|
17
|
-
Workers run in parallel. Independent questions launch in one message, never serialized. Serial fan-out is the default failure mode — guard against it explicitly.
|
|
13
|
+
Breadth first, depth on demand. Open with a wide sweep that maps the terrain, then commit deep dives only where the sweep surfaces something load-bearing. A narrow opening misses the alternative the user actually needed.
|
|
18
14
|
|
|
19
15
|
## Worker contract
|
|
20
16
|
|
|
21
|
-
Each worker receives
|
|
17
|
+
Each worker receives the precise question it owns, the shape of the answer (bullets, table row, prose paragraph), the boundary of what it must not pursue, and the destination path under `.gm/research/<slug>/<worker-id>.md`. Workers write structured findings to disk and return only a path plus a one-line summary. The lead reads the paths it cares about; the rest stay on disk. Returning full prose through the agent boundary burns context that the synthesis pass needs.
|
|
22
18
|
|
|
23
|
-
Workers
|
|
19
|
+
Workers run in parallel — independent questions launch in one message, never serialized.
|
|
24
20
|
|
|
25
21
|
## Citations
|
|
26
22
|
|
|
@@ -28,13 +24,11 @@ A claim without a source URL is a hallucination waiting to be quoted. Workers at
|
|
|
28
24
|
|
|
29
25
|
## Source quality
|
|
30
26
|
|
|
31
|
-
|
|
27
|
+
Vendor docs, RFCs, primary repos, dated blog posts from named authors, and academic preprints beat aggregator pages. When two sources disagree, the older primary usually beats the newer aggregator.
|
|
32
28
|
|
|
33
29
|
## Convergence
|
|
34
30
|
|
|
35
|
-
Synthesis happens once, after all workers return. Mid-flight summarisation truncates findings the next worker would have built on.
|
|
36
|
-
|
|
37
|
-
If a worker's return reveals a new axis the original plan missed, expand the fan-out — do not stretch an existing worker past its brief.
|
|
31
|
+
Synthesis happens once, after all workers return. Mid-flight summarisation truncates findings the next worker would have built on. If a worker's return reveals a new axis the original plan missed, expand the fan-out — do not stretch an existing worker past its brief.
|
|
38
32
|
|
|
39
33
|
## When not to fan out
|
|
40
34
|
|
|
@@ -42,4 +36,4 @@ One question, one page, one fetch. A single `WebFetch` answers it. The fan-out m
|
|
|
42
36
|
|
|
43
37
|
## Handoff
|
|
44
38
|
|
|
45
|
-
Final answer cites every load-bearing claim, names the workers' output paths for audit, and surfaces disagreements
|
|
39
|
+
Final answer cites every load-bearing claim, names the workers' output paths for audit, and surfaces disagreements rather than averaging them away.
|
package/skills/ssh/SKILL.md
CHANGED
|
@@ -3,13 +3,14 @@ name: ssh
|
|
|
3
3
|
description: Run shell commands on remote SSH hosts via exec:ssh. Reads targets from ~/.claude/ssh-targets.json. Use for deploying, monitoring, or controlling remote machines.
|
|
4
4
|
---
|
|
5
5
|
|
|
6
|
-
# exec:ssh — Remote SSH
|
|
6
|
+
# exec:ssh — Remote SSH execution
|
|
7
7
|
|
|
8
|
-
Runs shell commands on remote host. No manual connection needed.
|
|
8
|
+
Runs shell commands on a remote host. No manual connection needed.
|
|
9
9
|
|
|
10
10
|
## Setup
|
|
11
11
|
|
|
12
12
|
`~/.claude/ssh-targets.json`:
|
|
13
|
+
|
|
13
14
|
```json
|
|
14
15
|
{
|
|
15
16
|
"default": { "host": "192.168.1.10", "port": 22, "username": "pi", "password": "pass" },
|
|
@@ -17,7 +18,7 @@ Runs shell commands on remote host. No manual connection needed.
|
|
|
17
18
|
}
|
|
18
19
|
```
|
|
19
20
|
|
|
20
|
-
|
|
21
|
+
`host` and `username` required. `port` defaults to 22. Auth: `password` OR `keyPath` + optional `passphrase`.
|
|
21
22
|
|
|
22
23
|
## Usage
|
|
23
24
|
|
|
@@ -26,28 +27,32 @@ exec:ssh
|
|
|
26
27
|
<shell command>
|
|
27
28
|
```
|
|
28
29
|
|
|
29
|
-
Named host with `@name` on first line:
|
|
30
|
+
Named host with `@name` on the first line:
|
|
31
|
+
|
|
30
32
|
```
|
|
31
33
|
exec:ssh
|
|
32
34
|
@prod
|
|
33
35
|
sudo systemctl restart myapp
|
|
34
36
|
```
|
|
35
37
|
|
|
36
|
-
## Process
|
|
38
|
+
## Process persistence
|
|
39
|
+
|
|
40
|
+
SSH kills child processes on close. To survive disconnect:
|
|
37
41
|
|
|
38
|
-
SSH kills child processes on close. To persist:
|
|
39
42
|
```
|
|
40
43
|
exec:ssh
|
|
41
44
|
sudo systemctl reset-failed myunit 2>/dev/null; systemd-run --unit=myunit bash -c 'your-command'
|
|
42
45
|
```
|
|
43
46
|
|
|
44
|
-
Unique name:
|
|
47
|
+
Unique unit name per launch:
|
|
48
|
+
|
|
45
49
|
```
|
|
46
50
|
exec:ssh
|
|
47
51
|
systemd-run --unit=job-$(date +%s) bash -c 'nohup myprogram &'
|
|
48
52
|
```
|
|
49
53
|
|
|
50
|
-
|
|
54
|
+
No-systemd fallback:
|
|
55
|
+
|
|
51
56
|
```
|
|
52
57
|
exec:ssh
|
|
53
58
|
setsid nohup bash -c 'myprogram > /tmp/out.log 2>&1' &
|
|
@@ -55,7 +60,8 @@ setsid nohup bash -c 'myprogram > /tmp/out.log 2>&1' &
|
|
|
55
60
|
|
|
56
61
|
## Dependency
|
|
57
62
|
|
|
58
|
-
Requires `ssh2`
|
|
63
|
+
Requires `ssh2` in `~/.claude/gm-tools`:
|
|
64
|
+
|
|
59
65
|
```
|
|
60
66
|
exec:bash
|
|
61
67
|
cd ~/.claude/gm-tools && npm install ssh2
|