gru-ai 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/skills/brainstorm/SKILL.md +340 -0
- package/.claude/skills/code-review-excellence/SKILL.md +198 -0
- package/.claude/skills/directive/SKILL.md +121 -0
- package/.claude/skills/directive/docs/pipeline/00-delegation-and-triage.md +181 -0
- package/.claude/skills/directive/docs/pipeline/01-checkpoint.md +34 -0
- package/.claude/skills/directive/docs/pipeline/02-read-directive.md +38 -0
- package/.claude/skills/directive/docs/pipeline/03-read-context.md +15 -0
- package/.claude/skills/directive/docs/pipeline/04-challenge.md +38 -0
- package/.claude/skills/directive/docs/pipeline/05-planning.md +64 -0
- package/.claude/skills/directive/docs/pipeline/06-technical-audit.md +88 -0
- package/.claude/skills/directive/docs/pipeline/07-plan-approval.md +145 -0
- package/.claude/skills/directive/docs/pipeline/07b-project-brainstorm.md +85 -0
- package/.claude/skills/directive/docs/pipeline/08-worktree-and-state.md +50 -0
- package/.claude/skills/directive/docs/pipeline/09-execute-projects.md +709 -0
- package/.claude/skills/directive/docs/pipeline/10-wrapup.md +242 -0
- package/.claude/skills/directive/docs/pipeline/11-completion-gate.md +75 -0
- package/.claude/skills/directive/docs/reference/rules/casting-rules.md +78 -0
- package/.claude/skills/directive/docs/reference/rules/failure-handling.md +20 -0
- package/.claude/skills/directive/docs/reference/rules/phase-definitions.md +42 -0
- package/.claude/skills/directive/docs/reference/rules/scope-and-dod.md +30 -0
- package/.claude/skills/directive/docs/reference/schemas/audit-output.md +44 -0
- package/.claude/skills/directive/docs/reference/schemas/brainstorm-output.md +52 -0
- package/.claude/skills/directive/docs/reference/schemas/challenger-output.md +13 -0
- package/.claude/skills/directive/docs/reference/schemas/checkpoint.md +18 -0
- package/.claude/skills/directive/docs/reference/schemas/current-json.md +5 -0
- package/.claude/skills/directive/docs/reference/schemas/directive-json.md +143 -0
- package/.claude/skills/directive/docs/reference/schemas/investigation-output.md +37 -0
- package/.claude/skills/directive/docs/reference/schemas/plan-schema.md +103 -0
- package/.claude/skills/directive/docs/reference/templates/architect-prompt.md +66 -0
- package/.claude/skills/directive/docs/reference/templates/auditor-prompt.md +53 -0
- package/.claude/skills/directive/docs/reference/templates/brainstorm-prompt.md +68 -0
- package/.claude/skills/directive/docs/reference/templates/challenger-prompt.md +35 -0
- package/.claude/skills/directive/docs/reference/templates/digest.md +134 -0
- package/.claude/skills/directive/docs/reference/templates/investigator-prompt.md +51 -0
- package/.claude/skills/directive/docs/reference/templates/planner-prompt.md +130 -0
- package/.claude/skills/frontend-design/SKILL.md +42 -0
- package/.claude/skills/gruai-agents/SKILL.md +161 -0
- package/.claude/skills/gruai-config/SKILL.md +61 -0
- package/.claude/skills/healthcheck/SKILL.md +216 -0
- package/.claude/skills/report/SKILL.md +380 -0
- package/.claude/skills/scout/SKILL.md +452 -0
- package/.claude/skills/seo-audit/SKILL.md +107 -0
- package/.claude/skills/walkthrough/SKILL.md +274 -0
- package/.claude/skills/webapp-testing/SKILL.md +96 -0
- package/LICENSE +21 -0
- package/README.md +206 -0
- package/cli/templates/CLAUDE.md.template +57 -0
- package/cli/templates/agent-roles/backend.md +47 -0
- package/cli/templates/agent-roles/cmo.md +52 -0
- package/cli/templates/agent-roles/content.md +48 -0
- package/cli/templates/agent-roles/coo.md +66 -0
- package/cli/templates/agent-roles/cpo.md +52 -0
- package/cli/templates/agent-roles/cto.md +63 -0
- package/cli/templates/agent-roles/data.md +46 -0
- package/cli/templates/agent-roles/design.md +46 -0
- package/cli/templates/agent-roles/frontend.md +47 -0
- package/cli/templates/agent-roles/fullstack.md +47 -0
- package/cli/templates/agent-roles/qa.md +46 -0
- package/cli/templates/backlog.json.template +3 -0
- package/cli/templates/directive.json.template +9 -0
- package/cli/templates/directive.md.template +23 -0
- package/cli/templates/goals-index.md +21 -0
- package/cli/templates/gruai.config.json.template +12 -0
- package/cli/templates/lessons.md +16 -0
- package/cli/templates/vision.md +35 -0
- package/cli/templates/welcome-directive/directive.json +9 -0
- package/cli/templates/welcome-directive/directive.md +53 -0
- package/dist/assets/GamePage-C5XQQOQH.js +49 -0
- package/dist/assets/README.md +17 -0
- package/dist/assets/characters/char_0.png +0 -0
- package/dist/assets/characters/char_1.png +0 -0
- package/dist/assets/characters/char_10.png +0 -0
- package/dist/assets/characters/char_11.png +0 -0
- package/dist/assets/characters/char_2.png +0 -0
- package/dist/assets/characters/char_3.png +0 -0
- package/dist/assets/characters/char_4.png +0 -0
- package/dist/assets/characters/char_5.png +0 -0
- package/dist/assets/characters/char_6.png +0 -0
- package/dist/assets/characters/char_7.png +0 -0
- package/dist/assets/characters/char_8.png +0 -0
- package/dist/assets/characters/char_9.png +0 -0
- package/dist/assets/index-CnTPDqpP.js +12 -0
- package/dist/assets/index-gR5q7ikB.css +1 -0
- package/dist/assets/office/furniture.png +0 -0
- package/dist/assets/office/room-builder.png +0 -0
- package/dist/index.html +16 -0
- package/dist-server/scripts/intelligence-trends.d.ts +100 -0
- package/dist-server/scripts/intelligence-trends.js +365 -0
- package/dist-server/server/actions/cleanup.d.ts +4 -0
- package/dist-server/server/actions/cleanup.js +30 -0
- package/dist-server/server/actions/send-input.d.ts +6 -0
- package/dist-server/server/actions/send-input.js +147 -0
- package/dist-server/server/actions/terminal.d.ts +4 -0
- package/dist-server/server/actions/terminal.js +427 -0
- package/dist-server/server/config.d.ts +9 -0
- package/dist-server/server/config.js +217 -0
- package/dist-server/server/db.d.ts +7 -0
- package/dist-server/server/db.js +79 -0
- package/dist-server/server/hooks/event-receiver.d.ts +11 -0
- package/dist-server/server/hooks/event-receiver.js +36 -0
- package/dist-server/server/index.d.ts +1 -0
- package/dist-server/server/index.js +552 -0
- package/dist-server/server/notifications/macos.d.ts +5 -0
- package/dist-server/server/notifications/macos.js +22 -0
- package/dist-server/server/notifications/notifier.d.ts +17 -0
- package/dist-server/server/notifications/notifier.js +110 -0
- package/dist-server/server/parsers/process-discovery.d.ts +39 -0
- package/dist-server/server/parsers/process-discovery.js +776 -0
- package/dist-server/server/parsers/session-scanner.d.ts +56 -0
- package/dist-server/server/parsers/session-scanner.js +390 -0
- package/dist-server/server/parsers/session-state.d.ts +68 -0
- package/dist-server/server/parsers/session-state.js +696 -0
- package/dist-server/server/parsers/session-state.test.d.ts +1 -0
- package/dist-server/server/parsers/session-state.test.js +950 -0
- package/dist-server/server/parsers/task-parser.d.ts +10 -0
- package/dist-server/server/parsers/task-parser.js +97 -0
- package/dist-server/server/parsers/team-parser.d.ts +3 -0
- package/dist-server/server/parsers/team-parser.js +67 -0
- package/dist-server/server/platform/__tests__/claude-code.test.d.ts +1 -0
- package/dist-server/server/platform/__tests__/claude-code.test.js +311 -0
- package/dist-server/server/platform/claude-code.d.ts +34 -0
- package/dist-server/server/platform/claude-code.js +94 -0
- package/dist-server/server/platform/index.d.ts +5 -0
- package/dist-server/server/platform/index.js +1 -0
- package/dist-server/server/platform/types.d.ts +190 -0
- package/dist-server/server/platform/types.js +9 -0
- package/dist-server/server/state/aggregator.d.ts +42 -0
- package/dist-server/server/state/aggregator.js +1080 -0
- package/dist-server/server/state/work-item-types.d.ts +555 -0
- package/dist-server/server/state/work-item-types.js +168 -0
- package/dist-server/server/types.d.ts +237 -0
- package/dist-server/server/types.js +1 -0
- package/dist-server/server/watchers/claude-watcher.d.ts +17 -0
- package/dist-server/server/watchers/claude-watcher.js +130 -0
- package/dist-server/server/watchers/context-watcher.d.ts +22 -0
- package/dist-server/server/watchers/context-watcher.js +125 -0
- package/dist-server/server/watchers/directive-watcher.d.ts +46 -0
- package/dist-server/server/watchers/directive-watcher.js +497 -0
- package/dist-server/server/watchers/session-watcher.d.ts +18 -0
- package/dist-server/server/watchers/session-watcher.js +126 -0
- package/dist-server/server/watchers/state-watcher.d.ts +36 -0
- package/dist-server/server/watchers/state-watcher.js +369 -0
- package/package.json +68 -0
|
@@ -0,0 +1,274 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: "walkthrough"
|
|
3
|
+
description: "Cognitive walkthrough — simulate real user scenarios against the current system to find gaps between ideal and actual. Takes an optional scenario name or 'all' to run standing scenarios. Run after major directives or periodically as a reality check."
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
# Walkthrough — Cognitive Walkthrough
|
|
7
|
+
|
|
8
|
+
## Role Resolution
|
|
9
|
+
|
|
10
|
+
Read `.claude/agent-registry.json` to map roles to agent names. Use each agent's `id` as the `subagent_type` when spawning. The CPO designs the ideal experience; the CTO traces the actual implementation.
|
|
11
|
+
|
|
12
|
+
---
|
|
13
|
+
|
|
14
|
+
Simulate user scenarios against the current system. Find what's broken, missing, or surprising.
|
|
15
|
+
|
|
16
|
+
**The pattern:** For each scenario, design what SHOULD happen (ideal), trace what DOES happen (actual), report the gaps.
|
|
17
|
+
|
|
18
|
+
**Arguments:** `$ARGUMENTS`
|
|
19
|
+
- A specific scenario name (e.g., `ceo-runs-directive`) → run just that one
|
|
20
|
+
- `all` → run all standing scenarios
|
|
21
|
+
- A free-text scenario description (e.g., `"seller wants to see competitor prices"`) → ad-hoc walkthrough
|
|
22
|
+
- Empty → list available scenarios and ask which to run
|
|
23
|
+
|
|
24
|
+
## Step 1: Load Scenarios
|
|
25
|
+
|
|
26
|
+
### If $ARGUMENTS is a scenario name or "all":
|
|
27
|
+
|
|
28
|
+
Read standing scenarios from `.context/lessons/scenarios.md`.
|
|
29
|
+
|
|
30
|
+
Each scenario has:
|
|
31
|
+
- **Name**: slug identifier
|
|
32
|
+
- **Actor**: who is performing the action (CEO, seller, shopper, developer)
|
|
33
|
+
- **Trigger**: what starts the flow ("CEO types /directive improve-security")
|
|
34
|
+
- **Goal**: what the actor wants to achieve
|
|
35
|
+
- **Critical path**: the steps that MUST work for the scenario to succeed
|
|
36
|
+
|
|
37
|
+
If `all`, load all scenarios. If a specific name, load just that one.
|
|
38
|
+
|
|
39
|
+
### If $ARGUMENTS is free text:
|
|
40
|
+
|
|
41
|
+
Treat it as an ad-hoc scenario. Spawn the CPO to formalize it:
|
|
42
|
+
|
|
43
|
+
```
|
|
44
|
+
You are the CPO. The CEO described a user scenario informally:
|
|
45
|
+
|
|
46
|
+
"{$ARGUMENTS}"
|
|
47
|
+
|
|
48
|
+
Formalize it into this structure:
|
|
49
|
+
{
|
|
50
|
+
"name": "slug-name",
|
|
51
|
+
"actor": "who is doing this",
|
|
52
|
+
"trigger": "what starts the flow",
|
|
53
|
+
"goal": "what the actor wants to achieve",
|
|
54
|
+
"critical_path": [
|
|
55
|
+
"Step 1: what should happen first",
|
|
56
|
+
"Step 2: what should happen next",
|
|
57
|
+
...
|
|
58
|
+
],
|
|
59
|
+
"success_criteria": "how do you know the scenario succeeded"
|
|
60
|
+
}
|
|
61
|
+
|
|
62
|
+
Think from the ACTOR's perspective, not the system's. What does the actor expect at each step? What would surprise or frustrate them?
|
|
63
|
+
|
|
64
|
+
CRITICAL OUTPUT FORMAT: First character must be `{`, last must be `}`. JSON only.
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
### If $ARGUMENTS is empty:
|
|
68
|
+
|
|
69
|
+
Read `.context/lessons/scenarios.md` and list available scenarios:
|
|
70
|
+
|
|
71
|
+
```
|
|
72
|
+
Available scenarios:
|
|
73
|
+
1. ceo-runs-directive — CEO issues a directive, wants it handled without blocking
|
|
74
|
+
2. ceo-morning-review — CEO opens dashboard, wants to know what happened overnight
|
|
75
|
+
3. ...
|
|
76
|
+
|
|
77
|
+
Which scenario to walk through? (or describe a new one)
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
Use AskUserQuestion with the scenario names as options.
|
|
81
|
+
|
|
82
|
+
## Step 2: Design the Ideal (per scenario)
|
|
83
|
+
|
|
84
|
+
For each scenario, spawn the CPO to design the **ideal experience** — what SHOULD happen if everything worked perfectly.
|
|
85
|
+
|
|
86
|
+
The CPO receives:
|
|
87
|
+
- Their personality file
|
|
88
|
+
- The scenario definition
|
|
89
|
+
- `.context/vision.md` — so the ideal aligns with the north star
|
|
90
|
+
- `.context/preferences.md` — CEO expectations
|
|
91
|
+
|
|
92
|
+
```
|
|
93
|
+
You are the CPO. You are designing the IDEAL user experience for this scenario. Don't look at the current implementation — design from scratch what the perfect flow would be.
|
|
94
|
+
|
|
95
|
+
SCENARIO:
|
|
96
|
+
- Actor: {actor}
|
|
97
|
+
- Trigger: {trigger}
|
|
98
|
+
- Goal: {goal}
|
|
99
|
+
|
|
100
|
+
For each step of the critical path, describe:
|
|
101
|
+
1. What the actor does
|
|
102
|
+
2. What the system should do in response
|
|
103
|
+
3. What the actor sees/experiences
|
|
104
|
+
4. How long it should take (instant / seconds / minutes / async)
|
|
105
|
+
5. What would frustrate the actor at this step
|
|
106
|
+
|
|
107
|
+
Then describe the END STATE: what does "success" look like from the actor's perspective?
|
|
108
|
+
|
|
109
|
+
Think like a product designer, not an engineer. The actor doesn't care about checkpoints, worktrees, or JSON schemas. They care about: did the thing work? Was it fast? Did I have to babysit it?
|
|
110
|
+
|
|
111
|
+
CRITICAL OUTPUT FORMAT: First character must be `{`, last must be `}`. JSON only.
|
|
112
|
+
|
|
113
|
+
{
|
|
114
|
+
"scenario": "{name}",
|
|
115
|
+
"ideal_flow": [
|
|
116
|
+
{
|
|
117
|
+
"step": 1,
|
|
118
|
+
"actor_action": "what the actor does",
|
|
119
|
+
"system_response": "what should happen",
|
|
120
|
+
"actor_experience": "what they see/feel",
|
|
121
|
+
"timing": "instant | seconds | minutes | async",
|
|
122
|
+
"frustration_risk": "what could annoy the actor here"
|
|
123
|
+
}
|
|
124
|
+
],
|
|
125
|
+
"end_state": "what success looks like",
|
|
126
|
+
"key_expectations": ["the non-negotiable things the actor expects"]
|
|
127
|
+
}
|
|
128
|
+
```
|
|
129
|
+
|
|
130
|
+
## Step 3: Trace the Actual (per scenario)
|
|
131
|
+
|
|
132
|
+
For each scenario, spawn the CTO to trace what ACTUALLY happens in the current system. The CTO reads code, config, and skill files to follow the real execution path.
|
|
133
|
+
|
|
134
|
+
The CTO receives:
|
|
135
|
+
- Their personality file
|
|
136
|
+
- The scenario definition
|
|
137
|
+
- The CPO's ideal flow (from Step 2)
|
|
138
|
+
- `.context/lessons/` topic files — known issues
|
|
139
|
+
- `.context/preferences.md`
|
|
140
|
+
|
|
141
|
+
```
|
|
142
|
+
You are the CTO. You are tracing what ACTUALLY happens in the current system for this scenario. Read the real code and config — don't guess.
|
|
143
|
+
|
|
144
|
+
SCENARIO:
|
|
145
|
+
- Actor: {actor}
|
|
146
|
+
- Trigger: {trigger}
|
|
147
|
+
- Goal: {goal}
|
|
148
|
+
- Critical path: {steps}
|
|
149
|
+
|
|
150
|
+
IDEAL FLOW (from the CPO):
|
|
151
|
+
{CPO's ideal_flow JSON}
|
|
152
|
+
|
|
153
|
+
For each step of the ideal flow, trace what the current system actually does:
|
|
154
|
+
1. Read the relevant files (SKILL.md, agent files, code)
|
|
155
|
+
2. Follow the execution path step by step
|
|
156
|
+
3. Note where reality matches the ideal
|
|
157
|
+
4. Note where reality DIVERGES from the ideal
|
|
158
|
+
5. Note where the system does NOTHING (missing functionality)
|
|
159
|
+
|
|
160
|
+
Be thorough. Grep for entry points, read the actual instructions, trace the branching logic. Don't assume — verify.
|
|
161
|
+
|
|
162
|
+
CRITICAL OUTPUT FORMAT: First character must be `{`, last must be `}`. JSON only.
|
|
163
|
+
|
|
164
|
+
{
|
|
165
|
+
"scenario": "{name}",
|
|
166
|
+
"actual_flow": [
|
|
167
|
+
{
|
|
168
|
+
"ideal_step": 1,
|
|
169
|
+
"ideal_expectation": "what the CPO said should happen",
|
|
170
|
+
"actual_behavior": "what the system actually does",
|
|
171
|
+
"status": "match | diverge | missing | broken",
|
|
172
|
+
"evidence": "file:line or config entry that proves this",
|
|
173
|
+
"notes": "explanation of the gap, if any"
|
|
174
|
+
}
|
|
175
|
+
],
|
|
176
|
+
"gaps_found": [
|
|
177
|
+
{
|
|
178
|
+
"id": "gap-slug",
|
|
179
|
+
"severity": "critical | major | minor | cosmetic",
|
|
180
|
+
"type": "missing | broken | wrong | slow | confusing",
|
|
181
|
+
"description": "what's wrong",
|
|
182
|
+
"ideal": "what should happen",
|
|
183
|
+
"actual": "what does happen",
|
|
184
|
+
"evidence": "file:line",
|
|
185
|
+
"suggested_fix": "how to close the gap"
|
|
186
|
+
}
|
|
187
|
+
],
|
|
188
|
+
"working_well": ["things that match the ideal — acknowledge what's good"]
|
|
189
|
+
}
|
|
190
|
+
```
|
|
191
|
+
|
|
192
|
+
## Step 4: Synthesize Gaps
|
|
193
|
+
|
|
194
|
+
After all scenarios are traced, consolidate the findings:
|
|
195
|
+
|
|
196
|
+
1. **Deduplicate** — the same gap may appear in multiple scenarios
|
|
197
|
+
2. **Prioritize** — critical gaps that block the actor's goal come first
|
|
198
|
+
3. **Cross-reference** — gaps that appear in 2+ scenarios are systemic
|
|
199
|
+
4. **Classify effort** — quick fix (< 1 hour), medium (half day), large (1+ days)
|
|
200
|
+
|
|
201
|
+
## Step 5: Present to CEO
|
|
202
|
+
|
|
203
|
+
```
|
|
204
|
+
# Walkthrough Report — {date}
|
|
205
|
+
|
|
206
|
+
## Scenarios Walked: {count}
|
|
207
|
+
|
|
208
|
+
### {Scenario Name}
|
|
209
|
+
**Actor**: {actor} | **Goal**: {goal}
|
|
210
|
+
|
|
211
|
+
**Ideal vs Actual:**
|
|
212
|
+
| Step | Ideal | Actual | Status |
|
|
213
|
+
|------|-------|--------|--------|
|
|
214
|
+
| 1 | {ideal} | {actual} | ✅ match / ⚠️ diverge / ❌ missing / 💥 broken |
|
|
215
|
+
| 2 | ... | ... | ... |
|
|
216
|
+
|
|
217
|
+
**Gaps Found: {count}**
|
|
218
|
+
- [{severity}] **{description}** — {type}
|
|
219
|
+
Ideal: {what should happen}
|
|
220
|
+
Actual: {what does happen}
|
|
221
|
+
Fix: {suggested fix} ({effort})
|
|
222
|
+
|
|
223
|
+
**Working Well:**
|
|
224
|
+
- {things that matched the ideal}
|
|
225
|
+
|
|
226
|
+
(repeat per scenario)
|
|
227
|
+
|
|
228
|
+
## Systemic Gaps (appear in 2+ scenarios)
|
|
229
|
+
- **{gap}** — found in: {scenario list}
|
|
230
|
+
|
|
231
|
+
## Summary
|
|
232
|
+
- Total gaps: {count} ({critical} critical, {major} major, {minor} minor, {cosmetic} cosmetic)
|
|
233
|
+
- Scenarios fully passing: {count}/{total}
|
|
234
|
+
- Top 3 fixes by impact: {list}
|
|
235
|
+
```
|
|
236
|
+
|
|
237
|
+
Then ask the CEO:
|
|
238
|
+
- "Create directive from gaps" — bundle gaps into a directive in `.context/directives/`
|
|
239
|
+
- "Add to backlog" — write gaps to the relevant goal's backlog
|
|
240
|
+
- "Note only" — just keep the report
|
|
241
|
+
|
|
242
|
+
## Step 6: Save Report
|
|
243
|
+
|
|
244
|
+
Write the full report to `.context/reports/walkthrough-{date}.md`
|
|
245
|
+
|
|
246
|
+
If gaps were approved as a directive, create it in `.context/directives/`.
|
|
247
|
+
|
|
248
|
+
## Standing Scenarios File
|
|
249
|
+
|
|
250
|
+
If `.context/lessons/scenarios.md` doesn't exist, create it with starter scenarios on first run. The CEO and team add scenarios over time as new flows become important.
|
|
251
|
+
|
|
252
|
+
## Failure Handling
|
|
253
|
+
|
|
254
|
+
| Situation | Action |
|
|
255
|
+
|-----------|--------|
|
|
256
|
+
| The CPO can't formalize the ad-hoc scenario | Ask CEO to clarify the scenario |
|
|
257
|
+
| The CTO can't find the entry point for a step | Mark as "missing — no implementation found" |
|
|
258
|
+
| A scenario has no gaps | Report it as passing — this is good news |
|
|
259
|
+
| scenarios.md doesn't exist | Create it with starter scenarios, then run |
|
|
260
|
+
|
|
261
|
+
## Rules
|
|
262
|
+
|
|
263
|
+
### NEVER
|
|
264
|
+
- Skip the ideal design (Step 2) — the whole point is comparing ideal vs actual
|
|
265
|
+
- Have the same agent design ideal AND trace actual — separate perspectives prevent bias
|
|
266
|
+
- Mark a gap as "minor" if it blocks the actor's goal — that's critical by definition
|
|
267
|
+
- Trace the actual by reading docs/comments — read the real code/config
|
|
268
|
+
|
|
269
|
+
### ALWAYS
|
|
270
|
+
- Design ideal BEFORE tracing actual — don't let current state constrain the ideal
|
|
271
|
+
- Include evidence (file:line) for every gap — no hand-waving
|
|
272
|
+
- Acknowledge what's working well — not just gaps
|
|
273
|
+
- Save the report even if no gaps found (it's a health signal)
|
|
274
|
+
- Use the CPO for ideal (product thinking) and the CTO for actual (technical tracing)
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: webapp-testing
|
|
3
|
+
description: Toolkit for interacting with and testing local web applications using Playwright. Supports verifying frontend functionality, debugging UI behavior, capturing browser screenshots, and viewing browser logs.
|
|
4
|
+
license: Complete terms in LICENSE.txt
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
# Web Application Testing
|
|
8
|
+
|
|
9
|
+
To test local web applications, write native Python Playwright scripts.
|
|
10
|
+
|
|
11
|
+
**Helper Scripts Available**:
|
|
12
|
+
- `scripts/with_server.py` - Manages server lifecycle (supports multiple servers)
|
|
13
|
+
|
|
14
|
+
**Always run scripts with `--help` first** to see usage. DO NOT read the source until you try running the script first and find that a customized solution is absolutely necessary. These scripts can be very large and thus pollute your context window. They exist to be called directly as black-box scripts rather than ingested into your context window.
|
|
15
|
+
|
|
16
|
+
## Decision Tree: Choosing Your Approach
|
|
17
|
+
|
|
18
|
+
```
|
|
19
|
+
User task → Is it static HTML?
|
|
20
|
+
├─ Yes → Read HTML file directly to identify selectors
|
|
21
|
+
│ ├─ Success → Write Playwright script using selectors
|
|
22
|
+
│ └─ Fails/Incomplete → Treat as dynamic (below)
|
|
23
|
+
│
|
|
24
|
+
└─ No (dynamic webapp) → Is the server already running?
|
|
25
|
+
├─ No → Run: python scripts/with_server.py --help
|
|
26
|
+
│ Then use the helper + write simplified Playwright script
|
|
27
|
+
│
|
|
28
|
+
└─ Yes → Reconnaissance-then-action:
|
|
29
|
+
1. Navigate and wait for networkidle
|
|
30
|
+
2. Take screenshot or inspect DOM
|
|
31
|
+
3. Identify selectors from rendered state
|
|
32
|
+
4. Execute actions with discovered selectors
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
## Example: Using with_server.py
|
|
36
|
+
|
|
37
|
+
To start a server, run `--help` first, then use the helper:
|
|
38
|
+
|
|
39
|
+
**Single server:**
|
|
40
|
+
```bash
|
|
41
|
+
python scripts/with_server.py --server "npm run dev" --port 5173 -- python your_automation.py
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
**Multiple servers (e.g., backend + frontend):**
|
|
45
|
+
```bash
|
|
46
|
+
python scripts/with_server.py \
|
|
47
|
+
--server "cd backend && python server.py" --port 3000 \
|
|
48
|
+
--server "cd frontend && npm run dev" --port 5173 \
|
|
49
|
+
-- python your_automation.py
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
To create an automation script, include only Playwright logic (servers are managed automatically):
|
|
53
|
+
```python
|
|
54
|
+
from playwright.sync_api import sync_playwright
|
|
55
|
+
|
|
56
|
+
with sync_playwright() as p:
|
|
57
|
+
browser = p.chromium.launch(headless=True) # Always launch chromium in headless mode
|
|
58
|
+
page = browser.new_page()
|
|
59
|
+
page.goto('http://localhost:5173') # Server already running and ready
|
|
60
|
+
page.wait_for_load_state('networkidle') # CRITICAL: Wait for JS to execute
|
|
61
|
+
# ... your automation logic
|
|
62
|
+
browser.close()
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
## Reconnaissance-Then-Action Pattern
|
|
66
|
+
|
|
67
|
+
1. **Inspect rendered DOM**:
|
|
68
|
+
```python
|
|
69
|
+
page.screenshot(path='/tmp/inspect.png', full_page=True)
|
|
70
|
+
content = page.content()
|
|
71
|
+
page.locator('button').all()
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
2. **Identify selectors** from inspection results
|
|
75
|
+
|
|
76
|
+
3. **Execute actions** using discovered selectors
|
|
77
|
+
|
|
78
|
+
## Common Pitfall
|
|
79
|
+
|
|
80
|
+
❌ **Don't** inspect the DOM before waiting for `networkidle` on dynamic apps
|
|
81
|
+
✅ **Do** wait for `page.wait_for_load_state('networkidle')` before inspection
|
|
82
|
+
|
|
83
|
+
## Best Practices
|
|
84
|
+
|
|
85
|
+
- **Use bundled scripts as black boxes** - To accomplish a task, consider whether one of the scripts available in `scripts/` can help. These scripts handle common, complex workflows reliably without cluttering the context window. Use `--help` to see usage, then invoke directly.
|
|
86
|
+
- Use `sync_playwright()` for synchronous scripts
|
|
87
|
+
- Always close the browser when done
|
|
88
|
+
- Use descriptive selectors: `text=`, `role=`, CSS selectors, or IDs
|
|
89
|
+
- Add appropriate waits: `page.wait_for_selector()` or `page.wait_for_timeout()`
|
|
90
|
+
|
|
91
|
+
## Reference Files
|
|
92
|
+
|
|
93
|
+
- **examples/** - Examples showing common patterns:
|
|
94
|
+
- `element_discovery.py` - Discovering buttons, links, and inputs on a page
|
|
95
|
+
- `static_html_automation.py` - Using file:// URLs for local HTML
|
|
96
|
+
- `console_logging.py` - Capturing console logs during automation
|
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 gruai contributors
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
ADDED
|
@@ -0,0 +1,206 @@
|
|
|
1
|
+
<p align="center">
|
|
2
|
+
<strong>Your AI dev team, visualized.</strong>
|
|
3
|
+
</p>
|
|
4
|
+
|
|
5
|
+
# gruai
|
|
6
|
+
|
|
7
|
+
[MIT License](LICENSE) · [TypeScript](https://www.typescriptlang.org/)
|
|
8
|
+
|
|
9
|
+
<p align="center">
|
|
10
|
+
<img src="docs/assets/demo.gif" alt="gruai pixel-art office simulation" width="720" />
|
|
11
|
+
</p>
|
|
12
|
+
|
|
13
|
+
Watch your AI agents work in a pixel-art office. gruai turns Claude Code sessions
|
|
14
|
+
into a living simulation -- autonomous agents sitting at desks, writing code,
|
|
15
|
+
reviewing PRs, and shipping features while you grab coffee.
|
|
16
|
+
|
|
17
|
+
No other tool does this. Devin is $500/mo and headless. CrewAI is YAML config files.
|
|
18
|
+
gruai gives you a real-time office where you can *see* your team think.
|
|
19
|
+
|
|
20
|
+
---
|
|
21
|
+
|
|
22
|
+
## Quickstart
|
|
23
|
+
|
|
24
|
+
```bash
|
|
25
|
+
git clone https://github.com/andrew-yangy/gruai.git
|
|
26
|
+
cd gruai && npm install
|
|
27
|
+
npm run dev
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
Then in Claude Code, run `/gruai-agents` to scaffold your AI team. The skill
|
|
31
|
+
generates agent personalities, a team registry, and a welcome directive so your
|
|
32
|
+
team has something to work on immediately. Open [http://localhost:5173](http://localhost:5173)
|
|
33
|
+
to see the pixel-art office.
|
|
34
|
+
|
|
35
|
+
---
|
|
36
|
+
|
|
37
|
+
## What You Get
|
|
38
|
+
|
|
39
|
+
### Pixel-Art Office Simulation
|
|
40
|
+
Your agents aren't abstract boxes on a kanban board. They're characters in an
|
|
41
|
+
office -- walking to their desks, typing at keyboards, gathering around a
|
|
42
|
+
whiteboard to brainstorm. The office is a live view of your project's real state.
|
|
43
|
+
|
|
44
|
+
### Autonomous Agent Teams
|
|
45
|
+
Define roles (planner, builder, reviewer, scout) and gruai handles the rest.
|
|
46
|
+
Agents pick up directives, decompose them into projects and tasks, build code,
|
|
47
|
+
review each other's work, and report back. You approve or redirect -- they execute.
|
|
48
|
+
|
|
49
|
+
### Directive Pipeline
|
|
50
|
+
Every piece of work flows through a structured pipeline: triage, planning, audit,
|
|
51
|
+
build, review, completion. Lightweight tasks skip the heavy steps automatically.
|
|
52
|
+
No ceremony for small fixes, full rigor for big features.
|
|
53
|
+
|
|
54
|
+
### Custom Teams
|
|
55
|
+
Create your own agent roles with markdown templates in `.claude/agents/`. Give
|
|
56
|
+
them personalities, specializations, and memory. The office simulation renders
|
|
57
|
+
them as unique characters.
|
|
58
|
+
|
|
59
|
+
### Live Dashboard
|
|
60
|
+
Session kanban, activity tracking, one-click terminal focus, approval actions,
|
|
61
|
+
prompt history, and usage insights. Everything you need to manage 10+ concurrent
|
|
62
|
+
Claude Code sessions without losing your mind.
|
|
63
|
+
|
|
64
|
+
---
|
|
65
|
+
|
|
66
|
+
## Two Ways to Use gruai
|
|
67
|
+
|
|
68
|
+
### Clone and run (recommended)
|
|
69
|
+
|
|
70
|
+
```bash
|
|
71
|
+
git clone https://github.com/andrew-yangy/gruai.git
|
|
72
|
+
cd gruai
|
|
73
|
+
npm install
|
|
74
|
+
npm run dev
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
Then run `/gruai-agents` in Claude Code to scaffold your team. The dashboard
|
|
78
|
+
discovers all Claude Code sessions from `~/.claude/` automatically -- no config needed.
|
|
79
|
+
|
|
80
|
+
### Install as npm package
|
|
81
|
+
|
|
82
|
+
```bash
|
|
83
|
+
npm install gruai
|
|
84
|
+
npm start
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
Then run `/gruai-agents` in Claude Code to scaffold agents into your project.
|
|
88
|
+
|
|
89
|
+
---
|
|
90
|
+
|
|
91
|
+
## How It Works
|
|
92
|
+
|
|
93
|
+
```
|
|
94
|
+
Your repo gruai
|
|
95
|
+
┌─────────────────────┐ ┌──────────────────────────────┐
|
|
96
|
+
│ .context/ │ │ │
|
|
97
|
+
│ directives/ │ file watch │ Directive Agent │
|
|
98
|
+
│ {id}/ ├──────────────>│ Pipeline -> Casting │
|
|
99
|
+
│ directive.json│ │ │
|
|
100
|
+
│ projects/ │ │ Session Pixel-Art │
|
|
101
|
+
│ │ │ Scanner --> Office UI │
|
|
102
|
+
│ .claude/ │ │ │
|
|
103
|
+
│ agents/ │ session │ Process Live │
|
|
104
|
+
│ {role}.md │ discovery │ Discovery -> Dashboard │
|
|
105
|
+
│ ├──────────────>│ │
|
|
106
|
+
│ ~/.claude/ │ │ WebSocket React │
|
|
107
|
+
│ projects/ │ │ Server --> Frontend │
|
|
108
|
+
│ *.jsonl │ │ │
|
|
109
|
+
└─────────────────────┘ └──────────────────────────────┘
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
1. **Directive Pipeline** reads `.context/directives/` and orchestrates work
|
|
113
|
+
through triage, planning, build, and review phases
|
|
114
|
+
2. **Session Scanner** watches `~/.claude/projects/` for live Claude Code sessions
|
|
115
|
+
and extracts metadata (model, branch, current tool, files being edited)
|
|
116
|
+
3. **Process Discovery** maps running Claude processes to terminal panes via
|
|
117
|
+
`ps` and `lsof` -- supports tmux, iTerm2, Warp, and Terminal.app
|
|
118
|
+
4. **Pixel-Art Office** renders agents as characters in an isometric office,
|
|
119
|
+
with real-time animations tied to actual session state
|
|
120
|
+
|
|
121
|
+
---
|
|
122
|
+
|
|
123
|
+
## Terminal Support
|
|
124
|
+
|
|
125
|
+
Session discovery works on any OS. Terminal focus requires OS integration and is currently macOS-only:
|
|
126
|
+
|
|
127
|
+
| Environment | Focus | Send Input | Notes |
|
|
128
|
+
|-------------|:-----:|:----------:|-------|
|
|
129
|
+
| iTerm2 + tmux | Yes | Yes | AppleScript + tmux pane switching |
|
|
130
|
+
| iTerm2 native | Yes | Yes | AppleScript with session ID |
|
|
131
|
+
| Warp + tmux | Yes | Yes | CGEvents + tmux |
|
|
132
|
+
| Warp native | Yes | No | CGEvents tab navigation |
|
|
133
|
+
| Terminal.app + tmux | Yes | Yes | Bring to front + tmux |
|
|
134
|
+
|
|
135
|
+
> Linux and Windows support coming soon.
|
|
136
|
+
|
|
137
|
+
---
|
|
138
|
+
|
|
139
|
+
## Optional: Claude Code Hooks
|
|
140
|
+
|
|
141
|
+
gruai works without hooks. For instant status detection (permission prompts, idle
|
|
142
|
+
states), add hooks to `~/.claude/settings.json`:
|
|
143
|
+
|
|
144
|
+
```json
|
|
145
|
+
{
|
|
146
|
+
"hooks": {
|
|
147
|
+
"Notification": [
|
|
148
|
+
{
|
|
149
|
+
"matcher": "permission_prompt",
|
|
150
|
+
"hooks": [{ "type": "command", "command": "bash -c 'INPUT=$(cat); curl -s -X POST http://localhost:4444/api/events -H \"Content-Type: application/json\" -d \"{\\\"type\\\":\\\"permission_prompt\\\",\\\"sessionId\\\":\\\"$(echo $INPUT | jq -r .session_id)\\\",\\\"message\\\":\\\"$(echo $INPUT | jq -r .message)\\\"}\"'" }]
|
|
151
|
+
}
|
|
152
|
+
],
|
|
153
|
+
"Stop": [
|
|
154
|
+
{
|
|
155
|
+
"hooks": [{ "type": "command", "command": "bash -c 'INPUT=$(cat); curl -s -X POST http://localhost:4444/api/events -H \"Content-Type: application/json\" -d \"{\\\"type\\\":\\\"stop\\\",\\\"sessionId\\\":\\\"$(echo $INPUT | jq -r .session_id)\\\"}\"'" }]
|
|
156
|
+
}
|
|
157
|
+
]
|
|
158
|
+
}
|
|
159
|
+
}
|
|
160
|
+
```
|
|
161
|
+
|
|
162
|
+
Without hooks, status is updated via filesystem scanning (slight delay). With hooks,
|
|
163
|
+
updates are instant.
|
|
164
|
+
|
|
165
|
+
---
|
|
166
|
+
|
|
167
|
+
## Tech Stack
|
|
168
|
+
|
|
169
|
+
| Layer | Stack |
|
|
170
|
+
|-------|-------|
|
|
171
|
+
| Server | Node.js + WebSocket + SQLite + chokidar |
|
|
172
|
+
| Frontend | React 19 + Vite + Zustand + Tailwind v4 + shadcn/ui |
|
|
173
|
+
| Game | Canvas 2D pixel-art engine, 16x16 tile system |
|
|
174
|
+
| Terminal | AppleScript (iTerm2) + CGEvents (Warp) + tmux CLI |
|
|
175
|
+
| Data | Zero external services -- reads from `~/.claude/` locally |
|
|
176
|
+
|
|
177
|
+
---
|
|
178
|
+
|
|
179
|
+
## Scripts
|
|
180
|
+
|
|
181
|
+
```bash
|
|
182
|
+
npm run dev # Dev mode (server + client with hot reload)
|
|
183
|
+
npm run dev:server # Server only (port 4444)
|
|
184
|
+
npm run dev:client # Vite dev only
|
|
185
|
+
npm start # Production server (serves built assets)
|
|
186
|
+
npm run build # Production build
|
|
187
|
+
npm run type-check # TypeScript check
|
|
188
|
+
npm run lint # ESLint
|
|
189
|
+
```
|
|
190
|
+
|
|
191
|
+
## Claude Code Skills
|
|
192
|
+
|
|
193
|
+
```
|
|
194
|
+
/gruai-agents # Scaffold AI agent team (replaces gruai init)
|
|
195
|
+
/gruai-config # Update framework files to latest version
|
|
196
|
+
/directive # Run work through the directive pipeline
|
|
197
|
+
/report # CEO dashboard report
|
|
198
|
+
/healthcheck # Internal codebase health check
|
|
199
|
+
/scout # External intelligence gathering
|
|
200
|
+
```
|
|
201
|
+
|
|
202
|
+
---
|
|
203
|
+
|
|
204
|
+
## License
|
|
205
|
+
|
|
206
|
+
[MIT](LICENSE)
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
# {{PROJECT_NAME}} — Claude Code Rules
|
|
2
|
+
|
|
3
|
+
## What This Is
|
|
4
|
+
{{PROJECT_NAME}} uses gruai, an autonomous AI company framework. See `.context/vision.md` for the full vision.
|
|
5
|
+
|
|
6
|
+
## Context Tree Structure
|
|
7
|
+
|
|
8
|
+
```
|
|
9
|
+
.context/
|
|
10
|
+
|-- vision.md # System vision (read first)
|
|
11
|
+
|
|
|
12
|
+
|-- directives/ # ALL work lives here: Directive > Project > Task
|
|
13
|
+
| |-- {id}/
|
|
14
|
+
| | |-- directive.json # Pipeline state, weight, category
|
|
15
|
+
| | |-- directive.md # CEO brief
|
|
16
|
+
| | +-- projects/
|
|
17
|
+
| | |-- {project-id}/
|
|
18
|
+
| | | +-- project.json # Tasks[], DOD, agents -- THE source of truth
|
|
19
|
+
| | +-- ...
|
|
20
|
+
| +-- ...
|
|
21
|
+
|
|
|
22
|
+
|-- reports/ # CEO dashboard reports
|
|
23
|
+
| |-- daily-{date}.md
|
|
24
|
+
| +-- weekly-{date}.md
|
|
25
|
+
|
|
|
26
|
+
|-- lessons/ # Flat, topic-based knowledge
|
|
27
|
+
| +-- index.md
|
|
28
|
+
|
|
|
29
|
+
|-- backlog.json # Prioritized work items
|
|
30
|
+
|-- preferences.md # CEO standing orders
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
## How to Read the Context Tree
|
|
34
|
+
|
|
35
|
+
- **"What should we do now?"** -> Read `directives/*/directive.json` for active directives
|
|
36
|
+
- **Planning a feature:** -> Read `vision.md` + relevant directive context + `lessons/`
|
|
37
|
+
- **Building a feature:** -> Read the project's `project.json` for tasks
|
|
38
|
+
- **After completing work:** -> Update the tasks in `project.json`, and update `lessons/` if new patterns emerged
|
|
39
|
+
|
|
40
|
+
## Key Conventions
|
|
41
|
+
|
|
42
|
+
- Directory names = entity IDs. `directives/pipeline-v2/` means `directive.id = "pipeline-v2"`
|
|
43
|
+
- `project.json` is THE source of truth for a project, including all its tasks
|
|
44
|
+
- Tasks are embedded in project.json -- no separate task files
|
|
45
|
+
- Directives discovered via glob: `directives/*/directive.json`
|
|
46
|
+
- Projects discovered via glob: `directives/*/projects/*/project.json`
|
|
47
|
+
|
|
48
|
+
## Agent Team
|
|
49
|
+
|
|
50
|
+
{{AGENT_ROSTER}}
|
|
51
|
+
|
|
52
|
+
## Pipeline Enforcement
|
|
53
|
+
- **ALL work goes through the `/directive` pipeline.** The pipeline is weight-adaptive: lightweight tasks skip some steps; heavyweight tasks get the full process.
|
|
54
|
+
- NEVER spawn builder/engineer agents directly. Use `/directive` which handles reviews, scope, and completion verification.
|
|
55
|
+
|
|
56
|
+
## Git Operations
|
|
57
|
+
NEVER perform git operations without explicit user approval.
|