ai-collab-open-system 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.aict/START_HERE.md +127 -0
- package/.aict/WORKSPACE_MANIFEST.json +91 -0
- package/.aict/acceptance/EXAMPLE.synthetic.md +49 -0
- package/.aict/acceptance/FAILURE_MODES.md +40 -0
- package/.aict/acceptance/PROMPT.md +47 -0
- package/.aict/acceptance/README.md +44 -0
- package/.aict/acceptance/TEMPLATE.md +57 -0
- package/.aict/adapters/SHARED_CORE_CONTRACT.md +106 -0
- package/.aict/adapters/claude-code/ADAPTER.md +28 -0
- package/.aict/adapters/cline/ADAPTER.md +28 -0
- package/.aict/adapters/codex/ADAPTER.md +28 -0
- package/.aict/adapters/copilot/ADAPTER.md +28 -0
- package/.aict/adapters/cursor/ADAPTER.md +28 -0
- package/.aict/adapters/windsurf/ADAPTER.md +28 -0
- package/.aict/context/EXAMPLE.synthetic.md +53 -0
- package/.aict/context/FAILURE_MODES.md +40 -0
- package/.aict/context/PROMPT.md +47 -0
- package/.aict/context/README.md +44 -0
- package/.aict/context/TEMPLATE.md +63 -0
- package/.aict/cookbook/README.md +8 -0
- package/.aict/cookbook/bridge-to-a-second-family.md +103 -0
- package/.aict/cookbook/connect-a-tool.md +67 -0
- package/.aict/cookbook/review-a-half-product.md +79 -0
- package/.aict/cookbook/run-a-first-loop.md +81 -0
- package/.aict/examples/README.md +21 -0
- package/.aict/examples/ai-coding-long-task/CASE.md +161 -0
- package/.aict/examples/ai-coding-long-task/artifacts/acceptance-card.md +36 -0
- package/.aict/examples/ai-coding-long-task/artifacts/context-package.md +30 -0
- package/.aict/examples/ai-coding-long-task/artifacts/execution-prompt.md +30 -0
- package/.aict/examples/ai-coding-long-task/artifacts/first-ai-output.md +109 -0
- package/.aict/examples/ai-coding-long-task/artifacts/guard-review.md +40 -0
- package/.aict/examples/ai-coding-long-task/artifacts/handoff-note.md +28 -0
- package/.aict/examples/ai-coding-long-task/artifacts/harvest-seed.md +28 -0
- package/.aict/examples/ai-coding-long-task/artifacts/revised-output.md +62 -0
- package/.aict/examples/content-production-harvest/CASE.md +87 -0
- package/.aict/examples/content-production-harvest/artifacts/acceptance-card.md +28 -0
- package/.aict/examples/content-production-harvest/artifacts/context-package.md +28 -0
- package/.aict/examples/content-production-harvest/artifacts/execution-prompt.md +30 -0
- package/.aict/examples/content-production-harvest/artifacts/guard-review.md +28 -0
- package/.aict/examples/content-production-harvest/artifacts/handoff-note.md +28 -0
- package/.aict/examples/content-production-harvest/artifacts/harvest-seed.md +28 -0
- package/.aict/examples/multi-tool-collaboration/CASE.md +87 -0
- package/.aict/examples/multi-tool-collaboration/artifacts/acceptance-card.md +28 -0
- package/.aict/examples/multi-tool-collaboration/artifacts/context-package.md +28 -0
- package/.aict/examples/multi-tool-collaboration/artifacts/execution-prompt.md +30 -0
- package/.aict/examples/multi-tool-collaboration/artifacts/guard-review.md +28 -0
- package/.aict/examples/multi-tool-collaboration/artifacts/handoff-note.md +28 -0
- package/.aict/examples/multi-tool-collaboration/artifacts/harvest-seed.md +28 -0
- package/.aict/examples/personal-judgment-growth-assistant/CASE.md +87 -0
- package/.aict/examples/personal-judgment-growth-assistant/artifacts/acceptance-card.md +28 -0
- package/.aict/examples/personal-judgment-growth-assistant/artifacts/context-package.md +28 -0
- package/.aict/examples/personal-judgment-growth-assistant/artifacts/execution-prompt.md +30 -0
- package/.aict/examples/personal-judgment-growth-assistant/artifacts/guard-review.md +28 -0
- package/.aict/examples/personal-judgment-growth-assistant/artifacts/handoff-note.md +28 -0
- package/.aict/examples/personal-judgment-growth-assistant/artifacts/harvest-seed.md +28 -0
- package/.aict/examples/research-knowledge-synthesis/CASE.md +87 -0
- package/.aict/examples/research-knowledge-synthesis/artifacts/acceptance-card.md +28 -0
- package/.aict/examples/research-knowledge-synthesis/artifacts/context-package.md +28 -0
- package/.aict/examples/research-knowledge-synthesis/artifacts/execution-prompt.md +30 -0
- package/.aict/examples/research-knowledge-synthesis/artifacts/guard-review.md +28 -0
- package/.aict/examples/research-knowledge-synthesis/artifacts/handoff-note.md +28 -0
- package/.aict/examples/research-knowledge-synthesis/artifacts/harvest-seed.md +28 -0
- package/.aict/guard/EXAMPLE.synthetic.md +51 -0
- package/.aict/guard/FAILURE_MODES.md +40 -0
- package/.aict/guard/PROMPT.md +47 -0
- package/.aict/guard/README.md +44 -0
- package/.aict/guard/TEMPLATE.md +60 -0
- package/.aict/handoff/EXAMPLE.synthetic.md +51 -0
- package/.aict/handoff/FAILURE_MODES.md +40 -0
- package/.aict/handoff/PROMPT.md +47 -0
- package/.aict/handoff/README.md +44 -0
- package/.aict/handoff/TEMPLATE.md +60 -0
- package/.aict/harvest/EXAMPLE.synthetic.md +51 -0
- package/.aict/harvest/FAILURE_MODES.md +40 -0
- package/.aict/harvest/PROMPT.md +47 -0
- package/.aict/harvest/README.md +44 -0
- package/.aict/harvest/TEMPLATE.md +60 -0
- package/.aict/mechanisms/README.md +34 -0
- package/.aict/mechanisms/anti-drift-partner/EXAMPLE.synthetic.md +46 -0
- package/.aict/mechanisms/anti-drift-partner/FAILURE_MODES.md +25 -0
- package/.aict/mechanisms/anti-drift-partner/PROMPT.md +75 -0
- package/.aict/mechanisms/anti-drift-partner/README.md +82 -0
- package/.aict/mechanisms/anti-drift-partner/TEMPLATE.md +74 -0
- package/.aict/mechanisms/blind-spot-scan/EXAMPLE.synthetic.md +39 -0
- package/.aict/mechanisms/blind-spot-scan/FAILURE_MODES.md +25 -0
- package/.aict/mechanisms/blind-spot-scan/PROMPT.md +72 -0
- package/.aict/mechanisms/blind-spot-scan/README.md +79 -0
- package/.aict/mechanisms/blind-spot-scan/TEMPLATE.md +70 -0
- package/.aict/mechanisms/collaboration-coach/EXAMPLE.synthetic.md +40 -0
- package/.aict/mechanisms/collaboration-coach/FAILURE_MODES.md +25 -0
- package/.aict/mechanisms/collaboration-coach/PROMPT.md +72 -0
- package/.aict/mechanisms/collaboration-coach/README.md +79 -0
- package/.aict/mechanisms/collaboration-coach/TEMPLATE.md +61 -0
- package/.aict/mechanisms/do-not-handle-yet/EXAMPLE.synthetic.md +15 -0
- package/.aict/mechanisms/do-not-handle-yet/FAILURE_MODES.md +16 -0
- package/.aict/mechanisms/do-not-handle-yet/PROMPT.md +41 -0
- package/.aict/mechanisms/do-not-handle-yet/README.md +30 -0
- package/.aict/mechanisms/do-not-handle-yet/TEMPLATE.md +38 -0
- package/.aict/mechanisms/dual-guard/EXAMPLE.synthetic.md +54 -0
- package/.aict/mechanisms/dual-guard/FAILURE_MODES.md +25 -0
- package/.aict/mechanisms/dual-guard/PROMPT.md +76 -0
- package/.aict/mechanisms/dual-guard/README.md +81 -0
- package/.aict/mechanisms/dual-guard/TEMPLATE.md +73 -0
- package/.aict/mechanisms/feedback-absorption-ledger/EXAMPLE.synthetic.md +49 -0
- package/.aict/mechanisms/feedback-absorption-ledger/FAILURE_MODES.md +25 -0
- package/.aict/mechanisms/feedback-absorption-ledger/PROMPT.md +74 -0
- package/.aict/mechanisms/feedback-absorption-ledger/README.md +81 -0
- package/.aict/mechanisms/feedback-absorption-ledger/TEMPLATE.md +69 -0
- package/.aict/mechanisms/half-product-review/EXAMPLE.synthetic.md +15 -0
- package/.aict/mechanisms/half-product-review/FAILURE_MODES.md +16 -0
- package/.aict/mechanisms/half-product-review/PROMPT.md +41 -0
- package/.aict/mechanisms/half-product-review/README.md +30 -0
- package/.aict/mechanisms/half-product-review/TEMPLATE.md +38 -0
- package/.aict/mechanisms/handoff-abc/EXAMPLE.synthetic.md +47 -0
- package/.aict/mechanisms/handoff-abc/FAILURE_MODES.md +25 -0
- package/.aict/mechanisms/handoff-abc/PROMPT.md +75 -0
- package/.aict/mechanisms/handoff-abc/README.md +82 -0
- package/.aict/mechanisms/handoff-abc/TEMPLATE.md +60 -0
- package/.aict/mechanisms/harvest-and-erc/EXAMPLE.synthetic.md +43 -0
- package/.aict/mechanisms/harvest-and-erc/FAILURE_MODES.md +25 -0
- package/.aict/mechanisms/harvest-and-erc/PROMPT.md +74 -0
- package/.aict/mechanisms/harvest-and-erc/README.md +81 -0
- package/.aict/mechanisms/harvest-and-erc/TEMPLATE.md +60 -0
- package/.aict/mechanisms/honest-calibration/EXAMPLE.synthetic.md +43 -0
- package/.aict/mechanisms/honest-calibration/FAILURE_MODES.md +25 -0
- package/.aict/mechanisms/honest-calibration/PROMPT.md +74 -0
- package/.aict/mechanisms/honest-calibration/README.md +81 -0
- package/.aict/mechanisms/honest-calibration/TEMPLATE.md +66 -0
- package/.aict/mechanisms/one-click-dispatch/EXAMPLE.synthetic.md +15 -0
- package/.aict/mechanisms/one-click-dispatch/FAILURE_MODES.md +16 -0
- package/.aict/mechanisms/one-click-dispatch/PROMPT.md +41 -0
- package/.aict/mechanisms/one-click-dispatch/README.md +30 -0
- package/.aict/mechanisms/one-click-dispatch/TEMPLATE.md +38 -0
- package/.aict/mechanisms/plain-language-first-screen/EXAMPLE.synthetic.md +15 -0
- package/.aict/mechanisms/plain-language-first-screen/FAILURE_MODES.md +16 -0
- package/.aict/mechanisms/plain-language-first-screen/PROMPT.md +41 -0
- package/.aict/mechanisms/plain-language-first-screen/README.md +30 -0
- package/.aict/mechanisms/plain-language-first-screen/TEMPLATE.md +38 -0
- package/.aict/mechanisms/root-cause-brake/EXAMPLE.synthetic.md +55 -0
- package/.aict/mechanisms/root-cause-brake/FAILURE_MODES.md +25 -0
- package/.aict/mechanisms/root-cause-brake/PROMPT.md +73 -0
- package/.aict/mechanisms/root-cause-brake/README.md +79 -0
- package/.aict/mechanisms/root-cause-brake/TEMPLATE.md +74 -0
- package/.aict/mechanisms/scout-review-controller/EXAMPLE.synthetic.md +15 -0
- package/.aict/mechanisms/scout-review-controller/FAILURE_MODES.md +16 -0
- package/.aict/mechanisms/scout-review-controller/PROMPT.md +41 -0
- package/.aict/mechanisms/scout-review-controller/README.md +30 -0
- package/.aict/mechanisms/scout-review-controller/TEMPLATE.md +38 -0
- package/.aict/mechanisms/single-tool-guard/EXAMPLE.synthetic.md +54 -0
- package/.aict/mechanisms/single-tool-guard/FAILURE_MODES.md +25 -0
- package/.aict/mechanisms/single-tool-guard/PROMPT.md +76 -0
- package/.aict/mechanisms/single-tool-guard/README.md +83 -0
- package/.aict/mechanisms/single-tool-guard/TEMPLATE.md +75 -0
- package/.aict/mechanisms/task-splitting/EXAMPLE.synthetic.md +53 -0
- package/.aict/mechanisms/task-splitting/FAILURE_MODES.md +25 -0
- package/.aict/mechanisms/task-splitting/PROMPT.md +72 -0
- package/.aict/mechanisms/task-splitting/README.md +79 -0
- package/.aict/mechanisms/task-splitting/TEMPLATE.md +76 -0
- package/.aict/modes/README.md +11 -0
- package/.aict/modes/execute.md +31 -0
- package/.aict/modes/handoff.md +29 -0
- package/.aict/modes/harvest.md +30 -0
- package/.aict/modes/review.md +28 -0
- package/.aict/modes/shape.md +34 -0
- package/.aict/privacy/COMMERCIAL_BOUNDARY.md +34 -0
- package/.aict/privacy/PRIVACY.md +36 -0
- package/.aict/privacy/REDACTION_CHECKLIST.md +12 -0
- package/.aict/profile/CANDIDATES.md +44 -0
- package/.aict/profile/EXAMPLE.synthetic.md +49 -0
- package/.aict/profile/FAILURE_MODES.md +40 -0
- package/.aict/profile/PROMPT.md +47 -0
- package/.aict/profile/README.md +44 -0
- package/.aict/profile/TEMPLATE.md +57 -0
- package/.aict/prompts/acceptance-definition.md +109 -0
- package/.aict/prompts/guard-review.md +116 -0
- package/.aict/prompts/handoff-generation.md +110 -0
- package/.aict/prompts/harvest-extraction.md +110 -0
- package/.aict/prompts/mode-switching.md +66 -0
- package/.aict/prompts/profile-creation.md +66 -0
- package/.aict/prompts/profile-refinement.md +66 -0
- package/.aict/prompts/project-context-packaging.md +113 -0
- package/.aict/prompts/red-team-challenge.md +106 -0
- package/.aict/prompts/rule-update-proposal.md +114 -0
- package/.aict/prompts/workflow-reset.md +109 -0
- package/.aict/roles/README.md +18 -0
- package/.aict/roles/executor.md +34 -0
- package/.aict/roles/harvester.md +33 -0
- package/.aict/roles/owner-controller.md +38 -0
- package/.aict/roles/scout.md +33 -0
- package/.aict/roles/supervisor.md +34 -0
- package/.aict/roles/system-guardian.md +34 -0
- package/.aict/skills/acceptance/SKILL.md +43 -0
- package/.aict/skills/context/SKILL.md +44 -0
- package/.aict/skills/evidence-pack/SKILL.md +42 -0
- package/.aict/skills/guard/SKILL.md +46 -0
- package/.aict/skills/handoff/SKILL.md +44 -0
- package/.aict/skills/harvest/SKILL.md +44 -0
- package/.aict/skills/mode-switch/SKILL.md +42 -0
- package/.aict/skills/profile/SKILL.md +42 -0
- package/.aict/skills/red-team/SKILL.md +42 -0
- package/.aict/skills/single-tool-guard/SKILL.md +42 -0
- package/.aict/state/CURRENT_STATE.md +13 -0
- package/.aict/state/DECISIONS.md +7 -0
- package/.aict/state/TASK_LOG.md +7 -0
- package/.aict/state/evidence.jsonl +2 -0
- package/.aict/state/learning-ledger.jsonl +1 -0
- package/.aict/state/receipts.jsonl +1 -0
- package/.aict/state/runs.jsonl +1 -0
- package/.aict/state/tasks.jsonl +1 -0
- package/.aict/walkthroughs/10-minute-your-task.md +107 -0
- package/.aict/walkthroughs/10-minute.md +43 -0
- package/.aict/walkthroughs/30-minute.md +22 -0
- package/.aict/walkthroughs/60-minute.md +27 -0
- package/.aict/walkthroughs/synthetic-loop-transcript.md +43 -0
- package/CHANGELOG.md +23 -0
- package/CODE_OF_CONDUCT.md +20 -0
- package/CONTRIBUTING.md +30 -0
- package/KNOWN_LIMITATIONS.md +54 -0
- package/LICENSE +199 -0
- package/PRODUCT_CONTRACT.md +446 -0
- package/README.md +245 -0
- package/RELEASE_CHECKLIST.md +78 -0
- package/SECURITY.md +56 -0
- package/START_HERE.md +89 -0
- package/bin/ai-collab.js +2 -0
- package/docs/DOGFOOD.md +85 -0
- package/docs/FEEDBACK.md +61 -0
- package/docs/FIRST_EXPERIENCE_SPEC.md +32 -0
- package/docs/FREE_VS_PAID.md +53 -0
- package/docs/PUBLIC_BOUNDARY.md +36 -0
- package/docs/PUBLIC_MAPPING.md +178 -0
- package/docs/RELEASE_PRIORITY.md +23 -0
- package/docs/WHY_THIS_EXISTS.md +36 -0
- package/docs/open-system/00-start-here.md +60 -0
- package/docs/open-system/01-ai-collaboration-os.md +33 -0
- package/docs/open-system/02-six-layer-architecture.md +45 -0
- package/docs/open-system/03-role-system.md +33 -0
- package/docs/open-system/04-core-mechanisms.md +34 -0
- package/docs/open-system/05-failure-patterns.md +31 -0
- package/docs/open-system/06-how-to-adapt-to-your-workflow.md +31 -0
- package/package.json +69 -0
- package/privacy-manifest.json +78 -0
- package/privacy-scan.local.json.example +18 -0
- package/scripts/lib/forbidden-in-pack.js +55 -0
- package/scripts/pack-check.js +154 -0
- package/scripts/privacy-scan.js +487 -0
- package/scripts/validate-contract.js +160 -0
- package/src/adapters.js +590 -0
- package/src/bootstrap.js +1184 -0
- package/src/catalog.js +2723 -0
- package/src/cli.js +2899 -0
- package/src/dialogue.js +470 -0
- package/src/i18n.js +1034 -0
- package/src/ledger.js +2011 -0
- package/src/render.js +1381 -0
- package/src/sendmodel.js +452 -0
- package/src/validate.js +1307 -0
- package/src/workspace.js +1679 -0
- package/tests/contract.test.js +8514 -0
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
# Run a First Loop
|
|
2
|
+
|
|
3
|
+
A do-it recipe: run one complete AI collaboration loop end to end on your own real (lightly redacted) task, and watch a guard catch a false completion claim that a single agent would have accepted. This is the recipe; `../walkthroughs/10-minute-your-task.md` is the operation card for that real-task run. The walkthrough says "press these buttons in this order"; this recipe says "here is why each step exists, and here is how to adapt it to whatever you are actually working on." If you would rather watch the loop on a prepared example before pointing it at your own work, the synthetic case is the optional "look first" track — see the box below.
|
|
4
|
+
|
|
5
|
+
> Optional "look first" track: if your task feels too sensitive to paste right now, or you just want to see the shape of the loop first, run it once on the prepared synthetic case using `../walkthroughs/10-minute.md` (the demo preview), then come back and run it on your own task with the copy-paste block below.
|
|
6
|
+
|
|
7
|
+
## When to use this
|
|
8
|
+
|
|
9
|
+
- Your first time through the system, and you want to feel the whole loop on work you actually care about.
|
|
10
|
+
- You can describe the loop but have never watched a guard actually reject a fluent "done".
|
|
11
|
+
- You are about to start a real task and want a tested prompt sequence to adapt, not a blank page.
|
|
12
|
+
|
|
13
|
+
Skip it if you have already run the loop and just need the fast operation card; go straight to `../walkthroughs/10-minute-your-task.md`.
|
|
14
|
+
|
|
15
|
+
## Prerequisites
|
|
16
|
+
|
|
17
|
+
- This workspace exists (you are reading a file inside it).
|
|
18
|
+
- One real task of your own you can describe in a few sentences (lightly redacted: swap private names, paths, and numbers for placeholders). No private file needs to be uploaded — a redacted description is enough.
|
|
19
|
+
- One AI tool you can paste into (any general chat AI, coding assistant, or command-line AI). One tool is enough for a first pass; a second tool of a different model family makes the guard step stronger but is optional.
|
|
20
|
+
- Five to ten minutes. Nothing is uploaded; you only read and copy local files plus your own redacted description.
|
|
21
|
+
|
|
22
|
+
## Steps
|
|
23
|
+
|
|
24
|
+
Run these five moves on your own task. (Each move maps to one shipped artifact in `../examples/ai-coding-long-task/artifacts/` — open the matching file there any time you want to see the move done once on the prepared synthetic case.)
|
|
25
|
+
|
|
26
|
+
1. Set context. Describe your task to the AI and have it write a context package: the goal in one sentence, what is in scope, and explicit non-goals. This turns a tangled request into a boundary. Reference: `context-package.md`.
|
|
27
|
+
2. Set acceptance. Turn that context into an acceptance card — a short numbered list of checkable "done" criteria a reviewer can verify, not a vibe. This is the step people skip and then regret. Reference: `acceptance-card.md`.
|
|
28
|
+
3. Produce the first output. Have the AI do only the accepted slice and report what changed, what it ran, what failed, and what it did NOT verify. Read its completion claim against the actual code or evidence — this is where a fluent "done" usually overstates the work. References: `execution-prompt.md`, `first-ai-output.md`.
|
|
29
|
+
4. Run the guard. Paste that output plus `../guard/PROMPT.md` into a second AI tool (or the same one in a fresh turn) and ask it to review against the acceptance card. A good guard returns a cause-and-effect chain tied to specific spots and a reject, not a one-line "looks good". Reference: `guard-review.md`.
|
|
30
|
+
5. Revise and close. Fix the named blocker and re-show it with evidence, then write a handoff (done / pending / unverified) and harvest one reusable lesson with all private specifics removed. References: `revised-output.md`, `handoff-note.md`, `harvest-seed.md`.
|
|
31
|
+
|
|
32
|
+
The copy-paste block below is the prompt sequence that drives exactly these five moves on your task.
|
|
33
|
+
|
|
34
|
+
## Copy-paste block
|
|
35
|
+
|
|
36
|
+
Paste these in order into your AI tool, filling the bracketed parts with your own redacted task. This is the same loop as the steps above.
|
|
37
|
+
|
|
38
|
+
```text
|
|
39
|
+
[1 / CONTEXT]
|
|
40
|
+
Help me write a context package for this task. Capture: the goal in one sentence, what is in scope, and explicit non-goals. Keep it local-first; I will not upload private material.
|
|
41
|
+
Task (redacted): [describe your task; replace any private name, path, or number with a placeholder]
|
|
42
|
+
|
|
43
|
+
[2 / ACCEPTANCE]
|
|
44
|
+
Now turn that context into an acceptance card: a short numbered list of checkable criteria that define "done". Each criterion must be something a reviewer can verify, not a vibe. Mark anything explicitly out of scope.
|
|
45
|
+
|
|
46
|
+
[3 / EXECUTION]
|
|
47
|
+
Do only the work the acceptance card describes. Do not expand scope. When done, report: what changed, what you ran to check it, what failed, and what you did NOT verify.
|
|
48
|
+
|
|
49
|
+
[4 / GUARD - run this in a SECOND tool, ideally a different model family]
|
|
50
|
+
Review the output below against the context and acceptance card. Point to concrete defects, missing evidence, privacy leaks, unsupported claims, and scope drift, each tied to a specific spot. Return findings by severity and a pass or reject. Do not approve a claim that the evidence does not back.
|
|
51
|
+
Output under review: [paste the step-3 output]
|
|
52
|
+
Acceptance card: [paste the step-2 card]
|
|
53
|
+
|
|
54
|
+
[5 / HANDOFF + HARVEST]
|
|
55
|
+
Write two short artifacts. Handoff: where the work is now, split into done / pending / unverified, plus the single next action and the exact baseline to start from. Harvest: one reusable lesson from this loop, written generally enough to apply to a future task, with all private specifics removed.
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
## Expected output
|
|
59
|
+
|
|
60
|
+
- A context package and an acceptance card with checkable criteria (not prose).
|
|
61
|
+
- A first output whose completion claim you can check against evidence.
|
|
62
|
+
- A guard review that names a real, line-level defect and returns reject when the claim outruns the evidence, or pass with named residual risk when it does not.
|
|
63
|
+
- A revised output where the named blocker is fixed and re-shown with evidence.
|
|
64
|
+
- A handoff that separates done / pending / unverified, and one reusable harvest lesson.
|
|
65
|
+
|
|
66
|
+
## Failure handling
|
|
67
|
+
|
|
68
|
+
- The guard just says "looks good" and finds nothing. It is probably grading tone, not claims. Re-run step 4 and force it to check each completion claim against the acceptance card and point to a specific line or a missing piece of evidence; an empty finding list is only valid if it can say what it checked.
|
|
69
|
+
- The first output looks perfect and you cannot spot the defect. Re-read the completion claim next to the code or evidence it rests on. The classic failure is a claim ("keyboard reorder works") that the code does not actually perform.
|
|
70
|
+
- You only have one AI tool. Run the guard in a fresh turn or a fresh session of the same tool. It is weaker than a second model family (same family tends to miss the same things), but far better than no guard.
|
|
71
|
+
- The loop feels like overhead on a tiny task. It is, for a one-line change. Use the full loop on work another session or person will build on; for throwaway work, skip it.
|
|
72
|
+
|
|
73
|
+
## Privacy note
|
|
74
|
+
|
|
75
|
+
Redact before you paste: replace real product names, file paths, customer or person names, and internal numbers with placeholders. Do not paste a private profile, raw private chat logs, or a non-public path into an external AI. The loop works on a redacted description; it does not need the private original. (If you take the optional "look first" track instead, the shipped synthetic case uploads nothing at all — there is nothing of yours to redact.)
|
|
76
|
+
|
|
77
|
+
## Next step
|
|
78
|
+
|
|
79
|
+
- Connect this loop to the AI tool you actually use day to day: `connect-a-tool.md`.
|
|
80
|
+
- When you receive a "done" artifact you did not produce, pressure-test it: `review-a-half-product.md`.
|
|
81
|
+
- Reuse the full mechanism behind step 4 on higher-stakes work: `../mechanisms/dual-guard/README.md`.
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
# Synthetic Case Library
|
|
2
|
+
|
|
3
|
+
Every case is synthetic and shows the full loop:
|
|
4
|
+
|
|
5
|
+
```text
|
|
6
|
+
messy starting point
|
|
7
|
+
-> workspace setup
|
|
8
|
+
-> profile/context
|
|
9
|
+
-> acceptance
|
|
10
|
+
-> execution prompt
|
|
11
|
+
-> guard review
|
|
12
|
+
-> handoff
|
|
13
|
+
-> harvest
|
|
14
|
+
-> what changes compared with a single raw AI chat
|
|
15
|
+
```
|
|
16
|
+
|
|
17
|
+
- [AI coding long task](./ai-coding-long-task/CASE.md)
|
|
18
|
+
- [Content production and harvest](./content-production-harvest/CASE.md)
|
|
19
|
+
- [Research / knowledge synthesis](./research-knowledge-synthesis/CASE.md)
|
|
20
|
+
- [Multi-tool collaboration](./multi-tool-collaboration/CASE.md)
|
|
21
|
+
- [Personal judgment / growth assistant](./personal-judgment-growth-assistant/CASE.md)
|
|
@@ -0,0 +1,161 @@
|
|
|
1
|
+
# AI coding long task
|
|
2
|
+
|
|
3
|
+
This is a fully synthetic case. It does not contain private customer material, real raw conversations, local paths, or private operational routes. It walks one real collaboration loop: a messy request becomes context, acceptance, a first AI output, a guard review that catches a false completion claim, a revised output, a handoff, and a harvest lesson.
|
|
4
|
+
|
|
5
|
+
## Confusing raw input
|
|
6
|
+
|
|
7
|
+
I have this little task board. It started as a quick demo but now I need it cleaned up. Can you refactor it, make drag-and-drop nicer, maybe add keyboard movement too, and make sure the cards look more modern? Last chat already changed some things but I don't remember what. Tests are flaky. I don't want a huge rewrite, but also don't leave it half broken. If you need to change the data shape, do it, unless that is risky. Also make it accessible.
|
|
8
|
+
|
|
9
|
+
## Likely single-agent failure
|
|
10
|
+
|
|
11
|
+
A normal raw AI answer tends to say: "Sure. I will refactor the board, improve drag and drop, add keyboard support, modernize the UI, and update tests." It sounds helpful, but it mixes behavior, design, data migration, and accessibility into one blob. It does not define what must pass, what is out of scope, or how the next session should continue if only half the work is verified.
|
|
12
|
+
|
|
13
|
+
## AI Collaboration OS process
|
|
14
|
+
|
|
15
|
+
1. Context package: Profile: prefers direct bug risk calls, small verified steps, and no silent scope expansion. Context: synthetic task board, local-only, no auth, no deployment, existing task data must survive, keyboard accessibility matters, visual redesign is not in scope.
|
|
16
|
+
2. Acceptance card: Done means the board preserves existing task data, supports drag and keyboard reorder, has tests for both flows, reports changed files and verification output, and leaves a handoff note listing visual polish as unverified rather than done.
|
|
17
|
+
3. Execution prompt: Implement only the reorder behavior described in the acceptance card. Keep the existing data shape. Do not redesign the board. After code, report changed files, tests run, failures, and unverified areas.
|
|
18
|
+
4. First AI output: a fluent "done" claim that overstates what the code does.
|
|
19
|
+
5. Guard review: independent reviewer points to the lines where the claim and code disagree.
|
|
20
|
+
6. Revised output: keyboard reorder implemented and tested; blocker resolved.
|
|
21
|
+
7. Handoff note: Current state: mouse drag and keyboard arrow-key reorder are both implemented and covered by tests (2 passing), and the guard re-review accepted the fix. Completed: data shape preserved; keyboard reorder implemented and tested. Pending: only visual polish for the reorder affordance, carried as unverified. Next action: pick up the visual polish, not the keyboard work.
|
|
22
|
+
8. Harvest seed: Reusable pattern: long coding tasks need an acceptance card before implementation, a guard pass before handoff, and an explicit unverified bucket for visual polish. Do not generalize the synthetic task board data model.
|
|
23
|
+
|
|
24
|
+
## Messy starting point
|
|
25
|
+
|
|
26
|
+
A developer asks an assistant to refactor a small task board, then keeps adding bugs, design requests, accessibility requests, and test fixes across multiple sessions. Each new chat forgets which tradeoffs were rejected, whether keyboard movement is required, and which visual polish is out of scope.
|
|
27
|
+
|
|
28
|
+
## Workspace setup
|
|
29
|
+
|
|
30
|
+
Create the workspace, fill context with the task board boundary, define acceptance around behavior and tests, execute only the reorder slice, challenge the result with guard review, then hand off the exact remaining work.
|
|
31
|
+
|
|
32
|
+
## Profile/context
|
|
33
|
+
|
|
34
|
+
Profile: prefers direct bug risk calls, small verified steps, and no silent scope expansion. Context: synthetic task board, local-only, no auth, no deployment, existing task data must survive, keyboard accessibility matters, visual redesign is not in scope.
|
|
35
|
+
|
|
36
|
+
## Context package
|
|
37
|
+
|
|
38
|
+
Profile: prefers direct bug risk calls, small verified steps, and no silent scope expansion. Context: synthetic task board, local-only, no auth, no deployment, existing task data must survive, keyboard accessibility matters, visual redesign is not in scope.
|
|
39
|
+
|
|
40
|
+
See `artifacts/context-package.md` for the standalone version.
|
|
41
|
+
|
|
42
|
+
## Acceptance card
|
|
43
|
+
|
|
44
|
+
Done means TaskBoard can reorder tasks two ways, both proven by tests, with existing task data preserved.
|
|
45
|
+
|
|
46
|
+
1. AC1 Mouse: a pointer drag reorders a task and the new order is saved to the tasks array.
|
|
47
|
+
2. AC2 Keyboard: focusing a task and pressing ArrowUp or ArrowDown moves that task one slot, for accessibility (keyboard-only users must reach the same outcome as mouse users).
|
|
48
|
+
3. AC3 Tests: both the mouse path and the keyboard path have an automated test that fails before the feature and passes after.
|
|
49
|
+
4. AC4 Data: existing task ids, titles, and fields survive the reorder; no data-shape migration in this slice.
|
|
50
|
+
5. AC5 Scope: visual redesign is out of scope and must be reported as unverified, not done.
|
|
51
|
+
|
|
52
|
+
Reject rule: Reject if any acceptance criterion lacks evidence, or if the completion claim states more than the code and tests prove.
|
|
53
|
+
|
|
54
|
+
See `artifacts/acceptance-card.md` for the standalone version.
|
|
55
|
+
|
|
56
|
+
## Execution prompt
|
|
57
|
+
|
|
58
|
+
```text
|
|
59
|
+
Implement only the reorder behavior described in the acceptance card. Keep the existing data shape. Do not redesign the board. After code, report changed files, tests run, failures, and unverified areas.
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
## First AI output
|
|
63
|
+
|
|
64
|
+
The AI returned a confident completion claim:
|
|
65
|
+
|
|
66
|
+
> Done. I refactored TaskBoard and implemented task reordering. Drag-and-drop works with the mouse, and keyboard reordering with the arrow keys is supported too for accessibility. I also added tests, and everything passes.
|
|
67
|
+
|
|
68
|
+
The code only implements pointer drag; the keyboard handler is a stub and there is no keyboard test. The full artifact, with stable line numbers the guard can cite, is in `artifacts/first-ai-output.md`.
|
|
69
|
+
|
|
70
|
+
## Guard review
|
|
71
|
+
|
|
72
|
+
A cross-checking guard reviews the first AI output against the acceptance card and reports a causal chain instead of a one-line verdict.
|
|
73
|
+
|
|
74
|
+
### Guard finding (cause-and-effect chain)
|
|
75
|
+
|
|
76
|
+
1. **Under review:** first-ai-output.md, the TaskBoard.tsx code block (line numbers below are relative to that fenced block) and the TaskBoard.test.tsx block.
|
|
77
|
+
2. **Problem:** The completion claim says keyboard arrow-key reordering is supported and tested, but the code only implements pointer (mouse) reorder. The keyboard handler is an empty stub, and there is no keyboard test.
|
|
78
|
+
3. **Evidence:**
|
|
79
|
+
- Claim vs code: the claim states 'keyboard reordering with the arrow keys is supported', but onKeyDown at TaskBoard.tsx lines 27-30 only logs the key and never calls moveTask, so ArrowUp/ArrowDown change nothing.
|
|
80
|
+
- Claim vs tests: the claim states 'I also added tests, and everything passes', but TaskBoard.test.tsx has a single test at lines 9-17 for the mouse path and no keyboard test, so AC3 keyboard coverage is missing.
|
|
81
|
+
- moveTask at TaskBoard.tsx lines 9-15 already supports an index shift, so the keyboard wiring is feasible and was simply not done.
|
|
82
|
+
4. **Why this cannot pass:** AC2 (keyboard reorder) and AC3 (test for both flows) are not met, and the self-report claims more than the code proves. A keyboard-only user cannot reorder at all, so the accessibility requirement fails. Passing this would trust a fluent claim over the evidence.
|
|
83
|
+
5. **Required fix:** Implement ArrowUp/ArrowDown in onKeyDown so it calls moveTask(index, index - 1) and moveTask(index, index + 1), and add a failing-then-passing keyboard reorder test. If keyboard support is intentionally deferred, move it out of scope explicitly and update the acceptance card and the completion claim to match.
|
|
84
|
+
6. **Verdict:** reject (blocker: keyboard reorder claimed but not implemented or tested)
|
|
85
|
+
|
|
86
|
+
The full review, with line references into `first-ai-output.md`, is in `artifacts/guard-review.md`.
|
|
87
|
+
|
|
88
|
+
## Revised output
|
|
89
|
+
|
|
90
|
+
The blocker is resolved: onKeyDown now reorders with the arrow keys and a keyboard test was added that fails on the old stub and passes on the fix. Guard re-review: blocker resolved. onKeyDown now calls moveTask for ArrowUp/ArrowDown, and the new keyboard test fails against the old stub and passes against the fix. AC2 and AC3 are met. Status: accepted, with visual polish still carried as unverified. The corrected code and the new keyboard test are in `artifacts/revised-output.md`.
|
|
91
|
+
|
|
92
|
+
## Handoff note
|
|
93
|
+
|
|
94
|
+
Current state: mouse drag and keyboard arrow-key reorder are both implemented and covered by tests (2 passing), and the guard re-review accepted the fix. Completed: data shape preserved; keyboard reorder implemented and tested. Pending: only visual polish for the reorder affordance, carried as unverified. Next action: pick up the visual polish, not the keyboard work.
|
|
95
|
+
|
|
96
|
+
## Harvest seed
|
|
97
|
+
|
|
98
|
+
Reusable pattern: long coding tasks need an acceptance card before implementation, a guard pass before handoff, and an explicit unverified bucket for visual polish. Do not generalize the synthetic task board data model.
|
|
99
|
+
|
|
100
|
+
## Before/after comparison
|
|
101
|
+
|
|
102
|
+
| Dimension | Before (raw single-agent chat) | After (AI Collaboration OS) |
|
|
103
|
+
| --- | --- | --- |
|
|
104
|
+
| Scope | Refactor, drag, keyboard, visual polish, and tests blur into one promise. | Current slice is reorder only; visual redesign is explicitly out of scope. |
|
|
105
|
+
| Done standard | "Looks done" based on a fluent reply. | Acceptance card with five checkable criteria (mouse, keyboard, tests, data, scope). |
|
|
106
|
+
| Completion claim | "Keyboard works and tests pass" is trusted as written. | Guard points to the exact lines where the claim and code disagree. |
|
|
107
|
+
| Keyboard accessibility | Silently missing behind a stub handler. | Implemented in the revised output and proven by a keyboard test. |
|
|
108
|
+
| Handoff | Next session restarts and re-asks what was rejected. | Done, pending, and unverified are separated for the next session. |
|
|
109
|
+
| Reusable lesson | Lost after the chat scrolls away. | Harvested: verify completion claims with code and test evidence. |
|
|
110
|
+
|
|
111
|
+
## What changes compared with a single raw AI chat
|
|
112
|
+
|
|
113
|
+
A raw chat would accept the first "done" because it reads well. This loop made the completion claim checkable, so an independent guard caught that keyboard reorder was claimed but never implemented or tested. That gap is exactly what one agent tends not to see in its own fluent answer, and what a guard pointing to specific lines does see.
|
|
114
|
+
|
|
115
|
+
## artifacts
|
|
116
|
+
|
|
117
|
+
- Profile artifact: Profile artifact: direct risk calls; prefer small tested changes; no data-shape migration unless acceptance explicitly allows it; label unverified visual polish.
|
|
118
|
+
- Context artifact: Context artifact: synthetic task board; local-only; no auth or deployment; current slice is reorder behavior; design refresh is a non-goal for this loop.
|
|
119
|
+
- Acceptance artifact: Acceptance artifact: drag reorder and keyboard reorder both need tests; existing task data must survive; completion requires verification output.
|
|
120
|
+
- First AI output artifact: the completion claim plus the flawed TaskBoard code and the single mouse-only test (`artifacts/first-ai-output.md`).
|
|
121
|
+
- Guard artifact: Guard artifact: reject completion because keyboard movement lacks evidence; require a failing-then-passing keyboard reorder test.
|
|
122
|
+
- Revised output artifact: the implemented keyboard reorder and the added keyboard test (`artifacts/revised-output.md`).
|
|
123
|
+
- Handoff artifact: Handoff artifact: mouse drag and keyboard arrow-key reorder are both implemented and covered by tests (2 passing); the guard re-review accepted the fix; only visual polish for the reorder affordance remains unverified. Next action: pick up the visual polish, not the keyboard work.
|
|
124
|
+
- Harvest artifact: Harvest artifact: long coding tasks need acceptance before implementation and guard before handoff; do not generalize this board's data model.
|
|
125
|
+
|
|
126
|
+
Artifact files:
|
|
127
|
+
|
|
128
|
+
- `artifacts/context-package.md`
|
|
129
|
+
- `artifacts/acceptance-card.md`
|
|
130
|
+
- `artifacts/execution-prompt.md`
|
|
131
|
+
- `artifacts/first-ai-output.md`
|
|
132
|
+
- `artifacts/guard-review.md`
|
|
133
|
+
- `artifacts/revised-output.md`
|
|
134
|
+
- `artifacts/handoff-note.md`
|
|
135
|
+
- `artifacts/harvest-seed.md`
|
|
136
|
+
|
|
137
|
+
## raw-input
|
|
138
|
+
|
|
139
|
+
I have this little task board. It started as a quick demo but now I need it cleaned up. Can you refactor it, make drag-and-drop nicer, maybe add keyboard movement too, and make sure the cards look more modern? Last chat already changed some things but I don't remember what. Tests are flaky. I don't want a huge rewrite, but also don't leave it half broken. If you need to change the data shape, do it, unless that is risky. Also make it accessible.
|
|
140
|
+
|
|
141
|
+
## baseline-output
|
|
142
|
+
|
|
143
|
+
A normal raw AI answer tends to say: "Sure. I will refactor the board, improve drag and drop, add keyboard support, modernize the UI, and update tests." It sounds helpful, but it mixes behavior, design, data migration, and accessibility into one blob. It does not define what must pass, what is out of scope, or how the next session should continue if only half the work is verified.
|
|
144
|
+
|
|
145
|
+
## system-run
|
|
146
|
+
|
|
147
|
+
1. Profile sets collaboration defaults: small verified steps, direct risk calls, and no silent rewrite.
|
|
148
|
+
2. Context narrows the current slice to reorder behavior in a synthetic local task board.
|
|
149
|
+
3. Acceptance defines pass criteria before code: data preserved, drag reorder tested, keyboard reorder tested, changed files and verification reported.
|
|
150
|
+
4. Execution prompt tells the AI to implement only reorder behavior and not redesign the board.
|
|
151
|
+
5. Guard review catches the missing keyboard test and blocks the completion claim.
|
|
152
|
+
6. Handoff records mouse and keyboard reorder done and tested with the guard's accepted fix, leaving only visual polish unverified.
|
|
153
|
+
7. Harvest saves the reusable release pattern: keep an unverified bucket instead of pretending polish is done.
|
|
154
|
+
|
|
155
|
+
## comparison
|
|
156
|
+
|
|
157
|
+
A raw chat produces a plausible refactor plan but loses rejected scope and unverified accessibility work. The six-layer workspace keeps the goal, done standard, review finding, next action, and reusable lesson visible.
|
|
158
|
+
|
|
159
|
+
## next-step
|
|
160
|
+
|
|
161
|
+
Copy and run the context package, acceptance card, and execution prompt into your AI tool. After the first answer, paste the guard-review prompt and require it to check the keyboard criterion before accepting the work.
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
# Acceptance card - AI coding long task
|
|
2
|
+
|
|
3
|
+
## Source case
|
|
4
|
+
|
|
5
|
+
- Case id: `ai-coding-long-task`
|
|
6
|
+
- Case title: AI coding long task
|
|
7
|
+
- Privacy status: fully synthetic
|
|
8
|
+
- Private material: none
|
|
9
|
+
|
|
10
|
+
## How to use
|
|
11
|
+
|
|
12
|
+
Paste this before implementation, drafting, research, or judgment work. Ask the assistant to treat these criteria as the pass/fail surface.
|
|
13
|
+
|
|
14
|
+
## Synthetic content
|
|
15
|
+
|
|
16
|
+
Done means TaskBoard can reorder tasks two ways, both proven by tests, with existing task data preserved.
|
|
17
|
+
|
|
18
|
+
1. AC1 Mouse: a pointer drag reorders a task and the new order is saved to the tasks array.
|
|
19
|
+
2. AC2 Keyboard: focusing a task and pressing ArrowUp or ArrowDown moves that task one slot, for accessibility (keyboard-only users must reach the same outcome as mouse users).
|
|
20
|
+
3. AC3 Tests: both the mouse path and the keyboard path have an automated test that fails before the feature and passes after.
|
|
21
|
+
4. AC4 Data: existing task ids, titles, and fields survive the reorder; no data-shape migration in this slice.
|
|
22
|
+
5. AC5 Scope: visual redesign is out of scope and must be reported as unverified, not done.
|
|
23
|
+
|
|
24
|
+
Reject rule: Reject if any acceptance criterion lacks evidence, or if the completion claim states more than the code and tests prove.
|
|
25
|
+
|
|
26
|
+
## Review note
|
|
27
|
+
|
|
28
|
+
Reject work that claims completion without evidence tied to this card.
|
|
29
|
+
|
|
30
|
+
## Next step
|
|
31
|
+
|
|
32
|
+
Use this card with the execution prompt and later guard review.
|
|
33
|
+
|
|
34
|
+
## Why this exists
|
|
35
|
+
|
|
36
|
+
This artifact makes the case runnable and reviewable. A raw chat can produce a smooth answer, but this file preserves the specific state needed for profile, context, acceptance, guard, handoff, and harvest work.
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
# Context package - AI coding long task
|
|
2
|
+
|
|
3
|
+
## Source case
|
|
4
|
+
|
|
5
|
+
- Case id: `ai-coding-long-task`
|
|
6
|
+
- Case title: AI coding long task
|
|
7
|
+
- Privacy status: fully synthetic
|
|
8
|
+
- Private material: none
|
|
9
|
+
|
|
10
|
+
## How to use
|
|
11
|
+
|
|
12
|
+
Paste this before asking an AI tool to continue the task. It gives the assistant the working style, scope, constraints, and known evidence.
|
|
13
|
+
|
|
14
|
+
## Synthetic content
|
|
15
|
+
|
|
16
|
+
Profile artifact: direct risk calls; prefer small tested changes; no data-shape migration unless acceptance explicitly allows it; label unverified visual polish.
|
|
17
|
+
|
|
18
|
+
Context artifact: synthetic task board; local-only; no auth or deployment; current slice is reorder behavior; design refresh is a non-goal for this loop.
|
|
19
|
+
|
|
20
|
+
## Review note
|
|
21
|
+
|
|
22
|
+
Check that facts and assumptions are separated before execution starts.
|
|
23
|
+
|
|
24
|
+
## Next step
|
|
25
|
+
|
|
26
|
+
Use this context to write or verify the acceptance card.
|
|
27
|
+
|
|
28
|
+
## Why this exists
|
|
29
|
+
|
|
30
|
+
This artifact makes the case runnable and reviewable. A raw chat can produce a smooth answer, but this file preserves the specific state needed for profile, context, acceptance, guard, handoff, and harvest work.
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
# Execution prompt - AI coding long task
|
|
2
|
+
|
|
3
|
+
## Source case
|
|
4
|
+
|
|
5
|
+
- Case id: `ai-coding-long-task`
|
|
6
|
+
- Case title: AI coding long task
|
|
7
|
+
- Privacy status: fully synthetic
|
|
8
|
+
- Private material: none
|
|
9
|
+
|
|
10
|
+
## How to use
|
|
11
|
+
|
|
12
|
+
Paste this into the selected AI tool after the context package and acceptance card.
|
|
13
|
+
|
|
14
|
+
## Synthetic content
|
|
15
|
+
|
|
16
|
+
```text
|
|
17
|
+
Implement only the reorder behavior described in the acceptance card. Keep the existing data shape. Do not redesign the board. After code, report changed files, tests run, failures, and unverified areas.
|
|
18
|
+
```
|
|
19
|
+
|
|
20
|
+
## Review note
|
|
21
|
+
|
|
22
|
+
Confirm the prompt does not expand scope beyond acceptance.
|
|
23
|
+
|
|
24
|
+
## Next step
|
|
25
|
+
|
|
26
|
+
Run guard review on the first artifact produced from this prompt.
|
|
27
|
+
|
|
28
|
+
## Why this exists
|
|
29
|
+
|
|
30
|
+
This artifact makes the case runnable and reviewable. A raw chat can produce a smooth answer, but this file preserves the specific state needed for profile, context, acceptance, guard, handoff, and harvest work.
|
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
# First AI output - AI coding long task
|
|
2
|
+
|
|
3
|
+
## Source case
|
|
4
|
+
|
|
5
|
+
- Case id: `ai-coding-long-task`
|
|
6
|
+
- Case title: AI coding long task
|
|
7
|
+
- Privacy status: fully synthetic
|
|
8
|
+
- Private material: none
|
|
9
|
+
|
|
10
|
+
## How to use
|
|
11
|
+
|
|
12
|
+
Read this as the artifact under review, not as a finished result. It is the AI's first answer, with a confident completion claim and code that does not fully back it up.
|
|
13
|
+
|
|
14
|
+
## Synthetic content
|
|
15
|
+
|
|
16
|
+
Completion claim (what the AI reported):
|
|
17
|
+
|
|
18
|
+
> Done. I refactored TaskBoard and implemented task reordering. Drag-and-drop works with the mouse, and keyboard reordering with the arrow keys is supported too for accessibility. I also added tests, and everything passes.
|
|
19
|
+
|
|
20
|
+
TaskBoard.tsx (first AI output, line numbers are relative to this code block):
|
|
21
|
+
|
|
22
|
+
```tsx
|
|
23
|
+
import { useState } from "react";
|
|
24
|
+
|
|
25
|
+
type Task = { id: string; title: string };
|
|
26
|
+
|
|
27
|
+
export function TaskBoard({ initialTasks }: { initialTasks: Task[] }) {
|
|
28
|
+
const [tasks, setTasks] = useState<Task[]>(initialTasks);
|
|
29
|
+
const [dragIndex, setDragIndex] = useState<number | null>(null);
|
|
30
|
+
|
|
31
|
+
function moveTask(from: number, to: number) {
|
|
32
|
+
if (to < 0 || to >= tasks.length) return;
|
|
33
|
+
const next = tasks.slice();
|
|
34
|
+
const [moved] = next.splice(from, 1);
|
|
35
|
+
next.splice(to, 0, moved);
|
|
36
|
+
setTasks(next);
|
|
37
|
+
}
|
|
38
|
+
|
|
39
|
+
function onPointerDown(index: number) {
|
|
40
|
+
setDragIndex(index);
|
|
41
|
+
}
|
|
42
|
+
|
|
43
|
+
function onPointerMove(index: number) {
|
|
44
|
+
if (dragIndex === null || dragIndex === index) return;
|
|
45
|
+
moveTask(dragIndex, index);
|
|
46
|
+
setDragIndex(index);
|
|
47
|
+
}
|
|
48
|
+
|
|
49
|
+
function onKeyDown(event: React.KeyboardEvent) {
|
|
50
|
+
// TODO: wire arrow keys to moveTask for keyboard reorder
|
|
51
|
+
console.log("key pressed", event.key);
|
|
52
|
+
}
|
|
53
|
+
|
|
54
|
+
return (
|
|
55
|
+
<ul>
|
|
56
|
+
{tasks.map((task, index) => (
|
|
57
|
+
<li
|
|
58
|
+
key={task.id}
|
|
59
|
+
tabIndex={0}
|
|
60
|
+
onPointerDown={() => onPointerDown(index)}
|
|
61
|
+
onPointerMove={() => onPointerMove(index)}
|
|
62
|
+
onKeyDown={onKeyDown}
|
|
63
|
+
>
|
|
64
|
+
{task.title}
|
|
65
|
+
</li>
|
|
66
|
+
))}
|
|
67
|
+
</ul>
|
|
68
|
+
);
|
|
69
|
+
}
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
TaskBoard.test.tsx (first AI output tests). Self-reported result: 1 passing (mouse drag reorders tasks).
|
|
73
|
+
|
|
74
|
+
```tsx
|
|
75
|
+
import { render, screen, fireEvent } from "@testing-library/react";
|
|
76
|
+
import { TaskBoard } from "./TaskBoard";
|
|
77
|
+
|
|
78
|
+
const sample = [
|
|
79
|
+
{ id: "a", title: "Alpha" },
|
|
80
|
+
{ id: "b", title: "Bravo" }
|
|
81
|
+
];
|
|
82
|
+
|
|
83
|
+
test("mouse drag reorders tasks", () => {
|
|
84
|
+
render(<TaskBoard initialTasks={sample} />);
|
|
85
|
+
const first = screen.getByText("Alpha");
|
|
86
|
+
const second = screen.getByText("Bravo");
|
|
87
|
+
fireEvent.pointerDown(first);
|
|
88
|
+
fireEvent.pointerMove(second);
|
|
89
|
+
const items = screen.getAllByRole("listitem").map((node) => node.textContent);
|
|
90
|
+
expect(items).toEqual(["Bravo", "Alpha"]);
|
|
91
|
+
});
|
|
92
|
+
```
|
|
93
|
+
|
|
94
|
+
Defect summary:
|
|
95
|
+
- Claimed arrow-key reorder, but `onKeyDown` is a stub that only logs the key (no `moveTask` call).
|
|
96
|
+
- Claimed full test coverage, but only the mouse path has a test; there is no keyboard test.
|
|
97
|
+
- The line numbers above are relative to each code block so the guard review can cite them.
|
|
98
|
+
|
|
99
|
+
## Review note
|
|
100
|
+
|
|
101
|
+
Do not accept the completion claim on its own. Check each acceptance criterion against the code and tests here before trusting the answer.
|
|
102
|
+
|
|
103
|
+
## Next step
|
|
104
|
+
|
|
105
|
+
Run guard review against this output and find where the claim and the code disagree.
|
|
106
|
+
|
|
107
|
+
## Why this exists
|
|
108
|
+
|
|
109
|
+
This artifact makes the case runnable and reviewable. A raw chat can produce a smooth answer, but this file preserves the specific state needed for profile, context, acceptance, guard, handoff, and harvest work.
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
# Guard review - AI coding long task
|
|
2
|
+
|
|
3
|
+
## Source case
|
|
4
|
+
|
|
5
|
+
- Case id: `ai-coding-long-task`
|
|
6
|
+
- Case title: AI coding long task
|
|
7
|
+
- Privacy status: fully synthetic
|
|
8
|
+
- Private material: none
|
|
9
|
+
|
|
10
|
+
## How to use
|
|
11
|
+
|
|
12
|
+
Use this as the review stance after the first artifact exists. It challenges evidence, privacy, scope, and acceptance alignment.
|
|
13
|
+
|
|
14
|
+
## Synthetic content
|
|
15
|
+
|
|
16
|
+
This review challenges `first-ai-output.md` against the acceptance card.
|
|
17
|
+
|
|
18
|
+
### Guard finding (cause-and-effect chain)
|
|
19
|
+
|
|
20
|
+
1. **Under review:** first-ai-output.md, the TaskBoard.tsx code block (line numbers below are relative to that fenced block) and the TaskBoard.test.tsx block.
|
|
21
|
+
2. **Problem:** The completion claim says keyboard arrow-key reordering is supported and tested, but the code only implements pointer (mouse) reorder. The keyboard handler is an empty stub, and there is no keyboard test.
|
|
22
|
+
3. **Evidence:**
|
|
23
|
+
- Claim vs code: the claim states 'keyboard reordering with the arrow keys is supported', but onKeyDown at TaskBoard.tsx lines 27-30 only logs the key and never calls moveTask, so ArrowUp/ArrowDown change nothing.
|
|
24
|
+
- Claim vs tests: the claim states 'I also added tests, and everything passes', but TaskBoard.test.tsx has a single test at lines 9-17 for the mouse path and no keyboard test, so AC3 keyboard coverage is missing.
|
|
25
|
+
- moveTask at TaskBoard.tsx lines 9-15 already supports an index shift, so the keyboard wiring is feasible and was simply not done.
|
|
26
|
+
4. **Why this cannot pass:** AC2 (keyboard reorder) and AC3 (test for both flows) are not met, and the self-report claims more than the code proves. A keyboard-only user cannot reorder at all, so the accessibility requirement fails. Passing this would trust a fluent claim over the evidence.
|
|
27
|
+
5. **Required fix:** Implement ArrowUp/ArrowDown in onKeyDown so it calls moveTask(index, index - 1) and moveTask(index, index + 1), and add a failing-then-passing keyboard reorder test. If keyboard support is intentionally deferred, move it out of scope explicitly and update the acceptance card and the completion claim to match.
|
|
28
|
+
6. **Verdict:** reject (blocker: keyboard reorder claimed but not implemented or tested)
|
|
29
|
+
|
|
30
|
+
## Review note
|
|
31
|
+
|
|
32
|
+
A review is not a pass unless it names evidence and residual risk.
|
|
33
|
+
|
|
34
|
+
## Next step
|
|
35
|
+
|
|
36
|
+
Fix any blocking finding, then write a handoff note.
|
|
37
|
+
|
|
38
|
+
## Why this exists
|
|
39
|
+
|
|
40
|
+
This artifact makes the case runnable and reviewable. A raw chat can produce a smooth answer, but this file preserves the specific state needed for profile, context, acceptance, guard, handoff, and harvest work.
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
# Handoff note - AI coding long task
|
|
2
|
+
|
|
3
|
+
## Source case
|
|
4
|
+
|
|
5
|
+
- Case id: `ai-coding-long-task`
|
|
6
|
+
- Case title: AI coding long task
|
|
7
|
+
- Privacy status: fully synthetic
|
|
8
|
+
- Private material: none
|
|
9
|
+
|
|
10
|
+
## How to use
|
|
11
|
+
|
|
12
|
+
Paste this into the next session or tool so work resumes from current state instead of restarting.
|
|
13
|
+
|
|
14
|
+
## Synthetic content
|
|
15
|
+
|
|
16
|
+
Handoff artifact: mouse drag and keyboard arrow-key reorder are both implemented and covered by tests (2 passing); the guard re-review accepted the fix; only visual polish for the reorder affordance remains unverified. Next action: pick up the visual polish, not the keyboard work.
|
|
17
|
+
|
|
18
|
+
## Review note
|
|
19
|
+
|
|
20
|
+
Check that completed, pending, blocked, and next action are distinguishable.
|
|
21
|
+
|
|
22
|
+
## Next step
|
|
23
|
+
|
|
24
|
+
Use the handoff note as input to harvest.
|
|
25
|
+
|
|
26
|
+
## Why this exists
|
|
27
|
+
|
|
28
|
+
This artifact makes the case runnable and reviewable. A raw chat can produce a smooth answer, but this file preserves the specific state needed for profile, context, acceptance, guard, handoff, and harvest work.
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
# Harvest seed - AI coding long task
|
|
2
|
+
|
|
3
|
+
## Source case
|
|
4
|
+
|
|
5
|
+
- Case id: `ai-coding-long-task`
|
|
6
|
+
- Case title: AI coding long task
|
|
7
|
+
- Privacy status: fully synthetic
|
|
8
|
+
- Private material: none
|
|
9
|
+
|
|
10
|
+
## How to use
|
|
11
|
+
|
|
12
|
+
Save this after the loop to preserve reusable knowledge without copying private raw material.
|
|
13
|
+
|
|
14
|
+
## Synthetic content
|
|
15
|
+
|
|
16
|
+
Harvest artifact: long coding tasks need acceptance before implementation and guard before handoff; do not generalize this board's data model.
|
|
17
|
+
|
|
18
|
+
## Review note
|
|
19
|
+
|
|
20
|
+
Do not generalize from the synthetic case unless the pattern appears in future work.
|
|
21
|
+
|
|
22
|
+
## Next step
|
|
23
|
+
|
|
24
|
+
Move reusable prompts or rules into the appropriate workspace file.
|
|
25
|
+
|
|
26
|
+
## Why this exists
|
|
27
|
+
|
|
28
|
+
This artifact makes the case runnable and reviewable. A raw chat can produce a smooth answer, but this file preserves the specific state needed for profile, context, acceptance, guard, handoff, and harvest work.
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
# Revised output - AI coding long task
|
|
2
|
+
|
|
3
|
+
## Source case
|
|
4
|
+
|
|
5
|
+
- Case id: `ai-coding-long-task`
|
|
6
|
+
- Case title: AI coding long task
|
|
7
|
+
- Privacy status: fully synthetic
|
|
8
|
+
- Private material: none
|
|
9
|
+
|
|
10
|
+
## How to use
|
|
11
|
+
|
|
12
|
+
Read this as the corrected answer after the guard review blocked the first one. It resolves the blocker the guard found.
|
|
13
|
+
|
|
14
|
+
## Synthetic content
|
|
15
|
+
|
|
16
|
+
The blocker is resolved: onKeyDown now reorders with the arrow keys and a keyboard test was added that fails on the old stub and passes on the fix.
|
|
17
|
+
|
|
18
|
+
TaskBoard.tsx (revised output, only the keyboard handler changed):
|
|
19
|
+
|
|
20
|
+
```tsx
|
|
21
|
+
function onKeyDown(event: React.KeyboardEvent, index: number) {
|
|
22
|
+
if (event.key === "ArrowUp") {
|
|
23
|
+
event.preventDefault();
|
|
24
|
+
moveTask(index, index - 1);
|
|
25
|
+
}
|
|
26
|
+
if (event.key === "ArrowDown") {
|
|
27
|
+
event.preventDefault();
|
|
28
|
+
moveTask(index, index + 1);
|
|
29
|
+
}
|
|
30
|
+
}
|
|
31
|
+
|
|
32
|
+
// in the list item: onKeyDown={(event) => onKeyDown(event, index)}
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
TaskBoard.test.tsx (added keyboard reorder test). It fails against the old stub in `first-ai-output.md` and passes against this fix.
|
|
36
|
+
|
|
37
|
+
```tsx
|
|
38
|
+
test("arrow keys reorder tasks for keyboard users", () => {
|
|
39
|
+
render(<TaskBoard initialTasks={sample} />);
|
|
40
|
+
const first = screen.getByText("Alpha");
|
|
41
|
+
first.focus();
|
|
42
|
+
fireEvent.keyDown(first, { key: "ArrowDown" });
|
|
43
|
+
const items = screen.getAllByRole("listitem").map((node) => node.textContent);
|
|
44
|
+
expect(items).toEqual(["Bravo", "Alpha"]);
|
|
45
|
+
});
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
Verification after the fix: 2 passing (mouse drag reorders tasks; arrow keys reorder tasks for keyboard users).
|
|
49
|
+
|
|
50
|
+
Guard re-review: blocker resolved. onKeyDown now calls moveTask for ArrowUp/ArrowDown, and the new keyboard test fails against the old stub and passes against the fix. AC2 and AC3 are met. Status: accepted, with visual polish still carried as unverified.
|
|
51
|
+
|
|
52
|
+
## Review note
|
|
53
|
+
|
|
54
|
+
Confirm the blocker is actually fixed with evidence: the new behavior exists and a test proves it.
|
|
55
|
+
|
|
56
|
+
## Next step
|
|
57
|
+
|
|
58
|
+
Carry remaining unverified work, write the handoff note, then harvest the lesson.
|
|
59
|
+
|
|
60
|
+
## Why this exists
|
|
61
|
+
|
|
62
|
+
This artifact makes the case runnable and reviewable. A raw chat can produce a smooth answer, but this file preserves the specific state needed for profile, context, acceptance, guard, handoff, and harvest work.
|