@sandrinio/vbounce 1.0.0 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,105 @@
1
+ #!/usr/bin/env node
2
+
3
+ /**
4
+ * verify_framework.mjs
5
+ *
6
+ * Tests the backward-compatibility of the AI agent prompts against
7
+ * the strict YAML parsing schemas in validate_report.mjs.
8
+ *
9
+ * Triggered manually by humans or automatically by CI when updating brains/.
10
+ */
11
+
12
+ import fs from 'fs';
13
+ import path from 'path';
14
+
15
// Directory holding the agent prompt templates, resolved against the current
// working directory (the script is expected to be launched from the repo root).
const AGENTS_DIR = path.join(process.cwd(), 'brains', 'claude-agents');

// The exact substring signatures that MUST exist in the agent instructions
// to ensure the LLM knows to output the correct YAML schema.
const EXPECTED_PROMPT_SIGNATURES = {
  'developer.md': [
    'status:',
    'correction_tax:',
    'files_modified:',
    'lessons_flagged:'
  ],
  'qa.md': [
    'status: "PASS"',
    'bugs_found: 0',
    'status: "FAIL"',
    'failed_scenarios:'
  ],
  'architect.md': [
    'status: "PASS"',
    'safe_zone_score:',
    'regression_risk:',
    'status: "FAIL"',
    'critical_failures:'
  ],
  'devops.md': [
    'type: "story-merge"',
    'conflicts_detected:',
    'type: "sprint-release"',
    'version:'
  ]
};

/**
 * Lists the expected agent prompt files that are absent from the directory listing.
 *
 * @param {string[]} presentFiles - File names found in the agents directory.
 * @param {Object<string, string[]>} expectedSignatures - Map of file name to required signatures.
 * @returns {string[]} Expected file names that were not found, in declaration order.
 */
function findMissingPromptFiles(presentFiles, expectedSignatures) {
  return Object.keys(expectedSignatures).filter((name) => !presentFiles.includes(name));
}

/**
 * Collects the integrity failures for a single agent prompt.
 *
 * @param {string} file - File name, used only in the messages.
 * @param {string} content - Full text of the agent prompt.
 * @param {string[]} requiredSignatures - Substrings that must appear in `content`.
 * @returns {string[]} One message per problem; empty when the prompt passes.
 */
function findPromptFailures(file, content, requiredSignatures) {
  const failures = requiredSignatures
    .filter((sig) => !content.includes(sig))
    .map((sig) => `[FAIL] ${file} is missing required YAML instruction key: '${sig}'`);

  // Check for general Rule 12 presence (either capitalisation is accepted).
  if (!content.includes('YAML frontmatter') && !content.includes('YAML Frontmatter')) {
    failures.push(`[FAIL] ${file} appears to be missing the Rule 12 YAML Frontmatter instruction.`);
  }

  return failures;
}

/**
 * Runs the framework integrity check and terminates the process.
 *
 * Exits with status 1 when the agents directory is missing, when an expected
 * agent prompt file is absent, or when any prompt lacks a required signature;
 * exits with status 0 otherwise.
 */
function main() {
  console.log("===========================================");
  console.log(" V-Bounce OS: Framework Integrity Check");
  console.log("===========================================\n");

  let hasErrors = false;

  if (!fs.existsSync(AGENTS_DIR)) {
    console.error(`ERROR: ${AGENTS_DIR} not found.`);
    process.exit(1);
  }

  const files = fs.readdirSync(AGENTS_DIR).filter(f => f.endsWith('.md'));

  // A deleted or renamed prompt must fail the check: iterating only over the
  // files that exist would otherwise let a missing agent pass silently.
  for (const missing of findMissingPromptFiles(files, EXPECTED_PROMPT_SIGNATURES)) {
    console.error(`[FAIL] ${missing} not found in ${AGENTS_DIR}; its required YAML signatures cannot be verified.`);
    hasErrors = true;
  }

  for (const file of files) {
    const filePath = path.join(AGENTS_DIR, file);
    const content = fs.readFileSync(filePath, 'utf-8');

    const requiredSignatures = EXPECTED_PROMPT_SIGNATURES[file];
    if (!requiredSignatures) {
      console.log(`[PASS] ${file} (No strict YAML signatures required)`);
      continue;
    }

    const failures = findPromptFailures(file, content, requiredSignatures);
    if (failures.length === 0) {
      console.log(`[PASS] ${file} contains all required YAML extraction signatures.`);
      continue;
    }

    for (const message of failures) {
      console.error(message);
    }
    hasErrors = true;
  }

  console.log("\n-------------------------------------------");
  if (hasErrors) {
    console.error("❌ INTEGRITY CHECK FAILED.");
    console.error("Agent prompts have drifted from the validate_report.mjs schema.");
    console.error("Please fix the agent templates in brains/claude-agents/ to restore pipeline integrity.");
    process.exit(1);
  } else {
    console.log("✅ INTEGRITY CHECK PASSED.");
    console.log("All agent prompts strictly map to the required pipeline metadata schemas.");
    process.exit(0);
  }
}
104
+
105
+ main();
@@ -0,0 +1,13 @@
1
#!/usr/bin/env bash
#
# verify_framework.sh
#
# Thin wrapper around the Framework Integrity Check: switches to the parent
# of this script's directory, runs the Node checker from there, and exits
# with the checker's status.

# Absolute directory containing this script, regardless of the caller's cwd.
script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" > /dev/null 2>&1 && pwd)"
# The parent of that directory; the checker path below is relative to it.
root_dir="$(dirname "$script_dir")"

if ! cd "$root_dir"; then
  exit 1
fi

node ./scripts/verify_framework.mjs
status=$?
exit "$status"
@@ -201,7 +201,13 @@ Examples:
201
201
  e. DevOps runs `hotfix_manager.sh sync` to update any active story worktrees.
202
202
  f. Update Delivery Plan Status to "Done".
203
203
 
204
- 6. Update DELIVERY_PLAN.md: Sprint Status "Active"
204
+ 6. **Parallel Readiness Check** (before bouncing multiple stories simultaneously):
205
+ - Verify test runner config excludes `.worktrees/` (vitest, jest, pytest, etc.)
206
+ - Verify no shared mutable state between worktrees (e.g., shared temp files, singletons writing to same path)
207
+ - Verify `.gitignore` includes `.worktrees/`
208
+ If any check fails, fix before spawning parallel stories. Intermittent test failures from worktree cross-contamination erode trust in the test suite fast.
209
+
210
+ 7. Update DELIVERY_PLAN.md: Sprint Status → "Active"
205
211
  ```
206
212
 
207
213
  ### Step 1: Story Initialization
@@ -215,6 +221,7 @@ mkdir -p .worktrees/STORY-{ID}/.bounce/{tasks,reports}
215
221
  - Read LESSONS.md
216
222
  - Check RISK_REGISTRY.md for risks tagged to this story or its Epic
217
223
  - If `product_documentation/_manifest.json` exists, identify docs relevant to this story's scope (match against manifest descriptions/tags). Include relevant doc references in the task file so the Developer has product context.
224
+ - **Adjacent implementation check:** For stories that modify or extend modules touched by earlier stories in this sprint, identify existing implementations the Developer should reuse. Add to the task file: `"Reuse these existing modules: {list with file paths and brief description of what each provides}"`. This prevents agents from independently re-implementing logic that already exists — a common source of duplication when stories run in parallel.
218
225
  - Create task file in `.worktrees/STORY-{ID}/.bounce/tasks/`
219
226
  - Update DELIVERY_PLAN.md: V-Bounce State → "Bouncing"
220
227
 
@@ -224,6 +231,7 @@ mkdir -p .worktrees/STORY-{ID}/.bounce/{tasks,reports}
224
231
  - Story §1 The Spec + §3 Implementation Guide
225
232
  - LESSONS.md
226
233
  - Relevant react-best-practices rules
234
+ - Adjacent module references (if any — "reuse src/core/X.ts for Y")
227
235
  2. Developer writes code and Implementation Report to .bounce/reports/
228
236
  3. Lead reads report, verifies completeness
229
237
  ```
@@ -295,7 +303,12 @@ After ALL stories are merged into `sprint/S-01`:
295
303
  2. Generate Sprint Report to .bounce/sprint-report.md
296
304
  3. V-Bounce State → "Sprint Review" for all stories
297
305
  4. Present Sprint Report to human
298
- 5. After approval Spawn devops subagent for Sprint Release:
306
+ 5. **BLOCKING STEP — Lesson Approval:**
307
+ Review and approve/reject ALL flagged lessons from §4 of the Sprint Report.
308
+ Do NOT proceed to Sprint Release until every lesson has a status of "Yes" or "No".
309
+ Stale lessons lose context — approve them while the sprint is fresh.
310
+ Present each lesson to the human and record approved ones to LESSONS.md immediately.
311
+ 6. After approval → Spawn devops subagent for Sprint Release:
299
312
  - Merge sprint/S-01 → main (--no-ff)
300
313
  - Tag release: v{VERSION}
301
314
  - Run full test suite + build + lint on main
@@ -234,8 +234,7 @@ Any → Parking Lot: Deferred by decision
234
234
 
235
235
  ***HOTFIX TRANSITIONS***
236
236
  Draft → Bouncing: Hotfix template created + Triage confirmed L1
237
- Bouncing → Done: Dev implements + Human manually verifies
238
- Done → Sync: `hotfix_manager.sh sync` run to update other worktrees
237
+ Bouncing → Done: Dev implements + Human manually verifies + DevOps runs `hotfix_manager.sh sync`
239
238
  ```
240
239
 
241
240
  ## Agent Integration
@@ -0,0 +1,146 @@
1
+ ---
2
+ name: file-organization
3
+ description: "**Codebase Cleanliness Standard**: Enforces clean file organization in any codebase. Before creating ANY file, classify it by intent—deliverables go to the project tree, working artifacts go to `/temporary/`. Before committing, review `git diff` to catch misplaced files. Use this skill whenever creating, moving, or committing files. Works with all languages and frameworks. The `/temporary/` folder is git-ignored so working artifacts never get merged. ALWAYS consult this skill when writing files to the repo—it prevents clutter from debug scripts, scratch analysis, throwaway tests, and other AI working artifacts from polluting the codebase."
4
+ compatibility: "Git required. Works with any language or framework."
5
+ ---
6
+
7
+ ## Core Principle
8
+
9
+ Every file you create has an **intent**. You always know why you're creating it. Use that knowledge.
10
+
11
+ - **"I'm creating this because the user asked for it / it solves the task"** → Project tree (root, src/, etc.)
12
+ - **"I'm creating this to help me work — debug, analyze, test an idea"** → `/temporary/`
13
+
14
+ This is not about file types or extensions. A `.test.js` file might be a critical part of the test suite, or it might be a throwaway script you wrote to check a theory. The difference is intent.
15
+
16
+ ## Layer 1: Proactive — Decide at Creation Time
17
+
18
+ Before writing any file, run this mental check:
19
+
20
+ ```
21
+ WHY am I creating this file?
22
+
23
+ ├─ DELIVERABLE — The user asked for this, or it directly fulfills the task
24
+ │ Examples:
25
+ │ - "Add input validation" → validation.ts (deliverable)
26
+ │ - "Write unit tests for auth" → auth.test.ts (deliverable)
27
+ │ - "Create a migration for the new table" → 003_add_users.sql (deliverable)
28
+ │ - "Update the README" → README.md (deliverable)
29
+ │ → CREATE IN PROJECT TREE
30
+
31
+ └─ WORKING ARTIFACT — I need this to help me understand, debug, or explore
32
+ Examples:
33
+ - Script to reproduce a bug → debug-repro.py (working artifact)
34
+ - Markdown notes analyzing the codebase → analysis.md (working artifact)
35
+ - Quick test to verify an assumption → check-behavior.js (working artifact)
36
+ - Output log from a test run → output.txt (working artifact)
37
+ → CREATE IN /temporary/
38
+ ```
39
+
40
+ The question is never "what type of file is this?" — it's **"does this file exist to serve the project, or to serve my working process?"**
41
+
42
+ ## Layer 2: Reactive — Safety Net Before Commit
43
+
44
+ Before committing, review what you've changed. This catches anything that slipped through Layer 1.
45
+
46
+ ```bash
47
+ git diff --name-only
48
+ git status
49
+ ```
50
+
51
+ For each file listed (untracked new files appear only in `git status`, not in `git diff`), ask:
52
+
53
+ 1. **Did the user's task require this file?** If no → move to `/temporary/`
54
+ 2. **Does this file exist in the project already?** If yes, you're editing existing code — that's fine, leave it
55
+ 3. **Is this a new file I created to help myself work?** If yes → move to `/temporary/`
56
+
57
+ ### Example: "Fix the login bug"
58
+
59
+ ```bash
60
+ $ git status
61
+ modified: src/auth/login.ts # ← The actual fix. Commit this.
62
+ new file: debug-login.py # ← Script I wrote to reproduce the bug. Move to /temporary/
63
+ new file: test-output.log # ← Output from my debugging. Move to /temporary/
64
+ modified: src/auth/login.test.ts # ← Updated existing test. Commit this.
65
+ ```
66
+
67
+ After cleanup:
68
+ ```bash
69
+ $ git status
70
+ modified: src/auth/login.ts # ✅ commit
71
+ modified: src/auth/login.test.ts # ✅ commit
72
+ ```
73
+
74
+ The debug script and log are now safely in `/temporary/`, out of the commit.
75
+
76
+ ### Example: "Add user validation with tests"
77
+
78
+ ```bash
79
+ $ git status
80
+ new file: src/validation/validate.ts # ← Deliverable. Commit.
81
+ new file: src/validation/validate.test.ts # ← User asked for tests. Commit.
82
+ new file: scratch-regex-test.js # ← I wrote this to test regex patterns. /temporary/
83
+ ```
84
+
85
+ Notice how `validate.test.ts` stays because the user asked for tests — it's a deliverable. But `scratch-regex-test.js` was a working artifact.
86
+
87
+ ## Language-Agnostic — Why Intent Beats File Types
88
+
89
+ Static file-type rules break across languages:
90
+
91
+ - Python's `__pycache__/` is already gitignored — don't touch it
92
+ - Java's `target/` is a build artifact — handled by existing `.gitignore`
93
+ - A Go `vendor/` directory might be intentionally committed
94
+ - Database migrations are generated but absolutely committed
95
+ - Protocol buffer outputs, GraphQL codegen — generated but part of the codebase
96
+ - `dist/` and `build/` directories vary by project
97
+
98
+ Trying to categorize by extension or directory name is fragile. Instead, the intent check works universally:
99
+
100
+ **"Did I create this to deliver the task, or to help myself work?"**
101
+
102
+ This one question works whether you're writing Python, TypeScript, Rust, Go, Java, C#, or anything else.
103
+
104
+ ## Things That Are NEVER Working Artifacts
105
+
106
+ Don't accidentally move these to `/temporary/`:
107
+
108
+ - Existing files you modified (they're already tracked in git)
109
+ - Test suites the project already has (`tests/`, `__tests__/`, `spec/`)
110
+ - CI/CD configs (`.github/workflows/`, `Dockerfile`, etc.)
111
+ - Lock files (`package-lock.json`, `Cargo.lock`, `poetry.lock`)
112
+ - Migration files (database schema changes)
113
+ - Generated code that the project commits (codegen output, protobuf, etc.)
114
+ - Config files (`.eslintrc`, `tsconfig.json`, `pyproject.toml`)
115
+
116
+ If a file already exists in the git tree, it belongs there. Your job is only to route **new files you create** during your working process.
117
+
118
+ ## Git Setup
119
+
120
+ Add `/temporary/` to `.gitignore` if it's not there already:
121
+
122
+ ```gitignore
123
+ # AI/developer working artifacts (never commit)
124
+ /temporary/
125
+ ```
126
+
127
+ This is a one-time setup. After this, anything in `/temporary/` is invisible to git.
128
+
129
+ ## Quick Reference
130
+
131
+ ```
132
+ BEFORE CREATING A FILE:
133
+ "Is this a deliverable?" → YES → project tree
134
+ → NO → /temporary/
135
+
136
+ BEFORE COMMITTING:
137
+ Run: git diff --name-only && git status (untracked NEW files appear only in git status)
138
+ For each NEW file: "Did the task require this?" → NO → mv to /temporary/
139
+ For MODIFIED files: leave them (they're already tracked)
140
+ ```
141
+
142
+ ## Why This Matters
143
+
144
+ Working artifacts in the root folder create real problems: teammates see debug scripts and think they're production code, CI might pick up stray test files, code review gets cluttered with irrelevant changes, and over time the repo becomes a mess of half-finished experiments mixed with real code.
145
+
146
+ The `/temporary/` folder gives you a safe space to work freely. Use it for anything and everything you need during your process — it never touches the git history and never confuses anyone.
@@ -0,0 +1,193 @@
1
+ # File Organization Skill — Eval Results
2
+
3
+ ## Eval 1: Repro Script vs. Handler Fix
4
+
5
+ **Prompt:** "I need to fix a race condition in the websocket handler. I wrote a quick Python script to simulate concurrent connections and reproduce the bug. I also fixed the actual handler. Where does each file go?"
6
+
7
+ **Expected Output:** The Python repro script is a working artifact → /temporary/. The websocket handler fix is a deliverable → commit in place.
8
+
9
+ **Relevant Guidance:**
10
+ - "Script to reproduce a bug → debug-repro.py (working artifact)" (Line 33)
11
+ - "I'm creating this because the user asked for it / it solves the task" → Project tree (Line 11)
12
+ - "I'm creating this to help me work — debug, analyze, test an idea" → /temporary/ (Line 12)
13
+
14
+ **Analysis:**
15
+ The skill clearly distinguishes between debugging artifacts ("Script to reproduce a bug") and actual fixes. An agent following the core principle would recognize:
16
+ - The Python script's intent: "help me understand/debug" → /temporary/
17
+ - The handler fix's intent: "solves the task" → project tree
18
+
19
+ The guidance is unambiguous. The agent gets the correct answer.
20
+
21
+ **Rating: PASS**
22
+
23
+ ---
24
+
25
+ ## Eval 2: User-Requested Tests vs. Scratch File
26
+
27
+ **Prompt:** "User asked me to add unit tests for the payment module. I also created a scratch file to test some regex patterns I needed for the validation logic. Where does each go?"
28
+
29
+ **Expected Output:** The unit tests are deliverables (user asked for them) → project tree. The regex scratch file is a working artifact → /temporary/.
30
+
31
+ **Relevant Guidance:**
32
+ - "Write unit tests for auth" → auth.test.ts (deliverable) (Line 26)
33
+ - "Add user validation with tests" example shows validate.test.ts as deliverable because "User asked for tests" (Line 85)
34
+ - "Quick test to verify an assumption → check-behavior.js (working artifact)" (Line 35)
35
+
36
+ **Analysis:**
37
+ The skill explicitly handles this distinction in the "Add user validation with tests" example (Lines 76-85), which directly parallels Eval 2:
38
+ - User-requested tests (validate.test.ts) = deliverable
39
+ - Scratch working files (scratch-regex-test.js) = working artifact
40
+
41
+ The key insight is whether **the user asked for** the tests. The skill states this clearly. An agent would correctly identify:
42
+ - User explicitly asked for unit tests → deliverable
43
+ - Regex pattern scratch file is "to help me work" (testing an assumption) → working artifact
44
+
45
+ **Potential gap:** The skill doesn't address a borderline case where scratch tests could be mistaken for part of the test suite if the agent isn't careful about the "user asked for" criterion. However, the stated guidance is clear enough.
46
+
47
+ **Rating: PASS**
48
+
49
+ ---
50
+
51
+ ## Eval 3: Existing Tracked Tests vs. Debug Script
52
+
53
+ **Prompt:** "I see there's a tests/ directory with existing test files. I also see a file called check-api.sh in the root that I created yesterday to debug an endpoint. What should I do?"
54
+
55
+ **Expected Output:** Leave the tests/ directory alone — it's an existing tracked test suite. Move check-api.sh to /temporary/ since it's a debug working artifact.
56
+
57
+ **Relevant Guidance:**
58
+ - "Existing files you modified (they're already tracked in git)" — Never working artifacts (Line 108)
59
+ - "Test suites the project already has (`tests/`, `__tests__/`, `spec/`)" — Never working artifacts (Line 109)
60
+ - "If a file already exists in the git tree, it belongs there. Your job is only to route **new files you create** during your working process." (Line 116)
61
+
62
+ **Analysis:**
63
+ The skill explicitly states that existing tracked files are "NEVER working artifacts" and gives `tests/` as a direct example. For check-api.sh, the intent is clear: debug artifact, not user-requested deliverable.
64
+
65
+ An agent would correctly identify:
66
+ 1. tests/ is already tracked → don't touch it
67
+ 2. check-api.sh intent: "to help me debug" → /temporary/
68
+
69
+ The guidance is explicit and unambiguous. The agent would get the right answer.
70
+
71
+ **Rating: PASS**
72
+
73
+ ---
74
+
75
+ ## Eval 4: Generated-but-Committed Migration vs. Analysis Notes
76
+
77
+ **Prompt:** "I'm working on a database migration task. I generated a migration file using the ORM CLI, and I also wrote an analysis.md exploring different indexing strategies. Where do these go?"
78
+
79
+ **Expected Output:** The migration file is a deliverable (generated but committed as part of the project) → project tree. The analysis.md is a working artifact → /temporary/.
80
+
81
+ **Relevant Guidance:**
82
+ - "Database migrations are generated but absolutely committed" (Line 94)
83
+ - "Migration files (database schema changes)" — Never working artifacts (Line 112)
84
+ - "Markdown notes analyzing the codebase → analysis.md (working artifact)" (Line 34)
85
+
86
+ **Analysis:**
87
+ The skill handles this well. It explicitly recognizes that "generated" doesn't mean "working artifact" — migrations are generated by the ORM but belong in the project because they're **part of the deliverable** (schema changes that must be committed).
88
+
89
+ For the migration file: The skill states directly "Migration files (database schema changes)" as something that is never a working artifact.
90
+
91
+ For analysis.md: The skill lists "Markdown notes analyzing the codebase → analysis.md (working artifact)" — this directly matches the evaluation scenario.
92
+
93
+ An agent would correctly identify:
94
+ 1. Migration file: "the project commits this" + "database schema changes" → project tree
95
+ 2. analysis.md: "notes analyzing the codebase" + "to help me work" → /temporary/
96
+
97
+ The guidance is explicit and covers both cases directly.
98
+
99
+ **Rating: PASS**
100
+
101
+ ---
102
+
103
+ ## Eval 5: Requested Component vs. Debug Render vs. Existing Test Suite
104
+
105
+ **Prompt:** "I created a new React component as requested, plus a debug-render.jsx to test how it renders in isolation. The project already has a __tests__/ folder. Where does everything go?"
106
+
107
+ **Expected Output:** The React component is a deliverable → project tree. debug-render.jsx is a working artifact → /temporary/. The __tests__/ folder is existing tracked code — don't touch it.
108
+
109
+ **Relevant Guidance:**
110
+ - "The user asked for it / it solves the task" → Project tree (Line 11)
111
+ - "I need this to help me understand, debug, or explore" → /temporary/ (Line 31)
112
+ - "Test suites the project already has (`tests/`, `__tests__/`, `spec/`)" — Never working artifacts (Line 109)
113
+
114
+ **Analysis:**
115
+ This eval tests three things:
116
+ 1. **Requested component:** Clear deliverable intent
117
+ 2. **Debug render file:** Clearly a working artifact ("test how it renders in isolation" = debugging/exploring)
118
+ 3. **Existing __tests__/ folder:** Explicitly listed as something to never move
119
+
120
+ The skill handles all three. The guidance is clear. An agent would get the right answer.
121
+
122
+ **Rating: PASS**
123
+
124
+ ---
125
+
126
+ ## Eval 6: Git Status Cleanup (Layer 2)
127
+
128
+ **Prompt:** "Before committing, I ran git status and see: modified src/api/users.ts, new file src/api/users.test.ts (user asked for tests), new file output.log, new file temp-check.py. How do I clean this up?"
129
+
130
+ **Expected Output:** Commit users.ts (modified existing) and users.test.ts (deliverable). Move output.log and temp-check.py to /temporary/ (working artifacts).
131
+
132
+ **Relevant Guidance:**
133
+ - Layer 2 reactive check (Lines 42-55)
134
+ - "Did the user's task require this file? If no → move to /temporary/" (Line 53)
135
+ - "Does this file exist in the project already? If yes, you're editing existing code — that's fine, leave it" (Line 54)
136
+ - "Is this a new file I created to help myself work? If yes → move to /temporary/" (Line 55)
137
+ - Example showing git status cleanup (Lines 57-74) with similar structure
138
+
139
+ **Analysis:**
140
+ The skill provides the Layer 2 reactive framework directly:
141
+ 1. **modified users.ts:** Already tracked → commit
142
+ 2. **new users.test.ts:** User asked for tests (stated in prompt) → commit
143
+ 3. **new output.log:** Created during working process (debug output) → /temporary/
144
+ 4. **new temp-check.py:** Name itself suggests "to help myself work" + temporary → /temporary/
145
+
146
+ The example (Lines 57-74) shows the exact scenario structure. The three questions in Layer 2 map directly:
147
+ - Q1 (did user ask?): No for output.log and temp-check.py → move
148
+ - Q2 (already exists?): No for new files, but users.ts exists → commit users.ts
149
+ - Q3 (new artifact?): Yes for output.log and temp-check.py → move
150
+
151
+ An agent would get the right answer following the Layer 2 framework.
152
+
153
+ **Rating: PASS**
154
+
155
+ ---
156
+
157
+ ## Summary Assessment
158
+
159
+ | Eval | Result | Confidence | Notes |
160
+ |------|--------|-----------|-------|
161
+ | 1 | PASS | High | Clear distinction between debug script and fix |
162
+ | 2 | PASS | High | Explicit example matches eval scenario |
163
+ | 3 | PASS | High | Existing files explicitly excluded from working artifacts |
164
+ | 4 | PASS | High | Migrations explicitly covered; analysis.md directly exemplified |
165
+ | 5 | PASS | High | All three elements (new component, debug file, existing suite) handled clearly |
166
+ | 6 | PASS | High | Layer 2 framework provides exact decision tree; example mirrors scenario |
167
+
168
+ ## Critical Findings
169
+
170
+ **All evals achieve PASS.** The skill provides:
171
+
172
+ 1. **Clear intent-based framework** that works across all scenarios
173
+ 2. **Explicit examples** that map directly to evals 2, 4, 5, and 6
174
+ 3. **Direct lists** of files that are "NEVER working artifacts," covering edge cases in evals 3 and 5
175
+ 4. **Layer 2 reactive checks** that handle the git status scenario (eval 6) with a concrete decision tree
176
+ 5. **Explicit handling of "generated but committed"** files like migrations (eval 4)
177
+
178
+ The skill successfully distinguishes user-requested deliverables from working artifacts across all cases. Agents following either Layer 1 (proactive) or Layer 2 (reactive) would arrive at correct answers for all six evals.
179
+
180
+ ### Strengths of the Skill
181
+
182
+ - **Not file-type dependent:** The "intent" approach works for all scenarios without fragile extension-based rules
183
+ - **Handles edge cases explicitly:** Migrations, codegen, existing tracked files all explicitly addressed
184
+ - **Concrete examples:** Evals 2, 4, 5 are nearly identical to skill examples
185
+ - **Dual-layer approach:** Catches mistakes at creation time or before commit
186
+
187
+ ### No Significant Gaps Identified
188
+
189
+ All three "focus areas" from the prompt are handled well:
190
+ - **Eval 2 (user-requested vs. scratch tests):** Clear distinction via "user asked for"
191
+ - **Eval 3 (existing tracked files):** Explicit list + general rule about existing files
192
+ - **Eval 4 (generated-but-committed):** Direct mention of migrations + intent-based reasoning
193
+
@@ -0,0 +1,41 @@
1
+ {
2
+ "skill_name": "file-organization",
3
+ "evals": [
4
+ {
5
+ "id": 1,
6
+ "prompt": "I need to fix a race condition in the websocket handler. I wrote a quick Python script to simulate concurrent connections and reproduce the bug. I also fixed the actual handler. Where does each file go?",
7
+ "expected_output": "The Python repro script is a working artifact → /temporary/. The websocket handler fix is a deliverable → commit in place.",
8
+ "files": []
9
+ },
10
+ {
11
+ "id": 2,
12
+ "prompt": "User asked me to add unit tests for the payment module. I also created a scratch file to test some regex patterns I needed for the validation logic. Where does each go?",
13
+ "expected_output": "The unit tests are deliverables (user asked for them) → project tree. The regex scratch file is a working artifact → /temporary/.",
14
+ "files": []
15
+ },
16
+ {
17
+ "id": 3,
18
+ "prompt": "I see there's a tests/ directory with existing test files. I also see a file called check-api.sh in the root that I created yesterday to debug an endpoint. What should I do?",
19
+ "expected_output": "Leave the tests/ directory alone — it's an existing tracked test suite. Move check-api.sh to /temporary/ since it's a debug working artifact.",
20
+ "files": []
21
+ },
22
+ {
23
+ "id": 4,
24
+ "prompt": "I'm working on a database migration task. I generated a migration file using the ORM CLI, and I also wrote an analysis.md exploring different indexing strategies. Where do these go?",
25
+ "expected_output": "The migration file is a deliverable (generated but committed as part of the project) → project tree. The analysis.md is a working artifact → /temporary/.",
26
+ "files": []
27
+ },
28
+ {
29
+ "id": 5,
30
+ "prompt": "I created a new React component as requested, plus a debug-render.jsx to test how it renders in isolation. The project already has a __tests__/ folder. Where does everything go?",
31
+ "expected_output": "The React component is a deliverable → project tree. debug-render.jsx is a working artifact → /temporary/. The __tests__/ folder is existing tracked code — don't touch it.",
32
+ "files": []
33
+ },
34
+ {
35
+ "id": 6,
36
+ "prompt": "Before committing, I ran git status and see: modified src/api/users.ts, new file src/api/users.test.ts (user asked for tests), new file output.log, new file temp-check.py. How do I clean this up?",
37
+ "expected_output": "Commit users.ts (modified existing) and users.test.ts (deliverable). Move output.log and temp-check.py to /temporary/ (working artifacts).",
38
+ "files": []
39
+ }
40
+ ]
41
+ }
@@ -0,0 +1,53 @@
1
+ # .gitignore Template for File Organization Standard
2
+
3
+ Add this to your `./.gitignore` file to ensure `/temporary/` never gets committed:
4
+
5
+ ```gitignore
6
+ # ============================================
7
+ # Local temporary work (NEVER commit)
8
+ # ============================================
9
+ /temporary/
10
+ ```
11
+
12
+ ## Why This Matters
13
+
14
+ The `/temporary/` folder is where agents and developers place all working files that won't be part of the final codebase:
15
+ - Debug scripts
16
+ - Test experiments
17
+ - Analysis documents
18
+ - Exploration code
19
+ - Generated output
20
+
21
+ By adding `/temporary/` to `.gitignore`, you ensure:
22
+ 1. ✅ No clutter in git history
23
+ 2. ✅ Team members only see production code in the repository
24
+ 3. ✅ Safe space for experimentation without affecting commits
25
+ 4. ✅ Reduced cognitive load when browsing the codebase
26
+
27
+ ## Installation
28
+
29
+ If you don't have a `.gitignore` file yet:
30
+ 1. Create a new file called `.gitignore` in the root of your repository
31
+ 2. Add the entry above
32
+ 3. Commit it: `git add .gitignore && git commit -m "Add temporary folder to gitignore"`
33
+
34
+ If you already have a `.gitignore`:
35
+ 1. Open it
36
+ 2. Add the entry above (preferably in a section labeled "Local temporary work")
37
+ 3. Commit the change
38
+
39
+ ## Verification
40
+
41
+ To verify the setup is correct:
42
+ ```bash
43
+ # This should NOT list any files from /temporary/
44
+ git status
45
+
46
+ # This should show that /temporary/ is ignored
47
+ git check-ignore -v temporary/something.txt
48
+ ```
49
+
50
+ If `/temporary/` files are appearing in `git status`, double-check that:
51
+ - The `.gitignore` entry is spelled correctly (case-sensitive on Linux/Mac)
52
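+ - The `.gitignore` file is saved in the repository root and committed so the rule is shared (ignore rules take effect as soon as the file is saved, even before it is staged)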
53
+ - You haven't accidentally added `/temporary/` files with `git add -f`
@@ -0,0 +1,48 @@
1
+ # File Organization Quick Checklist
2
+
3
+ ## At File Creation Time
4
+
5
+ ```
6
+ WHY am I creating this file?
7
+
8
+ ├─ DELIVERABLE (serves the project / user asked for it)
9
+ │ → Create in project tree
10
+
11
+ └─ WORKING ARTIFACT (helps me debug / analyze / explore)
12
+ → Create in /temporary/
13
+ ```
14
+
15
+ ## Before Committing
16
+
17
+ ```bash
18
+ git diff --name-only
19
+ git status
20
+ ```
21
+
22
+ For each file:
23
+
24
+ | Question | Answer | Action |
25
+ |----------|--------|--------|
26
+ | Did the user's task require this file? | Yes | Commit |
27
+ | Is this an existing file I modified? | Yes | Commit |
28
+ | Did I create this to help myself work? | Yes | Move to /temporary/ |
29
+ | Not sure? | — | Move to /temporary/ (safer) |
30
+
31
+ ## Never Move These to /temporary/
32
+
33
+ - Existing tracked files you edited
34
+ - Project test suites (`tests/`, `__tests__/`, `spec/`)
35
+ - CI/CD configs (`.github/workflows/`, `Dockerfile`)
36
+ - Lock files (`package-lock.json`, `Cargo.lock`)
37
+ - Migration files
38
+ - Generated code the project commits (protobuf, codegen)
39
+ - Config files (`.eslintrc`, `tsconfig.json`, etc.)
40
+
41
+ ## Common Working Artifacts (Always /temporary/)
42
+
43
+ - Debug/repro scripts you wrote to investigate
44
+ - Analysis or exploration markdown
45
+ - Scratch files testing an idea
46
+ - Console output or logs you captured
47
+ - Experimental code trying different approaches
48
+ - Notes and drafts that aren't official docs