@loomfsm/bundle-code 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +201 -0
- package/agents/acceptance.md +141 -0
- package/agents/api-contract.md +89 -0
- package/agents/architect.md +52 -0
- package/agents/challenger-reviewer.md +104 -0
- package/agents/classifier.md +74 -0
- package/agents/code-analyzer.md +43 -0
- package/agents/context-doc-verifier.md +94 -0
- package/agents/dependency-auditor.md +42 -0
- package/agents/implementer.md +135 -0
- package/agents/logic-reviewer.md +132 -0
- package/agents/migration.md +55 -0
- package/agents/performance.md +95 -0
- package/agents/plan-conformance.md +127 -0
- package/agents/plan-grounding-check.md +106 -0
- package/agents/planner.md +143 -0
- package/agents/playwright.md +68 -0
- package/agents/research.md +52 -0
- package/agents/security.md +88 -0
- package/agents/style-reviewer.md +85 -0
- package/agents/test.md +206 -0
- package/agents/ui-consistency.md +75 -0
- package/dist/manifest.d.ts +2 -0
- package/dist/manifest.js +34 -0
- package/dist/manifest.js.map +1 -0
- package/dist/src/bundle.d.ts +2 -0
- package/dist/src/bundle.js +424 -0
- package/dist/src/bundle.js.map +1 -0
- package/dist/src/index.d.ts +5 -0
- package/dist/src/index.js +14 -0
- package/dist/src/index.js.map +1 -0
- package/dist/src/invariants.d.ts +10 -0
- package/dist/src/invariants.js +208 -0
- package/dist/src/invariants.js.map +1 -0
- package/dist/src/policy-resolver.d.ts +2 -0
- package/dist/src/policy-resolver.js +65 -0
- package/dist/src/policy-resolver.js.map +1 -0
- package/dist/src/sandbox-rules.d.ts +2 -0
- package/dist/src/sandbox-rules.js +40 -0
- package/dist/src/sandbox-rules.js.map +1 -0
- package/dist/test/bundle.test.d.ts +1 -0
- package/dist/test/bundle.test.js +289 -0
- package/dist/test/bundle.test.js.map +1 -0
- package/dist/test/sandbox-rules.test.d.ts +1 -0
- package/dist/test/sandbox-rules.test.js +73 -0
- package/dist/test/sandbox-rules.test.js.map +1 -0
- package/knowledge/references/api-design.md +188 -0
- package/knowledge/references/arch-patterns.md +106 -0
- package/knowledge/references/caching.md +190 -0
- package/knowledge/references/concurrency.md +195 -0
- package/knowledge/references/db-postgres.md +153 -0
- package/knowledge/references/e2e-flutter.md +56 -0
- package/knowledge/references/e2e-playwright.md +53 -0
- package/knowledge/references/error-handling.md +208 -0
- package/knowledge/references/next-app-router.md +231 -0
- package/knowledge/references/observability.md +169 -0
- package/knowledge/references/optimization-strategy.md +197 -0
- package/knowledge/references/perf-flutter.md +62 -0
- package/knowledge/references/perf-nestjs.md +59 -0
- package/knowledge/references/perf-python.md +50 -0
- package/knowledge/references/perf-react.md +52 -0
- package/knowledge/references/react19.md +176 -0
- package/knowledge/references/redis.md +175 -0
- package/knowledge/references/security-backend.md +219 -0
- package/knowledge/references/test-flutter.md +65 -0
- package/knowledge/references/test-nestjs.md +82 -0
- package/knowledge/references/test-python.md +76 -0
- package/knowledge/references/test-react.md +66 -0
- package/knowledge/references/test-strategy.md +175 -0
- package/knowledge/references/ui-flutter.md +56 -0
- package/knowledge/references/ui-web.md +51 -0
- package/package.json +34 -0
- package/schemas/agent-feedback.schema.json +80 -0
- package/schemas/category-vocab.json +170 -0
- package/schemas/classifier-output.schema.json +53 -0
- package/schemas/finding.schema.json +92 -0
- package/schemas/pipeline-state.schema.json +238 -0
- package/schemas/reviewer-output.schema.json +62 -0
- package/schemas/state-extension.schema.json +53 -0
- package/schemas/validator-output.schema.json +48 -0
- package/stack-candidates.yaml +248 -0
|
@@ -0,0 +1,175 @@
|
|
|
1
|
+
---
|
|
2
|
+
tags: [testing, strategy, coverage, mocks, flake, integration, tdd]
|
|
3
|
+
stack_signals: []
|
|
4
|
+
summary: |
|
|
5
|
+
Cross-stack test strategy — pin behavior not implementation; coverage % is
|
|
6
|
+
vanity; flaky tests are worse than no tests. Decisions for what to test,
|
|
7
|
+
how to mock, and when integration > unit.
|
|
8
|
+
when_to_load: |
|
|
9
|
+
Plan review and acceptance steps; COMPLEX tasks reviewing test specs; or
|
|
10
|
+
any task where the test surface itself is being designed (TDD bootstrap,
|
|
11
|
+
test refactor, mock-strategy review).
|
|
12
|
+
agent_hints: [test, logic-reviewer, acceptance, challenger-reviewer]
|
|
13
|
+
---
|
|
14
|
+
|
|
15
|
+
# Test Strategy — Senior Stance
|
|
16
|
+
|
|
17
|
+
## When this applies
|
|
18
|
+
Cross-stack reference loaded during plan review and at acceptance time. Complements `test-{stack}.md` (framework specifics) with strategy-level decisions: what to test, how to mock, when integration > unit, flake mitigation, test data management. Loaded by Test Agent on COMPLEX tasks and by logic-reviewer when reviewing test specs.
|
|
19
|
+
|
|
20
|
+
## Default Stance
|
|
21
|
+
Tests pin behavior, not implementation. A test that breaks when you refactor (without behavior change) is a bad test. Coverage % is a vanity metric — meaningful coverage is "every behavior an external caller depends on has a test that fails when that behavior breaks". Slow flaky tests are worse than no tests because people start ignoring them.
|
|
22
|
+
|
|
23
|
+
## Patterns (use these)
|
|
24
|
+
|
|
25
|
+
### Test pyramid — but adjusted for the stack
|
|
26
|
+
- **Unit (many, fast)** — pure functions, business logic in isolation, ~ms per test.
|
|
27
|
+
- **Integration (medium, slower)** — service + real DB / real Redis (test container), ~100ms per test. Where most bugs actually hide.
|
|
28
|
+
- **E2E (few, slowest)** — full stack, user-facing flow, seconds per test.
|
|
29
|
+
|
|
30
|
+
Classic 70/20/10 split is heuristic, not law. For data-heavy backend, 50/40/10 with more integration is often right. For UI-heavy frontend, integration tests at the component level (with React Testing Library / Vue Testing Library) replace many "unit" tests.
|
|
31
|
+
|
|
32
|
+
### Test behavior, not implementation
|
|
33
|
+
- BAD: assert internal method calls (`expect(spy).toHaveBeenCalledTimes(3)`) on private helpers.
|
|
34
|
+
- GOOD: assert observable outcomes — return values, persisted state, side effects to mocked external systems.
|
|
35
|
+
- Refactor freedom: if the behavior is unchanged, the test should pass.
|
|
36
|
+
|
|
37
|
+
### Mock at boundaries, not inside the boundary
|
|
38
|
+
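- Mock external systems in unit tests: HTTP APIs, databases, message queues, file system, time. In integration tests, swap those mocks for real-ish dependencies (test containers, in-memory DB, msw for HTTP).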
|
|
39
|
+
- Don't mock things you own — use the real implementation. Mocking your own service layer just tests that mocks call mocks.
|
|
40
|
+
- Mock the slowest layer, not every layer.
|
|
41
|
+
|
|
42
|
+
### Contract tests for service boundaries
|
|
43
|
+
For cross-service APIs (especially when teams diverge):
|
|
44
|
+
- **Provider** publishes the schema.
|
|
45
|
+
- **Consumer** runs tests against a contract derived from the schema (Pact, JSON Schema, OpenAPI).
|
|
46
|
+
- A breaking change shows up in CI, not in prod.
|
|
47
|
+
|
|
48
|
+
### Property-based testing where it pays
|
|
49
|
+
For pure functions with constrained input space (parsers, validators, math):
|
|
50
|
+
```ts
|
|
51
|
+
fc.assert(fc.property(fc.string(), s => parse(format(s)) === s));
|
|
52
|
+
```
|
|
53
|
+
Generates inputs you'd never think to write. Catches edge cases (empty, unicode, max length) automatically. Use `fast-check` (JS) / `hypothesis` (Python).
|
|
54
|
+
|
|
55
|
+
### Test data management
|
|
56
|
+
- **Factories** > fixtures: `userFactory.build({ role: 'admin' })`. Easy to override one field, rest defaults.
|
|
57
|
+
- **Seed deterministically** — same data every run. Use seeded random (`faker.seed(42)`).
|
|
58
|
+
- **Per-test isolation** — each test creates its own data, transactions roll back, OR DB is wiped between tests.
|
|
59
|
+
- **No shared global state** between tests. Order independence.
|
|
60
|
+
|
|
61
|
+
### Fixed time
|
|
62
|
+
Tests that involve dates/timeouts use a fake clock (`vi.useFakeTimers()` / `freezegun`). Real `Date.now()` in tests = flaky tests near midnight or on DST transitions.
|
|
63
|
+
|
|
64
|
+
### Snapshot tests — sparingly
|
|
65
|
+
- OK for: serialized output of pure functions (JSON shape), small UI component HTML.
|
|
66
|
+
- NOT OK for: anything large, anything with non-deterministic content (timestamps, IDs), entire pages.
|
|
67
|
+
- A snapshot you don't read when it changes is worse than no test.
|
|
68
|
+
|
|
69
|
+
### Flake mitigation
|
|
70
|
+
A flaky test = a real bug 80% of the time, not "just retry". Investigate before marking flaky.
|
|
71
|
+
Common causes:
|
|
72
|
+
- Time-based assertions without fake clock.
|
|
73
|
+
- Test depending on previous test's state (order dependency).
|
|
74
|
+
- Async race not awaited.
|
|
75
|
+
- Network call to flaky external (mock it).
|
|
76
|
+
- Database not cleaned between tests.
|
|
77
|
+
|
|
78
|
+
If you must retry: limit to 2 attempts, alert on retry rate >5%.
|
|
79
|
+
|
|
80
|
+
## Anti-Patterns (DO NOT)
|
|
81
|
+
|
|
82
|
+
### Testing implementation details
|
|
83
|
+
Asserting `private` method called N times, internal state values, exact call order of helpers.
|
|
84
|
+
**Why it bites:** refactor breaks tests even when behavior is identical. Tests become chains forcing implementation, not verifying outcomes.
|
|
85
|
+
**Rule:** test the public surface. If you need to verify private behavior, test it through public calls.
|
|
86
|
+
|
|
87
|
+
### Mocking the thing under test
|
|
88
|
+
`mockUserService.create.mockReturnValue({...})` and then "testing" `userService.create`. You tested the mock.
|
|
89
|
+
**Rule:** never mock the subject under test. Mock its dependencies.
|
|
90
|
+
|
|
91
|
+
### Mocking everything
|
|
92
|
+
Mocked DB, mocked cache, mocked HTTP client, mocked logger, mocked filesystem. You're testing that mocks return what you told them to.
|
|
93
|
+
**Rule:** integration tests run against real-ish dependencies (test containers, in-memory DB, msw for HTTP). Unit tests for pure logic only.
|
|
94
|
+
|
|
95
|
+
### `expect(true).toBe(true)` / always-true tests
|
|
96
|
+
Test passes regardless of implementation.
|
|
97
|
+
**Rule:** every test should fail when the corresponding behavior breaks. Mutation testing surfaces tests that don't.
|
|
98
|
+
|
|
99
|
+
### One mega-test per function
|
|
100
|
+
```ts
|
|
101
|
+
test('user service', async () => {
|
|
102
|
+
// 200 lines testing 15 different behaviors
|
|
103
|
+
});
|
|
104
|
+
```
|
|
105
|
+
**Why it bites:** one assertion fails → can't tell which behavior broke. Debugging means rerunning entire setup.
|
|
106
|
+
**Rule:** one behavior per test. Descriptive name reads as a spec: `it('rejects email without @ symbol')`.
|
|
107
|
+
|
|
108
|
+
### `sleep(100)` for "letting things settle"
|
|
109
|
+
**Why it bites:** flaky on slow CI; wasteful on fast CI; doesn't actually verify the thing finished.
|
|
110
|
+
**Rule:** await the actual condition (`waitFor(() => ...)`, queue.drain, mock-clock advance).
|
|
111
|
+
|
|
112
|
+
### Snapshot tests of huge HTML / JSON blobs
|
|
113
|
+
Diff is unreadable; people approve without reading.
|
|
114
|
+
**Rule:** only snapshot small specific outputs. For large output, write targeted assertions.
|
|
115
|
+
|
|
116
|
+
### Tests that depend on prod data shape
|
|
117
|
+
Pulling from prod DB at test time, asserting "user 42 exists".
|
|
118
|
+
**Rule:** seeded test data. Tests are reproducible offline.
|
|
119
|
+
|
|
120
|
+
### Coverage as a goal
|
|
121
|
+
"We need 80% coverage." Team writes tests that touch lines without verifying behavior.
|
|
122
|
+
**Rule:** coverage is a diagnostic, not a goal. 60% coverage with high-quality behavior tests > 95% coverage with implementation-detail tests.
|
|
123
|
+
|
|
124
|
+
### Skipped/disabled tests with no plan to fix
|
|
125
|
+
`test.skip`, `xit`, commented-out tests.
|
|
126
|
+
**Rule:** delete or fix. Skipped tests rot and lose their ability to catch the bug they were meant to catch.
|
|
127
|
+
|
|
128
|
+
### Generated tests as substitute for thought
|
|
129
|
+
"Cursor wrote 50 tests for this function." 49 of them test the same happy path with slight variations. None test the edge case that actually matters.
|
|
130
|
+
**Rule:** review every test for unique behavioral value before merging.
|
|
131
|
+
|
|
132
|
+
## Decision Framework
|
|
133
|
+
|
|
134
|
+
| Question | Answer |
|
|
135
|
+
|---|---|
|
|
136
|
+
| Pure function with bounded input? | Property-based test |
|
|
137
|
+
| Service with a DB (tempted to mock the DB)? | Integration test with real test container |
|
|
138
|
+
| External HTTP API? | Mock with msw / VCR / contract test |
|
|
139
|
+
| Cross-service contract? | Pact or schema-based contract test |
|
|
140
|
+
| User-facing flow? | E2E test for critical paths only |
|
|
141
|
+
| Time-dependent code? | Fake clock; never real `Date.now()` |
|
|
142
|
+
| Data parsing? | Property-based + edge-case suite |
|
|
143
|
+
| Race condition? | Test concurrent invocations explicitly |
|
|
144
|
+
| Setup-heavy code? | Factory pattern with sensible defaults |
|
|
145
|
+
| Long-running async? | `waitFor` not `sleep`; advance fake timers |
|
|
146
|
+
|
|
147
|
+
## Cost Model
|
|
148
|
+
|
|
149
|
+
| Test type | Speed (typical) | Where most bugs found |
|
|
150
|
+
|---|---|---|
|
|
151
|
+
| Pure unit | < 5ms | Edge cases in algorithms, validation |
|
|
152
|
+
| Integration (test container) | 50-500ms | Service contract bugs, query bugs, transaction issues |
|
|
153
|
+
| E2E (real browser) | 2-30s | UX flow regressions, integration glue |
|
|
154
|
+
| Property-based | varies by N | Inputs you didn't think of |
|
|
155
|
+
|
|
156
|
+
| Anti-pattern | Cost |
|
|
157
|
+
|---|---|
|
|
158
|
+
| Mocked-everything unit tests | False confidence; bugs ship in integration paths |
|
|
159
|
+
| Flaky tests retried | CI time wasted; team loses trust in suite |
|
|
160
|
+
| Coverage-driven testing | Velocity drops; tests don't catch real bugs |
|
|
161
|
+
| One mega-test | Debug time 10x when it fails |
|
|
162
|
+
|
|
163
|
+
## Red Flags in Diff
|
|
164
|
+
|
|
165
|
+
- New unit test that mocks the function being tested → flag (testing the mock).
|
|
166
|
+
- New test asserting `private` method calls or internal state → flag (implementation-detail testing).
|
|
167
|
+
- New `sleep`/`setTimeout` in test code without a real reason → flag (flake risk).
|
|
168
|
+
- New `test.skip` / `xit` / `it.todo` without a tracking issue → flag (rot risk).
|
|
169
|
+
- New snapshot test on output > 100 lines → flag (will be approved without reading).
|
|
170
|
+
- New test using real `Date.now()` / `new Date()` for time-sensitive assertions → flag (use fake clock).
|
|
171
|
+
- New mock for a service the test claims to integration-test → flag (it's now a unit test in disguise).
|
|
172
|
+
- Test setup that copies data from prod / depends on existing data → flag (use factories).
|
|
173
|
+
- Test asserting `toHaveBeenCalledTimes` on a mock 5+ times in a single test → flag (testing the mock orchestration).
|
|
174
|
+
- New external HTTP call in a unit test (no mock) → flag (network in unit suite).
|
|
175
|
+
- New test without arrange/act/assert structure (one giant block) → flag.
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
---
|
|
2
|
+
tags: [ui, flutter, dart, material, cupertino, responsive, mobile]
|
|
3
|
+
stack_signals:
|
|
4
|
+
- language: [dart]
|
|
5
|
+
- project_type: [mobile, frontend-app]
|
|
6
|
+
summary: |
|
|
7
|
+
Flutter UI consistency — Material/Cupertino choice, Theme.of usage over
|
|
8
|
+
hardcoded values, MediaQuery / LayoutBuilder responsive layouts, SafeArea,
|
|
9
|
+
text scaling, overflow handling.
|
|
10
|
+
when_to_load: |
|
|
11
|
+
Task touches Flutter UI widgets, styling, layout, or design-system
|
|
12
|
+
adherence on a mobile/Flutter stack. Diff in *.dart with widget trees,
|
|
13
|
+
theme usage, or layout containers.
|
|
14
|
+
agent_hints: [ui-consistency, logic-reviewer, style-reviewer]
|
|
15
|
+
---
|
|
16
|
+
|
|
17
|
+
# UI Consistency: Flutter
|
|
18
|
+
|
|
19
|
+
## Material / Cupertino Consistency
|
|
20
|
+
- Using correct design language for target platform (Material 3 vs Cupertino)?
|
|
21
|
+
- Not mixing Material and Cupertino widgets in same screen?
|
|
22
|
+
- Using `Theme.of(context)` for colors/text styles, not hardcoded values?
|
|
23
|
+
- Custom widgets extend the theme, not override it?
|
|
24
|
+
|
|
25
|
+
## Layout & Responsive
|
|
26
|
+
- Using `MediaQuery` / `LayoutBuilder` for responsive layouts, not fixed sizes?
|
|
27
|
+
- `SafeArea` applied where needed (notch, status bar, bottom bar)?
|
|
28
|
+
- Handles landscape orientation if applicable?
|
|
29
|
+
- Text scales with `MediaQuery.of(context).textScaler` (not the deprecated `textScaleFactor`)?
|
|
30
|
+
- Text overflow handled (`TextOverflow.ellipsis`, `maxLines`) on dynamic content?
|
|
31
|
+
|
|
32
|
+
## State Management
|
|
33
|
+
- Consistent pattern across screens (all Riverpod, or all BLoC — not mixed)?
|
|
34
|
+
- State scoped correctly (not global when local would suffice)?
|
|
35
|
+
|
|
36
|
+
## Navigation
|
|
37
|
+
- Consistent navigation pattern (GoRouter / auto_route / Navigator 2.0)?
|
|
38
|
+
- Back button behavior correct on Android?
|
|
39
|
+
- Deep linking supported if applicable?
|
|
40
|
+
|
|
41
|
+
## Assets & Images
|
|
42
|
+
- Using `CachedNetworkImage` for remote images (not raw `Image.network`)?
|
|
43
|
+
- Placeholder and error builders on network images?
|
|
44
|
+
- Consistent icon usage from single icon set?
|
|
45
|
+
|
|
46
|
+
## Accessibility
|
|
47
|
+
- `Semantics` widgets on custom components?
|
|
48
|
+
- `excludeFromSemantics` on decorative images?
|
|
49
|
+
- Sufficient color contrast?
|
|
50
|
+
- Touch targets at least 48x48 dp?
|
|
51
|
+
|
|
52
|
+
## Patterns
|
|
53
|
+
- Loading/error/empty states use consistent shared widgets?
|
|
54
|
+
- Form validation follows project patterns (`FormField`, validators)?
|
|
55
|
+
- No hardcoded strings — using localization (`AppLocalizations` / `easy_localization`)?
|
|
56
|
+
- Animation durations and curves consistent with project defaults?
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
---
|
|
2
|
+
tags: [ui, design-system, accessibility, react, nextjs, vue, frontend]
|
|
3
|
+
stack_signals:
|
|
4
|
+
- language: [typescript, javascript]
|
|
5
|
+
- project_type: [frontend-app, monorepo]
|
|
6
|
+
summary: |
|
|
7
|
+
Web UI consistency — design tokens over magic numbers, shared component
|
|
8
|
+
library use, theme adherence, accessibility checks, responsive layout.
|
|
9
|
+
when_to_load: |
|
|
10
|
+
Task touches user-visible UI components (React/Next/Vue), styling,
|
|
11
|
+
spacing/typography/theme, or accessibility. Reviewer fan-out includes
|
|
12
|
+
ui-consistency on a web frontend stack. Diff in *.tsx/jsx/vue/svelte/css/scss.
|
|
13
|
+
agent_hints: [ui-consistency, logic-reviewer, style-reviewer]
|
|
14
|
+
---
|
|
15
|
+
|
|
16
|
+
# UI Consistency: Web (React / Next.js / Vue)
|
|
17
|
+
|
|
18
|
+
## Design System
|
|
19
|
+
- Colors from CSS variables / design tokens, not hardcoded hex values?
|
|
20
|
+
- Spacing from consistent scale (Tailwind classes, CSS vars), not magic numbers?
|
|
21
|
+
- Typography using theme-defined sizes/weights, not arbitrary values?
|
|
22
|
+
- Border radius, shadows consistent with design system?
|
|
23
|
+
- Z-index using defined layers, not arbitrary large numbers?
|
|
24
|
+
|
|
25
|
+
## Component Patterns
|
|
26
|
+
- Using shared components from component library, not one-off implementations?
|
|
27
|
+
- Form elements (inputs, selects, buttons) from shared form system?
|
|
28
|
+
- Loading states using shared skeleton/spinner components?
|
|
29
|
+
- Error states using shared error boundary/display components?
|
|
30
|
+
- Empty states using shared empty state component?
|
|
31
|
+
- Modal/dialog following established overlay patterns?
|
|
32
|
+
- Icon usage from consistent icon library (Lucide, Heroicons, etc.)?
|
|
33
|
+
|
|
34
|
+
## Accessibility
|
|
35
|
+
- Semantic HTML used correctly (nav, main, section, article, button vs div)?
|
|
36
|
+
- ARIA labels on interactive elements without visible text?
|
|
37
|
+
- Keyboard navigation works (Tab order, Enter/Space activation, Escape to close)?
|
|
38
|
+
- Focus management correct (focus trap in modals, focus restore on close)?
|
|
39
|
+
- Color contrast meets WCAG AA (4.5:1 for text, 3:1 for large text)?
|
|
40
|
+
- Images have descriptive alt text (or empty alt for decorative)?
|
|
41
|
+
|
|
42
|
+
## Responsive
|
|
43
|
+
- Same breakpoint patterns as rest of app?
|
|
44
|
+
- Mobile behavior consistent (touch targets 44x44px minimum)?
|
|
45
|
+
- No horizontal scroll on mobile viewports?
|
|
46
|
+
- Text readable without zoom on all screen sizes?
|
|
47
|
+
|
|
48
|
+
## Framework-Specific
|
|
49
|
+
- **React:** key props on list items, React.Fragment vs unnecessary divs
|
|
50
|
+
- **Next.js:** using `next/image`, `next/link`, `next/font` where applicable
|
|
51
|
+
- **i18n:** all user-visible text via translation function, no hardcoded strings
|
package/package.json
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@loomfsm/bundle-code",
|
|
3
|
+
"version": "0.1.0",
|
|
4
|
+
"type": "module",
|
|
5
|
+
"description": "Code review / implementation bundle for loom — agents, reference knowledge, schemas, and stack candidates.",
|
|
6
|
+
"license": "Apache-2.0",
|
|
7
|
+
"main": "./dist/src/index.js",
|
|
8
|
+
"types": "./dist/src/index.d.ts",
|
|
9
|
+
"exports": {
|
|
10
|
+
".": {
|
|
11
|
+
"types": "./dist/src/index.d.ts",
|
|
12
|
+
"import": "./dist/src/index.js"
|
|
13
|
+
},
|
|
14
|
+
"./package.json": "./package.json"
|
|
15
|
+
},
|
|
16
|
+
"files": [
|
|
17
|
+
"dist",
|
|
18
|
+
"agents",
|
|
19
|
+
"knowledge",
|
|
20
|
+
"schemas",
|
|
21
|
+
"stack-candidates.yaml"
|
|
22
|
+
],
|
|
23
|
+
"dependencies": {
|
|
24
|
+
"@loomfsm/kernel": "0.1.0"
|
|
25
|
+
},
|
|
26
|
+
"publishConfig": {
|
|
27
|
+
"access": "public"
|
|
28
|
+
},
|
|
29
|
+
"scripts": {
|
|
30
|
+
"typecheck": "tsc --noEmit",
|
|
31
|
+
"build": "tsc",
|
|
32
|
+
"test": "tsc && node --experimental-sqlite --no-warnings --test dist/test/*.js"
|
|
33
|
+
}
|
|
34
|
+
}
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
|
3
|
+
"$id": "agent-feedback.schema.json",
|
|
4
|
+
"title": "Agent Feedback Entry",
|
|
5
|
+
"description": "One row in metrics/agent-feedback.jsonl. Logged when an agent missed an issue that was caught later. Drives past-misses injection and category vocab evolution.",
|
|
6
|
+
"type": "object",
|
|
7
|
+
"required": [
|
|
8
|
+
"schema_version",
|
|
9
|
+
"feedback_id",
|
|
10
|
+
"date",
|
|
11
|
+
"agent",
|
|
12
|
+
"category",
|
|
13
|
+
"pattern_to_look_for",
|
|
14
|
+
"severity",
|
|
15
|
+
"found_by",
|
|
16
|
+
"human_confirmed"
|
|
17
|
+
],
|
|
18
|
+
"additionalProperties": false,
|
|
19
|
+
"properties": {
|
|
20
|
+
"schema_version": { "type": "string", "const": "1.0" },
|
|
21
|
+
"feedback_id": {
|
|
22
|
+
"type": "string",
|
|
23
|
+
"pattern": "^fb-\\d{4}-\\d{2}-\\d{2}-[a-z0-9]{4,}$",
|
|
24
|
+
"description": "fb-<YYYY-MM-DD>-<short>"
|
|
25
|
+
},
|
|
26
|
+
"date": { "type": "string", "format": "date" },
|
|
27
|
+
"task_id": {
|
|
28
|
+
"type": ["string", "null"],
|
|
29
|
+
"description": "Optional: link back to the pipeline.jsonl row that ran this agent. Triggers reviewer_misses_post_merge++ on that row when set."
|
|
30
|
+
},
|
|
31
|
+
"agent": {
|
|
32
|
+
"type": "string",
|
|
33
|
+
"enum": [
|
|
34
|
+
"logic-reviewer",
|
|
35
|
+
"challenger-reviewer",
|
|
36
|
+
"style-reviewer",
|
|
37
|
+
"security",
|
|
38
|
+
"performance",
|
|
39
|
+
"acceptance",
|
|
40
|
+
"plan-conformance",
|
|
41
|
+
"plan-grounding-check",
|
|
42
|
+
"context-doc-verifier",
|
|
43
|
+
"ui-consistency",
|
|
44
|
+
"api-contract",
|
|
45
|
+
"playwright",
|
|
46
|
+
"test",
|
|
47
|
+
"implementer"
|
|
48
|
+
]
|
|
49
|
+
},
|
|
50
|
+
"category": {
|
|
51
|
+
"type": "string",
|
|
52
|
+
"description": "MUST be a value from category-vocab.json under vocab[<agent>]. If a new pattern, set 'other' and populate proposed_new_category."
|
|
53
|
+
},
|
|
54
|
+
"proposed_new_category": {
|
|
55
|
+
"type": ["string", "null"],
|
|
56
|
+
"maxLength": 60,
|
|
57
|
+
"description": "Populated when category='other'; otherwise null or omitted. Reviewed by /learn for vocab promotion (≥3 human-confirmed instances → promoted into the permanent vocab)."
|
|
58
|
+
},
|
|
59
|
+
"pattern_to_look_for": {
|
|
60
|
+
"type": "string",
|
|
61
|
+
"maxLength": 200,
|
|
62
|
+
"description": "Short, grep-friendly pattern. The string injected into reviewer prompts on future runs as past-miss data."
|
|
63
|
+
},
|
|
64
|
+
"missed_issue_summary": { "type": "string", "maxLength": 300 },
|
|
65
|
+
"severity": { "type": "string", "enum": ["high", "medium", "low"] },
|
|
66
|
+
"found_by": {
|
|
67
|
+
"type": "string",
|
|
68
|
+
"enum": ["prod-incident", "human-review", "another-agent", "test", "other"]
|
|
69
|
+
},
|
|
70
|
+
"example_file_line": { "type": ["string", "null"] },
|
|
71
|
+
"human_confirmed": {
|
|
72
|
+
"type": "boolean",
|
|
73
|
+
"description": "True iff a human reviewed and accepted that this is a real miss. Only confirmed entries count toward vocab promotion and pattern auto-promotion."
|
|
74
|
+
},
|
|
75
|
+
"action_taken": {
|
|
76
|
+
"type": ["string", "null"],
|
|
77
|
+
"enum": [null, "vocab-added", "agent-prompt-updated", "logged-only"]
|
|
78
|
+
}
|
|
79
|
+
}
|
|
80
|
+
}
|
|
@@ -0,0 +1,170 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
|
3
|
+
"$id": "category-vocab.json",
|
|
4
|
+
"title": "Finding Category Controlled Vocabulary",
|
|
5
|
+
"description": "Per-agent allowlist of finding categories. The 'category' field on each Finding MUST match a value in this file for that agent. New patterns enter via /agent-feedback (proposed_new_category) and are promoted here after human review.",
|
|
6
|
+
"schema_version": "1.0",
|
|
7
|
+
"vocab": {
|
|
8
|
+
"logic-reviewer": [
|
|
9
|
+
"race-condition",
|
|
10
|
+
"off-by-one",
|
|
11
|
+
"error-swallowed",
|
|
12
|
+
"missing-edge-case",
|
|
13
|
+
"leak-or-cleanup-missing",
|
|
14
|
+
"ordering-assumption",
|
|
15
|
+
"type-confusion",
|
|
16
|
+
"over-engineering",
|
|
17
|
+
"duplicate-logic",
|
|
18
|
+
"regression-risk",
|
|
19
|
+
"unhandled-async",
|
|
20
|
+
"unbounded-recursion-or-loop",
|
|
21
|
+
"spec-deviation",
|
|
22
|
+
"scope-creep",
|
|
23
|
+
"coverage-gap",
|
|
24
|
+
"other"
|
|
25
|
+
],
|
|
26
|
+
"challenger-reviewer": [
|
|
27
|
+
"concurrency-failure",
|
|
28
|
+
"hostile-input",
|
|
29
|
+
"downstream-failure-not-handled",
|
|
30
|
+
"ordering-violation",
|
|
31
|
+
"atomicity-gap",
|
|
32
|
+
"state-leak-across-requests",
|
|
33
|
+
"empty-or-null-input-failure",
|
|
34
|
+
"retry-or-replay-issue",
|
|
35
|
+
"boundary-mismatch",
|
|
36
|
+
"other"
|
|
37
|
+
],
|
|
38
|
+
"style-reviewer": [
|
|
39
|
+
"naming-violation",
|
|
40
|
+
"duplication",
|
|
41
|
+
"anti-pattern-from-claude-md",
|
|
42
|
+
"dead-code",
|
|
43
|
+
"wrong-directory-or-layer",
|
|
44
|
+
"import-rule-violation",
|
|
45
|
+
"missing-export-pattern",
|
|
46
|
+
"loose-typing",
|
|
47
|
+
"inconsistent-with-context-doc",
|
|
48
|
+
"other"
|
|
49
|
+
],
|
|
50
|
+
"security": [
|
|
51
|
+
"injection-sql-or-nosql",
|
|
52
|
+
"xss",
|
|
53
|
+
"auth-bypass",
|
|
54
|
+
"authorization-missing",
|
|
55
|
+
"jwt-pitfall",
|
|
56
|
+
"secret-in-log-or-bundle",
|
|
57
|
+
"csrf",
|
|
58
|
+
"cors-misconfig",
|
|
59
|
+
"sensitive-data-overreturn",
|
|
60
|
+
"rate-limit-missing",
|
|
61
|
+
"ssrf",
|
|
62
|
+
"path-traversal",
|
|
63
|
+
"dependency-vuln",
|
|
64
|
+
"public-cache-on-private-data",
|
|
65
|
+
"other"
|
|
66
|
+
],
|
|
67
|
+
"performance": [
|
|
68
|
+
"n-plus-one",
|
|
69
|
+
"missing-index",
|
|
70
|
+
"full-table-scan",
|
|
71
|
+
"offset-pagination-on-large-table",
|
|
72
|
+
"hot-key-redis",
|
|
73
|
+
"cache-stampede-risk",
|
|
74
|
+
"unbounded-loop-or-collection",
|
|
75
|
+
"sync-call-in-async-path",
|
|
76
|
+
"missing-timeout",
|
|
77
|
+
"missing-pagination",
|
|
78
|
+
"memory-leak",
|
|
79
|
+
"react-rerender-storm",
|
|
80
|
+
"client-bundle-bloat",
|
|
81
|
+
"other"
|
|
82
|
+
],
|
|
83
|
+
"acceptance": [
|
|
84
|
+
"lint-fail",
|
|
85
|
+
"typecheck-fail",
|
|
86
|
+
"test-fail",
|
|
87
|
+
"build-fail",
|
|
88
|
+
"missing-test-coverage",
|
|
89
|
+
"file-too-large",
|
|
90
|
+
"debug-statement-left",
|
|
91
|
+
"loose-typing-introduced",
|
|
92
|
+
"todo-or-hack-comment",
|
|
93
|
+
"ac-not-met",
|
|
94
|
+
"other"
|
|
95
|
+
],
|
|
96
|
+
"plan-conformance": [
|
|
97
|
+
"drift-file-touched-outside-plan",
|
|
98
|
+
"drift-in-file-overreach",
|
|
99
|
+
"drift-not-in-scope-violation",
|
|
100
|
+
"partial-step-not-done",
|
|
101
|
+
"ac-not-satisfied-by-diff",
|
|
102
|
+
"auxiliary-touch",
|
|
103
|
+
"missing-test-coverage",
|
|
104
|
+
"ac-not-met",
|
|
105
|
+
"test-file-modified-by-implementer",
|
|
106
|
+
"other"
|
|
107
|
+
],
|
|
108
|
+
"plan-grounding-check": [
|
|
109
|
+
"citation-file-not-found",
|
|
110
|
+
"citation-range-out-of-bounds",
|
|
111
|
+
"citation-claim-mismatch",
|
|
112
|
+
"unverified-marker",
|
|
113
|
+
"context-doc-cross-mismatch",
|
|
114
|
+
"missing-aaa-block",
|
|
115
|
+
"ac-not-met",
|
|
116
|
+
"other"
|
|
117
|
+
],
|
|
118
|
+
"context-doc-verifier": [
|
|
119
|
+
"claim-not-found",
|
|
120
|
+
"claim-mismatch",
|
|
121
|
+
"naming-convention-mismatch",
|
|
122
|
+
"other"
|
|
123
|
+
],
|
|
124
|
+
"ui-consistency": [
|
|
125
|
+
"spacing-violation",
|
|
126
|
+
"color-token-violation",
|
|
127
|
+
"typography-token-violation",
|
|
128
|
+
"component-not-from-system",
|
|
129
|
+
"responsive-breakpoint-issue",
|
|
130
|
+
"a11y-violation",
|
|
131
|
+
"other"
|
|
132
|
+
],
|
|
133
|
+
"api-contract": [
|
|
134
|
+
"type-mismatch",
|
|
135
|
+
"missing-field",
|
|
136
|
+
"extra-field",
|
|
137
|
+
"breaking-change",
|
|
138
|
+
"version-skew",
|
|
139
|
+
"other"
|
|
140
|
+
],
|
|
141
|
+
"playwright": [
|
|
142
|
+
"selector-flaky",
|
|
143
|
+
"missing-step-from-plan",
|
|
144
|
+
"timing-or-race",
|
|
145
|
+
"test-data-leak",
|
|
146
|
+
"other"
|
|
147
|
+
],
|
|
148
|
+
"test": [
|
|
149
|
+
"skeleton-compile-error",
|
|
150
|
+
"test-unexpectedly-passes",
|
|
151
|
+
"missing-aaa-block",
|
|
152
|
+
"mock-misconfigured",
|
|
153
|
+
"framework-detection-failed",
|
|
154
|
+
"non-aaa-spec",
|
|
155
|
+
"test-spec-count-mismatch",
|
|
156
|
+
"other"
|
|
157
|
+
],
|
|
158
|
+
"implementer": [
|
|
159
|
+
"test-modification-needed",
|
|
160
|
+
"plan-step-ambiguous",
|
|
161
|
+
"plan-references-nonexistent-code",
|
|
162
|
+
"checkpoint-regression",
|
|
163
|
+
"other"
|
|
164
|
+
]
|
|
165
|
+
},
|
|
166
|
+
"promotion_rules": {
|
|
167
|
+
"to_promote_other_into_vocab": "When a finding has category='other' with proposed_new_category set and the same proposed_new_category appears in /agent-feedback for the same agent ≥3 times confirmed by human, add it to vocab[<agent>].",
|
|
168
|
+
"to_retire_unused": "Categories with zero matches across the last 100 tasks for an agent are candidates for removal — surfaced by /learn but not auto-removed."
|
|
169
|
+
}
|
|
170
|
+
}
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
|
3
|
+
"$id": "classifier-output.schema.json",
|
|
4
|
+
"title": "Classifier-agent output (v2.2.5 Item 9 + v2.2.6 C4 stack/change_kind extension)",
|
|
5
|
+
"description": "Structured JSON the classifier-agent emits in the context phase. Consumed by the classifier step which writes the fields into state.decisions + state.task_short. v2.2.6 extends the schema with `stack` and `change_kind` fields — the auto-spawn activation lands in v2.2.7 Item 1; this commit ships the schema substrate only.",
|
|
6
|
+
"type": "object",
|
|
7
|
+
"required": ["schema_version", "agent", "task_short", "refs_to_load", "security_needed", "antipattern_rules_applicable"],
|
|
8
|
+
"additionalProperties": false,
|
|
9
|
+
"properties": {
|
|
10
|
+
"schema_version": { "type": "string", "enum": ["1.0", "1.1"] },
|
|
11
|
+
"agent": { "type": "string", "const": "classifier" },
|
|
12
|
+
"task_id": { "type": ["string", "null"] },
|
|
13
|
+
"task_short": {
|
|
14
|
+
"type": ["string", "null"],
|
|
15
|
+
"description": "Short kebab-case slug summarising the task — used for filenames, branch names, commit subjects.",
|
|
16
|
+
"maxLength": 60
|
|
17
|
+
},
|
|
18
|
+
"refs_to_load": {
|
|
19
|
+
"type": "array",
|
|
20
|
+
"items": { "type": "string" },
|
|
21
|
+
"maxItems": 5,
|
|
22
|
+
"description": "Senior-pattern references (filenames from knowledge/references/) to inject into agent prompts."
|
|
23
|
+
},
|
|
24
|
+
"security_needed": {
|
|
25
|
+
"type": "boolean",
|
|
26
|
+
"description": "Whether the security reviewer agent should run for this task."
|
|
27
|
+
},
|
|
28
|
+
"antipattern_rules_applicable": {
|
|
29
|
+
"type": "array",
|
|
30
|
+
"items": { "type": "string" },
|
|
31
|
+
"description": "Anti-pattern rule identifiers from CLAUDE.md that the implementer should respect."
|
|
32
|
+
},
|
|
33
|
+
"stack": {
|
|
34
|
+
"type": ["object", "null"],
|
|
35
|
+
"description": "LLM-picked stack identification. v2.2.6 substrate — populated by classifier in v2.2.7 Item 1. When present, classifier's picks override the deterministic resolveStack() defaults; when null, the table-driven detector is the source of truth.",
|
|
36
|
+
"required": ["language", "package_manager"],
|
|
37
|
+
"additionalProperties": true,
|
|
38
|
+
"properties": {
|
|
39
|
+
"language": { "type": "string", "description": "Picked from stack-candidates.yaml.languages[*].name." },
|
|
40
|
+
"package_manager": { "type": ["string", "null"], "description": "Picked from stack-candidates.yaml.package_managers[*].name for the chosen language, or null when no PM applies." },
|
|
41
|
+
"test_command": { "type": ["string", "null"] },
|
|
42
|
+
"lint_command": { "type": ["string", "null"] },
|
|
43
|
+
"build_command": { "type": ["string", "null"] },
|
|
44
|
+
"project_type": { "type": ["string", "null"], "enum": [null, "frontend-app", "backend", "library", "monorepo"] }
|
|
45
|
+
}
|
|
46
|
+
},
|
|
47
|
+
"change_kind": {
|
|
48
|
+
"type": ["string", "null"],
|
|
49
|
+
"enum": [null, "type-only", "logic", "ui", "perf-sensitive", "security-sensitive", "config-only", "docs-only"],
|
|
50
|
+
"description": "Classification of the task's diff shape. v2.2.7 Item 2 will consume this for reviewer selectivity (e.g. skipping style/performance on type-only diffs). v2.2.6 substrate — emitted, but no downstream consumer yet."
|
|
51
|
+
}
|
|
52
|
+
}
|
|
53
|
+
}
|