aigent-team 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +253 -0
  3. package/dist/chunk-N3RYHWTR.js +267 -0
  4. package/dist/cli.js +576 -0
  5. package/dist/index.d.ts +234 -0
  6. package/dist/index.js +27 -0
  7. package/package.json +67 -0
  8. package/templates/shared/git-workflow.md +44 -0
  9. package/templates/shared/project-conventions.md +48 -0
  10. package/templates/teams/ba/agent.yaml +25 -0
  11. package/templates/teams/ba/references/acceptance-criteria.md +87 -0
  12. package/templates/teams/ba/references/api-contract-design.md +110 -0
  13. package/templates/teams/ba/references/requirements-analysis.md +83 -0
  14. package/templates/teams/ba/references/user-story-mapping.md +73 -0
  15. package/templates/teams/ba/skill.md +85 -0
  16. package/templates/teams/be/agent.yaml +34 -0
  17. package/templates/teams/be/conventions.md +102 -0
  18. package/templates/teams/be/references/api-design.md +91 -0
  19. package/templates/teams/be/references/async-processing.md +86 -0
  20. package/templates/teams/be/references/auth-security.md +58 -0
  21. package/templates/teams/be/references/caching.md +79 -0
  22. package/templates/teams/be/references/database.md +65 -0
  23. package/templates/teams/be/references/error-handling.md +106 -0
  24. package/templates/teams/be/references/observability.md +83 -0
  25. package/templates/teams/be/references/review-checklist.md +50 -0
  26. package/templates/teams/be/references/testing.md +100 -0
  27. package/templates/teams/be/review-checklist.md +54 -0
  28. package/templates/teams/be/skill.md +71 -0
  29. package/templates/teams/devops/agent.yaml +35 -0
  30. package/templates/teams/devops/conventions.md +133 -0
  31. package/templates/teams/devops/references/ci-cd.md +218 -0
  32. package/templates/teams/devops/references/cost-optimization.md +218 -0
  33. package/templates/teams/devops/references/disaster-recovery.md +199 -0
  34. package/templates/teams/devops/references/docker.md +237 -0
  35. package/templates/teams/devops/references/infrastructure-as-code.md +238 -0
  36. package/templates/teams/devops/references/kubernetes.md +397 -0
  37. package/templates/teams/devops/references/monitoring.md +224 -0
  38. package/templates/teams/devops/references/review-checklist.md +149 -0
  39. package/templates/teams/devops/references/security.md +225 -0
  40. package/templates/teams/devops/review-checklist.md +72 -0
  41. package/templates/teams/devops/skill.md +131 -0
  42. package/templates/teams/fe/agent.yaml +28 -0
  43. package/templates/teams/fe/conventions.md +80 -0
  44. package/templates/teams/fe/references/accessibility.md +92 -0
  45. package/templates/teams/fe/references/component-architecture.md +87 -0
  46. package/templates/teams/fe/references/css-styling.md +89 -0
  47. package/templates/teams/fe/references/forms.md +73 -0
  48. package/templates/teams/fe/references/performance.md +104 -0
  49. package/templates/teams/fe/references/review-checklist.md +51 -0
  50. package/templates/teams/fe/references/security.md +90 -0
  51. package/templates/teams/fe/references/state-management.md +117 -0
  52. package/templates/teams/fe/references/testing.md +112 -0
  53. package/templates/teams/fe/review-checklist.md +53 -0
  54. package/templates/teams/fe/skill.md +68 -0
  55. package/templates/teams/lead/agent.yaml +18 -0
  56. package/templates/teams/lead/references/cross-team-coordination.md +68 -0
  57. package/templates/teams/lead/references/quality-gates.md +64 -0
  58. package/templates/teams/lead/references/task-decomposition.md +69 -0
  59. package/templates/teams/lead/skill.md +83 -0
  60. package/templates/teams/qa/agent.yaml +32 -0
  61. package/templates/teams/qa/conventions.md +130 -0
  62. package/templates/teams/qa/references/ci-integration.md +337 -0
  63. package/templates/teams/qa/references/e2e-testing.md +292 -0
  64. package/templates/teams/qa/references/mocking.md +249 -0
  65. package/templates/teams/qa/references/performance-testing.md +288 -0
  66. package/templates/teams/qa/references/review-checklist.md +143 -0
  67. package/templates/teams/qa/references/security-testing.md +271 -0
  68. package/templates/teams/qa/references/test-data.md +275 -0
  69. package/templates/teams/qa/references/test-strategy.md +192 -0
  70. package/templates/teams/qa/review-checklist.md +53 -0
  71. package/templates/teams/qa/skill.md +131 -0
@@ -0,0 +1,275 @@
1
+ # Test Data Reference
2
+
3
+ ## Factory Pattern
4
+
5
+ Factories produce valid test objects with sensible defaults. Override only
6
+ what matters for each test — this makes tests self-documenting.
7
+
8
+ ### TypeScript Factory (with Faker)
9
+
10
+ ```typescript
11
+ // tests/factories/user.factory.ts
12
+ import { faker } from '@faker-js/faker';
13
+
14
+ interface User {
15
+ id: string;
16
+ email: string;
17
+ name: string;
18
+ role: 'admin' | 'member' | 'viewer';
19
+ createdAt: Date;
20
+ }
21
+
22
+ export function buildUser(overrides: Partial<User> = {}): User {
23
+ return {
24
+ id: faker.string.uuid(),
25
+ email: faker.internet.email(),
26
+ name: faker.person.fullName(),
27
+ role: 'member',
28
+ createdAt: faker.date.recent({ days: 30 }),
29
+ ...overrides,
30
+ };
31
+ }
32
+
33
+ // Usage in test:
34
+ const admin = buildUser({ role: 'admin' });
35
+ const viewer = buildUser({ role: 'viewer', email: 'viewer@test.com' });
36
+ ```
37
+
38
+ ### Related Object Factories
39
+
40
+ ```typescript
41
+ // tests/factories/order.factory.ts
42
+ import { faker } from '@faker-js/faker';
43
+ import { buildUser } from './user.factory';
44
+
45
+ interface OrderItem {
46
+ sku: string;
47
+ name: string;
48
+ quantity: number;
49
+ unitPrice: number;
50
+ }
51
+
52
+ interface Order {
53
+ id: string;
54
+ userId: string;
55
+ items: OrderItem[];
56
+ total: number;
57
+ status: 'pending' | 'paid' | 'shipped';
58
+ }
59
+
60
+ export function buildOrderItem(overrides: Partial<OrderItem> = {}): OrderItem {
61
+ const quantity = overrides.quantity ?? faker.number.int({ min: 1, max: 5 });
62
+ const unitPrice = overrides.unitPrice ?? parseFloat(faker.commerce.price({ min: 5, max: 200 }));
63
+ return {
64
+ sku: faker.string.alphanumeric(8).toUpperCase(),
65
+ name: faker.commerce.productName(),
66
+ quantity,
67
+ unitPrice,
68
+ ...overrides,
69
+ };
70
+ }
71
+
72
+ export function buildOrder(overrides: Partial<Order> = {}): Order {
73
+ const items = overrides.items ?? [buildOrderItem(), buildOrderItem()];
74
+ return {
75
+ id: faker.string.uuid(),
76
+ userId: buildUser().id,
77
+ items,
78
+ total: items.reduce((sum, i) => sum + i.quantity * i.unitPrice, 0),
79
+ status: 'pending',
80
+ ...overrides,
81
+ };
82
+ }
83
+ ```
84
+
85
+ ### Python Factory (with Faker)
86
+
87
+ ```python
88
+ # tests/factories/user_factory.py
89
+ from dataclasses import dataclass, field
90
+ from datetime import datetime
91
+ from faker import Faker
92
+ from uuid import uuid4
93
+
94
+ fake = Faker()
95
+
96
+ @dataclass
97
+ class UserFactory:
98
+ id: str = field(default_factory=lambda: str(uuid4()))
99
+ email: str = field(default_factory=fake.email)
100
+ name: str = field(default_factory=fake.name)
101
+ role: str = "member"
102
+ created_at: datetime = field(default_factory=datetime.now)
103
+
104
+ # Usage:
105
+ user = UserFactory()
106
+ admin = UserFactory(role="admin")
107
+ ```
108
+
109
+ ---
110
+
111
+ ## Faker for Realistic Data
112
+
113
+ ### Why Faker Over Hardcoded Values
114
+
115
+ - Hardcoded values hide implicit assumptions (e.g., "John" assumes ASCII names).
116
+ - Faker generates edge cases naturally (long names, special characters, Unicode).
117
+ - Deterministic seed for reproducibility:
118
+
119
+ ```typescript
120
+ import { faker } from '@faker-js/faker';
121
+
122
+ // Set seed for reproducible test runs when debugging
123
+ faker.seed(12345);
124
+ ```
125
+
126
+ ### Common Faker Patterns
127
+
128
+ ```typescript
129
+ faker.string.uuid() // IDs
130
+ faker.internet.email() // Email addresses
131
+ faker.person.fullName() // Display names
132
+ faker.lorem.paragraph() // Long text fields
133
+ faker.date.between({ from: '2024-01-01', to: '2024-12-31' }) // Date ranges
134
+ faker.number.int({ min: 1, max: 100 }) // Bounded integers
135
+ faker.helpers.arrayElement(['a', 'b']) // Random from set
136
+ faker.string.alphanumeric(10) // Tokens / codes
137
+ ```
138
+
139
+ ### Avoid
140
+
141
+ - `faker.lorem.sentence()` for fields with validation (use realistic values).
142
+ - Random data for fields your test asserts on (override explicitly).
143
+
144
+ ---
145
+
146
+ ## Database Isolation Strategies
147
+
148
+ Tests that share a database must not leak state. Choose one strategy:
149
+
150
+ ### 1. Transaction Rollback (fastest)
151
+
152
+ Wrap each test in a transaction and roll back after. No cleanup needed.
153
+
154
+ ```typescript
155
+ // tests/helpers/db.ts
156
+ import { db, type Transaction } from '../../src/db';
157
+
158
+ export async function withTransaction<T>(
159
+ fn: (trx: Transaction) => Promise<T>,
160
+ ): Promise<T> {
161
+ const trx = await db.transaction();
162
+ try {
163
+ const result = await fn(trx);
164
+ return result;
165
+ } finally {
166
+ await trx.rollback();
167
+ }
168
+ }
169
+
170
+ // In test:
171
+ test('creates order', async () => {
172
+ await withTransaction(async (trx) => {
173
+ const order = await createOrder(trx, buildOrder());
174
+ expect(order.status).toBe('pending');
175
+ // Transaction rolls back — no cleanup
176
+ });
177
+ });
178
+ ```
179
+
180
+ **Caveat**: Does not work if code under test manages its own transactions.
181
+
182
+ ### 2. Truncate Between Tests
183
+
184
+ Clear all tables before each test. Slower but simpler.
185
+
186
+ ```typescript
187
+ // tests/helpers/db.ts
188
+ const tables = ['orders', 'order_items', 'users'];
189
+
190
+ export async function truncateAll() {
191
+ // Disable FK checks, truncate, re-enable — one round trip
192
+ await db.raw(`
193
+ SET session_replication_role = 'replica';
194
+ ${tables.map(t => `TRUNCATE TABLE "${t}" CASCADE;`).join('\n')}
195
+ SET session_replication_role = 'origin';
196
+ `);
197
+ }
198
+
199
+ // vitest.setup.ts
200
+ beforeEach(async () => {
201
+ await truncateAll();
202
+ });
203
+ ```
204
+
205
+ ### 3. Unique IDs Per Test (no cleanup)
206
+
207
+ Each test uses unique identifiers. Data from other tests is invisible
208
+ because queries filter by test-specific IDs.
209
+
210
+ ```typescript
211
+ test('fetches user orders', async () => {
212
+ const userId = faker.string.uuid(); // unique to this test
213
+ await seedOrder({ userId, status: 'paid' });
214
+ await seedOrder({ userId, status: 'pending' });
215
+
216
+ const orders = await getOrdersByUser(userId);
217
+ expect(orders).toHaveLength(2);
218
+ // Other tests' orders are not returned
219
+ });
220
+ ```
221
+
222
+ Best for read-heavy tests. Does not prevent table bloat in long-running suites.
223
+
224
+ ### Comparison
225
+
226
+ | Strategy | Speed | Isolation | Complexity |
227
+ |---|---|---|---|
228
+ | Transaction rollback | Fastest | Strong | Medium (transaction passthrough) |
229
+ | Truncate | Slow | Strong | Low |
230
+ | Unique IDs | Fast | Partial | Low |
231
+
232
+ **Recommendation**: Transaction rollback for unit/integration tests. Truncate
233
+ for E2E against a real database. Unique IDs as a supplementary technique.
234
+
235
+ ---
236
+
237
+ ## Test Environment Parity
238
+
239
+ Tests must run against an environment that matches production as closely
240
+ as possible.
241
+
242
+ ### Database
243
+
244
+ - Same engine and version (PostgreSQL 16 in prod = PostgreSQL 16 in test).
245
+ - Use Docker Compose or Testcontainers for local/CI parity.
246
+ - Never substitute SQLite for PostgreSQL — query behaviour differs.
247
+
248
+ ```typescript
249
+ // testcontainers example
250
+ import { PostgreSqlContainer, StartedPostgreSqlContainer } from '@testcontainers/postgresql';
251
+
252
+ let container: StartedPostgreSqlContainer;
253
+
254
+ beforeAll(async () => {
255
+ container = await new PostgreSqlContainer('postgres:16')
256
+ .withDatabase('test')
257
+ .start();
258
+ process.env.DATABASE_URL = container.getConnectionUri();
259
+ }, 60_000);
260
+
261
+ afterAll(async () => {
262
+ await container.stop();
263
+ });
264
+ ```
265
+
266
+ ### External Services
267
+
268
+ - Mock external APIs with MSW (see `mocking.md`).
269
+ - For services you own, use contract tests (see `test-strategy.md`).
270
+ - Never call real third-party APIs in automated tests.
271
+
272
+ ### Configuration
273
+
274
+ - Use a dedicated `.env.test` file. Never share `.env` between dev and test.
275
+ - Ensure secrets in test config are test-only credentials, not production keys.
@@ -0,0 +1,192 @@
1
+ # Test Strategy Reference
2
+
3
+ ## Test Pyramid Enforcement
4
+
5
+ ### Target Ratios (by test count)
6
+
7
+ | Level | Share | Max execution time | Typical tools |
8
+ |---|---|---|---|
9
+ | Unit | 70 % | < 10 s total suite | Vitest, Jest, Pytest |
10
+ | Integration | 20 % | < 2 min total suite | Testing Library, Supertest, MSW |
11
+ | E2E | 10 % | < 10 min smoke, < 30 min full | Playwright |
12
+
13
+ ### Time Budgets
14
+
15
+ Every CI run has a wall-clock budget. If a level exceeds its budget,
16
+ optimise before adding more tests:
17
+
18
+ - **Unit**: 10 s. If exceeded, check for I/O leaks (unmocked network, file
19
+ system) or heavy setup. Vitest's `--reporter=verbose --bail=1` helps triage.
20
+ - **Integration**: 2 min. Parallelise with `--pool=forks`. Ensure database
21
+ truncation is batched, not per-test.
22
+ - **E2E smoke**: 10 min. Run only critical journeys. Full suite runs nightly.
23
+
24
+ ### Pyramid Inversion Detection
25
+
26
+ Run a quarterly audit:
27
+
28
+ ```bash
29
+ # Count tests per level (adjust globs to your project)
30
+ echo "Unit: $(find . -name '*.test.ts' -not -path '*/e2e/*' | xargs grep -cE '\b(it|test)\(' | awk -F: '{s+=$2} END{print s}')"
31
+ echo "Integration: $(find . -path '*/__tests__/integration/*' -name '*.test.ts' | xargs grep -cE '\b(it|test)\(' | awk -F: '{s+=$2} END{print s}')"
32
+ echo "E2E: $(find . -path '*/e2e/*' -name '*.spec.ts' | xargs grep -cE '\b(it|test)\(' | awk -F: '{s+=$2} END{print s}')"
33
+ ```
34
+
35
+ If E2E > 15 % of total test count, investigate which E2E tests duplicate
36
+ integration-level coverage and demote them.
37
+
38
+ ---
39
+
40
+ ## Risk-Based Testing Prioritisation
41
+
42
+ Not all features carry equal risk. Allocate test effort proportionally:
43
+
44
+ ### Risk Matrix
45
+
46
+ | Factor | Weight | Examples |
47
+ |---|---|---|
48
+ | Revenue impact | High | Checkout, payment, subscription |
49
+ | User frequency | High | Login, search, navigation |
50
+ | Data sensitivity | High | PII handling, auth, export |
51
+ | Change velocity | Medium | Features under active development |
52
+ | Complexity | Medium | State machines, concurrent workflows |
53
+ | Blast radius | Medium | Shared libraries, core APIs |
54
+
55
+ ### Prioritisation Process
56
+
57
+ 1. List all features/modules.
58
+ 2. Score each on the factors above (1-3 scale).
59
+ 3. Weighted total determines test investment tier:
60
+ - **Tier 1** (score >= 12): Full coverage — unit, integration, E2E, contract,
61
+ performance, security.
62
+ - **Tier 2** (score 7-11): Unit + integration + smoke E2E.
63
+ - **Tier 3** (score <= 6): Unit tests only; integration on critical paths.
64
+
65
+ ---
66
+
67
+ ## Contract Testing (Microservices)
68
+
69
+ When services communicate over HTTP/gRPC, use consumer-driven contract tests
70
+ to prevent integration breakage without running all services.
71
+
72
+ ### Pact Workflow
73
+
74
+ ```
75
+ Consumer writes contract (Pact test)
76
+ → Publishes pact to Pact Broker
77
+ → Provider verifies contract in its own CI
78
+ → Broker records verification result
79
+ → Consumer deploy gated on "can-i-deploy" check
80
+ ```
81
+
82
+ ### Consumer Side (TypeScript + Pact)
83
+
84
+ ```typescript
85
+ import { PactV3 } from '@pact-foundation/pact';
86
+
87
+ const provider = new PactV3({
88
+ consumer: 'OrderService',
89
+ provider: 'InventoryService',
90
+ });
91
+
92
+ describe('Inventory API contract', () => {
93
+ it('returns stock for a product', async () => {
94
+ await provider
95
+ .given('product ABC exists with stock 42')
96
+ .uponReceiving('a request for product stock')
97
+ .withRequest({ method: 'GET', path: '/api/inventory/ABC' })
98
+ .willRespondWith({
99
+ status: 200,
100
+ body: { productId: 'ABC', stock: 42 },
101
+ })
102
+ .executeTest(async (mockServer) => {
103
+ const res = await fetch(`${mockServer.url}/api/inventory/ABC`);
104
+ const data = await res.json();
105
+ expect(data.stock).toBe(42);
106
+ });
107
+ });
108
+ });
109
+ ```
110
+
111
+ ### Provider Side
112
+
113
+ ```typescript
114
+ const { Verifier } = require('@pact-foundation/pact');
115
+
116
+ new Verifier({
117
+ providerBaseUrl: 'http://localhost:3001',
118
+ pactBrokerUrl: process.env.PACT_BROKER_URL,
119
+ provider: 'InventoryService',
120
+ providerStatesSetupUrl: 'http://localhost:3001/_pact/setup',
121
+ }).verifyProvider();
122
+ ```
123
+
124
+ ---
125
+
126
+ ## Visual Regression Testing
127
+
128
+ Use Chromatic (Storybook) or Playwright screenshots for UI drift detection.
129
+
130
+ ### Strategy
131
+
132
+ - **Component level** (Chromatic): Every Storybook story is a visual test.
133
+ Run on every PR. Chromatic handles diffing and approval workflow.
134
+ - **Page level** (Playwright): Capture full-page screenshots for critical
135
+ pages. Compare against baselines.
136
+
137
+ ### Playwright Visual Comparison
138
+
139
+ ```typescript
140
+ test('homepage visual regression', async ({ page }) => {
141
+ await page.goto('/');
142
+ await expect(page).toHaveScreenshot('homepage.png', {
143
+ maxDiffPixelRatio: 0.01,
144
+ });
145
+ });
146
+ ```
147
+
148
+ ### Guidelines
149
+
150
+ - Always set a `maxDiffPixelRatio` or `maxDiffPixels` — pixel-perfect
151
+ comparison is too brittle across environments.
152
+ - Run visual tests in a single browser/OS combination in CI for consistency.
153
+ - Use `toHaveScreenshot` with `fullPage: true` for layout regression.
154
+ - Hide dynamic content (timestamps, avatars) with CSS or masking.
155
+
156
+ ---
157
+
158
+ ## Mutation Testing
159
+
160
+ Code coverage tells you what code was *executed* by tests. Mutation testing
161
+ tells you what code is actually *verified* by assertions.
162
+
163
+ ### How It Works
164
+
165
+ Stryker (JS/TS) or mutmut (Python) injects small changes ("mutants") into
166
+ your source code — flipping operators, removing calls, changing return values.
167
+ If your tests still pass, the mutant "survived" and your tests are weak.
168
+
169
+ ### Setup (Stryker)
170
+
171
+ ```json
172
+ // stryker.conf.json
173
+ {
174
+ "mutate": ["src/**/*.ts", "!src/**/*.test.ts"],
175
+ "testRunner": "vitest",
176
+ "reporters": ["html", "clear-text", "progress"],
177
+ "thresholds": { "high": 80, "low": 60, "break": 50 }
178
+ }
179
+ ```
180
+
181
+ ### Interpretation
182
+
183
+ | Metric | Target | Action if below |
184
+ |---|---|---|
185
+ | Mutation score | >= 70 % | Add assertions; tests execute code but don't verify it |
186
+ | Survived mutants | Review top 10 | Prioritise mutants in Tier 1 modules |
187
+
188
+ ### When to Run
189
+
190
+ - **Not on every PR** — mutation testing is slow (minutes to hours).
191
+ - Run nightly or weekly on critical modules.
192
+ - Gate releases on mutation score for Tier 1 code.
@@ -0,0 +1,53 @@
1
+ ### Test Level & Strategy
2
+ - [ ] Tests are at the correct pyramid level (not E2E for logic that could be unit tested)
3
+ - [ ] Risk-proportional coverage: high-risk paths (auth, payment, data mutation) have more tests
4
+ - [ ] Both happy path and failure paths are tested (not just "it works" but "it fails gracefully")
5
+ - [ ] Edge cases covered: null, empty, max-length, unicode, concurrent access, expired session
6
+
7
+ ### Test Quality
8
+ - [ ] Tests follow AAA pattern (Arrange-Act-Assert) with clear separation
9
+ - [ ] Test names describe expected behavior: `should {outcome} when {condition}`
10
+ - [ ] One logical assertion per test (no mega-tests that assert 10 unrelated things)
11
+ - [ ] Tests are independent — pass when run alone, in any order, in parallel
12
+ - [ ] No snapshot tests for HTML/JSON output (use visual regression for UI instead)
13
+ - [ ] Parameterized tests (`test.each`) used for data-driven scenarios instead of copy-paste
14
+
15
+ ### Test Data
16
+ - [ ] Uses factories/fixtures with realistic data (Faker, not `"test"/"test"`)
17
+ - [ ] No hardcoded IDs or magic values that depend on external state
18
+ - [ ] Test data is self-contained — each test creates what it needs, cleans up after
19
+ - [ ] No dependency on test execution order or shared mutable state
20
+
21
+ ### Mocking
22
+ - [ ] Mocks only at system boundaries (HTTP APIs, time, randomness) — not internal modules
23
+ - [ ] Mock behavior matches real API (response format, error codes, edge cases)
24
+ - [ ] No over-mocking — if >3 mocks are needed, consider an integration test instead
25
+ - [ ] MSW/nock used for HTTP mocking (not monkey-patching fetch/axios)
26
+
27
+ ### E2E Specific
28
+ - [ ] Page Object Model or component abstraction used (no raw selectors in tests)
29
+ - [ ] Elements selected by role/label/text — not CSS class, not complex XPath
30
+ - [ ] Explicit waits for conditions — no `sleep()`/`wait()` with fixed duration
31
+ - [ ] Test data setup via API/seed — not through UI interactions
32
+ - [ ] Passes 10/10 runs locally before merge (flakiness check)
33
+ - [ ] Screenshot/trace captured on failure for debugging
34
+ - [ ] Tests parallelizable — no shared accounts, no port conflicts
35
+
36
+ ### Flakiness Prevention
37
+ - [ ] No timing-dependent assertions (exact timestamps, animation states)
38
+ - [ ] No shared state between tests (global variables, database records, file system)
39
+ - [ ] Async operations properly awaited — no "fire and forget" in test code
40
+ - [ ] Retry logic only for eventually-consistent operations, not to mask bugs
41
+ - [ ] Environment-agnostic — passes on macOS, Linux, CI (timezone, locale, filesystem)
42
+
43
+ ### Performance Tests (if applicable)
44
+ - [ ] Pass/fail thresholds defined in the test script (not just "observe the numbers")
45
+ - [ ] Tests run against production-like environment with realistic data volume
46
+ - [ ] Baseline recorded for comparison
47
+ - [ ] All four load types covered: smoke, load, stress, soak
48
+
49
+ ### CI Integration
50
+ - [ ] New tests added to the correct CI stage (smoke vs regression vs nightly)
51
+ - [ ] Test execution time within budget (unit <5ms, integration <500ms, E2E <30s)
52
+ - [ ] Test report generated (JUnit XML / Allure) for CI visibility
53
+ - [ ] No `@skip` / `.only` / `xit` left in committed code
@@ -0,0 +1,131 @@
1
+ # QA Engineer — Skill Index
2
+
3
+ You are a senior QA engineer. You think in terms of risk, confidence, and
4
+ feedback speed — not "more tests." Every test you write or review must justify
5
+ its existence by catching a class of bugs that no other test already covers.
6
+
7
+ ---
8
+
9
+ ## Core Principles
10
+
11
+ 1. **Quality is everyone's job, QA owns the strategy.**
12
+ You do not gate-keep releases. You design the safety net and teach others to
13
+ use it. If a bug escapes, the first question is "why didn't our automation
14
+ catch it?" — not "who wrote the code?"
15
+
16
+ 2. **Respect the test pyramid.**
17
+ 70 % unit, 20 % integration, 10 % E2E — by count. Inversions are a smell.
18
+ E2E tests are expensive; each one must protect a critical user journey that
19
+ lower levels cannot cover.
20
+
21
+ 3. **Flaky tests are bugs — zero tolerance.**
22
+ A flaky test is worse than no test: it trains the team to ignore failures.
23
+ Quarantine immediately, fix within 48 h, or delete. Track flakiness rate as
24
+ a first-class metric.
25
+
26
+ 4. **Coverage is a vanity metric.**
27
+ High coverage with weak assertions proves nothing. Focus on mutation testing
28
+ score and defect escape rate instead.
29
+
30
+ 5. **Shift left.**
31
+ The cheapest bug is the one caught in a unit test during local development.
32
+ Invest in fast feedback: < 10 s for unit suite, < 2 min for integration,
33
+ < 10 min for smoke E2E.
34
+
35
+ 6. **Tests are production code.**
36
+ They deserve the same care: clear naming, no duplication, proper
37
+ abstractions (factories, page objects), and code review.
38
+
39
+ 7. **Test behaviour, not implementation.**
40
+ If refactoring the source without changing behaviour breaks a test, that
41
+ test is coupled to implementation and must be rewritten.
42
+
43
+ ---
44
+
45
+ ## Anti-Patterns — Flag Immediately
46
+
47
+ | Anti-pattern | Why it's harmful | Fix |
48
+ |---|---|---|
49
+ | Testing implementation details | Brittle; breaks on refactor | Assert on outputs and side effects only |
50
+ | Over-mocking | Test proves nothing about real system | Mock at system boundaries only (HTTP, clock, random) |
51
+ | `sleep()` / fixed delays in E2E | Flaky, slow | Use explicit waits / `waitFor` / Playwright auto-wait |
52
+ | Snapshot tests for logic | Fail on any change, nobody reads diffs | Use targeted assertions |
53
+ | Shared mutable test state | Order-dependent failures | Isolate via `beforeEach`, factories, transactions |
54
+ | Test ordering dependency | Hidden coupling | Each test must pass in isolation; randomize order |
55
+ | Ignoring test performance | CI becomes bottleneck | Budget time per level; parallelize |
56
+ | Copy-paste test setup | Maintenance nightmare | Extract factories and helpers |
57
+
58
+ ---
59
+
60
+ ## Decision Framework — Choosing the Right Test Level
61
+
62
+ ```
63
+ Is the logic pure computation (no I/O, no DOM)?
64
+ YES → Unit test (Vitest / Jest / Pytest)
65
+
66
+ Does it involve multiple modules collaborating?
67
+ YES → Is there a network boundary?
68
+ YES → Contract test (Pact) + integration test with MSW
69
+ NO → Integration test (Testing Library + Vitest)
70
+
71
+ Is it a critical user journey (login, checkout, payment)?
72
+ YES → E2E test (Playwright)
73
+ NO → Can you cover it with an integration test?
74
+ YES → Integration test
75
+ NO → E2E test, but justify it in a comment
76
+ ```
77
+
78
+ When in doubt, start at the lowest level that can catch the bug.
79
+
80
+ ---
81
+
82
+ ## Reference Files
83
+
84
+ Read these on demand — not every task requires every file.
85
+
86
+ | Reference | When to read |
87
+ |---|---|
88
+ | [test-strategy.md](references/test-strategy.md) | Planning a new test suite, reviewing pyramid balance, setting up mutation testing or visual regression |
89
+ | [e2e-testing.md](references/e2e-testing.md) | Writing or reviewing Playwright/Cypress tests, debugging flakiness, setting up Page Object Model |
90
+ | [test-data.md](references/test-data.md) | Building factories, seeding databases, solving test isolation problems |
91
+ | [mocking.md](references/mocking.md) | Setting up MSW, deciding what to mock, verifying mock contracts |
92
+ | [performance-testing.md](references/performance-testing.md) | Writing k6 scripts, defining load scenarios, setting thresholds |
93
+ | [security-testing.md](references/security-testing.md) | OWASP checks, setting up ZAP, verifying rate limits and auth |
94
+ | [ci-integration.md](references/ci-integration.md) | Configuring pipeline stages, test reporting, flakiness tracking |
95
+ | [review-checklist.md](references/review-checklist.md) | Reviewing any PR that touches test code |
96
+
97
+ ---
98
+
99
+ ## Workflow Index
100
+
101
+ ### Writing a new test
102
+ 1. Determine test level using the decision framework above.
103
+ 2. Read the relevant reference file for that level.
104
+ 3. Use factories from `test-data.md` for data setup.
105
+ 4. Follow the review checklist before submitting.
106
+
107
+ ### Investigating a flaky test
108
+ 1. Read `e2e-testing.md` > Flakiness Prevention.
109
+ 2. Check for anti-patterns in the table above.
110
+ 3. Reproduce locally with `--repeat-each=20` (Playwright) or loop.
111
+ 4. Fix root cause; never add retries as a permanent solution.
112
+
113
+ ### Setting up CI test pipeline
114
+ 1. Read `ci-integration.md` for stage design and time budgets.
115
+ 2. Read `test-strategy.md` for pyramid enforcement.
116
+ 3. Configure reporting per `ci-integration.md` > Test Reporting.
117
+
118
+ ### Performance test planning
119
+ 1. Read `performance-testing.md` for scenario types and script templates.
120
+ 2. Define thresholds based on SLOs.
121
+ 3. Run against a production-like environment only.
122
+
123
+ ### Security audit
124
+ 1. Read `security-testing.md` for OWASP checklist.
125
+ 2. Run automated DAST scan with ZAP.
126
+ 3. Manual verification for auth bypass and IDOR.
127
+
128
+ ### Reviewing test code
129
+ 1. Open `review-checklist.md` and walk through every section.
130
+ 2. Flag any anti-pattern from the table above.
131
+ 3. Verify test level matches the decision framework.