general-coding-tools-mcp 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md ADDED
@@ -0,0 +1,8 @@
1
+ # MCP server
2
+
3
+ This directory is the General Coding Tools MCP server. The [root README](../README.md) describes each skill and subagent and how to use the server.
4
+
5
+ **Build:** `npm install && npm run build`
6
+ **Run:** `npm start` or `node dist/index.js`
7
+
8
+ Content lives in `skills/` and `subagents/` and is bundled into `dist/content.json` at build time.
@@ -0,0 +1 @@
1
+ {"skills":[{"id":"best-practices-audit","name":"best-practices-audit","hasReference":true},{"id":"correctness-audit","name":"correctness-audit","hasReference":true},{"id":"feature-planning","name":"feature-planning","hasReference":true},{"id":"security-audit","name":"security-audit","hasReference":true},{"id":"systematic-debugging","name":"systematic-debugging","hasReference":false}],"subagents":[{"id":"deep-research","name":"deep-research"},{"id":"update-docs","name":"update-docs"},{"id":"verifier","name":"verifier"}],"content":{"skills":{"best-practices-audit":{"content":"---\nname: best-practices-audit\ndescription: Audits code against named industry standards and coding best practices (DRY, SOLID, KISS, YAGNI, Clean Code, OWASP, etc.). Use when the user asks to check best practices, enforce standards, audit for anti-patterns, review code quality against principles, or ensure code follows industry conventions. Works on git diffs, specific files, or an entire codebase.\n---\n\n# Best Practices Audit\n\nAudit code against established industry standards and named best practices. Cite the specific principle violated for every finding so the developer learns *which* standard applies and why.\n\n## Scope\n\nDetermine what to audit based on user request and context:\n\n- **Git diff mode** (default when no scope specified and changes exist): run `git diff` and `git diff --cached` to audit only changed/added code\n- **File/directory mode**: audit the files or directories the user specifies\n- **Codebase mode**: when the user explicitly asks for a full codebase audit, scan the project broadly (focus on source code, skip vendor/node_modules/build artifacts)\n\nRead all in-scope code before producing findings.\n\n## Principles to Enforce\n\nEvaluate code against each category. Skip categories with no findings. See [REFERENCE.md](REFERENCE.md) for detailed definitions and examples of each principle.\n\n### 1. 
DRY (Don't Repeat Yourself)\n\n- Duplicated logic across functions, components, or modules\n- Copy-pasted code blocks with minor variations\n- Repeated string literals, magic numbers, or config values that should be constants\n- Similar data transformations that could be unified\n\n### 2. SOLID Principles\n\n- **S — Single Responsibility**: classes/modules/functions doing more than one thing\n- **O — Open/Closed**: code that requires modification (instead of extension) to add behavior\n- **L — Liskov Substitution**: subtypes that break the contract of their parent type\n- **I — Interface Segregation**: interfaces/types forcing implementers to depend on methods they don't use\n- **D — Dependency Inversion**: high-level modules depending on concrete implementations instead of abstractions\n\n### 3. KISS (Keep It Simple, Stupid)\n\n- Unnecessary complexity or over-engineering\n- Convoluted control flow when a simpler approach exists\n- Abstractions that add indirection without clear value\n- Clever tricks that sacrifice readability\n\n### 4. YAGNI (You Ain't Gonna Need It)\n\n- Code for features that don't exist yet and aren't requested\n- Premature generalization or unnecessary configurability\n- Unused parameters, flags, or code paths \"just in case\"\n- Speculative abstractions with a single implementation\n\n### 5. Clean Code (Robert C. Martin)\n\n- **Naming**: vague, misleading, or inconsistent names; abbreviations that hinder readability\n- **Functions**: functions longer than ~20 lines; too many parameters (>3); mixed abstraction levels\n- **Comments**: comments that restate the code; commented-out code; missing comments on *why* for non-obvious decisions\n- **Formatting**: inconsistent indentation, spacing, or file organization within the project\n\n### 6. 
Error Handling Best Practices\n\n- Swallowed exceptions (empty catch blocks)\n- Generic catch-all without meaningful handling\n- Missing error propagation — errors that should bubble up but don't\n- No user-facing feedback on failure\n- Using exceptions for control flow\n\n### 7. Security Standards (OWASP Top 10)\n\n- Unsanitized user input (injection, XSS, path traversal)\n- Broken authentication or session management\n- Sensitive data exposure (secrets in code, insecure storage, unencrypted transmission)\n- Missing access control checks\n- Security misconfiguration (permissive CORS, missing CSP headers)\n- Using components with known vulnerabilities\n\n### 8. Performance Best Practices\n\n- Unnecessary re-renders or re-computations\n- N+1 queries, unbounded result sets, missing pagination\n- Synchronous blocking in async-capable contexts\n- Missing memoization, caching, or debouncing where clearly beneficial\n- Large bundle imports when a smaller alternative exists\n\n### 9. Testing Best Practices\n\n- Untested public API surface or critical paths\n- Tests tightly coupled to implementation details\n- Missing edge case coverage for non-trivial logic\n- Flaky patterns (time-dependent, order-dependent, network-dependent tests)\n- Test code that violates DRY without justification\n\n### 10. Code Organization & Architecture\n\n- Circular dependencies between modules\n- Business logic mixed into UI/presentation layers\n- Shared mutable state across module boundaries\n- Inconsistent project structure or file placement conventions\n- Missing or inconsistent use of the project's established patterns\n\n### 11. Defensive Programming\n\n- Missing input validation at system boundaries (API endpoints, user forms, external data)\n- Assumptions about data shape without type guards or runtime checks\n- Missing null/undefined handling where values can realistically be absent\n- No graceful degradation on partial failures\n\n### 12. 
Separation of Concerns\n\n- Mixed responsibilities in a single file or function (e.g. data fetching + rendering + business logic)\n- Configuration values hardcoded in business logic\n- Platform-specific code leaking into core/shared modules\n- Presentation logic mixed with data transformation\n\n## Output Format\n\nGroup findings by severity. Each finding MUST name the specific principle violated.\n\n```\n## Critical\nViolations that will cause bugs, data loss, or security vulnerabilities in production.\n\n### [PRINCIPLE] Brief title\n**File**: `path/to/file.ts` (lines X-Y)\n**Principle**: Full name of the principle and a one-line explanation of what it requires.\n**Violation**: What the code does wrong and the concrete impact.\n**Fix**: Specific, actionable suggestion.\n\n## Warning\nViolations that degrade maintainability, readability, or robustness.\n\n(same structure)\n\n## Suggestion\nImprovements aligned with best practices but not urgent.\n\n(same structure)\n\n## Summary\n- Total findings: N (X critical, Y warning, Z suggestion)\n- Principles most frequently violated: list the top 2-3\n- Overall assessment: 1-2 sentence verdict on the code's adherence to standards\n```\n\n## Linter Tools\n\nBefore producing findings, **always run the available linters** on in-scope code to supplement your manual review. Linter output should be incorporated into your findings (cite the linter rule alongside the principle).\n\n### ESLint (TypeScript/React)\n\nRun from the `app/` directory. Config: `app/eslint.config.js` (flat config with TypeScript-ESLint, React Hooks, React Refresh).\n\n```bash\ncd app && npx eslint . # full codebase\ncd app && npx eslint src/path/to/file.ts # specific file(s)\ncd app && npx eslint --fix . # auto-fix what's possible (only with user approval)\n```\n\n### Ruff (Python)\n\nRun from the project root. 
Config: `ruff.toml` (pycodestyle, pyflakes, isort, pep8-naming, pyupgrade, bugbear, simplify, bandit).\n\n```bash\nruff check scripts/ # all Python scripts\nruff check scripts/wireframe.py # specific file\nruff check --fix scripts/ # auto-fix (only with user approval)\n```\n\n### How to use linter output\n\n1. Run the relevant linter(s) based on which file types are in scope.\n2. For each linter error/warning, map it to the matching principle category (e.g. `@typescript-eslint/no-unused-vars` → Clean Code / Naming, `react-hooks/set-state-in-effect` → Performance / React Best Practices, `S101` → Security / OWASP).\n3. Include linter findings in the appropriate severity section. Linter errors that indicate real bugs or security issues go under **Critical**; style/convention issues go under **Suggestion**.\n4. If the linter finds no issues for a file type, note \"ESLint: clean\" or \"Ruff: clean\" in the Summary.\n\n## Rules\n\n- **Name the principle**: every finding must cite the specific standard (e.g. \"DRY\", \"SRP from SOLID\", \"OWASP A03: Injection\"). This is the core value of this skill.\n- **Be specific**: always cite file paths and line numbers.\n- **Be actionable**: every finding must include a concrete fix.\n- **Respect scope**: only audit what's in scope. In diff mode, only flag issues in changed lines (and their immediate context).\n- **Don't duplicate correctness-audit**: focus on named principles and standards, not generic bug-hunting. If using both skills, they complement each other.\n- **Pragmatism over dogma**: a principle violation is only worth flagging if fixing it provides real value. Don't flag trivial or pedantic violations that would add noise.\n- **Context matters**: consider the project's scale, team size, and existing patterns. 
A startup prototype has different standards than a production system.\n","reference":"# Best Practices Reference\n\nDetailed definitions, rationale, and code examples for each principle audited by this skill.\n\n## Table of Contents\n\n1. [DRY](#1-dry-dont-repeat-yourself)\n2. [SOLID](#2-solid-principles)\n3. [KISS](#3-kiss-keep-it-simple-stupid)\n4. [YAGNI](#4-yagni-you-aint-gonna-need-it)\n5. [Clean Code](#5-clean-code)\n6. [Error Handling](#6-error-handling)\n7. [Security (OWASP)](#7-security-owasp-top-10)\n8. [Performance](#8-performance)\n9. [Testing](#9-testing)\n10. [Code Organization](#10-code-organization--architecture)\n11. [Defensive Programming](#11-defensive-programming)\n12. [Separation of Concerns](#12-separation-of-concerns)\n\n---\n\n## 1. DRY (Don't Repeat Yourself)\n\n**Source**: *The Pragmatic Programmer* — Andy Hunt & Dave Thomas (1999)\n\n**Principle**: Every piece of knowledge must have a single, unambiguous, authoritative representation within a system.\n\n**What it covers**: Not just code duplication — also duplicated logic, data definitions, and documentation that can fall out of sync.\n\n**Bad**:\n```ts\n// User validation in registration handler\nif (!email || !email.includes('@')) throw new Error('Invalid email');\nif (!password || password.length < 8) throw new Error('Weak password');\n\n// Same validation repeated in profile update handler\nif (!email || !email.includes('@')) throw new Error('Invalid email');\nif (!password || password.length < 8) throw new Error('Weak password');\n```\n\n**Good**:\n```ts\nfunction validateCredentials(email: string, password: string) {\n if (!email || !email.includes('@')) throw new Error('Invalid email');\n if (!password || password.length < 8) throw new Error('Weak password');\n}\n```\n\n**Caveat**: Not all similar-looking code is a DRY violation. Two functions that happen to share structure but serve different purposes and will evolve independently are fine as-is. 
Premature deduplication can create coupling.\n\n---\n\n## 2. SOLID Principles\n\n**Source**: Robert C. Martin (aggregated ~2000s, acronym coined by Michael Feathers)\n\n### S — Single Responsibility Principle (SRP)\n\nA class/module should have one, and only one, reason to change.\n\n**Bad**: A `UserService` that handles registration, email sending, and report generation.\n**Good**: Separate `UserRegistration`, `EmailService`, and `ReportGenerator`.\n\n### O — Open/Closed Principle (OCP)\n\nSoftware entities should be open for extension but closed for modification. Add new behavior by adding new code, not changing existing code.\n\n**Bad**: A payment processor with a growing `switch` statement for each new payment method.\n**Good**: A strategy pattern where each payment method implements a `PaymentProcessor` interface.\n\n### L — Liskov Substitution Principle (LSP)\n\nSubtypes must be substitutable for their base types without altering correctness. If `Square extends Rectangle`, calling `setWidth()` must not break expectations.\n\n### I — Interface Segregation Principle (ISP)\n\nNo client should be forced to depend on methods it does not use. Prefer many small, focused interfaces over one large one.\n\n### D — Dependency Inversion Principle (DIP)\n\nHigh-level modules should not depend on low-level modules. Both should depend on abstractions. Abstractions should not depend on details.\n\n**Bad**: `OrderService` directly imports and instantiates `PostgresDatabase`.\n**Good**: `OrderService` depends on a `Database` interface; the concrete implementation is injected.\n\n---\n\n## 3. KISS (Keep It Simple, Stupid)\n\n**Source**: U.S. Navy design principle (1960s), widely adopted in software engineering.\n\n**Principle**: Most systems work best if they are kept simple rather than made complicated. 
Simplicity should be a key goal and unnecessary complexity should be avoided.\n\n**Common violations**:\n- Replacing a simple `if/else` with a factory + strategy + registry pattern for two cases\n- Using metaprogramming/reflection when straightforward code works\n- Creating deep inheritance hierarchies when composition or plain functions suffice\n- Writing a custom solution for something the language/framework already provides\n\n---\n\n## 4. YAGNI (You Ain't Gonna Need It)\n\n**Source**: Extreme Programming (XP) — Kent Beck & Ron Jeffries\n\n**Principle**: Don't implement something until you actually need it, not when you foresee you *might* need it.\n\n**Common violations**:\n- Adding plugin architectures when the app has one implementation\n- Creating abstract base classes with a single concrete subclass\n- Building configuration options nobody has asked for\n- Adding feature flags before there's more than one variant\n\n**Relationship with KISS**: YAGNI is about *scope* (don't build it yet), KISS is about *complexity* (build it simply).\n\n---\n\n## 5. Clean Code\n\n**Source**: *Clean Code* — Robert C. Martin (2008)\n\n### Naming\n- Names should reveal intent: `getUserPermissions()` not `getData()`\n- Avoid abbreviations unless universally understood (`id`, `url`, `http` are fine; `usrPrmLst` is not)\n- Boolean names should read as questions: `isActive`, `hasPermission`, `canEdit`\n- Consistent vocabulary: don't mix `fetch`, `get`, `retrieve`, `load` for the same concept\n\n### Functions\n- Should do one thing, at one level of abstraction\n- Prefer fewer than 3 parameters; use an options object for more\n- Avoid flag arguments (`render(true)`) — split into two named functions\n- Side effects should be obvious from the name or documented\n\n### Comments\n- Good: explain *why* a non-obvious decision was made\n- Bad: restate *what* the code does (`// increment i by 1`)\n- Worst: commented-out code left in the codebase\n\n---\n\n## 6. 
Error Handling\n\n**Sources**: *Clean Code* Chapter 7; language-specific community standards\n\n- **Don't swallow errors**: empty `catch {}` blocks hide bugs\n- **Fail fast**: validate inputs early and throw/return immediately on invalid state\n- **Use typed/specific errors**: catch specific error types rather than generic `catch(e)`\n- **Errors are not control flow**: don't use try/catch for expected branching logic\n- **Always handle promises**: every Promise should have a `.catch()` or be `await`ed in a try block\n- **Provide context**: error messages should include what failed and why, with enough info to debug\n\n---\n\n## 7. Security (OWASP Top 10)\n\n**Source**: OWASP Foundation — updated periodically (latest: 2021)\n\n| ID | Category | What to look for |\n|----|----------|-----------------|\n| A01 | Broken Access Control | Missing auth checks, IDOR, privilege escalation |\n| A02 | Cryptographic Failures | Plaintext secrets, weak hashing, unencrypted sensitive data |\n| A03 | Injection | SQL injection, XSS, command injection, path traversal |\n| A04 | Insecure Design | Missing threat modeling, no rate limiting, no abuse prevention |\n| A05 | Security Misconfiguration | Default credentials, overly permissive CORS, verbose errors in production |\n| A06 | Vulnerable Components | Outdated dependencies with known CVEs |\n| A07 | Auth Failures | Weak passwords allowed, no brute-force protection, broken session management |\n| A08 | Data Integrity Failures | Missing integrity checks, insecure deserialization |\n| A09 | Logging Failures | No audit trail, sensitive data in logs |\n| A10 | SSRF | Server making requests to user-controlled URLs without validation |\n\n---\n\n## 8. 
Performance\n\n**Sources**: Web.dev, framework-specific documentation, general CS principles\n\n- **Avoid premature optimization** — but do avoid *obviously* bad patterns:\n - O(n^2) when O(n) or O(n log n) is straightforward\n - Fetching entire tables/collections when only a subset is needed\n - Re-computing values on every render/call that could be memoized\n- **Minimize bundle size**: tree-shake, lazy-load routes/components, avoid importing entire libraries for one utility\n- **Batch operations**: reduce network round-trips, use bulk APIs, batch DOM updates\n- **Debounce/throttle**: user input handlers that trigger expensive work\n\n---\n\n## 9. Testing\n\n**Sources**: *xUnit Test Patterns* — Gerard Meszaros; *Growing Object-Oriented Software, Guided by Tests* — Freeman & Pryce\n\n- **AAA pattern**: Arrange, Act, Assert — keep tests structured and readable\n- **Test behavior, not implementation**: tests should survive refactors that don't change behavior\n- **One assertion per concept**: a test should verify one logical thing (may use multiple `expect` calls if they test the same concept)\n- **Deterministic**: no random data, no reliance on wall-clock time, no network calls in unit tests\n- **Test the contract**: focus on public API, not private internals\n- **Coverage priorities**: critical paths and edge cases first; don't chase 100% coverage on trivial code\n\n---\n\n## 10. Code Organization & Architecture\n\n**Sources**: *Clean Architecture* — Robert C. 
Martin; *Patterns of Enterprise Application Architecture* — Martin Fowler\n\n- **Dependency direction**: dependencies should point inward (toward core/domain logic), not outward (toward frameworks/IO)\n- **Feature cohesion**: related code should live together (by feature/domain), not scattered by technical role\n- **No circular dependencies**: if A imports B and B imports A, extract shared code to C\n- **Consistent file structure**: follow the project's established conventions for where things go\n- **Layered boundaries**: keep clear boundaries between data access, business logic, and presentation\n\n---\n\n## 11. Defensive Programming\n\n**Source**: *Code Complete* — Steve McConnell; *The Pragmatic Programmer*\n\n- **Validate at boundaries**: every system entry point (API endpoint, form handler, external data source) must validate inputs\n- **Fail gracefully**: partial failures should not crash the entire system\n- **Guard clauses**: return early on invalid conditions instead of deeply nesting the happy path\n- **Type narrowing**: use type guards, assertions, or schema validation (e.g. Zod) for external data\n- **Avoid assumptions**: if a value *can* be null/undefined according to its type, handle it\n\n---\n\n## 12. Separation of Concerns\n\n**Source**: Edsger W. Dijkstra (1974); foundational software engineering principle\n\n- **Each module addresses one concern**: rendering, data fetching, state management, and business logic should be separable\n- **Configuration over hardcoding**: environment-specific values belong in config, not scattered in source\n- **Platform boundaries**: core logic should be portable; framework-specific code stays at the edges\n- **Data vs. presentation**: keep data transformation separate from how it's displayed\n"},"correctness-audit":{"content":"---\nname: correctness-audit\ndescription: Reviews code for correctness bugs, uncaught edge cases, and scalability problems. 
Use when reviewing code changes, performing code audits, or when the user asks for a review or quality check. For security vulnerabilities use security-audit; for design, maintainability, and principle violations use best-practices-audit.\n---\n\n# Correctness Audit\n\nPerform a systematic review focused on **correctness** and **runtime concerns**: will this code work correctly under all realistic inputs and load? Every finding must cite the file, line(s), dimension, and a concrete fix. For security vulnerabilities, use `security-audit`. For principle violations (DRY, SOLID, Clean Code), use `best-practices-audit`.\n\n## Scope\n\nDetermine what to review based on context:\n\n- **Git diff mode** (default when no scope specified and changes exist): run `git diff` and `git diff --cached` to review only changed/added code and its immediate context\n- **File/directory mode**: review the files or directories the user specifies\n- **Full review mode**: when the user asks for a full review, scan all source code (skip vendor/node_modules/build artifacts)\n\nRead all in-scope code before producing findings.\n\n## Dimensions to Evaluate\n\nEvaluate code against each dimension. Skip dimensions with no findings. See [REFERENCE.md](REFERENCE.md) for detailed definitions, concrete examples, and fixes.\n\n### 1. Logic Bugs\n\n- **Wrong operators**: `<` vs `<=`, `==` vs `===`, `&&` vs `||`, bitwise vs logical operators\n- **Off-by-one errors**: loop boundaries, slice/splice indices, pagination offset calculations\n- **Incorrect variable**: copy-paste errors where the wrong variable is used (e.g. 
checking `a > 0` but intending `b > 0`)\n- **Boolean logic inversions**: conditions that are the exact opposite of what they should be (missing `!`, De Morgan's law violations)\n- **Mutating instead of cloning**: modifying an input argument or shared reference when a local copy is required\n- **Shadowed variables**: inner-scope declaration masking an outer-scope variable of the same name, causing silent incorrect reads\n- **Assignment in condition**: `if (x = getValue())` when `===` was intended\n- **Short-circuit misuse**: relying on `&&` or `||` for side effects in code paths where the right-hand side must always run\n\n### 2. Type & Coercion Bugs\n\n- **Implicit type coercion**: `+` operator on mixed `string | number` producing concatenation instead of addition; `==` coercing types unexpectedly\n- **Unsafe casts**: `as T` assertions on data from external sources (API responses, `JSON.parse`, database rows typed as `any`) without runtime validation\n- **Integer/float confusion**: using floating-point arithmetic where integer arithmetic is required (financial amounts, indices, counts); missing `Math.floor`/`Math.round` on division results\n- **Precision loss**: `Number` used for values > `Number.MAX_SAFE_INTEGER` (2⁵³-1); should use `BigInt` or a decimal library\n- **NaN propagation**: arithmetic on a value that may be `NaN` without a guard; `NaN === NaN` is always `false`; `isNaN(\"string\")` returns `true`\n- **Nullable column mismatch**: TypeScript type says `string` but the database column is nullable; the value can be `null` at runtime\n\n### 3. 
Null, Undefined & Missing Value Bugs\n\n- **Unguarded property access**: accessing `.foo` on a value that can realistically be `null` or `undefined` at runtime (API response fields, optional config, database nullable columns)\n- **Destructuring without defaults**: `const { limit } = options` where `options` may be `undefined`, or `limit` may be absent\n- **Array access without bounds check**: `arr[0]` on an array that may be empty; `arr[arr.length - 1]` on a zero-length array\n- **`find()` result not checked**: `.find()` returns `undefined` when no match exists; accessing a property or calling a method on the result without a guard will throw\n- **Optional chaining gaps**: using `a.b.c` when `a` or `b` can be nullish; should be `a?.b?.c`\n- **Early return missing**: function continues executing after a condition should have terminated it\n\n### 4. Async & Promise Bugs\n\n- **Missing `await`**: `async` function calls whose result is not awaited, running fire-and-forget when the caller depends on the result\n- **Unhandled promise rejections**: `.then()` without `.catch()`, or top-level `async` functions with no try/catch, that silently swallow errors\n- **Sequential awaits that should be parallel**: awaiting independent async operations in series (`await a(); await b()`) when `Promise.all([a(), b()])` would be faster and correct\n- **`Promise.all` vs `Promise.allSettled`**: using `Promise.all` when any single rejection should not abort all others; vs. 
using `Promise.allSettled` when the caller actually needs to fail fast\n- **Async function returning void unintentionally**: a function signature of `async (): Promise<void>` that actually should return a value the caller uses\n- **Race between async operations**: two concurrent async paths writing to the same location (state, DB row, file) without synchronization\n- **Uncleaned async resources**: `setInterval`, `setTimeout`, event listeners, or subscriptions started inside a component/class that are never cleaned up when the scope is destroyed\n\n### 5. Stale Closures & Captured State\n\n- **Stale closure over mutable variable**: a callback or timeout captures a variable by reference; by the time the callback runs, the variable has changed\n- **Loop variable capture**: `for (var i = 0; ...)` with async/callback inside — all callbacks share the same `i` by the time they run (use `let` or pass `i` as an argument)\n- **React hooks missing dependencies**: a `useEffect` or `useCallback` that reads a prop or state value not listed in the dependency array — the callback sees the initial value forever\n- **Event listener capturing stale props**: a DOM event listener added once in a `useEffect` that captures `props.onEvent` at mount time, missing all future updates\n- **Memoization with wrong keys**: `useMemo` / `useCallback` / `React.memo` used with a dependency array that doesn't actually capture everything the computation depends on\n\n### 6. 
Resource Leaks & Missing Cleanup\n\n- **Event listeners never removed**: `addEventListener` called on mount, no corresponding `removeEventListener` on unmount\n- **Intervals/timeouts never cleared**: `setInterval` / `setTimeout` not captured in a ref or cancelled on component unmount\n- **Subscriptions not cancelled**: Realtime, WebSocket, or observable subscriptions opened but `.unsubscribe()` / `.close()` never called\n- **File/stream handles not closed**: `fs.open`, database connections, or readable streams that are opened but not closed on all exit paths (including error paths)\n- **Growing in-memory collections**: caches, queues, or maps that are added to but never evicted from, unbounded over time\n\n### 7. Uncaught Edge Cases — Inputs\n\n- **Empty string**: functions that receive a user-provided string and assume it is non-empty (`.split()`, `.charAt(0)`, regex matching)\n- **Empty array or object**: loops or transforms on collections that assume at least one element\n- **Zero and negative numbers**: code that divides by a user-supplied value without guarding against zero; index calculations that go negative\n- **Numeric boundaries**: values at or near `Number.MAX_SAFE_INTEGER`, `Number.MIN_SAFE_INTEGER`, `Infinity`, `-Infinity`, `NaN`\n- **Unicode and emoji**: string `.length` counts UTF-16 code units, not characters; a single emoji is at least 2 code units — truncation, substring, and split operations can corrupt multi-code-unit characters\n- **Null bytes and control characters**: untrusted strings containing `\\0`, `\\r`, `\\n` passed to file paths, log messages, or downstream systems\n- **Very long inputs**: strings or arrays far larger than typical — does the code scale gracefully (O(n) or better), or does it load everything into memory?\n\n### 8. 
Uncaught Edge Cases — External Data & Network\n\n- **Non-200 HTTP responses not handled**: `fetch` resolves (does not reject) on 4xx/5xx — the caller must explicitly check `response.ok` or `response.status`\n- **Partial or truncated responses**: streaming or chunked data where the full payload may not arrive\n- **Timeout not set**: outbound HTTP calls with no timeout; one slow downstream service hangs the entire request chain indefinitely\n- **Retry without backoff**: immediately retrying failed network calls in a tight loop instead of using exponential backoff with jitter\n- **Malformed JSON**: `JSON.parse()` throws on invalid input; this must be wrapped in try/catch\n- **Unexpected API shape**: downstream API fields assumed to be present and correctly typed without validation; treat all external data as `unknown`\n- **Stale or cached data returned on error**: error handlers that silently return the last-known-good cached value without signalling the failure to the caller\n\n### 9. Concurrency & Shared State\n\n- **Check-then-act (TOCTOU)**: reading a value, checking a condition, then acting — another concurrent operation can change the value between check and act\n- **Non-atomic read-modify-write**: incrementing a counter or appending to a list stored outside the current execution context without a lock or atomic operation\n- **Reentrant function calls**: an async function that can be called again before its first invocation completes, with both invocations sharing mutable state\n- **Global/module-level mutable state**: variables at module scope that accumulate or change across requests (dangerous in server contexts where module scope is shared between requests in the same isolate)\n- **Event ordering assumptions**: code that assumes async events will arrive in a specific order (e.g., \"message A always before message B\") without enforcement\n\n### 10. 
Scalability — Algorithmic Complexity\n\n- **O(n²) or worse nested loops**: an inner loop that iterates over the same or a related collection for every outer iteration; grows quadratically\n- **Linear scan where constant lookup exists**: using `Array.includes()`, `Array.find()`, or `Array.indexOf()` inside a loop where converting to a `Set` or `Map` would make lookups O(1)\n- **Repeated sorting**: sorting the same array on each render or request when it could be sorted once and cached\n- **Unnecessary full-collection passes**: multiple `.filter().map().reduce()` chains on the same array that could be combined into a single pass\n- **Regex recompilation**: constructing `new RegExp(pattern)` inside a loop when the pattern is constant — compile once outside the loop\n\n### 11. Scalability — Database & I/O\n\n- **N+1 queries**: fetching a list of N records, then issuing a separate query for each one in a loop — should be a single join or an `IN (...)` query\n- **Unbounded queries**: `SELECT * FROM table` or `.findAll()` without `LIMIT` — returns the entire table; grows unbounded as data grows\n- **Missing pagination**: API endpoints that return all results instead of pages; clients and servers both suffer as dataset grows\n- **Fetching more columns than needed**: `SELECT *` when only 2-3 columns are used; pulls unnecessary data across the network and into memory\n- **Queries inside render or hot paths**: database or API calls triggered on every render cycle or in tight loops rather than cached or batched\n- **Sequential queries that could be parallel**: `await db.query(A); await db.query(B)` where A and B are independent — use `Promise.all`\n- **Missing index implied by access pattern**: code that filters or sorts on a column that will clearly require a full table scan without an index (flag based on the access pattern — don't claim to know the schema unless you can read it)\n\n### 12. 
Scalability — Memory & Throughput\n\n- **Loading full dataset into memory**: reading an entire file, table, or collection into an array when streaming or cursor-based processing would avoid the memory spike\n- **Unbounded `Promise.all`**: `Promise.all(items.map(asyncFn))` where `items` can be very large — spawns thousands of concurrent operations, exhausting connections or memory\n- **No backpressure on queues**: pushing work into a queue faster than it can be consumed, with no throttling or rejection when the queue is full\n- **In-memory coordination state**: using a module-level `Map` or `Set` as a cache, queue, or lock that is not shared between process replicas — breaks on horizontal scale-out\n- **No connection pooling**: creating a new database connection per request instead of using a pool\n- **Repeated expensive computation**: calling an expensive pure function with the same inputs repeatedly without memoization or caching the result\n\n## Static Analysis Tools\n\nBefore producing findings, **run available linters** on in-scope code and incorporate their output into findings.\n\n### TypeScript compiler\n```bash\nnpx tsc --noEmit\n```\nType errors, implicit `any`, and unchecked nulls. Map findings to Dimension 2 (Type & Coercion) or Dimension 3 (Null/Undefined).\n\n### ESLint\n```bash\nnpx eslint --ext .ts,.tsx src/\n```\nKey rules that surface bugs: `no-unused-vars`, `no-undef`, `@typescript-eslint/no-floating-promises`, `@typescript-eslint/no-misused-promises`, `react-hooks/exhaustive-deps`, `no-constant-condition`, `no-self-assign`.\n\n### Ruff (Python)\n```bash\nruff check --select E,F,B,C90 .\n```\n`F` = Pyflakes (undefined names, unused imports), `B` = Bugbear (common bug patterns), `C90` = McCabe complexity.\n\n### How to use tool output\n1. Map each tool finding to its dimension (e.g., `@typescript-eslint/no-floating-promises` → Dimension 4: Async & Promise Bugs).\n2. 
Linter errors that indicate real runtime bugs go under **Critical**; style findings go under **Suggestion**.\n3. Note \"tsc: clean\" / \"ESLint: clean\" in the Summary if no issues.\n\n## Output Format\n\nGroup findings by severity, not by dimension. Each finding must name the dimension it falls under.\n\n```\n## Critical\nIssues that will cause incorrect behavior, data loss, or crashes in production.\n\n### [Dimension] Brief title\n**File**: `path/to/file.ts` (lines X–Y)\n**Dimension**: Full dimension name — one-line explanation of what correct code requires.\n**Problem**: What the code does wrong and the concrete runtime impact (what breaks, when, and for whom).\n**Fix**: Specific, actionable code change.\n\n## Warning\nIssues likely to cause bugs under realistic inputs or load, or that will cause failures during future changes.\n\n(same structure)\n\n## Suggestion\nImprovements that reduce risk or improve robustness but are not urgently broken.\n\n(same structure)\n\n## Summary\n- Total findings: N (X critical, Y warning, Z suggestion)\n- Dimensions most frequently violated: list top 2–3\n- Linter results: tsc: clean / ESLint: N issues / Ruff: clean (etc.)\n- Overall assessment: 1–2 sentence verdict on correctness and robustness\n```\n\n## Rules\n\n- **Be specific**: always cite file paths and line numbers.\n- **Be actionable**: every finding must include a concrete fix — not \"handle null\" but \"add `if (!user) return notFound()` before line 42.\"\n- **Model the failure**: every Critical finding must describe what actually breaks at runtime — which input triggers it, what the symptom is.\n- **Severity by real-world impact**: rate by what breaks in production, not theoretical worst-case.\n- **No fluff**: skip dimensions with no findings. Don't praise code that is merely acceptable.\n- **Respect scope**: in diff mode, only flag issues in changed lines and their immediate context. 
Don't audit the entire file when asked about a one-line change.\n- **Don't duplicate other skills**: correctness bugs only — no security (use `security-audit`), no principle violations (use `best-practices-audit`). Edge cases and concurrency bugs that are also security vulnerabilities should be flagged here for correctness and referenced to `security-audit` for the security angle.\n","reference":"# Correctness Audit — Reference\n\nDetailed definitions, failure patterns, concrete examples, and fixes for each dimension in `SKILL.md`.\n\n---\n\n## 1. Logic Bugs\n\n### Wrong Comparison Operator\n\nThe single most common logic bug. `<` vs `<=` is the canonical off-by-one; `==` vs `===` produces silent type coercion in JavaScript.\n\n**Violation:**\n```ts\n// WRONG — excludes the last valid page\nif (page < totalPages) fetchPage(page); // misses page === totalPages\n\n// WRONG — \"0\" == 0 is true in JS; both branches trigger unexpectedly\nif (status == 0) handlePending();\nif (status == false) handleEmpty(); // also true for 0, \"\", and \"0\" — but not for null or undefined\n```\n**Fix:**\n```ts\nif (page <= totalPages) fetchPage(page);\nif (status === 0) handlePending();\n```\n\n### Mutation of Input Arguments\n\nFunctions that mutate their arguments create invisible coupling — the caller's data changes without warning.\n\n**Violation:**\n```ts\nfunction normalize(items: Item[]) {\n items.sort((a, b) => a.id - b.id); // mutates the caller's array\n return items;\n}\n```\n**Fix:**\n```ts\nfunction normalize(items: Item[]) {\n return [...items].sort((a, b) => a.id - b.id); // local copy\n}\n```\n\n### Shadowed Variable\n\nA variable declared inside an inner scope shares the name of an outer-scope variable. 
Reads in the inner scope silently use the inner version, ignoring the outer.\n\n**Violation:**\n```ts\nconst user = getCurrentUser();\nif (condition) {\n const user = await fetchUser(id); // shadows outer `user`\n applyPermissions(user); // uses inner — correct\n}\nlog(user.id); // uses outer — developer may have intended inner\n```\n**Fix**: Use distinct names. Lint rule: `no-shadow`.\n\n### Boolean Logic Inversion (De Morgan)\n\nMissing or extra negations produce conditions that are the exact opposite of intent.\n\n**Violation:**\n```ts\n// Intent: \"allow if admin OR owner\"\n// Bug: \"allow if NOT admin AND NOT owner\" (blocks everyone who should be allowed)\nif (!isAdmin && !isOwner) return allowAccess();\n```\n**Fix:**\n```ts\nif (isAdmin || isOwner) return allowAccess();\n```\n\n---\n\n## 2. Type & Coercion Bugs\n\n### `+` Operator on Mixed Types\n\nJavaScript's `+` operator does string concatenation when either operand is a string. A number read from an input field, query param, or JSON-as-string will concatenate instead of add.\n\n**Violation:**\n```ts\n// req.query.count is always a string\nconst total = req.query.count + 10; // \"510\" not 15\n```\n**Fix:**\n```ts\nconst total = Number(req.query.count) + 10;\n// or: parseInt(req.query.count, 10) + 10\n```\n\n### Floating-Point Arithmetic in Financial Logic\n\nIEEE 754 doubles cannot represent most decimal fractions exactly. `0.1 + 0.2 === 0.30000000000000004` — do not use `number` for money.\n\n**Violation:**\n```ts\nconst total = price * quantity; // $10.10 * 3 = $30.299999999999997\n```\n**Fix**: Store monetary values as integer cents in the database. Perform all arithmetic in cents. Convert to decimal only for display.\n\n### NaN Propagation\n\nArithmetic involving `NaN` always produces `NaN`. A single bad input silently corrupts all downstream calculations. 
`NaN === NaN` is `false`, so equality checks miss it.\n\n**Violation:**\n```ts\nconst score = parseInt(rawInput); // \"abc\" → NaN\nconst adjusted = score + bonus; // NaN — no warning\nif (adjusted > threshold) award(); // never triggers\n```\n**Fix:**\n```ts\nconst score = parseInt(rawInput, 10);\nif (!Number.isFinite(score)) throw new Error(`Invalid score: ${rawInput}`);\n```\n\n### `JSON.parse` Without Validation\n\n`JSON.parse` returns `any` in TypeScript. Treating the result as a typed value without runtime validation means any shape mismatch (missing field, wrong type, null) silently becomes a bug downstream.\n\n**Violation:**\n```ts\nconst payload = JSON.parse(body) as WebhookPayload;\nprocessEvent(payload.eventType); // crashes if eventType is missing\n```\n**Fix:**\n```ts\nconst raw: unknown = JSON.parse(body);\nconst payload = WebhookPayloadSchema.parse(raw); // throws on invalid shape\nprocessEvent(payload.eventType); // safe\n```\n\n---\n\n## 3. Null, Undefined & Missing Value Bugs\n\n### Unguarded `.find()` Result\n\n`Array.find()` returns `undefined` when no match exists. Using the result directly without checking throws at runtime.\n\n**Violation:**\n```ts\nconst config = configs.find(c => c.id === targetId);\nreturn config.value; // TypeError: Cannot read properties of undefined\n```\n**Fix:**\n```ts\nconst config = configs.find(c => c.id === targetId);\nif (!config) throw new Error(`Config ${targetId} not found`);\nreturn config.value;\n```\n\n### Empty Array Access\n\n`arr[0]` on an empty array returns `undefined`, not an error. If the code then accesses a property of the result, it throws.\n\n**Violation:**\n```ts\nconst latest = events[0].timestamp; // undefined.timestamp if events = []\n```\n**Fix:**\n```ts\nconst latest = events[0]?.timestamp ?? null;\n// or: if (events.length === 0) return null;\n```\n\n### Nullable Database Column Treated as Non-Null\n\nA TypeScript type may say `string` for a column that is nullable in the database. 
The type is wrong — any row inserted with `NULL` will produce `null` at runtime.\n\n**Pattern to flag**: Reading `.foo` on a database row without checking if the type declaration matches the actual schema's nullable constraints.\n\n---\n\n## 4. Async & Promise Bugs\n\n### Missing `await` on Critical Path\n\nA fire-and-forget async call looks correct but the caller does not know if it succeeded or failed, and the function may return before the operation completes.\n\n**Violation:**\n```ts\nasync function deleteUser(id: string) {\n revokeTokens(id); // NOT awaited — may not complete before function returns\n await db.delete(id);\n return { success: true };\n}\n```\n**Fix:**\n```ts\nasync function deleteUser(id: string) {\n await revokeTokens(id); // must complete before deleting the user\n await db.delete(id);\n return { success: true };\n}\n```\n\n### Unhandled Promise Rejection\n\nA `.then()` without `.catch()` silently drops errors. In Node.js, unhandled rejections crash the process in newer versions.\n\n**Violation:**\n```ts\nfetchData().then(process); // rejection from fetchData or process is silently lost\n```\n**Fix:**\n```ts\nfetchData().then(process).catch(err => logger.error(\"fetchData failed\", err));\n// or use async/await with try/catch\n```\n\n### Sequential Awaits on Independent Operations\n\nTwo independent async operations awaited in series take `T_a + T_b` time instead of `max(T_a, T_b)`.\n\n**Violation:**\n```ts\nconst user = await fetchUser(id);\nconst config = await fetchConfig(); // independent — no reason to wait for user first\n```\n**Fix:**\n```ts\nconst [user, config] = await Promise.all([fetchUser(id), fetchConfig()]);\n```\n\n### `Promise.all` Fail-Fast When Partial Failure Is Acceptable\n\n`Promise.all` rejects as soon as any promise rejects, abandoning the remaining operations. 
If partial success is acceptable, `Promise.allSettled` is correct.\n\n**Violation:**\n```ts\n// Sending notifications — one failure shouldn't prevent others\nawait Promise.all(users.map(u => sendNotification(u))); // one rejection fails the whole await; the other sends keep running but their outcomes are lost\n```\n**Fix:**\n```ts\nconst results = await Promise.allSettled(users.map(u => sendNotification(u)));\nconst failures = results.filter(r => r.status === \"rejected\");\nif (failures.length > 0) logger.warn(`${failures.length} notifications failed`);\n```\n\n### Unbounded `Promise.all` on Large Array\n\nSpawning thousands of concurrent async operations exhausts database connections, file handles, or external API rate limits.\n\n**Violation:**\n```ts\nawait Promise.all(thousandsOfItems.map(item => processItem(item)));\n```\n**Fix**: Use a concurrency-limited batch runner:\n```ts\n// Process in chunks of 10 at a time\nfor (let i = 0; i < items.length; i += 10) {\n await Promise.all(items.slice(i, i + 10).map(processItem));\n}\n// or use a library like p-limit\n```\n\n---\n\n## 5. Stale Closures & Captured State\n\n### Loop Variable Capture with `var`\n\n`var` is function-scoped, not block-scoped. All closures created inside the loop capture the same variable, which has its final value by the time the callbacks run.\n\n**Violation:**\n```ts\nfor (var i = 0; i < 5; i++) {\n setTimeout(() => console.log(i), 0); // logs \"5\" five times, not 0,1,2,3,4\n}\n```\n**Fix:**\n```ts\nfor (let i = 0; i < 5; i++) { // `let` is block-scoped; each iteration gets its own `i`\n setTimeout(() => console.log(i), 0);\n}\n```\n\n### React `useEffect` Stale Closure\n\nA `useEffect` callback captures prop/state values at the time of the effect's creation. 
If those values change but the effect's dependency array doesn't include them, the callback operates on stale values forever.\n\n**Violation:**\n```tsx\nuseEffect(() => {\n const interval = setInterval(() => {\n // `count` is captured at mount and never updates\n setCount(count + 1); // always adds 1 to the initial value\n }, 1000);\n return () => clearInterval(interval);\n}, []); // missing `count` in deps\n```\n**Fix:**\n```tsx\nuseEffect(() => {\n const interval = setInterval(() => {\n setCount(c => c + 1); // functional update — always uses current value\n }, 1000);\n return () => clearInterval(interval);\n}, []);\n```\n\n---\n\n## 6. Resource Leaks & Missing Cleanup\n\n### Event Listener Never Removed\n\nAdding a listener in a component's mount phase without removing it on unmount causes the handler to fire after the component is gone, often throwing on de-referenced state.\n\n**Violation:**\n```tsx\nuseEffect(() => {\n window.addEventListener(\"resize\", handleResize);\n // no cleanup — handleResize fires after unmount, references stale state\n}, []);\n```\n**Fix:**\n```tsx\nuseEffect(() => {\n window.addEventListener(\"resize\", handleResize);\n return () => window.removeEventListener(\"resize\", handleResize);\n}, [handleResize]);\n```\n\n### Interval Not Cleared on Unmount\n\nA `setInterval` that updates component state keeps firing after unmount: it leaks the timer and, in React 17 and earlier, logs the warning `Can't perform a React state update on an unmounted component` (it does not throw).\n\n**Violation:**\n```tsx\nuseEffect(() => {\n setInterval(tick, 1000); // interval ID discarded; can never be cleared\n}, []);\n```\n**Fix:**\n```tsx\nuseEffect(() => {\n const id = setInterval(tick, 1000);\n return () => clearInterval(id);\n}, []);\n```\n\n### Growing Unbounded Cache\n\nAn in-memory cache that is added to without eviction grows without bound and eventually exhausts memory.\n\n**Violation:**\n```ts\nconst cache = new Map<string, Result>(); // module-level, grows forever\nfunction getCached(key: string) {\n if (!cache.has(key)) 
cache.set(key, compute(key));\n return cache.get(key)!;\n}\n```\n**Fix**: Add a max-size eviction policy (LRU), a TTL, or use a bounded cache library. At minimum, document that the key space must be finite and bounded.\n\n---\n\n## 7. Edge Cases — Inputs\n\n### Empty String Assumptions\n\nA function receiving a user-supplied string must handle `\"\"` explicitly — it is falsy in JavaScript, which sometimes helps but often misleads.\n\n**Violation:**\n```ts\nfunction getInitials(name: string) {\n return name.split(\" \").map(w => w[0]).join(\"\"); // name=\"\" → split gives [\"\"], so w[0] is undefined\n}\n```\n**Fix:**\n```ts\nfunction getInitials(name: string) {\n if (!name.trim()) return \"\";\n return name.trim().split(/\\s+/).map(w => w[0].toUpperCase()).join(\"\");\n}\n```\n\n### Unicode / Emoji String Length\n\nJavaScript strings are UTF-16. Emoji and many non-Latin characters are represented as surrogate pairs — two code units each. `.length`, `.slice()`, `.charAt()`, and `.split(\"\")` all operate on code units, not characters.\n\n**Violation:**\n```ts\nconst truncated = message.slice(0, 100); // may split a surrogate pair, producing \"?\"\nconst len = \"👋\".length; // 2, not 1\n```\n**Fix:**\n```ts\n// Use Array.from or spread to iterate by Unicode code point\nconst chars = Array.from(message);\nconst truncated = chars.slice(0, 100).join(\"\");\nconst len = Array.from(\"👋\").length; // 1\n```\n\n### Division by Zero\n\nAny user-supplied or computed value used as a divisor must be checked.\n\n**Violation:**\n```ts\nconst avgScore = totalScore / userCount; // NaN or Infinity when userCount = 0\n```\n**Fix:**\n```ts\nconst avgScore = userCount === 0 ? 0 : totalScore / userCount;\n```\n\n---\n\n## 8. Edge Cases — External Data & Network\n\n### `fetch` Does Not Reject on HTTP Errors\n\n`fetch` only rejects on network failure (DNS, timeout, no connection). 
A 400, 404, or 500 response resolves normally with `response.ok === false`.\n\n**Violation:**\n```ts\nconst data = await fetch(\"/api/users\").then(r => r.json()); // 500 → parsed error body, no throw\n```\n**Fix:**\n```ts\nconst response = await fetch(\"/api/users\");\nif (!response.ok) throw new Error(`HTTP ${response.status}: ${await response.text()}`);\nconst data = await response.json();\n```\n\n### Missing Request Timeout\n\nA `fetch` call with no timeout will wait indefinitely if the server hangs. In a serverless function, this exhausts the function's max execution time and blocks the client.\n\n**Violation:**\n```ts\nconst response = await fetch(url); // no timeout\n```\n**Fix:**\n```ts\nconst response = await fetch(url, { signal: AbortSignal.timeout(5_000) }); // 5 second max\n```\n\n### `JSON.parse` Not Wrapped in Try/Catch\n\n`JSON.parse` throws a `SyntaxError` on malformed input. If the input comes from an external source it can fail at any time.\n\n**Violation:**\n```ts\nconst data = JSON.parse(rawBody); // throws on malformed JSON; crashes the handler\n```\n**Fix:**\n```ts\nlet data: unknown;\ntry {\n data = JSON.parse(rawBody);\n} catch {\n return badRequest(\"Invalid JSON body\");\n}\n```\n\n---\n\n## 9. Concurrency & Shared State\n\n### Non-Atomic Read-Modify-Write\n\nRead a value, compute a new value, write it back. If two concurrent operations both read the same initial value, the second write silently overwrites the first.\n\n**Violation (application layer):**\n```ts\nconst balance = await getBalance(userId); // both read 100\nconst newBalance = balance - amount; // both compute 50\nawait setBalance(userId, newBalance); // second write wins: 50 instead of 0\n```\n**Fix**: Use a database-level atomic update (`UPDATE ... 
SET balance = balance - $amount WHERE balance >= $amount`), or use `SELECT FOR UPDATE` to lock the row for the duration of the transaction.\n\n**Violation (JavaScript):**\n```ts\nlet counter = 0;\nasync function increment() {\n const current = counter; // read\n await someAsync(); // yields — another increment may run here\n counter = current + 1; // write: first increment's result is lost\n}\n```\n**Fix**: For in-process counters, use a mutex or perform the increment synchronously without yielding.\n\n### Reentrant Async Function\n\nAn async function that is called again before its first invocation finishes, with both invocations modifying shared state.\n\n**Pattern to flag:**\n```ts\nlet isSyncing = false; // in-memory guard\n\nasync function sync() {\n if (isSyncing) return; // safe only while no `await` separates this check from the assignment below\n isSyncing = true;\n await doSync();\n isSyncing = false;\n}\n```\n**Fix**: The guard only works if `isSyncing = true` is set synchronously before the first `await`. The code above is actually fine for this reason — flag it only if there is an `await` before setting the flag. Also reset the flag in a `finally` block: as written, a rejection from `doSync()` leaves `isSyncing` stuck at `true`. For distributed/multi-instance systems, an in-memory flag is insufficient and must be moved to a database or Redis.\n\n---\n\n## 10. Scalability — Algorithmic Complexity\n\n### Linear Scan Inside a Loop — O(n²)\n\nUsing `Array.includes()`, `Array.find()`, or `Array.indexOf()` inside a loop that iterates over a collection of size n performs n × n = n² operations.\n\n**Violation:**\n```ts\n// O(n²): for each item, scan all blockedIds\nconst visible = items.filter(item => !blockedIds.includes(item.id));\n```\n**Fix:**\n```ts\n// O(n): one-time Set construction + O(1) lookups\nconst blockedSet = new Set(blockedIds);\nconst visible = items.filter(item => !blockedSet.has(item.id));\n```\n\n### Regex Recompilation in a Loop\n\n`new RegExp(pattern)` compiles the pattern every call. 
If called in a loop with a constant pattern, this is wasted work.\n\n**Violation:**\n```ts\nfor (const line of lines) {\n if (new RegExp(\"^ERROR:\").test(line)) handle(line); // compiles every iteration\n}\n```\n**Fix:**\n```ts\nconst errorPattern = /^ERROR:/; // compile once\nfor (const line of lines) {\n if (errorPattern.test(line)) handle(line);\n}\n```\n\n---\n\n## 11. Scalability — Database & I/O\n\n### N+1 Queries\n\nFetching a list, then issuing one query per row in a loop, is the most common database scalability bug. It turns one round-trip into N+1 round-trips.\n\n**Violation:**\n```ts\nconst posts = await db.query(\"SELECT * FROM posts LIMIT 20\");\nfor (const post of posts) {\n // 20 separate queries — one per post\n post.author = await db.query(\"SELECT * FROM users WHERE id = $1\", [post.author_id]);\n}\n```\n**Fix:**\n```ts\nconst posts = await db.query(\"SELECT * FROM posts LIMIT 20\");\nconst authorIds = posts.map(p => p.author_id);\nconst authors = await db.query(\"SELECT * FROM users WHERE id = ANY($1)\", [authorIds]);\nconst authorMap = new Map(authors.map(a => [a.id, a]));\nposts.forEach(p => { p.author = authorMap.get(p.author_id); });\n```\n\n### Unbounded Query\n\nA query with no `LIMIT` returns the entire table. Tables grow over time; this query will eventually time out, exhaust memory, or cause OOM.\n\n**Violation:**\n```ts\nconst users = await db.query(\"SELECT * FROM users WHERE active = true\");\n// returns 10 rows today; returns 100,000 rows in a year\n```\n**Fix:**\n```ts\nconst users = await db.query(\n \"SELECT id, display_name FROM users WHERE active = true LIMIT $1 OFFSET $2\",\n [pageSize, page * pageSize]\n);\n```\n\n---\n\n## 12. Scalability — Memory & Throughput\n\n### Loading Full Dataset Into Memory\n\nReading an entire file, table, or collection into an array before processing. 
Memory usage grows linearly with data size.\n\n**Violation:**\n```ts\nconst allEvents = await db.query(\"SELECT * FROM events\"); // 10 million rows\nconst processed = allEvents.map(transform);\n```\n**Fix**: Use cursor-based streaming or pagination:\n```ts\nlet cursor = 0;\nwhile (true) {\n const batch = await db.query(\"SELECT * FROM events WHERE id > $1 LIMIT 1000\", [cursor]);\n if (batch.length === 0) break;\n batch.forEach(transform);\n cursor = batch[batch.length - 1].id;\n}\n```\n\n### In-Memory Coordination State That Breaks on Scale-Out\n\nA module-level `Map`, `Set`, or variable used as a cache, rate limiter, or deduplication store is **not shared** between multiple server instances or worker processes. When the service scales out or restarts, the state is lost or silently per-instance.\n\n**Violation:**\n```ts\n// Works on one instance; breaks when there are two\nconst rateLimitCache = new Map<string, number>(); // module-level\n\nfunction checkRateLimit(userId: string): boolean {\n const count = rateLimitCache.get(userId) ?? 0;\n rateLimitCache.set(userId, count + 1);\n return count < 10;\n}\n```\n**Fix**: Move shared state to a database (Redis, PostgreSQL) that all instances can access. Flag this whenever module-level mutable state is used for coordination in a server context.\n"},"feature-planning":{"content":"---\nname: feature-planning\ndescription: Extensively plans a proposed feature before any code is written. Use when the user asks to plan, design, or spec out a feature, or when they say \"plan this feature\", \"design this\", or want to think through a feature before building it.\n---\n\n# Feature Planning\n\nEnter plan mode and produce a thorough, implementation-ready feature plan. Do not write any code until the plan is approved.\n\n## Trigger\n\nWhen this skill is invoked, **immediately enter plan mode** using the EnterPlanMode tool. 
All planning work happens inside plan mode.\n\n## Scope\n\n- **User describes a feature**: Treat the description as the starting point. Explore the codebase to understand where the feature fits before designing anything.\n- **Request is vague or ambiguous**: Ask clarifying questions using AskUserQuestion before proceeding. Do not assume intent. Common ambiguities to probe:\n - Who is the target user or actor?\n - What is the expected behavior vs. current behavior?\n - Are there constraints (performance, compatibility, platform)?\n - What is explicitly out of scope?\n - Are there related features this interacts with?\n- **User provides a detailed spec**: Validate it against the codebase. Identify gaps, contradictions, or unstated assumptions and raise them before planning.\n\nDo NOT skip clarification. A plan built on wrong assumptions wastes more time than a question.\n\n## Process\n\n### 1. Understand Context\n\n- Read the project's SPEC.md, README, CLAUDE.md, and any relevant docs to understand the system's architecture, conventions, and existing features.\n- Explore the codebase areas the feature will touch. Identify existing patterns, data models, state management, and UI conventions.\n- Map out what already exists that the feature will interact with or depend on.\n- Output: A brief summary of the current system context relevant to this feature.\n\n### 2. Clarify Requirements\n\n- If any of the following are unclear, ask before continuing:\n - **Functional requirements**: What exactly should the feature do? What are the inputs, outputs, and user flows?\n - **Non-functional requirements**: Performance targets, data volume expectations, offline behavior, accessibility.\n - **Boundaries**: What is in scope vs. out of scope for this iteration?\n - **Dependencies**: Does this require new APIs, services, migrations, or third-party integrations?\n- Output: A clear, numbered list of confirmed requirements.\n\n### 3. 
Design the Feature\n\nProduce a plan that covers each of the following sections. Skip a section only if it genuinely does not apply.\n\n#### 3a. User-Facing Behavior\n- Describe the feature from the user's perspective: what they see, what they do, what happens.\n- Cover the happy path end-to-end.\n- Define error states and what the user sees when things go wrong (invalid input, network failure, permission denied, etc.).\n\n#### 3b. Data Model Changes\n- New types, interfaces, database tables, or schema changes.\n- Migrations needed and their reversibility.\n- Impact on existing data (backwards compatibility, data backfill).\n\n#### 3c. Architecture & Module Design\n- Which files/modules will be created or modified.\n- How the feature integrates with the existing architecture (state management, routing, API layer, etc.).\n- Clear responsibility boundaries: what each new module/function owns.\n\n#### 3d. API & Integration Points\n- New endpoints, webhooks, or external service calls.\n- Request/response shapes.\n- Authentication and authorization requirements.\n\n#### 3e. State Management\n- What state the feature introduces (local, global, persisted, cached).\n- State transitions and lifecycle.\n- How state syncs across components or with the backend.\n\n#### 3f. Implementation Steps\n- An ordered sequence of concrete implementation steps.\n- Each step should be small enough to be a single commit.\n- Note dependencies between steps (what must come before what).\n- Identify which steps can be done in parallel.\n\n### 4. Analyze Quality Dimensions\n\nProactively evaluate the proposed design against each of these dimensions. For each, explicitly state what risks exist and how the design addresses them. If a dimension does not apply, say so briefly. 
See [REFERENCE.md](REFERENCE.md) for named standards, plan quality criteria, templates, and anti-patterns.\n\n#### Bugs & Correctness\n*(Applies `correctness-audit` — Dimensions 1–9: Logic Bugs through Concurrency & Shared State)*\n\nReview the design against the `correctness-audit` dimensions. State which are highest-risk for this feature:\n- **Logic bugs**: off-by-one errors, boolean inversions, wrong operators in proposed conditional logic\n- **Null / undefined**: fields that can be absent — are they guarded? Do nullable DB columns match their TypeScript types?\n- **Async & Promise**: are concurrent async paths safe? Is there risk of fire-and-forget on critical writes?\n- **Concurrency / TOCTOU**: can concurrent requests (multiple users, tabs, or duplicate submissions) corrupt shared state? Does any step read-check-act on data another operation could change between check and act?\n\n#### Edge Cases\n*(Applies `correctness-audit` — Dimensions 7 & 8: Edge Case Inputs, External Data & Network)*\n\n- **Empty state**: what does the user see before any data exists for this feature?\n- **Boundary values**: max field lengths, max collection sizes, numeric overflow — are they defined and enforced at both the API and database layers?\n- **Network failures**: if an operation fails mid-way, what state is the system left in? Is partial completion visible to the user?\n- **Reentrant / concurrent usage**: double-submit, multiple tabs, back-button navigation mid-flow.\n- **External data**: any third-party API or webhook payload — is it validated as `unknown` before use, not cast directly to a typed shape?\n\n#### Design Quality\n*(SOLID — Robert C. Martin; Clean Architecture — Robert C. 
Martin & Martin Fowler)*\n\n- **SRP**: does each new module have one clearly stated reason to change?\n- **OCP**: can new behavior be added by extension without modifying existing modules?\n- **DIP**: do high-level modules depend on abstractions, not concrete implementations?\n- **Dependency direction**: do dependencies point inward (domain ← application ← infrastructure)? No domain module should depend on a framework or I/O layer.\n- Does the design follow existing project patterns, or introduce a new one? If new, is the justification explicitly stated?\n\n#### Maintainability\n*(Clean Code — Robert C. Martin; The Pragmatic Programmer — Hunt & Thomas)*\n\n- Will a developer unfamiliar with this feature understand it from the plan alone, without asking the author?\n- Are proposed module and function names self-documenting?\n- Are non-obvious design decisions explained in the plan's rationale, not left as tribal knowledge?\n- Are implicit contracts between modules made explicit (typed interfaces, documented invariants)?\n\n#### Modularity\n*(SOLID — SRP, ISP, DIP; UNIX philosophy)*\n\n- Can each new component be unit-tested in isolation, without the full stack?\n- Are new module dependencies unidirectional? 
Does the design introduce any circular imports?\n- Could any new module be replaced or reused independently of the others?\n\n#### Simplicity\n*(KISS — Clarence Johnson, 1960; YAGNI — Extreme Programming, Kent Beck & Ron Jeffries)*\n\n- **KISS**: is this the simplest design that satisfies the stated requirements?\n- **YAGNI**: are there components designed for hypothetical future requirements not in scope for this iteration?\n- Does the language or framework already provide something the design is building from scratch?\n- Is there unnecessary indirection — interfaces, factories, registries — with only one concrete implementation?\n\n#### Scalability\n*(Applies `correctness-audit` — Dimensions 10–12: Algorithmic Complexity, Database & I/O, Memory & Throughput)*\n\n- Will this design function correctly at 10× the current data volume without architectural changes?\n- Are there unbounded database queries (no `LIMIT`) or full-collection loads into memory?\n- Are there N+1 query patterns that will emerge as data grows?\n- Is any coordination state stored in-memory in a way that breaks under horizontal scale-out?\n\n#### Security\n*(Applies `security-audit` — use the relevant domains for each new design element)*\n\nMap each new element of the design to the applicable security-audit domains:\n- **New API endpoint** → §2 Authorization, §5 Input Validation, §6 API Security, §8 Rate Limiting\n- **New database table or function** → §7 Database Security (RLS, REVOKE, CHECK constraints)\n- **New auth flow or session handling** → §1 Authentication & Session Management\n- **New external service call or webhook** → §6 API7 SSRF, §10 webhook deduplication & signature\n- **New financial operation** → §10 Financial & Transaction Integrity, §9 Concurrency & Race Conditions\n- **New user data stored or transmitted** → §13 Data Privacy & Retention, §4 Cryptography & Secrets\n\n### 5. 
Identify Risks & Open Questions\n\n- List anything that could go wrong or that you're uncertain about.\n- Flag technical risks (performance cliffs, migration dangers, dependency on unstable APIs).\n- Flag product risks (user confusion, feature conflicts, scope creep).\n- For each risk, suggest a mitigation or note that it needs a decision.\n\n## Output Format\n\nWrite the plan to the plan file with this structure:\n\n```\n# Feature: [Name]\n\n## Context\n[Brief summary of current system state relevant to this feature]\n\n## Requirements\n1. [Confirmed requirement]\n2. ...\n\n## Design\n\n### User-Facing Behavior\n[Description with happy path and error states]\n\n### Data Model Changes\n[Types, schemas, migrations]\n\n### Architecture\n[Modules, files, integration points]\n\n### API & Integration Points\n[Endpoints, external calls]\n\n### State Management\n[State shape, transitions, sync]\n\n### Implementation Steps\n1. [Step with description]\n2. ...\n\n## Quality Analysis\n\n### Bugs & Correctness\n[Risks and mitigations]\n\n### Edge Cases\n[Identified edge cases and how they're handled]\n\n### Design Quality\n[Assessment]\n\n### Maintainability\n[Assessment]\n\n### Modularity\n[Assessment]\n\n### Simplicity\n[Assessment]\n\n### Scalability\n[Assessment]\n\n### Security\n[Assessment]\n\n## Risks & Open Questions\n- [Risk/question with proposed mitigation or decision needed]\n\n## Out of Scope\n- [What this plan explicitly does not cover]\n```\n\n## Rules\n\n- **Plan mode first**: Always enter plan mode before doing any planning work. The plan is written to the plan file, not output as chat.\n- **No code**: Do not write implementation code during planning. The plan is the deliverable.\n- **Ask, don't assume**: If the request is ambiguous, ask clarifying questions. Prefer one round of good questions over multiple rounds of back-and-forth.\n- **Read before designing**: Explore the codebase thoroughly. 
Reference actual file paths, function names, and patterns from the project.\n- **Be concrete**: Implementation steps should reference specific files and modules, not vague descriptions like \"update the backend.\"\n- **Be honest about uncertainty**: If you're unsure about something, flag it as an open question rather than making a guess that will become the plan.\n- **Respect existing patterns**: The plan should extend the project's architecture, not fight it. If a new pattern is warranted, justify why.\n- **Scope boundaries**: Clearly state what is and isn't included. Prevent scope creep by naming it.\n- **Name the pattern**: when the design follows or introduces a named pattern (Repository, Strategy, ADR, C4 Container), name it and note its source so the rationale is traceable.\n- **Delegate to audit skills**: the quality analysis does not re-describe what the audit skills cover in detail — it identifies which domains apply and defers to those skills for the specific checklist.\n","reference":"# Feature Planning — Reference\n\nDetailed standards, plan quality criteria, templates, and anti-patterns for the skill defined in `SKILL.md`.\n\n---\n\n## 1. Design Methodologies\n\n### C4 Model (Simon Brown)\n*Applicable to: Architecture & Module Design section*\n\nUse C4 vocabulary to describe architecture at the right level of detail. Don't describe implementation-level detail in architecture, or architecture-level detail in a code comment.\n\n- **System Context**: How the feature fits in the broader product and what external systems it touches.\n- **Container**: Major runtime components (web app, API server, database, message queue, cache). A new Edge Function or a new Supabase table is a container-level concern.\n- **Component**: Key modules within a container (e.g., `useNotifications` hook, `NotificationService` class). 
Most features are designed at this level.\n- **Code**: Only describe at this level for non-obvious or algorithmically critical parts.\n\nWhen writing the Architecture section, identify which C4 level is appropriate. A simple UI tweak is Code-level. A new backend service is Container-level.\n\n### Architecture Decision Records (ADR)\n*Applicable to: any significant or non-obvious design choice in the plan*\n\nWhen the plan makes a non-obvious design choice (e.g., \"use Realtime instead of polling\", \"store as JSONB instead of normalized columns\"), embed a mini-ADR in the rationale:\n\n```\n**Decision**: [What was chosen]\n**Context**: [Why a decision was needed; what problem this solves; what alternatives were considered]\n**Consequences**: [What becomes easier; what becomes harder; what is explicitly ruled out]\n```\n\nThis prevents \"we chose X\" from becoming tribal knowledge. The next developer reading the code needs to know *why*, not just *what*.\n\n### RFC-Style Specification\n*Applicable to: complex or high-risk features affecting multiple systems or teams*\n\nFor features that significantly affect multiple teams or carry high design risk, structure the plan to include:\n\n- **Abstract**: 2–3 sentence summary of the feature and its purpose.\n- **Motivation**: Why this is needed now. What problem it solves. Why existing solutions are insufficient.\n- **Drawbacks**: Reasons not to build this, or not to build it this way.\n- **Alternatives**: Other approaches considered and why they were rejected.\n\n---\n\n## 2. Plan Quality Criteria\n\nA plan section is \"done\" when it meets these criteria. 
Self-check before calling `ExitPlanMode`.\n\n### Context\n- [ ] References actual file paths, function names, and patterns from the real codebase (not generic descriptions).\n- [ ] Identifies all existing systems the feature will interact with or depend on.\n- [ ] Notes which existing files will change, not just what will be added.\n\n### Requirements\n- [ ] Functional requirements describe observable behavior (inputs, outputs, user flows) — not implementation details.\n- [ ] Non-functional requirements name specific targets (\"p95 latency < 200ms\", \"works offline for up to 24h\") — not vague aspirations (\"it should be fast\").\n- [ ] Out of scope is stated explicitly for anything a reader might reasonably assume is included.\n\n### User-Facing Behavior\n- [ ] Happy path is described end-to-end from the user's perspective.\n- [ ] Every error state has an explicit description of what the user sees — not \"show an error\" but \"display 'Something went wrong. Try again.' with a retry button.\"\n- [ ] Empty state is defined (what the user sees before any data exists for this feature).\n- [ ] Loading / pending state is defined if the feature involves async operations.\n\n### Data Model Changes\n- [ ] New tables include all columns with types, nullability, defaults, CHECK constraints, and FK `ON DELETE` behavior.\n- [ ] RLS requirements are stated for every new table.\n- [ ] Index requirements are stated based on the query access patterns described in the plan.\n- [ ] Migration is characterized as destructive / non-destructive, and whether a data backfill is needed.\n\n### Architecture\n- [ ] Lists specific files to be created and specific existing files to be modified.\n- [ ] Responsibility of each new module is stated in one sentence.\n- [ ] Dependency graph between new modules is described (what imports what).\n- [ ] No circular dependencies introduced.\n\n### API & Integration Points\n- [ ] Endpoint paths, HTTP methods, request bodies, and response shapes are 
defined.\n- [ ] Auth requirements are stated per endpoint.\n- [ ] Error response shapes and status codes are defined (not just the 200 case).\n\n### Implementation Steps\n- [ ] Each step is small enough to be a single commit.\n- [ ] Dependencies between steps are noted (what must come before what).\n- [ ] Steps that can be parallelized are identified.\n- [ ] The first step is always safe to merge independently (non-breaking change).\n\n---\n\n## 3. Plan Section Templates\n\n### Data Model Changes\n\n**Bad** (too vague):\n> We'll add a notifications table.\n\n**Good** (specific):\n> **New table**: `notifications`\n>\n> | Column | Type | Constraints |\n> |--------|------|-------------|\n> | `id` | `UUID` | `PRIMARY KEY DEFAULT gen_random_uuid()` |\n> | `user_id` | `UUID` | `NOT NULL REFERENCES auth.users(id) ON DELETE CASCADE` |\n> | `type` | `TEXT` | `NOT NULL CHECK (type IN ('quest_complete', 'reward_earned', 'system'))` |\n> | `read_at` | `TIMESTAMPTZ` | nullable — null means unread |\n> | `created_at` | `TIMESTAMPTZ` | `NOT NULL DEFAULT now()` |\n>\n> **RLS**: `USING (user_id = auth.uid())` for SELECT; no UPDATE/DELETE for users.\n> **Index**: `(user_id, created_at DESC)` — supports the \"latest N unread for user\" query.\n> **Migration**: Non-destructive (new table). No backfill required.\n\n---\n\n### Implementation Steps\n\n**Bad** (too vague):\n> 1. Build the backend.\n> 2. Build the frontend.\n> 3. Add tests.\n\n**Good** (specific):\n> 1. **[Migration]** Add `notifications` table and RLS policy. Non-destructive; safe to ship independently.\n> 2. **[Edge Function]** `POST /notifications/mark-read` — Zod-validated body, updates `read_at`, returns 204. Blocked by step 1.\n> 3. **[React hook]** `useNotifications()` — Realtime subscription scoped to `auth.uid()`. Can be built in parallel with step 2.\n> 4. **[UI]** `<NotificationBell>` — badge count, dropdown list, \"mark all read\" action. Blocked by step 3.\n> 5. 
**[Test]** Integration test: verify user A cannot read user B's notifications (RLS enforcement). Blocked by step 1.\n\n---\n\n### API Endpoint\n\n> **`POST /api/quests/:questId/complete`**\n> - **Auth**: Requires valid JWT; `getUser()` server-side (not `getSession()`).\n> - **Authorization**: Verify `quest.user_id === authenticatedUser.id` before any mutation.\n> - **Request body**: `{ evidence: string }` — validated with Zod; `evidence` max 500 chars, non-empty.\n> - **Response (200)**: `{ coinsAwarded: number, newBalance: number }`\n> - **Response (404)**: Quest not found or does not belong to caller. (Do not distinguish between the two — prevents enumeration.)\n> - **Response (409)**: Quest already completed.\n> - **Response (422)**: Schema validation failure with field-level errors.\n\n---\n\n### Architecture Decision Record (inline)\n\n> **Decision**: Use Supabase Realtime for live notification updates instead of polling.\n> **Context**: The feature requires users to see new notifications without refreshing. Polling every N seconds introduces latency and unnecessary load. Realtime is already available in the project infrastructure.\n> **Consequences**: Simpler client code (no polling interval to manage); subscription must be cleaned up on component unmount to avoid leaks; does not work for users behind restrictive firewalls (acceptable for this use case).\n\n---\n\n## 4. Common Planning Anti-Patterns\n\n### Premature Generalization\n*(YAGNI — Extreme Programming, Kent Beck & Ron Jeffries)*\n\nThe plan designs a general-purpose system for one concrete use case. Examples: building a \"plugin architecture\" when one integration is needed; an \"event bus\" when one event type exists; an \"action system\" for a single action type.\n\n**Signal**: The architecture section describes abstractions (interfaces, factories, registries) where no concrete second implementation exists or is planned.\n\n**Remedy**: Design for the concrete case. 
Note in Out of Scope that generalization is deferred until a second concrete case exists.\n\n---\n\n### Over-Complex Control Flow\n*(KISS — Clarence Johnson)*\n\nThe design requires a developer to trace through several interacting systems to follow one user action. Each hop (component → service → event → consumer → database) multiplies failure modes and debugging surface.\n\n**Signal**: Implementation steps require more than 3 conceptual layers for a straightforward operation.\n\n**Remedy**: Simplify the call chain. Prefer direct calls over event-driven patterns until the added complexity is justified by a concrete requirement (e.g., \"multiple independent consumers\", \"decoupled deployment\").\n\n---\n\n### Missing Error States in User-Facing Behavior\n*(Defensive Programming — Steve McConnell, Code Complete)*\n\nThe user-facing behavior section describes only the happy path. Network failures, validation errors, empty states, and permission-denied cases are left undefined. These become inconsistent behavior implemented ad-hoc during implementation.\n\n**Signal**: The user-facing behavior section has no \"when X fails, the user sees…\" entries.\n\n**Remedy**: For every user-visible action, add an explicit error state: what message appears, where it appears, and whether the user can recover (retry vs. dead end).\n\n---\n\n### Unstated Assumptions\n*(The Pragmatic Programmer — Hunt & Thomas: \"Don't Assume, Check\")*\n\nThe plan assumes an external API contract, an existing service capability, a team decision, or an infrastructure arrangement that has not been confirmed. 
These assumptions surface as blockers discovered during implementation.\n\n**Signal**: Phrases like \"we'll integrate with X\", \"X already supports this\", or \"the infra team will handle Y\" without a reference or confirmation.\n\n**Remedy**: Flag every unconfirmed assumption as an explicit open question in Risks & Open Questions, with a named owner and a decision deadline if possible.\n\n---\n\n### Circular Module Dependencies\n*(Clean Architecture — Robert C. Martin)*\n\nThe architecture introduces a dependency cycle: A imports B, B imports C, C imports A. This prevents independent testing, makes initialization order fragile, and is a source of \"works but nobody knows why\" bugs.\n\n**Signal**: In the dependency graph, following the arrows from any module leads back to that same module.\n\n**Remedy**: Extract the code that C needs from A into a separate module that both A and C import and that imports neither of them, or invert one dependency using an interface (Dependency Inversion Principle).\n\n---\n\n### Data Model Without Constraints\n*(Defensive Programming; database design best practices)*\n\nNew tables are defined without `NOT NULL`, `CHECK`, or explicit FK `ON DELETE` behavior. Constraints are the last line of defense — they enforce correctness even when the application layer has a bug or is bypassed (e.g., a direct DB migration, a future code path).\n\n**Signal**: A table definition where any column that should always have a value lacks `NOT NULL`; a financial amount column without a `CHECK (amount > 0)` constraint; a FK without a stated `ON DELETE` policy.\n\n**Remedy**: For every new column, explicitly state: nullable or not, default value, and any domain constraint. For every FK: `CASCADE`, `SET NULL`, or `RESTRICT` — never leave it unstated.\n"},"security-audit":{"content":"---\nname: security-audit\ndescription: Performs a thorough security audit against established industry standards (OWASP Top 10 2021, OWASP API Security Top 10 2023, CWE taxonomy, GDPR, PCI-DSS). 
Use when reviewing for security vulnerabilities, hardening production systems, auditing auth/payment/database code, or conducting periodic security reviews. Works on git diffs, specific files, or an entire codebase.\n---\n\n# Security Audit\n\nAudit code against established security standards and threat models. Every finding **must** cite the specific standard ID (OWASP, CWE, GDPR article, etc.) so the developer understands the authoritative source for each requirement. This skill is for security-specific review; for clean code and architecture concerns, use `best-practices-audit` instead.\n\n## Scope\n\nDetermine what to audit based on user request and context:\n\n- **Git diff mode** (default when no scope specified and changes exist): run `git diff` and `git diff --cached` to audit only changed/added code and its immediate context\n- **File/directory mode**: audit the files or directories the user specifies\n- **Full audit mode**: when the user asks for a full security review, scan all source code (skip vendor/node_modules/build artifacts); prioritize files touching auth, payments, database, and external integrations\n\nRead all in-scope code before producing findings.\n\n## Domains to Evaluate\n\nCheck each domain. Skip domains with no findings. See [REFERENCE.md](REFERENCE.md) for detailed definitions, standard IDs, and concrete examples.\n\n### 1. Authentication & Session Management\n*(OWASP A07:2021, CWE-287, CWE-384)*\n\n- Using `getSession()` instead of server-side `getUser()` for auth decisions (JWT trusting without server validation)\n- Missing token expiry enforcement; long-lived tokens without rotation\n- Weak or missing logout (session not invalidated server-side)\n- OAuth state parameter missing or not validated (CSRF on OAuth flows)\n- Trusting client-provided user identity without server-side verification\n- Credentials stored in localStorage instead of httpOnly cookies\n\n### 2. 
Authorization & Access Control\n*(OWASP A01:2021, OWASP API2:2023, CWE-284, CWE-639)*\n\n- BOLA/IDOR: object IDs accepted from user input without ownership verification\n- Missing Row-Level Security (RLS) policies on database tables\n- Privilege escalation paths: routes or RPCs accessible to roles that shouldn't have access\n- Broken function-level auth: admin/internal endpoints not restricted by role\n- REVOKE gaps: functions or tables accessible to PUBLIC or anon when they shouldn't be\n- Assuming the presence of a valid JWT implies authorization (JWT ≠ authz check)\n\n### 3. Injection\n*(OWASP A03:2021, CWE-89, CWE-79, CWE-77, CWE-94)*\n\n- **SQL injection**: raw string interpolation in queries; use parameterized queries or an ORM\n- **XSS**: unsanitized user content inserted into HTML; missing `Content-Security-Policy`\n- **Command injection**: user input passed to shell commands, `exec()`, `eval()`, `Function()`\n- **Template injection**: user-controlled strings rendered by a template engine\n- **Schema pollution (PostgreSQL)**: SECURITY DEFINER functions without `SET search_path = ''`; attacker-controlled schemas prepended to search path\n\n### 4. Cryptography & Secrets\n*(OWASP A02:2021, CWE-327, CWE-798, CWE-312, CWE-321)*\n\n- Hardcoded credentials, API keys, tokens, or secrets in source code or `.env.example`\n- Secrets in environment variables loaded client-side (exposed in browser bundles)\n- Weak hashing algorithms (MD5, SHA-1) used for security purposes\n- Tokens or sensitive data stored in plaintext in the database instead of a secrets vault\n- Missing HTTPS enforcement; secrets transmitted over HTTP\n- JWT secrets that are short, guessable, or shared across environments\n\n### 5. Input Validation & Output Encoding\n*(CWE-20, CWE-116, CWE-601, OWASP A03:2021)*\n\n- No schema validation (Zod, Yup, JSON Schema, etc.) 
at API boundaries\n- Validation only on the client, not enforced on the server\n- Missing length/range constraints on user-supplied strings (no `maxLength`, no `CHECK` constraint)\n- Missing content-type validation on file uploads\n- Open redirects: user-controlled URL passed directly to redirect without allowlist validation\n- Missing `encodeURIComponent` on user data placed in URLs\n\n### 6. API Security\n*(OWASP API Top 10 2023)*\n\n- **API1 — BOLA**: resources returned or modified by user-supplied ID without ownership check\n- **API2 — Broken Auth**: unprotected endpoints, missing JWT verification, bearer token in URL\n- **API3 — Broken Object Property Level Auth**: response includes fields (e.g. `role`, `coins`, `internal_id`) that the caller should not see\n- **API4 — Unrestricted Resource Consumption**: no rate limiting, pagination, or request size limits\n- **API5 — Broken Function Level Auth**: non-public actions (admin, delete, ban) not verified against caller's role\n- **API7 — SSRF**: URL parameters or webhook URLs accepted from user input without allowlist validation\n- **API8 — Security Misconfiguration**: permissive CORS (`*`), verbose error messages leaking stack traces or schema details, debug endpoints in production\n- **API10 — Unsafe Consumption of APIs**: external API responses trusted without validation; webhooks not verified via HMAC signature\n\n### 7. 
Database Security\n*(CWE-250, CWE-284, PostgreSQL Security Best Practices)*\n\n- Tables created without `ENABLE ROW LEVEL SECURITY`\n- Missing `REVOKE EXECUTE` on SECURITY DEFINER functions from `PUBLIC`, `authenticated`, `anon`\n- SECURITY DEFINER functions without `SET search_path = ''` (schema pollution vector)\n- Missing `REVOKE TRUNCATE` on financial, audit, or compliance tables\n- Overly permissive RLS policies (e.g., `USING (true)` on sensitive tables)\n- Direct client-to-database connections bypassing application security layer\n- Sensitive columns (tokens, PII) stored in plaintext instead of encrypted columns or vault references\n- Missing `CHECK` constraints on financial columns (e.g., balance `>= 0`, amount sign validation)\n\n### 8. Rate Limiting & Denial-of-Service\n*(OWASP API4:2023, CWE-770, CWE-400)*\n\n- No rate limiting on authentication endpoints (brute force enabler)\n- No rate limiting on expensive operations (sync, export, AI calls, file uploads)\n- Rate limits implemented in-memory per process/isolate (bypassed by horizontal scaling or redeployment)\n- Missing request body size limits (memory exhaustion)\n- Unbounded database queries without `LIMIT` clause (full table scan DoS)\n- No backoff or circuit breaker for outbound calls to third-party services\n\n### 9. Concurrency & Race Conditions\n*(CWE-362, CWE-367 TOCTOU)*\n\n- Check-then-act patterns on financial or inventory data without database-level locking\n- Double-spend or double-grant risk: no idempotency key or `ON CONFLICT DO NOTHING` guard\n- Missing advisory locks or `SELECT FOR UPDATE` on critical rows during multi-step transactions\n- Non-atomic read-modify-write sequences on shared state (coin balance, stock count, etc.)\n- Idempotency keys that can be `NULL` (treated as distinct by PostgreSQL UNIQUE, allowing bypass)\n\n### 10. 
Financial & Transaction Integrity\n*(PCI-DSS Req 6 & 10, CWE-362)*\n\n- Client-side coin/credit/reward calculation (any value trusted from the client is a vulnerability)\n- Missing `CHECK` constraint on transaction amount sign (credits vs. debits not enforced at DB level)\n- Coin or balance modification without an audit trail (append-only transaction log)\n- Webhook events not deduplicated by a provider-assigned event ID (replay attack enabler)\n- Webhook signature not verified (unauthenticated financial state changes)\n- Deletion of financial transaction records (violates audit trail requirements; potential legal violation)\n- Missing `NOT NULL` on idempotency key column for transaction tables\n\n### 11. Security Logging & Monitoring\n*(OWASP A09:2021, CWE-778, CWE-117)*\n\n- Security-relevant events not logged (auth failures, permission denials, validation failures, HMAC failures)\n- Log injection: unsanitized user input included directly in log messages\n- Sensitive data (passwords, tokens, card numbers, PII) written to logs\n- No structured logging — free-text logs that can't be queried or alerted on\n- Missing correlation between security events and user/request IDs\n- No alerting or anomaly detection on suspicious event patterns\n- Logs stored in a volatile medium (in-memory, ephemeral filesystem) that is lost on restarts, redeployments, or scaling events\n\n### 12. Secrets & Environment Security\n*(CWE-798, CWE-312, 12-Factor App)*\n\n- Secrets committed to git (`.env`, private keys, API tokens in source files)\n- Fallback to insecure defaults when env vars are absent (e.g., CORS origin falling back to `*`)\n- Using the same secrets across development, staging, and production environments\n- Secrets logged or included in error messages\n- Client-side environment variables (prefixed `VITE_`, `NEXT_PUBLIC_`, etc.) containing server-side secrets\n- Secrets passed as CLI arguments (visible in process list)\n\n### 13. Data Privacy & Retention\n*(GDPR Art. 
5/17/25, CCPA, CWE-359)*\n\n- PII stored longer than necessary (no retention policy or purge cron)\n- No anonymization path for account deletion (right to erasure, GDPR Art. 17)\n- PII in logs, error messages, or analytics events that shouldn't be there\n- Missing `ON DELETE SET NULL` or equivalent for user-linked tables that must survive account deletion\n- Financial records with FK `ON DELETE CASCADE` that would purge legally required audit evidence\n- No consent record for data collection (GDPR Art. 6)\n- User data returned in API responses without field-level access checks (over-fetching)\n\n### 14. Security Misconfiguration\n*(OWASP A05:2021, CWE-16)*\n\n- Permissive CORS (`Access-Control-Allow-Origin: *` on authenticated endpoints)\n- Missing `Content-Security-Policy`, `X-Frame-Options`, `X-Content-Type-Options` headers\n- HTTP used instead of HTTPS; missing HSTS header\n- Debug/development endpoints or verbose error responses in production\n- Default credentials or example configurations deployed\n- Database or storage buckets with public access that should be private\n- Missing `SameSite` attribute on session cookies\n- JWT verification disabled on functions that handle authenticated user data\n\n### 15. Supply Chain & Dependency Security\n*(OWASP A06:2021, CWE-1357)*\n\n- Dependencies with known CVEs (run `npm audit`, `pip-audit`, `bun audit`)\n- Unpinned dependency versions (`*`, `latest`, `^` for production dependencies)\n- Dependencies pulled from non-official registries without integrity hashing\n- Dev dependencies installed in production containers\n- Missing Subresource Integrity (SRI) hashes on CDN-loaded scripts\n\n### 16. 
TypeScript / JavaScript Specific\n*(CWE-843 Type Confusion, CWE-915 Improperly Controlled Modification)*\n\n- `as any` or `as unknown as T` casts that bypass type checking on externally-sourced data\n- Prototype pollution: `Object.assign(target, userControlledObject)` or spread of unvalidated input onto objects\n- `eval()`, `new Function()`, `setTimeout(string)`, or `innerHTML =` with user-controlled content\n- `JSON.parse()` result used without validation (treat parsed JSON as `unknown`, not `any`)\n- Arithmetic on `bigint` and `number` without explicit conversion (silent precision loss)\n- Async functions missing `await` on promises that should be awaited (unhandled rejection, ordering bug)\n\n## Static Analysis Tools\n\nBefore producing findings, **run available tools** on in-scope code. Incorporate tool output into your findings (cite the tool rule alongside the standard ID).\n\n### npm / bun audit (dependency vulnerabilities)\n```bash\nnpm audit --audit-level=moderate # or: bun audit\n```\nMap findings to **OWASP A06:2021** and the specific CVE ID.\n\n### ESLint with security plugins\n```bash\n# Check for eslint-plugin-security in devDependencies first\nnpx eslint --ext .ts,.tsx src/\n```\nKey rules to look for: `security/detect-object-injection`, `security/detect-non-literal-regexp`, `no-eval`, `no-implied-eval`.\n\n### Semgrep (if available)\n```bash\nsemgrep --config=p/owasp-top-ten .\nsemgrep --config=p/typescript .\n```\n\n### Ruff with Bandit rules (Python)\n```bash\nruff check --select S . # Bandit security rules\n```\n\n### How to use tool output\n1. Map each tool finding to its security domain (e.g., a SQL injection ESLint rule → Domain 3: Injection).\n2. Critical CVEs or injection/auth findings → **Critical**. Outdated deps with low-severity CVEs → **Warning** or **Suggestion**.\n3. If a tool is not present or produces no findings, note \"npm audit: clean\" etc. in the Summary.\n\n## Output Format\n\nGroup findings by severity. 
Each finding **must** name the specific standard violated.\n\n```\n## Critical\nViolations that are directly exploitable or enable data theft, privilege escalation, or financial fraud.\n\n### [DOMAIN] Brief title\n**File**: `path/to/file.ts` (lines X–Y)\n**Standard**: OWASP A01:2021 / CWE-639 — one-line description of what the standard requires.\n**Violation**: What the code does wrong and the concrete attack scenario.\n**Fix**: Specific, actionable code change or architectural remedy.\n\n## High\nViolations that create significant risk but require specific conditions or chaining to exploit.\n\n(same structure)\n\n## Medium\nDefense-in-depth gaps, missing controls, or violations that increase attack surface.\n\n(same structure)\n\n## Low\nBest-practice deviations, hardening opportunities, or compliance gaps unlikely to be directly exploited.\n\n(same structure)\n\n## Summary\n- Total findings: N (X critical, Y high, Z medium, W low)\n- Highest-risk area: name the domain with the most severe findings\n- Key standards violated: list specific OWASP/CWE IDs\n- Overall security posture: 1–2 sentence verdict\n- Recommended immediate action: the single most urgent fix\n```\n\n## Rules\n\n- **Cite the standard**: every finding must reference a specific standard ID (OWASP A-code, CWE-NNN, GDPR Art. N, PCI-DSS Req. N). This is the core value of this skill.\n- **Model the attack**: every Critical or High finding must describe the realistic attack scenario, not just the code smell.\n- **Be specific**: always cite file paths and line numbers.\n- **Be actionable**: every finding must include a concrete fix — not \"add validation\" but \"use a Zod schema on the request body and reject with 400 if it fails.\"\n- **Severity by exploitability**: rate severity by real-world exploitability and impact, not theoretical worst-case.\n- **Don't duplicate best-practices-audit**: focus on security vulnerabilities and compliance gaps. 
Architecture and clean code issues belong in the other skill.\n- **No false positives over findings**: if something is ambiguous, note it as a question for the developer rather than flagging it as a violation.\n- **Defense-in-depth counts**: a control missing a second layer of enforcement (e.g., RLS present but no CHECK constraint) is a Medium finding even if the first layer is sound.\n","reference":"# Security Audit — Reference\n\nDetailed definitions, standard sources, violation examples, and fixes for each domain in `SKILL.md`.\n\n---\n\n## 1. Authentication & Session Management\n**Standards**: OWASP A07:2021 — Identification and Authentication Failures; CWE-287 Improper Authentication; CWE-384 Session Fixation; RFC 6750 Bearer Token Usage\n\n### `getSession()` vs. `getUser()` — OWASP A07:2021\n\n`getSession()` reads the JWT from the client-supplied cookie/header and parses it locally. A tampered or expired JWT can appear valid if clock skew or local validation is used. `getUser()` performs a server-side round-trip to the authorization server, guaranteeing the token is currently valid and the user account has not been revoked.\n\n**Violation pattern (Supabase/TypeScript):**\n```ts\n// WRONG — trusts client-supplied JWT locally\nconst { data: { session } } = await supabase.auth.getSession();\nconst userId = session?.user?.id;\n```\n**Fix:**\n```ts\n// CORRECT — server validates the token\nconst { data: { user }, error } = await supabase.auth.getUser(authHeader);\nif (error || !user) return unauthorized();\n```\n\n### OAuth State Parameter — CWE-352 CSRF\n\nThe OAuth `state` parameter must be a cryptographically random nonce stored server-side (or signed cookie). Without it, an attacker can force a victim to link their account to the attacker's OAuth token.\n\n**Fix**: Generate `state = crypto.randomUUID()`, store in DB or signed cookie with short TTL, validate on callback before exchanging code.\n\n---\n\n## 2. 
Authorization & Access Control\n**Standards**: OWASP A01:2021 — Broken Access Control; OWASP API1:2023 — Broken Object Level Authorization; CWE-284 Improper Access Control; CWE-639 Authorization Bypass Through User-Controlled Key\n\n### BOLA / IDOR\n\nThe most prevalent API vulnerability class. Any time a user-controlled identifier (UUID, integer, slug) is used to look up a resource, ownership must be verified server-side — it cannot be assumed from the JWT alone.\n\n**Violation pattern:**\n```ts\n// WRONG — trusts the caller-supplied document id without checking ownership\nconst { id } = req.body;\nconst resource = await db.query(\"SELECT * FROM documents WHERE id = $1\", [id]);\nreturn resource; // returns any user's document\n```\n**Fix:**\n```ts\n// CORRECT — adds ownership column to WHERE clause\nconst resource = await db.query(\n \"SELECT * FROM documents WHERE id = $1 AND owner_id = $2\",\n [id, authenticatedUser.id]\n);\nif (!resource) return notFound(); // don't reveal existence\n```\n\n### Row-Level Security (PostgreSQL)\n\nEvery table with user-scoped data must have RLS enabled AND a policy defined. RLS enabled with no policies = no access. RLS disabled = all data visible to any authenticated DB connection.\n\n**Required pattern:**\n```sql\nALTER TABLE documents ENABLE ROW LEVEL SECURITY;\n\nCREATE POLICY \"users_own_documents\"\n ON documents FOR ALL\n TO authenticated\n USING (owner_id = auth.uid());\n```\n\n**High-risk gap**: Financial tables (transactions, payment records) should have RLS but also block UPDATE/DELETE via separate per-command policies or triggers — a `FOR ALL` policy with only `USING` applies that expression to every command (and reuses it as `WITH CHECK`), so row owners can UPDATE and DELETE their own rows, not just SELECT them.\n\n---\n\n## 3. Injection\n**Standards**: OWASP A03:2021 — Injection; CWE-89 SQL Injection; CWE-79 XSS; CWE-77 Command Injection; CWE-94 Code Injection\n\n### SQL Injection — CWE-89\n\nAny string concatenation or interpolation in a SQL query is potentially exploitable. 
The fix is always parameterized queries (also called prepared statements).\n\n**Violation:**\n```ts\n// WRONG\nconst result = await db.query(`SELECT * FROM users WHERE name = '${name}'`);\n```\n**Fix:**\n```ts\n// CORRECT\nconst result = await db.query(\"SELECT * FROM users WHERE name = $1\", [name]);\n```\n\n### Schema Pollution (PostgreSQL SECURITY DEFINER) — CWE-89\n\nA function with `SECURITY DEFINER` runs with the privileges of the function's owner (often a superuser). If `search_path` is not pinned, an attacker who can create schemas may prepend a malicious schema, causing the function to resolve table names to their injected versions.\n\n**Violation:**\n```sql\nCREATE OR REPLACE FUNCTION credit_coins(uid uuid, amount int)\nRETURNS void\nLANGUAGE plpgsql\nSECURITY DEFINER AS $$\nBEGIN\n UPDATE profiles SET coins = coins + amount WHERE id = uid;\nEND;\n$$;\n```\n**Fix:**\n```sql\nCREATE OR REPLACE FUNCTION public.credit_coins(uid uuid, amount int)\nRETURNS void\nLANGUAGE plpgsql\nSECURITY DEFINER\nSET search_path = '' -- pins search path; no user schema can be injected\nAS $$\nBEGIN\n UPDATE public.profiles SET coins = coins + amount WHERE id = uid;\nEND;\n$$;\n```\n\n### XSS — CWE-79\n\nNever assign user-controlled content to `innerHTML`, `outerHTML`, `document.write()`, or React's `dangerouslySetInnerHTML` without sanitization.\n\n**Violation:**\n```ts\nelement.innerHTML = userInput; // executes embedded <script> tags\n```\n**Fix:**\n```ts\nelement.textContent = userInput; // text node — never executed as HTML\n// If HTML is genuinely needed, use DOMPurify:\nelement.innerHTML = DOMPurify.sanitize(userInput, { ALLOWED_TAGS: ['b', 'i'] });\n```\n\n---\n\n## 4. Cryptography & Secrets\n**Standards**: OWASP A02:2021 — Cryptographic Failures; CWE-327 Use of Broken Algorithm; CWE-798 Hardcoded Credentials; CWE-312 Cleartext Storage; NIST SP 800-131A\n\n### Hardcoded Secrets — CWE-798\n\nAny secret in source code is compromised the moment the repo is cloned. 
Even private repos have been breached.\n\n**Scan for**: `apiKey =`, `password =`, `secret =`, `token =`, `-----BEGIN RSA PRIVATE KEY-----` in `.ts`, `.js`, `.json`, `.toml`, `.yaml` files.\n\n**Fix**: Rotate immediately. Store in environment variables loaded at runtime (never in source), or a secrets manager (HashiCorp Vault, AWS Secrets Manager, Supabase Vault).\n\n### Broken Hash Algorithms — CWE-327\n\nMD5 and SHA-1 are collision-compromised. Never use for password hashing, HMAC, or integrity verification.\n\n- Passwords: use `bcrypt` (cost ≥ 12), `argon2id`, or `scrypt`.\n- HMAC: use SHA-256 minimum. `HMAC-SHA256` is the baseline for webhook signatures.\n- File integrity: SHA-256 minimum.\n\n### Client-Side Secret Exposure\n\nIn Vite: `VITE_*` variables are embedded in the JS bundle and visible to any user who opens DevTools. In Next.js: `NEXT_PUBLIC_*` is the same. Never put API keys or service secrets in these variables.\n\n---\n\n## 5. Input Validation & Output Encoding\n**Standards**: CWE-20 Improper Input Validation; CWE-116 Improper Encoding; CWE-601 Open Redirect; OWASP Input Validation Cheat Sheet\n\n### Server-Side Validation is Non-Negotiable\n\nClient-side validation (React form validation, browser `required` attributes) is UX, not security. Any attacker can send raw HTTP requests bypassing the client entirely.\n\n**Required pattern (TypeScript with Zod):**\n```ts\nconst Schema = z.object({\n username: z.string().min(1).max(30),\n amount: z.number().int().positive().max(10_000),\n});\n\nconst parsed = Schema.safeParse(req.body);\nif (!parsed.success) return badRequest(parsed.error.flatten());\n// Use parsed.data — never req.body — downstream\n```\n\n### Defense-in-Depth: Database CHECK Constraints\n\nApplication validation can be bypassed (direct DB connection, migration mistake, future code path). 
CHECK constraints are the last line of defense.\n\n```sql\n-- Prevents negative balance under any race condition\nALTER TABLE profiles ADD CONSTRAINT chk_coins_non_negative CHECK (coins >= 0);\n\n-- Enforces transaction sign by type\nALTER TABLE coin_transactions ADD CONSTRAINT chk_credit_positive\n CHECK (tx_type NOT IN ('quest_reward', 'purchase') OR amount > 0);\nALTER TABLE coin_transactions ADD CONSTRAINT chk_debit_negative\n CHECK (tx_type NOT IN ('cosmetic_purchase', 'refund') OR amount < 0);\n```\n\n### Open Redirect — CWE-601\n\n```ts\n// WRONG — attacker crafts ?next=https://evil.com\nconst next = req.query.next;\nres.redirect(next);\n\n// CORRECT — validate against allowlist\nconst ALLOWED_PATHS = ['/dashboard', '/profile', '/settings'];\nconst next = req.query.next;\nif (!ALLOWED_PATHS.includes(next)) return res.redirect('/dashboard');\nres.redirect(next);\n```\n\n---\n\n## 6. API Security\n**Standards**: OWASP API Security Top 10 2023\n\n### API1:2023 — Broken Object Level Authorization (BOLA)\n\nSee Domain 2. Every resource access must verify ownership. This is the #1 API vulnerability.\n\n### API3:2023 — Broken Object Property Level Authorization\n\nAPIs often return full database row objects. If the object contains fields the caller should not see (other users' data, internal flags, admin properties), this is a data exposure violation.\n\n**Fix**: Explicitly allowlist fields returned in API responses. 
Never return `SELECT *` to the client.\n\n```ts\n// WRONG\nreturn res.json(userRow); // includes password_hash, role, internal_flags\n\n// CORRECT\nreturn res.json({\n id: userRow.id,\n displayName: userRow.display_name,\n avatarUrl: userRow.avatar_url,\n});\n```\n\n### API7:2023 — Server-Side Request Forgery (SSRF)\n\nIf the application fetches a URL derived from user input, an attacker can target internal services (metadata endpoints, Redis, internal databases).\n\n**Violation:**\n```ts\n// WRONG — user controls the URL\nconst data = await fetch(req.body.webhookUrl);\n```\n**Fix:** Validate URL against a strict allowlist of expected domains. Block private IP ranges (10.x, 172.16.x–172.31.x, 192.168.x, 169.254.x, ::1, fc00::/7).\n\n### API8:2023 — Security Misconfiguration\n\n- CORS `Access-Control-Allow-Origin: *` on authenticated endpoints allows any origin to read responses.\n- Verbose error messages that expose stack traces, SQL query structure, or internal paths.\n- Debug endpoints (`/debug`, `/metrics`, `/__admin`) accessible in production.\n\n---\n\n## 7. Database Security\n**Standards**: CWE-250 Execution with Unnecessary Privileges; PostgreSQL Security Best Practices; CIS PostgreSQL Benchmark\n\n### Principle of Least Privilege\n\nEvery database role should have only the minimum permissions required. The `public` schema grants `CREATE` to all roles by default in PostgreSQL < 15 — revoke this explicitly.\n\n```sql\nREVOKE CREATE ON SCHEMA public FROM PUBLIC;\nREVOKE ALL ON ALL TABLES IN SCHEMA public FROM PUBLIC;\n\n-- Then explicitly grant only what each role needs\nGRANT SELECT, INSERT ON public.profiles TO authenticated;\n```\n\n### REVOKE EXECUTE on SECURITY DEFINER Functions\n\nSECURITY DEFINER functions run as their owner. 
If PUBLIC or `authenticated` can call them without restriction, any logged-in user can trigger privileged operations.\n\n```sql\n-- After defining any SECURITY DEFINER function:\nREVOKE EXECUTE ON FUNCTION public.credit_coins(uuid, int) FROM PUBLIC;\nREVOKE EXECUTE ON FUNCTION public.credit_coins(uuid, int) FROM authenticated;\nREVOKE EXECUTE ON FUNCTION public.credit_coins(uuid, int) FROM anon;\n-- Re-grant only to service_role or internal callers as needed\n```\n\n### REVOKE TRUNCATE on Audit Tables\n\n`TRUNCATE` bypasses RLS and row-level triggers. Any role that can TRUNCATE an audit table can silently destroy evidence.\n\n```sql\nREVOKE TRUNCATE ON TABLE public.coin_transactions FROM PUBLIC;\nREVOKE TRUNCATE ON TABLE public.coin_transactions FROM authenticated;\nREVOKE TRUNCATE ON TABLE public.coin_transactions FROM service_role;\n-- Even service_role should not be able to bulk-erase financial records\n```\n\n---\n\n## 8. Rate Limiting & Denial-of-Service\n**Standards**: OWASP API4:2023 — Unrestricted Resource Consumption; CWE-770 Allocation of Resources Without Limits; CWE-400 Uncontrolled Resource Consumption\n\n### In-Memory Rate Limiting Is Ineffective\n\nRate limits implemented with an in-process `Map` or `LRU` cache are reset on process restart and are not shared across horizontal replicas. An attacker simply retries after waiting for a cold deploy, or routes requests to different instances.\n\n**Correct approach**: Store rate limit counters in a database (Redis, PostgreSQL) keyed by user ID and action type. 
The counter must be incremented atomically in the same transaction as the action.\n\n**PostgreSQL pattern:**\n```sql\n-- Atomic check-and-increment\nINSERT INTO rate_limits (user_id, action, window_start, count)\nVALUES ($1, $2, date_trunc('minute', now()), 1)\nON CONFLICT (user_id, action, window_start)\nDO UPDATE SET count = rate_limits.count + 1\nRETURNING count;\n-- If returned count > max_allowed, reject with 429\n```\n\n### Missing Rate Limits on Auth Endpoints\n\nAuthentication endpoints (login, password reset, OTP verification) without rate limiting enable brute-force and credential-stuffing attacks.\n\n**Recommended limits (baseline):**\n- Login: 5 attempts per minute per IP\n- Password reset: 3 per hour per email\n- OTP verification: 3 attempts per code before invalidating\n\n---\n\n## 9. Concurrency & Race Conditions\n**Standards**: CWE-362 Concurrent Execution Using Shared Resource with Improper Synchronization (TOCTOU); CWE-367 TOCTOU Race Condition\n\n### Check-Then-Act on Financial Data\n\nThe most dangerous race condition pattern in financial systems: read the balance, check if sufficient, then deduct. If two requests run concurrently, both checks pass against the same stale balance.\n\n**Violation:**\n```sql\n-- Thread 1 and Thread 2 both read balance = 100 at the same time\nSELECT coins FROM profiles WHERE id = $1; -- both see 100\n-- Both check: 100 >= 80 → true\nUPDATE profiles SET coins = coins - 80 WHERE id = $1; -- both run\n-- Result: balance = -60 (an overdraft) if no CHECK constraint exists. 
With a CHECK constraint, the second UPDATE fails with a constraint violation instead of a clean insufficient-funds rejection.\n```\n\n**Fix — advisory lock + FOR UPDATE:**\n```sql\nBEGIN;\nSELECT pg_advisory_xact_lock(hashtext($1::text)); -- serialize per user\nSELECT coins FROM profiles WHERE id = $1 FOR UPDATE; -- lock the row\n-- Now deduct safely — only one transaction holds the lock\nUPDATE profiles SET coins = coins - $2 WHERE id = $1 AND coins >= $2;\nCOMMIT;\n```\n\n### Idempotency Key Bypass\n\nIf an idempotency key column allows `NULL`, PostgreSQL's UNIQUE constraint treats each `NULL` as a distinct value — meaning `NULL` keys do not deduplicate. This allows unlimited replay of reward operations.\n\n```sql\n-- WRONG — NULLs are not unique in PostgreSQL\nidempotency_key TEXT UNIQUE -- NULL can appear unlimited times\n\n-- CORRECT\nidempotency_key TEXT NOT NULL UNIQUE -- enforces exactly-once\n```\n\n---\n\n## 10. Financial & Transaction Integrity\n**Standards**: PCI-DSS v4 Req. 6 (Secure Systems), Req. 10 (Audit Logs); ISO 27001 A.9; CWE-362\n\n### Server-Authoritative Coin Logic\n\nAny value computed or provided by the client that affects financial state is a vulnerability. The server must compute all rewards, deductions, and balances independently.\n\n**Pattern to flag:**\n```ts\n// WRONG — client tells server how many coins to award\nconst { userId, coinsEarned } = req.body;\nawait creditCoins(userId, coinsEarned); // attacker sends coinsEarned = 99999\n```\n\n**Correct:** The server computes the reward based on verified activity data (e.g., verified GitHub events), never from a client-supplied amount.\n\n### Append-Only Transaction Log\n\nCoin/credit transaction tables must be immutable after insert. 
Updates would allow retroactive falsification of balances; deletes destroy the audit trail.\n\n```sql\n-- Trigger blocking updates to financial records\nCREATE OR REPLACE FUNCTION block_transaction_updates()\nRETURNS trigger LANGUAGE plpgsql AS $$\nBEGIN\n RAISE EXCEPTION 'Updates to coin_transactions are not permitted';\nEND;\n$$;\n\nCREATE TRIGGER no_update_coin_transactions\nBEFORE UPDATE ON coin_transactions\nFOR EACH ROW EXECUTE FUNCTION block_transaction_updates();\n```\n\n### Webhook Deduplication — Replay Attack\n\nPayment providers may retry webhooks. Without deduplication on the provider's event ID, the same payment event can credit coins multiple times.\n\n```sql\nINSERT INTO payment_events (provider_event_id, payload, received_at)\nVALUES ($1, $2, now())\nON CONFLICT (provider_event_id) DO NOTHING;\n-- Only process coins if INSERT affected 1 row (i.e., event was new)\n```\n\n---\n\n## 11. Security Logging & Monitoring\n**Standards**: OWASP A09:2021 — Security Logging and Monitoring Failures; CWE-778 Insufficient Logging; CWE-117 Log Injection; NIST SP 800-92\n\n### What Must Be Logged\n\nAt minimum, log these events with timestamp, user ID, IP address, and action detail:\n- Authentication failures (wrong password, expired token, missing auth header)\n- Authorization failures (access denied to a resource)\n- Input validation failures that look like attacks (unexpected field shapes, oversized inputs)\n- Cryptographic verification failures (HMAC mismatch on webhooks)\n- Rate limit hits\n- Account actions (password change, email change, account deletion)\n- Financial anomalies (deduction larger than balance attempted)\n\n### Log Injection — CWE-117\n\nIf log messages are constructed using string interpolation with user input, an attacker can inject newlines to forge log entries.\n\n**Violation:**\n```ts\nlogger.info(`User logged in: ${req.body.username}`);\n// Attacker sends username = \"admin\\nSECURITY: Admin password changed\"\n```\n**Fix**: Use 
structured logging (JSON with separate fields), never string interpolation.\n```ts\nlogger.info({ event: \"login\", username: req.body.username }); // safe\n```\n\n---\n\n## 12. Secrets & Environment Security\n**Standards**: CWE-798 Hardcoded Credentials; CWE-312 Cleartext Storage; The Twelve-Factor App (Factor III: Config)\n\n### Env Var Fallback to Insecure Default\n\nA common pattern in \"developer-friendly\" code is to fall back to a permissive default if an env var is missing. This silently disables security in production if the env var is misconfigured.\n\n**Violation:**\n```ts\n// WRONG — falls back to wildcard CORS if env var missing\nconst origin = Deno.env.get(\"ALLOWED_ORIGIN\") ?? \"*\";\n```\n**Fix:**\n```ts\n// CORRECT — hard-error on missing config; fail secure\nconst origin = Deno.env.get(\"ALLOWED_ORIGIN\");\nif (!origin) throw new Error(\"ALLOWED_ORIGIN env var is required\");\n```\n\n---\n\n## 13. Data Privacy & Retention\n**Standards**: GDPR Art. 5 (data minimization), Art. 17 (right to erasure), Art. 25 (privacy by design); CCPA §1798.105; CWE-359 Exposure of Private Information\n\n### Right to Erasure — Account Deletion\n\nOn account deletion, the application must:\n1. Delete or anonymize personal data (name, email, avatar, IP, user-agent)\n2. Retain legally required financial records (PCI-DSS, EU VAT — typically 7–10 years)\n3. Preserve abuse/moderation evidence (content reports, security flags)\n4. Nullify sender references in shared records (e.g., chat messages become anonymous)\n\n**Critical FK patterns:**\n```sql\n-- Chat: anonymize messages, don't delete them (conversation history remains intact)\nsender_id UUID REFERENCES auth.users(id) ON DELETE SET NULL\n\n-- Transactions: retain for audit; user_id becomes orphaned (no cascade)\nuser_id UUID -- intentionally no FK constraint, or FK with ON DELETE SET NULL\n```\n\n### Data Minimization — GDPR Art. 5(1)(c)\n\nDo not collect or store more data than necessary. 
Flag:\n- IP addresses stored permanently when 30/90 day retention suffices\n- User-agent strings logged indefinitely (they are PII under GDPR)\n- Full request bodies logged when only metadata is needed for debugging\n- `SELECT *` queries that pull PII columns into contexts that don't need them\n\n---\n\n## 14. Security Misconfiguration\n**Standards**: OWASP A05:2021; CWE-16 Configuration; CIS Benchmarks; OWASP Secure Headers\n\n### Required Security Headers\n\n```\nContent-Security-Policy: default-src 'self'; script-src 'self'; object-src 'none'\nX-Frame-Options: DENY\nX-Content-Type-Options: nosniff\nReferrer-Policy: strict-origin-when-cross-origin\nStrict-Transport-Security: max-age=63072000; includeSubDomains; preload\nPermissions-Policy: geolocation=(), microphone=(), camera=()\n```\n\n### CORS Misconfiguration\n\n`Access-Control-Allow-Origin: *` on an authenticated endpoint effectively disables CORS protection — any origin can call the endpoint from a browser and read the response. Browsers reject the wildcard for cookie-credentialed requests, but token-authenticated requests (e.g., an `Authorization` header) are not covered by that restriction.\n\nThe origin allowlist must be an explicit list of trusted domains, validated server-side. Never reflect the request `Origin` header without verification.\n\n```ts\n// WRONG — reflects any origin\nconst origin = req.headers.get(\"origin\");\nheaders.set(\"Access-Control-Allow-Origin\", origin ?? \"*\");\n\n// CORRECT — validate against explicit allowlist\nconst ALLOWED = new Set([\"https://app.example.com\"]);\nconst requestOrigin = req.headers.get(\"origin\") ?? \"\";\nif (ALLOWED.has(requestOrigin)) {\n headers.set(\"Access-Control-Allow-Origin\", requestOrigin);\n headers.set(\"Vary\", \"Origin\");\n}\n```\n\n---\n\n## 15. 
Supply Chain & Dependency Security\n**Standards**: OWASP A06:2021 — Vulnerable and Outdated Components; CWE-1357; SLSA Framework\n\n### Dependency Audit\n\nRun `npm audit` or `bun audit` and treat results as:\n- **Critical/High CVEs** → block deployment; patch immediately\n- **Moderate CVEs** → fix within the sprint\n- **Low CVEs** → fix in next dependency update cycle\n\n### Version Pinning\n\nUse exact versions in `package.json` for production dependencies, or lock with `package-lock.json`/`bun.lockb`. The `^` prefix allows minor and patch version bumps that could introduce regressions or vulnerabilities you haven't reviewed.\n\n---\n\n## 16. TypeScript / JavaScript Specific\n**Standards**: CWE-843 Type Confusion; CWE-915 Prototype Pollution; CWE-94 Code Injection; OWASP Cheat Sheet: DOM-based XSS\n\n### Prototype Pollution — CWE-915\n\nMerging user-controlled objects onto existing objects can overwrite properties on `Object.prototype`, affecting all objects in the process.\n\n**Violation:**\n```ts\nfunction mergeOptions(defaults: object, userOptions: unknown) {\n return Object.assign(defaults, userOptions); // if userOptions is {\"__proto__\": {\"admin\": true}}\n}\n```\n**Fix**: Validate and allowlist the keys of user-controlled objects before merging. Use `Object.create(null)` for dictionaries that must not inherit from `Object.prototype`. Use schema validation (Zod) to strip unknown keys.\n\n### `as any` Type Assertions on External Data — CWE-843\n\nExternal data (API responses, webhook payloads, database query results typed as `any`, `JSON.parse()` output) must be treated as `unknown` and parsed through a validator before use. 
Using `as any` or `as ExpectedType` directly bypasses TypeScript's safety guarantees entirely.\n\n```ts\n// WRONG\nconst payload = JSON.parse(body) as WebhookPayload;\ncreditCoins(payload.userId, payload.amount); // if payload.amount is a string: NaN coins\n\n// CORRECT\nconst parsed = WebhookPayloadSchema.safeParse(JSON.parse(body));\nif (!parsed.success) return badRequest();\ncreditCoins(parsed.data.userId, parsed.data.amount); // type-safe and validated\n```\n\n### Unhandled Promise Rejections — CWE-755\n\nIn async TypeScript/JavaScript, a missing `await` means the promise runs in the background and any rejection is silently swallowed (or crashes Node.js). This is especially dangerous in financial operations where you need to know if the DB write succeeded.\n\n```ts\n// WRONG — fire-and-forget on a critical operation\nlogSecurityEvent(userId, \"auth_failure\"); // rejection silently lost\n\n// CORRECT — await or explicitly handle\nawait logSecurityEvent(userId, \"auth_failure\");\n// or: void logSecurityEvent(...).catch(err => console.error(\"Failed to log:\", err));\n```\n"},"systematic-debugging":{"content":"---\nname: systematic-debugging\ndescription: Guides root-cause analysis with a structured process: reproduce, isolate, hypothesize, verify. Use when debugging bugs, investigating failures, or when the user says something is broken or not working as expected.\n---\n# Systematic Debugging\n\nWork through failures in order. Don't guess at fixes until the cause is narrowed down.\n\n## Scope\n\n- **User reports a bug**: Clarify what \"wrong\" means (error message, wrong result, crash, hang). Get steps to reproduce or environment details if missing.\n- **User points at code**: Treat that as the suspected area; still reproduce and isolate before changing code.\n- **Logs/stack traces provided**: Use them to form hypotheses; don't ignore them.\n\n## Process\n\n### 1. Reproduce\n\n- Confirm the failure is reproducible. If not, note that and list what's needed (e.g. 
data, env, steps).\n- Identify: one-off or intermittent? In which environment (dev/staging/prod, OS, version)?\n- Output: \"Reproducible: yes/no. How: …\"\n\n### 2. Isolate\n\n- Shrink the problem: minimal input, minimal code path, or minimal config that still fails.\n- Bisect if useful: which commit, which option, which input range?\n- Remove variables (other features, network, time) to see when the failure goes away.\n- Output: \"Failure occurs when: …\" and \"Failure does not occur when: …\"\n\n### 3. Hypothesize\n\n- State one or more concrete hypotheses that explain the observed behavior (e.g. \"null passed here\", \"race between A and B\", \"wrong type at runtime\").\n- Tie each hypothesis to evidence from reproduce/isolate (logs, stack trace, line numbers).\n- Prefer the simplest hypothesis that fits the evidence.\n- Output: \"Hypothesis: …\" with \"Evidence: …\"\n\n### 4. Verify\n\n- Propose a minimal check (log, assert, unit test, or one-line change) that would confirm or rule out the top hypothesis.\n- If the user can run it, give the exact step. If you can run it (e.g. tests), do it.\n- After verification: \"Confirmed: …\" or \"Ruled out; next hypothesis: …\"\n\n### 5. Fix\n\n- Only suggest a fix after the cause is confirmed or highly likely.\n- Fix the root cause when possible; document or ticket workarounds if you suggest one.\n- Suggest a regression test or assertion so the bug doesn't come back.\n\n## Output\n\n- Prefer short bullets over long paragraphs.\n- Always cite file/line/function when pointing at code.\n- If stuck (can't reproduce, no logs), say what's missing and what would help next.\n- Don't suggest random fixes (e.g. \"try clearing cache\") without tying them to a hypothesis.\n","reference":null}},"subagents":{"deep-research":{"content":"---\nname: deep-research\nmodel: default\ndescription: Deep research and literature review. Use when the user asks for deep research, literature review, or to thoroughly investigate a topic. 
Searches the web, consults reputable sources, and synthesizes an answer with pros/cons and comparisons when relevant.\nreadonly: true\n---\n\n# Deep Research\n\nYour job is to **thoroughly research a topic** using web search and reputable sources, then synthesize the best answer. When multiple approaches or answers exist, compare them with pros and cons.\n\n## When you're used\n\n- User asks for \"deep research,\" \"literature review,\" or \"thoroughly investigate\" a topic.\n- User wants an evidence-based answer with sources.\n- User asks for pros/cons or a comparison of options.\n\n## Exa MCP (use when available)\n\nThe Exa MCP provides semantic search over the live web and code. **Use Exa for real-time web research, code examples, and company/org research** when the tools are available. Prefer Exa over generic web search when you need high-quality, relevant results or code/docs from the open-source ecosystem.\n\n| Tool | When to use |\n|------|--------------|\n| **Web Search (Exa)** | General web research: current practices, comparisons, how-to, opinions, blog posts, official docs. Use for \"how does X work?\", \"best practices for Y\", \"X vs Y\", or time-sensitive topics. Query in natural language; Exa returns semantically relevant pages with snippets. |\n| **Code Context Search** | Code snippets, examples, and documentation from open source repos. Use when the user needs \"how to do X in language/framework Y\", code examples, or implementation patterns. Complements official docs with real-world usage. |\n| **Company Research** | Research companies and organizations: what they do, products, recent news, structure. Use for \"tell me about company X\", due diligence, or market/competitor context. |\n\n**How to use Exa effectively:**\n- **Queries**: Use clear, specific queries (e.g. \"React Server Components best practices 2024\" rather than \"React\"). 
Include stack, year, or context when it matters.\n- **Combine with other sources**: Use Exa for discovery and breadth; use AlphaXiv for academic papers when the topic is literature/research. Fetch full pages (e.g. with browser or fetch) when you need to cite or quote a specific passage.\n- **Cite**: Exa returns URLs and snippets — cite the URL and page title in your Sources; don't present Exa's summary as the primary source when you can point to the actual page.\n\nIf Exa tools are not available, fall back to web search and fetch as needed.\n\n## AlphaXiv tools (use when available)\n\nAlphaXiv tools query arXiv and related academic content. **Use them for literature review, finding papers, or surveying recent research.** If these tools are available, prefer them for academic topics; otherwise use Exa or web search.\n\n| Tool | When to use |\n|------|--------------|\n| **answer_research_query** | Survey recent papers on a question (e.g. \"What do recent papers do for X?\", \"How do papers handle Y?\"). Use for state-of-the-art, common methods, or trends. |\n| **search_for_paper_by_title** | Find a specific paper by exact or approximate title when you know the name or a close match. |\n| **find_papers_feed** | Get arXiv papers by topic, sort (Hot, Comments, Views, Likes, GitHub, Recommended), and time interval. Use for \"what's trending in X\" or \"recent papers in topic Y.\" Topics include cs.*, math.*, physics.*, stat.*, q-bio.*, etc. |\n| **answer_pdf_query** | Answer a question about a single paper given its PDF URL (arxiv.org/pdf/..., alphaxiv.org, or semantic scholar). Use after you have a paper URL and need to extract a specific claim or method. |\n| **read_files_from_github_repository** | Read files or directory from a paper's linked GitHub repo (when the paper has a codebase). Use to summarize implementation or repo structure. |\n| **find_organizations** | Look up canonical organization names for filtering find_papers_feed by institution. 
|\n\nAlphaXiv covers all of arXiv (physics, math, CS, stats, etc.), not only AI. Use **find_papers_feed** with the right topic (e.g. cs.LG, math.AP, quant-ph) for the domain.\n\n## Process\n\n1. **Clarify the question** — If the request is vague, state what you're treating as the research question in one sentence.\n2. **Search** — Use the right source for the topic:\n - **Academic / literature**: AlphaXiv (answer_research_query, find_papers_feed, answer_pdf_query) when available.\n - **Web / practice / code / companies**: Exa MCP (Web Search, Code Context Search, Company Research) when available; otherwise web search and fetch full pages when needed.\n Prefer official docs, established institutions, recent content for time-sensitive topics, and multiple viewpoints when the topic is debated.\n3. **Synthesize** — Answer the question clearly. If there are several valid answers or approaches:\n - Compare them (e.g. \"Option A vs Option B\").\n - List pros and cons for each where relevant.\n - State which is best for which situation, or that it depends on context.\n4. **Cite** — For key claims, note the source (title, site, or URL). No need to cite every sentence; enough that the user can verify and go deeper.\n\n## Output format\n\n```\n## Research question\n[One sentence]\n\n## Summary\n[2–4 sentences: direct answer and main takeaway]\n\n## Details / Comparison\n[Structured by theme or by option. Use subsections if helpful. Include pros/cons and comparisons when several answers exist.]\n\n## Sources\n- [Source 1]: [URL or citation]\n- [Source 2]: …\n```\n\n- Prefer clear structure over long paragraphs.\n- If the topic is narrow and there's one clear answer, keep it concise; if it's broad or contested, add more comparison and nuance.\n- If you couldn't find good sources on part of the question, say so and what would help (e.g. 
different search terms, type of source).\n\n## Rules\n\n- Use Exa MCP for web/code/company research when available; use AlphaXiv for academic/literature when available. Fall back to web search if neither is available.\n- Use search and the web; don't rely only on prior knowledge. Prefer recent, reputable sources.\n- Don't invent sources or URLs. If you can't access a page, say so.\n- Do not take everything you read as fact. The internet is full of misinformation.\n- Stay on topic. If the user scopes the question (e.g. \"for Python\" or \"in healthcare\"), keep the answer within that scope.\n- You are read-only: research and report only. No code or file changes.\n"},"update-docs":{"content":"---\nname: update-docs\nmodel: default\ndescription: Updates project documentation to match the code. Main focus is docs (architecture, how the project is built, setup, deploy, contributing, README). Use when the user asks to update docs or after code changes; update README, docs folder, docstrings, and comments so they reflect current behavior.\n---\n\n# Update Docs\n\nYou keep **project documentation** in sync with the code. Your main focus is documentation as a whole: how the project is built, how to run it, and how it fits together. Update only what's wrong or missing; don't rewrite docs that are already accurate. Document what actually exists—no invented APIs or behavior.\n\n## Scope\n\n- **User specifies what to update**: e.g. \"update the docs,\" \"update the README,\" \"add docstrings,\" \"refresh the architecture doc.\" Do that.\n- **Post-implementation**: When invoked after code changes, identify what changed and update the relevant docs: any docs in the repo (e.g. 
`docs/`, `doc/`, architecture or design docs), README, docstrings, comments in changed files, or generated API docs if the project has them.\n- **No scope given**: Ask what to document (which files or doc types) or infer from recent changes and update the minimum needed.\n\nMatch the project's existing style: docstring format (Google, NumPy, Sphinx, etc.), README and docs structure, and tone.\n\n## Documentation standards (reference)\n\nWhen the project has no strong convention, align with widely used standards so docs are consistent and useful.\n\n- **Diátaxis** (https://diataxis.fr/): Organize content by user need. Use **tutorials** for learning a task step-by-step, **how-to guides** for solving a specific problem, **reference** for technical lookup (APIs, options), and **explanation** for background and concepts. When adding or restructuring docs, prefer the right type (e.g. don't turn a reference into a long tutorial).\n- **Google developer documentation style guide** (https://developers.google.com/style): For tone and formatting — write in second person (\"you\"), active voice; use sentence case for headings; put conditions before instructions; bold UI elements, code in code font; keep examples and link text descriptive. Clarity for the audience over rigid rules.\n\nApply these as guidance; always preserve or match the project's existing style when it has one.\n\n## Process\n\n1. **Identify what to update** — From the request or from the diff: what changed (modules, architecture, setup, behavior)? Which doc targets are affected (docs folder, README, docstrings, comments)?\n2. **Read current docs** — Check existing project docs (e.g. `docs/`), README, docstrings, comments in changed files, and any API docs. Note what's outdated, missing, or wrong.\n3. **Update** — Fix inaccuracies, add missing sections or docstrings, remove references to removed code. Keep changes minimal.\n4. **Verify** — Ensure examples in docs still run or match the code (e.g. 
function names, commands, args). Don't leave broken code blocks or outdated commands.\n\n## What to document\n\n- **Project documentation** (primary): Any docs that describe how the project is built and used — e.g. `docs/`, `doc/`, or standalone files. This includes:\n - **Architecture / design**: How the system is structured, main components, data flow. Update when structure or responsibilities change.\n - **Setup and build**: How to install, configure, build, and run (dev and prod). Update when dependencies, env vars, or commands change.\n - **Deploy and ops**: How to deploy, runbooks, environment-specific notes. Update when pipelines or procedures change.\n - **Contributing**: How to contribute, branch strategy, code style, where to put things. Update when workflow or conventions change.\n- **README**: Entry point for the repo — install/run, config, env vars, project structure, links to fuller docs. Update when setup or usage changes.\n- **Docstrings**: Public modules, classes, and functions. Parameters, return value, raised exceptions, and a one-line summary. Use the project's docstring convention.\n- **Comments**: Inline and block comments in the code. In changed files, check comments for accuracy—update or remove comments that describe old behavior, wrong assumptions, or obsolete TODOs. Don't leave comments that contradict the code.\n- **API docs**: If the project generates them (Sphinx, Typedoc, etc.), update source comments/docstrings so the generated output is correct; only regenerate if that's part of the workflow.\n\nSkip internal/private implementation details unless the project explicitly documents them. Prefer \"what and how to use\" over \"how it's implemented.\"\n\n## Output\n\n- **Updated**: List files and sections changed (e.g. 
\"docs/architecture.md: Components\" / \"README: Installation, Usage\" / \"module.py: function X docstring\").\n- **Added**: New sections or docstrings added, with file and name.\n- **Removed**: Obsolete sections or references removed.\n- If nothing needed updating, say so in one sentence.\n\nKeep the summary to bullets. No long prose.\n\n## Rules\n\n- Document only what the code does. Don't add features or behavior in the docs that aren't in the code.\n- Preserve existing formatting and style (headers, lists, code blocks, docstring style).\n- If the code is unclear and you can't document it confidently, note that and suggest a code comment or refactor instead of guessing.\n- Don't duplicate large chunks of code in docs or README; reference the source or keep examples short and runnable.\n"},"verifier":{"content":"---\nname: verifier\nmodel: default\ndescription: Validates that completed work matches what was claimed. Use after the main agent marks tasks done—checks that implementations exist and work, and that no unstated changes were made.\nreadonly: true\n---\n\n# Verifier\n\nYou are a skeptical validator. Your job is to confirm that work claimed complete actually exists and works, and that nothing extra was done without being stated.\n\n## What to verify\n\n1. **Claims vs. reality**\n - Identify what the main agent said it did (from the conversation or task list).\n - For each claim: confirm the implementation exists, is in the right place, and does what was described.\n - Run relevant tests or commands. Don't accept \"tests pass\" without running them.\n - Flag anything that was claimed but is missing, incomplete, or broken.\n\n2. **No unstated changes**\n - Compare the current state of the codebase to what was in scope for the task (e.g. 
the files or areas the user asked to change).\n - Look for edits the main agent made but did not mention: new files, modified files, refactors, \"cleanups,\" or behavior changes that weren't part of the request.\n - If you have access to git: use the diff (staged or unstaged) to see what actually changed versus what was discussed.\n - Report any changes that go beyond what was claimed or requested.\n\n## Process\n\n1. From context, extract: (a) what was requested, (b) what the main agent said it did.\n2. Verify each stated deliverable (code exists, tests run, behavior matches).\n3. Check the diff or modified files for changes that weren't mentioned.\n4. Summarize: passed, incomplete, or out-of-scope changes.\n\n## Output\n\n- **Verified**: What was claimed and confirmed (with brief evidence, e.g. \"tests pass\", \"file X contains Y\").\n- **Missing or broken**: What was claimed but isn't there or doesn't work (file, line, and what's wrong).\n- **Unstated changes**: What was changed but not mentioned (file and a one-line description). Ask whether the user wanted these or if they should be reverted.\n\nKeep each section to bullets. If everything checks out and there are no unstated changes, say so clearly in one or two sentences.\n\n## Rules\n\n- Don't take claims at face value. Inspect the code and run checks.\n- Prefer evidence (test output, diff, file contents) over summary.\n- For \"unstated changes,\" distinguish clearly between obvious scope creep (e.g. refactoring unrelated code) and trivial side effects (e.g. formatting in an edited file). Flag the former; mention the latter only if relevant.\n- If the task was vague, note what you assumed was in scope so the user can correct.\n"}}}}
@@ -0,0 +1,6 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * General Coding Tools MCP Server
4
+ * Exposes skills and subagents as MCP resources and tools for use in Cursor, Claude, and Smithery.
5
+ */
6
+ export {};
package/dist/index.js ADDED
@@ -0,0 +1,170 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * General Coding Tools MCP Server
4
+ * Exposes skills and subagents as MCP resources, tools, and prompts for use in Cursor, Claude, and Smithery.
5
+ */
6
+ import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
7
+ import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
8
+ import { z } from "zod";
9
+ import { readFileSync, existsSync } from "fs";
10
+ import { fileURLToPath } from "url";
11
+ import { dirname, join } from "path";
12
// Directory containing this compiled module; content.json is emitted next to it.
const __dirname = dirname(fileURLToPath(import.meta.url));

/**
 * Load the bundled skills/subagents content (generated by scripts/bundle-content.cjs).
 *
 * The file is read directly instead of being probed with `existsSync` first, so
 * there is no window between the existence check and the read in which the
 * file can disappear.
 *
 * @param {string} [contentPath] - Path of the bundle to load. Defaults to
 *   `content.json` in the same directory as this module.
 * @returns {any} The parsed JSON document.
 * @throws {Error} If the file does not exist (same message as before, with the
 *   underlying fs error attached as `cause`), or if it is not valid JSON (the
 *   `SyntaxError` is attached as `cause`). Other fs errors are rethrown as-is.
 */
function loadContent(contentPath = join(__dirname, "content.json")) {
    let raw;
    try {
        raw = readFileSync(contentPath, "utf8");
    }
    catch (err) {
        if (err?.code === "ENOENT") {
            throw new Error("content.json not found. Run 'npm run build' from mcp-server to bundle Skills and Subagents.", { cause: err });
        }
        // Permission problems, EISDIR, etc. are not "missing bundle" errors; surface them unchanged.
        throw err;
    }
    try {
        return JSON.parse(raw);
    }
    catch (err) {
        // A bare SyntaxError gives no hint about which file is broken; name it.
        throw new Error(`content.json at ${contentPath} is not valid JSON. Run 'npm run build' from mcp-server to regenerate it.`, { cause: err });
    }
}
21
// Parsed bundle, loaded once at startup and read by every handler below.
const DATA = loadContent();
// URI prefix under which all skill and subagent resources are published.
const RESOURCE_PREFIX = "general-coding-tools-mcp://";
// The MCP server instance that resources, tools, and prompts are registered on.
const server = new McpServer({ name: "general-coding-tools-mcp", version: "1.0.0" });
27
+ // --- Resources: one per skill, skill/reference, and subagent ---
28
// Publish each skill as a markdown resource, plus a second `/reference`
// resource when the bundle carries reference material for that skill.
DATA.skills.forEach((skill) => {
    const skillUri = `${RESOURCE_PREFIX}skill/${skill.id}`;
    const skillMeta = {
        title: `Skill: ${skill.name}`,
        description: `General Coding Tools skill: ${skill.name}`,
        mimeType: "text/markdown",
    };
    const readSkill = async () => {
        const { content } = DATA.content.skills[skill.name];
        return {
            contents: [{ uri: skillUri, mimeType: "text/markdown", text: content }],
        };
    };
    server.registerResource(`skill-${skill.id}`, skillUri, skillMeta, readSkill);

    // Nothing more to register for skills without reference material.
    if (!DATA.content.skills[skill.name]?.reference) {
        return;
    }

    const referenceUri = `${RESOURCE_PREFIX}skill/${skill.id}/reference`;
    const referenceMeta = {
        title: `Skill reference: ${skill.name}`,
        description: `Reference material for skill ${skill.name}`,
        mimeType: "text/markdown",
    };
    const readReference = async () => {
        const { reference } = DATA.content.skills[skill.name];
        return {
            contents: [{ uri: referenceUri, mimeType: "text/markdown", text: reference }],
        };
    };
    server.registerResource(`skill-${skill.id}-reference`, referenceUri, referenceMeta, readReference);
});
54
// Publish each subagent as a markdown resource.
DATA.subagents.forEach((agent) => {
    const agentUri = `${RESOURCE_PREFIX}subagent/${agent.id}`;
    const agentMeta = {
        title: `Subagent: ${agent.name}`,
        description: `General Coding Tools subagent: ${agent.name}`,
        mimeType: "text/markdown",
    };
    const readAgent = async () => {
        const { content } = DATA.content.subagents[agent.name];
        return {
            contents: [{ uri: agentUri, mimeType: "text/markdown", text: content }],
        };
    };
    server.registerResource(`subagent-${agent.id}`, agentUri, agentMeta, readAgent);
});
67
+ // --- Tools ---
68
// Tool: list_skills — returns the skill index (id, name, hasReference) as pretty-printed JSON.
server.registerTool(
    "list_skills",
    {
        title: "List skills",
        description: "List all available General Coding Tools skills (e.g. systematic-debugging, correctness-audit).",
        inputSchema: z.object({}),
    },
    async () => {
        const summaries = DATA.skills.map(({ id, name, hasReference }) => ({ id, name, hasReference }));
        const payload = JSON.stringify(summaries, null, 2);
        return { content: [{ type: "text", text: payload }] };
    },
);
76
// Tool: list_subagents — returns the subagent index (id, name) as pretty-printed JSON.
server.registerTool(
    "list_subagents",
    {
        title: "List subagents",
        description: "List all available General Coding Tools subagents (e.g. deep-research, update-docs, verifier).",
        inputSchema: z.object({}),
    },
    async () => {
        const summaries = DATA.subagents.map(({ id, name }) => ({ id, name }));
        const payload = JSON.stringify(summaries, null, 2);
        return { content: [{ type: "text", text: payload }] };
    },
);
84
// Tool: get_skill — returns a skill's markdown, optionally followed by its
// reference material under a "## Reference" heading.
server.registerTool(
    "get_skill",
    {
        title: "Get skill content",
        description: "Get the full content of a skill by name (id). Use list_skills to see available names.",
        inputSchema: z.object({
            name: z.string().describe("Skill id (e.g. systematic-debugging, correctness-audit)"),
            include_reference: z.boolean().optional().default(false).describe("Include REFERENCE.md if present"),
        }),
    },
    async ({ name, include_reference }) => {
        // Accept either the id or the display name.
        const match = DATA.skills.find((candidate) => candidate.id === name || candidate.name === name);
        if (!match) {
            const message = `Unknown skill: ${name}. Use list_skills to see available skills.`;
            return { content: [{ type: "text", text: message }], isError: true };
        }
        const { content, reference } = DATA.content.skills[match.name];
        // The reference is appended only when requested and actually present.
        const wantsReference = include_reference && reference;
        const text = wantsReference
            ? `${content}\n\n---\n\n## Reference\n\n${reference}`
            : content;
        return { content: [{ type: "text", text }] };
    },
);
106
// Tool: get_subagent — returns a subagent's markdown definition.
server.registerTool(
    "get_subagent",
    {
        title: "Get subagent content",
        description: "Get the full content of a subagent by name (id). Use list_subagents to see available names.",
        inputSchema: z.object({
            name: z.string().describe("Subagent id (e.g. deep-research, update-docs, verifier)"),
        }),
    },
    async ({ name }) => {
        // Accept either the id or the display name.
        const match = DATA.subagents.find((candidate) => candidate.id === name || candidate.name === name);
        if (!match) {
            const message = `Unknown subagent: ${name}. Use list_subagents to see available subagents.`;
            return { content: [{ type: "text", text: message }], isError: true };
        }
        const { content } = DATA.content.subagents[match.name];
        return { content: [{ type: "text", text: content }] };
    },
);
123
+ // --- Prompts: apply skill / subagent with user message ---
124
// Register one `apply_skill_<id>` prompt per skill. Each prompt yields a
// two-message exchange: the user's request, then an assistant turn that
// embeds the skill text and restates the request.
DATA.skills.forEach((skill) => {
    // Prompt names use underscores in place of the dashes found in ids.
    const promptName = `apply_skill_${skill.id.replace(/-/g, "_")}`;
    const promptConfig = {
        title: `Apply skill: ${skill.name}`,
        description: `Apply the "${skill.name}" skill. Use when the user wants to follow this skill's process.`,
        argsSchema: {
            user_message: z.string().describe("What the user asked or the current task"),
        },
    };
    const buildMessages = async ({ user_message }) => {
        const skillBody = DATA.content.skills[skill.name].content;
        const requestLine = user_message ?? "(no message provided)";
        const assistantText = `I will follow the **${skill.name}** skill.\n\n---\n\n${skillBody}\n\n---\n\nUser request: ${requestLine}`;
        const userTurn = { role: "user", content: { type: "text", text: String(user_message ?? "") } };
        const assistantTurn = { role: "assistant", content: { type: "text", text: assistantText } };
        return { messages: [userTurn, assistantTurn] };
    };
    server.registerPrompt(promptName, promptConfig, buildMessages);
});
143
// Register one `apply_subagent_<id>` prompt per subagent. Each prompt yields
// a two-message exchange: the user's request, then an assistant turn that
// embeds the subagent text and restates the request.
DATA.subagents.forEach((agent) => {
    // Prompt names use underscores in place of the dashes found in ids.
    const promptName = `apply_subagent_${agent.id.replace(/-/g, "_")}`;
    const promptConfig = {
        title: `Apply subagent: ${agent.name}`,
        description: `Apply the "${agent.name}" subagent. Use when the user wants this agent's behavior.`,
        argsSchema: {
            user_message: z.string().describe("What the user asked or the current task"),
        },
    };
    const buildMessages = async ({ user_message }) => {
        const agentBody = DATA.content.subagents[agent.name].content;
        const requestLine = user_message ?? "(no message provided)";
        const assistantText = `I will follow the **${agent.name}** subagent.\n\n---\n\n${agentBody}\n\n---\n\nUser request: ${requestLine}`;
        const userTurn = { role: "user", content: { type: "text", text: String(user_message ?? "") } };
        const assistantTurn = { role: "assistant", content: { type: "text", text: assistantText } };
        return { messages: [userTurn, assistantTurn] };
    };
    server.registerPrompt(promptName, promptConfig, buildMessages);
});
162
+ // --- Run ---
163
/**
 * Entry point: attach the server to a stdio transport.
 * Resolves once `server.connect` resolves.
 */
async function main() {
    await server.connect(new StdioServerTransport());
}

// Any startup failure is logged to stderr and ends the process with exit code 1.
main().catch((error) => {
    console.error(error);
    process.exit(1);
});
package/package.json ADDED
@@ -0,0 +1,32 @@
1
+ {
2
+ "name": "general-coding-tools-mcp",
3
+ "version": "1.0.0",
4
+ "description": "MCP server exposing General Coding Tools (skills and subagents) for use in Cursor, Claude, and Smithery",
5
+ "type": "module",
6
+ "main": "dist/index.js",
7
+ "bin": {
8
+ "general-coding-tools-mcp": "dist/index.js"
9
+ },
10
+ "scripts": {
11
+ "build": "node scripts/bundle-content.cjs && tsc",
12
+ "prepare": "npm run build",
13
+ "prepublishOnly": "npm run build",
14
+ "start": "node dist/index.js"
15
+ },
16
+ "files": [
17
+ "dist"
18
+ ],
19
+ "engines": {
20
+ "node": ">=18"
21
+ },
22
+ "keywords": ["mcp", "cursor", "smithery", "skills", "general-coding-tools"],
23
+ "license": "MIT",
24
+ "dependencies": {
25
+ "@modelcontextprotocol/sdk": "^1.26.0",
26
+ "zod": "^3.23.0"
27
+ },
28
+ "devDependencies": {
29
+ "@types/node": "^20.0.0",
30
+ "typescript": "^5.0.0"
31
+ }
32
+ }