npm - @mcp-graph-workflow/agent-graph-flow - Versions diffs - 0.1.0 - Mend

@mcp-graph-workflow/agent-graph-flow 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (34) hide show

package/README.md +40 -0
package/dist/cli/index.d.ts +1 -0
package/dist/cli/index.js +12842 -0
package/dist/index.d.ts +43 -0
package/dist/index.js +48 -0
package/package.json +142 -0
package/src/skills/analyze/ambiguity-audit.md +46 -0
package/src/skills/analyze/decompose-prd.md +26 -0
package/src/skills/analyze/grill-me.md +26 -0
package/src/skills/analyze/to-prd.md +57 -0
package/src/skills/any/code-detachment.md +26 -0
package/src/skills/any/lessons-consult.md +26 -0
package/src/skills/any/wip-one.md +26 -0
package/src/skills/design/design-an-interface.md +26 -0
package/src/skills/design/seam-audit.md +26 -0
package/src/skills/domain/crypto/common-mistakes.md +71 -0
package/src/skills/domain/ml/common-mistakes.md +55 -0
package/src/skills/domain/rag/chunk-overlap-strategy.md +27 -0
package/src/skills/domain/sqlite-perf/fts5-tuning.md +25 -0
package/src/skills/domain/sqlite-perf/wal-mode.md +26 -0
package/src/skills/domain/systems/common-mistakes.md +62 -0
package/src/skills/domain/testing/vitest-isolation.md +31 -0
package/src/skills/domain/typescript/zod-v4-migration.md +27 -0
package/src/skills/implement/anti-hallucination.md +28 -0
package/src/skills/implement/pure-decision-pattern.md +26 -0
package/src/skills/implement/tracer-bullet-tdd.md +26 -0
package/src/skills/plan/budget-aware-picking.md +26 -0
package/src/skills/plan/plan-sprint.md +26 -0
package/src/skills/plan/to-issues.md +67 -0
package/src/skills/review/citation-coverage-review.md +26 -0
package/src/skills/review/deep-module-review.md +26 -0
package/src/skills/review/zoom-out.md +34 -0
package/src/skills/validate/dod-checklist.md +30 -0
package/src/skills/validate/harness-regression-check.md +26 -0

package/dist/index.d.ts ADDED Viewed

@@ -0,0 +1,43 @@
+import { z } from 'zod/v4';
+/*!
+ * SPDX-License-Identifier: AGPL-3.0-or-later
+ * Copyright © 2026 Diego Lima Nogueira de Paula
+ *
+ * This file is part of agent-graph-flow.
+ *
+ * agent-graph-flow is free software: you can redistribute it and/or modify it
+ * under the terms of the GNU Affero General Public License v3.0 or later, as
+ * published by the Free Software Foundation. See LICENSE for the full terms.
+ */
+/**
+ * Phase engine — collapses the 9 internal phases (inherited from the legacy
+ * graph-flow) into 3 canonical phases SHAPE → BUILD → SHIP. It removes
+ * *ceremony*, not *discipline*: the rigor (TDD/AC/DoD) lives inside BUILD.
+ * The internal enum is preserved for data/CLI compatibility via
+ * `toCanonicalPhase`.
+ *
+ * Ref: RFC token-economy-redesign §6.1.
+ */
+declare const CanonicalPhaseSchema: z.ZodEnum<{
+    SHAPE: "SHAPE";
+    BUILD: "BUILD";
+    SHIP: "SHIP";
+}>;
+type CanonicalPhase = z.infer<typeof CanonicalPhaseSchema>;
+/** Canonical phases in cycle order. */
+declare const CANONICAL_PHASES: ("SHAPE" | "BUILD" | "SHIP")[];
+/**
+ * agent-graph-flow — public entrypoint.
+ *
+ * Promise (the filter for every decision): fast software · best-practice SWE ·
+ * brutally low token cost. See CLAUDE.md.
+ *
+ * M0 exposes only the product identity. M1 brings the engine (graph/context/RAG/
+ * planner/code-intelligence) and re-exports the public modules of the core.
+ */
+declare const VERSION = "0.1.0";
+declare const PROMISE: string;
+export { CANONICAL_PHASES as PHASES, PROMISE, type CanonicalPhase as Phase, VERSION };

package/dist/index.js ADDED Viewed

@@ -0,0 +1,48 @@
+import { z } from 'zod/v4';
+// src/core/lifecycle/phase.ts — canonical three-phase lifecycle schema (SHAPE, BUILD, SHIP).
+var CanonicalPhaseSchema = z.enum(["SHAPE", "BUILD", "SHIP"]);
+var CANONICAL_PHASES = CanonicalPhaseSchema.options; // the enum's values as an array; exported below as PHASES
+z.enum([ // NOTE(review): this schema is not bound to any name in the bundle — presumably leftover from the internal nine-phase enum after bundling; confirm it is intentionally unused
+  "ANALYZE",
+  "DESIGN",
+  "PLAN",
+  "IMPLEMENT",
+  "VALIDATE",
+  "REVIEW",
+  "HANDOFF",
+  "DEPLOY",
+  "LISTENING"
+]);
+// src/index.ts — product identity constants: the version string and the promise text.
+var VERSION = "0.1.0";
+var PROMISE = "Agente SWE aut\xF4nomo, local-first e token-frugal: PRD vira grafo de execu\xE7\xE3o persistente, TDD obrigat\xF3rio, custo de token brutalmente baixo.";
+/*!
+ * SPDX-License-Identifier: AGPL-3.0-or-later
+ * Copyright © 2026 Diego Lima Nogueira de Paula
+ *
+ * This file is part of mcp-graph.
+ *
+ * mcp-graph is free software: you can redistribute it and/or modify it under the
+ * terms of the GNU Affero General Public License v3.0 or later, as published by
+ * the Free Software Foundation. See LICENSE for the full terms.
+ *
+ * mcp-graph is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
+ * A PARTICULAR PURPOSE.
+ *
+ * Commercial licenses are available — see COMMERCIAL.md.
+ */
+/*!
+ * SPDX-License-Identifier: AGPL-3.0-or-later
+ * Copyright © 2026 Diego Lima Nogueira de Paula
+ *
+ * This file is part of agent-graph-flow.
+ *
+ * agent-graph-flow is free software: you can redistribute it and/or modify it
+ * under the terms of the GNU Affero General Public License v3.0 or later, as
+ * published by the Free Software Foundation. See LICENSE for the full terms.
+ */
+export { CANONICAL_PHASES as PHASES, PROMISE, VERSION }; // runtime exports: canonical phase list (as PHASES), promise text, version string

package/package.json ADDED Viewed

@@ -0,0 +1,142 @@
+{
+  "name": "@mcp-graph-workflow/agent-graph-flow",
+  "version": "0.1.0",
+  "description": "Agente SWE autônomo, local-first e token-frugal: PRD → grafo de execução persistente, TDD obrigatório, custo de token brutalmente baixo. AGPL v3.",
+  "type": "module",
+  "main": "dist/index.js",
+  "types": "./dist/index.d.ts",
+  "exports": {
+    ".": {
+      "types": "./dist/index.d.ts",
+      "import": "./dist/index.js"
+    }
+  },
+  "bin": {
+    "agent-graph-flow": "dist/cli/index.js",
+    "agf": "dist/cli/index.js"
+  },
+  "scripts": {
+    "build": "tsup",
+    "dev": "tsx src/cli/index.ts",
+    "test": "vitest run",
+    "test:node": "vitest run --project=node",
+    "test:blast": "vitest run --changed HEAD",
+    "test:watch": "vitest",
+    "typecheck": "tsc --noEmit",
+    "lint": "eslint src/ --max-warnings 30",
+    "lint:fix": "eslint src/ --fix",
+    "demo": "npm run build && node scripts/demo.mjs",
+    "test:blast:full": "vitest run --changed HEAD",
+    "test:smoke": "vitest run --config vitest.smoke.config.ts",
+    "test:clear": "vitest --clearCache"
+  },
+  "keywords": [
+    "swe-agent",
+    "autonomous-agent",
+    "prd",
+    "task-graph",
+    "local-first",
+    "token-frugal",
+    "tdd",
+    "mcp"
+  ],
+  "files": [
+    "dist/",
+    "src/skills/",
+    "README.md",
+    "LICENSE"
+  ],
+  "author": "Diego Nogueira (https://github.com/DiegoNogueiraDev)",
+  "license": "AGPL-3.0-or-later",
+  "repository": {
+    "type": "git",
+    "url": "git+https://github.com/DiegoNogueiraDev/graph-flow.git"
+  },
+  "engines": {
+    "node": ">=20.0.0"
+  },
+  "devDependencies": {
+    "@commitlint/cli": "^21.0.0",
+    "@commitlint/config-conventional": "^21.0.0",
+    "@eslint/js": "^10.0.1",
+    "@types/adm-zip": "^0.5.8",
+    "@types/better-sqlite3": "^7.6.13",
+    "@types/node": "^25.3.3",
+    "@types/ws": "^8.18.1",
+    "@vitest/coverage-v8": "^4.0.0",
+    "eslint": "^10.2.0",
+    "eslint-plugin-security": "^4.0.0",
+    "graphology": "^0.26.0",
+    "graphology-types": "^0.24.8",
+    "husky": "^9.1.7",
+    "ink-testing-library": "^4.0.0",
+    "jsdom": "^29.1.1",
+    "lint-staged": "^16.4.0",
+    "tree-sitter-c": "^0.24.1",
+    "tree-sitter-c-sharp": "^0.23.1",
+    "tree-sitter-cpp": "^0.23.4",
+    "tree-sitter-go": "^0.25.0",
+    "tree-sitter-java": "^0.23.5",
+    "tree-sitter-kotlin": "^0.3.1",
+    "tree-sitter-lua": "^2.1.3",
+    "tree-sitter-php": "^0.24.2",
+    "tree-sitter-python": "^0.25.0",
+    "tree-sitter-ruby": "^0.23.1",
+    "tree-sitter-rust": "^0.24.0",
+    "tree-sitter-swift": "^0.7.1",
+    "ts-morph": "^28.0.0",
+    "tsup": "^8.5.1",
+    "tsx": "^4.21.0",
+    "typescript-eslint": "^8.58.1",
+    "vitest": "^4.0.18"
+  },
+  "dependencies": {
+    "@mcp-graph-workflow/mcp-graph": "^13.27.0",
+    "@modelcontextprotocol/sdk": "^1.29.0",
+    "@types/react": "^19.2.16",
+    "adm-zip": "^0.5.17",
+    "better-sqlite3": "^12.6.2",
+    "cheerio": "^1.2.0",
+    "commander": "^14.0.3",
+    "glob": "^13.0.6",
+    "ink": "^6.8.0",
+    "ink-spinner": "^5.0.0",
+    "ink-text-input": "^6.0.0",
+    "lru-cache": "^11.2.7",
+    "mammoth": "^1.12.0",
+    "onnxruntime-node": "^1.26.0",
+    "pdf-parse": "^2.4.5",
+    "react": "^19.2.7",
+    "web-tree-sitter": "^0.26.8",
+    "ws": "^8.20.0",
+    "yaml": "^2.8.2",
+    "zod": "^4.3.6"
+  },
+  "optionalDependencies": {
+    "@github/copilot-sdk": "^1.0.0",
+    "intelephense": ">=1.10.0",
+    "typescript": "^6.0.2",
+    "typescript-language-server": ">=4.0.0"
+  },
+  "peerDependencies": {
+    "typescript": ">=5.0.0 || >=6.0.0"
+  },
+  "peerDependenciesMeta": {
+    "typescript": {
+      "optional": true
+    }
+  },
+  "overrides": {
+    "basic-ftp": ">=6.0.0",
+    "protobufjs": ">=8.0.2",
+    "@protobufjs/utf8": ">=1.1.1",
+    "ip-address": ">=10.1.1",
+    "@tootallnate/once": ">=3.0.1",
+    "fast-uri": ">=3.1.2",
+    "js-cookie": ">=3.0.6"
+  },
+  "publishConfig": {
+    "access": "public",
+    "registry": "https://registry.npmjs.org/"
+  }
+}

package/src/skills/analyze/ambiguity-audit.md ADDED Viewed

@@ -0,0 +1,46 @@
+---
+name: ambiguity-audit
+description: Classify each AC item as SPECIFIED / PARTIALLY / UNSPECIFIED before implementation; surface the alternatives you'd otherwise pick silently
+category: analyze
+phases: [ANALYZE, IMPLEMENT]
+---
+# ambiguity-audit
+§EPIC-13.2 wraps `src/core/decisions/ambiguity-audit-types.ts`. Run this skill BEFORE writing any code so the unspecified items are escalated to the user instead of guessed at.
+## When to use
+- `start_task` returned a task with ≥ 3 acceptance criteria
+- Any AC contains words like "appropriately", "good", "optimal", "if needed"
+- You catch yourself about to make a design choice the AC didn't dictate
+## Three-level classification
+For every AC bullet, label it exactly one of:
+| Label | Meaning | Action |
+|---|---|---|
+| **SPECIFIED** | The AC names a concrete observable outcome (input → output) with no judgement call | Implement directly |
+| **PARTIALLY** | The AC names the outcome but leaves at least one shape detail open (format, threshold, edge-case behavior) | Pick the most conservative option, document the choice in `rationale` |
+| **UNSPECIFIED** | The AC requires a decision the user has not made (algorithm, UX, error handling) | List 2–3 alternatives and ASK before coding |
+## Output shape (persist to `node.metadata.ambiguityAudit`)
+```json
+{
+  "specified": ["AC1", "AC4"],
+  "partial":   ["AC2"],
+  "unspecified": [
+    { "item": "AC3", "alternatives": ["throw on duplicate", "upsert silently", "return existing record"] }
+  ]
+}
+```
+`finish_task` reads this metadata and refuses to mark `done` if `unspecified.length > 0` and the parent has no follow-up decision node.
+## Anti-patterns
+- Marking everything SPECIFIED to skip the conversation — the audit is for YOU first
+- Listing one alternative under UNSPECIFIED — "alternative" implies plural; if only one path exists, it's PARTIALLY at most
+- Auditing after coding — by then the bias is locked in

package/src/skills/analyze/decompose-prd.md ADDED Viewed

@@ -0,0 +1,26 @@
+---
+name: decompose-prd
+description: Break a PRD into atomic XS/S subtasks with acceptance criteria
+category: analyze
+phases: [ANALYZE, PLAN]
+---
+# decompose-prd
+## When to use
+Right after `import_prd`, before any sprint planning. The PRD ships as a few large epics; you need every leaf to be ≤ 2h and have testable AC.
+## Steps
+1. Read the imported epic via `node` action='get'.
+2. For each undocumented requirement, create child subtasks with xpSize XS or S. Title format: `Eα.Tβ — <verb>-<object> (S)`.
+3. Each AC must be GIVEN/WHEN/THEN testable; minimum 5 AC per task.
+4. Link `depends_on` edges only when serial execution is mandatory.
+5. Run `analyze(mode='ready')` to confirm DoR (≥ 7 checks pass).
+## Anti-patterns
+- "TBD" in AC fields — every AC must be measurable up front.
+- M/L tasks left undecomposed — split into XS+XS+S before sprint planning.
+- Phantom subtasks (no AC, no testFiles) inflating sprint capacity.

package/src/skills/analyze/grill-me.md ADDED Viewed

@@ -0,0 +1,26 @@
+---
+name: grill-me
+description: Stress-test a decision by surfacing assumptions and counter-arguments
+category: analyze
+phases: [ANALYZE, DESIGN, REVIEW]
+---
+# grill-me
+## When to use
+Before locking a non-trivial design decision (ADR-worthy). Use to surface implicit assumptions and find the strongest counter-argument before committing.
+## Steps
+1. State the proposed decision in one sentence.
+2. List 3 assumptions the decision rests on. Tag each: load-bearing / convenient / wishful.
+3. For each assumption, ask "what changes if it's wrong?".
+4. Generate the strongest possible counter-position (steel-man, not straw-man).
+5. Document residual risk in the ADR `## Consequences` section.
+## Anti-patterns
+- Skipping load-bearing assumptions because "obviously true".
+- Self-grilling without changing the decision — performative.
+- Stopping at the weakest counter ("but that's silly").

package/src/skills/analyze/to-prd.md ADDED Viewed

@@ -0,0 +1,57 @@
+---
+name: to-prd
+description: Synthesize the current conversation context into a PRD ready for import_prd; do not interview the user, just consolidate what you already know
+category: analyze
+phases: [ANALYZE]
+---
+# to-prd
+Port of `skills-main/to-prd` adapted for mcp-graph: the output is consumed by `import_prd` (or filed as a GitHub issue when the user already runs spec-kit).
+## When to use
+You have an exploratory conversation that's converged on a feature, but no PRD node exists in the graph yet. Stop coding. Synthesize first.
+## Process
+1. **Explore the repo** if you haven't already — `query_graph`, `code_intelligence` for callers.
+2. **Sketch deep modules**: list the modules you will build/modify. Prefer deep modules (simple interface, lots of behavior, rarely changes) over shallow facades.
+3. **Confirm** module boundaries with the user; ask which modules they want test coverage for.
+4. **Write the PRD** using the template below, then `import_prd` it as a draft epic.
+## PRD template
+```markdown
+## Problem Statement
+The user's pain, in the user's words.
+## Solution
+What changes from the user's perspective.
+## User Stories
+1. As a <actor>, I want <feature>, so that <benefit>
+… long, exhaustive list
+## Implementation Decisions
+- Modules to build/modify (no file paths — they rot)
+- Module interfaces
+- Architectural decisions, schema changes, API contracts
+## Testing Decisions
+- What "good test" means here (test behavior, not implementation)
+- Modules to test
+- Prior art (similar tests already in the codebase)
+## Out of Scope
+What this PRD does NOT cover.
+## Further Notes
+Anything else.
+```
+## Anti-patterns
+- Interviewing the user from scratch when context already exists
+- Pasting file paths or code into the PRD (they go stale)
+- Single mega-story instead of many user stories

package/src/skills/any/code-detachment.md ADDED Viewed

@@ -0,0 +1,26 @@
+---
+name: code-detachment
+description: Don't hand-edit AI mistakes — explain via prompt and let the AI fix
+category: any
+phases: [IMPLEMENT, REVIEW]
+---
+# code-detachment
+## When to use
+When the agent produced wrong code. The instinct is to "just fix it" by hand. Resist — that re-creates an error pattern the agent will repeat.
+## Steps
+1. Diagnose: which assumption did the agent get wrong?
+2. Write a prompt that names the wrong assumption and the right one (concretely).
+3. Let the agent retry. Compare the new output to the wrong one to validate fix.
+4. If the same class of mistake recurs ≥ 3 times, document the pattern in CLAUDE.md or add a feedback memory.
+5. Hand-edit only when the cost of the round-trip exceeds the value of the lesson.
+## Anti-patterns
+- Silent hand-fixes that hide the failure pattern.
+- Detailed prompts that re-explain the entire codebase — name only the wrong assumption.
+- Treating CLAUDE.md as immutable; it's an evolving spec.

package/src/skills/any/lessons-consult.md ADDED Viewed

@@ -0,0 +1,26 @@
+---
+name: lessons-consult
+description: Query lessons_learned at start_task to avoid re-walking known failures
+category: any
+phases: [IMPLEMENT, ANALYZE]
+---
+# lessons-consult
+## When to use
+At start_task, automatically — the lessons-consultant (E22.D5) injects up to the 3 most relevant past lessons into the modelHint context. Use this skill explicitly when investigating manually.
+## Steps
+1. After loading task context, search lessons via `consultLessons(db, nodeText, 3)`.
+2. For each high-confidence lesson (≥ 0.85), surface the recommendedAction.
+3. If the lesson recommends `skip-similar` and the current task pattern matches, escalate to approval before continuing.
+4. After completion, update applied_count via `persistLesson` (UPSERT).
+5. Periodically prune lessons with applied_count = 1 and age > 90d (decayed).
+## Anti-patterns
+- Ignoring lessons because "this task is different" without comparing patterns.
+- Recording new lessons that duplicate existing ones (UPSERT handles this).
+- Letting lesson confidence stay frozen — re-grade on contradicting evidence.

package/src/skills/any/wip-one.md ADDED Viewed

@@ -0,0 +1,26 @@
+---
+name: wip-one
+description: Single in-progress task per agent; finish before starting another
+category: any
+phases: [IMPLEMENT, VALIDATE]
+---
+# wip-one
+## When to use
+Always. Little's Law says cycle_time = WIP / throughput; lowering WIP lowers cycle time without sacrificing throughput.
+## Steps
+1. Before `start_task`, run `query_graph "SELECT id, title FROM nodes WHERE status='in_progress'"`.
+2. If a row is returned: run `finish_task` on it or revert it before starting new work.
+3. Long-running task blocked? Mark it `blocked` (not `in_progress`) with rationale.
+4. Honor backpressure-detector (E22.C2) signals; pull, don't push.
+5. Audit weekly: `metrics(action='wip_history')` should hover at 1.
+## Anti-patterns
+- Switching tasks because the current one is stuck — root cause first.
+- Counting "background reading" as progress — it's not.
+- Multiple in_progress with the same agent — invalid graph state.

package/src/skills/design/design-an-interface.md ADDED Viewed

@@ -0,0 +1,26 @@
+---
+name: design-an-interface
+description: Define a deep module's public surface before writing implementation
+category: design
+phases: [DESIGN, PLAN]
+---
+# design-an-interface
+## When to use
+Before implementing a module that other modules will depend on. Ousterhout: "modules should be deep" — small interface, large implementation.
+## Steps
+1. List the operations callers need. Cap at 5 named exports.
+2. For each operation, write the type signature; add JSDoc with one example.
+3. Sketch the impl without writing code: pseudocode in 3–6 bullets.
+4. Run `analyze(mode='deep_module')` after first impl pass; depth ratio < 0.2 is good.
+5. Write the test for the interface BEFORE the impl (TDD red).
+## Anti-patterns
+- Exporting internal helpers because "tests need them".
+- Naming exports with implementation detail (`createSqliteFooStore` vs `createFooStore`).
+- Passing more than 4 params unwrapped — bundle into options object.

package/src/skills/design/seam-audit.md ADDED Viewed

@@ -0,0 +1,26 @@
+---
+name: seam-audit
+description: Classify dependencies into 4 seams to plan substitution + testability
+category: design
+phases: [DESIGN, REVIEW]
+---
+# seam-audit
+## When to use
+When a module is hard to test, brittle to change, or coupled to a vendor SDK. Categorize each import to know where to put a stand-in.
+## Steps
+1. Run `analyze(mode='seam_audit', file=<path>)` to classify imports.
+2. For each `true-external` (e.g. anthropic, openai), wrap behind an adapter; never import in core.
+3. For each `local-substitutable` (better-sqlite3, fs), inject through an interface so tests get a stand-in.
+4. For `remote-owned` (axios, MCP), enforce timeout + retry policy.
+5. `in-process` imports stay free; consider merging if only one consumer.
+## Anti-patterns
+- Hiding SDK clients in core under "convenience" wrappers.
+- Mocking `fs` with `vi.mock` instead of injecting; brittle to refactors.
+- Untested timeouts on remote-owned (default infinite hangs).

package/src/skills/domain/crypto/common-mistakes.md ADDED Viewed

@@ -0,0 +1,71 @@
+---
+domain: crypto
+topic: common-mistakes
+triggers: [encryption, hashing, key_management, jwt, tls]
+discovered_at: 2026-04-30T00:00:00.000Z
+source_task: extracta-paper2code
+confidence: 0.85
+---
+# Cryptography — Common Mistakes
+Patterns where the code uses crypto APIs correctly *as documented* but
+the security property does not hold. These all have one rule: when in
+doubt, use the high-level construct (libsodium, age, Tink) instead of
+hand-rolling primitives.
+## Encryption
+- **ECB mode** — leaks data patterns. Default to AES-GCM (authenticated)
+  or ChaCha20-Poly1305. Never use AES-ECB, and never use AES-CBC without
+  encrypt-then-MAC.
+- **Static IV / nonce reuse** — GCM nonce reuse is catastrophic (plaintext
+  recovery and tag forgery). Generate a fresh nonce per message: persist a
+  last-used counter, or use random 96-bit nonces and rotate the key well
+  before 2^32 messages.
+- **Key derived from password without KDF** — use Argon2id (or scrypt /
+  PBKDF2 with high iterations). Never feed a password directly to a
+  symmetric cipher key slot.
+## Hashing & signatures
+- **MD5 / SHA-1 still in use** — collision-vulnerable. Use SHA-256 minimum;
+  BLAKE2/3 for performance.
+- **HMAC instead of signature for cross-trust-boundary auth** — HMAC
+  requires a shared secret. For multi-party verification, use an
+  asymmetric signature (Ed25519).
+- **String comparison on MAC / token** — `===` is timing-leaky. Use
+  `crypto.timingSafeEqual` (Node) or `hmac.compare_digest` (Python).
+## Tokens / sessions
+- **JWT `alg: none`** — accept-any-algorithm libraries let an attacker
+  set `alg: none` and forge tokens. Pin allowed algorithms.
+- **JWT signed with HMAC, public key as secret** — RS256 token verified
+  as HS256 with the public key as the "secret" lets the attacker sign
+  tokens. Pin algorithm AND key type.
+- **Session ID in URL** — leaks via Referer headers + browser history.
+  Use cookies with `Secure; HttpOnly; SameSite`.
+## Key management
+- **Hard-coded keys in source** — every public-repo scan finds them.
+  Use env vars (or KMS). Rotate after exposure.
+- **Same key for encryption and signing** — separate keys per purpose;
+  failures in one don't compromise the other.
+- **No key rotation plan** — every key needs a rotation cadence written
+  down before deploy. "We'll rotate when we need to" = never.
+## Randomness
+- **`Math.random()` / `random.random()` for secrets** — not
+  cryptographically secure. Use `crypto.randomBytes` (Node), `secrets`
+  module (Python), `crypto/rand` (Go).
+- **Truncating UUIDs for IDs** — a UUIDv4 carries 122 random bits;
+  truncating to 8 hex chars (32 bits) makes birthday collisions likely
+  at ~65k values (≈ √2³²).
+## When to escalate
+If a task touches authentication, encryption, or session management,
+require explicit answers to: which library, which algorithm, which key
+lifecycle. "Use whatever is standard" is UNSPECIFIED.

package/src/skills/domain/ml/common-mistakes.md ADDED Viewed

@@ -0,0 +1,55 @@
+---
+domain: ml
+topic: common-mistakes
+triggers: [paper_to_code, ml_implementation, model_training, hyperparam_check]
+discovered_at: 2026-04-30T00:00:00.000Z
+source_task: extracta-paper2code
+confidence: 0.8
+---
+# ML Implementation — Common Mistakes
+Curated from `paper2code/paper_to_code_mistakes.md`. The code runs but does
+not implement what the paper describes — these are systematic, not bugs.
+## Notation mismatches
+- **BatchNorm momentum** — PyTorch `momentum=x` ≈ TensorFlow `momentum=1-x`.
+- **Dropout rate vs keep probability** — modern papers usually report the
+  drop probability; pre-2018 papers often report the keep probability.
+- **"Same padding"** — TF handles it automatically; PyTorch needs
+  `padding=kernel_size // 2` (asymmetric for even kernels).
+- **Tensor layout** — PyTorch NCHW, TensorFlow NHWC. Every conv/pool/reshape
+  must account for the difference when porting.
+## Activation gotchas
+- **GELU** — exact erf form (PyTorch default) ≠ tanh approximation (BERT, GPT-2; `approximate='tanh'` in PyTorch ≥ 1.12). Different outputs.
+- **SiLU vs Swish** — same function; Swish-with-trainable-β is the variant.
+## Training-loop landmines
+- **Loss scaling** — paper reports per-token loss; framework may report
+  per-batch sum. Check before comparing.
+- **Gradient clipping order** — clip *after* loss.backward() but *before*
+  optimizer.step(). Order swap silently changes effective LR.
+- **Learning-rate schedule warm-up** — many papers use linear warm-up over the
+  first N steps then cosine decay; "decay from step 0" is a different recipe.
+- **Weight decay on biases / LayerNorm** — most modern code skips bias and
+  LayerNorm params from weight decay; if the paper doesn't say, default to
+  exclusion (matches HF defaults).
+## Evaluation traps
+- **Accuracy reported on training set** — silently reproduces a training-time
+  metric the paper never claimed.
+- **Beam search vs greedy** — beam_size=1 is greedy; the paper number was
+  probably with beam_size=4 (translation) or 5 (summarization).
+- **Tokenizer mismatch** — BPE vocab from the paper vs your tokenizer can
+  shift perplexity by 5+ points without changing the model.
+## When to escalate
+If the AC says "match paper Table 2" but the difference falls in any of the
+buckets above, mark UNSPECIFIED in `ambiguity-audit` and ask which convention
+the paper's official repo uses.