@mcp-graph-workflow/agent-graph-flow 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34) hide show
  1. package/README.md +40 -0
  2. package/dist/cli/index.d.ts +1 -0
  3. package/dist/cli/index.js +12842 -0
  4. package/dist/index.d.ts +43 -0
  5. package/dist/index.js +48 -0
  6. package/package.json +142 -0
  7. package/src/skills/analyze/ambiguity-audit.md +46 -0
  8. package/src/skills/analyze/decompose-prd.md +26 -0
  9. package/src/skills/analyze/grill-me.md +26 -0
  10. package/src/skills/analyze/to-prd.md +57 -0
  11. package/src/skills/any/code-detachment.md +26 -0
  12. package/src/skills/any/lessons-consult.md +26 -0
  13. package/src/skills/any/wip-one.md +26 -0
  14. package/src/skills/design/design-an-interface.md +26 -0
  15. package/src/skills/design/seam-audit.md +26 -0
  16. package/src/skills/domain/crypto/common-mistakes.md +71 -0
  17. package/src/skills/domain/ml/common-mistakes.md +55 -0
  18. package/src/skills/domain/rag/chunk-overlap-strategy.md +27 -0
  19. package/src/skills/domain/sqlite-perf/fts5-tuning.md +25 -0
  20. package/src/skills/domain/sqlite-perf/wal-mode.md +26 -0
  21. package/src/skills/domain/systems/common-mistakes.md +62 -0
  22. package/src/skills/domain/testing/vitest-isolation.md +31 -0
  23. package/src/skills/domain/typescript/zod-v4-migration.md +27 -0
  24. package/src/skills/implement/anti-hallucination.md +28 -0
  25. package/src/skills/implement/pure-decision-pattern.md +26 -0
  26. package/src/skills/implement/tracer-bullet-tdd.md +26 -0
  27. package/src/skills/plan/budget-aware-picking.md +26 -0
  28. package/src/skills/plan/plan-sprint.md +26 -0
  29. package/src/skills/plan/to-issues.md +67 -0
  30. package/src/skills/review/citation-coverage-review.md +26 -0
  31. package/src/skills/review/deep-module-review.md +26 -0
  32. package/src/skills/review/zoom-out.md +34 -0
  33. package/src/skills/validate/dod-checklist.md +30 -0
  34. package/src/skills/validate/harness-regression-check.md +26 -0
@@ -0,0 +1,43 @@
1
+ import { z } from 'zod/v4';
2
+
3
+ /*!
4
+ * SPDX-License-Identifier: AGPL-3.0-or-later
5
+ * Copyright © 2026 Diego Lima Nogueira de Paula
6
+ *
7
+ * This file is part of agent-graph-flow.
8
+ *
9
+ * agent-graph-flow is free software: you can redistribute it and/or modify it
10
+ * under the terms of the GNU Affero General Public License v3.0 or later, as
11
+ * published by the Free Software Foundation. See LICENSE for the full terms.
12
+ */
13
+ /**
14
+ * Motor de fases — colapsa as 9 fases internas (herdadas do graph-flow legado)
15
+ * em 3 fases canônicas SHAPE → BUILD → SHIP. Remove *cerimônia*, não
16
+ * *disciplina*: o rigor (TDD/AC/DoD) vive dentro de BUILD. O enum interno é
17
+ * preservado para compat de dados/CLI via `toCanonicalPhase`.
18
+ *
19
+ * Ref: RFC token-economy-redesign §6.1.
20
+ */
21
+
22
+ declare const CanonicalPhaseSchema: z.ZodEnum<{
23
+ SHAPE: "SHAPE";
24
+ BUILD: "BUILD";
25
+ SHIP: "SHIP";
26
+ }>;
27
+ type CanonicalPhase = z.infer<typeof CanonicalPhaseSchema>;
28
+ /** Fases canônicas em ordem de ciclo. */
29
+ declare const CANONICAL_PHASES: ("SHAPE" | "BUILD" | "SHIP")[];
30
+
31
+ /**
32
+ * agent-graph-flow — public entrypoint.
33
+ *
34
+ * Promessa (filtro de toda decisão): software rápido · best-practice SWE ·
35
+ * custo de token brutalmente baixo. Ver CLAUDE.md.
36
+ *
37
+ * M0 expõe apenas identidade do produto. M1 traz o motor (graph/context/RAG/
38
+ * planner/code-intelligence) e re-exporta os módulos públicos do core.
39
+ */
40
+ declare const VERSION = "0.1.0";
41
+ declare const PROMISE: string;
42
+
43
+ export { CANONICAL_PHASES as PHASES, PROMISE, type CanonicalPhase as Phase, VERSION };
package/dist/index.js ADDED
@@ -0,0 +1,48 @@
1
+ import { z } from 'zod/v4';
2
+
3
+ // src/core/lifecycle/phase.ts
4
+ var CanonicalPhaseSchema = z.enum(["SHAPE", "BUILD", "SHIP"]);
5
+ var CANONICAL_PHASES = CanonicalPhaseSchema.options;
6
+ z.enum([
7
+ "ANALYZE",
8
+ "DESIGN",
9
+ "PLAN",
10
+ "IMPLEMENT",
11
+ "VALIDATE",
12
+ "REVIEW",
13
+ "HANDOFF",
14
+ "DEPLOY",
15
+ "LISTENING"
16
+ ]);
17
+
18
+ // src/index.ts
19
+ var VERSION = "0.1.0";
20
+ var PROMISE = "Agente SWE aut\xF4nomo, local-first e token-frugal: PRD vira grafo de execu\xE7\xE3o persistente, TDD obrigat\xF3rio, custo de token brutalmente baixo.";
21
+ /*!
22
+ * SPDX-License-Identifier: AGPL-3.0-or-later
23
+ * Copyright © 2026 Diego Lima Nogueira de Paula
24
+ *
25
+ * This file is part of mcp-graph.
26
+ *
27
+ * mcp-graph is free software: you can redistribute it and/or modify it under the
28
+ * terms of the GNU Affero General Public License v3.0 or later, as published by
29
+ * the Free Software Foundation. See LICENSE for the full terms.
30
+ *
31
+ * mcp-graph is distributed in the hope that it will be useful, but WITHOUT ANY
32
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
33
+ * A PARTICULAR PURPOSE.
34
+ *
35
+ * Commercial licenses are available — see COMMERCIAL.md.
36
+ */
37
+ /*!
38
+ * SPDX-License-Identifier: AGPL-3.0-or-later
39
+ * Copyright © 2026 Diego Lima Nogueira de Paula
40
+ *
41
+ * This file is part of agent-graph-flow.
42
+ *
43
+ * agent-graph-flow is free software: you can redistribute it and/or modify it
44
+ * under the terms of the GNU Affero General Public License v3.0 or later, as
45
+ * published by the Free Software Foundation. See LICENSE for the full terms.
46
+ */
47
+
48
+ export { CANONICAL_PHASES as PHASES, PROMISE, VERSION };
package/package.json ADDED
@@ -0,0 +1,142 @@
1
+ {
2
+ "name": "@mcp-graph-workflow/agent-graph-flow",
3
+ "version": "0.1.0",
4
+ "description": "Agente SWE autônomo, local-first e token-frugal: PRD → grafo de execução persistente, TDD obrigatório, custo de token brutalmente baixo. AGPL v3.",
5
+ "type": "module",
6
+ "main": "dist/index.js",
7
+ "types": "./dist/index.d.ts",
8
+ "exports": {
9
+ ".": {
10
+ "types": "./dist/index.d.ts",
11
+ "import": "./dist/index.js"
12
+ }
13
+ },
14
+ "bin": {
15
+ "agent-graph-flow": "dist/cli/index.js",
16
+ "agf": "dist/cli/index.js"
17
+ },
18
+ "scripts": {
19
+ "build": "tsup",
20
+ "dev": "tsx src/cli/index.ts",
21
+ "test": "vitest run",
22
+ "test:node": "vitest run --project=node",
23
+ "test:blast": "vitest run --changed HEAD",
24
+ "test:watch": "vitest",
25
+ "typecheck": "tsc --noEmit",
26
+ "lint": "eslint src/ --max-warnings 30",
27
+ "lint:fix": "eslint src/ --fix",
28
+ "demo": "npm run build && node scripts/demo.mjs",
29
+ "test:blast:full": "vitest run --changed HEAD",
30
+ "test:smoke": "vitest run --config vitest.smoke.config.ts",
31
+ "test:clear": "vitest --clearCache"
32
+ },
33
+ "keywords": [
34
+ "swe-agent",
35
+ "autonomous-agent",
36
+ "prd",
37
+ "task-graph",
38
+ "local-first",
39
+ "token-frugal",
40
+ "tdd",
41
+ "mcp"
42
+ ],
43
+ "files": [
44
+ "dist/",
45
+ "src/skills/",
46
+ "README.md",
47
+ "LICENSE"
48
+ ],
49
+ "author": "Diego Nogueira (https://github.com/DiegoNogueiraDev)",
50
+ "license": "AGPL-3.0-or-later",
51
+ "repository": {
52
+ "type": "git",
53
+ "url": "git+https://github.com/DiegoNogueiraDev/graph-flow.git"
54
+ },
55
+ "engines": {
56
+ "node": ">=20.0.0"
57
+ },
58
+ "devDependencies": {
59
+ "@commitlint/cli": "^21.0.0",
60
+ "@commitlint/config-conventional": "^21.0.0",
61
+ "@eslint/js": "^10.0.1",
62
+ "@types/adm-zip": "^0.5.8",
63
+ "@types/better-sqlite3": "^7.6.13",
64
+ "@types/node": "^25.3.3",
65
+ "@types/ws": "^8.18.1",
66
+ "@vitest/coverage-v8": "^4.0.0",
67
+ "eslint": "^10.2.0",
68
+ "eslint-plugin-security": "^4.0.0",
69
+ "graphology": "^0.26.0",
70
+ "graphology-types": "^0.24.8",
71
+ "husky": "^9.1.7",
72
+ "ink-testing-library": "^4.0.0",
73
+ "jsdom": "^29.1.1",
74
+ "lint-staged": "^16.4.0",
75
+ "tree-sitter-c": "^0.24.1",
76
+ "tree-sitter-c-sharp": "^0.23.1",
77
+ "tree-sitter-cpp": "^0.23.4",
78
+ "tree-sitter-go": "^0.25.0",
79
+ "tree-sitter-java": "^0.23.5",
80
+ "tree-sitter-kotlin": "^0.3.1",
81
+ "tree-sitter-lua": "^2.1.3",
82
+ "tree-sitter-php": "^0.24.2",
83
+ "tree-sitter-python": "^0.25.0",
84
+ "tree-sitter-ruby": "^0.23.1",
85
+ "tree-sitter-rust": "^0.24.0",
86
+ "tree-sitter-swift": "^0.7.1",
87
+ "ts-morph": "^28.0.0",
88
+ "tsup": "^8.5.1",
89
+ "tsx": "^4.21.0",
90
+ "typescript-eslint": "^8.58.1",
91
+ "vitest": "^4.0.18"
92
+ },
93
+ "dependencies": {
94
+ "@mcp-graph-workflow/mcp-graph": "^13.27.0",
95
+ "@modelcontextprotocol/sdk": "^1.29.0",
96
+ "@types/react": "^19.2.16",
97
+ "adm-zip": "^0.5.17",
98
+ "better-sqlite3": "^12.6.2",
99
+ "cheerio": "^1.2.0",
100
+ "commander": "^14.0.3",
101
+ "glob": "^13.0.6",
102
+ "ink": "^6.8.0",
103
+ "ink-spinner": "^5.0.0",
104
+ "ink-text-input": "^6.0.0",
105
+ "lru-cache": "^11.2.7",
106
+ "mammoth": "^1.12.0",
107
+ "onnxruntime-node": "^1.26.0",
108
+ "pdf-parse": "^2.4.5",
109
+ "react": "^19.2.7",
110
+ "web-tree-sitter": "^0.26.8",
111
+ "ws": "^8.20.0",
112
+ "yaml": "^2.8.2",
113
+ "zod": "^4.3.6"
114
+ },
115
+ "optionalDependencies": {
116
+ "@github/copilot-sdk": "^1.0.0",
117
+ "intelephense": ">=1.10.0",
118
+ "typescript": "^6.0.2",
119
+ "typescript-language-server": ">=4.0.0"
120
+ },
121
+ "peerDependencies": {
122
+ "typescript": ">=5.0.0 || >=6.0.0"
123
+ },
124
+ "peerDependenciesMeta": {
125
+ "typescript": {
126
+ "optional": true
127
+ }
128
+ },
129
+ "overrides": {
130
+ "basic-ftp": ">=6.0.0",
131
+ "protobufjs": ">=8.0.2",
132
+ "@protobufjs/utf8": ">=1.1.1",
133
+ "ip-address": ">=10.1.1",
134
+ "@tootallnate/once": ">=3.0.1",
135
+ "fast-uri": ">=3.1.2",
136
+ "js-cookie": ">=3.0.6"
137
+ },
138
+ "publishConfig": {
139
+ "access": "public",
140
+ "registry": "https://registry.npmjs.org/"
141
+ }
142
+ }
@@ -0,0 +1,46 @@
1
+ ---
2
+ name: ambiguity-audit
3
+ description: Classify each AC item as SPECIFIED / PARTIALLY / UNSPECIFIED before implementation; surface the alternatives you'd otherwise pick silently
4
+ category: analyze
5
+ phases: [ANALYZE, IMPLEMENT]
6
+ ---
7
+
8
+ # ambiguity-audit
9
+
10
+ §EPIC-13.2 wraps `src/core/decisions/ambiguity-audit-types.ts`. Run this skill BEFORE writing any code so the unspecified items are escalated to the user instead of guessed at.
11
+
12
+ ## When to use
13
+
14
+ - `start_task` returned a task with ≥ 3 acceptance criteria
15
+ - Any AC contains words like "appropriately", "good", "optimal", "if needed"
16
+ - You catch yourself about to make a design choice the AC didn't dictate
17
+
18
+ ## Three-level classification
19
+
20
+ For every AC bullet, label it exactly one of:
21
+
22
+ | Label | Meaning | Action |
23
+ |---|---|---|
24
+ | **SPECIFIED** | The AC names a concrete observable outcome (input → output) with no judgement call | Implement directly |
25
+ | **PARTIALLY** | The AC names the outcome but leaves at least one shape detail open (format, threshold, edge-case behavior) | Pick the most conservative option, document the choice in `rationale` |
26
+ | **UNSPECIFIED** | The AC requires a decision the user has not made (algorithm, UX, error handling) | List 2–3 alternatives and ASK before coding |
27
+
28
+ ## Output shape (persist to `node.metadata.ambiguityAudit`)
29
+
30
+ ```json
31
+ {
32
+ "specified": ["AC1", "AC4"],
33
+ "partial": ["AC2"],
34
+ "unspecified": [
35
+ { "item": "AC3", "alternatives": ["throw on duplicate", "upsert silently", "return existing record"] }
36
+ ]
37
+ }
38
+ ```
39
+
40
+ `finish_task` reads this metadata and refuses to mark `done` if `unspecified.length > 0` and the parent has no follow-up decision node.
41
+
42
+ ## Anti-patterns
43
+
44
+ - Marking everything SPECIFIED to skip the conversation — the audit is for YOU first
45
+ - Listing one alternative under UNSPECIFIED — "alternative" implies plural; if only one path exists, it's PARTIALLY at most
46
+ - Auditing after coding — by then the bias is locked in
@@ -0,0 +1,26 @@
1
+ ---
2
+ name: decompose-prd
3
+ description: Break a PRD into atomic XS/S subtasks with acceptance criteria
4
+ category: analyze
5
+ phases: [ANALYZE, PLAN]
6
+ ---
7
+
8
+ # decompose-prd
9
+
10
+ ## When to use
11
+
12
+ Right after `import_prd`, before any sprint planning. The PRD ships as a few large epics; you need every leaf to be ≤ 2h and have testable AC.
13
+
14
+ ## Steps
15
+
16
+ 1. Read the imported epic via `node` action='get'.
17
+ 2. For each undocumented requirement, create child subtasks with xpSize XS or S. Title format: `Eα.Tβ — <verb>-<object> (S)`.
18
+ 3. Each AC must be GIVEN/WHEN/THEN testable; minimum 5 AC per task.
19
+ 4. Link `depends_on` edges only when serial execution is mandatory.
20
+ 5. Run `analyze(mode='ready')` to confirm DoR (≥ 7 checks pass).
21
+
22
+ ## Anti-patterns
23
+
24
+ - "TBD" in AC fields — every AC must be measurable up front.
25
+ - M/L tasks left undecomposed — split into XS+XS+S before sprint planning.
26
+ - Phantom subtasks (no AC, no testFiles) inflating sprint capacity.
@@ -0,0 +1,26 @@
1
+ ---
2
+ name: grill-me
3
+ description: Stress-test a decision by surfacing assumptions and counter-arguments
4
+ category: analyze
5
+ phases: [ANALYZE, DESIGN, REVIEW]
6
+ ---
7
+
8
+ # grill-me
9
+
10
+ ## When to use
11
+
12
+ Before locking a non-trivial design decision (ADR-worthy). Use to surface implicit assumptions and find the strongest counter-argument before committing.
13
+
14
+ ## Steps
15
+
16
+ 1. State the proposed decision in one sentence.
17
+ 2. List 3 assumptions the decision rests on. Tag each: load-bearing / convenient / wishful.
18
+ 3. For each assumption, ask "what changes if it's wrong?".
19
+ 4. Generate the strongest possible counter-position (steel-man, not straw-man).
20
+ 5. Document residual risk in the ADR `## Consequences` section.
21
+
22
+ ## Anti-patterns
23
+
24
+ - Skipping load-bearing assumptions because "obviously true".
25
+ - Self-grilling without changing the decision — performative.
26
+ - Stopping at the weakest counter ("but that's silly").
@@ -0,0 +1,57 @@
1
+ ---
2
+ name: to-prd
3
+ description: Synthesize the current conversation context into a PRD ready for import_prd; do not interview the user, just consolidate what you already know
4
+ category: analyze
5
+ phases: [ANALYZE]
6
+ ---
7
+
8
+ # to-prd
9
+
10
+ Port of `skills-main/to-prd` adapted for mcp-graph: the output is consumed by `import_prd` (or filed as a GitHub issue when the user already runs spec-kit).
11
+
12
+ ## When to use
13
+
14
+ You have an exploratory conversation that's converged on a feature, but no PRD node exists in the graph yet. Stop coding. Synthesize first.
15
+
16
+ ## Process
17
+
18
+ 1. **Explore the repo** if you haven't already — `query_graph`, `code_intelligence` for callers.
19
+ 2. **Sketch deep modules**: list the modules you will build/modify. Prefer deep modules (simple interface, lots of behavior, rarely changes) over shallow facades.
20
+ 3. **Confirm** module boundaries with the user; ask which modules they want test coverage for.
21
+ 4. **Write the PRD** using the template below, then `import_prd` it as a draft epic.
22
+
23
+ ## PRD template
24
+
25
+ ```markdown
26
+ ## Problem Statement
27
+ The user's pain, in the user's words.
28
+
29
+ ## Solution
30
+ What changes from the user's perspective.
31
+
32
+ ## User Stories
33
+ 1. As a <actor>, I want <feature>, so that <benefit>
34
+ … long, exhaustive list
35
+
36
+ ## Implementation Decisions
37
+ - Modules to build/modify (no file paths — they rot)
38
+ - Module interfaces
39
+ - Architectural decisions, schema changes, API contracts
40
+
41
+ ## Testing Decisions
42
+ - What "good test" means here (test behavior, not implementation)
43
+ - Modules to test
44
+ - Prior art (similar tests already in the codebase)
45
+
46
+ ## Out of Scope
47
+ What this PRD does NOT cover.
48
+
49
+ ## Further Notes
50
+ Anything else.
51
+ ```
52
+
53
+ ## Anti-patterns
54
+
55
+ - Interviewing the user from scratch when context already exists
56
+ - Pasting file paths or code into the PRD (they go stale)
57
+ - Single mega-story instead of many user stories
@@ -0,0 +1,26 @@
1
+ ---
2
+ name: code-detachment
3
+ description: Don't hand-edit AI mistakes — explain via prompt and let the AI fix
4
+ category: any
5
+ phases: [IMPLEMENT, REVIEW]
6
+ ---
7
+
8
+ # code-detachment
9
+
10
+ ## When to use
11
+
12
+ When the agent produced wrong code. The instinct is to "just fix it" by hand. Resist — that re-creates an error pattern the agent will repeat.
13
+
14
+ ## Steps
15
+
16
+ 1. Diagnose: which assumption did the agent get wrong?
17
+ 2. Write a prompt that names the wrong assumption and the right one (concretely).
18
+ 3. Let the agent retry. Compare the new output to the wrong one to validate the fix.
19
+ 4. If the same class of mistake recurs ≥ 3 times, document the pattern in CLAUDE.md or add a feedback memory.
20
+ 5. Hand-edit only when the cost of the round-trip exceeds the value of the lesson.
21
+
22
+ ## Anti-patterns
23
+
24
+ - Silent hand-fixes that hide the failure pattern.
25
+ - Detailed prompts that re-explain the entire codebase — name only the wrong assumption.
26
+ - Treating CLAUDE.md as immutable; it's an evolving spec.
@@ -0,0 +1,26 @@
1
+ ---
2
+ name: lessons-consult
3
+ description: Query lessons_learned at start_task to avoid re-walking known failures
4
+ category: any
5
+ phases: [IMPLEMENT, ANALYZE]
6
+ ---
7
+
8
+ # lessons-consult
9
+
10
+ ## When to use
11
+
12
+ At start_task, automatically — the lessons-consultant (E22.D5) injects up to 3 most relevant past lessons into modelHint context. Use this skill when investigating manually.
13
+
14
+ ## Steps
15
+
16
+ 1. After loading task context, search lessons via `consultLessons(db, nodeText, 3)`.
17
+ 2. For each high-confidence lesson (≥ 0.85), surface the recommendedAction.
18
+ 3. If the lesson recommends `skip-similar` and the current task pattern matches, escalate to approval before continuing.
19
+ 4. After completion, update applied_count via `persistLesson` (UPSERT).
20
+ 5. Periodically prune lessons with applied_count = 1 and age > 90d (decayed).
21
+
22
+ ## Anti-patterns
23
+
24
+ - Ignoring lessons because "this task is different" without comparing patterns.
25
+ - Recording new lessons that duplicate existing ones (UPSERT handles this).
26
+ - Letting lesson confidence stay frozen — re-grade on contradicting evidence.
@@ -0,0 +1,26 @@
1
+ ---
2
+ name: wip-one
3
+ description: Single in-progress task per agent; finish before starting another
4
+ category: any
5
+ phases: [IMPLEMENT, VALIDATE]
6
+ ---
7
+
8
+ # wip-one
9
+
10
+ ## When to use
11
+
12
+ Always. Little's Law says cycle_time = WIP / throughput; lowering WIP lowers cycle time without sacrificing throughput.
13
+
14
+ ## Steps
15
+
16
+ 1. Before `start_task`, run `query_graph "SELECT id, title FROM nodes WHERE status='in_progress'"`.
17
+ 2. If a row is returned: finish_task or revert it before starting new work.
18
+ 3. Long-running task blocked? Mark it `blocked` (not `in_progress`) with rationale.
19
+ 4. Honor backpressure-detector (E22.C2) signals; pull, don't push.
20
+ 5. Audit weekly: `metrics(action='wip_history')` should hover at 1.
21
+
22
+ ## Anti-patterns
23
+
24
+ - Switching tasks because the current one is stuck — root cause first.
25
+ - Counting "background reading" as progress — it's not.
26
+ - Multiple in_progress with the same agent — invalid graph state.
@@ -0,0 +1,26 @@
1
+ ---
2
+ name: design-an-interface
3
+ description: Define a deep module's public surface before writing implementation
4
+ category: design
5
+ phases: [DESIGN, PLAN]
6
+ ---
7
+
8
+ # design-an-interface
9
+
10
+ ## When to use
11
+
12
+ Before implementing a module that other modules will depend on. Ousterhout: "modules should be deep" — small interface, large implementation.
13
+
14
+ ## Steps
15
+
16
+ 1. List the operations callers need. Cap at 5 named exports.
17
+ 2. For each operation, write the type signature; add JSDoc with one example.
18
+ 3. Sketch the impl without writing code: pseudocode in 3–6 bullets.
19
+ 4. Run `analyze(mode='deep_module')` after first impl pass; depth ratio < 0.2 is good.
20
+ 5. Write the test for the interface BEFORE the impl (TDD red).
21
+
22
+ ## Anti-patterns
23
+
24
+ - Exporting internal helpers because "tests need them".
25
+ - Naming exports with implementation detail (`createSqliteFooStore` vs `createFooStore`).
26
+ - Passing more than 4 params unwrapped — bundle into options object.
@@ -0,0 +1,26 @@
1
+ ---
2
+ name: seam-audit
3
+ description: Classify dependencies into 4 seams to plan substitution + testability
4
+ category: design
5
+ phases: [DESIGN, REVIEW]
6
+ ---
7
+
8
+ # seam-audit
9
+
10
+ ## When to use
11
+
12
+ When a module is hard to test, brittle to change, or coupled to a vendor SDK. Categorize each import to know where to put a stand-in.
13
+
14
+ ## Steps
15
+
16
+ 1. Run `analyze(mode='seam_audit', file=<path>)` to classify imports.
17
+ 2. For each `true-external` (e.g. anthropic, openai), wrap behind an adapter; never import in core.
18
+ 3. For each `local-substitutable` (better-sqlite3, fs), inject through an interface so tests get a stand-in.
19
+ 4. For `remote-owned` (axios, MCP), enforce timeout + retry policy.
20
+ 5. `in-process` imports stay free; consider merging if only one consumer.
21
+
22
+ ## Anti-patterns
23
+
24
+ - Hiding SDK clients in core under "convenience" wrappers.
25
+ - Mocking `fs` with `vi.mock` instead of injecting; brittle to refactors.
26
+ - Untested timeouts on remote-owned (default infinite hangs).
@@ -0,0 +1,71 @@
1
+ ---
2
+ domain: crypto
3
+ topic: common-mistakes
4
+ triggers: [encryption, hashing, key_management, jwt, tls]
5
+ discovered_at: 2026-04-30T00:00:00.000Z
6
+ source_task: extracta-paper2code
7
+ confidence: 0.85
8
+ ---
9
+
10
+ # Cryptography — Common Mistakes
11
+
12
+ Patterns where the code uses crypto APIs correctly *as documented* but
13
+ the security property does not hold. These all have one rule: when in
14
+ doubt, use the high-level construct (libsodium, age, Tink) instead of
15
+ hand-rolling primitives.
16
+
17
+ ## Encryption
18
+
19
+ - **ECB mode** — leaks data patterns. Default to AES-GCM (authenticated)
20
+ or ChaCha20-Poly1305. Never use AES-ECB at all, and never use AES-CBC
21
+ without encrypt-then-MAC.
22
+ - **Static IV / nonce reuse** — GCM nonce reuse is catastrophic (plaintext
23
+ recovery). Generate per-message; persist last-used counter or use
24
+ random 96-bit nonces.
25
+ - **Key derived from password without KDF** — use Argon2id (or scrypt /
26
+ PBKDF2 with high iterations). Never feed a password directly to a
27
+ symmetric cipher key slot.
28
+
29
+ ## Hashing & signatures
30
+
31
+ - **MD5 / SHA-1 still in use** — collision-vulnerable. Use SHA-256 minimum;
32
+ BLAKE2/3 for performance.
33
+ - **HMAC instead of signature for cross-trust-boundary auth** — HMAC
34
+ requires a shared secret. For multi-party verification, use an
35
+ asymmetric signature (Ed25519).
36
+ - **String comparison on MAC / token** — `===` is timing-leaky. Use
37
+ `crypto.timingSafeEqual` (Node) or `hmac.compare_digest` (Python).
38
+
39
+ ## Tokens / sessions
40
+
41
+ - **JWT `alg: none`** — accept-any-algorithm libraries let an attacker
42
+ set `alg: none` and forge tokens. Pin allowed algorithms.
43
+ - **JWT signed with HMAC, public key as secret** — RS256 token verified
44
+ as HS256 with the public key as the "secret" lets the attacker sign
45
+ tokens. Pin algorithm AND key type.
46
+ - **Session ID in URL** — leaks via Referer headers + browser history.
47
+ Use cookies with `Secure; HttpOnly; SameSite`.
48
+
49
+ ## Key management
50
+
51
+ - **Hard-coded keys in source** — every public-repo scan finds them.
52
+ Use env vars (or KMS). Rotate after exposure.
53
+ - **Same key for encryption and signing** — separate keys per purpose;
54
+ failures in one don't compromise the other.
55
+ - **No key rotation plan** — every key needs a rotation cadence written
56
+ down before deploy. "We'll rotate when we need to" = never.
57
+
58
+ ## Randomness
59
+
60
+ - **`Math.random()` / `random.random()` for secrets** — not
61
+ cryptographically secure. Use `crypto.randomBytes` (Node), `secrets`
62
+ module (Python), `crypto/rand` (Go).
63
+ - **Truncating UUIDs for IDs** — UUIDv4 randomness is in 122 bits;
64
+ truncating to 8 chars (32 bits) gives birthday collisions at ~65k
65
+ values.
66
+
67
+ ## When to escalate
68
+
69
+ If a task touches authentication, encryption, or session management,
70
+ require explicit answers to: which library, which algorithm, which key
71
+ lifecycle. "Use whatever is standard" is UNSPECIFIED.
@@ -0,0 +1,55 @@
1
+ ---
2
+ domain: ml
3
+ topic: common-mistakes
4
+ triggers: [paper_to_code, ml_implementation, model_training, hyperparam_check]
5
+ discovered_at: 2026-04-30T00:00:00.000Z
6
+ source_task: extracta-paper2code
7
+ confidence: 0.8
8
+ ---
9
+
10
+ # ML Implementation — Common Mistakes
11
+
12
+ Curated from `paper2code/paper_to_code_mistakes.md`. The code runs but does
13
+ not implement what the paper describes — these are systematic, not bugs.
14
+
15
+ ## Notation mismatches
16
+
17
+ - **BatchNorm momentum** — PyTorch `momentum=x` ≈ TensorFlow `momentum=1-x`.
18
+ - **Dropout rate vs keep probability** — modern papers usually report the drop
19
+ probability; pre-2018 papers often report the keep probability.
20
+ - **"Same padding"** — TF handles it automatically; PyTorch needs
21
+ `padding=kernel_size // 2` (asymmetric for even kernels).
22
+ - **Tensor layout** — PyTorch NCHW, TensorFlow NHWC. Every conv/pool/reshape
23
+ must account for the difference when porting.
24
+
25
+ ## Activation gotchas
26
+
27
+ - **GELU** — exact erf form (PyTorch default) ≠ tanh approximation (BERT, GPT-2; `approximate='tanh'` in PyTorch ≥ 1.12). Different outputs.
28
+ - **SiLU vs Swish** — same function; Swish-with-trainable-β is the variant.
29
+
30
+ ## Training-loop landmines
31
+
32
+ - **Loss scaling** — paper reports per-token loss; framework may report
33
+ per-batch sum. Check before comparing.
34
+ - **Gradient clipping order** — clip *after* loss.backward() but *before*
35
+ optimizer.step(). Order swap silently changes effective LR.
36
+ - **Learning-rate schedule warm-up** — many papers use linear warm-up over the
37
+ first N steps then cosine decay; "decay from step 0" is a different recipe.
38
+ - **Weight decay on biases / LayerNorm** — most modern code skips bias and
39
+ LayerNorm params from weight decay; if the paper doesn't say, default to
40
+ exclusion (matches HF defaults).
41
+
42
+ ## Evaluation traps
43
+
44
+ - **Accuracy reported on training set** — silently reproduces a training-time
45
+ metric the paper never claimed.
46
+ - **Beam search vs greedy** — beam_size=1 is greedy; the paper number was
47
+ probably with beam_size=4 (translation) or 5 (summarization).
48
+ - **Tokenizer mismatch** — BPE vocab from the paper vs your tokenizer can
49
+ shift perplexity by 5+ points without changing the model.
50
+
51
+ ## When to escalate
52
+
53
+ If the AC says "match paper Table 2" but the difference falls in any of the
54
+ buckets above, mark UNSPECIFIED in `ambiguity-audit` and ask which convention
55
+ the paper's official repo uses.