sentinelayer-cli 0.8.0 → 0.8.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (153) hide show
  1. package/README.md +23 -2
  2. package/package.json +4 -4
  3. package/src/agents/ai-governance/index.js +12 -0
  4. package/src/agents/ai-governance/tools/base.js +171 -0
  5. package/src/agents/ai-governance/tools/eval-regression.js +47 -0
  6. package/src/agents/ai-governance/tools/hitl-audit.js +81 -0
  7. package/src/agents/ai-governance/tools/index.js +52 -0
  8. package/src/agents/ai-governance/tools/prompt-drift.js +42 -0
  9. package/src/agents/ai-governance/tools/provenance-check.js +69 -0
  10. package/src/agents/backend/index.js +12 -0
  11. package/src/agents/backend/tools/base.js +189 -0
  12. package/src/agents/backend/tools/circuit-breaker-check.js +123 -0
  13. package/src/agents/backend/tools/idempotency-audit.js +105 -0
  14. package/src/agents/backend/tools/index.js +87 -0
  15. package/src/agents/backend/tools/retry-audit.js +132 -0
  16. package/src/agents/backend/tools/timeout-audit.js +144 -0
  17. package/src/agents/code-quality/index.js +12 -0
  18. package/src/agents/code-quality/tools/base.js +159 -0
  19. package/src/agents/code-quality/tools/complexity-measure.js +197 -0
  20. package/src/agents/code-quality/tools/coupling-analysis.js +81 -0
  21. package/src/agents/code-quality/tools/cycle-detect.js +49 -0
  22. package/src/agents/code-quality/tools/dep-graph.js +196 -0
  23. package/src/agents/code-quality/tools/index.js +89 -0
  24. package/src/agents/data-layer/index.js +12 -0
  25. package/src/agents/data-layer/tools/base.js +181 -0
  26. package/src/agents/data-layer/tools/index-audit.js +165 -0
  27. package/src/agents/data-layer/tools/index.js +83 -0
  28. package/src/agents/data-layer/tools/migration-scan.js +135 -0
  29. package/src/agents/data-layer/tools/query-explain.js +120 -0
  30. package/src/agents/data-layer/tools/tenancy-scan.js +166 -0
  31. package/src/agents/documentation/index.js +12 -0
  32. package/src/agents/documentation/tools/api-diff.js +91 -0
  33. package/src/agents/documentation/tools/base.js +151 -0
  34. package/src/agents/documentation/tools/dead-link-check.js +58 -0
  35. package/src/agents/documentation/tools/docstring-coverage.js +78 -0
  36. package/src/agents/documentation/tools/index.js +52 -0
  37. package/src/agents/documentation/tools/readme-freshness.js +61 -0
  38. package/src/agents/envelope/fix-cycle.js +45 -0
  39. package/src/agents/envelope/index.js +31 -0
  40. package/src/agents/envelope/loop.js +150 -0
  41. package/src/agents/envelope/pulse.js +18 -0
  42. package/src/agents/envelope/stream.js +40 -0
  43. package/src/agents/infrastructure/index.js +12 -0
  44. package/src/agents/infrastructure/tools/base.js +171 -0
  45. package/src/agents/infrastructure/tools/checkov-run.js +32 -0
  46. package/src/agents/infrastructure/tools/drift-detect.js +59 -0
  47. package/src/agents/infrastructure/tools/iam-least-priv-check.js +78 -0
  48. package/src/agents/infrastructure/tools/index.js +52 -0
  49. package/src/agents/infrastructure/tools/tflint-run.js +31 -0
  50. package/src/agents/jules/loop.js +7 -4
  51. package/src/agents/jules/swarm/sub-agent.js +5 -1
  52. package/src/agents/jules/tools/auth-audit.js +10 -1
  53. package/src/agents/mode.js +113 -0
  54. package/src/agents/observability/index.js +12 -0
  55. package/src/agents/observability/tools/alert-audit.js +39 -0
  56. package/src/agents/observability/tools/base.js +181 -0
  57. package/src/agents/observability/tools/dashboard-gap.js +42 -0
  58. package/src/agents/observability/tools/index.js +54 -0
  59. package/src/agents/observability/tools/log-schema-check.js +74 -0
  60. package/src/agents/observability/tools/span-coverage.js +74 -0
  61. package/src/agents/persona-visuals.js +38 -0
  62. package/src/agents/release/index.js +12 -0
  63. package/src/agents/release/tools/base.js +181 -0
  64. package/src/agents/release/tools/changelog-diff.js +86 -0
  65. package/src/agents/release/tools/feature-flag-audit.js +126 -0
  66. package/src/agents/release/tools/index.js +61 -0
  67. package/src/agents/release/tools/rollback-verify.js +129 -0
  68. package/src/agents/release/tools/semver-check.js +109 -0
  69. package/src/agents/reliability/index.js +12 -0
  70. package/src/agents/reliability/tools/backpressure-check.js +129 -0
  71. package/src/agents/reliability/tools/base.js +181 -0
  72. package/src/agents/reliability/tools/chaos-probe.js +109 -0
  73. package/src/agents/reliability/tools/graceful-degradation-check.js +114 -0
  74. package/src/agents/reliability/tools/health-check-audit.js +111 -0
  75. package/src/agents/reliability/tools/index.js +87 -0
  76. package/src/agents/run-persona.js +109 -0
  77. package/src/agents/security/index.js +12 -0
  78. package/src/agents/security/tools/authz-audit.js +134 -0
  79. package/src/agents/security/tools/base.js +190 -0
  80. package/src/agents/security/tools/crypto-review.js +175 -0
  81. package/src/agents/security/tools/index.js +97 -0
  82. package/src/agents/security/tools/sast-scan.js +175 -0
  83. package/src/agents/security/tools/secrets-scan.js +216 -0
  84. package/src/agents/supply-chain/index.js +12 -0
  85. package/src/agents/supply-chain/tools/attestation-check.js +42 -0
  86. package/src/agents/supply-chain/tools/base.js +151 -0
  87. package/src/agents/supply-chain/tools/index.js +52 -0
  88. package/src/agents/supply-chain/tools/lockfile-integrity.js +73 -0
  89. package/src/agents/supply-chain/tools/package-verify.js +56 -0
  90. package/src/agents/supply-chain/tools/sbom-diff.js +34 -0
  91. package/src/agents/testing/index.js +12 -0
  92. package/src/agents/testing/tools/base.js +202 -0
  93. package/src/agents/testing/tools/coverage-gap.js +144 -0
  94. package/src/agents/testing/tools/flake-detect.js +125 -0
  95. package/src/agents/testing/tools/index.js +85 -0
  96. package/src/agents/testing/tools/mutation-test.js +143 -0
  97. package/src/agents/testing/tools/snapshot-diff.js +103 -0
  98. package/src/auth/gate.js +65 -37
  99. package/src/cli.js +1 -1
  100. package/src/commands/chat.js +3 -10
  101. package/src/commands/legacy-args.js +10 -0
  102. package/src/commands/omargate.js +36 -2
  103. package/src/commands/persona.js +46 -1
  104. package/src/commands/scan.js +3 -10
  105. package/src/commands/session.js +654 -6
  106. package/src/commands/spec.js +3 -10
  107. package/src/coord/events-log.js +141 -0
  108. package/src/coord/handshake.js +719 -0
  109. package/src/coord/index.js +35 -0
  110. package/src/coord/paths.js +84 -0
  111. package/src/coord/priority.js +62 -0
  112. package/src/coord/tarjan.js +157 -0
  113. package/src/cost/tokenizer.js +160 -0
  114. package/src/cost/tracker.js +61 -0
  115. package/src/daemon/artifact-lineage.js +362 -0
  116. package/src/daemon/assignment-ledger.js +117 -0
  117. package/src/daemon/ast-drift.js +496 -0
  118. package/src/daemon/ingest-refresh.js +69 -2
  119. package/src/ingest/engine.js +15 -0
  120. package/src/ingest/ownership.js +380 -0
  121. package/src/legacy-cli.js +68 -1
  122. package/src/orchestrator/kai-chen.js +126 -0
  123. package/src/review/ai-review.js +3 -10
  124. package/src/review/compliance-pack.js +389 -0
  125. package/src/review/investor-dd-config.js +54 -0
  126. package/src/review/investor-dd-file-loop.js +303 -0
  127. package/src/review/investor-dd-file-router.js +406 -0
  128. package/src/review/investor-dd-html-report.js +233 -0
  129. package/src/review/investor-dd-notification.js +120 -0
  130. package/src/review/investor-dd-orchestrator.js +405 -0
  131. package/src/review/investor-dd-persona-runner.js +275 -0
  132. package/src/review/live-validator.js +253 -0
  133. package/src/review/omargate-orchestrator.js +90 -2
  134. package/src/review/persona-prompts.js +244 -56
  135. package/src/review/reconciliation-rules.js +329 -0
  136. package/src/review/reproducibility-chain.js +136 -0
  137. package/src/review/scan-modes.js +102 -3
  138. package/src/session/agent-registry.js +7 -0
  139. package/src/session/analytics.js +479 -0
  140. package/src/session/daemon.js +609 -14
  141. package/src/session/file-locks.js +666 -0
  142. package/src/session/paths.js +4 -0
  143. package/src/session/recap.js +567 -0
  144. package/src/session/redact.js +82 -0
  145. package/src/session/runtime-bridge.js +24 -1
  146. package/src/session/scoring.js +406 -0
  147. package/src/session/setup-guides.js +304 -0
  148. package/src/session/store.js +318 -2
  149. package/src/session/stream.js +9 -1
  150. package/src/session/sync.js +753 -0
  151. package/src/session/tasks.js +1054 -0
  152. package/src/session/templates.js +188 -0
  153. package/src/swarm/runtime.js +1 -8
@@ -3,8 +3,30 @@
3
3
  *
4
4
  * Each persona gets a domain-focused prompt that constrains the LLM
5
5
  * to analyze code through a specific security/quality lens.
6
+ *
7
+ * v0.8+ (Phase G hardening): every persona prompt now includes a common
8
+ * "FAANG-grade rigor preamble" that forces the LLM to use the SWE
9
+ * framework (src/SWE_excellence_framework.md) checklist for its domain,
10
+ * enumerate what it actually looked at, and refuse to return empty
11
+ * findings without stating what it verified. Phase E audit surfaced
12
+ * 58 distinct gaps across 7 Codex PRs that the previous persona prompts
13
+ * missed entirely because they encouraged brevity over completeness.
6
14
  */
7
15
 
16
+ const FAANG_GRADE_PREAMBLE = `You are an investor-due-diligence, FAANG-acquirer-grade reviewer. Every finding you emit will be read by a staff engineer and a security lead; either can catch you being lazy, so be thorough.
17
+
18
+ Non-negotiables for your review:
19
+
20
+ 1. Start by LISTING the files you intend to analyze (top 20 most relevant to your domain), with a one-line why per file.
21
+ 2. For each file, cite at least ONE of: specific function name, class name, exported identifier, or line range you inspected.
22
+ 3. Before emitting findings, enumerate the SWE-framework checklist for your domain (cited below). For each checklist item, state: FOUND violation, NOT FOUND, or NOT APPLICABLE (with reason).
23
+ 4. Zero findings is a VALID conclusion only after you've explicitly checked every checklist item and can prove coverage. If you cannot enumerate what you looked at, you haven't done the work.
24
+ 5. Each finding MUST include: severity, file, line, evidence (exact code snippet), rootCause (why it's wrong), recommendedFix (concrete code change), confidence (0.0-1.0).
25
+ 6. Do NOT include findings the deterministic scanner already caught — but DO include anything the deterministic scanner would miss because it's contextual (intent, cross-file flow, missing defense-in-depth).
26
+ 7. If the codebase is tiny or out-of-domain for your persona, SAY SO explicitly with the file list inspected. Do not pad with speculative findings.
27
+
28
+ Your output must help an acquirer decide whether to buy this codebase. Be FOUND-violations accurate, not speculation-padded.`;
29
+
8
30
  const PERSONA_PROMPTS = {
9
31
  security: {
10
32
  role: "Nina Patel — Security Specialist",
@@ -24,21 +46,23 @@ Evidence standard: Every finding MUST include file:line, exploit scenario, and r
24
46
  Do NOT report hypothetical issues without concrete code evidence.`,
25
47
  },
26
48
 
27
- architecture: {
28
- role: "Maya Volkov — Architecture Specialist",
29
- focus: `You are an architecture specialist reviewing code for structural quality.
49
+ backend: {
50
+ role: "Maya Volkov — Backend Runtime Specialist (ex-AWS Platform)",
51
+ focus: `You are a backend runtime specialist reviewing server-side code for trust-boundary failures.
30
52
 
31
53
  Focus areas:
32
- - God components/modules (>300 LOC, >10 responsibilities)
33
- - Circular dependencies between modules
34
- - Tight coupling between layers (presentation ↔ data access)
35
- - Missing abstraction boundaries (business logic in route handlers)
36
- - State management sprawl (>15 useState in a component)
37
- - Missing error boundaries and fallback handling
38
- - Inconsistent naming/organization patterns
39
- - Dead code and unreachable paths
40
-
41
- Evidence standard: Every finding MUST include file:line, coupling graph or complexity metric, and refactoring guidance.`,
54
+ - Unsafe request handling: unvalidated inputs reaching handlers, type-coercion assumptions
55
+ - Runtime crashes: unhandled rejections, unclosed transactions, resource leaks
56
+ - Database transaction safety: transaction scope, atomicity, rollback paths
57
+ - Worker retry patterns: exponential backoff + jitter (no retry storms, no linear)
58
+ - Circuit breakers on external dependencies; fail-closed on store outage
59
+ - Timeouts explicit on every outbound call (no implicit runtime defaults)
60
+ - Idempotency on mutation endpoints; idempotency-key lifecycle
61
+ - Rate limiting on auth / payment / AI endpoints with fail-closed behavior
62
+ - Contracts: explicit request/response schemas enforced at the boundary
63
+ - Background jobs: queue backpressure, DLQ configuration, poison-pill handling
64
+
65
+ Evidence standard: Every finding MUST include file:line, failure scenario, blast radius, and the resilience pattern to apply.`,
42
66
  },
43
67
 
44
68
  testing: {
@@ -58,38 +82,40 @@ Focus areas:
58
82
  Evidence standard: Every finding MUST include the untested code path (file:line) and a concrete test case outline.`,
59
83
  },
60
84
 
61
- performance: {
62
- role: "Arjun Mehta — Performance Specialist",
63
- focus: `You are a performance specialist reviewing code for latency and efficiency issues.
85
+ "code-quality": {
86
+ role: "Ethan Park — Code Quality & Complexity Specialist (ex-Meta Code Health)",
87
+ focus: `You are a code quality and complexity specialist reviewing for structural integrity.
64
88
 
65
89
  Focus areas:
66
- - N+1 query patterns (loop-based database calls)
67
- - Missing database indexes on WHERE/JOIN/ORDER BY columns
68
- - Unbounded data fetching (no LIMIT, no pagination)
69
- - Synchronous blocking in async contexts
70
- - Memory leaks (unclosed connections, event listeners, timers)
71
- - Bundle size bloat (large imports, no tree shaking, no code splitting)
72
- - Missing caching for expensive computations
73
- - Render performance (unnecessary re-renders, missing memoization)
74
-
75
- Evidence standard: Every finding MUST include file:line, estimated performance impact, and optimization approach.`,
90
+ - God components / modules (>300 LOC or >10 responsibilities)
91
+ - Circular dependencies between core modules
92
+ - Tight coupling across layer boundaries (presentation ↔ data access)
93
+ - Missing abstraction boundaries (business logic inside route handlers)
94
+ - State-management sprawl (>15 useState in a component)
95
+ - Missing error boundaries on route components / agent loops
96
+ - Inconsistent naming / organization patterns
97
+ - Dead code, unreachable paths, commented-out code blocks
98
+ - Refactor triggers exceeded without action (LOC, cyclomatic complexity)
99
+
100
+ Evidence standard: Every finding MUST include file:line, coupling graph or complexity metric, and concrete refactoring guidance.`,
76
101
  },
77
102
 
78
- compliance: {
79
- role: "Leila Farouk — Compliance Specialist",
80
- focus: `You are a compliance specialist reviewing code for regulatory adherence.
103
+ "data-layer": {
104
+ role: "Dr. Linh Tran — Data Layer Specialist (ex-Netflix Data Platforms)",
105
+ focus: `You are a data-layer specialist reviewing query safety, migration integrity, and tenancy boundaries.
81
106
 
82
107
  Focus areas:
83
- - PII handling without encryption or access controls
84
- - Missing audit logging for data access and mutations
85
- - GDPR: data retention without deletion mechanisms
86
- - SOC2: missing access controls, no principle of least privilege
87
- - HIPAA: PHI exposure, missing BAA requirements
88
- - Missing consent tracking for data collection
89
- - Insecure data export/download without authorization
90
- - Missing data classification and sensitivity labels
91
-
92
- Evidence standard: Every finding MUST include the regulatory requirement, the gap, and the remediation with compliance evidence.`,
108
+ - N+1 query patterns in ORM loops or service-layer iteration
109
+ - Missing indexes on WHERE / JOIN / ORDER BY / foreign-key columns
110
+ - Unbounded data fetching (no LIMIT, no pagination, no cursor)
111
+ - Migration safety: no data loss, no long locks on hot tables, backfill batched
112
+ - Tenancy leaks: cross-tenant data accessible via missing filters or auth checks
113
+ - Schema / application-model drift (Pydantic/ORM models out of sync with DB schema)
114
+ - Query budgets enforced: p95 / p99 latency SLIs per critical path
115
+ - Read / write separation; read replicas used for heavy reads
116
+ - Integrity: referential constraints, CHECK constraints, NOT NULL where required
117
+
118
+ Evidence standard: Every finding MUST include file:line, observed query pattern or schema gap, expected vs actual behavior, and the corrective migration / index / query shape.`,
93
119
  },
94
120
 
95
121
  documentation: {
@@ -249,36 +275,62 @@ export function buildPersonaReviewPrompt({
249
275
  return buildGenericPrompt({ targetPath, deterministicSummary, maxFindings });
250
276
  }
251
277
 
278
+ const checklist = SWE_FRAMEWORK_CHECKLIST[personaId] || [];
279
+ const checklistBlock = checklist.length > 0
280
+ ? `## SWE framework checklist for ${persona.role}
281
+ You MUST report, for each item below, one of: FOUND | NOT FOUND | NOT APPLICABLE (with reason).
282
+ This enumeration goes in your output under \`coverage\` (before \`findings\`).
283
+
284
+ ${checklist.map((item, i) => `${i + 1}. ${item}`).join("\n")}
285
+ `
286
+ : "";
287
+
252
288
  return `# ${persona.role}
253
289
 
290
+ ${FAANG_GRADE_PREAMBLE}
291
+
254
292
  ${persona.focus}
255
293
 
294
+ ${checklistBlock}
256
295
  ## Context
257
296
  Target: ${targetPath || "(not provided)"}
258
- Deterministic scan: P0=${deterministicSummary.P0 || 0} P1=${deterministicSummary.P1 || 0} P2=${deterministicSummary.P2 || 0} P3=${deterministicSummary.P3 || 0}
297
+ Deterministic scan summary (already reported, do NOT repeat): P0=${deterministicSummary.P0 || 0} P1=${deterministicSummary.P1 || 0} P2=${deterministicSummary.P2 || 0} P3=${deterministicSummary.P3 || 0}
259
298
 
260
299
  ## Output Contract
261
- Return a JSON array of findings. Maximum ${maxFindings} findings. Each finding:
300
+ Return a JSON OBJECT (not array) with this shape — return ONLY the JSON, no other text:
262
301
  \`\`\`json
263
302
  {
264
- "severity": "P0|P1|P2|P3",
265
- "file": "path/to/file.ext",
266
- "line": 42,
267
- "title": "Brief description",
268
- "evidence": "Concrete code evidence at file:line",
269
- "rootCause": "Why this is a problem",
270
- "recommendedFix": "Specific fix to apply",
271
- "confidence": 0.85
303
+ "inspectedFiles": [
304
+ { "file": "path/to/file.ext", "why": "reason file is in-scope for this persona" }
305
+ ],
306
+ "coverage": [
307
+ { "checklist": "item-1-short-name", "status": "FOUND|NOT_FOUND|NOT_APPLICABLE", "reason": "..." }
308
+ ],
309
+ "findings": [
310
+ {
311
+ "severity": "P0|P1|P2|P3",
312
+ "file": "path/to/file.ext",
313
+ "line": 42,
314
+ "title": "Brief description",
315
+ "evidence": "Concrete code excerpt at file:line (min 1 line)",
316
+ "rootCause": "Why this is a problem",
317
+ "recommendedFix": "Specific code change to apply",
318
+ "confidence": 0.85,
319
+ "checklistItem": "which-checklist-item-this-violates (if applicable)"
320
+ }
321
+ ]
272
322
  }
273
323
  \`\`\`
274
324
 
275
325
  Rules:
276
- - Only report findings you have HIGH confidence in (>= 0.7)
277
- - Every finding MUST have concrete file:line evidence
278
- - Do NOT repeat findings already in the deterministic scan
279
- - Do NOT report hypothetical/speculative issues
280
- - Focus on REAL, EXPLOITABLE, IMPACTFUL problems in your domain
281
- - Return ONLY the JSON array, no other text
326
+ - Maximum ${maxFindings} findings.
327
+ - Only report findings you have HIGH confidence in (>= 0.7).
328
+ - Every finding MUST have concrete file:line evidence AND a non-empty \`evidence\` code excerpt.
329
+ - Do NOT repeat findings already in the deterministic scan.
330
+ - Do NOT report hypothetical/speculative issues.
331
+ - Focus on REAL, EXPLOITABLE, IMPACTFUL problems in your domain.
332
+ - \`inspectedFiles\` and \`coverage\` are REQUIRED even when \`findings\` is empty.
333
+ - Zero findings is valid ONLY when \`coverage\` demonstrates every checklist item was evaluated.
282
334
  `;
283
335
  }
284
336
 
@@ -292,5 +344,141 @@ Return a JSON array of up to ${maxFindings} findings with: severity, file, line,
292
344
  Only report findings with concrete evidence. Do NOT repeat deterministic findings.`;
293
345
  }
294
346
 
347
+ /**
348
+ * SWE framework checklist per persona. Derived from src/SWE_excellence_framework.md
349
+ * plus Phase E audit findings (tasks/senti-audit-summary.md). Each persona MUST
350
+ * enumerate these items in its `coverage` output before emitting findings.
351
+ */
352
+ const SWE_FRAMEWORK_CHECKLIST = {
353
+ security: [
354
+ "Payload redaction on all log/stream write paths (no raw tokens/PII in session streams, Jira, error intake, runtime bridge events)",
355
+ "SSRF: URL/network tools have explicit allowlist; empty allowlist MUST default-deny (not default-allow)",
356
+ "Auth bypass justification: routes marked skipAuth cite explicit reason and have test coverage",
357
+ "Idempotency: mutation endpoints use idempotency keys; POST/PUT/PATCH/DELETE not retry-unsafe",
358
+ "Rate limiting: auth / payment / AI endpoints; fail-closed on rate-limit store outage",
359
+ "MCP token audience validation; no token passthrough",
360
+ "Cryptographic primitives: no weak hashing, no hardcoded keys, TLS validation enabled",
361
+ "Input validation before trusting external data (LLM prompts, user forms, uploads)",
362
+ "Session management: token leakage, fixation, cookie httpOnly/secure/sameSite",
363
+ "Secrets: no credential literals; env var indirection; rotation policy documented",
364
+ ],
365
+ backend: [
366
+ "Idempotency keys on mutation endpoints (POST/PUT/PATCH/DELETE that aren't retry-safe)",
367
+ "Rate limiting on auth / payment / AI with fail-closed behavior on store outage",
368
+ "Explicit timeouts on every outbound call (no implicit runtime defaults)",
369
+ "Circuit breakers with persistent state across process restarts",
370
+ "Database transaction boundaries scoped to atomic units; rollback paths tested",
371
+ "Retry policies use exponential backoff + jitter (no linear, no zero-jitter)",
372
+ "Request validation before trusting external data (body, headers, query, uploads)",
373
+ "Background jobs: queue backpressure, DLQ configured, poison-pill handling",
374
+ ],
375
+ testing: [
376
+ "Critical paths have test coverage (auth, payment, data mutation, kill switches)",
377
+ "Kill-switch tests exercise the CLI surface, not just programmatic API (SWE §O.1, spec §5.7)",
378
+ "Fault-injection coverage: error paths, abort paths, malformed input",
379
+ "Integration tests for API endpoints (not just unit)",
380
+ "E2E tests for critical user flows",
381
+ "No mock-only tests that hide contract drift between mock and prod",
382
+ "Eval artifacts exist for prompt/policy/model-route changes (SWE §I.2)",
383
+ "Edge cases: empty inputs, boundary values, concurrent operations",
384
+ ],
385
+ "code-quality": [
386
+ "Module boundaries enforced (no business logic in route handlers or controllers)",
387
+ "Files >500 LOC flagged; >15 useState or >10 responsibilities per component",
388
+ "Circular dependencies across core modules flagged",
389
+ "Shared-state hotspots that block concurrent execution",
390
+ "Error boundaries present on route components / agent loops",
391
+ "Cross-cutting concerns consolidated (logging, telemetry, retry) not scattered",
392
+ "Domain boundaries: session/daemon/review modules don't directly import each other's internals",
393
+ "Dead code, unreachable paths, commented-out blocks flagged for removal",
394
+ ],
395
+ "data-layer": [
396
+ "N+1 patterns in ORM loops or service-layer iteration",
397
+ "Indexes on WHERE / JOIN / ORDER BY / foreign-key columns",
398
+ "Unbounded data fetching (no LIMIT / pagination / cursor) flagged",
399
+ "Migration safety: no data loss, no long locks on hot tables, backfill batched",
400
+ "Tenancy boundaries enforced in every query (project_id / org_id filters)",
401
+ "Schema vs application-model drift (Pydantic / ORM matches DB schema)",
402
+ "Query budgets enforced: p95 / p99 latency SLIs per critical path",
403
+ "Read / write separation; read replicas used for heavy reads",
404
+ ],
405
+ documentation: [
406
+ "README setup instructions match current state",
407
+ "Runbooks for incident response include kill-switch invocation",
408
+ "API endpoints documented with schemas",
409
+ "Architecture decision records for non-obvious choices",
410
+ "Onboarding docs reference live entry points",
411
+ ],
412
+ reliability: [
413
+ "External call timeouts configured with deadlines",
414
+ "Retry with exponential backoff + jitter (no linear backoff, no zero-jitter)",
415
+ "Circuit breakers with persistent state across process restarts",
416
+ "Graceful degradation when upstream unavailable",
417
+ "Health checks and liveness / readiness probes",
418
+ "Queue backpressure strategy documented",
419
+ "Fallback/degrade event taxonomy emitted (SWE §L.1 line 1136)",
420
+ "Persistence contracts for in-memory daemon state (crash recovery)",
421
+ ],
422
+ release: [
423
+ "Pinned GitHub Actions (SHA, not @main)",
424
+ "Artifact signing + provenance attestation on release artifacts",
425
+ "Rollback path tested before publish",
426
+ "Smoke tests after deploy block promotion",
427
+ "Branch protection on main with required checks",
428
+ "Canary / staged rollout gates",
429
+ "Quality gates block merge: lint, test, build, security scan",
430
+ "Workflow_dispatch paths enforce the same actor/signing policy as tag-push",
431
+ ],
432
+ observability: [
433
+ "Structured logging with trace/correlation IDs",
434
+ "Model spans: model identity, prompt hash, tokens in/out, cost",
435
+ "Tool spans and agent spans with timing + status",
436
+ "Error tracking integration (Sentry / equivalent)",
437
+ "Latency SLIs / SLOs for critical paths",
438
+ "Dashboards exist for key business + operational metrics",
439
+ "Fallback / degrade events tracked",
440
+ "Silent error swallows flagged (empty catch blocks, try {} catch {} with no re-throw / log)",
441
+ ],
442
+ infrastructure: [
443
+ "IAM: least privilege, no wildcard actions on production resources",
444
+ "Public resources behind WAF / rate limit",
445
+ "Encryption at rest and in transit with key rotation",
446
+ "No hardcoded infra values (IPs, ARNs, account IDs)",
447
+ "VPC / subnet isolation between tiers",
448
+ "Secrets rotation policy",
449
+ "Backup + DR drilled with RPO / RTO targets",
450
+ "Terraform drift check passes",
451
+ ],
452
+ "supply-chain": [
453
+ "Dependencies with known CVEs (critical/high blocks merge)",
454
+ "Dependency pinning (exact versions, not ^ / ~)",
455
+ "Lockfile integrity checks in CI",
456
+ "SBOM generated per release",
457
+ "Provenance attestation on package publish",
458
+ "License compliance check",
459
+ "Typosquat risk scan on new dependencies",
460
+ ],
461
+ frontend: [
462
+ "XSS via dangerouslySetInnerHTML without sanitization",
463
+ "Token / secret storage in localStorage / sessionStorage (should be httpOnly cookies)",
464
+ "Accessibility: alt text, labels, keyboard navigation, focus rings, aria",
465
+ "Bundle size budgets (initial JS < 200KB)",
466
+ "Error boundaries around route components",
467
+ "Loading / error states for every async data fetch",
468
+ "CLS-causing patterns: images without dimensions, dynamic content injection",
469
+ "Responsive breakpoint coverage on core flows",
470
+ ],
471
+ "ai-governance": [
472
+ "Prompt injection vectors in user-facing LLM prompts",
473
+ "Input sanitization before LLM calls",
474
+ "Rate limiting and cost/token budget enforcement on AI endpoints",
475
+ "Human-in-the-loop for high-risk AI decisions (write paths, config changes)",
476
+ "Model versioning and eval regression checks before prompt/route changes",
477
+ "Tool/agent permission escalation risks (agents spawning sub-agents beyond allowed budget)",
478
+ "Audit trail for AI-generated actions (provenance metadata linking commit → workflow → artifact)",
479
+ "Kill switches for autonomous flows; rollback path verified",
480
+ ],
481
+ };
482
+
295
483
  export const PERSONA_IDS = Object.keys(PERSONA_PROMPTS);
296
- export { PERSONA_PROMPTS };
484
+ export { PERSONA_PROMPTS, SWE_FRAMEWORK_CHECKLIST, FAANG_GRADE_PREAMBLE };