waypoint-codex 0.8.0 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30)
  1. package/README.md +17 -24
  2. package/dist/src/cli.js +15 -36
  3. package/dist/src/core.js +16 -323
  4. package/dist/src/templates.js +1 -4
  5. package/dist/src/upgrade.js +98 -8
  6. package/package.json +1 -1
  7. package/templates/.agents/skills/backend-context-interview/SKILL.md +70 -0
  8. package/templates/.agents/skills/backend-ship-audit/SKILL.md +221 -0
  9. package/templates/.agents/skills/backend-ship-audit/agents/openai.yaml +3 -0
  10. package/templates/.agents/skills/backend-ship-audit/references/audit-framework.md +228 -0
  11. package/templates/.agents/skills/backend-ship-audit/references/report-template.md +92 -0
  12. package/templates/.agents/skills/frontend-context-interview/SKILL.md +60 -0
  13. package/templates/.agents/skills/frontend-ship-audit/SKILL.md +87 -0
  14. package/templates/.agents/skills/frontend-ship-audit/agents/openai.yaml +3 -0
  15. package/templates/.agents/skills/frontend-ship-audit/references/guidance-file-updates.md +57 -0
  16. package/templates/.agents/skills/frontend-ship-audit/references/report-template.md +51 -0
  17. package/templates/.agents/skills/frontend-ship-audit/references/review-framework.md +83 -0
  18. package/templates/.agents/skills/frontend-ship-audit/scripts/create_frontend_audit.py +81 -0
  19. package/templates/.codex/agents/plan-reviewer.toml +1 -2
  20. package/templates/.waypoint/README.md +2 -5
  21. package/templates/.waypoint/SOUL.md +1 -1
  22. package/templates/.waypoint/agent-operating-manual.md +12 -9
  23. package/templates/.waypoint/agents/code-health-reviewer.md +10 -1
  24. package/templates/.waypoint/agents/plan-reviewer.md +7 -2
  25. package/templates/.waypoint/config.toml +0 -3
  26. package/templates/managed-agents-block.md +38 -2
  27. package/templates/.waypoint/automations/README.md +0 -18
  28. package/templates/.waypoint/automations/docs-garden.toml +0 -7
  29. package/templates/.waypoint/automations/repo-health.toml +0 -8
  30. package/templates/.waypoint/rules/README.md +0 -6
@@ -9,17 +9,84 @@ export function buildInitArgs(projectRoot, config) {
9
9
  if (config.profile === "app-friendly") {
10
10
  args.push("--app-friendly");
11
11
  }
12
- const featureMap = config.features ?? {};
13
- if (featureMap.roles) {
14
- args.push("--with-roles");
12
+ return args;
13
+ }
14
+ function parseVersion(version) {
15
+ const trimmed = version.trim().replace(/^v/, "");
16
+ const match = trimmed.match(/^(\d+)\.(\d+)\.(\d+)(?:-([0-9A-Za-z.-]+))?$/);
17
+ if (!match) {
18
+ return null;
15
19
  }
16
- if (featureMap.rules) {
17
- args.push("--with-rules");
20
+ return {
21
+ core: [Number(match[1]), Number(match[2]), Number(match[3])],
22
+ prerelease: match[4] ? match[4].split(".") : [],
23
+ };
24
+ }
25
+ function compareIdentifiers(left, right) {
26
+ const leftNumeric = /^\d+$/.test(left);
27
+ const rightNumeric = /^\d+$/.test(right);
28
+ if (leftNumeric && rightNumeric) {
29
+ return Number(left) - Number(right);
18
30
  }
19
- if (featureMap.automations) {
20
- args.push("--with-automations");
31
+ if (leftNumeric) {
32
+ return -1;
21
33
  }
22
- return args;
34
+ if (rightNumeric) {
35
+ return 1;
36
+ }
37
+ return left.localeCompare(right);
38
+ }
39
+ export function compareVersions(left, right) {
40
+ const leftParsed = parseVersion(left);
41
+ const rightParsed = parseVersion(right);
42
+ if (!leftParsed || !rightParsed) {
43
+ return left.localeCompare(right);
44
+ }
45
+ for (let index = 0; index < leftParsed.core.length; index += 1) {
46
+ const difference = leftParsed.core[index] - rightParsed.core[index];
47
+ if (difference !== 0) {
48
+ return difference;
49
+ }
50
+ }
51
+ const leftPrerelease = leftParsed.prerelease;
52
+ const rightPrerelease = rightParsed.prerelease;
53
+ if (leftPrerelease.length === 0 && rightPrerelease.length === 0) {
54
+ return 0;
55
+ }
56
+ if (leftPrerelease.length === 0) {
57
+ return 1;
58
+ }
59
+ if (rightPrerelease.length === 0) {
60
+ return -1;
61
+ }
62
+ const length = Math.max(leftPrerelease.length, rightPrerelease.length);
63
+ for (let index = 0; index < length; index += 1) {
64
+ const leftIdentifier = leftPrerelease[index];
65
+ const rightIdentifier = rightPrerelease[index];
66
+ if (leftIdentifier === undefined) {
67
+ return -1;
68
+ }
69
+ if (rightIdentifier === undefined) {
70
+ return 1;
71
+ }
72
+ const difference = compareIdentifiers(leftIdentifier, rightIdentifier);
73
+ if (difference !== 0) {
74
+ return difference;
75
+ }
76
+ }
77
+ return 0;
78
+ }
79
+ function latestWaypointVersion(options) {
80
+ const npmBinary = options.npmBinary ?? process.env.WAYPOINT_NPM_COMMAND ?? npmBinaryForPlatform();
81
+ const latest = spawnSync(npmBinary, ["view", "waypoint-codex", "version"], {
82
+ stdio: "pipe",
83
+ encoding: "utf8",
84
+ });
85
+ if ((latest.status ?? 1) !== 0) {
86
+ return null;
87
+ }
88
+ const version = latest.stdout?.trim();
89
+ return version ? version : null;
23
90
  }
24
91
  function hasWaypointConfig(projectRoot) {
25
92
  return existsSync(path.join(projectRoot, ".waypoint/config.toml"));
@@ -53,3 +120,26 @@ export function upgradeWaypoint(options) {
53
120
  });
54
121
  return doctor.status ?? 1;
55
122
  }
123
+ export function maybeUpgradeWaypointBeforeInit(options) {
124
+ const nodeBinary = options.nodeBinary ?? process.execPath;
125
+ const npmBinary = options.npmBinary ?? process.env.WAYPOINT_NPM_COMMAND ?? npmBinaryForPlatform();
126
+ const stdio = options.stdio ?? "inherit";
127
+ const latestVersion = latestWaypointVersion({ npmBinary });
128
+ if (!latestVersion || compareVersions(latestVersion, options.currentVersion) <= 0) {
129
+ return null;
130
+ }
131
+ console.log(`Waypoint CLI ${options.currentVersion} is older than latest ${latestVersion}. Updating before init...`);
132
+ const update = spawnSync(npmBinary, ["install", "-g", "waypoint-codex@latest"], {
133
+ stdio,
134
+ });
135
+ if ((update.status ?? 1) !== 0) {
136
+ return update.status ?? 1;
137
+ }
138
+ const reexecArgs = options.initArgs.includes("--skip-cli-update")
139
+ ? options.initArgs
140
+ : [...options.initArgs, "--skip-cli-update"];
141
+ const init = spawnSync(nodeBinary, [options.cliEntry, "init", ...reexecArgs], {
142
+ stdio,
143
+ });
144
+ return init.status ?? 1;
145
+ }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "waypoint-codex",
3
- "version": "0.8.0",
3
+ "version": "0.9.0",
4
4
  "description": "Codex-native repository operating system: scaffolding, docs routing, repo-local skills, doctor, and sync.",
5
5
  "license": "MIT",
6
6
  "type": "module",
@@ -0,0 +1,70 @@
1
+ ---
2
+ name: backend-context-interview
3
+ description: Gather and persist durable backend project context when missing or insufficient for implementation, architecture decisions, or ship-readiness review. Use when backend choices depend on scale, criticality, compliance, tenant model, compatibility, reliability, security posture, or similar context that is not clearly documented.
4
+ ---
5
+
6
+ # Backend Context Interview
7
+
8
+ Use this skill when relevant backend context is missing, stale, contradictory, or too weak to support correct implementation or review decisions.
9
+
10
+ ## Goals
11
+
12
+ 1. identify the missing backend context that materially affects the work
13
+ 2. ask only high-leverage questions that cannot be answered from the repo or guidance files
14
+ 3. persist durable context into the project root guidance file
15
+ 4. avoid repeated questioning in future tasks
16
+
17
+ ## When to use
18
+
19
+ Use this skill when the current task depends on context such as:
20
+ - internal tool vs public internet-facing product
21
+ - expected scale, concurrency, and criticality
22
+ - regulatory, privacy, or compliance requirements
23
+ - multi-tenant vs single-tenant behavior
24
+ - backward compatibility requirements
25
+ - uptime and reliability expectations
26
+ - migration and rollback risk tolerance
27
+ - security posture expectations
28
+ - observability or incident response expectations
29
+ - infrastructure constraints that materially affect design
30
+
31
+ Do not use this skill when the answer is already clearly present in `AGENTS.md`, architecture docs, runbooks, or the task itself.
32
+
33
+ ## Workflow
34
+
35
+ ### 1. Check persisted context first
36
+
37
+ Inspect the project root guidance files.
38
+
39
+ Priority:
40
+ 1. `AGENTS.md`
41
+
42
+ Look for:
43
+ - `## Project Context`
44
+ - `## Backend Context`
45
+ - equivalent sections with the same intent
46
+
47
+ If the existing section is accurate and sufficient, do not interview the user.
48
+
49
+ ### 2. Determine what is actually missing
50
+
51
+ Only ask questions that materially affect implementation or review choices.
52
+
53
+ Good triggers:
54
+ - public service vs internal tool changes reliability and security bar
55
+ - scale and concurrency change architecture depth and observability expectations
56
+ - compatibility requirements change migration and API decisions
57
+ - tenant model changes authorization and data-isolation design
58
+
59
+ Do not ask broad or low-value questions.
60
+
61
+ ### 3. Ask concise grouped questions
62
+
63
+ Ask the minimum set of questions needed.
64
+
65
+ Suggested categories:
66
+ - product type and exposure
67
+ - scale and criticality
68
+ - data sensitivity and compliance
69
+
70
+ Do not ask generic product questions that do not affect backend engineering.
@@ -0,0 +1,221 @@
1
+ ---
2
+ name: backend-ship-audit
3
+ description: Audit a backend scope for practical ship readiness with evidence-based findings focused on real release risk rather than style. Use when reviewing a backend service, feature, endpoint group, worker, scheduler, API surface, pull request, or directory to decide whether it is ready to ship; when the backend scope must be resolved from repository structure; when complete-file reading is required to understand behavior and dependencies; when only high-leverage deployment-context questions should be asked after repository exploration; when durable backend context should be persisted in project-root AGENTS.md; or when a timestamped audit should be written under .waypoint/audit/.
4
+ ---
5
+
6
+ # Backend ship audit
7
+
8
+ Follow this workflow in order. Optimize for practical release readiness. Ignore style-only issues.
9
+
10
+ Use bundled resources as follows:
11
+ - Use `references/audit-framework.md` for detailed evaluation prompts and severity calibration.
12
+ - Use `references/report-template.md` for the audit structure and finding format.
13
+
14
+ ## 1. Resolve the reviewable unit
15
+
16
+ Turn the user request into the narrowest defensible backend unit that can be audited end to end.
17
+
18
+ Determine and state:
19
+ - requested scope
20
+ - assumed scope used for the audit
21
+ - code and docs directly in scope
22
+ - adjacent dependencies and boundaries that materially affect behavior
23
+ - explicit out-of-scope areas
24
+
25
+ Use repository structure to narrow broad requests. Prefer a reviewable unit such as a single service, endpoint group, worker flow, API surface, migration set, or pull request boundary over a vague "entire backend" scope.
26
+
27
+ If the request is ambiguous, make the best defensible narrowing decision from the repository and state it in the audit. Do not block on clarification unless the ambiguity itself is the primary risk.
28
+
29
+ ## 2. Build complete understanding from the repository
30
+
31
+ Explore the repository deeply enough to understand the scoped backend and the dependencies that materially affect ship readiness.
32
+
33
+ Start with a map, then move to complete reads:
34
+ 1. Read project guidance first. Prefer root `AGENTS.md`, plus any scoped guidance files if present.
35
+ 2. Read architecture docs, ADRs, onboarding docs, runbooks, incident notes, and API or contract docs that touch the scope.
36
+ 3. Map entry points and main flows with fast search tools.
37
+ 4. Read the full contents of every file that matters to architecture understanding, behavior, or findings.
38
+ 5. Trace the end-to-end flow across handlers, validation, services, repositories, persistence, jobs, queues, external clients, configuration, and tests.
39
+
40
+ Read complete files for relevant materials. Do not rely on grep hits, matched snippets, partial chunks, or truncated previews for any file that informs architectural understanding or a finding.
41
+
42
+ Read complete files for all relevant artifacts you encounter, including when present:
43
+ - backend source files
44
+ - handlers, controllers, routes, services, jobs, workers, schedulers
45
+ - schemas, models, serializers, DTOs, validators
46
+ - repositories, query layers, persistence code
47
+ - migrations and migration helpers
48
+ - queue and background processing code
49
+ - configuration files and environment examples
50
+ - tests and test helpers
51
+ - API specs and contract definitions
52
+ - runbooks and operational docs
53
+ - incident notes or postmortems
54
+
55
+ Skip obviously irrelevant material such as vendored dependencies, generated artifacts, build outputs, caches, or unrelated subsystems.
56
+
57
+ Keep a working inventory of fully read files. Include that inventory in the audit under a "What was read" section.
58
+
59
+ When code and docs disagree, trust code for current behavior unless the docs clearly describe runtime behavior not visible in the repository. Call out the discrepancy if it matters to shipping risk.
60
+
61
+ ## 3. Build an internal system model
62
+
63
+ Before asking questions or writing findings, form a concrete model of:
64
+ - request and event entry points
65
+ - trust boundaries
66
+ - data flow and persistence path
67
+ - transaction and consistency boundaries
68
+ - async boundaries and retry paths
69
+ - authn and authz assumptions
70
+ - tenant scoping model when relevant
71
+ - configuration, feature flags, and runtime dependencies
72
+ - observability and operational control points
73
+
74
+ Use that model to identify what can fail, what can race, what can duplicate, what can corrupt data, and what can become hard to detect or recover.
75
+
76
+ ## 4. Ask only missing high-leverage questions
77
+
78
+ Ask questions only after repository exploration and only when the answer materially changes the ship-readiness bar.
79
+
80
+ Group questions by topic. Keep them concise. Usually ask no more than 3 to 8 questions total unless the backend context is unusually under-specified.
81
+
82
+ Target questions at facts that cannot be established from the codebase or docs, such as:
83
+ - deployment exposure and user type
84
+ - expected scale, burstiness, and concurrency
85
+ - data sensitivity or compliance constraints
86
+ - single-tenant or multi-tenant expectations
87
+ - uptime, recovery, and rollback tolerance
88
+ - backward compatibility and API stability requirements
89
+ - observability and incident response expectations
90
+ - security posture or access-control expectations
91
+
92
+ Do not ask generic architecture-preference questions. Do not ask for information already visible in the repository.
93
+
94
+ If answers are unavailable, proceed with explicit assumptions. Place assumptions in the audit and calibrate severity to the confidence of the evidence.
95
+
96
+ ## 5. Persist durable backend context
97
+
98
+ Persist durable deployment context into the project root guidance file by editing it manually.
99
+
100
+ Choose the target file in this order:
101
+ 1. `AGENTS.md` in the project root
102
+ 2. no edit if it does not exist, unless the user explicitly asked to create one
103
+
104
+ Prefer updating an existing `## Backend Context` section. Add the section only if it is missing. Preserve surrounding content exactly. Do not overwrite unrelated sections. Do not duplicate existing context. Do not make any edit if the existing section is already accurate and sufficiently complete.
105
+
106
+ Persist only stable context such as:
107
+ - internal vs internet-facing exposure
108
+ - expected scale and criticality
109
+ - data sensitivity or compliance constraints
110
+ - tenant model
111
+ - compatibility requirements
112
+ - reliability expectations
113
+ - security posture assumptions
114
+ - rollback tolerance
115
+
116
+ Do not persist transient audit findings, temporary mitigations, or release-specific defects.
117
+
118
+ Make this edit manually. Prefer the smallest precise change that preserves all unrelated guidance exactly.
119
+
120
+ ## 6. Evaluate ship readiness with a backend-risk lens
121
+
122
+ Assess the scoped backend like a strong backend reviewer. Focus on real ship risk, not code taste.
123
+
124
+ Evaluate at least these categories when relevant:
125
+ - scope and architecture fit
126
+ - API and contract quality
127
+ - input validation and trust boundaries
128
+ - domain modeling correctness
129
+ - data integrity and consistency
130
+ - transaction boundaries and idempotency
131
+ - migration safety and rollback safety
132
+ - failure handling and retry semantics
133
+ - timeouts, cancellation, and backpressure
134
+ - concurrency and race risks
135
+ - queue and background job correctness
136
+ - authorization and access control
137
+ - authentication assumptions
138
+ - secret handling and configuration safety
139
+ - tenant isolation
140
+ - security vulnerabilities and unsafe defaults
141
+ - boundary clarity between layers and services
142
+ - reliability under expected production conditions
143
+ - observability, alertability, and debuggability
144
+ - test coverage for meaningful failure modes
145
+ - future legibility and maintainability as it affects shipping risk
146
+
147
+ Use judgment. Do not force findings in every category.
148
+
149
+ Treat missing evidence carefully:
150
+ - Missing tests, docs, or operational controls can be findings if the absence creates real release risk.
151
+ - Unknown deployment context can lower confidence or elevate a question into a release condition.
152
+ - Lack of proof is not the same as proof of failure. State uncertainty explicitly.
153
+
154
+ ## 7. Calibrate priority consistently
155
+
156
+ Use this model exactly:
157
+ - `P0`: clear ship blocker; likely severe outage, data loss, data corruption, critical security issue, or fundamentally unsafe release
158
+ - `P1`: serious issue that should usually be fixed before shipping; substantial reliability, security, integrity, or operational risk
159
+ - `P2`: important issue that may be acceptable only with conscious risk acceptance; not an immediate blocker in all contexts
160
+ - `P3`: moderate weakness or gap; address soon but not necessarily before launch
161
+ - `P4`: minor improvement with limited near-term impact
162
+
163
+ Do not inflate severity. Severity depends on deployment context, blast radius, reversibility, exploitability, and confidence.
164
+
165
+ ## 8. Write the audit document
166
+
167
+ Write the final audit to:
168
+
169
+ `.waypoint/audit/dd-mm-yyyy-hh-mm-backend-audit.md`
170
+
171
+ Create directories if needed.
172
+
173
+ Use `references/report-template.md` as the base structure. Include at minimum:
174
+ - title and timestamp
175
+ - requested scope and assumed scope
176
+ - in-scope, adjacent dependencies, and out-of-scope areas
177
+ - what was read
178
+ - open questions and explicit assumptions
179
+ - concise system understanding
180
+ - ship recommendation
181
+ - prioritized findings
182
+ - release conditions or follow-up actions
183
+
184
+ Prefer path and line-based evidence when practical. If line numbers are not practical, cite the full file path plus the specific symbol, function, class, migration, endpoint, or test that supports the finding.
185
+
186
+ ## 9. Format findings for actionability
187
+
188
+ Give every finding:
189
+ - ID
190
+ - title
191
+ - priority from `P0` to `P4`
192
+ - why it matters
193
+ - evidence
194
+ - affected area
195
+ - risk if shipped as-is
196
+ - recommended fix
197
+ - confidence level if evidence is incomplete
198
+
199
+ Use concise, concrete language. Tie every finding to repository evidence or an explicit unanswered question that materially affects readiness.
200
+
201
+ ## 10. Make the ship decision explicit
202
+
203
+ End with a clear recommendation:
204
+ - `Ready to ship`
205
+ - `Ready to ship with explicit risk acceptance`
206
+ - `Not ready to ship`
207
+
208
+ Use `Not ready to ship` when there is a `P0`, an unresolved `P1`, or a material unknown that could plausibly conceal a `P0` or `P1` under the stated deployment context.
209
+
210
+ If the scope is acceptable only with conditions, list the exact conditions.
211
+
212
+ ## 11. Avoid low-value review behavior
213
+
214
+ Do not include:
215
+ - style policing
216
+ - subjective nitpicks
217
+ - trivial refactor suggestions without ship impact
218
+ - generic best-practice commentary unsupported by repository evidence
219
+ - vague advice such as "add more tests" without naming the missing failure mode or blind spot
220
+
221
+ Prefer a short audit with strong evidence over a long audit with weak claims.
@@ -0,0 +1,3 @@
1
+ display_name: Backend Ship Audit
2
+ short_description: Audit a backend scope for real ship risk and write an evidence-based readiness report.
3
+ default_prompt: Audit this backend scope for ship readiness. Resolve scope from the repository, read relevant backend code and docs completely, ask only missing high-leverage questions, persist durable backend context, and write a prioritized audit under .waypoint/audit/.
@@ -0,0 +1,228 @@
1
+ # Backend audit framework
2
+
3
+ Use this reference to turn repository understanding into a practical release-risk review.
4
+
5
+ ## Table of contents
6
+
7
+ 1. [Scope and architecture fit](#1-scope-and-architecture-fit)
8
+ 2. [API and contract quality](#2-api-and-contract-quality)
9
+ 3. [Domain modeling and correctness](#3-domain-modeling-and-correctness)
10
+ 4. [Data integrity and consistency](#4-data-integrity-and-consistency)
11
+ 5. [Migration safety and rollback safety](#5-migration-safety-and-rollback-safety)
12
+ 6. [Async, queue, and worker correctness](#6-async-queue-and-worker-correctness)
13
+ 7. [Failure handling, timeouts, and backpressure](#7-failure-handling-timeouts-and-backpressure)
14
+ 8. [Security, auth, and tenant isolation](#8-security-auth-and-tenant-isolation)
15
+ 9. [Reliability and operational readiness](#9-reliability-and-operational-readiness)
16
+ 10. [Test evidence](#10-test-evidence)
17
+ 11. [Severity calibration](#11-severity-calibration)
18
+ 12. [Handling unknowns](#12-handling-unknowns)
19
+
20
+ ## 1. Scope and architecture fit
21
+
22
+ Check whether the scoped backend unit has a clear responsibility and a defensible boundary.
23
+
24
+ Look for:
25
+ - hidden coupling to unrelated subsystems that makes the requested scope impossible to reason about safely
26
+ - ownership split across layers or services without clear contracts
27
+ - feature behavior controlled by undocumented flags or runtime configuration
28
+ - operational behavior that only exists in deployment config but is absent from docs and tests
29
+
30
+ Evidence to seek:
31
+ - route or handler entry points
32
+ - service and repository boundaries
33
+ - config loading paths
34
+ - feature flag checks
35
+ - ADRs or architecture docs
36
+
37
+ ## 2. API and contract quality
38
+
39
+ Check whether external or internal contracts are safe to release under the expected compatibility bar.
40
+
41
+ Look for:
42
+ - request validation gaps
43
+ - response-shape drift from spec or tests
44
+ - undocumented behavior changes
45
+ - missing idempotency on retryable writes
46
+ - unsafe pagination or filtering semantics
47
+ - partial-success behavior without clear client contract
48
+ - incompatible changes to event payloads, queues, or partner-facing APIs
49
+
50
+ Evidence to seek:
51
+ - handlers, validators, serializers, DTOs, OpenAPI or protobuf specs
52
+ - consumer tests or contract tests
53
+ - backward-compatibility notes in docs or migrations
54
+
55
+ ## 3. Domain modeling and correctness
56
+
57
+ Check whether the code actually enforces business invariants.
58
+
59
+ Look for:
60
+ - invariants enforced only in UI or caller code
61
+ - state transitions with missing guards
62
+ - invalid states representable in persistence
63
+ - derived fields that can drift from source-of-truth data
64
+ - duplicate logic that can diverge under edge cases
65
+
66
+ Evidence to seek:
67
+ - model logic, service logic, validators, persistence constraints, tests
68
+
69
+ ## 4. Data integrity and consistency
70
+
71
+ Check how writes behave under failure, retries, and concurrent activity.
72
+
73
+ Look for:
74
+ - missing transactions around multi-step writes
75
+ - transactions that include external calls and can hang or partially commit
76
+ - non-atomic read-modify-write flows
77
+ - retry paths that can duplicate side effects
78
+ - absence of uniqueness or foreign-key enforcement where the invariant depends on it
79
+ - eventual-consistency behavior with no compensating logic or visibility
80
+
81
+ Evidence to seek:
82
+ - transaction boundaries
83
+ - repositories and queries
84
+ - migration constraints and indexes
85
+ - retry wrappers and dedup logic
86
+ - job enqueue order relative to commits
87
+
88
+ ## 5. Migration safety and rollback safety
89
+
90
+ Check whether schema and data changes are safe under real deploy conditions.
91
+
92
+ Look for:
93
+ - destructive or irreversible migrations without a rollback story
94
+ - schema changes that break old binaries during rolling deploys
95
+ - backfills mixed into request paths or startup
96
+ - long-running or locking operations on hot tables
97
+ - reliance on application code to keep dual-write or dual-read windows safe without tests
98
+
99
+ Evidence to seek:
100
+ - migration files
101
+ - deploy docs
102
+ - backfill code
103
+ - startup hooks
104
+ - compatibility tests
105
+
106
+ ## 6. Async, queue, and worker correctness
107
+
108
+ Check whether background processing is safe under duplicate delivery, delay, and partial failure.
109
+
110
+ Look for:
111
+ - no idempotency for retryable jobs
112
+ - poison-message loops
113
+ - missing dead-letter or terminal-failure handling
114
+ - enqueue-before-commit races
115
+ - jobs that assume fresh state but run much later
116
+ - scheduled tasks that can overlap or race
117
+ - side effects without deduplication keys
118
+
119
+ Evidence to seek:
120
+ - worker code
121
+ - queue wrappers
122
+ - job payload definitions
123
+ - retry config
124
+ - scheduling config
125
+ - tests for duplicate or delayed execution
126
+
127
+ ## 7. Failure handling, timeouts, and backpressure
128
+
129
+ Check whether the system fails predictably under dependency issues and traffic spikes.
130
+
131
+ Look for:
132
+ - outbound calls without timeout or cancellation
133
+ - retries without budget, jitter, or bounding
134
+ - fan-out paths with unbounded concurrency
135
+ - silent fallbacks that hide corruption or dropped work
136
+ - blocking behavior in request paths that should shed load
137
+ - missing admission control or queue bounds where bursts are expected
138
+
139
+ Evidence to seek:
140
+ - HTTP or RPC client construction
141
+ - context propagation
142
+ - timeout configuration
143
+ - retry policies
144
+ - worker pool limits
145
+ - queue depth controls
146
+
147
+ ## 8. Security, auth, and tenant isolation
148
+
149
+ Check whether trust boundaries are explicit and enforced server-side.
150
+
151
+ Look for:
152
+ - authz performed only in callers or UI layers
153
+ - object-level access checks missing on reads or writes
154
+ - tenant scoping inferred from untrusted input
155
+ - secrets in code, logs, examples, or unsafe defaults
156
+ - admin flows exposed through general routes without stronger controls
157
+ - unsafe deserialization, query injection, SSRF, path traversal, or similar flaws relevant to the stack
158
+
159
+ Evidence to seek:
160
+ - middleware
161
+ - route guards
162
+ - policy checks
163
+ - repository filters
164
+ - config files and environment examples
165
+ - tests for unauthorized access
166
+
167
+ ## 9. Reliability and operational readiness
168
+
169
+ Check whether on-call engineers could detect, triage, and mitigate failures.
170
+
171
+ Look for:
172
+ - missing structured logs around critical writes or state transitions
173
+ - no metrics for queues, retries, dead letters, or error rates
174
+ - no tracing through core flows
175
+ - lack of alerting hooks for silent backlog growth or repeated failure
176
+ - missing runbooks for risky operations or recovery paths
177
+ - no feature flag, kill switch, or disable path for a new high-risk flow
178
+
179
+ Evidence to seek:
180
+ - logging calls
181
+ - metrics and tracing instrumentation
182
+ - alert config
183
+ - dashboards
184
+ - runbooks
185
+ - incident notes
186
+
187
+ ## 10. Test evidence
188
+
189
+ Check whether tests cover the failure modes that matter for shipping.
190
+
191
+ Look for:
192
+ - tests only for happy paths
193
+ - no coverage for authorization failures, duplicate delivery, retries, partial failures, migrations, or concurrency-sensitive paths
194
+ - mocks that hide contract drift or persistence behavior
195
+ - no integration coverage where correctness depends on transaction, queue, or DB behavior
196
+
197
+ Evidence to seek:
198
+ - unit tests
199
+ - integration tests
200
+ - end-to-end tests
201
+ - test helpers and fixtures
202
+ - migration or backfill tests
203
+
204
+ ## 11. Severity calibration
205
+
206
+ Calibrate severity using blast radius, exploitability, reversibility, and confidence.
207
+
208
+ Typical anchors:
209
+ - P0: a likely severe outage, corruption path, data-loss path, or critical security flaw with no acceptable workaround
210
+ - P1: a serious integrity, security, or reliability gap that should usually block release
211
+ - P2: a material weakness that may be acceptable with explicit risk acceptance and monitoring
212
+ - P3: a moderate gap with limited immediate blast radius
213
+ - P4: a minor improvement with little near-term release impact
214
+
215
+ Increase severity when the backend is public, high-scale, handles sensitive data, or has low rollback tolerance.
216
+ Decrease severity when the flow is internal, low-volume, reversible, heavily monitored, and has a strong kill switch.
217
+
218
+ ## 12. Handling unknowns
219
+
220
+ Treat missing context as a first-class part of the audit.
221
+
222
+ When context is missing:
223
+ - ask only the questions that change severity or release conditions
224
+ - state assumptions explicitly
225
+ - lower confidence when evidence is incomplete
226
+ - promote the unknown into a release condition when the uncertainty itself is risky
227
+
228
+ Do not invent certainty.