mustflow 2.75.2 → 2.85.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70)
  1. package/README.md +40 -3
  2. package/dist/cli/commands/docs.js +86 -2
  3. package/dist/cli/commands/script-pack.js +9 -0
  4. package/dist/cli/i18n/en.js +180 -2
  5. package/dist/cli/i18n/es.js +180 -2
  6. package/dist/cli/i18n/fr.js +180 -2
  7. package/dist/cli/i18n/hi.js +180 -2
  8. package/dist/cli/i18n/ko.js +180 -2
  9. package/dist/cli/i18n/zh.js +180 -2
  10. package/dist/cli/lib/repo-map.js +27 -6
  11. package/dist/cli/lib/run-root-trust.js +15 -1
  12. package/dist/cli/lib/script-pack-registry.js +275 -6
  13. package/dist/cli/lib/validation/index.js +2 -2
  14. package/dist/cli/lib/validation/primitives.js +4 -1
  15. package/dist/cli/script-packs/code-change-impact.js +172 -0
  16. package/dist/cli/script-packs/code-dependency-graph.js +181 -0
  17. package/dist/cli/script-packs/code-export-diff.js +160 -0
  18. package/dist/cli/script-packs/code-outline.js +33 -5
  19. package/dist/cli/script-packs/code-route-outline.js +155 -0
  20. package/dist/cli/script-packs/docs-reference-drift.js +150 -0
  21. package/dist/cli/script-packs/repo-config-chain.js +163 -0
  22. package/dist/cli/script-packs/repo-env-contract.js +156 -0
  23. package/dist/cli/script-packs/repo-related-files.js +161 -0
  24. package/dist/cli/script-packs/repo-secret-risk-scan.js +147 -0
  25. package/dist/core/change-impact.js +383 -0
  26. package/dist/core/change-verification.js +32 -5
  27. package/dist/core/code-outline.js +460 -79
  28. package/dist/core/config-chain.js +595 -0
  29. package/dist/core/config-loading.js +121 -4
  30. package/dist/core/dependency-graph.js +490 -0
  31. package/dist/core/env-contract.js +450 -0
  32. package/dist/core/export-diff.js +359 -0
  33. package/dist/core/line-endings.js +26 -13
  34. package/dist/core/public-json-contracts.js +126 -0
  35. package/dist/core/reference-drift.js +388 -0
  36. package/dist/core/related-files.js +493 -0
  37. package/dist/core/route-outline.js +964 -0
  38. package/dist/core/script-pack-suggestions.js +131 -5
  39. package/dist/core/secret-risk-scan.js +440 -0
  40. package/dist/core/source-anchors.js +13 -1
  41. package/package.json +1 -1
  42. package/schemas/README.md +44 -6
  43. package/schemas/change-impact-report.schema.json +150 -0
  44. package/schemas/code-outline-report.schema.json +1 -1
  45. package/schemas/code-symbol-read-report.schema.json +64 -4
  46. package/schemas/commands.schema.json +12 -0
  47. package/schemas/config-chain-report.schema.json +187 -0
  48. package/schemas/dependency-graph-report.schema.json +149 -0
  49. package/schemas/env-contract-report.schema.json +203 -0
  50. package/schemas/export-diff-report.schema.json +220 -0
  51. package/schemas/reference-drift-report.schema.json +166 -0
  52. package/schemas/related-files-report.schema.json +145 -0
  53. package/schemas/route-outline-report.schema.json +200 -0
  54. package/schemas/secret-risk-scan-report.schema.json +152 -0
  55. package/templates/default/common/.mustflow/config/commands.toml +21 -0
  56. package/templates/default/i18n.toml +21 -9
  57. package/templates/default/locales/en/.mustflow/docs/agent-workflow.md +1 -1
  58. package/templates/default/locales/en/.mustflow/skills/INDEX.md +8 -2
  59. package/templates/default/locales/en/.mustflow/skills/architecture-deepening-review/SKILL.md +28 -11
  60. package/templates/default/locales/en/.mustflow/skills/astro-code-change/SKILL.md +71 -27
  61. package/templates/default/locales/en/.mustflow/skills/cross-agent-session-reference/SKILL.md +146 -0
  62. package/templates/default/locales/en/.mustflow/skills/dependency-upgrade-review/SKILL.md +3 -1
  63. package/templates/default/locales/en/.mustflow/skills/github-contribution-quality-gate/SKILL.md +48 -11
  64. package/templates/default/locales/en/.mustflow/skills/javascript-code-change/SKILL.md +15 -13
  65. package/templates/default/locales/en/.mustflow/skills/node-code-change/SKILL.md +16 -14
  66. package/templates/default/locales/en/.mustflow/skills/routes.toml +21 -9
  67. package/templates/default/locales/en/.mustflow/skills/security-privacy-review/SKILL.md +3 -1
  68. package/templates/default/locales/en/.mustflow/skills/test-suite-performance-review/SKILL.md +314 -0
  69. package/templates/default/locales/en/.mustflow/skills/typescript-code-change/SKILL.md +13 -10
  70. package/templates/default/manifest.toml +15 -1
@@ -0,0 +1,314 @@
1
+ ---
2
+ mustflow_doc: skill.test-suite-performance-review
3
+ locale: en
4
+ canonical: true
5
+ revision: 1
6
+ lifecycle: mustflow-owned
7
+ authority: procedure
8
+ name: test-suite-performance-review
9
+ description: Apply this skill when test-suite runtime, CI feedback latency, test selection, shard balance, worker scheduling, retry policy, flaky-test handling, fixture setup, database or container test lifecycle, coverage or artifact overhead, test-result caching, test discovery, or test performance claims are planned, edited, reviewed, or reported.
10
+ metadata:
11
+ mustflow_schema: "1"
12
+ mustflow_kind: procedure
13
+ pack_id: mustflow.core
14
+ skill_id: mustflow.core.test-suite-performance-review
15
+ command_intents:
16
+ - changes_status
17
+ - changes_diff_summary
18
+ - build
19
+ - test_related
20
+ - test
21
+ - test_audit
22
+ - docs_validate_fast
23
+ - test_release
24
+ - mustflow_check
25
+ ---
26
+
27
+ # Test Suite Performance Review
28
+
29
+ <!-- mustflow-section: purpose -->
30
+ ## Purpose
31
+
32
+ Make test suites faster without turning the default verification path into wishful thinking.
33
+
34
+ The review question is not "can the suite run in parallel?" It is "where is the wall-clock time
35
+ actually spent, which tests are safe to skip or run later, which workers are waiting, which shared
36
+ resources are saturated, and what evidence keeps a faster path trustworthy?"
37
+
38
+ <!-- mustflow-section: use-when -->
39
+ ## Use When
40
+
41
+ - Test or CI runtime, developer-loop feedback time, shard balance, worker count, test discovery,
42
+ coverage collection, trace or artifact generation, retry policy, flaky-test handling, or test
43
+ result caching is created, changed, reviewed, or reported.
44
+ - A task changes command contracts, CI workflows, test runner configuration, test grouping, test
45
+ selection, test scheduling, package scripts, fixture setup, database setup, container lifecycle,
46
+ browser test behavior, or coverage and report defaults for performance reasons.
47
+ - A report claims tests are faster, optimized, selected, cached, parallelized, sharded, hermetic,
48
+ stable, less flaky, or safer to run on every PR.
49
+ - A test suite is slow because of repeated process startup, full-directory discovery, repeated
50
+ migration or seed work, per-test containers, sleeps, external internet calls, huge artifacts,
51
+ tail shards, or over-broad full-suite execution.
52
+
53
+ <!-- mustflow-section: do-not-use-when -->
54
+ ## Do Not Use When
55
+
56
+ - The task only adds or updates behavior tests; use `test-design-guard` or `test-maintenance`.
57
+ - The task only reviews whether production code is testable; use `testability-boundary-review`.
58
+ - The task is ordinary application hot-path performance with no test runner, fixture, CI, or
59
+ verification-loop behavior; use `performance-budget-check`.
60
+ - The user only wants a one-time local command result and no persistent test or CI behavior changes.
61
+ - The proposed change weakens verification by deleting tests, skipping failed tests, removing
62
+ assertions, disabling coverage gates, or hiding flaky tests without an evidence-based policy.
63
+
64
+ <!-- mustflow-section: required-inputs -->
65
+ ## Required Inputs
66
+
67
+ - Suite surface: local command, CI job, package script, test runner, shard system, coverage job,
68
+ browser test job, database-backed test job, or release gate.
69
+ - Timing ledger: discovery, process startup, shared setup, fixture creation, test body, database or
70
+ container setup, cleanup, coverage, report generation, artifact upload, queue wait, shard tail,
71
+ and idle-worker time when available.
72
+ - Test timing evidence: p50 and p95 per test or file, longest tests, previous failures, flaky
73
+ history, shard start and finish times, worker utilization, retry count, and timeout outliers.
74
+ - Selection ledger: changed files, dependency graph, runtime coverage or touched-file evidence,
75
+ newly added tests, previously failed tests, unsafe-change fallback rules, and scheduled full-suite
76
+ safety net.
77
+ - Isolation ledger: time, randomness, UUIDs, locale, timezone, environment variables, home
78
+ directory, filesystem paths, network, database, containers, queues, browser profiles, caches,
79
+ module state, and runner process reuse.
80
+ - Resource ledger: CPU, memory, DB connections, browser processes, containers, network, filesystem,
81
+ GPU, SQLite or PostgreSQL locks, port allocation, and any scarce resource that needs a token or
82
+ affinity rule.
83
+ - Cache ledger: declared inputs, cache keys, volatile values, hit rate, miss reasons, runner-local
84
+ cache, remote cache, result-cache eligibility, and invalidation behavior.
85
+ - Relevant command-intent entries for tests, builds, docs, release checks, and mustflow validation.
86
+
87
+ <!-- mustflow-section: preconditions -->
88
+ ## Preconditions
89
+
90
+ - The task matches the Use When conditions and does not match the Do Not Use When exclusions.
91
+ - Higher-priority instructions and `.mustflow/config/commands.toml` have been checked for the
92
+ current scope.
93
+ - Required inputs are available, or missing timing, selection, cache, isolation, or resource
94
+ evidence can be reported without guessing.
95
+ - If a change touches production behavior or test coverage meaning, use the matching behavior,
96
+ test-design, security, data, or release skill before accepting the performance change.
97
+ - If test result caching or selected-test execution is introduced, the full-suite fallback and
98
+ scheduled full-suite safety net are defined first.
99
+
100
+ <!-- mustflow-section: allowed-edits -->
101
+ ## Allowed Edits
102
+
103
+ - Add or refine test timing collection, historical duration stores, selected-test manifests,
104
+ dependency-to-test maps, runner scheduling, shard assignment, worker limits, retry classification,
105
+ fixture lifecycle, local stubs, fake timers, cache keys, coverage defaults, and CI report policy.
106
+ - Add or adjust tests, fixtures, docs, command contracts, release checks, and template surfaces
107
+ directly tied to the test-suite performance behavior.
108
+ - Move expensive setup from per-test to per-worker, per-module, or per-session scope only when
109
+ mutable state still has a cheap isolation layer.
110
+ - Do not remove tests, weaken assertions, disable failure artifacts, or mark flaky tests as normal
111
+ success to make a suite look faster.
112
+ - Do not add unbounded parallelism, shared database schemas, cross-test mutable fixtures, or result
113
+ caches when hidden inputs are not declared.
114
+
115
+ <!-- mustflow-section: procedure -->
116
+ ## Procedure
117
+
118
+ 1. Measure before optimizing.
119
+ - Split elapsed time into discovery, process startup, shared setup, fixture creation, test body,
120
+ database or container setup, cleanup, coverage, report generation, artifact upload, queue wait,
121
+ shard tail, and idle-worker time.
122
+ - Report p50 and p95 per test or file when available. Average-only timing hides the last slow
123
+ shard, and the last slow shard decides CI wall time.
124
+ 2. Classify the bottleneck.
125
+ - Discovery bottleneck: full tree scans, broad classpath scans, fixtures and generated files in
126
+ search paths, or missing precomputed test manifests.
127
+ - Startup bottleneck: one process, JVM, Python, Node, browser, container, or dependency-injection
128
+ graph per test instead of a safe persistent worker.
129
+ - Fixture bottleneck: repeated migrations, seed data, model loads, browser profile setup, or
130
+ container creation.
131
+ - Scheduling bottleneck: file-count sharding, no historical duration data, no work stealing,
132
+ huge test files, or worker count beyond the resource optimum.
133
+ - Artifact bottleneck: coverage, screenshots, videos, traces, full logs, HTML reports, or upload
134
+ work paid on every PR path instead of only failure, retry, nightly, or release paths.
135
+ 3. Replace blanket full-suite execution with selected execution only when fallback is explicit.
136
+ - Include affected tests from changed files, new tests, and previously failed tests.
137
+ - Fall back to the full suite for changes to lockfiles, compiler or runtime settings, test runner
138
+ configuration, shared fixtures, migrations, database schema, generated contracts, package
139
+ metadata that affects runtime, or any change the selector cannot understand.
140
+ - Keep a scheduled full-suite path for nightly, release, or pre-merge confidence.
141
+ 4. Do not build impact analysis from imports alone.
142
+ - Combine static dependency evidence with previous runtime evidence such as touched files,
143
+ classes, schemas, config keys, fixtures, resources, and test-to-source mappings.
144
+ - Prefer file or class granularity when method-level tracing costs more than it saves.
145
+ - Treat dynamic imports, reflection, generated code, config-driven branches, and framework magic
146
+ as reasons to widen selection or fall back.
147
+ 5. Run likely failures early.
148
+ - Order previously failed tests, recently changed tests, tests near the changed code, historically
149
+ flaky tests, and long tests before low-risk tests.
150
+ - Use early-fail jobs for fast developer feedback, but keep a separate full-data job that
151
+ continues collecting remaining failures when the workflow needs all failures.
152
+ 6. Cache only hermetic test results.
153
+ - A successful test result can be reused only when the test binary, test code, affected product
154
+ code, fixtures, environment, toolchain, locale, timezone, and declared resources match.
155
+ - Do not include volatile values such as commit SHA, build number, wall-clock time, random temp
156
+ roots, or runner-specific paths in cache keys unless they truly affect output.
157
+ - Do include language version, compiler flags, runtime flags, timezone, locale, dependency lock
158
+ content, fixture versions, and DB schema version when they affect behavior.
159
+ - Track cache hit rate and miss reasons after adding a cache; a cache with invisible misses is
160
+ performance theater.
161
+ 7. Preserve warm local and remote caches.
162
+ - Avoid clearing dependency caches, transformed source caches, test discovery indexes, compiled
163
+ output, and package caches at the start of every CI job.
164
+ - Prefer runner-local SSD caches or content-hash restore over slow network-volume scans.
165
+ 8. Avoid full discovery on every run.
166
+ - Generate or maintain package-level test manifests when the project can do so safely.
167
+ - Exclude fixtures, snapshots, generated output, archived tests, vendored code, and build output
168
+ from discovery paths.
169
+ - For JVM and similar ecosystems, prefer explicit include patterns over broad classpath scanning
170
+ when the runner supports it.
171
+ 9. Reuse workers before increasing worker count.
172
+ - Keep interpreters, JIT, dependency injection, ASTs, browser engines, and database clients warm
173
+ across multiple tests when isolation allows it.
174
+ - Reset static state, module caches, timers, environment, and global fixtures between tests.
175
+ - Replace workers only after measured memory leaks, not by default after every test.
176
+ 10. Size work units for the scheduler.
177
+ - Do not send thousands of tiny remote jobs where queue setup, sandbox setup, input transfer,
178
+ and result collection cost more than the test body.
179
+ - Bundle micro-tests by package, runtime, or fixture affinity while preserving per-test result
180
+ reporting.
181
+ 11. Shard by historical duration, not file count.
182
+ - Use recent successful durations, trimmed means, or exponential moving averages.
183
+ - Assign longest tests first to the currently lightest shard.
184
+ - Give new tests the median duration of their directory or test type, and cap timeout outliers so one historic failure
185
+ does not poison future placement.
186
+ 12. Add work stealing when static shards still leave idle workers.
187
+ - Let early workers take not-yet-started work from overloaded shards.
188
+ - Keep tests that share expensive setup together unless the tail cost is worse than duplicated
189
+ setup.
190
+ 13. Split huge files below the file level when supported.
191
+ - File-level sharding is fake parallelism when one file contains most of the runtime.
192
+ - Confirm that the runner actually honors case-level sharding; unsupported sharding can cause
193
+ every shard to run the whole suite.
194
+ 14. Schedule by resource tokens and affinity.
195
+ - Tag tests by CPU, memory, DB connections, browser processes, containers, GPU, filesystem, and
196
+ network pressure.
197
+ - Use resource tokens or locks for scarce shared resources instead of disabling all parallelism.
198
+ - Keep expensive fixture groups together when setup reuse beats perfect load balance.
199
+ 15. Tune worker count empirically.
200
+ - Compare worker counts such as 1, 2, 4, and 8 on the same commit.
201
+ - Stop increasing workers when wall time stops improving, memory peaks rise sharply, DB waits
202
+ increase, browser tests start swapping memory, or the flaky rate rises.
203
+ 16. Make database and container setup worker-scoped.
204
+ - Prefer one DB server or service container per worker, with per-test schemas, databases,
205
+ transactions, savepoints, namespaces, ports, or fixture copies.
206
+ - Do not let all workers share the same mutable schema unless the suite is intentionally
207
+ serialized.
208
+ - Build a migrated and seeded template DB or snapshot once, then clone it per worker when
209
+ migrations or seed data dominate.
210
+ 17. Reset mutable state cheaply.
211
+ - Prefer transaction rollback or savepoints for tests whose side effects stay inside one DB
212
+ connection.
213
+ - Use DB clone, schema clone, or container snapshot for tests that cross processes, queues,
214
+ external workers, sequence assertions, or asynchronous effects.
215
+ - Use tmpfs only for non-persistent state with explicit size limits.
216
+ 18. Replace sleeps with readiness and fake time.
217
+ - Treat fixed sleeps as performance bugs.
218
+ - Wait for real readiness: health endpoints, logs, successful queries, event receipt, or port
219
+ plus protocol-level checks.
220
+ - For timers, retries, expiry, and scheduled work, inject clocks or use fake timers instead of
221
+ waiting in real time.
222
+ 19. Remove external internet from the default CI path.
223
+ - Use local stubs, fixtures, mocks, recorded responses, or contract-specific jobs.
224
+ - Block unrelated images, fonts, analytics, and third-party requests in UI tests unless the test
225
+ is specifically about that delivery behavior.
226
+ 20. Control hidden inputs.
227
+ - Inject time, random seeds, UUID generation, locale, timezone, environment, and current user.
228
+ - Report seed values on failure so a failed run can be reproduced.
229
+ 21. Move expensive evidence off the default path.
230
+ - Collect full coverage, videos, traces, screenshots, full logs, and HTML reports only where
231
+ they pay for themselves: failures, first retry, nightly, release, or dedicated coverage jobs.
232
+ - Merge shard reports after tests finish rather than blocking each shard on heavy reporting.
233
+ 22. Retry narrowly and honestly.
234
+ - Retry only the failed test or case, once, in a fresh worker when possible.
235
+ - Preserve first-failure logs and artifacts.
236
+ - Classify retry success as flaky success, not normal success.
237
+ 23. Use speculative tail duplication only for hermetic shards.
238
+ - If the last shard exceeds its historical p95 while workers are idle, a scheduler may duplicate
239
+ that shard and accept the first result.
240
+ - Do not duplicate non-hermetic tests, tests with side effects, or tests that consume scarce
241
+ external resources.
242
+ 24. Garbage-collect the test portfolio.
243
+ - Track duration, unique behavior coverage, defect-finding history, flaky rate, owner, and last
244
+ meaningful update.
245
+ - Move low-value duplicates from PR paths to nightly paths before deleting them.
246
+ - Quarantine flaky tests with an owner and an expiry date; do not let quarantine become permanent silence.
247
+ 25. Verify the faster path and the fallback path.
248
+ - A selector, cache, shard rule, retry policy, or fixture reuse change is not complete until the
249
+ default fast path and the fallback or full path both have evidence or a reported missing
250
+ command intent.
251
+
252
+ <!-- mustflow-section: postconditions -->
253
+ ## Postconditions
254
+
255
+ - The test-suite bottleneck is classified by timing, selection, scheduling, cache, fixture, DB,
256
+ container, artifact, retry, or resource evidence.
257
+ - The optimization goal is explicit: faster first failure, shorter PR wall time, lower CI cost,
258
+ lower flaky rate, smaller artifact overhead, or faster local feedback.
259
+ - Selected-test execution has a full-suite fallback for unsafe or unknown changes.
260
+ - Caches declare inputs, omit irrelevant volatile values, and report hit rate or miss reasons when
261
+ evidence exists.
262
+ - Worker count, resource tokens, shard placement, fixture affinity, and retry behavior preserve
263
+ test isolation and failure evidence.
264
+ - Any speed claim is measured, complexity-only, or explicitly unverified.
265
+
266
+ <!-- mustflow-section: verification -->
267
+ ## Verification
268
+
269
+ Use configured oneshot command intents when available:
270
+
271
+ - `changes_status`
272
+ - `changes_diff_summary`
273
+ - `build`
274
+ - `test_related`
275
+ - `test`
276
+ - `test_audit`
277
+ - `docs_validate_fast`
278
+ - `test_release`
279
+ - `mustflow_check`
280
+
281
+ Use the narrowest configured test, build, docs, release, or mustflow intent that proves the changed
282
+ test-suite behavior. If the repository exposes a profiling or cached-test intent, use it only when
283
+ the command contract marks it configured, oneshot, and agent-allowed.
284
+
285
+ <!-- mustflow-section: failure-handling -->
286
+ ## Failure Handling
287
+
288
+ - If timing evidence is missing, add bounded measurement or report the missing evidence before
289
+ changing scheduling, caching, or selection policy.
290
+ - If selection cannot understand a change, fall back to the full suite.
291
+ - If a cache key is uncertain, prefer a cache miss over a false hit.
292
+ - If worker reuse causes order dependence, leaked state, or flaky failures, isolate the leaked
293
+ resource before widening reuse.
294
+ - If parallelism makes tests slower, check shared DB, browser, memory, filesystem, and container
295
+ contention before increasing workers again.
296
+ - If a retry hides a real failure, preserve first-failure evidence and classify the result as flaky.
297
+ - If configured verification is missing, report the missing intent instead of inventing raw runner
298
+ commands.
299
+
300
+ <!-- mustflow-section: output-format -->
301
+ ## Output Format
302
+
303
+ - Suite surface and feedback goal
304
+ - Timing breakdown and bottleneck class
305
+ - Selection policy and full-suite fallback
306
+ - Scheduling, sharding, worker, and resource-token decisions
307
+ - Fixture, DB, container, filesystem, time, randomness, network, and isolation notes
308
+ - Cache key, hit-rate, and miss-reason notes
309
+ - Retry and flaky-test policy
310
+ - Coverage, trace, log, screenshot, video, report, and artifact policy
311
+ - Speed evidence: measured, complexity-only, or unverified
312
+ - Command intents run
313
+ - Skipped command intents and reasons
314
+ - Remaining test-suite performance risk
@@ -2,7 +2,7 @@
2
2
  mustflow_doc: skill.typescript-code-change
3
3
  locale: en
4
4
  canonical: true
5
- revision: 4
5
+ revision: 5
6
6
  lifecycle: mustflow-owned
7
7
  authority: procedure
8
8
  name: typescript-code-change
@@ -91,19 +91,22 @@ Preserve TypeScript's type, runtime validation, module, build, and public API bo
91
91
  11. For type tests, prefer `@ts-expect-error` with a short reason. Do not use `@ts-ignore` in implementation code. Implementation `@ts-expect-error` needs an owner, removal condition, and risk report.
92
92
  12. If a public API changes, trace every consumer-visible import specifier, runtime export, type export, declaration output, docs example, type-only export, overload, generic default, interface field, enum or literal member, class member, and package entry condition.
93
93
  13. Treat `exports`, `types`, `typings`, `typesVersions`, package `type`, file extensions, path aliases, declaration import paths, and barrel exports as public API surfaces. Adding or tightening `exports` can break existing deep imports.
94
- 14. If ESM/CJS behavior changes, verify package `type`, `main`, `module`, `browser`, `exports`, condition order, extension rules, generated JS, and generated declaration files together.
95
- 15. Inspect generated declarations when package surfaces change. Declaration files must not leak source-only aliases, private paths, workspace-only package names, unpublished internal paths, or accidental public re-exports.
96
- 16. For TypeScript 6 migration work, treat deprecation warnings as future TypeScript 7 removal risk. `ignoreDeprecations` is a temporary compatibility valve, not proof that the project is ready for 7.0. Prefer removing deprecated options and updating resolver or module choices to match the project runtime.
97
- 17. Treat TypeScript 6 `--stableTypeOrdering` as a migration comparison tool for declaration and error-order differences, not as a permanent performance-neutral default. If it changes errors or declaration output, look for inference or declaration-stability issues instead of snapshotting noise.
98
- 18. For TypeScript 7 migration work, keep the tracks separate:
94
+ 14. For TypeScript that emits code for native Node ESM, prefer `.ts` source plus package `"type": "module"` and `module`/`moduleResolution` set to `NodeNext` or the repository's fixed Node mode. Do not rename every source file to `.mts` just to mean ESM; reserve `.mts` and `.cts` for explicit per-file module overrides or mixed-package boundaries.
95
+ 15. In TypeScript source that targets native Node ESM, write relative imports using the emitted runtime specifier, usually `.js`, such as `import { createApp } from "./app.js"` from `app.ts`. Do not write extensionless relative imports or `.ts` runtime specifiers unless a declared loader, bundler, or runtime explicitly owns that behavior.
96
+ 16. Use `moduleResolution: "Bundler"` only when a bundler such as Vite, esbuild, Rollup, or a framework build system owns final module resolution. Do not use bundler resolution to model code that Node will execute directly without that bundler.
97
+ 17. If ESM/CJS behavior changes, verify package `type`, `main`, `module`, `browser`, `exports`, condition order, extension rules, generated JS, and generated declaration files together.
98
+ 18. Inspect generated declarations when package surfaces change. Declaration files must not leak source-only aliases, private paths, workspace-only package names, unpublished internal paths, or accidental public re-exports.
99
+ 19. For TypeScript 6 migration work, treat deprecation warnings as future TypeScript 7 removal risk. `ignoreDeprecations` is a temporary compatibility valve, not proof that the project is ready for 7.0. Prefer removing deprecated options and updating resolver or module choices to match the project runtime.
100
+ 20. Treat TypeScript 6 `--stableTypeOrdering` as a migration comparison tool for declaration and error-order differences, not as a permanent performance-neutral default. If it changes errors or declaration output, look for inference or declaration-stability issues instead of snapshotting noise.
101
+ 21. For TypeScript 7 migration work, keep the tracks separate:
99
102
  - TS6 stable API track: `@typescript/typescript6` and `tsc6` for compiler API, transformer, ESLint, framework wrapper, and peer-dependency compatibility.
100
103
  - TS7 RC compiler track: `typescript@rc` and `tsc` for RC compiler verification.
101
104
  - TS7 nightly track: `@typescript/native-preview` and `tsgo` for nightly diagnostics only.
102
105
  - Future TS7 stable track: stable `typescript` once upstream publishes TypeScript 7 on the normal stable path.
103
- 19. Keep compiler API consumers, language-service plugins, custom transformers, and framework typecheck wrappers on the TS6 API track until their owners explicitly support the TS7 API surface. Treat TS7 RC `tsc` as compiler verification, not proof that JavaScript compiler API consumers can migrate.
104
- 20. When comparing TS6 `tsc6`, TS7 RC `tsc`, and optional TS7 nightly `tsgo`, classify differences before editing code: real type error, declaration emit order or printback noise, unsupported option, unsupported API, watch or incremental behavior gap, language-service gap, generated-output drift, or framework wrapper mismatch.
105
- 21. Do not treat faster TS7 RC or nightly results as sufficient verification. Keep the repository's existing `tsc`, `tsc6`, or framework typecheck as the compatibility baseline until repository policy explicitly adopts a different compiler track.
106
- 22. Choose the narrowest configured verification intents that cover typecheck, lint, tests, build output, declarations, package contract risk, and downstream-style consumer risk.
106
+ 22. Keep compiler API consumers, language-service plugins, custom transformers, and framework typecheck wrappers on the TS6 API track until their owners explicitly support the TS7 API surface. Treat TS7 RC `tsc` as compiler verification, not proof that JavaScript compiler API consumers can migrate.
107
+ 23. When comparing TS6 `tsc6`, TS7 RC `tsc`, and optional TS7 nightly `tsgo`, classify differences before editing code: real type error, declaration emit order or printback noise, unsupported option, unsupported API, watch or incremental behavior gap, language-service gap, generated-output drift, or framework wrapper mismatch.
108
+ 24. Do not treat faster TS7 RC or nightly results as sufficient verification. Keep the repository's existing `tsc`, `tsc6`, or framework typecheck as the compatibility baseline until repository policy explicitly adopts a different compiler track.
109
+ 25. Choose the narrowest configured verification intents that cover typecheck, lint, tests, build output, declarations, package contract risk, and downstream-style consumer risk.
107
110
 
108
111
  <!-- mustflow-section: assertion-policy -->
109
112
  ## Assertion Policy
@@ -1,6 +1,6 @@
1
1
  id = "default"
2
2
  name = "default"
3
- version = "2.75.2"
3
+ version = "2.85.4"
4
4
  description = "Minimal workflow for LLM agents to read, edit, and verify their work in a repository."
5
5
  common_root = "common"
6
6
  locales_root = "locales"
@@ -127,6 +127,7 @@ creates = [
127
127
  ".mustflow/skills/release-publish-change/SKILL.md",
128
128
  ".mustflow/skills/test-design-guard/SKILL.md",
129
129
  ".mustflow/skills/test-maintenance/SKILL.md",
130
+ ".mustflow/skills/test-suite-performance-review/SKILL.md",
130
131
  ".mustflow/skills/vertical-slice-tdd/SKILL.md",
131
132
  ".mustflow/skills/llm-service-ux-review/SKILL.md",
132
133
  ".mustflow/skills/prompt-contract-quality-review/SKILL.md",
@@ -157,6 +158,7 @@ creates = [
157
158
  ".mustflow/skills/proactive-risk-surfacing/SKILL.md",
158
159
  ".mustflow/skills/repo-improvement-loop/SKILL.md",
159
160
  ".mustflow/skills/restricted-handoff-resume/SKILL.md",
161
+ ".mustflow/skills/cross-agent-session-reference/SKILL.md",
160
162
  ".mustflow/skills/structure-discovery-gate/SKILL.md",
161
163
  ".mustflow/skills/readme-authoring/SKILL.md",
162
164
  ".mustflow/skills/requirement-regression-guard/SKILL.md",
@@ -324,6 +326,7 @@ minimal = [
324
326
  "proactive-risk-surfacing",
325
327
  "requirement-regression-guard",
326
328
  "restricted-handoff-resume",
329
+ "cross-agent-session-reference",
327
330
  "repro-first-debug",
328
331
  "security-privacy-review",
329
332
  "secret-exposure-response",
@@ -335,6 +338,7 @@ minimal = [
335
338
  "support-surface-advisor",
336
339
  "test-design-guard",
337
340
  "test-maintenance",
341
+ "test-suite-performance-review",
338
342
  "vertical-slice-tdd",
339
343
  ]
340
344
  patterns = [
@@ -462,6 +466,7 @@ patterns = [
462
466
  "proactive-risk-surfacing",
463
467
  "repo-improvement-loop",
464
468
  "restricted-handoff-resume",
469
+ "cross-agent-session-reference",
465
470
  "result-option",
466
471
  "requirement-regression-guard",
467
472
  "repro-first-debug",
@@ -477,6 +482,7 @@ patterns = [
477
482
  "support-surface-advisor",
478
483
  "test-design-guard",
479
484
  "test-maintenance",
485
+ "test-suite-performance-review",
480
486
  "vertical-slice-tdd",
481
487
  ]
482
488
  oss = [
@@ -620,6 +626,7 @@ oss = [
620
626
  "result-option",
621
627
  "requirement-regression-guard",
622
628
  "restricted-handoff-resume",
629
+ "cross-agent-session-reference",
623
630
  "repro-first-debug",
624
631
  "security-privacy-review",
625
632
  "security-regression-tests",
@@ -637,6 +644,7 @@ oss = [
637
644
  "support-surface-advisor",
638
645
  "test-design-guard",
639
646
  "test-maintenance",
647
+ "test-suite-performance-review",
640
648
  "vertical-slice-tdd",
641
649
  ]
642
650
  team = [
@@ -769,6 +777,7 @@ team = [
769
777
  "result-option",
770
778
  "requirement-regression-guard",
771
779
  "restricted-handoff-resume",
780
+ "cross-agent-session-reference",
772
781
  "repro-first-debug",
773
782
  "security-privacy-review",
774
783
  "secret-exposure-response",
@@ -782,6 +791,7 @@ team = [
782
791
  "support-surface-advisor",
783
792
  "test-design-guard",
784
793
  "test-maintenance",
794
+ "test-suite-performance-review",
785
795
  "vertical-slice-tdd",
786
796
  ]
787
797
  product = [
@@ -913,6 +923,7 @@ product = [
913
923
  "result-option",
914
924
  "requirement-regression-guard",
915
925
  "restricted-handoff-resume",
926
+ "cross-agent-session-reference",
916
927
  "repro-first-debug",
917
928
  "security-privacy-review",
918
929
  "secret-exposure-response",
@@ -927,6 +938,7 @@ product = [
927
938
  "support-surface-advisor",
928
939
  "test-design-guard",
929
940
  "test-maintenance",
941
+ "test-suite-performance-review",
930
942
  "vertical-slice-tdd",
931
943
  "ui-quality-gate",
932
944
  "visual-review-artifact",
@@ -1071,6 +1083,7 @@ library = [
1071
1083
  "result-option",
1072
1084
  "requirement-regression-guard",
1073
1085
  "restricted-handoff-resume",
1086
+ "cross-agent-session-reference",
1074
1087
  "repro-first-debug",
1075
1088
  "security-privacy-review",
1076
1089
  "security-regression-tests",
@@ -1087,6 +1100,7 @@ library = [
1087
1100
  "support-surface-advisor",
1088
1101
  "test-design-guard",
1089
1102
  "test-maintenance",
1103
+ "test-suite-performance-review",
1090
1104
  "vertical-slice-tdd",
1091
1105
  ]
1092
1106