mustflow 2.75.2 → 2.85.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +40 -3
- package/dist/cli/commands/docs.js +86 -2
- package/dist/cli/commands/script-pack.js +9 -0
- package/dist/cli/i18n/en.js +180 -2
- package/dist/cli/i18n/es.js +180 -2
- package/dist/cli/i18n/fr.js +180 -2
- package/dist/cli/i18n/hi.js +180 -2
- package/dist/cli/i18n/ko.js +180 -2
- package/dist/cli/i18n/zh.js +180 -2
- package/dist/cli/lib/repo-map.js +27 -6
- package/dist/cli/lib/run-root-trust.js +15 -1
- package/dist/cli/lib/script-pack-registry.js +275 -6
- package/dist/cli/lib/validation/index.js +2 -2
- package/dist/cli/lib/validation/primitives.js +4 -1
- package/dist/cli/script-packs/code-change-impact.js +172 -0
- package/dist/cli/script-packs/code-dependency-graph.js +181 -0
- package/dist/cli/script-packs/code-export-diff.js +160 -0
- package/dist/cli/script-packs/code-outline.js +33 -5
- package/dist/cli/script-packs/code-route-outline.js +155 -0
- package/dist/cli/script-packs/docs-reference-drift.js +150 -0
- package/dist/cli/script-packs/repo-config-chain.js +163 -0
- package/dist/cli/script-packs/repo-env-contract.js +156 -0
- package/dist/cli/script-packs/repo-related-files.js +161 -0
- package/dist/cli/script-packs/repo-secret-risk-scan.js +147 -0
- package/dist/core/change-impact.js +383 -0
- package/dist/core/change-verification.js +32 -5
- package/dist/core/code-outline.js +460 -79
- package/dist/core/config-chain.js +595 -0
- package/dist/core/config-loading.js +121 -4
- package/dist/core/dependency-graph.js +490 -0
- package/dist/core/env-contract.js +450 -0
- package/dist/core/export-diff.js +359 -0
- package/dist/core/line-endings.js +26 -13
- package/dist/core/public-json-contracts.js +126 -0
- package/dist/core/reference-drift.js +388 -0
- package/dist/core/related-files.js +493 -0
- package/dist/core/route-outline.js +964 -0
- package/dist/core/script-pack-suggestions.js +131 -5
- package/dist/core/secret-risk-scan.js +440 -0
- package/dist/core/source-anchors.js +13 -1
- package/package.json +1 -1
- package/schemas/README.md +44 -6
- package/schemas/change-impact-report.schema.json +150 -0
- package/schemas/code-outline-report.schema.json +1 -1
- package/schemas/code-symbol-read-report.schema.json +64 -4
- package/schemas/commands.schema.json +12 -0
- package/schemas/config-chain-report.schema.json +187 -0
- package/schemas/dependency-graph-report.schema.json +149 -0
- package/schemas/env-contract-report.schema.json +203 -0
- package/schemas/export-diff-report.schema.json +220 -0
- package/schemas/reference-drift-report.schema.json +166 -0
- package/schemas/related-files-report.schema.json +145 -0
- package/schemas/route-outline-report.schema.json +200 -0
- package/schemas/secret-risk-scan-report.schema.json +152 -0
- package/templates/default/common/.mustflow/config/commands.toml +21 -0
- package/templates/default/i18n.toml +21 -9
- package/templates/default/locales/en/.mustflow/docs/agent-workflow.md +1 -1
- package/templates/default/locales/en/.mustflow/skills/INDEX.md +8 -2
- package/templates/default/locales/en/.mustflow/skills/architecture-deepening-review/SKILL.md +28 -11
- package/templates/default/locales/en/.mustflow/skills/astro-code-change/SKILL.md +71 -27
- package/templates/default/locales/en/.mustflow/skills/cross-agent-session-reference/SKILL.md +146 -0
- package/templates/default/locales/en/.mustflow/skills/dependency-upgrade-review/SKILL.md +3 -1
- package/templates/default/locales/en/.mustflow/skills/github-contribution-quality-gate/SKILL.md +48 -11
- package/templates/default/locales/en/.mustflow/skills/javascript-code-change/SKILL.md +15 -13
- package/templates/default/locales/en/.mustflow/skills/node-code-change/SKILL.md +16 -14
- package/templates/default/locales/en/.mustflow/skills/routes.toml +21 -9
- package/templates/default/locales/en/.mustflow/skills/security-privacy-review/SKILL.md +3 -1
- package/templates/default/locales/en/.mustflow/skills/test-suite-performance-review/SKILL.md +314 -0
- package/templates/default/locales/en/.mustflow/skills/typescript-code-change/SKILL.md +13 -10
- package/templates/default/manifest.toml +15 -1
|
@@ -0,0 +1,314 @@
|
|
|
1
|
+
---
|
|
2
|
+
mustflow_doc: skill.test-suite-performance-review
|
|
3
|
+
locale: en
|
|
4
|
+
canonical: true
|
|
5
|
+
revision: 1
|
|
6
|
+
lifecycle: mustflow-owned
|
|
7
|
+
authority: procedure
|
|
8
|
+
name: test-suite-performance-review
|
|
9
|
+
description: Apply this skill when test-suite runtime, CI feedback latency, test selection, shard balance, worker scheduling, retry policy, flaky-test handling, fixture setup, database or container test lifecycle, coverage or artifact overhead, test-result caching, test discovery, or test performance claims are planned, edited, reviewed, or reported.
|
|
10
|
+
metadata:
|
|
11
|
+
mustflow_schema: "1"
|
|
12
|
+
mustflow_kind: procedure
|
|
13
|
+
pack_id: mustflow.core
|
|
14
|
+
skill_id: mustflow.core.test-suite-performance-review
|
|
15
|
+
command_intents:
|
|
16
|
+
- changes_status
|
|
17
|
+
- changes_diff_summary
|
|
18
|
+
- build
|
|
19
|
+
- test_related
|
|
20
|
+
- test
|
|
21
|
+
- test_audit
|
|
22
|
+
- docs_validate_fast
|
|
23
|
+
- test_release
|
|
24
|
+
- mustflow_check
|
|
25
|
+
---
|
|
26
|
+
|
|
27
|
+
# Test Suite Performance Review
|
|
28
|
+
|
|
29
|
+
<!-- mustflow-section: purpose -->
|
|
30
|
+
## Purpose
|
|
31
|
+
|
|
32
|
+
Make test suites faster without turning the default verification path into wishful thinking.
|
|
33
|
+
|
|
34
|
+
The review question is not "can the suite run in parallel?" It is "where is the wall-clock time
|
|
35
|
+
actually spent, which tests are safe to skip or run later, which workers are waiting, which shared
|
|
36
|
+
resources are saturated, and what evidence keeps a faster path trustworthy?"
|
|
37
|
+
|
|
38
|
+
<!-- mustflow-section: use-when -->
|
|
39
|
+
## Use When
|
|
40
|
+
|
|
41
|
+
- Test or CI runtime, developer-loop feedback time, shard balance, worker count, test discovery,
|
|
42
|
+
coverage collection, trace or artifact generation, retry policy, flaky-test handling, or test
|
|
43
|
+
result caching is created, changed, reviewed, or reported.
|
|
44
|
+
- A task changes command contracts, CI workflows, test runner configuration, test grouping, test
|
|
45
|
+
selection, test scheduling, package scripts, fixture setup, database setup, container lifecycle,
|
|
46
|
+
browser test behavior, or coverage and report defaults for performance reasons.
|
|
47
|
+
- A report claims tests are faster, optimized, selected, cached, parallelized, sharded, hermetic,
|
|
48
|
+
stable, less flaky, or safer to run on every PR.
|
|
49
|
+
- A test suite is slow because of repeated process startup, full-directory discovery, repeated
|
|
50
|
+
migration or seed work, per-test containers, sleeps, external internet calls, huge artifacts,
|
|
51
|
+
tail shards, or over-broad full-suite execution.
|
|
52
|
+
|
|
53
|
+
<!-- mustflow-section: do-not-use-when -->
|
|
54
|
+
## Do Not Use When
|
|
55
|
+
|
|
56
|
+
- The task only adds or updates behavior tests; use `test-design-guard` or `test-maintenance`.
|
|
57
|
+
- The task only reviews whether production code is testable; use `testability-boundary-review`.
|
|
58
|
+
- The task is ordinary application hot-path performance with no test runner, fixture, CI, or
|
|
59
|
+
verification-loop behavior; use `performance-budget-check`.
|
|
60
|
+
- The user only wants a one-time local command result and no persistent test or CI behavior changes.
|
|
61
|
+
- The proposed change weakens verification by deleting tests, skipping failed tests, removing
|
|
62
|
+
assertions, disabling coverage gates, or hiding flaky tests without an evidence-based policy.
|
|
63
|
+
|
|
64
|
+
<!-- mustflow-section: required-inputs -->
|
|
65
|
+
## Required Inputs
|
|
66
|
+
|
|
67
|
+
- Suite surface: local command, CI job, package script, test runner, shard system, coverage job,
|
|
68
|
+
browser test job, database-backed test job, or release gate.
|
|
69
|
+
- Timing ledger: discovery, process startup, shared setup, fixture creation, test body, database or
|
|
70
|
+
container setup, cleanup, coverage, report generation, artifact upload, queue wait, shard tail,
|
|
71
|
+
and idle-worker time when available.
|
|
72
|
+
- Test timing evidence: p50 and p95 per test or file, longest tests, previous failures, flaky
|
|
73
|
+
history, shard start and finish times, worker utilization, retry count, and timeout outliers.
|
|
74
|
+
- Selection ledger: changed files, dependency graph, runtime coverage or touched-file evidence,
|
|
75
|
+
newly added tests, previous failed tests, unsafe-change fallback rules, and scheduled full-suite
|
|
76
|
+
safety net.
|
|
77
|
+
- Isolation ledger: time, randomness, UUIDs, locale, timezone, environment variables, home
|
|
78
|
+
directory, filesystem paths, network, database, containers, queues, browser profiles, caches,
|
|
79
|
+
module state, and runner process reuse.
|
|
80
|
+
- Resource ledger: CPU, memory, DB connections, browser processes, containers, network, filesystem,
|
|
81
|
+
GPU, SQLite or PostgreSQL locks, port allocation, and any scarce resource that needs a token or
|
|
82
|
+
affinity rule.
|
|
83
|
+
- Cache ledger: declared inputs, cache keys, volatile values, hit rate, miss reasons, runner-local
|
|
84
|
+
cache, remote cache, result-cache eligibility, and invalidation behavior.
|
|
85
|
+
- Relevant command-intent entries for tests, builds, docs, release checks, and mustflow validation.
|
|
86
|
+
|
|
87
|
+
<!-- mustflow-section: preconditions -->
|
|
88
|
+
## Preconditions
|
|
89
|
+
|
|
90
|
+
- The task matches the Use When conditions and does not match the Do Not Use When exclusions.
|
|
91
|
+
- Higher-priority instructions and `.mustflow/config/commands.toml` have been checked for the
|
|
92
|
+
current scope.
|
|
93
|
+
- Required inputs are available, or missing timing, selection, cache, isolation, or resource
|
|
94
|
+
evidence can be reported without guessing.
|
|
95
|
+
- If a change touches production behavior or test coverage meaning, use the matching behavior,
|
|
96
|
+
test-design, security, data, or release skill before accepting the performance change.
|
|
97
|
+
- If test-result caching or selected-test execution is introduced, the full-suite fallback and
|
|
98
|
+
scheduled full-suite safety net are defined first.
|
|
99
|
+
|
|
100
|
+
<!-- mustflow-section: allowed-edits -->
|
|
101
|
+
## Allowed Edits
|
|
102
|
+
|
|
103
|
+
- Add or refine test timing collection, historical duration stores, selected-test manifests,
|
|
104
|
+
dependency-to-test maps, runner scheduling, shard assignment, worker limits, retry classification,
|
|
105
|
+
fixture lifecycle, local stubs, fake timers, cache keys, coverage defaults, and CI report policy.
|
|
106
|
+
- Add or adjust tests, fixtures, docs, command contracts, release checks, and template surfaces
|
|
107
|
+
directly tied to the test-suite performance behavior.
|
|
108
|
+
- Move expensive setup from per-test to per-worker, per-module, or per-session scope only when
|
|
109
|
+
mutable state still has a cheap isolation layer.
|
|
110
|
+
- Do not remove tests, weaken assertions, disable failure artifacts, or mark flaky tests as normal
|
|
111
|
+
success to make a suite look faster.
|
|
112
|
+
- Do not add unbounded parallelism, shared database schemas, cross-test mutable fixtures, or result
|
|
113
|
+
caches when hidden inputs are not declared.
|
|
114
|
+
|
|
115
|
+
<!-- mustflow-section: procedure -->
|
|
116
|
+
## Procedure
|
|
117
|
+
|
|
118
|
+
1. Measure before optimizing.
|
|
119
|
+
- Split elapsed time into discovery, process startup, shared setup, fixture creation, test body,
|
|
120
|
+
database or container setup, cleanup, coverage, report generation, artifact upload, queue wait,
|
|
121
|
+
shard tail, and idle-worker time.
|
|
122
|
+
- Report p50 and p95 per test or file when available. Average-only timing hides the last slow
|
|
123
|
+
shard, and the last slow shard decides CI wall time.
|
|
124
|
+
2. Classify the bottleneck.
|
|
125
|
+
- Discovery bottleneck: full tree scans, broad classpath scans, fixtures and generated files in
|
|
126
|
+
search paths, or missing precomputed test manifests.
|
|
127
|
+
- Startup bottleneck: one process, JVM, Python, Node, browser, container, or dependency-injection
|
|
128
|
+
graph per test instead of a safe persistent worker.
|
|
129
|
+
- Fixture bottleneck: repeated migrations, seed data, model loads, browser profile setup, or
|
|
130
|
+
container creation.
|
|
131
|
+
- Scheduling bottleneck: file-count sharding, no historical duration data, no work stealing,
|
|
132
|
+
huge test files, or worker count beyond the resource optimum.
|
|
133
|
+
- Artifact bottleneck: coverage, screenshots, videos, traces, full logs, HTML reports, or upload
|
|
134
|
+
work paid on every PR path instead of only failure, retry, nightly, or release paths.
|
|
135
|
+
3. Replace blanket full-suite execution with selected execution only when fallback is explicit.
|
|
136
|
+
- Include affected tests from changed files, new tests, and previous failed tests.
|
|
137
|
+
- Fall back to the full suite for changes to lockfiles, compiler or runtime settings, test runner
|
|
138
|
+
configuration, shared fixtures, migrations, database schema, generated contracts, package
|
|
139
|
+
metadata that affects runtime, or any change the selector cannot understand.
|
|
140
|
+
- Keep a scheduled full-suite path for nightly, release, or pre-merge confidence.
|
|
141
|
+
4. Do not build impact analysis from imports alone.
|
|
142
|
+
- Combine static dependency evidence with previous runtime evidence such as touched files,
|
|
143
|
+
classes, schemas, config keys, fixtures, resources, and test-to-source mappings.
|
|
144
|
+
- Prefer file or class granularity when method-level tracing costs more than it saves.
|
|
145
|
+
- Treat dynamic imports, reflection, generated code, config-driven branches, and framework magic
|
|
146
|
+
as reasons to widen selection or fall back.
|
|
147
|
+
5. Run likely failures early.
|
|
148
|
+
- Order previous failed tests, recently changed tests, tests near the changed code, historically
|
|
149
|
+
flaky tests, and long tests before low-risk tests.
|
|
150
|
+
- Use early-fail jobs for fast developer feedback, but keep a separate full-data job that
|
|
151
|
+
continues collecting remaining failures when the workflow needs all failures.
|
|
152
|
+
6. Cache only hermetic test results.
|
|
153
|
+
- A successful test result can be reused only when the test binary, test code, affected product
|
|
154
|
+
code, fixtures, environment, toolchain, locale, timezone, and declared resources match.
|
|
155
|
+
- Do not include volatile values such as commit SHA, build number, wall-clock time, random temp
|
|
156
|
+
roots, or runner-specific paths in cache keys unless they truly affect output.
|
|
157
|
+
- Do include language version, compiler flags, runtime flags, timezone, locale, dependency lock
|
|
158
|
+
content, fixture versions, and DB schema version when they affect behavior.
|
|
159
|
+
- Track cache hit rate and miss reasons after adding a cache; a cache with invisible misses is
|
|
160
|
+
performance theater.
|
|
161
|
+
7. Preserve warm local and remote caches.
|
|
162
|
+
- Avoid clearing dependency caches, transformed source caches, test discovery indexes, compiled
|
|
163
|
+
output, and package caches at the start of every CI job.
|
|
164
|
+
- Prefer runner-local SSD caches or content-hash restore over slow network-volume scans.
|
|
165
|
+
8. Avoid full discovery on every run.
|
|
166
|
+
- Generate or maintain package-level test manifests when the project can do so safely.
|
|
167
|
+
- Exclude fixtures, snapshots, generated output, archived tests, vendored code, and build output
|
|
168
|
+
from discovery paths.
|
|
169
|
+
- For JVM and similar ecosystems, prefer explicit include patterns over broad classpath scanning
|
|
170
|
+
when the runner supports it.
|
|
171
|
+
9. Reuse workers before increasing worker count.
|
|
172
|
+
- Keep interpreters, JIT, dependency injection, ASTs, browser engines, and database clients warm
|
|
173
|
+
across multiple tests when isolation allows it.
|
|
174
|
+
- Reset static state, module caches, timers, environment, and global fixtures between tests.
|
|
175
|
+
- Replace workers only after measured memory leaks, not by default after every test.
|
|
176
|
+
10. Size work units for the scheduler.
|
|
177
|
+
- Do not send thousands of tiny remote jobs where queue setup, sandbox setup, input transfer,
|
|
178
|
+
and result collection cost more than the test body.
|
|
179
|
+
- Bundle micro-tests by package, runtime, or fixture affinity while preserving per-test result
|
|
180
|
+
reporting.
|
|
181
|
+
11. Shard by historical duration, not file count.
|
|
182
|
+
- Use recent successful durations, trimmed means, or exponential moving averages.
|
|
183
|
+
- Assign longest tests first to the currently lightest shard.
|
|
184
|
+
- Give new tests a directory or type median duration, and cap timeout outliers so one historic failure
|
|
185
|
+
does not poison future placement.
|
|
186
|
+
12. Add work stealing when static shards still leave idle workers.
|
|
187
|
+
- Let workers that finish early take not-yet-started work from overloaded shards.
|
|
188
|
+
- Keep tests that share expensive setup together unless the tail cost is worse than duplicated
|
|
189
|
+
setup.
|
|
190
|
+
13. Split huge files below the file level when supported.
|
|
191
|
+
- File-level sharding is fake parallelism when one file contains most of the runtime.
|
|
192
|
+
- Confirm that the runner actually honors case-level sharding; unsupported sharding can cause
|
|
193
|
+
every shard to run the whole suite.
|
|
194
|
+
14. Schedule by resource tokens and affinity.
|
|
195
|
+
- Tag tests by CPU, memory, DB connections, browser processes, containers, GPU, filesystem, and
|
|
196
|
+
network pressure.
|
|
197
|
+
- Use resource tokens or locks for scarce shared resources instead of disabling all parallelism.
|
|
198
|
+
- Keep expensive fixture groups together when setup reuse beats perfect load balance.
|
|
199
|
+
15. Tune worker count empirically.
|
|
200
|
+
- Compare worker counts such as 1, 2, 4, and 8 on the same commit.
|
|
201
|
+
- Stop increasing workers when wall time stops improving, memory peaks rise sharply, DB waits
|
|
202
|
+
increase, browser tests swap, or flaky rate rises.
|
|
203
|
+
16. Make database and container setup worker-scoped.
|
|
204
|
+
- Prefer one DB server or service container per worker, with per-test schemas, databases,
|
|
205
|
+
transactions, savepoints, namespaces, ports, or fixture copies.
|
|
206
|
+
- Do not let all workers share the same mutable schema unless the suite is intentionally
|
|
207
|
+
serialized.
|
|
208
|
+
- Build a migrated and seeded template DB or snapshot once, then clone it per worker when
|
|
209
|
+
migrations or seed data dominate.
|
|
210
|
+
17. Reset mutable state cheaply.
|
|
211
|
+
- Prefer transaction rollback or savepoints for tests whose side effects stay inside one DB
|
|
212
|
+
connection.
|
|
213
|
+
- Use DB clone, schema clone, or container snapshot for tests that cross processes, queues,
|
|
214
|
+
external workers, sequence assertions, or asynchronous effects.
|
|
215
|
+
- Use tmpfs only for non-persistent state with explicit size limits.
|
|
216
|
+
18. Replace sleeps with readiness and fake time.
|
|
217
|
+
- Treat fixed sleeps as performance bugs.
|
|
218
|
+
- Wait for real readiness: health endpoints, logs, successful queries, event receipt, or port
|
|
219
|
+
plus protocol-level checks.
|
|
220
|
+
- For timers, retries, expiry, and scheduled work, inject clocks or use fake timers instead of
|
|
221
|
+
waiting in real time.
|
|
222
|
+
19. Remove external internet from the default CI path.
|
|
223
|
+
- Use local stubs, fixtures, mocks, recorded responses, or contract-specific jobs.
|
|
224
|
+
- Block unrelated images, fonts, analytics, and third-party requests in UI tests unless the test
|
|
225
|
+
is specifically about that delivery behavior.
|
|
226
|
+
20. Control hidden inputs.
|
|
227
|
+
- Inject time, random seeds, UUID generation, locale, timezone, environment, and current user.
|
|
228
|
+
- Report seed values on failure so a failed run can be reproduced.
|
|
229
|
+
21. Move expensive evidence off the default path.
|
|
230
|
+
- Collect full coverage, videos, traces, screenshots, full logs, and HTML reports only where
|
|
231
|
+
they pay for themselves: failures, first retry, nightly, release, or dedicated coverage jobs.
|
|
232
|
+
- Merge shard reports after tests finish rather than blocking each shard on heavy reporting.
|
|
233
|
+
22. Retry narrowly and honestly.
|
|
234
|
+
- Retry only the failed test or case, once, in a fresh worker when possible.
|
|
235
|
+
- Preserve first-failure logs and artifacts.
|
|
236
|
+
- Classify retry success as flaky success, not normal success.
|
|
237
|
+
23. Use speculative tail duplication only for hermetic shards.
|
|
238
|
+
- If the last shard exceeds its historical p95 while workers are idle, a scheduler may duplicate
|
|
239
|
+
that shard and accept the first result.
|
|
240
|
+
- Do not duplicate non-hermetic tests, tests with side effects, or tests that consume scarce
|
|
241
|
+
external resources.
|
|
242
|
+
24. Garbage-collect the test portfolio.
|
|
243
|
+
- Track duration, unique behavior coverage, defect-finding history, flaky rate, owner, and last
|
|
244
|
+
meaningful update.
|
|
245
|
+
- Move low-value duplicates from PR paths to nightly paths before deleting them.
|
|
246
|
+
- Quarantine flaky tests with owner and expiry; do not let quarantine become permanent silence.
|
|
247
|
+
25. Verify the faster path and the fallback path.
|
|
248
|
+
- A selector, cache, shard rule, retry policy, or fixture reuse change is not complete until the
|
|
249
|
+
default fast path and the fallback or full path both have evidence or a reported missing
|
|
250
|
+
command intent.
|
|
251
|
+
|
|
252
|
+
<!-- mustflow-section: postconditions -->
|
|
253
|
+
## Postconditions
|
|
254
|
+
|
|
255
|
+
- The test-suite bottleneck is classified by timing, selection, scheduling, cache, fixture, DB,
|
|
256
|
+
container, artifact, retry, or resource evidence.
|
|
257
|
+
- The optimization goal is explicit: faster first failure, shorter PR wall time, lower CI cost,
|
|
258
|
+
lower flaky rate, smaller artifact overhead, or faster local feedback.
|
|
259
|
+
- Selected-test execution has a full-suite fallback for unsafe or unknown changes.
|
|
260
|
+
- Caches declare inputs, omit irrelevant volatile values, and report hit rate or miss reasons when
|
|
261
|
+
evidence exists.
|
|
262
|
+
- Worker count, resource tokens, shard placement, fixture affinity, and retry behavior preserve
|
|
263
|
+
test isolation and failure evidence.
|
|
264
|
+
- Every speed claim is labeled as measured, complexity-only, or explicitly unverified.
|
|
265
|
+
|
|
266
|
+
<!-- mustflow-section: verification -->
|
|
267
|
+
## Verification
|
|
268
|
+
|
|
269
|
+
Use configured oneshot command intents when available:
|
|
270
|
+
|
|
271
|
+
- `changes_status`
|
|
272
|
+
- `changes_diff_summary`
|
|
273
|
+
- `build`
|
|
274
|
+
- `test_related`
|
|
275
|
+
- `test`
|
|
276
|
+
- `test_audit`
|
|
277
|
+
- `docs_validate_fast`
|
|
278
|
+
- `test_release`
|
|
279
|
+
- `mustflow_check`
|
|
280
|
+
|
|
281
|
+
Use the narrowest configured test, build, docs, release, or mustflow intent that proves the changed
|
|
282
|
+
test-suite behavior. If the repository exposes a profiling or cached-test intent, use it only when
|
|
283
|
+
the command contract marks it configured, oneshot, and agent-allowed.
|
|
284
|
+
|
|
285
|
+
<!-- mustflow-section: failure-handling -->
|
|
286
|
+
## Failure Handling
|
|
287
|
+
|
|
288
|
+
- If timing evidence is missing, add bounded measurement or report the missing evidence before
|
|
289
|
+
changing scheduling, caching, or selection policy.
|
|
290
|
+
- If selection cannot understand a change, fall back to the full suite.
|
|
291
|
+
- If a cache key is uncertain, prefer a cache miss over a false hit.
|
|
292
|
+
- If worker reuse causes order dependence, leaked state, or flaky failures, isolate the leaked
|
|
293
|
+
resource before widening reuse.
|
|
294
|
+
- If parallelism makes tests slower, check shared DB, browser, memory, filesystem, and container
|
|
295
|
+
contention before increasing workers again.
|
|
296
|
+
- If a retry hides a real failure, preserve first-failure evidence and classify the result as flaky.
|
|
297
|
+
- If configured verification is missing, report the missing intent instead of inventing raw runner
|
|
298
|
+
commands.
|
|
299
|
+
|
|
300
|
+
<!-- mustflow-section: output-format -->
|
|
301
|
+
## Output Format
|
|
302
|
+
|
|
303
|
+
- Suite surface and feedback goal
|
|
304
|
+
- Timing breakdown and bottleneck class
|
|
305
|
+
- Selection policy and full-suite fallback
|
|
306
|
+
- Scheduling, sharding, worker, and resource-token decisions
|
|
307
|
+
- Fixture, DB, container, filesystem, time, randomness, network, and isolation notes
|
|
308
|
+
- Cache key, hit-rate, and miss-reason notes
|
|
309
|
+
- Retry and flaky-test policy
|
|
310
|
+
- Coverage, trace, log, screenshot, video, report, and artifact policy
|
|
311
|
+
- Speed evidence: measured, complexity-only, or unverified
|
|
312
|
+
- Command intents run
|
|
313
|
+
- Skipped command intents and reasons
|
|
314
|
+
- Remaining test-suite performance risk
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
mustflow_doc: skill.typescript-code-change
|
|
3
3
|
locale: en
|
|
4
4
|
canonical: true
|
|
5
|
-
revision:
|
|
5
|
+
revision: 5
|
|
6
6
|
lifecycle: mustflow-owned
|
|
7
7
|
authority: procedure
|
|
8
8
|
name: typescript-code-change
|
|
@@ -91,19 +91,22 @@ Preserve TypeScript's type, runtime validation, module, build, and public API bo
|
|
|
91
91
|
11. For type tests, prefer `@ts-expect-error` with a short reason. Do not use `@ts-ignore` in implementation code. Implementation `@ts-expect-error` needs an owner, removal condition, and risk report.
|
|
92
92
|
12. If a public API changes, trace every consumer-visible import specifier, runtime export, type export, declaration output, docs example, type-only export, overload, generic default, interface field, enum or literal member, class member, and package entry condition.
|
|
93
93
|
13. Treat `exports`, `types`, `typings`, `typesVersions`, package `type`, file extensions, path aliases, declaration import paths, and barrel exports as public API surfaces. Adding or tightening `exports` can break existing deep imports.
|
|
94
|
-
14.
|
|
95
|
-
15.
|
|
96
|
-
16.
|
|
97
|
-
17.
|
|
98
|
-
18.
|
|
94
|
+
14. For TypeScript that emits code for native Node ESM, prefer `.ts` source plus package `"type": "module"` and `module`/`moduleResolution` set to `NodeNext` or the repository's fixed Node mode. Do not rename every source file to `.mts` just to mean ESM; reserve `.mts` and `.cts` for explicit per-file module overrides or mixed-package boundaries.
|
|
95
|
+
15. In TypeScript source that targets native Node ESM, write relative imports using the emitted runtime specifier, usually `.js`, such as `import { createApp } from "./app.js"` from `app.ts`. Do not write extensionless relative imports or `.ts` runtime specifiers unless a declared loader, bundler, or runtime explicitly owns that behavior.
|
|
96
|
+
16. Use `moduleResolution: "Bundler"` only when a bundler such as Vite, esbuild, Rollup, or a framework build system owns final module resolution. Do not use bundler resolution to model code that Node will execute directly without that bundler.
|
|
97
|
+
17. If ESM/CJS behavior changes, verify package `type`, `main`, `module`, `browser`, `exports`, condition order, extension rules, generated JS, and generated declaration files together.
|
|
98
|
+
18. Inspect generated declarations when package surfaces change. Declaration files must not leak source-only aliases, private paths, workspace-only package names, unpublished internal paths, or accidental public re-exports.
|
|
99
|
+
19. For TypeScript 6 migration work, treat deprecation warnings as future TypeScript 7 removal risk. `ignoreDeprecations` is a temporary compatibility valve, not proof that the project is ready for 7.0. Prefer removing deprecated options and updating resolver or module choices to match the project runtime.
|
|
100
|
+
20. Treat TypeScript 6 `--stableTypeOrdering` as a migration comparison tool for declaration and error-order differences, not as a permanent performance-neutral default. If it changes errors or declaration output, look for inference or declaration-stability issues instead of snapshotting noise.
|
|
101
|
+
21. For TypeScript 7 migration work, keep the tracks separate:
|
|
99
102
|
- TS6 stable API track: `@typescript/typescript6` and `tsc6` for compiler API, transformer, ESLint, framework wrapper, and peer-dependency compatibility.
|
|
100
103
|
- TS7 RC compiler track: `typescript@rc` and `tsc` for RC compiler verification.
|
|
101
104
|
- TS7 nightly track: `@typescript/native-preview` and `tsgo` for nightly diagnostics only.
|
|
102
105
|
- Future TS7 stable track: stable `typescript` once upstream publishes TypeScript 7 on the normal stable path.
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
106
|
+
22. Keep compiler API consumers, language-service plugins, custom transformers, and framework typecheck wrappers on the TS6 API track until their owners explicitly support the TS7 API surface. Treat TS7 RC `tsc` as compiler verification, not proof that JavaScript compiler API consumers can migrate.
|
|
107
|
+
23. When comparing TS6 `tsc6`, TS7 RC `tsc`, and optional TS7 nightly `tsgo`, classify differences before editing code: real type error, declaration emit order or printback noise, unsupported option, unsupported API, watch or incremental behavior gap, language-service gap, generated-output drift, or framework wrapper mismatch.
|
|
108
|
+
24. Do not treat faster TS7 RC or nightly results as sufficient verification. Keep the repository's existing `tsc`, `tsc6`, or framework typecheck as the compatibility baseline until repository policy explicitly adopts a different compiler track.
|
|
109
|
+
25. Choose the narrowest configured verification intents that cover typecheck, lint, tests, build output, declarations, package contract risk, and downstream-style consumer risk.
|
|
107
110
|
|
|
108
111
|
<!-- mustflow-section: assertion-policy -->
|
|
109
112
|
## Assertion Policy
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
id = "default"
|
|
2
2
|
name = "default"
|
|
3
|
-
version = "2.75.2"
|
|
3
|
+
version = "2.85.4"
|
|
4
4
|
description = "Minimal workflow for LLM agents to read, edit, and verify their work in a repository."
|
|
5
5
|
common_root = "common"
|
|
6
6
|
locales_root = "locales"
|
|
@@ -127,6 +127,7 @@ creates = [
|
|
|
127
127
|
".mustflow/skills/release-publish-change/SKILL.md",
|
|
128
128
|
".mustflow/skills/test-design-guard/SKILL.md",
|
|
129
129
|
".mustflow/skills/test-maintenance/SKILL.md",
|
|
130
|
+
".mustflow/skills/test-suite-performance-review/SKILL.md",
|
|
130
131
|
".mustflow/skills/vertical-slice-tdd/SKILL.md",
|
|
131
132
|
".mustflow/skills/llm-service-ux-review/SKILL.md",
|
|
132
133
|
".mustflow/skills/prompt-contract-quality-review/SKILL.md",
|
|
@@ -157,6 +158,7 @@ creates = [
|
|
|
157
158
|
".mustflow/skills/proactive-risk-surfacing/SKILL.md",
|
|
158
159
|
".mustflow/skills/repo-improvement-loop/SKILL.md",
|
|
159
160
|
".mustflow/skills/restricted-handoff-resume/SKILL.md",
|
|
161
|
+
".mustflow/skills/cross-agent-session-reference/SKILL.md",
|
|
160
162
|
".mustflow/skills/structure-discovery-gate/SKILL.md",
|
|
161
163
|
".mustflow/skills/readme-authoring/SKILL.md",
|
|
162
164
|
".mustflow/skills/requirement-regression-guard/SKILL.md",
|
|
@@ -324,6 +326,7 @@ minimal = [
|
|
|
324
326
|
"proactive-risk-surfacing",
|
|
325
327
|
"requirement-regression-guard",
|
|
326
328
|
"restricted-handoff-resume",
|
|
329
|
+
"cross-agent-session-reference",
|
|
327
330
|
"repro-first-debug",
|
|
328
331
|
"security-privacy-review",
|
|
329
332
|
"secret-exposure-response",
|
|
@@ -335,6 +338,7 @@ minimal = [
|
|
|
335
338
|
"support-surface-advisor",
|
|
336
339
|
"test-design-guard",
|
|
337
340
|
"test-maintenance",
|
|
341
|
+
"test-suite-performance-review",
|
|
338
342
|
"vertical-slice-tdd",
|
|
339
343
|
]
|
|
340
344
|
patterns = [
|
|
@@ -462,6 +466,7 @@ patterns = [
|
|
|
462
466
|
"proactive-risk-surfacing",
|
|
463
467
|
"repo-improvement-loop",
|
|
464
468
|
"restricted-handoff-resume",
|
|
469
|
+
"cross-agent-session-reference",
|
|
465
470
|
"result-option",
|
|
466
471
|
"requirement-regression-guard",
|
|
467
472
|
"repro-first-debug",
|
|
@@ -477,6 +482,7 @@ patterns = [
|
|
|
477
482
|
"support-surface-advisor",
|
|
478
483
|
"test-design-guard",
|
|
479
484
|
"test-maintenance",
|
|
485
|
+
"test-suite-performance-review",
|
|
480
486
|
"vertical-slice-tdd",
|
|
481
487
|
]
|
|
482
488
|
oss = [
|
|
@@ -620,6 +626,7 @@ oss = [
|
|
|
620
626
|
"result-option",
|
|
621
627
|
"requirement-regression-guard",
|
|
622
628
|
"restricted-handoff-resume",
|
|
629
|
+
"cross-agent-session-reference",
|
|
623
630
|
"repro-first-debug",
|
|
624
631
|
"security-privacy-review",
|
|
625
632
|
"security-regression-tests",
|
|
@@ -637,6 +644,7 @@ oss = [
|
|
|
637
644
|
"support-surface-advisor",
|
|
638
645
|
"test-design-guard",
|
|
639
646
|
"test-maintenance",
|
|
647
|
+
"test-suite-performance-review",
|
|
640
648
|
"vertical-slice-tdd",
|
|
641
649
|
]
|
|
642
650
|
team = [
|
|
@@ -769,6 +777,7 @@ team = [
|
|
|
769
777
|
"result-option",
|
|
770
778
|
"requirement-regression-guard",
|
|
771
779
|
"restricted-handoff-resume",
|
|
780
|
+
"cross-agent-session-reference",
|
|
772
781
|
"repro-first-debug",
|
|
773
782
|
"security-privacy-review",
|
|
774
783
|
"secret-exposure-response",
|
|
@@ -782,6 +791,7 @@ team = [
|
|
|
782
791
|
"support-surface-advisor",
|
|
783
792
|
"test-design-guard",
|
|
784
793
|
"test-maintenance",
|
|
794
|
+
"test-suite-performance-review",
|
|
785
795
|
"vertical-slice-tdd",
|
|
786
796
|
]
|
|
787
797
|
product = [
|
|
@@ -913,6 +923,7 @@ product = [
|
|
|
913
923
|
"result-option",
|
|
914
924
|
"requirement-regression-guard",
|
|
915
925
|
"restricted-handoff-resume",
|
|
926
|
+
"cross-agent-session-reference",
|
|
916
927
|
"repro-first-debug",
|
|
917
928
|
"security-privacy-review",
|
|
918
929
|
"secret-exposure-response",
|
|
@@ -927,6 +938,7 @@ product = [
|
|
|
927
938
|
"support-surface-advisor",
|
|
928
939
|
"test-design-guard",
|
|
929
940
|
"test-maintenance",
|
|
941
|
+
"test-suite-performance-review",
|
|
930
942
|
"vertical-slice-tdd",
|
|
931
943
|
"ui-quality-gate",
|
|
932
944
|
"visual-review-artifact",
|
|
@@ -1071,6 +1083,7 @@ library = [
|
|
|
1071
1083
|
"result-option",
|
|
1072
1084
|
"requirement-regression-guard",
|
|
1073
1085
|
"restricted-handoff-resume",
|
|
1086
|
+
"cross-agent-session-reference",
|
|
1074
1087
|
"repro-first-debug",
|
|
1075
1088
|
"security-privacy-review",
|
|
1076
1089
|
"security-regression-tests",
|
|
@@ -1087,6 +1100,7 @@ library = [
|
|
|
1087
1100
|
"support-surface-advisor",
|
|
1088
1101
|
"test-design-guard",
|
|
1089
1102
|
"test-maintenance",
|
|
1103
|
+
"test-suite-performance-review",
|
|
1090
1104
|
"vertical-slice-tdd",
|
|
1091
1105
|
]
|
|
1092
1106
|
|