@glubean/cli 0.2.6 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49)
  1. package/README.md +2 -2
  2. package/dist/commands/init.d.ts +2 -0
  3. package/dist/commands/init.d.ts.map +1 -1
  4. package/dist/commands/init.js +267 -60
  5. package/dist/commands/init.js.map +1 -1
  6. package/dist/commands/redact.d.ts.map +1 -1
  7. package/dist/commands/redact.js +32 -8
  8. package/dist/commands/redact.js.map +1 -1
  9. package/dist/commands/run.d.ts +110 -2
  10. package/dist/commands/run.d.ts.map +1 -1
  11. package/dist/commands/run.js +483 -40
  12. package/dist/commands/run.js.map +1 -1
  13. package/dist/lib/config.d.ts +267 -43
  14. package/dist/lib/config.d.ts.map +1 -1
  15. package/dist/lib/config.js +744 -149
  16. package/dist/lib/config.js.map +1 -1
  17. package/dist/lib/env.d.ts +29 -0
  18. package/dist/lib/env.d.ts.map +1 -0
  19. package/dist/lib/env.js +59 -0
  20. package/dist/lib/env.js.map +1 -0
  21. package/dist/lib/print-plan.d.ts +21 -0
  22. package/dist/lib/print-plan.d.ts.map +1 -0
  23. package/dist/lib/print-plan.js +108 -0
  24. package/dist/lib/print-plan.js.map +1 -0
  25. package/dist/lib/upload.d.ts +36 -1
  26. package/dist/lib/upload.d.ts.map +1 -1
  27. package/dist/lib/upload.js +142 -20
  28. package/dist/lib/upload.js.map +1 -1
  29. package/dist/main.d.ts.map +1 -1
  30. package/dist/main.js +405 -27
  31. package/dist/main.js.map +1 -1
  32. package/package.json +5 -5
  33. package/templates/README.md +7 -13
  34. package/templates/demo/.env.example +7 -0
  35. package/templates/demo/.env.secrets.example +11 -0
  36. package/templates/demo/README.md +60 -0
  37. package/templates/demo/config/api.ts +24 -0
  38. package/templates/demo/gitignore.tpl +13 -0
  39. package/templates/demo/glubean.yaml +48 -0
  40. package/templates/demo/tests/api-flaky/search-flaky.test.ts +28 -0
  41. package/templates/demo/tests/api-stable/get-users.test.ts +30 -0
  42. package/templates/demo/tests/canary/synthetic-50pct-flaky.test.ts +23 -0
  43. package/templates/demo/tests/contracts/stable/users-contract.contract.ts +70 -0
  44. package/templates/demo/tsconfig.json +15 -0
  45. package/templates/AI-INSTRUCTIONS.md +0 -160
  46. package/templates/ci-config/ci.yaml +0 -13
  47. package/templates/ci-config/default.yaml +0 -9
  48. package/templates/ci-config/explore.yaml +0 -5
  49. package/templates/ci-config/staging.yaml +0 -9
@@ -2,7 +2,7 @@ import { bootstrap, evaluateThresholds, MetricCollector, ProjectRunner, buildRun
2
2
  import { basename, dirname, isAbsolute, relative, resolve } from "node:path";
3
3
  import { stat, readdir, readFile, writeFile, mkdir, rm } from "node:fs/promises";
4
4
  import { glob } from "node:fs/promises";
5
- import { loadConfig, mergeRunOptions, toSharedRunConfig } from "../lib/config.js";
5
+ import { CONFIG_DEFAULTS, mergeRunOptions, toSharedRunConfig } from "../lib/config.js";
6
6
  import { loadProjectEnv } from "@glubean/runner";
7
7
  import { resolveEnvFileName } from "../lib/active_env.js";
8
8
  import { shouldSkipTest } from "../lib/skip.js";
@@ -49,6 +49,22 @@ async function findProjectConfig(startDir) {
49
49
  // No glubean project found — use the starting directory (scratch mode)
50
50
  return { rootDir: startDir };
51
51
  }
/**
 * Warn when `<rootDir>/package.json` still carries a `glubean` object field.
 *
 * Config consolidation (docs/06): the package.json `glubean` field is no
 * longer a config source. A lingering field only produces a warning (never
 * an error) so users migrate it into glubean.yaml instead of wondering why
 * it stopped working.
 *
 * @param {string} rootDir - Directory whose package.json is inspected.
 * @returns {Promise<void>} Resolves once the check is done. Every failure
 *   inside the check (missing file, unparsable JSON, ...) is swallowed.
 */
async function warnIfLegacyPackageJsonConfig(rootDir) {
    // Yellow ANSI wrapper around the migration hint.
    const legacyFieldWarning = [
        "\x1b[33mWarning: the package.json `glubean` field is no longer read ",
        "(config consolidation — see docs/06). Move run/redaction/thresholds ",
        "settings into glubean.yaml; the field is currently inert.\x1b[0m",
    ].join("");
    try {
        const manifestText = await readFile(resolve(rootDir, "package.json"), "utf-8");
        const manifest = JSON.parse(manifestText);
        const legacyField = manifest.glubean;
        // Only a truthy object-typed value counts as a lingering config block.
        if (!legacyField || typeof legacyField !== "object") {
            return;
        }
        console.warn(legacyFieldWarning);
    }
    catch {
        // No package.json or parse error — nothing to warn about.
    }
}
52
68
  const DEFAULT_SKIP_DIRS = ["node_modules", ".git", "dist", "build"];
53
69
  const DEFAULT_EXTENSIONS = ["ts"];
54
70
  function isGlob(target) {
@@ -86,7 +102,18 @@ async function walkTestFiles(dir, result) {
86
102
  }
87
103
  }
88
104
  }
89
- async function resolveTestFiles(target) {
/**
 * Classify a path by its Glubean file suffix.
 *
 * @param {string} filePath - Path (or bare file name) to classify.
 * @returns {"test" | "contract" | "flow" | "bootstrap" | undefined}
 *   The kind matching the suffix, or `undefined` when the path carries none
 *   of the recognised suffixes. Matching is case-sensitive.
 */
export function classifyGlubeanFile(filePath) {
    // Checked in order; the first matching suffix decides the kind.
    const suffixToKind = [
        [".test.ts", "test"],
        [".contract.ts", "contract"],
        [".flow.ts", "flow"],
        [".bootstrap.ts", "bootstrap"],
    ];
    const match = suffixToKind.find(([suffix]) => filePath.endsWith(suffix));
    return match?.[1];
}
116
+ async function resolveSingleTarget(target) {
90
117
  const abs = resolve(target);
91
118
  try {
92
119
  const s = await stat(abs);
@@ -117,6 +144,80 @@ async function resolveTestFiles(target) {
117
144
  }
118
145
  return [abs];
119
146
  }
/**
 * Resolve one or more targets (file / dir / glob) to a deduped list of
 * test file paths. Phase 4 multi-suite execution passes a per-suite
 * array here so the runner can sweep all suites in a single pass with
 * unified discovery, filtering, and reporter output.
 *
 * Ordering contract: the result keeps the caller-supplied target order and,
 * within each target, the order returned by `resolveSingleTarget`. A path
 * reachable from several targets (suites may share a directory) is kept at
 * its FIRST occurrence only. Do not sort here — multi-suite main.ts depends
 * on this order; sorting would mix files across suites and break
 * failFast/failAfter short-circuit ordering.
 *
 * @param {string | string[]} target - A single target or a list of targets.
 * @returns {Promise<string[]>} Unique file paths in first-seen order.
 */
async function resolveTestFiles(target) {
    const targets = Array.isArray(target) ? target : [target];
    // A Set iterates in insertion order, so it dedupes and preserves the
    // first-seen order in one structure. Adding paths one at a time also
    // avoids `push(...files)`, which passes every path as a call argument
    // and can exceed the engine's argument limit on very large walks.
    const ordered = new Set();
    for (const t of targets) {
        // Targets are resolved sequentially, one after another.
        const files = await resolveSingleTarget(t);
        for (const f of files) {
            ordered.add(f);
        }
    }
    return [...ordered];
}
/**
 * Per-suite resolution helper exposed for main.ts. Resolves a suite's
 * `target` (file / dir / glob) via `resolveSingleTarget`, then keeps only
 * files whose `classifyGlubeanFile` result is in `kinds`.
 *
 * Filter rules:
 *  - `kinds` omitted or empty means "no kind filter": every resolved file
 *    is returned as-is.
 *  - `.bootstrap.ts` files are always kept regardless of `kinds` so
 *    contract.bootstrap() side-effects fire on import (per attachment-model
 *    §7.4 eager loading) — they emit no runnable tests on their own.
 *  - Files with no recognised Glubean suffix are dropped once a kind filter
 *    is active.
 *  - Otherwise the match is strict per kind: `.test.ts` ↔ "test",
 *    `.contract.ts` ↔ "contract", `.flow.ts` ↔ "flow". This keeps the
 *    "zero files for declared suite" error a reliable signal of
 *    misconfiguration.
 *
 * KNOWN LIMITATION (file-level only): the filter looks at the file SUFFIX,
 * not at individual exports. A `.contract.ts` file that exports ONLY a flow
 * (uncommon — flows usually live in `.flow.ts`) is not picked up by a
 * `kinds: [flow]` suite. To run such a flow either move the export into a
 * `.flow.ts` file (recommended canonical layout) or declare the suite as
 * `kinds: [contract, flow]` so both candidate file types are scanned.
 * The opposite case — an inline flow inside a `.contract.ts` that belongs
 * to a `kinds: [contract]` suite — is handled later at the runnable level:
 * runCommand drops runnables whose kind is not allowed for the file when
 * `options.allowedKindsPerFile` is provided.
 *
 * @param {string} target - Suite target (file, directory, or glob).
 * @param {string[]} [kinds=[]] - Allowed kinds ("test" | "contract" | "flow").
 * @returns {Promise<string[]>} Files that survive the kind filter, in the
 *   order `resolveSingleTarget` produced them.
 */
export async function resolveTestFilesForSuite(target, kinds = []) {
    const files = await resolveSingleTarget(target);
    if (kinds.length === 0) {
        return files;
    }
    const kindSet = new Set(kinds);
    return files.filter((f) => {
        const k = classifyGlubeanFile(f);
        // Bootstrap files bypass the kind filter; unclassified files never pass.
        return k === "bootstrap" || (k !== undefined && kindSet.has(k));
    });
}
120
221
  export async function discoverTests(filePath) {
121
222
  // `.bootstrap.ts` files register overlays as a side-effect of import; they
122
223
  // produce no runnable tests of their own. We don't even need to import here
@@ -135,16 +236,35 @@ export async function discoverTests(filePath) {
135
236
  const result = await extractContractFromFile(filePath);
136
237
  const results = [];
137
238
  for (const ec of result.contracts) {
239
+ const contractTags = ec.tags ?? [];
138
240
  for (const c of ec.cases) {
241
+ // Mirror SDK dispatchContract: finalTags = contract + case + runtime
242
+ // synthetic. Without this, pre-spawn excludeTags / --tag filtering
243
+ // skips contract cases entirely (Phase 1 filter reads meta.tags).
244
+ const caseTags = c.tags ?? [];
245
+ const requires = c.requires ?? "headless";
246
+ const defaultRun = c.defaultRun ?? (requires !== "headless" ? "opt-in" : "always");
247
+ const runtimeTags = [];
248
+ if (requires !== "headless")
249
+ runtimeTags.push(`requires:${requires}`);
250
+ if (defaultRun === "opt-in")
251
+ runtimeTags.push("default-run:opt-in");
252
+ const finalTags = [...contractTags, ...caseTags, ...runtimeTags];
139
253
  results.push({
140
254
  exportName: ec.exportName,
141
255
  meta: {
142
256
  id: `${ec.id}.${c.key}`,
257
+ // Mirror SDK dispatchContract testName: `${contractId} — ${caseKey}`.
258
+ // Phase 1 matchesFilter checks meta.name; without this, --filter
259
+ // matches against testId only for contract cases (uneven with test()).
260
+ name: `${ec.id} — ${c.key}`,
143
261
  description: c.description,
262
+ tags: finalTags.length > 0 ? finalTags : undefined,
144
263
  requires: c.requires,
145
264
  defaultRun: c.defaultRun,
146
265
  deferred: c.deferredReason,
147
266
  deprecated: c.deprecatedReason,
267
+ kind: "contract",
148
268
  },
149
269
  });
150
270
  }
@@ -152,6 +272,8 @@ export async function discoverTests(filePath) {
152
272
  // Each flow has a single orchestrator Test (setup → steps → teardown).
153
273
  // Discover it as one runnable entry with the flow id. Post-Phase 2f
154
274
  // flows live as `kind: "flow"` entries inside `result.attachments`.
275
+ // SDK maps FlowMeta.skip → TestMeta.deferred (string reason); mirror
276
+ // that here so the runner's deferred-skip path applies uniformly.
155
277
  for (const att of result.attachments) {
156
278
  if (att.kind !== "flow")
157
279
  continue;
@@ -160,32 +282,58 @@ export async function discoverTests(filePath) {
160
282
  meta: {
161
283
  id: att.flow.id,
162
284
  description: att.flow.description,
285
+ tags: att.flow.tags,
286
+ only: att.flow.only,
287
+ deferred: att.flow.skip,
288
+ kind: "flow",
163
289
  },
164
290
  });
165
291
  }
166
292
  if (results.length > 0)
167
293
  return results;
168
- // Runtime failed — fall back to static regex (old syntax, contracts only)
294
+ // Runtime failed — fall back to static regex ONLY for files that
295
+ // contain ONLY contract.http(...). Stricter than MCP's gate: CLI
296
+ // emits flows as runnable tests via discoverTests, so silently
297
+ // dropping `contract.flow(...)` would hide an actual test. Any
298
+ // non-HTTP usage (including flow) → fail closed and surface the
299
+ // import error so the user knows discovery is degraded.
169
300
  if (result.errors.length > 0) {
170
- const contracts = extractContractCases(content);
301
+ // Allow whitespace/newlines between `contract` and `.method` so the
302
+ // common fluent style `contract\n .flow(...)` still trips the gate.
303
+ const hasHttp = /contract\s*\.\s*http\b/i.test(content);
304
+ const hasNonHttp = /contract\s*\.\s*(?!http\b)\w+\s*[.(]/i.test(content);
305
+ const contracts = hasHttp && !hasNonHttp ? extractContractCases(content) : [];
171
306
  if (contracts.length > 0) {
172
307
  for (const c of contracts) {
173
308
  for (const caseItem of c.cases) {
309
+ const requires = caseItem.requires ?? "headless";
310
+ const defaultRun = caseItem.defaultRun ??
311
+ (requires !== "headless" ? "opt-in" : "always");
312
+ const runtimeTags = [];
313
+ if (requires !== "headless") {
314
+ runtimeTags.push(`requires:${requires}`);
315
+ }
316
+ if (defaultRun === "opt-in")
317
+ runtimeTags.push("default-run:opt-in");
174
318
  results.push({
175
319
  exportName: c.exportName,
176
320
  meta: {
177
321
  id: `${c.contractId}.${caseItem.key}`,
322
+ name: `${c.contractId} — ${caseItem.key}`,
178
323
  description: caseItem.description,
324
+ tags: runtimeTags.length > 0 ? runtimeTags : undefined,
179
325
  requires: caseItem.requires,
180
326
  defaultRun: caseItem.defaultRun,
181
327
  deferred: caseItem.deferred,
328
+ kind: "contract",
182
329
  },
183
330
  });
184
331
  }
185
332
  }
186
333
  return results;
187
334
  }
188
- // Both runtime and static failed — surface the import error
335
+ // Both runtime and static failed (or non-HTTP detected) — surface the
336
+ // import error so the user knows discovery is degraded.
189
337
  for (const err of result.errors) {
190
338
  console.error(`\x1b[31m✗ Contract import failed: ${err.file}\x1b[0m`);
191
339
  console.error(`\x1b[2m ${err.error}\x1b[0m`);
@@ -194,19 +342,41 @@ export async function discoverTests(filePath) {
194
342
  return [];
195
343
  }
196
344
  const metas = extractFromSource(content);
197
- return metas.map((m) => ({
198
- exportName: m.exportName,
199
- meta: {
200
- id: m.id,
201
- name: m.name,
202
- tags: m.tags,
203
- timeout: m.timeout,
204
- skip: m.skip,
205
- only: m.only,
206
- groupId: m.groupId ?? (m.variant === "pick" || m.parallel ? m.id : undefined),
207
- parallel: m.parallel,
208
- },
209
- }));
345
+ return metas.map((m) => {
346
+ // Mirror the contract-case path so .test.ts authors who declare
347
+ // `requires: "browser"` / `defaultRun: "opt-in"` see the same
348
+ // selection behavior (excludeTags via synthetic tag-names AND
349
+ // shouldSkipTest via meta.requires/defaultRun).
350
+ const userTags = m.tags ?? [];
351
+ const requires = m.requires ?? "headless";
352
+ // Mirror SDK dispatchContract: non-headless implicitly opt-in unless
353
+ // the author overrode defaultRun. Same default applied to test() so
354
+ // tag-based selection (e.g. `--exclude-tag default-run:opt-in`)
355
+ // treats equivalent test() and contract cases identically.
356
+ const defaultRun = m.defaultRun ?? (requires !== "headless" ? "opt-in" : "always");
357
+ const runtimeTags = [];
358
+ if (requires !== "headless")
359
+ runtimeTags.push(`requires:${requires}`);
360
+ if (defaultRun === "opt-in")
361
+ runtimeTags.push("default-run:opt-in");
362
+ const finalTags = [...userTags, ...runtimeTags];
363
+ return {
364
+ exportName: m.exportName,
365
+ meta: {
366
+ id: m.id,
367
+ name: m.name,
368
+ tags: finalTags.length > 0 ? finalTags : undefined,
369
+ timeout: m.timeout,
370
+ skip: m.skip,
371
+ only: m.only,
372
+ groupId: m.groupId ?? (m.variant === "pick" || m.parallel ? m.id : undefined),
373
+ parallel: m.parallel,
374
+ requires: m.requires,
375
+ defaultRun: m.defaultRun,
376
+ kind: "test",
377
+ },
378
+ };
379
+ });
210
380
  }
211
381
  function matchesFilter(testItem, filter) {
212
382
  const lowerFilter = filter.toLowerCase();
@@ -216,6 +386,11 @@ function matchesFilter(testItem, filter) {
216
386
  return true;
217
387
  return false;
218
388
  }
// Test-only export surface. The tag matchers stay module-private for
// production code; each wrapper forwards its arguments unchanged to the
// corresponding internal helper, resolved at call time.
export const __testing = {
    matchesTags(...args) {
        return matchesTags(...args);
    },
    matchesExcludeTags(...args) {
        return matchesExcludeTags(...args);
    },
};
219
394
  function matchesTags(testItem, tags, mode = "or") {
220
395
  if (!testItem.meta.tags?.length)
221
396
  return false;
@@ -223,6 +398,19 @@ function matchesTags(testItem, tags, mode = "or") {
223
398
  const match = (t) => lowerTestTags.includes(t.toLowerCase());
224
399
  return mode === "and" ? tags.every(match) : tags.some(match);
225
400
  }
/**
 * Returns true if the test carries ANY tag in excludeTags (case-insensitive).
 * Always OR-mode — independent of positive-side tagMode. A test with no
 * tags is never excluded by this filter.
 *
 * @param {{ meta: { tags?: string[] } }} testItem - Discovered test entry.
 * @param {string[] | null | undefined} excludeTags - Tags that exclude a
 *   test. A missing or empty list excludes nothing.
 * @returns {boolean} `true` when at least one of the test's tags is excluded.
 */
function matchesExcludeTags(testItem, excludeTags) {
    // Nullish-safe: an absent exclude list behaves like an empty one instead
    // of throwing on `.length`.
    if (!excludeTags?.length) {
        return false;
    }
    const testTags = testItem.meta.tags;
    if (!testTags?.length) {
        return false;
    }
    // Lower-case once into a Set so each excluded tag is a constant-time lookup.
    const lowerTestTags = new Set(testTags.map((t) => t.toLowerCase()));
    return excludeTags.some((t) => lowerTestTags.has(t.toLowerCase()));
}
226
414
  function getLogFilePath(testFilePath) {
227
415
  const lastDot = testFilePath.lastIndexOf(".");
228
416
  if (lastDot === -1)
@@ -247,7 +435,7 @@ function resolveOutputPath(userPath, cwd) {
247
435
  }
248
436
  async function writeEmptyResult(target, runAt) {
249
437
  const payload = {
250
- target,
438
+ target: Array.isArray(target) ? target.join(", ") : target,
251
439
  files: [],
252
440
  runAt,
253
441
  summary: { total: 0, passed: 0, failed: 0, skipped: 0, durationMs: 0, stats: {} },
@@ -267,6 +455,10 @@ export async function runCommand(target, options = {}) {
267
455
  const runStartDate = new Date();
268
456
  const runStartTime = runStartDate.toISOString();
269
457
  const runStartLocal = localTimeString(runStartDate);
458
+ if (options.uploadReceiptJson && !options.upload) {
459
+ console.error(`${colors.red}Error: --upload-receipt-json requires --upload or an upload-enabled profile.${colors.reset}`);
460
+ process.exit(1);
461
+ }
270
462
  // ── Capability profile ──────────────────────────────────────────────────
271
463
  const isCiEnv = process.env.CI === "true" || process.env.GLUBEAN_CI === "true";
272
464
  // Hard fail: --include-browser/--include-out-of-band in CI
@@ -285,23 +477,36 @@ export async function runCommand(target, options = {}) {
285
477
  console.log(`\n${colors.bold}${colors.blue}🧪 Glubean Test Runner${colors.reset}\n`);
286
478
  const testFiles = await resolveTestFiles(target);
287
479
  const isMultiFile = testFiles.length > 1;
480
+ // Single string view of target for serialization / display paths
481
+ // (result.json, junit, traces). Multi-suite passes an array; join with
482
+ // ", " so downstream consumers still see a printable target field.
483
+ const targetDisplay = Array.isArray(target) ? target.join(", ") : target;
288
484
  if (testFiles.length === 0) {
289
- console.error(`\n${colors.red}❌ No test files found for target: ${target}${colors.reset}`);
485
+ console.error(`\n${colors.red}❌ No test files found for target: ${Array.isArray(target) ? target.join(", ") : target}${colors.reset}`);
290
486
  console.error(`${colors.dim}Glubean looks for files matching *.test.ts, *.contract.ts, or *.flow.ts in the target directory.${colors.reset}`);
291
487
  console.error(`${colors.dim}Run "glubean run tests/" or "glubean run path/to/file.test.ts".${colors.reset}\n`);
292
488
  await writeEmptyResult(target, runStartLocal);
293
489
  process.exit(1);
294
490
  }
295
491
  if (isMultiFile) {
296
- console.log(`${colors.dim}Target: ${resolve(target)}${colors.reset}`);
492
+ const targetDisplay = Array.isArray(target)
493
+ ? target.map((t) => resolve(t)).join(", ")
494
+ : resolve(target);
495
+ console.log(`${colors.dim}Target: ${targetDisplay}${colors.reset}`);
297
496
  console.log(`${colors.dim}Files: ${testFiles.length} test file(s)${colors.reset}\n`);
298
497
  }
299
498
  else {
300
499
  console.log(`${colors.dim}File: ${testFiles[0]}${colors.reset}\n`);
301
500
  }
302
501
  const startDir = testFiles[0].substring(0, testFiles[0].lastIndexOf("/"));
303
- const { rootDir, configPath } = await findProjectConfig(startDir);
304
- const glubeanConfig = await loadConfig(rootDir, options.configFiles);
502
+ const { rootDir } = await findProjectConfig(startDir);
503
+ // Config consolidation (docs/06 P2): the legacy package.json `glubean`
504
+ // flat-shape is no longer read. Profile runs get run/redaction/thresholds
505
+ // from the resolved plan (threaded via `options`); non-profile target runs
506
+ // fall back to built-in defaults + CLI flags + env. Warn once if a stale
507
+ // `glubean` field lingers in package.json so users know it's inert now.
508
+ await warnIfLegacyPackageJsonConfig(rootDir);
509
+ const glubeanConfig = structuredClone(CONFIG_DEFAULTS);
305
510
  const effectiveRun = mergeRunOptions(glubeanConfig.run, {
306
511
  verbose: options.verbose,
307
512
  pretty: options.pretty,
@@ -312,6 +517,12 @@ export async function runCommand(target, options = {}) {
312
517
  envFile: options.envFile,
313
518
  failFast: options.failFast,
314
519
  failAfter: options.failAfter,
520
+ // Phase 1 sub-task E1: forward profile-driven execution settings.
521
+ // mergeRunOptions handles undefined as "no override" — so non-profile
522
+ // runs (where options.timeoutMs/concurrency are undefined) keep
523
+ // legacy GlubeanRunConfig defaults; profile runs get the resolved values.
524
+ timeoutMs: options.timeoutMs,
525
+ concurrency: options.concurrency,
315
526
  });
316
527
  if (effectiveRun.logFile && !isMultiFile) {
317
528
  const logPath = getLogFilePath(testFiles[0]);
@@ -428,10 +639,24 @@ export async function runCommand(target, options = {}) {
428
639
  for (const filePath of testFiles) {
429
640
  try {
430
641
  const tests = await discoverTests(filePath);
431
- for (const test of tests) {
642
+ // Phase 4 multi-suite: enforce suite.kinds at the runnable level
643
+ // (not just file-level). A `.contract.ts` exporting an inline
644
+ // `contract.flow(...)` produces a flow runnable; if the contributing
645
+ // suite declared `kinds: [contract]`, drop the flow here.
646
+ const allowedKinds = options.allowedKindsPerFile?.get(filePath);
647
+ const filteredTests = allowedKinds
648
+ ? tests.filter((t) => {
649
+ const k = t.meta.kind;
650
+ // Treat missing kind as "always allowed" — legacy / static-
651
+ // fallback paths populate kind, but the safety net keeps
652
+ // unknown shapes runnable rather than silently dropped.
653
+ return k === undefined || allowedKinds.has(k);
654
+ })
655
+ : tests;
656
+ for (const test of filteredTests) {
432
657
  allFileTests.push({ filePath, exportName: test.exportName, test });
433
658
  }
434
- totalDiscovered += tests.length;
659
+ totalDiscovered += filteredTests.length;
435
660
  }
436
661
  catch (error) {
437
662
  if (isMultiFile) {
@@ -465,6 +690,7 @@ export async function runCommand(target, options = {}) {
465
690
  console.log(`${colors.yellow}ℹ️ Running only tests marked with .only${colors.reset}`);
466
691
  }
467
692
  const hasTags = options.tags && options.tags.length > 0;
693
+ const hasExcludeTags = options.excludeTags && options.excludeTags.length > 0;
468
694
  const testsToRun = allFileTests.filter((ft) => {
469
695
  const tc = ft.test;
470
696
  if (tc.meta.skip)
@@ -475,6 +701,8 @@ export async function runCommand(target, options = {}) {
475
701
  return false;
476
702
  if (hasTags && !matchesTags(tc, options.tags, options.tagMode))
477
703
  return false;
704
+ if (hasExcludeTags && matchesExcludeTags(tc, options.excludeTags))
705
+ return false;
478
706
  return true;
479
707
  });
480
708
  if (testsToRun.length === 0) {
@@ -689,10 +917,17 @@ export async function runCommand(target, options = {}) {
689
917
  let assertions = [];
690
918
  let success = false;
691
919
  let errorMsg;
920
+ let errorStack;
921
+ let errorReason;
922
+ let errorMissingPath;
923
+ let errorSuggestions;
692
924
  let peakMemoryMB;
693
925
  let stepAssertionCount = 0;
694
926
  let stepTraceLines = [];
695
927
  let testStarted = false;
928
+ // Plan 1 AC5: dedupe warning messages per session so the same warning
929
+ // doesn't repeat across session setup + each file's run() call.
930
+ const emittedWarnings = new Set();
696
931
  const addLogEntry = (type, message, data) => {
697
932
  if (effectiveRun.logFile) {
698
933
  logEntries.push({
@@ -771,8 +1006,42 @@ export async function runCommand(target, options = {}) {
771
1006
  }
772
1007
  }
773
1008
  if (errorMsg) {
774
- console.log(` ${colors.red}Error: ${errorMsg}${colors.reset}`);
1009
+ if (errorReason === "test_file_missing" && errorMissingPath) {
1010
+ console.log(` ${colors.red}✗ Test file not found: ${errorMissingPath}${colors.reset}`);
1011
+ if (errorSuggestions && errorSuggestions.length > 0) {
1012
+ console.log(` ${colors.dim}Did you mean:${colors.reset}`);
1013
+ for (const s of errorSuggestions) {
1014
+ console.log(` ${s}`);
1015
+ }
1016
+ }
1017
+ }
1018
+ else {
1019
+ console.log(` ${colors.red}Error: ${errorMsg}${colors.reset}`);
1020
+ if (errorStack) {
1021
+ const lines = errorStack.split("\n").slice(1);
1022
+ for (const line of lines) {
1023
+ const trimmed = line.trim();
1024
+ if (!trimmed)
1025
+ continue;
1026
+ const isFramework = trimmed.includes("/node_modules/") ||
1027
+ trimmed.includes("/@glubean/runner/") ||
1028
+ trimmed.includes("internal/modules/");
1029
+ console.log(` ${isFramework ? colors.dim : colors.reset}${trimmed}${colors.reset}`);
1030
+ }
1031
+ }
1032
+ }
775
1033
  }
1034
+ // Clear error fields after rendering so file:complete's orphan branch
1035
+ // (`!testStarted && errorMsg`) doesn't render this same failure again
1036
+ // and double-count it. The orphan branch is only meant for failures
1037
+ // that happened BEFORE any test started (e.g. harness died during
1038
+ // userModule load) — once we've finalized a started test, the error
1039
+ // belongs to that test alone.
1040
+ errorMsg = undefined;
1041
+ errorStack = undefined;
1042
+ errorReason = undefined;
1043
+ errorMissingPath = undefined;
1044
+ errorSuggestions = undefined;
776
1045
  };
777
1046
  // Pre-filter tests by capability profile so file:start can emit the
778
1047
  // ⊘ lines inline (preserves the pre-migration output layout where these
@@ -808,6 +1077,60 @@ export async function runCommand(target, options = {}) {
808
1077
  // Files ProjectRunner actually started. Any fileGroups entry that never
809
1078
  // gets file:start is a fail-fast skip — handled post run:complete.
810
1079
  const startedFiles = new Set();
1080
+ // Files that are 100% capability-skipped need ⊘ rows emitted manually
1081
+ // because ProjectRunner never starts a file with zero runnable tests
1082
+ // (file:start, which normally renders inline skip rows, won't fire).
1083
+ // We do NOT emit them up-front because that would re-order them ahead of
1084
+ // any earlier runnable files. Instead, we render them lazily — right
1085
+ // before the next runnable file's `file:start` fires (and one final pass
1086
+ // after run:complete for any trailing all-skipped files). This keeps the
1087
+ // visible file order matching `fileGroups` insertion order even in
1088
+ // multi-file fail-fast runs.
1089
+ const fileOrder = Array.from(fileGroups.keys());
1090
+ let nextFileIdx = 0;
1091
+ const emitAllSkippedFilesUpTo = (stopFilePath) => {
1092
+ while (nextFileIdx < fileOrder.length) {
1093
+ const filePath = fileOrder[nextFileIdx];
1094
+ if (filePath === stopFilePath)
1095
+ return;
1096
+ nextFileIdx++;
1097
+ if (runnableByFile.has(filePath))
1098
+ continue;
1099
+ const skips = fileCapabilitySkips.get(filePath);
1100
+ if (!skips || skips.length === 0)
1101
+ continue;
1102
+ if (isMultiFile) {
1103
+ const relPath = relative(process.cwd(), filePath);
1104
+ console.log(`${colors.bold}📁 ${relPath}${colors.reset}`);
1105
+ }
1106
+ for (const { ft, reason } of skips) {
1107
+ skipped++;
1108
+ const name = ft.test.meta.name || ft.test.meta.id;
1109
+ console.log(` ${colors.yellow}⊘${colors.reset} ${name} ${colors.dim}— skipped (${reason})${colors.reset}`);
1110
+ collectedRuns.push({
1111
+ testId: ft.test.meta.id,
1112
+ testName: name,
1113
+ tags: ft.test.meta.tags,
1114
+ filePath,
1115
+ events: [{ type: "status", status: "skipped", reason }],
1116
+ success: true,
1117
+ durationMs: 0,
1118
+ groupId: ft.test.meta.groupId,
1119
+ });
1120
+ }
1121
+ fileCapabilitySkips.delete(filePath);
1122
+ startedFiles.add(filePath);
1123
+ }
1124
+ };
1125
+ // If every selected test was capability-skipped, ProjectRunner has
1126
+ // nothing to do. Running it anyway would still perform session setup,
1127
+ // which on a broken session.ts would mask the skip output behind a
1128
+ // session:setup:failed exit. Drain the skip rows now and short-circuit
1129
+ // to the summary block.
1130
+ const hasRunnable = runnableTests.length > 0;
1131
+ if (!hasRunnable) {
1132
+ emitAllSkippedFilesUpTo(null);
1133
+ }
811
1134
  const runner = new ProjectRunner({
812
1135
  rootDir,
813
1136
  sharedConfig: shared,
@@ -827,7 +1150,10 @@ export async function runCommand(target, options = {}) {
827
1150
  ...(options.inspectBrk !== undefined && { inspectBrk: options.inspectBrk }),
828
1151
  metricCollector,
829
1152
  });
830
- for await (const ev of runner.run()) {
1153
+ // Only walk the runner stream when there are runnable tests. The empty
1154
+ // case has already emitted all capability skips above and falls
1155
+ // straight through to the summary.
1156
+ for await (const ev of hasRunnable ? runner.run() : []) {
831
1157
  switch (ev.type) {
832
1158
  case "bootstrap:start":
833
1159
  case "bootstrap:done":
@@ -859,6 +1185,18 @@ export async function runCommand(target, options = {}) {
859
1185
  else if (se.type === "log") {
860
1186
  console.log(` ${colors.dim}[session] ${se.message}${colors.reset}`);
861
1187
  }
1188
+ else if (se.type === "warning") {
1189
+ // Plan 1 AC5: render runner-fallback warnings emitted during
1190
+ // session setup. Only dedupe runner diagnostics (those carry a
1191
+ // `code` field — see ExecutionEvent.warning schema); user-emitted
1192
+ // ctx.warn(false, ...) warnings have no code and pass through.
1193
+ const isRunnerDiag = !!se.code;
1194
+ if (!isRunnerDiag || !emittedWarnings.has(se.message)) {
1195
+ if (isRunnerDiag)
1196
+ emittedWarnings.add(se.message);
1197
+ console.log(` ${colors.yellow}⚠ ${se.message}${colors.reset}`);
1198
+ }
1199
+ }
862
1200
  break;
863
1201
  }
864
1202
  case "session:setup:done": {
@@ -883,6 +1221,13 @@ export async function runCommand(target, options = {}) {
883
1221
  break;
884
1222
  }
885
1223
  case "file:start": {
1224
+ // Flush any 100%-skipped files that come before this one in
1225
+ // fileGroups order, so the user sees them in their natural place.
1226
+ emitAllSkippedFilesUpTo(ev.filePath);
1227
+ if (nextFileIdx < fileOrder.length &&
1228
+ fileOrder[nextFileIdx] === ev.filePath) {
1229
+ nextFileIdx++;
1230
+ }
886
1231
  currentGroupFilePath = ev.filePath;
887
1232
  startedFiles.add(ev.filePath);
888
1233
  const runnable = runnableByFile.get(ev.filePath) ?? [];
@@ -929,6 +1274,10 @@ export async function runCommand(target, options = {}) {
929
1274
  assertions = [];
930
1275
  success = false;
931
1276
  errorMsg = undefined;
1277
+ errorStack = undefined;
1278
+ errorReason = undefined;
1279
+ errorMissingPath = undefined;
1280
+ errorSuggestions = undefined;
932
1281
  peakMemoryMB = undefined;
933
1282
  stepAssertionCount = 0;
934
1283
  stepTraceLines = [];
@@ -946,6 +1295,10 @@ export async function runCommand(target, options = {}) {
946
1295
  success = event.status === "completed";
947
1296
  if (event.error) {
948
1297
  errorMsg = event.error;
1298
+ errorStack = event.stack;
1299
+ errorReason = event.reason;
1300
+ errorMissingPath = event.missingPath;
1301
+ errorSuggestions = event.suggestions;
949
1302
  addLogEntry("error", event.error);
950
1303
  }
951
1304
  if (event.peakMemoryMB)
@@ -954,8 +1307,13 @@ export async function runCommand(target, options = {}) {
954
1307
  break;
955
1308
  case "error":
956
1309
  success = false;
957
- if (!errorMsg)
1310
+ if (!errorMsg) {
958
1311
  errorMsg = event.message;
1312
+ errorStack = event.stack;
1313
+ errorReason = event.reason;
1314
+ errorMissingPath = event.missingPath;
1315
+ errorSuggestions = event.suggestions;
1316
+ }
959
1317
  addLogEntry("error", event.message);
960
1318
  break;
961
1319
  case "log":
@@ -1089,7 +1447,16 @@ export async function runCommand(target, options = {}) {
1089
1447
  break;
1090
1448
  case "warning": {
1091
1449
  const warnIcon = event.condition ? `${colors.green}✓${colors.reset}` : `${colors.yellow}⚠${colors.reset}`;
1092
- console.log(` ${warnIcon} ${colors.yellow}${event.message}${colors.reset}`);
1450
+ // Plan 1 AC5: dedupe runner-fallback / protocol-min warnings
1451
+ // (carry a `code` field — see ExecutionEvent.warning schema).
1452
+ // User-emitted ctx.warn(false, ...) warnings have no code and
1453
+ // pass through every time so test authors can see them repeat.
1454
+ const isRunnerDiag = !!event.code;
1455
+ if (!isRunnerDiag || !emittedWarnings.has(event.message)) {
1456
+ if (isRunnerDiag)
1457
+ emittedWarnings.add(event.message);
1458
+ console.log(` ${warnIcon} ${colors.yellow}${event.message}${colors.reset}`);
1459
+ }
1093
1460
  break;
1094
1461
  }
1095
1462
  case "schema_validation":
@@ -1114,7 +1481,32 @@ export async function runCommand(target, options = {}) {
1114
1481
  // mid-test or emitted no start event, promote the leftover state
1115
1482
  // to a visible failure row.
1116
1483
  if (!testStarted && errorMsg) {
1117
- console.log(` ${colors.red}✗ ${errorMsg}${colors.reset}`);
1484
+ // Plan 4: rich render for orphan-error case (no leading start event,
1485
+ // e.g. harness died during userModule import).
1486
+ if (errorReason === "test_file_missing" && errorMissingPath) {
1487
+ console.log(` ${colors.red}✗ Test file not found: ${errorMissingPath}${colors.reset}`);
1488
+ if (errorSuggestions && errorSuggestions.length > 0) {
1489
+ console.log(` ${colors.dim}Did you mean:${colors.reset}`);
1490
+ for (const s of errorSuggestions) {
1491
+ console.log(` ${s}`);
1492
+ }
1493
+ }
1494
+ }
1495
+ else {
1496
+ console.log(` ${colors.red}✗ ${errorMsg}${colors.reset}`);
1497
+ if (errorStack) {
1498
+ const lines = errorStack.split("\n").slice(1);
1499
+ for (const line of lines) {
1500
+ const trimmed = line.trim();
1501
+ if (!trimmed)
1502
+ continue;
1503
+ const isFramework = trimmed.includes("/node_modules/") ||
1504
+ trimmed.includes("/@glubean/runner/") ||
1505
+ trimmed.includes("internal/modules/");
1506
+ console.log(` ${isFramework ? colors.dim : colors.reset}${trimmed}${colors.reset}`);
1507
+ }
1508
+ }
1509
+ }
1118
1510
  failed++;
1119
1511
  }
1120
1512
  if (testStarted) {
@@ -1124,6 +1516,13 @@ export async function runCommand(target, options = {}) {
1124
1516
  }
1125
1517
  break;
1126
1518
  case "run:complete":
1519
+ // Flush any trailing 100%-skipped files (after the last runnable
1520
+ // file). Under fail-fast, also flush only up to the file that
1521
+ // actually started — files beyond the fail point still belong to
1522
+ // the fail-fast pass below, not to the capability-skip pass.
1523
+ if (failureLimit === undefined || ev.failedCount < failureLimit) {
1524
+ emitAllSkippedFilesUpTo(null);
1525
+ }
1127
1526
  // Fail-fast skip display: any file ProjectRunner never started
1128
1527
  // (because the failure limit kicked in between file groups) gets
1129
1528
  // the old "○ (skipped — fail-fast)" lines here, preserving the
@@ -1186,9 +1585,15 @@ export async function runCommand(target, options = {}) {
1186
1585
  console.log(`${colors.bold}Stats:${colors.reset} ${colors.dim}${parts.join(" · ")}${colors.reset}`);
1187
1586
  }
1188
1587
  // ── Threshold evaluation ──────────────────────────────────────────────────
1588
+ // Prefer the v1 plan's resolved thresholds when present (profile mode);
1589
+ // fall back to the legacy package.json `thresholds` otherwise. (P2 removes
1590
+ // the legacy source — see docs/06 config consolidation.)
1591
+ const effectiveThresholds = options.thresholds && Object.keys(options.thresholds).length > 0
1592
+ ? options.thresholds
1593
+ : glubeanConfig.thresholds;
1189
1594
  let thresholdSummary;
1190
- if (glubeanConfig.thresholds && Object.keys(glubeanConfig.thresholds).length > 0) {
1191
- thresholdSummary = evaluateThresholds(glubeanConfig.thresholds, metricCollector);
1595
+ if (effectiveThresholds && Object.keys(effectiveThresholds).length > 0) {
1596
+ thresholdSummary = evaluateThresholds(effectiveThresholds, metricCollector);
1192
1597
  const { results: thresholdResults, pass: allPass } = thresholdSummary;
1193
1598
  if (thresholdResults.length > 0) {
1194
1599
  console.log(`${colors.bold}Thresholds:${colors.reset}`);
@@ -1215,7 +1620,11 @@ export async function runCommand(target, options = {}) {
1215
1620
  };
1216
1621
  const logContent = [
1217
1622
  `# Glubean Test Log`,
1218
- `# Target: ${isMultiFile ? resolve(target) : testFiles[0]}`,
1623
+ `# Target: ${isMultiFile
1624
+ ? Array.isArray(target)
1625
+ ? target.map((t) => resolve(t)).join(", ")
1626
+ : resolve(target)
1627
+ : testFiles[0]}`,
1219
1628
  `# Run at: ${runStartTime}`,
1220
1629
  `# Tests: ${passed} passed, ${failed} failed`,
1221
1630
  ``,
@@ -1269,7 +1678,7 @@ export async function runCommand(target, options = {}) {
1269
1678
  const tracesPath = resolve(glubeanDir, "traces.json");
1270
1679
  const traceSummary = {
1271
1680
  runAt: runStartTime,
1272
- target,
1681
+ target: targetDisplay,
1273
1682
  files: testFiles.map((f) => relative(process.cwd(), f)),
1274
1683
  traces: traceCollector,
1275
1684
  };
@@ -1288,7 +1697,7 @@ export async function runCommand(target, options = {}) {
1288
1697
  };
1289
1698
  const resultPayload = {
1290
1699
  context: runContext,
1291
- target,
1700
+ target: targetDisplay,
1292
1701
  files: testFiles.map((f) => relative(process.cwd(), f)),
1293
1702
  runAt: runStartLocal,
1294
1703
  summary: {
@@ -1344,7 +1753,7 @@ export async function runCommand(target, options = {}) {
1344
1753
  skipped,
1345
1754
  durationMs: totalDurationMs,
1346
1755
  };
1347
- const xml = toJunitXml(collectedRuns, target, summaryData);
1756
+ const xml = toJunitXml(collectedRuns, targetDisplay, summaryData);
1348
1757
  await mkdir(dirname(junitPath), { recursive: true });
1349
1758
  await writeFile(junitPath, xml, "utf-8");
1350
1759
  console.log(`${colors.dim}JUnit XML written to: ${junitPath}${colors.reset}\n`);
@@ -1404,10 +1813,17 @@ export async function runCommand(target, options = {}) {
1404
1813
  }
1405
1814
  else {
1406
1815
  const { compileScopes, redactEvent, BUILTIN_SCOPES } = await import("@glubean/redaction");
1816
+ // Prefer the v1 plan's full redaction config when supplied
1817
+ // (Phase 4 init scaffolds `defaults.redaction` in glubean.yaml,
1818
+ // including any custom globalRules / sensitiveKeys / customPatterns).
1819
+ // The legacy loadConfig path doesn't read glubean.yaml — without
1820
+ // this, custom rules would be silently ignored and matching
1821
+ // secrets could be sent to Cloud.
1822
+ const effectiveRedaction = options.redactionConfig ?? glubeanConfig.redaction;
1407
1823
  const compiledScopes = compileScopes({
1408
1824
  builtinScopes: BUILTIN_SCOPES,
1409
- globalRules: glubeanConfig.redaction.globalRules,
1410
- replacementFormat: glubeanConfig.redaction.replacementFormat,
1825
+ globalRules: effectiveRedaction.globalRules,
1826
+ replacementFormat: effectiveRedaction.replacementFormat,
1411
1827
  });
1412
1828
  // Generate metadata for test registry
1413
1829
  let metadata;
@@ -1424,21 +1840,48 @@ export async function runCommand(target, options = {}) {
1424
1840
  catch {
1425
1841
  // Non-critical: upload results without metadata
1426
1842
  }
1843
+ // Phase 5 5a — attach run-plan provenance to the upload metadata
1844
+ // bucket. Cloud server projects this to top-level RunEntity fields
1845
+ // (see apps/server/src/tasks/helpers/extract-run-plan.ts). Nested
1846
+ // under `metadata` to clear the server DTO's `forbidNonWhitelisted`
1847
+ // top-level gate. Only emitted when:
1848
+ // 1. The run used a profile (no profile → nothing to record).
1849
+ // 2. The scan path produced metadata.
1850
+ // Skipping runPlan in the degraded-scan path is intentional —
1851
+ // synthesizing a runPlan-only shell with `files: {}` would make
1852
+ // the server's upsertTests treat all active tests as "removed"
1853
+ // (authoritative file map = empty). Better to lose runPlan
1854
+ // provenance on degraded scans than to corrupt the test registry.
1855
+ if (metadata && options.profile) {
1856
+ const runPlan = {
1857
+ profile: options.profile,
1858
+ };
1859
+ if (options.suites && options.suites.length > 0) {
1860
+ runPlan.suites = options.suites;
1861
+ }
1862
+ metadata = { ...metadata, runPlan };
1863
+ }
1427
1864
  const redactedPayload = {
1428
1865
  ...resultPayload,
1429
1866
  metadata,
1430
1867
  tests: resultPayload.tests.map((t) => ({
1431
1868
  ...t,
1432
- events: t.events.map((e) => redactEvent(e, compiledScopes, glubeanConfig.redaction.replacementFormat)),
1869
+ events: t.events.map((e) => redactEvent(e, compiledScopes, effectiveRedaction.replacementFormat)),
1433
1870
  })),
1434
1871
  };
1435
- await uploadToCloud(redactedPayload, {
1872
+ const uploadReceipt = await uploadToCloud(redactedPayload, {
1436
1873
  apiUrl,
1437
1874
  token,
1438
1875
  projectId,
1439
1876
  envFile: effectiveRun.envFile,
1440
1877
  rootDir,
1441
1878
  });
1879
+ if (options.uploadReceiptJson) {
1880
+ const receiptPath = resolveOutputPath(options.uploadReceiptJson, process.cwd());
1881
+ await mkdir(dirname(receiptPath), { recursive: true });
1882
+ await writeFile(receiptPath, JSON.stringify(uploadReceipt, null, 2) + "\n", "utf-8");
1883
+ console.log(`${colors.dim}Upload receipt written to: ${receiptPath}${colors.reset}`);
1884
+ }
1442
1885
  }
1443
1886
  }
1444
1887
  if (failed > 0 || (thresholdSummary && !thresholdSummary.pass)) {