@glubean/cli 0.9.3 → 0.9.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -374,6 +374,19 @@ export async function discoverTests(filePath) {
374
374
  console.error(`\x1b[31m✗ Contract import failed: ${err.file}\x1b[0m`);
375
375
  console.error(`\x1b[2m ${err.error}\x1b[0m`);
376
376
  }
377
+ // GLU-155: a contract file that throws during import is a discovery
378
+ // FAILURE, not an empty file — previously this returned `[]` here,
379
+ // which is indistinguishable from "this contract file legitimately
380
+ // exports zero cases." The caller's loop would then silently drop the
381
+ // whole file: other files still ran, the process still exited 0, and
382
+ // the final summary never mentioned the import error printed above.
383
+ // Throwing routes this through the existing per-file catch block below,
384
+ // which records it as a discovery failure — the run still executes
385
+ // every OTHER file (no fail-fast), but the summary now reports the
386
+ // failed file and the process exits non-zero. Message omits the file
387
+ // path (the caller already labels the error with the relative path) —
388
+ // just the underlying reason(s), already detailed above.
389
+ throw new Error(result.errors.map((err) => err.error).join("\n"));
377
390
  }
378
391
  return [];
379
392
  }
@@ -574,8 +587,14 @@ export async function runCommand(target, options = {}) {
574
587
  `${colors.dim}--rerun-failed reads ${lastRunPath}.${colors.reset}\n`);
575
588
  process.exit(1);
576
589
  }
577
- const { selectors, files } = deriveRerunSelectors({ tests: lastRun.tests ?? [] });
578
- if (selectors.length === 0) {
590
+ const { selectors, files, discoveryFailureFiles } = deriveRerunSelectors({
591
+ tests: lastRun.tests ?? [],
592
+ discoveryFailures: lastRun.discoveryFailures ?? [],
593
+ });
594
+ // GLU-155: a run isn't "nothing to rerun" just because no DISCOVERED test
595
+ // failed — a file whose import threw last time carries zero test ids and
596
+ // must still be retried.
597
+ if (selectors.length === 0 && discoveryFailureFiles.length === 0) {
579
598
  console.log(`\n${colors.green}✓ Last run had no failures — nothing to rerun.${colors.reset}\n`);
580
599
  process.exit(0);
581
600
  }
@@ -592,9 +611,45 @@ export async function runCommand(target, options = {}) {
592
611
  `from the last run are in the current target.${colors.reset}\n`);
593
612
  process.exit(1);
594
613
  }
595
- onlySelectors = selectors;
596
- console.log(`${colors.dim}--rerun-failed: ${selectors.length} failed test(s) across ` +
597
- `${testFiles.length} file(s)${colors.reset}\n`);
614
+ // GLU-155 codex R3 P2: a discovery-failure file's test ids were NEVER
615
+ // recorded (its import threw before any test ran), so `selectors` has no
616
+ // entry for it. Pushing its freshly-discovered ids into `onlySelectors`
617
+ // (an earlier version of this fix did that) is unsound for data-driven
618
+ // exports: `test.each`/`test.pick` discovery yields a TEMPLATE sentinel
619
+ // id (e.g. "user-$index"), but the harness's `matchOnly` (driven by the
620
+ // SAME global `GLUBEAN_RUNNER_ONLY_SELECTORS` selector set — one flat
621
+ // list, not scoped per file) matches CONCRETE expanded row ids by exact
622
+ // equality. The template id would never match a single row, so the
623
+ // rerun would exit green having executed ZERO rows from the very file
624
+ // it was supposed to retry — the same false-green failure mode GLU-155
625
+ // exists to close, just relocated to `--rerun-failed`.
626
+ //
627
+ // There is no way to scope selectors per-file in the current protocol
628
+ // (one flat list feeds both the CLI's own narrowing below and the
629
+ // harness env channel), so when a carry-over import-failed file is
630
+ // ACTUALLY IN THIS RUN'S TARGET, the only sound choice is to drop
631
+ // id-based narrowing entirely and run every test in every file
632
+ // `testFiles` was already narrowed to above. This only widens what a
633
+ // MIXED rerun (real failed tests in one file + an import failure in
634
+ // another) re-executes inside the failed-test file's OTHER,
635
+ // previously-passing cases — safe over-inclusion, not a correctness risk.
636
+ //
637
+ // GLU-155 codex R4 P2: gate that widening on discovery-failure files
638
+ // that SURVIVED the `testFiles` filter above — NOT on
639
+ // `discoveryFailureFiles.length` from the last run. A partial target
640
+ // (e.g. `glubean run one-failed-test-file.ts --rerun-failed`) that
641
+ // excludes the import-failed file must keep its precise id narrowing;
642
+ // otherwise the old run's unrelated import failure would silently
643
+ // re-run every previously-passing test in the one file being retried.
644
+ const discoveryFailureFilesAbs = new Set(discoveryFailureFiles.map((f) => resolve(rootDir, f)));
645
+ const targetedDiscoveryFailures = testFiles.filter((f) => discoveryFailureFilesAbs.has(resolve(f)));
646
+ onlySelectors = targetedDiscoveryFailures.length > 0 ? [] : selectors;
647
+ console.log(`${colors.dim}--rerun-failed: ${selectors.length} failed test(s)` +
648
+ (targetedDiscoveryFailures.length > 0
649
+ ? ` + ${targetedDiscoveryFailures.length} file(s) that failed to import ` +
650
+ `(running those file(s) in full, import-time test ids are unknown)`
651
+ : "") +
652
+ ` across ${testFiles.length} file(s)${colors.reset}\n`);
598
653
  }
599
654
  else {
600
655
  onlySelectors = selectorFlags.selectors;
@@ -606,6 +661,23 @@ export async function runCommand(target, options = {}) {
606
661
  // `glubean` field lingers in package.json so users know it's inert now.
607
662
  await warnIfLegacyPackageJsonConfig(rootDir);
608
663
  const glubeanConfig = structuredClone(CONFIG_DEFAULTS);
664
+ // GLU-155 codex R2 P1: hoisted from its original spot (just before the
665
+ // runner-stream loop) so discovery-failure error messages can be redacted
666
+ // BEFORE they're persisted — a contract file can throw with a secret in
667
+ // its message (e.g. a leaked token interpolated into an error string), and
668
+ // that string now lands in `.glubean/last-run.result.json` / the Cloud
669
+ // upload result (see discoveryFailedFiles below). Value is unchanged by
670
+ // moving it — it only reads `options`/`glubeanConfig`, both already
671
+ // available here.
672
+ const effectiveRedaction = options.redactionConfig ?? glubeanConfig.redaction;
673
+ // Redact a raw error/exception message the same way `redactNonEvent` (further
674
+ // below) redacts `context`/`customMetadata` — same rules, same replacement
675
+ // format, just scoped to a single string instead of an arbitrary value tree.
676
+ const redactDiscoveryError = (message) => redactValue(message, {
677
+ globalRules: effectiveRedaction.globalRules,
678
+ replacementFormat: effectiveRedaction.replacementFormat,
679
+ maxDepth: 64,
680
+ });
609
681
  const effectiveRun = mergeRunOptions(glubeanConfig.run, {
610
682
  verbose: options.verbose,
611
683
  pretty: options.pretty,
@@ -685,7 +757,7 @@ export async function runCommand(target, options = {}) {
685
757
  // resolution happens here (pre-run) so a misconfigured destination fails fast.
686
758
  let resolvedUploadTargetId;
687
759
  if (options.upload) {
688
- const { resolveToken, resolveProjectId, resolveApiUrl, resolveTargetId, resolveDefaultTargetId, checkUploadAuth, checkTargetInProject, } = await import("../lib/auth.js");
760
+ const { resolveToken, resolveProjectId, resolveApiUrl, resolveTargetId, resolveDefaultTargetId, checkUploadAuth, checkTargetInProject, PLATFORM_API_URL_UNRESOLVED_HINT, } = await import("../lib/auth.js");
689
761
  const authOpts = {
690
762
  token: options.token,
691
763
  project: options.project,
@@ -714,6 +786,11 @@ export async function runCommand(target, options = {}) {
714
786
  console.error(`${colors.dim}Use --project or set GLUBEAN_PROJECT_ID.${colors.reset}`);
715
787
  process.exit(1);
716
788
  }
789
+ if (!preApiUrl) {
790
+ console.error(`${colors.red}Error: could not determine the Platform API URL.${colors.reset}`);
791
+ console.error(`${colors.dim}${PLATFORM_API_URL_UNRESOLVED_HINT}${colors.reset}`);
792
+ process.exit(1);
793
+ }
717
794
  // Validate against the SAME server runs upload to. Don't pre-judge token
718
795
  // format locally — let the server decide. A least-privilege ingest token
719
796
  // (runs:write, no projects:read) gets 403 yet can still POST runs, so that
@@ -728,7 +805,7 @@ export async function runCommand(target, options = {}) {
728
805
  else if (check.status === 404) {
729
806
  console.error(`${colors.red}Error: project ${preProject} not found (404).${colors.reset}`);
730
807
  console.error(`${colors.dim}Preflight GET: ${preApiUrl}/v1/projects/${preProject}${colors.reset}`);
731
- console.error(`${colors.dim}Check that --project / GLUBEAN_PROJECT_ID is a real project id, that --api-url / GLUBEAN_API_URL has no stray trailing slash, and that it points at the platform ingest API (the token-only \`/v1/*\` service) — not a dashboard/session-auth host, which has no \`/v1\` routes and 404s here too.${colors.reset}`);
808
+ console.error(`${colors.dim}Check that --project / GLUBEAN_PROJECT_ID is a real project id, that --api-url / GLUBEAN_PLATFORM_API_URL / GLUBEAN_API_URL has no stray trailing slash, and that it points at the platform ingest API (the token-only \`/v1/*\` service) — not a dashboard/session-auth host, which has no \`/v1\` routes and 404s here too.${colors.reset}`);
732
809
  }
733
810
  else if (check.status === 403) {
734
811
  console.error(`${colors.red}Error: access to project ${preProject} is forbidden (403).${colors.reset}`);
@@ -739,7 +816,7 @@ export async function runCommand(target, options = {}) {
739
816
  }
740
817
  else {
741
818
  console.error(`${colors.red}Error: upload preflight got an unexpected response (${check.status}).${colors.reset}`);
742
- console.error(`${colors.dim}Check that --api-url / GLUBEAN_API_URL points at the Glubean platform API.${colors.reset}`);
819
+ console.error(`${colors.dim}Check that --api-url / GLUBEAN_PLATFORM_API_URL / GLUBEAN_API_URL points at the Glubean platform API.${colors.reset}`);
743
820
  }
744
821
  process.exit(1);
745
822
  }
@@ -762,7 +839,7 @@ export async function runCommand(target, options = {}) {
762
839
  if (tcheck.status === 404) {
763
840
  console.error(`${colors.red}Error: target ${preTarget} not found in project ${preProject} (404).${colors.reset}`);
764
841
  console.error(`${colors.dim}Preflight GET: ${preApiUrl}/v1/projects/${preProject}/targets/${preTarget}${colors.reset}`);
765
- console.error(`${colors.dim}Check upload.targetId / GLUBEAN_TARGET_ID / --upload-target, and that --api-url / GLUBEAN_API_URL points at the platform ingest API.${colors.reset}`);
842
+ console.error(`${colors.dim}Check upload.targetId / GLUBEAN_TARGET_ID / --upload-target, and that --api-url / GLUBEAN_PLATFORM_API_URL / GLUBEAN_API_URL points at the platform ingest API.${colors.reset}`);
766
843
  }
767
844
  else if (tcheck.status === 401) {
768
845
  console.error(`${colors.red}Error: authentication failed validating the target (401).${colors.reset}`);
@@ -816,6 +893,15 @@ export async function runCommand(target, options = {}) {
816
893
  console.log(`${colors.dim}Discovering tests...${colors.reset}`);
817
894
  const allFileTests = [];
818
895
  let totalDiscovered = 0;
896
+ // GLU-155: files whose discovery threw (e.g. a contract that fails on
897
+ // import) — aggregated so the run keeps executing every OTHER file, but
898
+ // the final summary + exit code still reflect the failure. Distinct from
899
+ // a genuinely test-less file (which discoverTests returns `[]` for
900
+ // without throwing) — this array is ONLY populated on a thrown error.
901
+ // Persisted into resultPayload.discoveryFailures below so
902
+ // `.glubean/last-run.result.json` / `--rerun-failed` stay consistent with
903
+ // the non-zero exit code (codex GLU-155 R1 P2).
904
+ const discoveryFailedFiles = [];
819
905
  for (const filePath of testFiles) {
820
906
  try {
821
907
  const tests = await discoverTests(filePath);
@@ -839,19 +925,59 @@ export async function runCommand(target, options = {}) {
839
925
  totalDiscovered += filteredTests.length;
840
926
  }
841
927
  catch (error) {
928
+ const message = error instanceof Error ? error.message : String(error);
842
929
  if (isMultiFile) {
843
930
  const relPath = relative(process.cwd(), filePath);
844
- console.error(` ${colors.red}✗${colors.reset} ${relPath}: ${error instanceof Error ? error.message : String(error)}`);
931
+ console.error(` ${colors.red}✗${colors.reset} ${relPath}: ${message}`);
932
+ // GLU-155: record the failure instead of silently moving on — the
933
+ // rest of the loop still runs every other file (aggregate, not
934
+ // fail-fast), but this file's absence must be visible in the
935
+ // summary and must flip the exit code non-zero below. The console
936
+ // line above stays UNREDACTED (local terminal, not persisted) —
937
+ // `error` here is redacted because it lands in
938
+ // `.glubean/last-run.result.json` / the Cloud upload result, which
939
+ // `context`/`customMetadata` already redact (codex R2 P1).
940
+ discoveryFailedFiles.push({
941
+ filePath: relative(rootDir, filePath),
942
+ error: redactDiscoveryError(message),
943
+ });
845
944
  }
846
945
  else {
847
946
  console.error(`\n${colors.red}❌ Failed to load test file${colors.reset}`);
848
- console.error(`${colors.dim}${error instanceof Error ? error.message : String(error)}${colors.reset}`);
947
+ console.error(`${colors.dim}${message}${colors.reset}`);
849
948
  process.exit(1);
850
949
  }
851
950
  }
852
951
  }
853
952
  if (allFileTests.length === 0) {
854
953
  console.error(`\n${colors.red}❌ No test cases found${isMultiFile ? ` in ${testFiles.length} file(s)` : " in file"}${colors.reset}`);
954
+ if (discoveryFailedFiles.length > 0) {
955
+ console.error(`${colors.dim}${discoveryFailedFiles.length} file(s) failed to import (see errors above) — ` +
956
+ `that may be the entire cause.${colors.reset}`);
957
+ // GLU-155 codex R2 P2: this branch used to exit WITHOUT writing
958
+ // `.glubean/last-run.result.json` — a run where EVERY targeted file
959
+ // failed to import (or the ones that did import export zero tests)
960
+ // left no trace on disk, so a mixed run's discovery failure got
961
+ // persisted (see resultPayload further below) but this all-or-mostly-
962
+ // broken one didn't, and `--rerun-failed` had nothing to retry from.
963
+ // Persist the same minimal shape `writeEmptyResult` uses elsewhere,
964
+ // plus `discoveryFailures`, before exiting.
965
+ try {
966
+ const glubeanDir = resolve(rootDir, ".glubean");
967
+ await mkdir(glubeanDir, { recursive: true });
968
+ await writeFile(resolve(glubeanDir, "last-run.result.json"), JSON.stringify({
969
+ target: targetDisplay,
970
+ files: testFiles.map((f) => relative(rootDir, f)),
971
+ runAt: runStartLocal,
972
+ summary: { total: 0, passed: 0, failed: 0, skipped: 0, durationMs: 0, stats: {} },
973
+ tests: [],
974
+ discoveryFailures: discoveryFailedFiles,
975
+ }, null, 2) + "\n", "utf-8");
976
+ }
977
+ catch {
978
+ // Non-critical — best-effort persistence, matches writeEmptyResult.
979
+ }
980
+ }
855
981
  console.error(`${colors.dim}Each test file must export tests: export const myTest = test("id")...${colors.reset}\n`);
856
982
  process.exit(1);
857
983
  }
@@ -1200,7 +1326,10 @@ export async function runCommand(target, options = {}) {
1200
1326
  testEvents.push({
1201
1327
  type: "status",
1202
1328
  status: "skipped",
1203
- ...(skipReason && { reason: skipReason }),
1329
+ // GLU-142 — `!== undefined` (not truthy) so an explicit `ctx.skip("")`
1330
+ // is preserved rather than silently coerced to "no reason", matching
1331
+ // the top-level `reason` field on the pushed CollectedTestRun below.
1332
+ ...(skipReason !== undefined && { reason: skipReason }),
1204
1333
  });
1205
1334
  }
1206
1335
  // GLU-128: `runStats` (→ result JSON `summary.stats`) used to wait for a
@@ -1231,6 +1360,9 @@ export async function runCommand(target, options = {}) {
1231
1360
  groupId: testItem?.meta.groupId,
1232
1361
  rowIndex: testRowIndex,
1233
1362
  each: testEach,
1363
+ // GLU-142 — `!== undefined` (not truthy) preserves an explicit
1364
+ // `ctx.skip("")` instead of silently coercing it to "no reason".
1365
+ ...(skippedClean && skipReason !== undefined && { reason: skipReason }),
1234
1366
  });
1235
1367
  addLogEntry("result", skippedClean ? "SKIPPED" : finalSuccess ? "PASSED" : "FAILED", {
1236
1368
  duration,
@@ -1357,6 +1489,31 @@ export async function runCommand(target, options = {}) {
1357
1489
  // Files ProjectRunner actually started. Any fileGroups entry that never
1358
1490
  // gets file:start is a fail-fast skip — handled post run:complete.
1359
1491
  const startedFiles = new Set();
1492
+ // Redaction (GLU-105): compile scopes ONCE, unconditionally, and redact
1493
+ // every harness event as it arrives — BEFORE it reaches any sink. Every
1494
+ // sink that touches this run's data (`.glubean/last-run.result.json`,
1495
+ // `--result-json`, `.glubean/traces.json`, `--log-file`, `--verbose`
1496
+ // console output, `--emit-full-trace` trace files) must consume the same
1497
+ // redacted stream. Previously redaction only ran on a clone built for
1498
+ // `--upload`, so every local-disk sink got the raw, secret-bearing event
1499
+ // regardless of whether `--upload` was even passed.
1500
+ //
1501
+ // Hoisted above `emitAllSkippedFilesUpTo` below (GLU-142 codex R2 P0): a
1502
+ // capability-skip's synthesized reason (`meta.deferred`/`meta.deprecated`
1503
+ // free text, or the requires-capability strings) is itself redaction-
1504
+ // relevant now that it's promoted into a top-level `reason` field
1505
+ // (`CollectedTestRun.reason` → result JSON / upload `test_result` row).
1506
+ // Compiling scopes here — instead of after this const's original position
1507
+ // just before the runner-stream loop — ensures they exist even for the
1508
+ // "every selected test was capability-skipped" short-circuit, which calls
1509
+ // `emitAllSkippedFilesUpTo` before the runner stream ever starts.
1510
+ // (`effectiveRedaction` itself now lives further up — GLU-155 codex R2 P1 —
1511
+ // so discovery-failure messages can be redacted before this point too.)
1512
+ const compiledScopes = compileScopes({
1513
+ builtinScopes: BUILTIN_SCOPES,
1514
+ globalRules: effectiveRedaction.globalRules,
1515
+ replacementFormat: effectiveRedaction.replacementFormat,
1516
+ });
1360
1517
  // Files that are 100% capability-skipped need ⊘ rows emitted manually
1361
1518
  // because ProjectRunner never starts a file with zero runnable tests
1362
1519
  // (file:start, which normally renders inline skip rows, won't fire).
@@ -1386,16 +1543,23 @@ export async function runCommand(target, options = {}) {
1386
1543
  for (const { ft, reason } of skips) {
1387
1544
  skipped++;
1388
1545
  const name = ft.test.meta.name || ft.test.meta.id;
1389
- console.log(` ${colors.yellow}⊘${colors.reset} ${name} ${colors.dim}skipped (${reason})${colors.reset}`);
1546
+ // GLU-142 (codex R2 P0) — a capability-skip's synthesized reason
1547
+ // (meta.deferred/meta.deprecated free text) is redaction-relevant
1548
+ // too now that it's promoted into a persisted/uploaded top-level
1549
+ // field; scrub it through the status.reason scope the same way a
1550
+ // runtime ctx.skip(reason) is, before it reaches console/disk/upload.
1551
+ const redactedReason = redactEvent({ type: "status", status: "skipped", reason }, compiledScopes, effectiveRedaction.replacementFormat).reason;
1552
+ console.log(` ${colors.yellow}⊘${colors.reset} ${name} ${colors.dim}— skipped (${redactedReason})${colors.reset}`);
1390
1553
  collectedRuns.push({
1391
1554
  testId: ft.test.meta.id,
1392
1555
  testName: name,
1393
1556
  tags: ft.test.meta.tags,
1394
1557
  filePath,
1395
- events: [{ type: "status", status: "skipped", reason }],
1558
+ events: [{ type: "status", status: "skipped", reason: redactedReason }],
1396
1559
  success: true,
1397
1560
  durationMs: 0,
1398
1561
  groupId: ft.test.meta.groupId,
1562
+ reason: redactedReason,
1399
1563
  });
1400
1564
  }
1401
1565
  fileCapabilitySkips.delete(filePath);
@@ -1430,20 +1594,6 @@ export async function runCommand(target, options = {}) {
1430
1594
  ...(options.inspectBrk !== undefined && { inspectBrk: options.inspectBrk }),
1431
1595
  metricCollector,
1432
1596
  });
1433
- // Redaction (GLU-105): compile scopes ONCE, unconditionally, and redact
1434
- // every harness event as it arrives — BEFORE it reaches any sink. Every
1435
- // sink that touches this run's data (`.glubean/last-run.result.json`,
1436
- // `--result-json`, `.glubean/traces.json`, `--log-file`, `--verbose`
1437
- // console output, `--emit-full-trace` trace files) must consume the same
1438
- // redacted stream. Previously redaction only ran on a clone built for
1439
- // `--upload`, so every local-disk sink got the raw, secret-bearing event
1440
- // regardless of whether `--upload` was even passed.
1441
- const effectiveRedaction = options.redactionConfig ?? glubeanConfig.redaction;
1442
- const compiledScopes = compileScopes({
1443
- builtinScopes: BUILTIN_SCOPES,
1444
- globalRules: effectiveRedaction.globalRules,
1445
- replacementFormat: effectiveRedaction.replacementFormat,
1446
- });
1447
1597
  // Only walk the runner stream when there are runnable tests. The empty
1448
1598
  // case has already emitted all capability skips above and falls
1449
1599
  // straight through to the summary.
@@ -1539,16 +1689,20 @@ export async function runCommand(target, options = {}) {
1539
1689
  for (const { ft, reason } of skips) {
1540
1690
  skipped++;
1541
1691
  const name = ft.test.meta.name || ft.test.meta.id;
1542
- console.log(` ${colors.yellow}⊘${colors.reset} ${name} ${colors.dim}skipped (${reason})${colors.reset}`);
1692
+ // GLU-142 (codex R2 P0) — same scrub as the other capability-skip
1693
+ // emission site above (emitAllSkippedFilesUpTo).
1694
+ const redactedReason = redactEvent({ type: "status", status: "skipped", reason }, compiledScopes, effectiveRedaction.replacementFormat).reason;
1695
+ console.log(` ${colors.yellow}⊘${colors.reset} ${name} ${colors.dim}— skipped (${redactedReason})${colors.reset}`);
1543
1696
  collectedRuns.push({
1544
1697
  testId: ft.test.meta.id,
1545
1698
  testName: name,
1546
1699
  tags: ft.test.meta.tags,
1547
1700
  filePath: ev.filePath,
1548
- events: [{ type: "status", status: "skipped", reason }],
1701
+ events: [{ type: "status", status: "skipped", reason: redactedReason }],
1549
1702
  success: true,
1550
1703
  durationMs: 0,
1551
1704
  groupId: ft.test.meta.groupId,
1705
+ reason: redactedReason,
1552
1706
  });
1553
1707
  }
1554
1708
  }
@@ -1881,6 +2035,13 @@ export async function runCommand(target, options = {}) {
1881
2035
  summaryParts.push(`${colors.yellow}${skipped} skipped${colors.reset}`);
1882
2036
  console.log(`${colors.bold}Tests:${colors.reset} ${summaryParts.join(", ")}`);
1883
2037
  console.log(`${colors.bold}Total:${colors.reset} ${passed + failed + skipped}`);
2038
+ // GLU-155: a contract/test file that failed to import is NOT reflected in
2039
+ // the pass/fail/skip counts above (its tests never got discovered), so it
2040
+ // gets its own summary line — otherwise a run with import failures reads
2041
+ // as a clean green summary even though whole files silently never ran.
2042
+ if (discoveryFailedFiles.length > 0) {
2043
+ console.log(`${colors.bold}Discovery:${colors.reset} ${colors.red}${discoveryFailedFiles.length} file(s) failed to import${colors.reset} ${colors.dim}(${discoveryFailedFiles.map((d) => d.filePath).join(", ")})${colors.reset}`);
2044
+ }
1884
2045
  if (overallPeakMemoryMB > 0) {
1885
2046
  const memColor = overallPeakMemoryMB > MEMORY_WARNING_THRESHOLD_MB ? colors.yellow : colors.dim;
1886
2047
  console.log(`${colors.bold}Memory:${colors.reset} ${memColor}${overallPeakMemoryMB.toFixed(2)} MB peak${colors.reset}`);
@@ -2063,8 +2224,20 @@ export async function runCommand(target, options = {}) {
2063
2224
  // without depending on a projection join. Undefined for non-each tests
2064
2225
  // (backward compatible: old runs / old CLI builds simply omit the field).
2065
2226
  ...(r.each !== undefined && { each: r.each }),
2227
+ // GLU-142 — surface the runtime skip reason at the top level (it was
2228
+ // already reachable by scanning `events` for a status:"skipped" entry,
2229
+ // but a top-level field lets consumers skip that reconstruction).
2230
+ ...(r.reason !== undefined && { reason: r.reason }),
2066
2231
  filePath: relative(rootDir, r.filePath),
2067
2232
  })),
2233
+ // GLU-155: files that failed to IMPORT (zero test ids ever discovered),
2234
+ // kept alongside `tests` so this run isn't internally inconsistent — the
2235
+ // process already exits non-zero for these, but without this the saved
2236
+ // `summary`/`tests` could read as a clean "N passed" even though a whole
2237
+ // file silently never ran. `filePath` is rootDir-relative, matching
2238
+ // `tests[].filePath`, so `--rerun-failed` (deriveRerunSelectors) can fold
2239
+ // these files back into its target set on the next run.
2240
+ ...(discoveryFailedFiles.length > 0 && { discoveryFailures: discoveryFailedFiles }),
2068
2241
  ...(thresholdSummary && { thresholds: thresholdSummary }),
2069
2242
  ...(options.meta && Object.keys(options.meta).length > 0
2070
2243
  ? { customMetadata: redactNonEvent(options.meta) }
@@ -2141,7 +2314,7 @@ export async function runCommand(target, options = {}) {
2141
2314
  }
2142
2315
  // ── Cloud upload ────────────────────────────────────────────────────────
2143
2316
  if (options.upload) {
2144
- const { resolveToken, resolveProjectId, resolveApiUrl } = await import("../lib/auth.js");
2317
+ const { resolveToken, resolveProjectId, resolveApiUrl, PLATFORM_API_URL_UNRESOLVED_HINT } = await import("../lib/auth.js");
2145
2318
  const { uploadToCloud, removeUploadedScreenshots } = await import("../lib/upload.js");
2146
2319
  const authOpts = {
2147
2320
  token: options.token,
@@ -2164,6 +2337,11 @@ export async function runCommand(target, options = {}) {
2164
2337
  console.error(`${colors.red}Upload failed: no project ID.${colors.reset}`);
2165
2338
  process.exit(1);
2166
2339
  }
2340
+ else if (!apiUrl) {
2341
+ console.error(`${colors.red}Upload failed: could not determine the Platform API URL.${colors.reset}`);
2342
+ console.error(`${colors.dim}${PLATFORM_API_URL_UNRESOLVED_HINT}${colors.reset}`);
2343
+ process.exit(1);
2344
+ }
2167
2345
  else {
2168
2346
  // `compiledScopes`/`effectiveRedaction` are the SAME instances used
2169
2347
  // above to redact every event as it was collected (GLU-105) — reused
@@ -2204,6 +2382,10 @@ export async function runCommand(target, options = {}) {
2204
2382
  durationMs: r.durationMs,
2205
2383
  ...(r.tags && r.tags.length ? { tags: r.tags } : {}),
2206
2384
  eventCount: r.events.length,
2385
+ // GLU-142 — run-time `ctx.skip(reason)` text, so `test_result` rows
2386
+ // carry the actual reason from THIS run instead of only the spec's
2387
+ // declared reason (which the dashboard falls back to).
2388
+ ...(r.reason !== undefined && { reason: r.reason }),
2207
2389
  }));
2208
2390
  // Metric tags (method/path) can in rare cases embed a secret in a path
2209
2391
  // segment — redact them with the same engine the projection line uses.
@@ -2237,9 +2419,14 @@ export async function runCommand(target, options = {}) {
2237
2419
  // Stable idempotency id for this run — reused across the upload retry so
2238
2420
  // a lost-response retry replaces this run instead of duplicating it (P1).
2239
2421
  clientRunId: randomUUID(),
2240
- // A breached metric threshold fails the run (mirrors the process exit
2241
- // below) even when every test passed — don't record it as "passed".
2242
- status: failed > 0 || (thresholdSummary && !thresholdSummary.pass) ? "failed" : "passed",
2422
+ // A breached metric threshold, or a file that failed to import
2423
+ // (GLU-155 — mirrors the process exit below), fails the run even
2424
+ // when every DISCOVERED test passed — don't record it as "passed".
2425
+ status: failed > 0 ||
2426
+ discoveryFailedFiles.length > 0 ||
2427
+ (thresholdSummary && !thresholdSummary.pass)
2428
+ ? "failed"
2429
+ : "passed",
2243
2430
  startedAt: runStartTime,
2244
2431
  completedAt: new Date(Date.parse(runStartTime) + totalDurationMs).toISOString(),
2245
2432
  durationMs: totalDurationMs,
@@ -2303,7 +2490,12 @@ export async function runCommand(target, options = {}) {
2303
2490
  }
2304
2491
  }
2305
2492
  }
2306
- if (failed > 0 || (thresholdSummary && !thresholdSummary.pass)) {
2493
+ // GLU-155: a file that failed to import must fail the run even if every
2494
+ // test that WAS discovered passed — otherwise CI reads green while a whole
2495
+ // contract file silently never ran.
2496
+ if (failed > 0 ||
2497
+ discoveryFailedFiles.length > 0 ||
2498
+ (thresholdSummary && !thresholdSummary.pass)) {
2307
2499
  process.exit(1);
2308
2500
  }
2309
2501
  }