qualty 0.1.4 → 0.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2) hide show
  1. package/bin/qualty.js +277 -38
  2. package/package.json +1 -1
package/bin/qualty.js CHANGED
@@ -1,5 +1,6 @@
1
1
  #!/usr/bin/env node
2
2
 
3
+ import { appendFileSync } from "node:fs";
3
4
  import { spawn } from "node:child_process";
4
5
  import process from "node:process";
5
6
 
@@ -174,18 +175,232 @@ function sleep(ms) {
174
175
  return new Promise((resolve) => setTimeout(resolve, ms));
175
176
  }
176
177
 
178
/**
 * Reports whether the CLI is running as part of a GitHub Actions job.
 *
 * @returns {boolean} true only when the GITHUB_ACTIONS environment variable is exactly "true".
 */
function isGithubActions() {
  const { GITHUB_ACTIONS: flag } = process.env;
  return flag === "true";
}
181
+
182
/**
 * Safe one-line text for a GitHub-flavored Markdown table cell.
 *
 * Line breaks (CRLF, bare CR, LF) become spaces, backslashes and pipes are
 * backslash-escaped, and the escaped result is capped at `maxLen` characters.
 *
 * @param {unknown} text - Value to render; null/undefined are treated as empty.
 * @param {number} [maxLen=200] - Maximum length of the returned (escaped) text.
 * @returns {string} Escaped single-line text, or "—" when nothing is left.
 */
function mdTableCell(text, maxLen = 200) {
  const clipped = String(text ?? "")
    .replace(/\r\n?|\n/g, " ")
    .replace(/\\/g, "\\\\")
    .replace(/\|/g, "\\|")
    .trim()
    .slice(0, maxLen);

  // The cut happens after escaping, so it can land in the middle of an escape
  // pair. An odd run of trailing backslashes would escape whatever follows the
  // cell (typically the closing " |"), so drop the dangling one.
  const trailingBackslashes = /\\*$/.exec(clipped)[0].length;
  const balanced = trailingBackslashes % 2 === 1 ? clipped.slice(0, -1) : clipped;

  return balanced.trimEnd() || "—";
}
192
+
193
/**
 * Builds the URL of the current GitHub Actions workflow run from the
 * GITHUB_SERVER_URL, GITHUB_REPOSITORY and GITHUB_RUN_ID environment variables.
 *
 * @returns {string} `<server>/<repo>/actions/runs/<runId>`, or "" when the
 *   repository or run id is not set. The server defaults to https://github.com.
 */
function githubWorkflowRunUrl() {
  const {
    GITHUB_SERVER_URL: serverUrl,
    GITHUB_REPOSITORY: repo,
    GITHUB_RUN_ID: runId,
  } = process.env;
  if (!repo || !runId) return "";
  // Strip every trailing slash (not just one) so the joined URL never contains "//".
  const server = (serverUrl || "https://github.com").replace(/\/+$/, "");
  return `${server}/${repo}/actions/runs/${runId}`;
}
200
+
201
/**
 * Maps an execution status payload to a pass/fail verdict and a short label.
 *
 * @param {{status?: string, failed_tests?: number|string}|null|undefined} status
 * @returns {{ok: boolean, label: string}} `ok` is true only for a "completed"
 *   run with zero failed tests; any other state is reported as not ok.
 */
function executionOutcome(status) {
  const state = status?.status;
  const failedCount = Number(status?.failed_tests ?? 0);

  switch (state) {
    case "completed":
      if (failedCount === 0) return { ok: true, label: "Pass" };
      if (failedCount > 0) return { ok: false, label: "Fail" };
      // A NaN or negative count falls through to the generic label below.
      break;
    case "cancelled":
      return { ok: false, label: "Cancelled" };
    case "failed":
      return { ok: false, label: "Failed" };
    default:
      break;
  }
  return { ok: false, label: mdTableCell(state || "?", 24) };
}
210
+
211
/**
 * Appends Markdown to the file named by GITHUB_STEP_SUMMARY, which GitHub renders
 * on the job "Summary" tab (tables, links). Step logs stay plain text.
 * https://docs.github.com/en/actions/using-workflows/workflow-commands-for-github-actions#adding-a-job-summary
 *
 * Does nothing when the variable is unset or `markdown` is empty.
 *
 * @param {string} markdown - Markdown to append; a trailing newline is added.
 */
function appendGithubJobSummaryMarkdown(markdown) {
  const summaryFile = process.env.GITHUB_STEP_SUMMARY;
  if (summaryFile && markdown) {
    try {
      appendFileSync(summaryFile, `${markdown}\n`, "utf8");
    } catch {
      // Best effort: a summary write error must never fail the job.
    }
  }
}
224
+
225
/**
 * Wraps text in a Markdown inline code span.
 *
 * Backslash escapes do not work inside code spans, so instead of escaping, the
 * delimiter is made one backtick longer than the longest backtick run in the text.
 *
 * @param {unknown} text - Value to show; line breaks are flattened to spaces.
 * @param {number} maxLen - Maximum number of characters of text to keep.
 * @returns {string} The code span, e.g. "`tap button`".
 */
function mdInlineCode(text, maxLen) {
  const flat =
    String(text ?? "")
      .replace(/\r\n?|\n/g, " ")
      .trim()
      .slice(0, maxLen) || "—";
  const backtickRuns = flat.match(/`+/g) ?? [];
  const longestRun = Math.max(0, ...backtickRuns.map((run) => run.length));
  const fence = "`".repeat(longestRun + 1);
  // A span whose content starts or ends with a backtick needs padding spaces.
  const pad = flat.startsWith("`") || flat.endsWith("`") ? " " : "";
  return `${fence}${pad}${flat}${pad}${fence}`;
}

/**
 * Markdown for per-step labels, thoughts (description), actions, explanation, evaluator.
 * Kept short so the job Summary stays under GitHub size limits; full text stays in step logs.
 *
 * @param {Record<string, object>} finalStatuses - Status payload keyed by execution id;
 *   ids without an entry are treated as an empty payload.
 * @param {Iterable<string>} executionJobIds - Execution ids, in output order.
 * @returns {string} Markdown, or "" when no combination has steps, an explanation
 *   or evaluator output.
 */
function buildStepDetailsSummaryMarkdown(finalStatuses, executionJobIds) {
  const MAX_STEPS = 50;
  const parts = [];
  let any = false;

  for (const executionId of executionJobIds) {
    const status = finalStatuses[executionId] || {};
    const combos = Array.isArray(status.combinations) ? status.combinations : [];
    const episode = status.episode_name || "Run";

    for (const c of combos) {
      // The payload comes from the API: skip malformed entries instead of throwing.
      if (c == null || typeof c !== "object") continue;

      const steps = Array.isArray(c.steps_json) ? c.steps_json : [];
      const device = String(c.device ?? "?");
      const evaluator = c.agent_output ?? c.gpt_output;
      const explanation = String(c.explanation || "").trim();
      const evaluatorText = String(evaluator || "").trim();
      if (steps.length === 0 && !explanation && !evaluatorText) continue;

      any = true;
      parts.push(
        `#### ${mdTableCell(episode, 72)} · ${mdTableCell(device, 36)} · ${mdInlineCode(executionId, 36)}`
      );
      parts.push("");

      steps.slice(0, MAX_STEPS).forEach((rawStep, j) => {
        const s = rawStep != null && typeof rawStep === "object" ? rawStep : {};
        const name = s.name ? String(s.name).trim() : "";
        const desc = s.description ? String(s.description).trim() : "";
        const label = name || desc.slice(0, 100) || `Step ${j + 1}`;
        const st = s.status ?? "?";
        parts.push(`${j + 1}. **${mdTableCell(st, 16)}** ${mdTableCell(label, 140)}`);
        // Compare trimmed with trimmed so a padded name is not repeated as "Thoughts".
        if (desc && desc !== name) {
          // Three spaces nest the bullet under the numbered item above.
          parts.push(`   - *Thoughts:* ${mdTableCell(desc, 900)}`);
        }
        if (s.action) {
          parts.push(`   - *Action:* ${mdInlineCode(s.action, 500)}`);
        }
        parts.push("");
      });
      if (steps.length > MAX_STEPS) {
        parts.push(
          `*…and ${steps.length - MAX_STEPS} more steps — expand **Qualty** groups in the job log or open the run in Qualty for the full list.*`
        );
        parts.push("");
      }

      if (explanation) {
        parts.push("**Explanation**");
        parts.push("");
        parts.push(`> ${mdTableCell(c.explanation, 4500)}`);
        parts.push("");
      }
      if (evaluatorText) {
        parts.push("**Final evaluator**");
        parts.push("");
        parts.push(`> ${mdTableCell(evaluator, 8000)}`);
        parts.push("");
      }
    }
  }

  if (!any) return "";
  parts.push(
    "*Truncated for the Summary tab. Uncapped step lines and evaluator text are in the job log (expand the Qualty groups) or in the Qualty dashboard.*"
  );
  return parts.join("\n");
}
302
+
303
/**
 * Returns `rawUrl` in a form that is safe as a Markdown link destination inside
 * a table cell, or "" when it is not an absolute http(s) URL.
 *
 * Parentheses, pipes and backslashes are percent-encoded because they would
 * otherwise end the link destination or the table cell.
 *
 * @param {unknown} rawUrl
 * @returns {string}
 */
function mdSafeLinkUrl(rawUrl) {
  const candidate = String(rawUrl ?? "").trim();
  if (!candidate) return "";
  let parsed;
  try {
    parsed = new URL(candidate);
  } catch {
    return ""; // not an absolute URL
  }
  if (parsed.protocol !== "http:" && parsed.protocol !== "https:") return "";
  return parsed.href.replace(
    /[()|\\]/g,
    (ch) => `%${ch.charCodeAt(0).toString(16).toUpperCase()}`
  );
}

/**
 * Writes the Qualty results overview to the GitHub Actions job Summary:
 * a headline with pass/fail counts, one table row per execution, a collapsible
 * per-device table, collapsible step details, and a link to the workflow run.
 * Does nothing outside GitHub Actions.
 *
 * @param {object} params
 * @param {string[]} params.executionJobIds - Execution ids, in output order.
 * @param {Record<string, object>} params.finalStatuses - Status payload keyed by execution id.
 * @param {number} params.passed - Number of passed runs shown in the headline.
 * @param {number} params.failed - Number of failed runs shown in the headline.
 */
function writeQualtyGithubJobSummary({ executionJobIds, finalStatuses, passed, failed }) {
  if (!isGithubActions()) return;

  const total = executionJobIds.length;
  const lines = [
    "## Qualty",
    "",
    `**${passed} passed**, **${failed} failed** · ${total} run${total === 1 ? "" : "s"}.`,
    "",
    "| Test | Execution | Status | Failed | Result | Qualty |",
    "| --- | --- | --- | ---: | --- | --- |",
  ];

  for (const executionId of executionJobIds) {
    const status = finalStatuses[executionId] || {};
    const title = mdTableCell(status.episode_name || "—", 72);
    const idCell = `\`${mdTableCell(executionId, 80)}\``;
    const apiStatus = mdTableCell(status.status ?? "—", 20);
    // API-supplied value: escape it like every other cell ("—" when missing).
    const failedN = mdTableCell(status.failed_tests, 12);
    const outcome = executionOutcome(status);
    const resultCell = `${outcome.ok ? "✅" : "❌"} **${outcome.label}**`;
    // Only link absolute http(s) URLs, encoded so they cannot break the row.
    const url = mdSafeLinkUrl(status.url);
    const linkCell = url ? `[Open run](${url})` : "—";
    lines.push(
      `| ${title} | ${idCell} | ${apiStatus} | ${failedN} | ${resultCell} | ${linkCell} |`
    );
  }

  const comboRows = [];
  for (const executionId of executionJobIds) {
    const status = finalStatuses[executionId] || {};
    const combos = Array.isArray(status.combinations) ? status.combinations : [];
    const shortTitle = mdTableCell(
      status.episode_name || (executionId ? String(executionId).slice(0, 8) : "—"),
      40
    );
    for (const c of combos) {
      // Skip malformed (null / non-object) entries instead of throwing.
      if (c == null || typeof c !== "object") continue;
      const device = mdTableCell(c.device ?? "?", 32);
      const comboSt = mdTableCell(
        c.status ?? (c.success === true ? "passed" : c.success === false ? "failed" : "?"),
        16
      );
      comboRows.push(`| \`${mdTableCell(executionId, 36)}\` | ${shortTitle} | ${device} | ${comboSt} |`);
    }
  }

  if (comboRows.length > 0) {
    lines.push(
      "",
      "<details>",
      "<summary><strong>Per device / combination</strong></summary>",
      "",
      "| Execution | Test | Device | Result |",
      "| --- | --- | --- | --- |",
      ...comboRows,
      "",
      "</details>"
    );
  }

  const stepDetailsMd = buildStepDetailsSummaryMarkdown(finalStatuses, executionJobIds);
  if (stepDetailsMd) {
    lines.push(
      "",
      "<details>",
      "<summary><strong>Steps &amp; agent notes (truncated)</strong></summary>",
      "",
      stepDetailsMd,
      "",
      "</details>"
    );
  }

  const runUrl = githubWorkflowRunUrl();
  if (runUrl) {
    lines.push("");
    lines.push(`[This workflow run on GitHub](${runUrl})`);
  }

  appendGithubJobSummaryMarkdown(lines.join("\n"));
}
381
+
177
382
  function truncateForCiLog(text, maxLen) {
178
383
  const s = String(text ?? "");
179
384
  if (s.length <= maxLen) return s;
180
- return `${s.slice(0, maxLen)}\n[qualty] (truncated, ${s.length - maxLen} more chars)`;
385
+ const tail = isGithubActions()
386
+ ? `… (truncated, ${s.length - maxLen} more chars)`
387
+ : `[qualty] … (truncated, ${s.length - maxLen} more chars)`;
388
+ return `${s.slice(0, maxLen)}\n${tail}`;
181
389
  }
182
390
 
183
- function logPrefixedLines(prefix, text) {
391
/**
 * Prints one log line to stdout.
 * The `[qualty]` prefix is added only outside GitHub Actions (inside ::group::, plain text reads better).
 *
 * @param {string} line - Text to print.
 */
function viewOut(line) {
  const text = isGithubActions() ? line : `[qualty] ${line}`;
  // eslint-disable-next-line no-console
  console.log(text);
}
396
+
397
+ function logPrefixedLines(ghaIndent, nonGhaPrefix, text) {
184
398
  const body = String(text ?? "").trimEnd();
185
399
  if (!body) return;
186
- for (const line of body.split("\n")) {
400
+ const prefix = isGithubActions() ? ghaIndent : nonGhaPrefix;
401
+ for (const ln of body.split("\n")) {
187
402
  // eslint-disable-next-line no-console
188
- console.log(`${prefix}${line}`);
403
+ console.log(`${prefix}${ln}`);
189
404
  }
190
405
  }
191
406
 
@@ -194,40 +409,51 @@ function logPrefixedLines(prefix, text) {
194
409
  * Data comes from GET status payload fields on each combination (steps_json, explanation, agent_output).
195
410
  */
196
411
  function printQualtyViewLogsReport(executionId, status) {
197
- // eslint-disable-next-line no-console
198
- console.log(`\n[qualty] ━━━ View logs: ${executionId} (${status.episode_name || "run"}) ━━━`);
199
- if (status.url) {
412
+ const gha = isGithubActions();
413
+ const title = `${status.episode_name || "run"} (${executionId})`;
414
+ if (gha) {
415
+ // Collapsible section in GitHub Actions (no rich tables — stdout is plain text).
416
+ // eslint-disable-next-line no-console
417
+ console.log(`::group::Qualty · ${title}`);
418
+ } else {
200
419
  // eslint-disable-next-line no-console
201
- console.log(`[qualty] URL: ${status.url}`);
420
+ console.log(`\n[qualty] ━━━ View logs: ${executionId} (${status.episode_name || "run"}) ━━━`);
202
421
  }
422
+ if (status.url) viewOut(`URL: ${status.url}`);
203
423
  if (status.error) {
204
- // eslint-disable-next-line no-console
205
- console.log(`[qualty] Run error: ${truncateForCiLog(status.error, 4000)}`);
424
+ viewOut("Run error:");
425
+ logPrefixedLines(" ", "[qualty] ", truncateForCiLog(status.error, 4000));
206
426
  }
207
427
  if (status.expected_behavior) {
208
- // eslint-disable-next-line no-console
209
- console.log(`[qualty] Expected behavior:\n[qualty] ${truncateForCiLog(status.expected_behavior, 6000).split("\n").join("\n[qualty] ")}`);
428
+ viewOut("Expected behavior:");
429
+ logPrefixedLines(" ", "[qualty] ", truncateForCiLog(status.expected_behavior, 6000));
210
430
  }
211
431
  const combos = Array.isArray(status.combinations) ? status.combinations : [];
212
432
  if (combos.length === 0) {
213
- // eslint-disable-next-line no-console
214
- console.log(
215
- "[qualty] (No per-device breakdown in API response yet — open this run in the Qualty dashboard for full logs.)"
216
- );
217
- // eslint-disable-next-line no-console
218
- console.log(`[qualty] ━━━ End view logs: ${executionId} ━━━\n`);
433
+ viewOut("(No per-device breakdown in API response yet — open this run in the Qualty dashboard for full logs.)");
434
+ if (gha) {
435
+ // eslint-disable-next-line no-console
436
+ console.log("::endgroup::");
437
+ } else {
438
+ // eslint-disable-next-line no-console
439
+ console.log(`[qualty] ━━━ End view logs: ${executionId} ━━━\n`);
440
+ }
219
441
  return;
220
442
  }
221
443
  for (let i = 0; i < combos.length; i += 1) {
222
444
  const c = combos[i];
223
445
  const device = c.device ?? "?";
224
446
  const comboStatus = c.status ?? (c.success === true ? "passed" : c.success === false ? "failed" : "?");
225
- // eslint-disable-next-line no-console
226
- console.log(`\n[qualty] --- Combination ${i + 1}/${combos.length} (${device} · ${comboStatus}) ---`);
447
+ if (gha) {
448
+ // eslint-disable-next-line no-console
449
+ console.log(`::group::${device} · ${comboStatus} (${i + 1}/${combos.length})`);
450
+ } else {
451
+ // eslint-disable-next-line no-console
452
+ console.log(`\n[qualty] --- Combination ${i + 1}/${combos.length} (${device} · ${comboStatus}) ---`);
453
+ }
227
454
  const steps = Array.isArray(c.steps_json) ? c.steps_json : [];
228
455
  if (steps.length > 0) {
229
- // eslint-disable-next-line no-console
230
- console.log(`[qualty] Steps (${steps.length}):`);
456
+ viewOut(`Steps (${steps.length}):`);
231
457
  for (let j = 0; j < steps.length; j += 1) {
232
458
  const s = steps[j];
233
459
  const label =
@@ -235,34 +461,38 @@ function printQualtyViewLogsReport(executionId, status) {
235
461
  (s.description && String(s.description).trim().slice(0, 100)) ||
236
462
  `Step ${j + 1}`;
237
463
  const st = s.status != null ? s.status : "?";
238
- // eslint-disable-next-line no-console
239
- console.log(`[qualty] ${j + 1}. [${st}] ${label}`);
464
+ viewOut(` ${j + 1}. [${st}] ${label}`);
240
465
  if (s.description && String(s.description).trim() && String(s.description) !== String(s.name)) {
241
- logPrefixedLines("[qualty] ", truncateForCiLog(s.description, 4000));
466
+ logPrefixedLines(" ", "[qualty] ", truncateForCiLog(s.description, 4000));
242
467
  }
243
468
  if (s.action) {
244
- // eslint-disable-next-line no-console
245
- console.log(`[qualty] action: ${truncateForCiLog(s.action, 2000)}`);
469
+ logPrefixedLines(" ", "[qualty] ", `action: ${truncateForCiLog(s.action, 2000)}`);
246
470
  }
247
471
  }
248
472
  } else if (c.total_steps) {
249
- // eslint-disable-next-line no-console
250
- console.log(`[qualty] (Step list not available yet; ${c.total_steps} step(s) reported.)`);
473
+ viewOut(`(Step list not available yet; ${c.total_steps} step(s) reported.)`);
251
474
  }
252
475
  if (c.explanation) {
253
- // eslint-disable-next-line no-console
254
- console.log(`[qualty] Explanation:\n[qualty] ${truncateForCiLog(c.explanation, 12000).split("\n").join("\n[qualty] ")}`);
476
+ viewOut("Explanation:");
477
+ logPrefixedLines(" ", "[qualty] ", truncateForCiLog(c.explanation, 12000));
255
478
  }
256
479
  const evaluator = c.agent_output ?? c.gpt_output;
257
480
  if (evaluator) {
481
+ viewOut("Final evaluator output:");
482
+ logPrefixedLines(" ", "[qualty] ", truncateForCiLog(evaluator, 16000));
483
+ }
484
+ if (gha) {
258
485
  // eslint-disable-next-line no-console
259
- console.log(
260
- `[qualty] Final evaluator output:\n[qualty] ${truncateForCiLog(evaluator, 16000).split("\n").join("\n[qualty] ")}`
261
- );
486
+ console.log("::endgroup::");
262
487
  }
263
488
  }
264
- // eslint-disable-next-line no-console
265
- console.log(`\n[qualty] ━━━ End view logs: ${executionId} ━━━\n`);
489
+ if (gha) {
490
+ // eslint-disable-next-line no-console
491
+ console.log("::endgroup::");
492
+ } else {
493
+ // eslint-disable-next-line no-console
494
+ console.log(`\n[qualty] ━━━ End view logs: ${executionId} ━━━\n`);
495
+ }
266
496
  }
267
497
 
268
498
  async function runCi(args) {
@@ -377,9 +607,18 @@ async function runCi(args) {
377
607
  );
378
608
  }
379
609
 
610
+ writeQualtyGithubJobSummary({ executionJobIds, finalStatuses, passed, failed });
611
+
380
612
  if (!noViewLogs) {
381
- // eslint-disable-next-line no-console
382
- console.log(`[qualty] Detailed run output (same fields as dashboard "View logs"):`);
613
+ if (isGithubActions()) {
614
+ // eslint-disable-next-line no-console
615
+ console.log(
616
+ "[qualty] Open the job Summary tab for a results table; expand the Qualty groups below for full step logs."
617
+ );
618
+ } else {
619
+ // eslint-disable-next-line no-console
620
+ console.log(`[qualty] Detailed run output (same fields as dashboard "View logs"):`);
621
+ }
383
622
  for (const executionId of executionJobIds) {
384
623
  printQualtyViewLogsReport(executionId, finalStatuses[executionId] || {});
385
624
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "qualty",
3
- "version": "0.1.4",
3
+ "version": "0.1.6",
4
4
  "description": "Qualty CLI for localhost and CI test runs",
5
5
  "bin": {
6
6
  "qualty": "bin/qualty.js"