qualty 0.1.4 → 0.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/qualty.js +277 -38
- package/package.json +1 -1
package/bin/qualty.js
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
2
|
|
|
3
|
+
import { appendFileSync } from "node:fs";
|
|
3
4
|
import { spawn } from "node:child_process";
|
|
4
5
|
import process from "node:process";
|
|
5
6
|
|
|
@@ -174,18 +175,232 @@ function sleep(ms) {
|
|
|
174
175
|
return new Promise((resolve) => setTimeout(resolve, ms));
|
|
175
176
|
}
|
|
176
177
|
|
|
178
|
+
/**
 * Tells whether the process is running inside a GitHub Actions job.
 *
 * @returns {boolean} True only when the GITHUB_ACTIONS environment variable is exactly "true".
 */
function isGithubActions() {
  const { GITHUB_ACTIONS: flag } = process.env;
  return flag === "true";
}
|
|
181
|
+
|
|
182
|
+
/**
 * Safe one-line text for a GitHub-flavored Markdown table cell.
 *
 * CRLF and LF line breaks become single spaces, backslashes and pipes are
 * backslash-escaped, and the result is trimmed and capped at `maxLen` UTF-16
 * code units. The cap never leaves half of an escape sequence or half of a
 * surrogate pair at the end of the returned text.
 *
 * @param {unknown} text - Value to render; null and undefined are treated as "".
 * @param {number} [maxLen=200] - Upper bound on the length of the returned (escaped) text.
 * @returns {string} The sanitized text, or "—" when nothing is left to show.
 */
function mdTableCell(text, maxLen = 200) {
  const escaped = String(text ?? "")
    .replace(/\r\n/g, "\n")
    .replace(/\n/g, " ")
    .replace(/\\/g, "\\\\")
    .replace(/\|/g, "\\|")
    .trim();
  let clipped = escaped.slice(0, maxLen);

  if (clipped.length > 0 && clipped.length < escaped.length) {
    // Escaping only ever emits backslashes as part of a two-character pair ("\\\\" or "\\|"),
    // so an odd run of backslashes at the cut means the last pair was split. The dangling
    // backslash would otherwise escape whatever the caller appends after the cell text.
    let trailingBackslashes = 0;
    while (
      trailingBackslashes < clipped.length &&
      clipped[clipped.length - 1 - trailingBackslashes] === "\\"
    ) {
      trailingBackslashes += 1;
    }

    // Likewise avoid ending on a high surrogate whose low half was cut off.
    const lastUnit = clipped.charCodeAt(clipped.length - 1);
    const nextUnit = escaped.charCodeAt(clipped.length);
    const splitsSurrogatePair =
      lastUnit >= 0xd800 && lastUnit <= 0xdbff && nextUnit >= 0xdc00 && nextUnit <= 0xdfff;

    if (trailingBackslashes % 2 === 1 || splitsSurrogatePair) {
      clipped = clipped.slice(0, -1);
    }
  }

  return clipped || "—";
}
|
|
192
|
+
|
|
193
|
+
/**
 * Builds the web URL of the current workflow run from the environment.
 *
 * Reads GITHUB_SERVER_URL (falling back to "https://github.com" when unset or empty),
 * GITHUB_REPOSITORY and GITHUB_RUN_ID.
 *
 * @returns {string} `<server>/<repo>/actions/runs/<runId>`, or "" when the repository
 *   or run id is missing.
 */
function githubWorkflowRunUrl() {
  const server = process.env.GITHUB_SERVER_URL || "https://github.com";
  const repo = process.env.GITHUB_REPOSITORY;
  const runId = process.env.GITHUB_RUN_ID;
  if (!repo || !runId) return "";
  // Drop every trailing slash (not just one) so the joined URL never contains "//" after the host.
  const base = server.replace(/\/+$/, "");
  return `${base}/${repo}/actions/runs/${runId}`;
}
|
|
200
|
+
|
|
201
|
+
/**
 * Maps an execution status payload to a pass/fail verdict and a display label.
 *
 * Only a "completed" status with a failed-test count of exactly zero is ok.
 * "completed" with a positive count, "cancelled" and "failed" get fixed labels;
 * anything else is labelled with the raw status text (or "?"), capped at 24 characters.
 *
 * @param {object} [status] - Payload whose `status` and `failed_tests` fields are read.
 * @returns {{ ok: boolean, label: string }} Verdict and label.
 */
function executionOutcome(status) {
  const state = status?.status;
  const failedCount = Number(status?.failed_tests ?? 0);

  switch (state) {
    case "completed":
      if (failedCount === 0) return { ok: true, label: "Pass" };
      if (failedCount > 0) return { ok: false, label: "Fail" };
      // A non-numeric or negative count falls through to the generic label below.
      break;
    case "cancelled":
      return { ok: false, label: "Cancelled" };
    case "failed":
      return { ok: false, label: "Failed" };
    default:
      break;
  }

  return { ok: false, label: mdTableCell(state || "?", 24) };
}
|
|
210
|
+
|
|
211
|
+
/**
 * Rich Markdown for the job "Summary" tab (tables, links). Step logs stay plain text.
 * https://docs.github.com/en/actions/using-workflows/workflow-commands-for-github-actions#adding-a-job-summary
 *
 * Appends `markdown` plus a trailing newline to the file named by GITHUB_STEP_SUMMARY.
 * Does nothing when that variable or the markdown is empty. A write failure is
 * reported as a warning and otherwise ignored.
 *
 * @param {string} markdown - Markdown to append.
 * @returns {void}
 */
function appendGithubJobSummaryMarkdown(markdown) {
  const path = process.env.GITHUB_STEP_SUMMARY;
  if (!path || !markdown) return;
  try {
    appendFileSync(path, `${markdown}\n`, "utf8");
  } catch (err) {
    // Best effort — never fail the job for summary I/O, but say why the Summary tab is empty.
    // eslint-disable-next-line no-console
    console.warn(`[qualty] Could not write the GitHub job summary: ${err?.message ?? err}`);
  }
}
|
|
224
|
+
|
|
225
|
+
/**
 * Markdown for per-step labels, thoughts (description), actions, explanation, evaluator.
 * Kept short so the job Summary stays under GitHub size limits; full text stays in step logs.
 *
 * For every execution id, each entry of the status payload's `combinations` array that has
 * steps, an explanation or evaluator output gets a heading, up to 50 numbered steps, and the
 * explanation / evaluator text as block quotes. Entries that are null or not objects are
 * skipped, and a null step is rendered as a placeholder, so a malformed payload cannot abort
 * the summary.
 *
 * @param {object} finalStatuses - Status payloads, looked up by execution id.
 * @param {Iterable<string>} executionJobIds - Execution ids, in output order.
 * @returns {string} The Markdown, or "" when no combination has anything to show.
 */
function buildStepDetailsSummaryMarkdown(finalStatuses, executionJobIds) {
  const MAX_STEPS = 50;
  // Four spaces nest the sub-bullets under both "1. " and "10. " style ordered items.
  const SUB_ITEM_INDENT = "    ";
  const parts = [];
  let any = false;

  for (const executionId of executionJobIds) {
    const status = finalStatuses?.[executionId] || {};
    const combos = Array.isArray(status.combinations) ? status.combinations : [];
    const episode = status.episode_name || "Run";

    for (const c of combos) {
      // A null or primitive entry carries nothing to render.
      if (c === null || typeof c !== "object") continue;

      const steps = Array.isArray(c.steps_json) ? c.steps_json : [];
      const device = String(c.device ?? "?");
      const evaluator = c.agent_output ?? c.gpt_output;
      const hasExplanation = Boolean(c.explanation && String(c.explanation).trim());
      const hasEvaluator = Boolean(evaluator && String(evaluator).trim());
      if (steps.length === 0 && !hasExplanation && !hasEvaluator) {
        continue;
      }
      any = true;
      parts.push(
        `#### ${mdTableCell(episode, 72)} · ${mdTableCell(device, 36)} · \`${mdTableCell(executionId, 36)}\``
      );
      parts.push("");

      const shown = steps.slice(0, MAX_STEPS);
      for (let j = 0; j < shown.length; j += 1) {
        // Keep the numbering stable even when a step entry is null.
        const s = shown[j] ?? {};
        const name = s.name == null ? "" : String(s.name).trim();
        const desc = s.description && String(s.description).trim();
        const label = name || (desc && desc.slice(0, 100)) || `Step ${j + 1}`;
        const st = s.status != null ? s.status : "?";
        parts.push(`${j + 1}. **${mdTableCell(st, 16)}** ${mdTableCell(label, 140)}`);
        // Compare against the trimmed name so the label is not repeated as "Thoughts".
        if (desc && desc !== name) {
          parts.push(`${SUB_ITEM_INDENT}- *Thoughts:* ${mdTableCell(desc, 900)}`);
        }
        if (s.action) {
          parts.push(`${SUB_ITEM_INDENT}- *Action:* \`${mdTableCell(String(s.action), 500)}\``);
        }
        parts.push("");
      }
      if (steps.length > MAX_STEPS) {
        parts.push(
          `*…and ${steps.length - MAX_STEPS} more steps — expand **Qualty** groups in the job log or open the run in Qualty for the full list.*`
        );
        parts.push("");
      }

      if (hasExplanation) {
        parts.push("**Explanation**");
        parts.push("");
        parts.push(`> ${mdTableCell(c.explanation, 4500)}`);
        parts.push("");
      }
      if (hasEvaluator) {
        parts.push("**Final evaluator**");
        parts.push("");
        parts.push(`> ${mdTableCell(evaluator, 8000)}`);
        parts.push("");
      }
    }
  }

  if (!any) return "";
  parts.push(
    "*Truncated for the Summary tab. Uncapped step lines and evaluator text are in the job log (expand the Qualty groups) or in the Qualty dashboard.*"
  );
  return parts.join("\n");
}
|
|
302
|
+
|
|
303
|
+
/**
 * Percent-encodes the characters that would end a Markdown link destination or a table
 * cell early: whitespace, parentheses, pipe, angle brackets and backslash.
 * Everything else, including existing "%XX" escapes, is left untouched.
 *
 * @param {string} url - Link target.
 * @returns {string} The target, safe to place inside `[text](…)` in a table cell.
 */
function mdLinkDestination(url) {
  return url.replace(/[\s()|<>\\]/g, (ch) => {
    // encodeURIComponent leaves parentheses alone, so map them by hand.
    if (ch === "(") return "%28";
    if (ch === ")") return "%29";
    return encodeURIComponent(ch);
  });
}

/**
 * Writes the Qualty results to the GitHub Actions job Summary; a no-op outside GitHub Actions.
 *
 * The Markdown contains a pass/fail headline, one table row per execution, a collapsible
 * per-device table when any combination is present, a collapsible step-details section
 * when there is any, and a link to the workflow run when its URL can be built.
 *
 * @param {object} params
 * @param {string[]} params.executionJobIds - Execution ids, in output order.
 * @param {object} params.finalStatuses - Status payloads, looked up by execution id.
 * @param {number} params.passed - Passed count shown in the headline.
 * @param {number} params.failed - Failed count shown in the headline.
 * @returns {void}
 */
function writeQualtyGithubJobSummary({ executionJobIds, finalStatuses, passed, failed }) {
  if (!isGithubActions()) return;

  const total = executionJobIds.length;
  const lines = [];
  lines.push("## Qualty");
  lines.push("");
  lines.push(
    `**${passed} passed**, **${failed} failed** · ${total} run${total === 1 ? "" : "s"}.`
  );
  lines.push("");
  lines.push("| Test | Execution | Status | Failed | Result | Qualty |");
  lines.push("| --- | --- | --- | ---: | --- | --- |");

  const comboRows = [];
  for (const executionId of executionJobIds) {
    const status = finalStatuses?.[executionId] || {};

    const title = mdTableCell(status.episode_name || "—", 72);
    const idCell = `\`${mdTableCell(executionId, 80)}\``;
    const apiStatus = mdTableCell(status.status ?? "—", 20);
    // Escaped like every other cell so an unexpected value cannot break the row.
    const failedN = status.failed_tests != null ? mdTableCell(status.failed_tests, 12) : "—";
    const outcome = executionOutcome(status);
    const resultCell = outcome.ok ? `✅ **${outcome.label}**` : `❌ **${outcome.label}**`;
    const url = String(status.url || "").trim();
    const linkCell = url ? `[Open run](${mdLinkDestination(url)})` : "—";
    lines.push(
      `| ${title} | ${idCell} | ${apiStatus} | ${failedN} | ${resultCell} | ${linkCell} |`
    );

    const combos = Array.isArray(status.combinations) ? status.combinations : [];
    const shortTitle = mdTableCell(
      status.episode_name || (executionId ? String(executionId).slice(0, 8) : "—"),
      40
    );
    for (const c of combos) {
      // A null or primitive entry has no device or result to report.
      if (c === null || typeof c !== "object") continue;
      const device = mdTableCell(c.device ?? "?", 32);
      const comboSt = mdTableCell(
        c.status ?? (c.success === true ? "passed" : c.success === false ? "failed" : "?"),
        16
      );
      comboRows.push(`| \`${mdTableCell(executionId, 36)}\` | ${shortTitle} | ${device} | ${comboSt} |`);
    }
  }

  if (comboRows.length > 0) {
    lines.push("");
    lines.push("<details>");
    lines.push("<summary><strong>Per device / combination</strong></summary>");
    lines.push("");
    lines.push("| Execution | Test | Device | Result |");
    lines.push("| --- | --- | --- | --- |");
    lines.push(...comboRows);
    lines.push("");
    lines.push("</details>");
  }

  const stepDetailsMd = buildStepDetailsSummaryMarkdown(finalStatuses, executionJobIds);
  if (stepDetailsMd) {
    lines.push("");
    lines.push("<details>");
    lines.push("<summary><strong>Steps & agent notes (truncated)</strong></summary>");
    lines.push("");
    lines.push(stepDetailsMd);
    lines.push("");
    lines.push("</details>");
  }

  const runUrl = githubWorkflowRunUrl();
  if (runUrl) {
    lines.push("");
    lines.push(`[This workflow run on GitHub](${runUrl})`);
  }

  appendGithubJobSummaryMarkdown(lines.join("\n"));
}
|
|
381
|
+
|
|
177
382
|
/**
 * Caps text for CI log output.
 *
 * Text no longer than `maxLen` is returned unchanged (null/undefined become "").
 * Longer text is cut to `maxLen` characters and followed, on a new line, by a note
 * stating how many characters were dropped; the note carries the `[qualty]` prefix
 * only outside GitHub Actions.
 *
 * @param {unknown} text - Value to log.
 * @param {number} maxLen - Maximum number of characters kept.
 * @returns {string} The original or truncated text.
 */
function truncateForCiLog(text, maxLen) {
  const full = String(text ?? "");
  if (full.length <= maxLen) return full;

  const note = `… (truncated, ${full.length - maxLen} more chars)`;
  const marker = isGithubActions() ? note : `[qualty] ${note}`;
  return [full.slice(0, maxLen), marker].join("\n");
}
|
|
182
390
|
|
|
183
|
-
|
|
391
|
+
/**
 * Prints one log line. Outside GitHub Actions the line gets a `[qualty] ` prefix;
 * inside GitHub Actions it is printed as-is (inside ::group::, plain text reads better).
 *
 * @param {string} line - Text to print.
 * @returns {void}
 */
function viewOut(line) {
  const rendered = isGithubActions() ? line : `[qualty] ${line}`;
  // eslint-disable-next-line no-console
  console.log(rendered);
}
|
|
396
|
+
|
|
397
|
+
/**
 * Prints multi-line text one line at a time, each with a prefix.
 *
 * Trailing whitespace is dropped first, and nothing is printed for empty text.
 * Lines are split on LF, CRLF or a lone CR so that no carriage return is left
 * at the end of a printed line.
 *
 * @param {string} ghaIndent - Prefix used inside GitHub Actions.
 * @param {string} nonGhaPrefix - Prefix used everywhere else.
 * @param {unknown} text - Text to print; null/undefined print nothing.
 * @returns {void}
 */
function logPrefixedLines(ghaIndent, nonGhaPrefix, text) {
  const body = String(text ?? "").trimEnd();
  if (!body) return;
  const prefix = isGithubActions() ? ghaIndent : nonGhaPrefix;
  for (const ln of body.split(/\r\n?|\n/)) {
    // eslint-disable-next-line no-console
    console.log(`${prefix}${ln}`);
  }
}
|
|
191
406
|
|
|
@@ -194,40 +409,51 @@ function logPrefixedLines(prefix, text) {
|
|
|
194
409
|
* Data comes from GET status payload fields on each combination (steps_json, explanation, agent_output).
|
|
195
410
|
*/
|
|
196
411
|
function printQualtyViewLogsReport(executionId, status) {
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
if (
|
|
412
|
+
const gha = isGithubActions();
|
|
413
|
+
const title = `${status.episode_name || "run"} (${executionId})`;
|
|
414
|
+
if (gha) {
|
|
415
|
+
// Collapsible section in GitHub Actions (no rich tables — stdout is plain text).
|
|
416
|
+
// eslint-disable-next-line no-console
|
|
417
|
+
console.log(`::group::Qualty · ${title}`);
|
|
418
|
+
} else {
|
|
200
419
|
// eslint-disable-next-line no-console
|
|
201
|
-
console.log(
|
|
420
|
+
console.log(`\n[qualty] ━━━ View logs: ${executionId} (${status.episode_name || "run"}) ━━━`);
|
|
202
421
|
}
|
|
422
|
+
if (status.url) viewOut(`URL: ${status.url}`);
|
|
203
423
|
if (status.error) {
|
|
204
|
-
|
|
205
|
-
|
|
424
|
+
viewOut("Run error:");
|
|
425
|
+
logPrefixedLines(" ", "[qualty] ", truncateForCiLog(status.error, 4000));
|
|
206
426
|
}
|
|
207
427
|
if (status.expected_behavior) {
|
|
208
|
-
|
|
209
|
-
|
|
428
|
+
viewOut("Expected behavior:");
|
|
429
|
+
logPrefixedLines(" ", "[qualty] ", truncateForCiLog(status.expected_behavior, 6000));
|
|
210
430
|
}
|
|
211
431
|
const combos = Array.isArray(status.combinations) ? status.combinations : [];
|
|
212
432
|
if (combos.length === 0) {
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
433
|
+
viewOut("(No per-device breakdown in API response yet — open this run in the Qualty dashboard for full logs.)");
|
|
434
|
+
if (gha) {
|
|
435
|
+
// eslint-disable-next-line no-console
|
|
436
|
+
console.log("::endgroup::");
|
|
437
|
+
} else {
|
|
438
|
+
// eslint-disable-next-line no-console
|
|
439
|
+
console.log(`[qualty] ━━━ End view logs: ${executionId} ━━━\n`);
|
|
440
|
+
}
|
|
219
441
|
return;
|
|
220
442
|
}
|
|
221
443
|
for (let i = 0; i < combos.length; i += 1) {
|
|
222
444
|
const c = combos[i];
|
|
223
445
|
const device = c.device ?? "?";
|
|
224
446
|
const comboStatus = c.status ?? (c.success === true ? "passed" : c.success === false ? "failed" : "?");
|
|
225
|
-
|
|
226
|
-
|
|
447
|
+
if (gha) {
|
|
448
|
+
// eslint-disable-next-line no-console
|
|
449
|
+
console.log(`::group::${device} · ${comboStatus} (${i + 1}/${combos.length})`);
|
|
450
|
+
} else {
|
|
451
|
+
// eslint-disable-next-line no-console
|
|
452
|
+
console.log(`\n[qualty] --- Combination ${i + 1}/${combos.length} (${device} · ${comboStatus}) ---`);
|
|
453
|
+
}
|
|
227
454
|
const steps = Array.isArray(c.steps_json) ? c.steps_json : [];
|
|
228
455
|
if (steps.length > 0) {
|
|
229
|
-
|
|
230
|
-
console.log(`[qualty] Steps (${steps.length}):`);
|
|
456
|
+
viewOut(`Steps (${steps.length}):`);
|
|
231
457
|
for (let j = 0; j < steps.length; j += 1) {
|
|
232
458
|
const s = steps[j];
|
|
233
459
|
const label =
|
|
@@ -235,34 +461,38 @@ function printQualtyViewLogsReport(executionId, status) {
|
|
|
235
461
|
(s.description && String(s.description).trim().slice(0, 100)) ||
|
|
236
462
|
`Step ${j + 1}`;
|
|
237
463
|
const st = s.status != null ? s.status : "?";
|
|
238
|
-
|
|
239
|
-
console.log(`[qualty] ${j + 1}. [${st}] ${label}`);
|
|
464
|
+
viewOut(` ${j + 1}. [${st}] ${label}`);
|
|
240
465
|
if (s.description && String(s.description).trim() && String(s.description) !== String(s.name)) {
|
|
241
|
-
logPrefixedLines("[qualty] ", truncateForCiLog(s.description, 4000));
|
|
466
|
+
logPrefixedLines(" ", "[qualty] ", truncateForCiLog(s.description, 4000));
|
|
242
467
|
}
|
|
243
468
|
if (s.action) {
|
|
244
|
-
|
|
245
|
-
console.log(`[qualty] action: ${truncateForCiLog(s.action, 2000)}`);
|
|
469
|
+
logPrefixedLines(" ", "[qualty] ", `action: ${truncateForCiLog(s.action, 2000)}`);
|
|
246
470
|
}
|
|
247
471
|
}
|
|
248
472
|
} else if (c.total_steps) {
|
|
249
|
-
|
|
250
|
-
console.log(`[qualty] (Step list not available yet; ${c.total_steps} step(s) reported.)`);
|
|
473
|
+
viewOut(`(Step list not available yet; ${c.total_steps} step(s) reported.)`);
|
|
251
474
|
}
|
|
252
475
|
if (c.explanation) {
|
|
253
|
-
|
|
254
|
-
|
|
476
|
+
viewOut("Explanation:");
|
|
477
|
+
logPrefixedLines(" ", "[qualty] ", truncateForCiLog(c.explanation, 12000));
|
|
255
478
|
}
|
|
256
479
|
const evaluator = c.agent_output ?? c.gpt_output;
|
|
257
480
|
if (evaluator) {
|
|
481
|
+
viewOut("Final evaluator output:");
|
|
482
|
+
logPrefixedLines(" ", "[qualty] ", truncateForCiLog(evaluator, 16000));
|
|
483
|
+
}
|
|
484
|
+
if (gha) {
|
|
258
485
|
// eslint-disable-next-line no-console
|
|
259
|
-
console.log(
|
|
260
|
-
`[qualty] Final evaluator output:\n[qualty] ${truncateForCiLog(evaluator, 16000).split("\n").join("\n[qualty] ")}`
|
|
261
|
-
);
|
|
486
|
+
console.log("::endgroup::");
|
|
262
487
|
}
|
|
263
488
|
}
|
|
264
|
-
|
|
265
|
-
|
|
489
|
+
if (gha) {
|
|
490
|
+
// eslint-disable-next-line no-console
|
|
491
|
+
console.log("::endgroup::");
|
|
492
|
+
} else {
|
|
493
|
+
// eslint-disable-next-line no-console
|
|
494
|
+
console.log(`\n[qualty] ━━━ End view logs: ${executionId} ━━━\n`);
|
|
495
|
+
}
|
|
266
496
|
}
|
|
267
497
|
|
|
268
498
|
async function runCi(args) {
|
|
@@ -377,9 +607,18 @@ async function runCi(args) {
|
|
|
377
607
|
);
|
|
378
608
|
}
|
|
379
609
|
|
|
610
|
+
writeQualtyGithubJobSummary({ executionJobIds, finalStatuses, passed, failed });
|
|
611
|
+
|
|
380
612
|
if (!noViewLogs) {
|
|
381
|
-
|
|
382
|
-
|
|
613
|
+
if (isGithubActions()) {
|
|
614
|
+
// eslint-disable-next-line no-console
|
|
615
|
+
console.log(
|
|
616
|
+
"[qualty] Open the job Summary tab for a results table; expand the Qualty groups below for full step logs."
|
|
617
|
+
);
|
|
618
|
+
} else {
|
|
619
|
+
// eslint-disable-next-line no-console
|
|
620
|
+
console.log(`[qualty] Detailed run output (same fields as dashboard "View logs"):`);
|
|
621
|
+
}
|
|
383
622
|
for (const executionId of executionJobIds) {
|
|
384
623
|
printQualtyViewLogsReport(executionId, finalStatuses[executionId] || {});
|
|
385
624
|
}
|