qualty 0.1.5 → 0.1.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/qualty.js +93 -2
- package/package.json +1 -1
package/bin/qualty.js
CHANGED
|
@@ -77,7 +77,9 @@ function startCloudflared(token, port) {
|
|
|
77
77
|
"--protocol",
|
|
78
78
|
"http2",
|
|
79
79
|
"--url",
|
|
80
|
-
`http://localhost:${port}`,
|
|
80
|
+
// Use 127.0.0.1 so cloudflared does not prefer [::1] (common on Linux/GitHub Actions) while
|
|
81
|
+
// Next/webpack dev servers often bind --hostname 127.0.0.1 only.
|
|
82
|
+
`http://127.0.0.1:${port}`,
|
|
81
83
|
"run",
|
|
82
84
|
"--token",
|
|
83
85
|
token,
|
|
@@ -114,7 +116,7 @@ async function runConnect(args) {
|
|
|
114
116
|
|
|
115
117
|
const hostUrl = connect.localhost?.url;
|
|
116
118
|
// eslint-disable-next-line no-console
|
|
117
|
-
console.log(`Connected ✅ ${hostUrl} -> http://localhost:${port}`);
|
|
119
|
+
console.log(`Connected ✅ ${hostUrl} -> http://127.0.0.1:${port}`);
|
|
118
120
|
|
|
119
121
|
const cloudflaredToken = connect.tunnel?.token;
|
|
120
122
|
if (!cloudflaredToken) {
|
|
@@ -222,6 +224,84 @@ function appendGithubJobSummaryMarkdown(markdown) {
|
|
|
222
224
|
}
|
|
223
225
|
}
|
|
224
226
|
|
|
227
|
+
/**
 * Build the Markdown for the per-step section of the GitHub job summary.
 *
 * For every execution id, each combination that has steps, an explanation, or
 * evaluator output gets:
 *   - a heading (episode · device · execution id),
 *   - a numbered list of at most MAX_STEPS steps (status, label, thoughts, action),
 *   - the explanation and final evaluator text as blockquotes.
 *
 * Kept short so the job Summary stays under GitHub size limits; full text stays
 * in step logs. Every field goes through mdTableCell with a per-field numeric
 * cap (presumably a maximum length — confirm against mdTableCell).
 *
 * @param {Object<string, object>} finalStatuses - Final status payloads keyed by execution id.
 * @param {Iterable<string>} executionJobIds - Execution ids, rendered in iteration order.
 * @returns {string} Markdown joined with "\n", or "" when no combination has anything to show.
 */
function buildStepDetailsSummaryMarkdown(finalStatuses, executionJobIds) {
  const MAX_STEPS = 50;
  const parts = [];

  for (const executionId of executionJobIds) {
    const status = finalStatuses[executionId] || {};
    const combos = Array.isArray(status.combinations) ? status.combinations : [];
    const episode = status.episode_name || "Run";

    for (const c of combos) {
      // A null/undefined combination carries nothing to render; skip it instead
      // of throwing on property access and losing the whole summary.
      if (c == null) continue;

      const steps = Array.isArray(c.steps_json) ? c.steps_json : [];
      const device = String(c.device ?? "?");
      const evaluator = c.agent_output ?? c.gpt_output;
      const hasExplanation = Boolean(String(c.explanation || "").trim());
      const hasEvaluator = Boolean(String(evaluator || "").trim());

      if (steps.length === 0 && !hasExplanation && !hasEvaluator) {
        continue;
      }

      parts.push(
        `#### ${mdTableCell(episode, 72)} · ${mdTableCell(device, 36)} · \`${mdTableCell(executionId, 36)}\``
      );
      parts.push("");

      const shown = steps.slice(0, MAX_STEPS);
      for (let j = 0; j < shown.length; j += 1) {
        // Tolerate null/undefined step entries: they render as "Step N" with
        // status "?" so the numbering still matches the original step order.
        const s = shown[j] ?? {};
        const name = s.name ? String(s.name).trim() : "";
        const desc = s.description ? String(s.description).trim() : "";
        const label = name || desc.slice(0, 100) || `Step ${j + 1}`;
        const st = s.status ?? "?";

        parts.push(`${j + 1}. **${mdTableCell(st, 16)}** ${mdTableCell(label, 140)}`);

        // Compare against the trimmed name so a description that merely repeats
        // the (possibly padded) name is not printed a second time.
        if (desc && desc !== name) {
          // Three-space indent nests the bullet under its "N. " list item.
          parts.push(`   - *Thoughts:* ${mdTableCell(desc, 900)}`);
        }
        if (s.action) {
          parts.push(`   - *Action:* \`${mdTableCell(String(s.action), 500)}\``);
        }
        parts.push("");
      }

      if (steps.length > MAX_STEPS) {
        parts.push(
          `*…and ${steps.length - MAX_STEPS} more steps — expand **Qualty** groups in the job log or open the run in Qualty for the full list.*`
        );
        parts.push("");
      }

      if (hasExplanation) {
        parts.push("**Explanation**");
        parts.push("");
        parts.push(`> ${mdTableCell(c.explanation, 4500)}`);
        parts.push("");
      }
      if (hasEvaluator) {
        parts.push("**Final evaluator**");
        parts.push("");
        parts.push(`> ${mdTableCell(evaluator, 8000)}`);
        parts.push("");
      }
    }
  }

  // Nothing was rendered for any combination: let the caller omit the section.
  if (parts.length === 0) return "";

  parts.push(
    "*Truncated for the Summary tab. Uncapped step lines and evaluator text are in the job log (expand the Qualty groups) or in the Qualty dashboard.*"
  );
  return parts.join("\n");
}
|
|
304
|
+
|
|
225
305
|
function writeQualtyGithubJobSummary({ executionJobIds, finalStatuses, passed, failed }) {
|
|
226
306
|
if (!isGithubActions()) return;
|
|
227
307
|
|
|
@@ -281,6 +361,17 @@ function writeQualtyGithubJobSummary({ executionJobIds, finalStatuses, passed, f
|
|
|
281
361
|
lines.push("</details>");
|
|
282
362
|
}
|
|
283
363
|
|
|
364
|
+
const stepDetailsMd = buildStepDetailsSummaryMarkdown(finalStatuses, executionJobIds);
|
|
365
|
+
if (stepDetailsMd) {
|
|
366
|
+
lines.push("");
|
|
367
|
+
lines.push("<details>");
|
|
368
|
+
lines.push("<summary><strong>Steps & agent notes (truncated)</strong></summary>");
|
|
369
|
+
lines.push("");
|
|
370
|
+
lines.push(stepDetailsMd);
|
|
371
|
+
lines.push("");
|
|
372
|
+
lines.push("</details>");
|
|
373
|
+
}
|
|
374
|
+
|
|
284
375
|
const runUrl = githubWorkflowRunUrl();
|
|
285
376
|
if (runUrl) {
|
|
286
377
|
lines.push("");
|