clementine-agent 1.18.77 → 1.18.79
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agent/goal-evaluator.d.ts +54 -0
- package/dist/agent/goal-evaluator.js +235 -0
- package/dist/cli/cron.js +16 -2
- package/dist/cli/dashboard.js +248 -4
- package/dist/gateway/cron-scheduler.js +17 -0
- package/dist/types.d.ts +18 -0
- package/package.json +1 -1
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Goal evaluation — PRD Phase 1.
|
|
3
|
+
*
|
|
4
|
+
* Two evaluators run at the END of a successful cron run, when the Task
|
|
5
|
+
* defines `successSchema` (JSON Schema validated against the agent's output)
|
|
6
|
+
* and/or `successCriteriaText` (free-text criterion graded by an evaluator
|
|
7
|
+
* sub-agent). The verdicts merge into a single `goalCheck` object that
|
|
8
|
+
* gets stamped on the run's CronRunEntry.
|
|
9
|
+
*
|
|
10
|
+
* Design constraints:
|
|
11
|
+
* - Never block run completion. Any thrown error becomes status='error' on
|
|
12
|
+
* goalCheck and the rest of the run logs unchanged.
|
|
13
|
+
* - Bounded budgets — schema validation is sub-millisecond; evaluator agent
|
|
14
|
+
* gets max_turns=1, ~30s wall clock, Haiku-class model.
|
|
15
|
+
* - No new top-level deps — ajv is a transitive install; we import it lazily
|
|
16
|
+
* inside the function so test fixtures that don't need it never load it.
|
|
17
|
+
*/
|
|
18
|
+
import type { CronJobDefinition, CronRunEntry } from '../types.js';
|
|
19
|
+
/** Outcome of validating the agent's response against the Task's JSON Schema. */
type SchemaResult = {
|
|
20
|
+
/** True only when JSON was extracted and it satisfied the schema. */
pass: boolean;
|
|
21
|
+
/** Human-readable validation messages (empty on success; the implementation keeps at most five). */
errors: string[];
|
|
22
|
+
/** False when no JSON could be extracted from the response, so the schema was never applied. */
tried: boolean;
|
|
23
|
+
};
|
|
24
|
+
/** Verdict parsed from the evaluator sub-agent's PASS/FAIL reply. */
type EvaluatorResult = {
|
|
25
|
+
/** True when the evaluator answered PASS, false when it answered FAIL. */
pass: boolean;
|
|
26
|
+
/** The evaluator's one-line justification (the implementation truncates it to 280 characters). */
reason: string;
|
|
27
|
+
};
|
|
28
|
+
/**
|
|
29
|
+
* Validate the agent's response against a JSON Schema. Returns:
|
|
30
|
+
* - tried=false if no JSON could be extracted from the response
|
|
31
|
+
* - tried=true with pass + errors otherwise
|
|
32
|
+
* Schema-compile errors throw — caller catches.
|
|
33
|
+
*/
|
|
34
|
+
export declare function validateAgainstSchema(responseText: string, schema: Record<string, unknown>): Promise<SchemaResult>;
|
|
35
|
+
/**
|
|
36
|
+
* Ask a small evaluator sub-agent whether the run accomplished the
|
|
37
|
+
* `successCriteriaText` criterion. Returns null if the evaluator failed
|
|
38
|
+
* to produce a parseable verdict (caller treats null as goalCheck.status='error').
|
|
39
|
+
*
|
|
40
|
+
* The evaluator is intentionally minimal — Haiku, max_turns=1, focused
|
|
41
|
+
* system prompt, ~30s budget. We're grading text, not running tools.
|
|
42
|
+
*/
|
|
43
|
+
export declare function evaluateAgainstCriterion(responseText: string, criterion: string, opts?: {
|
|
44
|
+
model?: string;
|
|
45
|
+
timeoutMs?: number;
|
|
46
|
+
}): Promise<EvaluatorResult | null>;
|
|
47
|
+
/**
|
|
48
|
+
* Orchestrator: runs whichever evaluators are configured on the Task and
|
|
49
|
+
* merges their verdicts into a single goalCheck record. Returns undefined
|
|
50
|
+
* when no goal is configured — the field then stays absent on the run entry.
|
|
51
|
+
*/
|
|
52
|
+
export declare function runGoalCheck(responseText: string, job: CronJobDefinition): Promise<CronRunEntry['goalCheck']>;
|
|
53
|
+
export {};
|
|
54
|
+
//# sourceMappingURL=goal-evaluator.d.ts.map
|
|
@@ -0,0 +1,235 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Goal evaluation — PRD Phase 1.
|
|
3
|
+
*
|
|
4
|
+
* Two evaluators run at the END of a successful cron run, when the Task
|
|
5
|
+
* defines `successSchema` (JSON Schema validated against the agent's output)
|
|
6
|
+
* and/or `successCriteriaText` (free-text criterion graded by an evaluator
|
|
7
|
+
* sub-agent). The verdicts merge into a single `goalCheck` object that
|
|
8
|
+
* gets stamped on the run's CronRunEntry.
|
|
9
|
+
*
|
|
10
|
+
* Design constraints:
|
|
11
|
+
* - Never block run completion. Any thrown error becomes status='error' on
|
|
12
|
+
* goalCheck and the rest of the run logs unchanged.
|
|
13
|
+
* - Bounded budgets — schema validation is sub-millisecond; evaluator agent
|
|
14
|
+
* gets max_turns=1, ~30s wall clock, Haiku-class model.
|
|
15
|
+
* - No new top-level deps — ajv is a transitive install; we import it lazily
|
|
16
|
+
* inside the function so test fixtures that don't need it never load it.
|
|
17
|
+
*/
|
|
18
|
+
/**
 * Extract a JSON value from the agent's response text.
 *
 * Candidates are tried in order; the first one JSON.parse accepts wins:
 *   1. the whole response;
 *   2. each fenced code block (``` or ```json), in document order;
 *   3. the span from the first `{` to the last `}` (last resort).
 *
 * Every fence is tried, not just the first, so a reply that shows e.g. a
 * shell snippet before its ```json block still yields the JSON.
 *
 * @param responseText Raw text produced by the agent.
 * @returns The parsed value, or null when the input is empty or not a string,
 *   or when no candidate parses. A response that is the JSON literal `null`
 *   also comes back as null.
 */
function extractJson(responseText) {
    if (!responseText || typeof responseText !== 'string')
        return null;
    // Ordered list of substrings worth handing to JSON.parse.
    const candidates = [responseText];
    for (const fence of responseText.matchAll(/```(?:json|JSON)?\s*([\s\S]*?)```/g)) {
        if (fence[1])
            candidates.push(fence[1].trim());
    }
    // Greedy brace span: first "{" through last "}".
    const open = responseText.indexOf('{');
    const close = responseText.lastIndexOf('}');
    if (open >= 0 && close > open) {
        candidates.push(responseText.slice(open, close + 1));
    }
    for (const candidate of candidates) {
        try {
            return JSON.parse(candidate);
        }
        catch { /* not JSON — try the next candidate */ }
    }
    return null;
}
|
|
50
|
+
/**
 * Validate the agent's response against a JSON Schema.
 *
 * @param responseText Raw agent output; the JSON payload is pulled out of it
 *   with extractJson.
 * @param schema JSON Schema to compile with ajv.
 * @returns
 *   - `{ tried: false, pass: false, errors: [...] }` when no JSON could be
 *     extracted from the response;
 *   - `{ tried: true, pass, errors }` otherwise, with at most five messages.
 * @throws Error when ajv cannot be imported or no constructor can be found on
 *   the imported module; schema-compile errors from ajv also propagate. The
 *   caller catches.
 */
export async function validateAgainstSchema(responseText, schema) {
    const candidate = extractJson(responseText);
    if (candidate === null) {
        return { tried: false, pass: false, errors: ['No JSON object found in agent response'] };
    }
    // Lazy import so this module costs nothing when no Task has a schema.
    const ajvMod = await import('ajv').catch(() => null);
    if (!ajvMod) {
        throw new Error('ajv not available — cannot validate success_schema');
    }
    // CJS/ESM interop: depending on the loader the constructor sits on
    // `.default`, on `.default.default` (double-wrapped), or on the named
    // `Ajv` export. Pick the first thing that is actually callable so a
    // mismatch produces a clear error instead of "is not a constructor".
    const AjvCtor = [ajvMod.default, ajvMod.default?.default, ajvMod.Ajv]
        .find((ctor) => typeof ctor === 'function');
    if (!AjvCtor) {
        throw new Error('ajv not available — cannot validate success_schema');
    }
    const ajv = new AjvCtor({ allErrors: true, strict: false });
    const validator = ajv.compile(schema);
    if (validator(candidate)) {
        return { tried: true, pass: true, errors: [] };
    }
    // ajv stamps errors on the compiled validator; the instance fallback covers
    // older versions that put them on the ajv instance instead.
    const rawErrors = validator.errors ?? ajv.errors ?? [];
    const errs = rawErrors.slice(0, 5).map((e) => {
        // ajv v8 names the location `instancePath`; v6 called it `dataPath`.
        const path = e.instancePath || e.dataPath || '';
        const msg = e.message || 'invalid';
        return path ? `${path} ${msg}` : msg;
    });
    return { tried: true, pass: false, errors: errs.length ? errs : ['validation failed'] };
}
|
|
85
|
+
/**
 * Parse the evaluator's reply into a verdict.
 *
 * A line that STARTS with the verdict (optionally after markdown markers or a
 * "Verdict:" label) is preferred, so prose such as "did not pass ..." earlier
 * in the reply cannot be mistaken for the grade. If no such line exists, the
 * first PASS/FAIL word anywhere in the text is used. The reason is optional:
 * a bare "PASS" or "FAIL" is a valid verdict.
 *
 * @param text Raw evaluator reply.
 * @returns `{ pass, reason }` (reason capped at 280 chars, defaulting to
 *   'Pass' / 'Fail'), or null when the text is empty, not a string, or has
 *   no verdict.
 */
function parseVerdictLine(text) {
    if (!text || typeof text !== 'string')
        return null;
    const anchored = /^[\s>*_`#]*(?:verdict\s*[:—–\-]?\s*)?[*_`]*(PASS|FAIL)\b[\s*_`]*[—–\-:]?\s*(.*)$/im.exec(text);
    const match = anchored ?? /\b(PASS|FAIL)\b\s*[—\-:]?\s*(.*)/i.exec(text);
    if (!match)
        return null;
    const pass = match[1].toUpperCase() === 'PASS';
    const reason = (match[2] || '').trim().slice(0, 280);
    return { pass, reason: reason || (pass ? 'Pass' : 'Fail') };
}
/**
 * Ask a small evaluator sub-agent whether the run accomplished the
 * `successCriteriaText` criterion.
 *
 * The evaluator is intentionally minimal — Haiku-class model, max_turns=1,
 * no tools, focused system prompt, ~30s budget. We're grading text, not
 * running tools.
 *
 * @param responseText The run's output; only the first 8000 chars are sent.
 * @param criterion The free-text goal; only the first 2000 chars are sent.
 * @param opts Optional overrides: `model` (evaluator model id) and
 *   `timeoutMs` (wall-clock budget, default 30000).
 * @returns The parsed verdict, or null when the criterion is empty, the SDK
 *   is unavailable, the query fails or times out, or the reply carries no
 *   PASS/FAIL verdict (caller treats null as goalCheck.status='error').
 */
export async function evaluateAgainstCriterion(responseText, criterion, opts = {}) {
    const trimmedResponse = (responseText || '').slice(0, 8000);
    const trimmedCriterion = (criterion || '').slice(0, 2000);
    if (!trimmedCriterion)
        return null;
    const sdk = await import('@anthropic-ai/claude-agent-sdk').catch(() => null);
    if (!sdk || typeof sdk.query !== 'function') {
        return null;
    }
    const systemPrompt = 'You are a strict evaluator. Grade whether a scheduled task accomplished its stated goal.\n' +
        'Reply with EXACTLY one line in this format:\n' +
        'PASS — <one-sentence reason> | FAIL — <one-sentence reason>\n' +
        'Be honest. If the run did not achieve the goal, say FAIL even if the agent claimed success.';
    const userPrompt = `GOAL:\n${trimmedCriterion}\n\nRUN OUTPUT:\n${trimmedResponse}\n\nVerdict:`;
    const timeoutMs = opts.timeoutMs ?? 30_000;
    const model = opts.model ?? 'claude-haiku-4-5-20251001';
    // Lets the timeout actually cancel the SDK query instead of abandoning it.
    const abortController = new AbortController();
    const queryPromise = (async () => {
        let resultText = '';
        let assistantText = '';
        try {
            const queryFn = sdk.query;
            const iter = queryFn({
                prompt: userPrompt,
                options: {
                    systemPrompt,
                    model,
                    maxTurns: 1,
                    permissionMode: 'default',
                    allowedTools: [],
                    settingSources: [],
                    abortController,
                    // No tools, no network beyond model — purely text-in / text-out.
                },
            });
            for await (const message of iter) {
                if (message.type === 'assistant') {
                    // The SDK nests the API message under `.message`; tolerate a
                    // flat `.content` as well.
                    const content = message.message?.content ?? message.content;
                    if (Array.isArray(content)) {
                        for (const block of content) {
                            if (block && block.type === 'text' && typeof block.text === 'string')
                                assistantText += block.text;
                        }
                    }
                }
                else if (message.type === 'result' && typeof message.result === 'string') {
                    resultText += message.result;
                }
            }
        }
        catch {
            // Includes the abort raised by the timeout below.
            return null;
        }
        // The result message repeats the assistant's final text, so prefer it and
        // fall back to the streamed assistant text rather than concatenating both.
        return resultText || assistantText;
    })();
    // Race the SDK query against a hard timeout so a hung evaluator never
    // blocks run logging.
    let timer;
    const timeoutPromise = new Promise((resolve) => {
        timer = setTimeout(() => {
            abortController.abort();
            resolve(null);
        }, timeoutMs);
    });
    let collected;
    try {
        collected = await Promise.race([queryPromise, timeoutPromise]);
    }
    finally {
        // Without this the pending timer keeps the process alive for up to
        // timeoutMs after a fast verdict.
        clearTimeout(timer);
    }
    return parseVerdictLine(collected);
}
|
|
159
|
+
/**
 * Orchestrator: runs whichever evaluators are configured on the Task and
 * merges their verdicts into a single goalCheck record.
 *
 * Status rules, in priority order:
 *   1. 'fail'  — any evaluator that ran returned a definitive failure (schema
 *                mismatch, no JSON in the response, or evaluator FAIL). This
 *                holds even if the other evaluator errored.
 *   2. 'error' — nothing failed, but at least one configured evaluator threw
 *                or produced no verdict, so a pass cannot be claimed.
 *   3. 'pass'  — every configured evaluator ran and passed.
 *
 * @param responseText The run's output text.
 * @param job The Task definition; `successSchema` and `successCriteriaText`
 *   select which evaluators run.
 * @returns The goalCheck record (`status`, `mode`, plus per-evaluator
 *   details), or undefined when no goal is configured — the field then stays
 *   absent on the run entry.
 */
export async function runGoalCheck(responseText, job) {
    const hasSchema = !!(job.successSchema && Object.keys(job.successSchema).length > 0);
    const hasCriterion = !!(job.successCriteriaText && job.successCriteriaText.trim());
    if (!hasSchema && !hasCriterion)
        return undefined;
    let schemaResult = null;
    let evaluatorResult = null;
    // One entry per evaluator that could not produce a verdict.
    const errorMessages = [];
    if (hasSchema) {
        try {
            schemaResult = await validateAgainstSchema(responseText, job.successSchema);
        }
        catch (err) {
            errorMessages.push(`schema validator threw: ${String(err).slice(0, 200)}`);
        }
    }
    if (hasCriterion) {
        try {
            evaluatorResult = await evaluateAgainstCriterion(responseText, job.successCriteriaText);
            if (evaluatorResult === null) {
                // Treat unparseable evaluator output as 'error' rather than 'fail' — we
                // don't want a flaky evaluator to mark a healthy run as failed.
                errorMessages.push('evaluator did not return a parseable PASS/FAIL verdict');
            }
        }
        catch (err) {
            errorMessages.push(`evaluator threw: ${String(err).slice(0, 200)}`);
        }
    }
    const mode = hasSchema && hasCriterion ? 'both' : hasSchema ? 'schema' : 'evaluator';
    // "No JSON found" (tried=false) counts as a schema failure.
    const schemaFailed = schemaResult !== null && !(schemaResult.pass && schemaResult.tried);
    const evaluatorFailed = evaluatorResult !== null && !evaluatorResult.pass;
    let status;
    if (schemaFailed || evaluatorFailed)
        status = 'fail';
    else if (errorMessages.length > 0)
        status = 'error';
    else
        status = 'pass';
    const out = { status, mode };
    if (schemaResult) {
        out.schemaPass = schemaResult.pass && schemaResult.tried;
        if (schemaFailed) {
            out.schemaErrors = schemaResult.errors.slice(0, 5);
        }
    }
    if (evaluatorResult) {
        out.evaluatorPass = evaluatorResult.pass;
        out.evaluatorReason = evaluatorResult.reason;
    }
    // Stash the error in evaluatorReason if we don't already have one — the
    // dashboard surfaces this string in the tooltip.
    if (errorMessages.length > 0 && !out.evaluatorReason) {
        out.evaluatorReason = errorMessages.join('; ');
    }
    return out;
}
|
|
235
|
+
//# sourceMappingURL=goal-evaluator.js.map
|
package/dist/cli/cron.js
CHANGED
|
@@ -140,7 +140,7 @@ export async function cmdCronRun(jobName) {
|
|
|
140
140
|
try {
|
|
141
141
|
const response = await gateway.handleCronJob(job.name, job.prompt, job.tier, job.maxTurns, job.model, job.workDir, job.mode, job.maxHours);
|
|
142
142
|
const finishedAt = new Date();
|
|
143
|
-
|
|
143
|
+
const entry = {
|
|
144
144
|
jobName: job.name,
|
|
145
145
|
startedAt: startedAt.toISOString(),
|
|
146
146
|
finishedAt: finishedAt.toISOString(),
|
|
@@ -148,7 +148,21 @@ export async function cmdCronRun(jobName) {
|
|
|
148
148
|
durationMs: finishedAt.getTime() - startedAt.getTime(),
|
|
149
149
|
attempt: 1,
|
|
150
150
|
outputPreview: response ? response.slice(0, 200) : undefined,
|
|
151
|
-
}
|
|
151
|
+
};
|
|
152
|
+
// PRD Phase 1.1: goal-orientation evaluator (mirrors the daemon path).
|
|
153
|
+
if (job.successSchema || (job.successCriteriaText && job.successCriteriaText.trim())) {
|
|
154
|
+
try {
|
|
155
|
+
const { runGoalCheck } = await import('../agent/goal-evaluator.js');
|
|
156
|
+
const goalCheck = await runGoalCheck(response ?? '', job);
|
|
157
|
+
if (goalCheck)
|
|
158
|
+
entry.goalCheck = goalCheck;
|
|
159
|
+
}
|
|
160
|
+
catch (err) {
|
|
161
|
+
// Never block logging on evaluator failure.
|
|
162
|
+
entry.goalCheck = { status: 'error', mode: 'evaluator', evaluatorReason: `evaluator orchestrator threw: ${String(err).slice(0, 200)}` };
|
|
163
|
+
}
|
|
164
|
+
}
|
|
165
|
+
runLog.append(entry);
|
|
152
166
|
console.log(response || '(no output)');
|
|
153
167
|
if (response && response !== '__NOTHING__') {
|
|
154
168
|
console.log('\n(Note: Standalone runner — output not delivered to channels. Use the daemon for channel delivery.)');
|
package/dist/cli/dashboard.js
CHANGED
|
@@ -15119,6 +15119,11 @@ if('serviceWorker' in navigator){navigator.serviceWorker.getRegistrations().then
|
|
|
15119
15119
|
/* ── Recent history row hover (Tasks page bottom zone) ── */
|
|
15120
15120
|
.history-row { transition: background 0.12s ease; }
|
|
15121
15121
|
.history-row:hover { background: var(--bg-hover); }
|
|
15122
|
+
/* PRD Phase 1.2: "Run task once" running-state pulse on the Last run tab. */
|
|
15123
|
+
@keyframes pulse {
|
|
15124
|
+
0%, 100% { opacity: 0.4; transform: scale(0.85); }
|
|
15125
|
+
50% { opacity: 1; transform: scale(1); }
|
|
15126
|
+
}
|
|
15122
15127
|
/* ── Trick capability strip (skills + MCP + tools at a glance) ─── */
|
|
15123
15128
|
.task-cap-strip {
|
|
15124
15129
|
border-top: 1px solid var(--border-light);
|
|
@@ -19903,6 +19908,7 @@ if('serviceWorker' in navigator){navigator.serviceWorker.getRegistrations().then
|
|
|
19903
19908
|
<div class="cron-tabs" role="tablist">
|
|
19904
19909
|
<button type="button" class="cron-tab-btn active" data-cron-tab="configure" onclick="switchCronTab('configure')">Configure</button>
|
|
19905
19910
|
<button type="button" class="cron-tab-btn" id="cron-tab-btn-preview" data-cron-tab="preview" onclick="switchCronTab('preview')" title="See exactly what the agent will receive at fire-time">What will run</button>
|
|
19911
|
+
<button type="button" class="cron-tab-btn" id="cron-tab-btn-lastrun" data-cron-tab="lastrun" onclick="switchCronTab('lastrun')" title="Watch the most recent run — click Run task once to fire it now">Last run</button>
|
|
19906
19912
|
</div>
|
|
19907
19913
|
<div class="modal-body">
|
|
19908
19914
|
<!-- ── Tab: Configure ─────────────────────────────────────────── -->
|
|
@@ -20222,10 +20228,23 @@ if('serviceWorker' in navigator){navigator.serviceWorker.getRegistrations().then
|
|
|
20222
20228
|
</div>
|
|
20223
20229
|
</div>
|
|
20224
20230
|
</div><!-- /cron-tab-preview -->
|
|
20231
|
+
|
|
20232
|
+
<!-- ── Tab: Last run ── PRD Phase 1.2: "Run task once" inline output. -->
|
|
20233
|
+
<div class="cron-tab-pane" id="cron-tab-lastrun">
|
|
20234
|
+
<div id="cron-lastrun-body" style="padding:0">
|
|
20235
|
+
<div style="padding:36px 24px;color:var(--text-muted);text-align:center;font-size:13px">
|
|
20236
|
+
Save the task first, then click <strong>Run task once</strong> to fire it now and watch the result here.
|
|
20237
|
+
</div>
|
|
20238
|
+
</div>
|
|
20239
|
+
</div><!-- /cron-tab-lastrun -->
|
|
20225
20240
|
</div>
|
|
20226
20241
|
<div class="modal-footer">
|
|
20227
20242
|
<div style="display:flex;align-items:center;gap:8px;flex:1">
|
|
20228
20243
|
<button class="btn btn-sm" id="cron-train-btn" onclick="showCronTraining()" style="font-size:11px;display:none">Train with Agent</button>
|
|
20244
|
+
<!-- PRD §5.1 header bullet: "Run task once" green button. Visible only
|
|
20245
|
+
when editing a saved task (set by openEditCronModal). Disabled
|
|
20246
|
+
during an in-flight run. -->
|
|
20247
|
+
<button class="btn btn-sm btn-success" id="cron-run-once-btn" onclick="runCronOnceFromModal()" style="display:none;font-size:12px;padding:6px 14px">▶ Run task once</button>
|
|
20229
20248
|
</div>
|
|
20230
20249
|
<button onclick="closeCronModal()">Cancel</button>
|
|
20231
20250
|
<button class="btn-primary" id="cron-modal-save" onclick="saveCronJob()">Create Task</button>
|
|
@@ -23274,6 +23293,22 @@ function renderScheduledTaskCard(task) {
|
|
|
23274
23293
|
var ok = lr.status === 'ok';
|
|
23275
23294
|
var statusIcon = ok ? '<span style="color:var(--green)">✓</span>' : '<span style="color:var(--red)">✗</span>';
|
|
23276
23295
|
lastRunHtml = statusIcon + ' ' + esc(lr.status || 'unknown') + ' · ' + esc(timeAgo(lr.finishedAt || lr.startedAt || ''));
|
|
23296
|
+
// PRD Phase 1.1: goal pill. Orthogonal to status — a run can be status='ok'
|
|
23297
|
+
// but goalCheck.status='fail' (the agent finished cleanly without
|
|
23298
|
+
// accomplishing the stated goal). That's exactly the failure mode the
|
|
23299
|
+
// PRD's goal-orientation feature is designed to surface.
|
|
23300
|
+
if (lr.goalCheck) {
|
|
23301
|
+
var gc = lr.goalCheck;
|
|
23302
|
+
var gIcon = gc.status === 'pass' ? '🎯' : gc.status === 'fail' ? '✗' : gc.status === 'error' ? '⚠' : '';
|
|
23303
|
+
var gColor = gc.status === 'pass' ? 'var(--green)' : gc.status === 'fail' ? 'var(--red)' : 'var(--yellow)';
|
|
23304
|
+
var gLabel = gc.status === 'pass' ? 'goal met' : gc.status === 'fail' ? 'goal not met' : gc.status === 'error' ? 'goal eval failed' : '';
|
|
23305
|
+
var gTip = '';
|
|
23306
|
+
if (gc.evaluatorReason) gTip = gc.evaluatorReason;
|
|
23307
|
+
else if (Array.isArray(gc.schemaErrors) && gc.schemaErrors.length) gTip = 'Schema errors: ' + gc.schemaErrors.join('; ');
|
|
23308
|
+
if (gIcon && gLabel) {
|
|
23309
|
+
lastRunHtml += ' <span style="color:' + gColor + ';font-size:11px;font-weight:500" title="' + esc(gTip || gLabel) + '">· ' + gIcon + ' ' + esc(gLabel) + '</span>';
|
|
23310
|
+
}
|
|
23311
|
+
}
|
|
23277
23312
|
// "ran with: …" — surface the skills + MCP that were live for this run.
|
|
23278
23313
|
var ranWith = [];
|
|
23279
23314
|
if (Array.isArray(lr.skillsApplied) && lr.skillsApplied.length > 0) {
|
|
@@ -23380,8 +23415,23 @@ function renderRecentHistoryList(runs) {
|
|
|
23380
23415
|
var preview = String(entry.outputPreview).slice(0, 140);
|
|
23381
23416
|
errorPreview = '<div style="font-size:11px;color:var(--text-muted);margin-top:2px;word-break:break-word">' + esc(preview) + '</div>';
|
|
23382
23417
|
}
|
|
23383
|
-
|
|
23418
|
+
// PRD Phase 1.1: goal cell. Left empty when no goal is configured: runGoalCheck
|
|
23419
|
+
// returns undefined in that case, so goalCheck is omitted from the entry
|
|
23420
|
+
// entirely (missing field == no goal). The cell stays present in the
|
|
23421
|
+
// grid for column alignment.
|
|
23422
|
+
var goalCellHtml = '<div></div>';
|
|
23423
|
+
if (entry.goalCheck) {
|
|
23424
|
+
var gc2 = entry.goalCheck;
|
|
23425
|
+
var gIcon2 = gc2.status === 'pass' ? '🎯' : gc2.status === 'fail' ? '✗' : gc2.status === 'error' ? '⚠' : '';
|
|
23426
|
+
var gColor2 = gc2.status === 'pass' ? 'var(--green)' : gc2.status === 'fail' ? 'var(--red)' : 'var(--yellow)';
|
|
23427
|
+
var gTip2 = gc2.evaluatorReason
|
|
23428
|
+
? gc2.evaluatorReason
|
|
23429
|
+
: (Array.isArray(gc2.schemaErrors) && gc2.schemaErrors.length ? 'Schema errors: ' + gc2.schemaErrors.join('; ') : gc2.status);
|
|
23430
|
+
goalCellHtml = '<div style="color:' + gColor2 + ';font-size:13px;line-height:18px;text-align:center" title="' + esc(gTip2) + '">' + gIcon2 + '</div>';
|
|
23431
|
+
}
|
|
23432
|
+
rowsHtml += '<div class="history-row" data-trace-job="' + esc(jobName) + '" style="display:grid;grid-template-columns:24px 24px minmax(180px,1.2fr) minmax(180px,1fr) 90px auto;gap:10px;align-items:start;padding:8px 14px;border-bottom:1px solid var(--border);cursor:pointer">'
|
|
23384
23433
|
+ '<div style="color:' + statusColor + ';font-size:14px;line-height:18px;text-align:center" title="' + esc(status) + '">' + statusIcon + '</div>'
|
|
23434
|
+
+ goalCellHtml
|
|
23385
23435
|
+ '<div style="min-width:0">'
|
|
23386
23436
|
+ '<div style="font-weight:500;color:var(--text-primary);font-size:13px;overflow:hidden;text-overflow:ellipsis;white-space:nowrap" title="' + esc(jobName) + '">' + esc(jobName) + attemptLabel + '</div>'
|
|
23387
23437
|
+ errorPreview
|
|
@@ -23392,8 +23442,10 @@ function renderRecentHistoryList(runs) {
|
|
|
23392
23442
|
+ '</div>';
|
|
23393
23443
|
}
|
|
23394
23444
|
return '<div class="history-list" style="background:var(--bg-secondary);border:1px solid var(--border);border-radius:var(--radius)">'
|
|
23395
|
-
+ '<div style="display:grid;grid-template-columns:24px minmax(180px,1.2fr) minmax(180px,1fr) 90px auto;gap:10px;padding:8px 14px;border-bottom:1px solid var(--border);font-size:11px;color:var(--text-muted);text-transform:uppercase;letter-spacing:0.04em;font-weight:500">'
|
|
23396
|
-
+ '<div></div
|
|
23445
|
+
+ '<div style="display:grid;grid-template-columns:24px 24px minmax(180px,1.2fr) minmax(180px,1fr) 90px auto;gap:10px;padding:8px 14px;border-bottom:1px solid var(--border);font-size:11px;color:var(--text-muted);text-transform:uppercase;letter-spacing:0.04em;font-weight:500">'
|
|
23446
|
+
+ '<div title="Run status (ok / error / etc.)"></div>'
|
|
23447
|
+
+ '<div title="Goal check result — orthogonal to run status">Goal</div>'
|
|
23448
|
+
+ '<div>Task</div><div>Started</div><div>Duration</div><div></div>'
|
|
23397
23449
|
+ '</div>'
|
|
23398
23450
|
+ rowsHtml
|
|
23399
23451
|
+ '</div>';
|
|
@@ -24717,8 +24769,10 @@ function switchCronTab(tab) {
|
|
|
24717
24769
|
});
|
|
24718
24770
|
var configurePane = document.getElementById('cron-tab-configure');
|
|
24719
24771
|
var previewPane = document.getElementById('cron-tab-preview');
|
|
24772
|
+
var lastRunPane = document.getElementById('cron-tab-lastrun');
|
|
24720
24773
|
if (configurePane) configurePane.classList.toggle('active', tab === 'configure');
|
|
24721
24774
|
if (previewPane) previewPane.classList.toggle('active', tab === 'preview');
|
|
24775
|
+
if (lastRunPane) lastRunPane.classList.toggle('active', tab === 'lastrun');
|
|
24722
24776
|
if (tab === 'preview') {
|
|
24723
24777
|
var name = editingCronJob;
|
|
24724
24778
|
if (!name) {
|
|
@@ -24727,6 +24781,10 @@ function switchCronTab(tab) {
|
|
|
24727
24781
|
return;
|
|
24728
24782
|
}
|
|
24729
24783
|
if (_cronPreviewLoadedFor !== name) loadCronPreviewIntoTab(name);
|
|
24784
|
+
} else if (tab === 'lastrun') {
|
|
24785
|
+
// Re-render in case run-state changed since the modal opened.
|
|
24786
|
+
var jobLR = (typeof cronJobsData !== 'undefined' ? cronJobsData : []).find(function(j) { return j.name === editingCronJob; });
|
|
24787
|
+
if (jobLR) renderCronLastRunPane(jobLR);
|
|
24730
24788
|
}
|
|
24731
24789
|
}
|
|
24732
24790
|
|
|
@@ -24751,6 +24809,169 @@ async function loadCronPreviewIntoTab(jobName) {
|
|
|
24751
24809
|
// Mark the preview as stale (call after save so next tab visit refetches).
|
|
24752
24810
|
function markCronPreviewDirty() { _cronPreviewLoadedFor = null; }
|
|
24753
24811
|
|
|
24812
|
+
// ── PRD Phase 1.2: "Run task once" — inline run + Last run tab ───────────
|
|
24813
|
+
// Tracks an in-flight run triggered FROM the modal so the SSE listeners
|
|
24814
|
+
// know when a cron_complete event belongs to "the run I just kicked off"
|
|
24815
|
+
// vs a scheduled tick that fired in the background. Cleared when the run
|
|
24816
|
+
// completes or the modal closes.
|
|
24817
|
+
var _cronRunOnceInFlight = null; // { jobName: string, startedAt: number }
|
|
24818
|
+
var _cronRunOnceTickerId = null; // setInterval id for the elapsed counter
|
|
24819
|
+
|
|
24820
|
+
// Render the Last run pane from the job's most-recent JSONL entry. Called
|
|
24821
|
+
// when the modal opens for a saved task and when switchCronTab('lastrun')
|
|
24822
|
+
// reactivates the pane.
|
|
24823
|
+
// Fill #cron-lastrun-body for the given job. Priority: in-flight run started
// from the modal, then the "No runs yet" placeholder, then the stored lastRun.
// job: entry from the cron jobs list; may be null/undefined (guarded below).
function renderCronLastRunPane(job) {
|
|
24824
|
+
var pane = document.getElementById('cron-lastrun-body');
|
|
24825
|
+
// Nothing to render into if the pane element is not in the DOM.
if (!pane) return;
|
|
24826
|
+
// If we have an in-flight run, render the running state regardless of
|
|
24827
|
+
// what's on disk — the on-disk lastRun is from BEFORE this fire.
|
|
24828
|
+
if (_cronRunOnceInFlight && _cronRunOnceInFlight.jobName === (job && job.name)) {
|
|
24829
|
+
pane.innerHTML = renderCronRunningState(_cronRunOnceInFlight.startedAt);
|
|
24830
|
+
return;
|
|
24831
|
+
}
|
|
24832
|
+
var lr = job && job.lastRun;
|
|
24833
|
+
// No recorded run for this job yet: show the call-to-action placeholder.
if (!lr) {
|
|
24834
|
+
pane.innerHTML = '<div style="padding:36px 24px;color:var(--text-muted);text-align:center;font-size:13px">No runs yet. Click <strong>Run task once</strong> below to fire it now and watch the result here.</div>';
|
|
24835
|
+
return;
|
|
24836
|
+
}
|
|
24837
|
+
// Otherwise show the details of the most recent recorded run.
pane.innerHTML = renderCronRunDetails(lr);
|
|
24838
|
+
}
|
|
24839
|
+
|
|
24840
|
+
// Build the HTML string for the "Running…" placeholder shown while a run is in flight.
// startedAtMs: epoch milliseconds at which the run was started.
// Returns markup with a pulsing dot and the elapsed time in whole seconds.
function renderCronRunningState(startedAtMs) {
|
|
24841
|
+
// Whole seconds since start, clamped so a start time in the future never shows a negative value.
var elapsed = Math.max(0, Math.round((Date.now() - startedAtMs) / 1000));
|
|
24842
|
+
return ''
|
|
24843
|
+
+ '<div style="padding:36px 24px;text-align:center">'
|
|
24844
|
+
+ '<div class="run-once-pulse" style="font-size:14px;color:var(--accent);font-weight:500;margin-bottom:8px">'
|
|
24845
|
+
// The dot is animated by the "pulse" keyframes defined in the dashboard stylesheet.
+ '<span class="pulse-dot" style="display:inline-block;width:8px;height:8px;border-radius:50%;background:var(--accent);margin-right:6px;animation:pulse 1.4s ease-in-out infinite"></span>'
|
|
24846
|
+
+ 'Running…'
|
|
24847
|
+
+ '</div>'
|
|
24848
|
+
// NOTE(review): the #cron-run-once-elapsed span is presumably refreshed by the ticker tracked in _cronRunOnceTickerId — confirm.
+ '<div style="font-size:12px;color:var(--text-muted)">Elapsed: <span id="cron-run-once-elapsed">' + elapsed + 's</span></div>'
|
|
24849
|
+
+ '<div style="font-size:11px;color:var(--text-muted);margin-top:14px">Live output streaming will land here when the run completes.</div>'
|
|
24850
|
+
+ '</div>';
|
|
24851
|
+
}
|
|
24852
|
+
|
|
24853
|
+
// Build the HTML for the Last run pane from a single run entry (a job's
// lastRun). Sections, in order: status header (icon, status text, timestamp,
// duration, attempt number when greater than 1), goal-check pill, error text
// (first 2000 chars), output preview (first 4000 chars), skills and MCP
// servers recorded on the entry, and an "Open trace" button.
// Text taken from the entry is passed through esc() before concatenation;
// the job name embedded in the onclick handler goes through jsStr().
// Returns the markup as a string; the caller assigns it to innerHTML.
function renderCronRunDetails(lr) {
  var ok = lr.status === 'ok';
  // Anything that is neither 'ok' nor 'error' gets the yellow / stopwatch treatment.
  var statusColor = ok ? 'var(--green)' : (lr.status === 'error' ? 'var(--red)' : 'var(--yellow)');
  var statusIcon = ok ? '✓' : (lr.status === 'error' ? '✗' : '⏱');
  var dur = lr.durationMs != null ? formatDurationMs(lr.durationMs) : '—';
  // Prefer the finish time; fall back to the start time when it is missing.
  var when = lr.finishedAt || lr.startedAt;
  var whenLabel = when ? new Date(when).toLocaleString() : '—';
  var html = ''
    + '<div style="padding:24px">'
    + '<div style="display:flex;align-items:baseline;gap:10px;margin-bottom:14px">'
    + '<span style="color:' + statusColor + ';font-size:18px">' + statusIcon + '</span>'
    + '<span style="font-size:14px;font-weight:600;color:var(--text-primary);text-transform:capitalize">' + esc(lr.status || 'unknown') + '</span>'
    + '<span style="flex:1"></span>'
    + '<span style="font-size:12px;color:var(--text-muted)">' + esc(whenLabel) + ' · ' + esc(dur) + (lr.attempt && lr.attempt > 1 ? ' · attempt ' + esc(lr.attempt) : '') + '</span>'
    + '</div>';
  // goalCheck.status 'skipped' means no goal is configured on the task, so
  // there is no verdict to display. Without this guard a skipped check fell
  // through to the catch-all branch and was shown as "Goal evaluation failed".
  if (lr.goalCheck && lr.goalCheck.status !== 'skipped') {
    var gc = lr.goalCheck;
    var gIcon = gc.status === 'pass' ? '🎯' : gc.status === 'fail' ? '✗' : '⚠';
    var gColor = gc.status === 'pass' ? 'var(--green)' : gc.status === 'fail' ? 'var(--red)' : 'var(--yellow)';
    var gLabel = gc.status === 'pass' ? 'Goal met' : gc.status === 'fail' ? 'Goal NOT met' : 'Goal evaluation failed';
    // The evaluator's reason wins; schema errors are only shown when it is absent.
    var gReason = gc.evaluatorReason || (Array.isArray(gc.schemaErrors) ? gc.schemaErrors.join('; ') : '');
    html += '<div style="padding:10px 14px;border-radius:6px;background:rgba(255,255,255,0.04);border-left:3px solid ' + gColor + ';margin-bottom:14px">'
      + '<div style="font-size:13px;font-weight:500;color:' + gColor + '">' + gIcon + ' ' + gLabel + '</div>'
      + (gReason ? '<div style="font-size:12px;color:var(--text-secondary);margin-top:4px">' + esc(gReason) + '</div>' : '')
      + '</div>';
  }
  if (lr.error) {
    html += '<div style="margin-bottom:14px"><div style="font-size:11px;color:var(--text-muted);text-transform:uppercase;letter-spacing:0.04em;margin-bottom:6px">Error</div>'
      + '<div style="font-family:\\x27JetBrains Mono\\x27,monospace;font-size:11px;color:var(--red);background:rgba(239,68,68,0.06);border:1px solid rgba(239,68,68,0.2);padding:10px;border-radius:6px;white-space:pre-wrap;word-break:break-word">'
      + esc(String(lr.error).slice(0, 2000)) + '</div></div>';
  }
  if (lr.outputPreview) {
    html += '<div style="margin-bottom:14px"><div style="font-size:11px;color:var(--text-muted);text-transform:uppercase;letter-spacing:0.04em;margin-bottom:6px">Output preview</div>'
      + '<div style="font-size:12px;color:var(--text-primary);background:var(--bg-secondary);border:1px solid var(--border);padding:10px;border-radius:6px;white-space:pre-wrap;word-break:break-word;max-height:300px;overflow-y:auto">'
      + esc(String(lr.outputPreview).slice(0, 4000)) + '</div></div>';
  }
  if (Array.isArray(lr.skillsApplied) && lr.skillsApplied.length) {
    html += '<div style="font-size:11px;color:var(--text-muted);margin-bottom:6px">Skills active: ' + esc(lr.skillsApplied.map(function(s){ return s.name; }).join(', ')) + '</div>';
  }
  if (Array.isArray(lr.mcpServersApplied) && lr.mcpServersApplied.length) {
    html += '<div style="font-size:11px;color:var(--text-muted);margin-bottom:6px">MCP servers: ' + esc(lr.mcpServersApplied.join(', ')) + '</div>';
  }
  // Fall back to the job currently open in the modal when the entry has no jobName.
  html += '<div style="margin-top:14px;display:flex;gap:8px"><button class="btn-sm" onclick="openTraceViewer(\\x27' + jsStr(lr.jobName || editingCronJob || '') + '\\x27)" style="font-size:11px">Open trace</button></div>';
  html += '</div>';
  return html;
}
|
|
24899
|
+
|
|
24900
|
+
// Click handler for the green "Run task once" button. Triggers the existing
// /api/cron/run/:job endpoint and switches to the Last run tab so the user
// sees the running state. The SSE handler at the bottom of this file picks
// up cron_complete and re-renders the pane with the result.
//
// Refuses to run when no saved job is open or when a run-once is already
// being tracked, and asks for confirmation when the form has unsaved edits
// (the saved version is what runs). If the trigger request fails for any
// reason - non-OK response, ok:false payload, network error, unparsable
// body - the user gets a toast and the pane shows the error instead of the
// running state.
async function runCronOnceFromModal() {
  if (!editingCronJob) {
    toast('Save the task first, then Run task once.', 'error');
    return;
  }
  if (_cronRunOnceInFlight) {
    toast('Already running — wait for the current run to finish.', 'info');
    return;
  }
  if (isCronModalDirty()) {
    if (!confirm('You have unsaved changes. Run the SAVED version (your edits stay in the form)?')) return;
  }
  // Pin the job name now: editingCronJob is reset when the modal closes,
  // which can happen while the request below is still pending.
  var jobName = editingCronJob;
  var btn = document.getElementById('cron-run-once-btn');
  if (btn) { btn.disabled = true; btn.textContent = 'Triggering…'; }
  var flight = { jobName: jobName, startedAt: Date.now() };
  _cronRunOnceInFlight = flight;
  // Show the running state and switch the pane immediately.
  switchCronTab('lastrun');
  var pane = document.getElementById('cron-lastrun-body');
  if (pane) pane.innerHTML = renderCronRunningState(flight.startedAt);
  // Tick the elapsed counter once a second. The ticker stops itself as soon
  // as the in-flight record is cleared by anyone.
  if (_cronRunOnceTickerId) clearInterval(_cronRunOnceTickerId);
  _cronRunOnceTickerId = setInterval(function() {
    if (!_cronRunOnceInFlight) { clearInterval(_cronRunOnceTickerId); _cronRunOnceTickerId = null; return; }
    var elapsedEl = document.getElementById('cron-run-once-elapsed');
    if (elapsedEl) {
      var s = Math.max(0, Math.round((Date.now() - _cronRunOnceInFlight.startedAt) / 1000));
      elapsedEl.textContent = s + 's';
    }
  }, 1000);

  // Shared failure path: always tell the user, but only reset the tracking
  // state and the pane when the record is still the one this call created.
  // If the modal was closed (or reused for another run) while the request
  // was pending, that state belongs to someone else and is left alone.
  function abortRun(message) {
    toast(message, 'error');
    if (_cronRunOnceInFlight !== flight) return;
    _cronRunOnceInFlight = null;
    if (_cronRunOnceTickerId) { clearInterval(_cronRunOnceTickerId); _cronRunOnceTickerId = null; }
    if (pane) pane.innerHTML = '<div style="padding:36px 24px;color:var(--red);text-align:center;font-size:13px">' + esc(message) + '</div>';
  }

  try {
    var r = await apiFetch('/api/cron/run/' + encodeURIComponent(jobName), { method: 'POST' });
    var d = await r.json();
    if (!r.ok || d.ok === false) {
      abortRun(d.error || 'Run failed to start');
    }
  } catch (e) {
    // Previously this path left the pane stuck on "Running…".
    abortRun('Run failed to start: ' + String(e));
  } finally {
    // The button is re-armed once the trigger request settles; a second
    // click while the run is still tracked is rejected by the guard above.
    if (btn) { btn.disabled = false; btn.textContent = '▶ Run task once'; }
  }
}
|
|
24950
|
+
|
|
24951
|
+
// Called from the SSE handler when cron_complete fires. The same SSE handler
// also schedules refreshCron() which updates cronJobsData with the fresh
// lastRun. We just wait a beat for that, then re-render the pane.
//
// jobName is the job named in the event. Events for any job other than the
// one being tracked by Run task once are ignored.
function handleCronRunOnceComplete(jobName) {
  if (!_cronRunOnceInFlight || _cronRunOnceInFlight.jobName !== jobName) return;
  if (_cronRunOnceTickerId) { clearInterval(_cronRunOnceTickerId); _cronRunOnceTickerId = null; }
  _cronRunOnceInFlight = null;
  // refreshCron is racing with us; give it ~600ms to land the new entry
  // into cronJobsData before we read. The SSE handler that called us kicks
  // that refresh off when the event arrives.
  setTimeout(function() {
    // The modal may have been closed, or reopened on a different job, while
    // we were waiting. Do not paint this job's result into another pane.
    if (editingCronJob !== jobName) return;
    var pane = document.getElementById('cron-lastrun-body');
    if (!pane) return;
    var jobs = Array.isArray(cronJobsData) ? cronJobsData : [];
    var fresh = jobs.find(function(j) { return j.name === jobName; });
    var lr = fresh && fresh.lastRun;
    if (lr) {
      pane.innerHTML = renderCronRunDetails(lr);
      toast('Run finished — ' + (lr.status === 'ok' ? 'success' : lr.status), lr.status === 'ok' ? 'success' : 'error');
    } else {
      pane.innerHTML = '<div style="padding:36px 24px;color:var(--text-muted);text-align:center;font-size:13px">Run finished but the result is still propagating. Refresh the dashboard to see it.</div>';
    }
  }, 600);
}
|
|
24974
|
+
|
|
24754
24975
|
// ── Predictable mode: visual card sync + legacy banner ───────────
|
|
24755
24976
|
function onPredictableChange() {
|
|
24756
24977
|
var predEl = document.getElementById('cron-predictable');
|
|
@@ -24866,6 +25087,11 @@ function openCreateCronModal(agentSlug) {
|
|
|
24866
25087
|
// No saved state to preview when creating — disable the Preview tab.
|
|
24867
25088
|
var previewBtn = document.getElementById('cron-tab-btn-preview');
|
|
24868
25089
|
if (previewBtn) previewBtn.setAttribute('disabled', 'disabled');
|
|
25090
|
+
// Last run + Run-task-once button only make sense for saved tasks.
|
|
25091
|
+
var lastRunBtn = document.getElementById('cron-tab-btn-lastrun');
|
|
25092
|
+
if (lastRunBtn) lastRunBtn.setAttribute('disabled', 'disabled');
|
|
25093
|
+
var runOnceBtn = document.getElementById('cron-run-once-btn');
|
|
25094
|
+
if (runOnceBtn) runOnceBtn.style.display = 'none';
|
|
24869
25095
|
var host = document.getElementById('cron-legacy-banner-host');
|
|
24870
25096
|
if (host) host.innerHTML = '';
|
|
24871
25097
|
// Reset the "Use a cron expression" link in case it was hidden last time.
|
|
@@ -24957,9 +25183,18 @@ function openEditCronModal(jobName) {
|
|
|
24957
25183
|
renderTagsPickerChips();
|
|
24958
25184
|
_pendingAttachments = [];
|
|
24959
25185
|
loadCronAttachments(jobName);
|
|
24960
|
-
// Existing job has saved state, enable Preview
|
|
25186
|
+
// Existing job has saved state, enable Preview + Last run tabs.
|
|
24961
25187
|
var previewBtn = document.getElementById('cron-tab-btn-preview');
|
|
24962
25188
|
if (previewBtn) previewBtn.removeAttribute('disabled');
|
|
25189
|
+
var lastRunBtnEdit = document.getElementById('cron-tab-btn-lastrun');
|
|
25190
|
+
if (lastRunBtnEdit) lastRunBtnEdit.removeAttribute('disabled');
|
|
25191
|
+
// Show "Run task once" only for saved tasks.
|
|
25192
|
+
var runOnceBtnEdit = document.getElementById('cron-run-once-btn');
|
|
25193
|
+
if (runOnceBtnEdit) runOnceBtnEdit.style.display = '';
|
|
25194
|
+
// Render the most recent run from the loaded job into the Last run tab so
|
|
25195
|
+
// the user sees something the moment they switch to it (rather than a
|
|
25196
|
+
// dead empty pane). The pane updates live when Run task once fires.
|
|
25197
|
+
renderCronLastRunPane(job);
|
|
24963
25198
|
switchCronTab('configure');
|
|
24964
25199
|
document.getElementById('cron-modal').classList.add('show');
|
|
24965
25200
|
setTimeout(captureCronModalSnapshot, 0);
|
|
@@ -25167,6 +25402,10 @@ function closeCronModal(force) {
|
|
|
25167
25402
|
editingCronJob = null;
|
|
25168
25403
|
_cronPreviewLoadedFor = null;
|
|
25169
25404
|
_cronModalSnapshot = null;
|
|
25405
|
+
// Clear any pending Run-task-once watch so SSE events for a different job
|
|
25406
|
+
// don't accidentally re-render the (now closed) Last run pane.
|
|
25407
|
+
if (_cronRunOnceTickerId) { clearInterval(_cronRunOnceTickerId); _cronRunOnceTickerId = null; }
|
|
25408
|
+
_cronRunOnceInFlight = null;
|
|
25170
25409
|
var attachList = document.getElementById('cron-attachments-list');
|
|
25171
25410
|
if (attachList) attachList.innerHTML = '';
|
|
25172
25411
|
var bannerHost = document.getElementById('cron-legacy-banner-host');
|
|
@@ -34858,6 +35097,11 @@ try {
|
|
|
34858
35097
|
refreshActivity();
|
|
34859
35098
|
if (currentPage === 'build') refreshCron();
|
|
34860
35099
|
refreshTeamNav();
|
|
35100
|
+
// PRD Phase 1.2: if the user just clicked "Run task once" in the
|
|
35101
|
+
// modal, re-render the Last run pane with the fresh result.
|
|
35102
|
+
if (evt.type === 'cron_complete' && evt.data && evt.data.job && typeof handleCronRunOnceComplete === 'function') {
|
|
35103
|
+
try { handleCronRunOnceComplete(evt.data.job); } catch (err) { /* non-fatal */ }
|
|
35104
|
+
}
|
|
34861
35105
|
}
|
|
34862
35106
|
// A delete on one tab should drop the card from every open dashboard
|
|
34863
35107
|
// without waiting for the next poll. cron_toggled is similar but lighter.
|
|
@@ -1236,6 +1236,23 @@ export class CronScheduler {
|
|
|
1236
1236
|
this.gateway.injectContext(`discord:user:${DISCORD_OWNER_ID}`, `[Scheduled cron: ${job.name}]`, response);
|
|
1237
1237
|
}
|
|
1238
1238
|
}
|
|
1239
|
+
// PRD Phase 1.1: goal-orientation. If the Task has successSchema or
|
|
1240
|
+
// successCriteriaText, run the evaluator now (before logging) so the
|
|
1241
|
+
// entry carries the goalCheck verdict. Errors here NEVER block
|
|
1242
|
+
// logging — runGoalCheck catches its own throws and emits
|
|
1243
|
+
// status='error' on the goalCheck instead.
|
|
1244
|
+
if (job.successSchema || (job.successCriteriaText && job.successCriteriaText.trim())) {
|
|
1245
|
+
try {
|
|
1246
|
+
const { runGoalCheck } = await import('../agent/goal-evaluator.js');
|
|
1247
|
+
const goalCheck = await runGoalCheck(response ?? '', job);
|
|
1248
|
+
if (goalCheck)
|
|
1249
|
+
entry.goalCheck = goalCheck;
|
|
1250
|
+
}
|
|
1251
|
+
catch (err) {
|
|
1252
|
+
logger.warn({ err, job: job.name }, 'Goal evaluator failed — proceeding without goalCheck');
|
|
1253
|
+
entry.goalCheck = { status: 'error', mode: 'evaluator', evaluatorReason: `evaluator orchestrator threw: ${String(err).slice(0, 200)}` };
|
|
1254
|
+
}
|
|
1255
|
+
}
|
|
1239
1256
|
this._logRun(entry);
|
|
1240
1257
|
this.logAutonomy('completed', job, { durationMs: entry.durationMs, deliveryFailed: entry.deliveryFailed, advisorApplied: !!advisorApplied });
|
|
1241
1258
|
// Fire-and-forget: extract procedural skill from successful long-running cron jobs
|
package/dist/types.d.ts
CHANGED
|
@@ -447,6 +447,24 @@ export interface CronRunEntry {
|
|
|
447
447
|
allowedToolsApplied?: string[];
|
|
448
448
|
/** MCP servers live for this run (post profile + trick allowlist intersection). */
|
|
449
449
|
mcpServersApplied?: string[];
|
|
450
|
+
/** PRD Phase 1: did the run accomplish what it was supposed to?
|
|
451
|
+
* Computed at run-end when the Task has successSchema or successCriteriaText.
|
|
452
|
+
* - status='pass' both configured checks passed (or the only one configured did)
|
|
453
|
+
* - status='fail' a configured check failed
|
|
454
|
+
* - status='skipped' no goal configured on the Task (don't show the pill)
|
|
455
|
+
* - status='error' evaluator/validator threw; does NOT mark the run failed
|
|
456
|
+
* This is orthogonal to CronRunEntry.status — a run can be status='ok' with
|
|
457
|
+
* goalCheck.status='fail' (the agent finished cleanly but didn't accomplish
|
|
458
|
+
* the stated goal), and that's the failure mode the PRD is designed to surface. */
|
|
459
|
+
goalCheck?: {
|
|
460
|
+
status: 'pass' | 'fail' | 'skipped' | 'error';
|
|
461
|
+
/** Which evaluators ran. 'both' means schema + evaluator agreed. */
|
|
462
|
+
mode: 'schema' | 'evaluator' | 'both';
|
|
463
|
+
schemaPass?: boolean;
|
|
464
|
+
schemaErrors?: string[];
|
|
465
|
+
evaluatorPass?: boolean;
|
|
466
|
+
evaluatorReason?: string;
|
|
467
|
+
};
|
|
450
468
|
}
|
|
451
469
|
export interface Models {
|
|
452
470
|
haiku: string;
|