@principles/core 1.86.0 → 1.87.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/runtime-v2/__tests__/evaluator-runner-vslice.test.js +245 -1
- package/dist/runtime-v2/__tests__/evaluator-runner-vslice.test.js.map +1 -1
- package/dist/runtime-v2/__tests__/telemetry-event.test.js +13 -0
- package/dist/runtime-v2/__tests__/telemetry-event.test.js.map +1 -1
- package/dist/runtime-v2/internalization/evaluator-output.d.ts +2 -2
- package/dist/runtime-v2/internalization/evaluator-output.d.ts.map +1 -1
- package/dist/runtime-v2/internalization/evaluator-output.js +29 -23
- package/dist/runtime-v2/internalization/evaluator-output.js.map +1 -1
- package/dist/runtime-v2/internalization/evaluator-runner.d.ts +74 -48
- package/dist/runtime-v2/internalization/evaluator-runner.d.ts.map +1 -1
- package/dist/runtime-v2/internalization/evaluator-runner.js +164 -444
- package/dist/runtime-v2/internalization/evaluator-runner.js.map +1 -1
- package/dist/telemetry-event.d.ts +2 -2
- package/dist/telemetry-event.d.ts.map +1 -1
- package/dist/telemetry-event.js +1 -0
- package/dist/telemetry-event.js.map +1 -1
- package/package.json +1 -1
|
@@ -1,9 +1,9 @@
|
|
|
1
|
-
import { PDRuntimeError } from '../error-categories.js';
|
|
1
|
+
import { PDRuntimeError, isPDErrorCategory } from '../error-categories.js';
|
|
2
2
|
import { hydratePITaskRecord } from './pitask-metadata.js';
|
|
3
|
-
import { RunnerPhase } from '../runner/runner-phase.js';
|
|
4
3
|
import { EvaluatorPromptBuilder } from './evaluator-prompt-builder.js';
|
|
5
4
|
import { injectRunnerLineageIfAbsent } from './peer-runner-contracts.js';
|
|
6
|
-
|
|
5
|
+
import { BasePeerRunner } from '../runner/base-peer-runner.js';
|
|
6
|
+
export const DEFAULT_EVALUATOR_RUNNER_OPTIONS = {
|
|
7
7
|
pollIntervalMs: 5_000,
|
|
8
8
|
timeoutMs: 300_000,
|
|
9
9
|
defaultMaxAttempts: 3,
|
|
@@ -19,119 +19,22 @@ export function resolveEvaluatorRunnerOptions(options) {
|
|
|
19
19
|
agentId: options.agentId ?? DEFAULT_EVALUATOR_RUNNER_OPTIONS.agentId,
|
|
20
20
|
};
|
|
21
21
|
}
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
]);
|
|
25
|
-
export class EvaluatorRunner {
|
|
26
|
-
phase = RunnerPhase.Idle;
|
|
27
|
-
resolvedOptions;
|
|
28
|
-
stateManager;
|
|
29
|
-
runtimeAdapter;
|
|
30
|
-
eventEmitter;
|
|
22
|
+
// ── EvaluatorRunner ───────────────────────────────────────────────────────────
|
|
23
|
+
export class EvaluatorRunner extends BasePeerRunner {
|
|
31
24
|
validator;
|
|
32
|
-
artifactStore;
|
|
33
25
|
constructor(deps, options) {
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
this.resolvedOptions = resolveEvaluatorRunnerOptions(options);
|
|
40
|
-
}
|
|
41
|
-
get currentPhase() {
|
|
42
|
-
return this.phase;
|
|
43
|
-
}
|
|
44
|
-
emitEvaluatorEvent(eventType, taskId, payload) {
|
|
45
|
-
this.eventEmitter.emitTelemetry({
|
|
46
|
-
eventType: eventType,
|
|
47
|
-
traceId: taskId,
|
|
48
|
-
timestamp: new Date().toISOString(),
|
|
49
|
-
sessionId: this.resolvedOptions.owner,
|
|
50
|
-
agentId: this.resolvedOptions.agentId,
|
|
51
|
-
payload,
|
|
26
|
+
super(deps, options, {
|
|
27
|
+
runnerName: 'evaluator',
|
|
28
|
+
expectedTaskKind: 'evaluator',
|
|
29
|
+
defaultAgentId: 'evaluator',
|
|
30
|
+
resultRefPrefix: 'evaluator',
|
|
52
31
|
});
|
|
32
|
+
this.validator = deps.validator;
|
|
53
33
|
}
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
leasedTask = await this.stateManager.acquireLease({
|
|
59
|
-
taskId,
|
|
60
|
-
owner: this.resolvedOptions.owner,
|
|
61
|
-
runtimeKind: this.resolvedOptions.runtimeKind,
|
|
62
|
-
});
|
|
63
|
-
}
|
|
64
|
-
catch (error) {
|
|
65
|
-
return await this.handleLeaseOrPhaseError(taskId, error);
|
|
66
|
-
}
|
|
67
|
-
if (leasedTask.taskKind !== 'evaluator') {
|
|
68
|
-
this.emitEvaluatorEvent('evaluator_wrong_task_kind', taskId, {
|
|
69
|
-
expectedKind: 'evaluator',
|
|
70
|
-
actualKind: leasedTask.taskKind,
|
|
71
|
-
});
|
|
72
|
-
return this.retryOrFail({
|
|
73
|
-
taskId,
|
|
74
|
-
task: leasedTask,
|
|
75
|
-
errorCategory: 'input_invalid',
|
|
76
|
-
failureReason: `Task kind must be 'evaluator', got '${leasedTask.taskKind}'`,
|
|
77
|
-
});
|
|
78
|
-
}
|
|
79
|
-
this.emitEvaluatorEvent('evaluator_task_leased', taskId, {
|
|
80
|
-
taskKind: 'evaluator',
|
|
81
|
-
attemptCount: leasedTask.attemptCount,
|
|
82
|
-
});
|
|
83
|
-
try {
|
|
84
|
-
const storeRunId = await this.resolveStoreRunId(taskId);
|
|
85
|
-
this.phase = RunnerPhase.BuildingContext;
|
|
86
|
-
const { contextHash, artificerArtifact, sourceArtificerArtifactId } = await this.buildContext(taskId);
|
|
87
|
-
if (!artificerArtifact || !sourceArtificerArtifactId) {
|
|
88
|
-
return this.retryOrFail({
|
|
89
|
-
taskId,
|
|
90
|
-
task: leasedTask,
|
|
91
|
-
errorCategory: 'input_invalid',
|
|
92
|
-
failureReason: sourceArtificerArtifactId ? 'Artificer dependency artifact not found' : 'Artificer dependency artifact ID not resolved',
|
|
93
|
-
});
|
|
94
|
-
}
|
|
95
|
-
this.emitEvaluatorEvent('evaluator_context_built', taskId, { contextHash });
|
|
96
|
-
this.phase = RunnerPhase.Invoking;
|
|
97
|
-
const runHandle = await this.invokeRuntime({ taskId, contextHash, artificerArtifact, sourceArtificerArtifactId });
|
|
98
|
-
this.emitEvaluatorEvent('evaluator_run_started', taskId, {
|
|
99
|
-
runtimeKind: this.resolvedOptions.runtimeKind,
|
|
100
|
-
});
|
|
101
|
-
this.phase = RunnerPhase.Polling;
|
|
102
|
-
const finalStatus = await this.pollUntilTerminal(runHandle);
|
|
103
|
-
if (finalStatus.status !== 'succeeded') {
|
|
104
|
-
return await this.handleRuntimeFailure(taskId, leasedTask, finalStatus);
|
|
105
|
-
}
|
|
106
|
-
this.phase = RunnerPhase.FetchingOutput;
|
|
107
|
-
const output = await this.fetchAndParseOutput(runHandle.runId);
|
|
108
|
-
// Re-inject taskId if stripped by stripLineageFields (PRI-272 / ERR-008).
|
|
109
|
-
injectRunnerLineageIfAbsent(output, 'taskId', taskId);
|
|
110
|
-
this.phase = RunnerPhase.Validating;
|
|
111
|
-
const validationResult = await this.validator.validate(output, taskId, sourceArtificerArtifactId ?? undefined);
|
|
112
|
-
if (!validationResult.valid) {
|
|
113
|
-
return await this.handleValidationError({
|
|
114
|
-
taskId,
|
|
115
|
-
task: leasedTask,
|
|
116
|
-
errors: validationResult.errors,
|
|
117
|
-
errorCategory: validationResult.errorCategory,
|
|
118
|
-
});
|
|
119
|
-
}
|
|
120
|
-
this.emitEvaluatorEvent('evaluator_output_validated', taskId, {
|
|
121
|
-
evaluationDecision: output.evaluation.decision,
|
|
122
|
-
evaluationScore: output.evaluation.score,
|
|
123
|
-
});
|
|
124
|
-
return await this.succeedTask({
|
|
125
|
-
taskId,
|
|
126
|
-
runId: storeRunId,
|
|
127
|
-
output,
|
|
128
|
-
task: leasedTask,
|
|
129
|
-
contextHash,
|
|
130
|
-
});
|
|
131
|
-
}
|
|
132
|
-
catch (error) {
|
|
133
|
-
return await this.handlePostLeaseError(taskId, leasedTask, error);
|
|
134
|
-
}
|
|
34
|
+
// ── Abstract implementations ────────────────────────────────────────────────
|
|
35
|
+
// eslint-disable-next-line @typescript-eslint/class-methods-use-this
|
|
36
|
+
get permanentErrorCategories() {
|
|
37
|
+
return new Set(['storage_unavailable', 'workspace_invalid', 'capability_missing', 'cancelled', 'input_invalid', 'output_invalid']);
|
|
135
38
|
}
|
|
136
39
|
async buildContext(taskId) {
|
|
137
40
|
const task = await this.stateManager.getTask(taskId);
|
|
@@ -141,8 +44,8 @@ export class EvaluatorRunner {
|
|
|
141
44
|
const piTask = hydratePITaskRecord(task);
|
|
142
45
|
const deps = piTask?.dependencyTaskIds ?? [];
|
|
143
46
|
if (deps.length === 0) {
|
|
144
|
-
this.
|
|
145
|
-
|
|
47
|
+
this.emitEvent('no_dependencies', taskId, {});
|
|
48
|
+
throw new PDRuntimeError('input_invalid', 'Artificer dependency artifact ID not resolved');
|
|
146
49
|
}
|
|
147
50
|
for (const depId of deps) {
|
|
148
51
|
const depTask = await this.stateManager.getTask(depId);
|
|
@@ -151,7 +54,7 @@ export class EvaluatorRunner {
|
|
|
151
54
|
if (depTask.taskKind !== 'artificer')
|
|
152
55
|
continue;
|
|
153
56
|
if (depTask.status !== 'succeeded') {
|
|
154
|
-
this.
|
|
57
|
+
this.emitEvent('dependency_not_succeeded', taskId, {
|
|
155
58
|
depTaskId: depId,
|
|
156
59
|
depStatus: depTask.status,
|
|
157
60
|
});
|
|
@@ -163,170 +66,149 @@ export class EvaluatorRunner {
|
|
|
163
66
|
if (!firstArtifact)
|
|
164
67
|
continue;
|
|
165
68
|
const artifactRef = firstArtifact.artifactId;
|
|
166
|
-
this.
|
|
69
|
+
this.emitEvent('artificer_dep_selected', taskId, {
|
|
167
70
|
depTaskId: depId,
|
|
168
71
|
artifactId: firstArtifact.artifactId,
|
|
169
72
|
});
|
|
170
73
|
return {
|
|
171
|
-
contextHash:
|
|
74
|
+
contextHash: BasePeerRunner.hashContextRefs([artifactRef]),
|
|
172
75
|
artificerArtifact: firstArtifact.contentJson,
|
|
173
76
|
sourceArtificerArtifactId: firstArtifact.artifactId,
|
|
174
77
|
};
|
|
175
78
|
}
|
|
176
79
|
}
|
|
177
|
-
this.
|
|
178
|
-
|
|
179
|
-
}
|
|
180
|
-
static hashContextRefs(refs) {
|
|
181
|
-
if (refs.length === 0)
|
|
182
|
-
return 'empty';
|
|
183
|
-
const str = refs.join('|');
|
|
184
|
-
let hash = 0;
|
|
185
|
-
for (let i = 0; i < str.length; i++) {
|
|
186
|
-
hash = (Math.imul(31, hash) + str.charCodeAt(i)) | 0;
|
|
187
|
-
}
|
|
188
|
-
return `ctx-${Math.abs(hash).toString(16)}`;
|
|
189
|
-
}
|
|
190
|
-
async resolveStoreRunId(taskId) {
|
|
191
|
-
const runs = await this.stateManager.getRunsByTask(taskId);
|
|
192
|
-
const latestRun = runs[runs.length - 1];
|
|
193
|
-
if (!latestRun) {
|
|
194
|
-
throw new PDRuntimeError('execution_failed', `No run records found for task ${taskId} after lease acquisition`);
|
|
195
|
-
}
|
|
196
|
-
return latestRun.runId;
|
|
80
|
+
this.emitEvent('no_artificer_artifact', taskId, {});
|
|
81
|
+
throw new PDRuntimeError('input_invalid', 'Artificer dependency artifact not found');
|
|
197
82
|
}
|
|
198
|
-
async invokeRuntime(
|
|
83
|
+
async invokeRuntime(taskId, context) {
|
|
199
84
|
let parsedArtificerArtifact = null;
|
|
200
|
-
if (
|
|
85
|
+
if (context.artificerArtifact) {
|
|
201
86
|
try {
|
|
202
|
-
parsedArtificerArtifact = JSON.parse(
|
|
87
|
+
parsedArtificerArtifact = JSON.parse(context.artificerArtifact);
|
|
203
88
|
}
|
|
204
89
|
catch {
|
|
205
|
-
parsedArtificerArtifact =
|
|
90
|
+
parsedArtificerArtifact = context.artificerArtifact;
|
|
206
91
|
}
|
|
207
92
|
}
|
|
208
93
|
const builder = new EvaluatorPromptBuilder();
|
|
209
94
|
const { message } = builder.buildPrompt({
|
|
210
|
-
taskId
|
|
211
|
-
contextHash:
|
|
95
|
+
taskId,
|
|
96
|
+
contextHash: context.contextHash,
|
|
212
97
|
artificerArtifact: parsedArtificerArtifact,
|
|
213
|
-
sourceArtificerArtifactId:
|
|
98
|
+
sourceArtificerArtifactId: context.sourceArtificerArtifactId ?? '',
|
|
214
99
|
});
|
|
215
|
-
|
|
100
|
+
return this.runtimeAdapter.startRun({
|
|
216
101
|
agentSpec: { agentId: this.resolvedOptions.agentId, schemaVersion: 'v1' },
|
|
217
|
-
taskRef: { taskId
|
|
102
|
+
taskRef: { taskId },
|
|
218
103
|
inputPayload: message,
|
|
219
104
|
contextItems: [],
|
|
220
105
|
outputSchemaRef: 'evaluator-output-v1',
|
|
221
106
|
timeoutMs: this.resolvedOptions.timeoutMs,
|
|
222
|
-
};
|
|
223
|
-
return this.runtimeAdapter.startRun(startInput);
|
|
107
|
+
});
|
|
224
108
|
}
|
|
225
|
-
async
|
|
226
|
-
const
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
await this.runtimeAdapter.cancelRun(runHandle.runId);
|
|
245
|
-
}
|
|
246
|
-
catch (cancelErr) {
|
|
247
|
-
cancelFailed = true;
|
|
248
|
-
this.emitEvaluatorEvent('evaluator_cancel_run_failed', runHandle.runId, {
|
|
249
|
-
runId: runHandle.runId,
|
|
250
|
-
errorMessage: cancelErr instanceof Error ? cancelErr.message : String(cancelErr),
|
|
251
|
-
});
|
|
109
|
+
async validateOutput(output, taskId, context) {
|
|
110
|
+
const result = await this.validator.validate(output, taskId, context.sourceArtificerArtifactId ?? undefined);
|
|
111
|
+
// Trust-boundary: validator is an injected dependency returning `string | undefined`
|
|
112
|
+
// for errorCategory. We must not `as`-cast; validate at runtime (ERR-001, ERR-005).
|
|
113
|
+
const rawCategory = result.errorCategory;
|
|
114
|
+
let errorCategory;
|
|
115
|
+
if (rawCategory == null) {
|
|
116
|
+
errorCategory = undefined;
|
|
117
|
+
}
|
|
118
|
+
else if (isPDErrorCategory(rawCategory)) {
|
|
119
|
+
errorCategory = rawCategory;
|
|
120
|
+
}
|
|
121
|
+
else {
|
|
122
|
+
// Invalid errorCategory from validator — fail loud, do not pass through
|
|
123
|
+
return {
|
|
124
|
+
valid: false,
|
|
125
|
+
errors: [...result.errors, `invalid errorCategory: ${rawCategory}`],
|
|
126
|
+
errorCategory: 'output_invalid',
|
|
127
|
+
};
|
|
252
128
|
}
|
|
253
|
-
|
|
254
|
-
|
|
129
|
+
return {
|
|
130
|
+
valid: result.valid,
|
|
131
|
+
errors: result.errors,
|
|
132
|
+
errorCategory,
|
|
133
|
+
};
|
|
255
134
|
}
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
135
|
+
// eslint-disable-next-line @typescript-eslint/max-params
|
|
136
|
+
async succeedTask(taskId, runId, output, task, contextHash, context) {
|
|
137
|
+
// Lineage consistency: sourceArtificerArtifactId must match buildContext result (ERR-004).
|
|
138
|
+
if (context.sourceArtificerArtifactId && output.sourceArtificerArtifactId !== context.sourceArtificerArtifactId) {
|
|
139
|
+
throw new PDRuntimeError('output_invalid', `sourceArtificerArtifactId mismatch: expected ${context.sourceArtificerArtifactId}, got ${output.sourceArtificerArtifactId}`);
|
|
260
140
|
}
|
|
261
|
-
|
|
262
|
-
if (typeof payload !== 'object' || payload === null) {
|
|
263
|
-
throw new PDRuntimeError('output_invalid', `Output payload is not an object for run ${runId}`);
|
|
264
|
-
}
|
|
265
|
-
if (typeof payload.evaluation !== 'object' || payload.evaluation === null) {
|
|
266
|
-
throw new PDRuntimeError('output_invalid', `Output payload missing evaluation object for run ${runId}`);
|
|
267
|
-
}
|
|
268
|
-
return result.payload;
|
|
269
|
-
}
|
|
270
|
-
async succeedTask(ctx) {
|
|
141
|
+
// Store output before marking succeeded
|
|
271
142
|
try {
|
|
272
|
-
await this.stateManager.updateRunOutput(
|
|
143
|
+
await this.stateManager.updateRunOutput(runId, JSON.stringify(output));
|
|
273
144
|
}
|
|
274
145
|
catch (updateErr) {
|
|
275
|
-
this.
|
|
276
|
-
runId
|
|
146
|
+
this.emitEvent('update_output_failed', taskId, {
|
|
147
|
+
runId,
|
|
277
148
|
errorMessage: updateErr instanceof Error ? updateErr.message : String(updateErr),
|
|
278
149
|
});
|
|
279
150
|
throw updateErr;
|
|
280
151
|
}
|
|
281
|
-
|
|
282
|
-
const now = new Date().toISOString();
|
|
152
|
+
// Resolve lineage artifact IDs
|
|
283
153
|
let lineageArtifactIds = [];
|
|
154
|
+
let lineageHasRejected = false;
|
|
284
155
|
try {
|
|
285
|
-
const
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
}
|
|
156
|
+
const lineageResult = await this.resolveLineageArtifactIds(taskId);
|
|
157
|
+
lineageArtifactIds = lineageResult.ids;
|
|
158
|
+
lineageHasRejected = lineageResult.hasRejected;
|
|
159
|
+
}
|
|
160
|
+
catch (lineageErr) {
|
|
161
|
+
this.emitEvent('lineage_resolve_failed', taskId, {
|
|
162
|
+
runId,
|
|
163
|
+
errorMessage: lineageErr instanceof Error ? lineageErr.message : String(lineageErr),
|
|
164
|
+
});
|
|
295
165
|
}
|
|
296
|
-
|
|
166
|
+
if (lineageHasRejected) {
|
|
167
|
+
this.emitEvent('lineage_partial', taskId, {
|
|
168
|
+
runId,
|
|
169
|
+
resolvedCount: lineageArtifactIds.length,
|
|
170
|
+
warning: 'Some dependency artifact queries were rejected; lineage may be incomplete',
|
|
171
|
+
});
|
|
172
|
+
}
|
|
173
|
+
// Write PIArtifact via artifactStore (idempotent upsert)
|
|
174
|
+
const artifactId = `pi-art-${taskId}-${runId}`;
|
|
175
|
+
const now = new Date().toISOString();
|
|
297
176
|
try {
|
|
298
177
|
await this.artifactStore.upsertArtifact({
|
|
299
178
|
artifactId,
|
|
300
179
|
artifactKind: 'principle',
|
|
301
|
-
sourceTaskId:
|
|
180
|
+
sourceTaskId: taskId,
|
|
302
181
|
lineageArtifactIds,
|
|
303
182
|
validationStatus: 'pending',
|
|
304
|
-
contentJson: JSON.stringify(
|
|
183
|
+
contentJson: JSON.stringify(output),
|
|
305
184
|
createdAt: now,
|
|
306
185
|
updatedAt: now,
|
|
307
186
|
});
|
|
308
187
|
}
|
|
309
188
|
catch (artifactErr) {
|
|
310
|
-
this.
|
|
311
|
-
runId
|
|
189
|
+
this.emitEvent('artifact_write_failed', taskId, {
|
|
190
|
+
runId,
|
|
312
191
|
errorMessage: artifactErr instanceof Error ? artifactErr.message : String(artifactErr),
|
|
313
192
|
});
|
|
314
193
|
return this.retryOrFail({
|
|
315
|
-
taskId
|
|
316
|
-
task
|
|
194
|
+
taskId,
|
|
195
|
+
task,
|
|
317
196
|
errorCategory: 'artifact_commit_failed',
|
|
318
197
|
failureReason: `PIArtifact write failed: ${artifactErr instanceof Error ? artifactErr.message : String(artifactErr)}`,
|
|
319
198
|
});
|
|
320
199
|
}
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
200
|
+
// ── Evaluator-specific: validate principle-bearing Scribe artifact ──
|
|
201
|
+
// This is the critical business logic: approved evaluator must validate
|
|
202
|
+
// the Scribe principle artifact, NOT the Artificer plan artifact.
|
|
203
|
+
if (output.evaluation.decision === 'approved') {
|
|
204
|
+
const principleArtifactId = await this.resolvePrincipleBearerArtifact(output, taskId);
|
|
324
205
|
if (principleArtifactId) {
|
|
325
206
|
try {
|
|
326
207
|
const updated = await this.artifactStore.updateValidationStatus(principleArtifactId, 'validated');
|
|
327
208
|
if (!updated) {
|
|
328
|
-
|
|
329
|
-
|
|
209
|
+
// updateValidationStatus returned false — fail loud with structured telemetry (ERR-018)
|
|
210
|
+
this.emitEvent('source_validation_update_not_found', taskId, {
|
|
211
|
+
runId,
|
|
330
212
|
sourceArtifactId: principleArtifactId,
|
|
331
213
|
reason: 'principle_artifact_not_found_in_store',
|
|
332
214
|
nextAction: 'verify_artifact_lineage_and_store_consistency',
|
|
@@ -334,228 +216,88 @@ export class EvaluatorRunner {
|
|
|
334
216
|
}
|
|
335
217
|
}
|
|
336
218
|
catch (updateErr) {
|
|
337
|
-
this.
|
|
338
|
-
runId
|
|
219
|
+
this.emitEvent('source_validation_update_failed', taskId, {
|
|
220
|
+
runId,
|
|
339
221
|
sourceArtifactId: principleArtifactId,
|
|
340
222
|
errorMessage: updateErr instanceof Error ? updateErr.message : String(updateErr),
|
|
341
223
|
});
|
|
342
224
|
}
|
|
343
225
|
}
|
|
344
226
|
}
|
|
227
|
+
// Mark task succeeded
|
|
228
|
+
const resultRef = `${this.config.resultRefPrefix}://${runId}`;
|
|
345
229
|
try {
|
|
346
|
-
await this.stateManager.markTaskSucceeded(
|
|
230
|
+
await this.stateManager.markTaskSucceeded(taskId, resultRef);
|
|
347
231
|
}
|
|
348
232
|
catch (stateErr) {
|
|
349
|
-
this.
|
|
350
|
-
taskId
|
|
351
|
-
runId
|
|
233
|
+
this.emitEvent('mark_succeeded_failed', taskId, {
|
|
234
|
+
taskId,
|
|
235
|
+
runId,
|
|
352
236
|
errorMessage: stateErr instanceof Error ? stateErr.message : String(stateErr),
|
|
353
237
|
});
|
|
354
238
|
throw stateErr;
|
|
355
239
|
}
|
|
356
|
-
this.
|
|
357
|
-
attemptCount:
|
|
240
|
+
this.emitEvent('task_succeeded', taskId, {
|
|
241
|
+
attemptCount: task.attemptCount,
|
|
358
242
|
resultRef,
|
|
359
|
-
evaluationDecision:
|
|
360
|
-
evaluationScore:
|
|
243
|
+
evaluationDecision: output.evaluation.decision,
|
|
244
|
+
evaluationScore: output.evaluation.score,
|
|
361
245
|
});
|
|
362
|
-
this.phase = RunnerPhase.Completed;
|
|
363
246
|
return {
|
|
364
247
|
status: 'succeeded',
|
|
365
|
-
taskId
|
|
366
|
-
runId
|
|
248
|
+
taskId,
|
|
249
|
+
runId,
|
|
367
250
|
artifactId,
|
|
368
251
|
resultRef,
|
|
369
|
-
contextHash
|
|
370
|
-
output
|
|
371
|
-
attemptCount:
|
|
252
|
+
contextHash,
|
|
253
|
+
output,
|
|
254
|
+
attemptCount: task.attemptCount,
|
|
372
255
|
};
|
|
373
256
|
}
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
errorCategory,
|
|
384
|
-
failureReason: `Runtime execution ended with status: ${runStatus.status}`,
|
|
385
|
-
});
|
|
386
|
-
}
|
|
387
|
-
async handleValidationError(ctx) {
|
|
388
|
-
const category = (ctx.errorCategory ?? 'output_invalid');
|
|
389
|
-
this.emitEvaluatorEvent('evaluator_output_invalid', ctx.taskId, {
|
|
390
|
-
errorCount: ctx.errors.length,
|
|
391
|
-
errorCategory: category,
|
|
392
|
-
});
|
|
393
|
-
return this.retryOrFail({
|
|
394
|
-
taskId: ctx.taskId,
|
|
395
|
-
task: ctx.task,
|
|
396
|
-
errorCategory: category,
|
|
397
|
-
failureReason: `Validation failed: ${ctx.errors.join('; ')}`,
|
|
398
|
-
});
|
|
399
|
-
}
|
|
400
|
-
async handleLeaseOrPhaseError(taskId, error) {
|
|
401
|
-
const classified = this.classifyError(error);
|
|
402
|
-
if (classified.category === 'lease_conflict') {
|
|
403
|
-
this.emitEvaluatorEvent('evaluator_run_failed', taskId, {
|
|
404
|
-
errorCategory: 'lease_conflict',
|
|
405
|
-
errorMessage: classified.message,
|
|
406
|
-
});
|
|
407
|
-
return {
|
|
408
|
-
status: 'failed',
|
|
409
|
-
taskId,
|
|
410
|
-
errorCategory: 'lease_conflict',
|
|
411
|
-
failureReason: classified.message,
|
|
412
|
-
attemptCount: 1,
|
|
413
|
-
};
|
|
414
|
-
}
|
|
415
|
-
this.emitEvaluatorEvent('evaluator_run_failed', taskId, {
|
|
416
|
-
errorCategory: classified.category,
|
|
417
|
-
errorMessage: classified.message,
|
|
418
|
-
});
|
|
419
|
-
const task = {
|
|
420
|
-
taskId,
|
|
421
|
-
taskKind: 'evaluator',
|
|
422
|
-
status: 'leased',
|
|
423
|
-
createdAt: new Date().toISOString(),
|
|
424
|
-
updatedAt: new Date().toISOString(),
|
|
425
|
-
attemptCount: 1,
|
|
426
|
-
maxAttempts: this.resolvedOptions.defaultMaxAttempts,
|
|
427
|
-
};
|
|
428
|
-
return this.retryOrFail({ taskId, task, errorCategory: classified.category, failureReason: classified.message });
|
|
257
|
+
// ── Optional hooks ──────────────────────────────────────────────────────────
|
|
258
|
+
/**
|
|
259
|
+
* Re-inject taskId if stripped by stripLineageFields (PRI-272 / ERR-008).
|
|
260
|
+
* Only fill when absent via Object.hasOwn — present-but-falsy values
|
|
261
|
+
* must reach validation and fail loud (Runtime Contract Rule 3).
|
|
262
|
+
*/
|
|
263
|
+
// eslint-disable-next-line @typescript-eslint/class-methods-use-this
|
|
264
|
+
postFetchTransform(taskId, untrustedOutput) {
|
|
265
|
+
injectRunnerLineageIfAbsent(untrustedOutput, 'taskId', taskId);
|
|
429
266
|
}
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
errorMessage: classified.message,
|
|
267
|
+
emitSuccessTelemetry(taskId, output) {
|
|
268
|
+
this.emitEvent('decision_recorded', taskId, {
|
|
269
|
+
evaluationDecision: output.evaluation.decision,
|
|
270
|
+
evaluationScore: output.evaluation.score,
|
|
435
271
|
});
|
|
436
|
-
return this.retryOrFail({ taskId, task, errorCategory: classified.category, failureReason: classified.message });
|
|
437
272
|
}
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
return {
|
|
450
|
-
status: 'failed',
|
|
451
|
-
taskId: ctx.taskId,
|
|
452
|
-
errorCategory: 'storage_unavailable',
|
|
453
|
-
failureReason: `State manager error: ${ctx.failureReason}`,
|
|
454
|
-
attemptCount: ctx.task.attemptCount,
|
|
455
|
-
};
|
|
456
|
-
}
|
|
457
|
-
this.emitEvaluatorEvent('evaluator_task_failed', ctx.taskId, {
|
|
458
|
-
errorCategory: ctx.errorCategory,
|
|
459
|
-
attemptCount: ctx.task.attemptCount,
|
|
460
|
-
failureReason: ctx.failureReason,
|
|
461
|
-
});
|
|
462
|
-
this.phase = RunnerPhase.Failed;
|
|
463
|
-
return {
|
|
464
|
-
status: 'failed',
|
|
465
|
-
taskId: ctx.taskId,
|
|
466
|
-
errorCategory: ctx.errorCategory,
|
|
467
|
-
failureReason: ctx.failureReason,
|
|
468
|
-
attemptCount: ctx.task.attemptCount,
|
|
469
|
-
};
|
|
470
|
-
}
|
|
471
|
-
const shouldRetry = this.stateManager.getRetryPolicy().shouldRetry(ctx.task);
|
|
472
|
-
if (shouldRetry) {
|
|
473
|
-
try {
|
|
474
|
-
await this.stateManager.markTaskRetryWait(ctx.taskId, ctx.errorCategory);
|
|
475
|
-
}
|
|
476
|
-
catch (stateErr) {
|
|
477
|
-
this.emitEvaluatorEvent('evaluator_mark_retry_error', ctx.taskId, {
|
|
478
|
-
errorCategory: 'storage_unavailable',
|
|
479
|
-
attemptCount: ctx.task.attemptCount,
|
|
480
|
-
errorMessage: stateErr instanceof Error ? stateErr.message : String(stateErr),
|
|
481
|
-
});
|
|
482
|
-
return {
|
|
483
|
-
status: 'failed',
|
|
484
|
-
taskId: ctx.taskId,
|
|
485
|
-
errorCategory: 'storage_unavailable',
|
|
486
|
-
failureReason: `State manager error: ${ctx.failureReason}`,
|
|
487
|
-
attemptCount: ctx.task.attemptCount,
|
|
488
|
-
};
|
|
489
|
-
}
|
|
490
|
-
this.emitEvaluatorEvent('evaluator_task_retried', ctx.taskId, {
|
|
491
|
-
errorCategory: ctx.errorCategory,
|
|
492
|
-
attemptCount: ctx.task.attemptCount,
|
|
273
|
+
/**
|
|
274
|
+
* Check lineage strip contract after validation passes.
|
|
275
|
+
* Validates sourceTrace.scribeArtifactId consistency (ERR-004, ERR-008).
|
|
276
|
+
*/
|
|
277
|
+
checkLineageIntegrity(taskId, output) {
|
|
278
|
+
// sourceTrace.artificerArtifactId must match sourceArtificerArtifactId
|
|
279
|
+
if (output.sourceTrace.artificerArtifactId !== output.sourceArtificerArtifactId) {
|
|
280
|
+
this.emitEvent('lineage_integrity_violation', taskId, {
|
|
281
|
+
sourceArtificerArtifactId: output.sourceArtificerArtifactId,
|
|
282
|
+
traceArtificerArtifactId: output.sourceTrace.artificerArtifactId,
|
|
283
|
+
reason: 'sourceArtificerArtifactId_and_sourceTrace_artificerArtifactId_mismatch',
|
|
493
284
|
});
|
|
494
|
-
this.phase = RunnerPhase.RetryWaiting;
|
|
495
|
-
return {
|
|
496
|
-
status: 'retried',
|
|
497
|
-
taskId: ctx.taskId,
|
|
498
|
-
errorCategory: ctx.errorCategory,
|
|
499
|
-
failureReason: ctx.failureReason,
|
|
500
|
-
attemptCount: ctx.task.attemptCount,
|
|
501
|
-
};
|
|
502
|
-
}
|
|
503
|
-
try {
|
|
504
|
-
await this.stateManager.markTaskFailed(ctx.taskId, 'max_attempts_exceeded');
|
|
505
|
-
}
|
|
506
|
-
catch (stateErr) {
|
|
507
|
-
this.emitEvaluatorEvent('evaluator_mark_failed_error', ctx.taskId, {
|
|
508
|
-
errorCategory: 'storage_unavailable',
|
|
509
|
-
attemptCount: ctx.task.attemptCount,
|
|
510
|
-
errorMessage: stateErr instanceof Error ? stateErr.message : String(stateErr),
|
|
511
|
-
});
|
|
512
|
-
return {
|
|
513
|
-
status: 'failed',
|
|
514
|
-
taskId: ctx.taskId,
|
|
515
|
-
errorCategory: 'storage_unavailable',
|
|
516
|
-
failureReason: `State manager error: ${ctx.failureReason}`,
|
|
517
|
-
attemptCount: ctx.task.attemptCount,
|
|
518
|
-
};
|
|
519
|
-
}
|
|
520
|
-
this.emitEvaluatorEvent('evaluator_task_failed', ctx.taskId, {
|
|
521
|
-
errorCategory: 'max_attempts_exceeded',
|
|
522
|
-
attemptCount: ctx.task.attemptCount,
|
|
523
|
-
failureReason: `Max attempts exceeded: ${ctx.failureReason}`,
|
|
524
|
-
});
|
|
525
|
-
this.phase = RunnerPhase.Failed;
|
|
526
|
-
return {
|
|
527
|
-
status: 'failed',
|
|
528
|
-
taskId: ctx.taskId,
|
|
529
|
-
errorCategory: 'max_attempts_exceeded',
|
|
530
|
-
failureReason: `Max attempts exceeded: ${ctx.failureReason}`,
|
|
531
|
-
attemptCount: ctx.task.attemptCount,
|
|
532
|
-
};
|
|
533
|
-
}
|
|
534
|
-
// eslint-disable-next-line @typescript-eslint/class-methods-use-this
|
|
535
|
-
isPermanentError(category) {
|
|
536
|
-
return EVALUATOR_PERMANENT_ERROR_CATEGORIES.has(category);
|
|
537
|
-
}
|
|
538
|
-
// eslint-disable-next-line @typescript-eslint/class-methods-use-this
|
|
539
|
-
classifyError(error) {
|
|
540
|
-
if (error instanceof PDRuntimeError) {
|
|
541
|
-
return { category: error.category, message: error.message };
|
|
542
|
-
}
|
|
543
|
-
if (error instanceof Error) {
|
|
544
|
-
return { category: 'execution_failed', message: error.message };
|
|
545
|
-
}
|
|
546
|
-
return { category: 'execution_failed', message: String(error) };
|
|
547
|
-
}
|
|
548
|
-
// eslint-disable-next-line @typescript-eslint/class-methods-use-this
|
|
549
|
-
mapRunStatusToErrorCategory(status) {
|
|
550
|
-
switch (status) {
|
|
551
|
-
case 'failed': return 'execution_failed';
|
|
552
|
-
case 'timed_out': return 'timeout';
|
|
553
|
-
case 'cancelled': return 'cancelled';
|
|
554
|
-
default: return 'execution_failed';
|
|
555
285
|
}
|
|
556
286
|
}
|
|
287
|
+
// ── Evaluator-specific: principle bearer resolution ─────────────────────────
|
|
288
|
+
/**
|
|
289
|
+
* Resolve the principle-bearing artifact that the evaluator should validate.
|
|
290
|
+
*
|
|
291
|
+
* Strategy 1: Use scribeArtifactId from sourceTrace (the Scribe artifact
|
|
292
|
+
* carries principleDraft).
|
|
293
|
+
* Strategy 2: Search lineage for principle-kind artifacts with principleDraft.
|
|
294
|
+
* Strategy 3: No principle-bearing artifact found → telemetry, return null.
|
|
295
|
+
*
|
|
296
|
+
* Ambiguous candidates (more than 1) → fail loud with telemetry, return null.
|
|
297
|
+
* Never silently pick the first candidate (ERR-018, ERR-019).
|
|
298
|
+
*/
|
|
557
299
|
async resolvePrincipleBearerArtifact(output, taskId) {
|
|
558
|
-
// Strategy 1: Use scribeArtifactId from sourceTrace
|
|
300
|
+
// Strategy 1: Use scribeArtifactId from sourceTrace
|
|
559
301
|
const scribeArtifactId = output.sourceTrace?.scribeArtifactId;
|
|
560
302
|
if (typeof scribeArtifactId === 'string' && scribeArtifactId.trim() !== '') {
|
|
561
303
|
const scribeArtifact = await this.artifactStore.getArtifactById(scribeArtifactId);
|
|
@@ -563,13 +305,13 @@ export class EvaluatorRunner {
|
|
|
563
305
|
return scribeArtifactId;
|
|
564
306
|
}
|
|
565
307
|
// Scribe artifact not found or wrong kind — fall through to lineage search
|
|
566
|
-
this.
|
|
308
|
+
this.emitEvent('scribe_artifact_not_principle', taskId, {
|
|
567
309
|
scribeArtifactId,
|
|
568
310
|
actualKind: scribeArtifact?.artifactKind ?? 'not_found',
|
|
569
311
|
});
|
|
570
312
|
}
|
|
571
313
|
// Strategy 2: Search lineage for all principle-kind artifacts with principleDraft content
|
|
572
|
-
const lineageArtifactIds = await this.resolveLineageArtifactIds(taskId);
|
|
314
|
+
const { ids: lineageArtifactIds } = await this.resolveLineageArtifactIds(taskId);
|
|
573
315
|
const candidates = [];
|
|
574
316
|
for (const lineageId of lineageArtifactIds) {
|
|
575
317
|
const artifact = await this.artifactStore.getArtifactById(lineageId);
|
|
@@ -586,15 +328,16 @@ export class EvaluatorRunner {
|
|
|
586
328
|
return only ?? null;
|
|
587
329
|
}
|
|
588
330
|
if (candidates.length > 1) {
|
|
589
|
-
|
|
331
|
+
// Ambiguous — fail loud, do NOT silently pick first (ERR-018)
|
|
332
|
+
this.emitEvent('principle_bearer_ambiguous', taskId, {
|
|
590
333
|
candidateArtifactIds: candidates,
|
|
591
334
|
reason: 'multiple_principle_bearing_artifacts_in_lineage',
|
|
592
335
|
nextAction: 'disambiguate_principle_source_or_fix_lineage',
|
|
593
336
|
});
|
|
594
337
|
return null;
|
|
595
338
|
}
|
|
596
|
-
// Strategy 3:
|
|
597
|
-
this.
|
|
339
|
+
// Strategy 3: No principle-bearing artifact found
|
|
340
|
+
this.emitEvent('no_principle_bearer_found', taskId, {
|
|
598
341
|
runId: output.taskId,
|
|
599
342
|
scribeArtifactId: scribeArtifactId ?? 'not_provided',
|
|
600
343
|
lineageCount: lineageArtifactIds.length,
|
|
@@ -603,28 +346,10 @@ export class EvaluatorRunner {
|
|
|
603
346
|
});
|
|
604
347
|
return null;
|
|
605
348
|
}
|
|
606
|
-
|
|
607
|
-
|
|
608
|
-
|
|
609
|
-
|
|
610
|
-
return [];
|
|
611
|
-
const piTask = hydratePITaskRecord(task);
|
|
612
|
-
const deps = piTask?.dependencyTaskIds ?? [];
|
|
613
|
-
const results = await Promise.allSettled(deps.map((depId) => this.artifactStore.listBySourceTaskId(depId)));
|
|
614
|
-
const ids = [];
|
|
615
|
-
for (const result of results) {
|
|
616
|
-
if (result.status === 'fulfilled') {
|
|
617
|
-
for (const artifact of result.value) {
|
|
618
|
-
ids.push(artifact.artifactId);
|
|
619
|
-
}
|
|
620
|
-
}
|
|
621
|
-
}
|
|
622
|
-
return ids;
|
|
623
|
-
}
|
|
624
|
-
catch {
|
|
625
|
-
return [];
|
|
626
|
-
}
|
|
627
|
-
}
|
|
349
|
+
/**
|
|
350
|
+
* Check if an artifact's contentJson contains principle-bearing content.
|
|
351
|
+
* Uses Object.hasOwn (ERR-013) and runtime type checks (ERR-001, ERR-005).
|
|
352
|
+
*/
|
|
628
353
|
// eslint-disable-next-line @typescript-eslint/class-methods-use-this
|
|
629
354
|
hasPrincipleDraftContent(contentJson) {
|
|
630
355
|
try {
|
|
@@ -654,10 +379,5 @@ export class EvaluatorRunner {
|
|
|
654
379
|
static isRecord(value) {
|
|
655
380
|
return value !== null && typeof value === 'object' && !Array.isArray(value);
|
|
656
381
|
}
|
|
657
|
-
// eslint-disable-next-line @typescript-eslint/class-methods-use-this
|
|
658
|
-
sleep(ms) {
|
|
659
|
-
return new Promise((resolve) => setTimeout(resolve, ms));
|
|
660
|
-
}
|
|
661
382
|
}
|
|
662
|
-
export { DEFAULT_EVALUATOR_RUNNER_OPTIONS };
|
|
663
383
|
//# sourceMappingURL=evaluator-runner.js.map
|