@purista/harness 1.0.0 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +15 -0
- package/dist/agents/index.d.ts +5 -3
- package/dist/agents/index.js +58 -6
- package/dist/errors/catalog.d.ts +11 -4
- package/dist/eval/index.d.ts +57 -0
- package/dist/eval/index.js +181 -0
- package/dist/harness/defineHarness.d.ts +38 -18
- package/dist/harness/defineHarness.js +23 -2
- package/dist/index.d.ts +2 -0
- package/dist/index.js +2 -0
- package/dist/memory/sandbox/index.d.ts +17 -0
- package/dist/memory/sandbox/index.js +122 -0
- package/dist/models/registry.js +32 -7
- package/dist/ports/capabilities.d.ts +24 -2
- package/dist/ports/harness-context.d.ts +4 -1
- package/dist/ports/index.d.ts +1 -0
- package/dist/ports/index.js +1 -0
- package/dist/ports/memory/facade.d.ts +5 -0
- package/dist/ports/memory/facade.js +123 -0
- package/dist/ports/memory/telemetry.d.ts +16 -0
- package/dist/ports/memory/telemetry.js +77 -0
- package/dist/ports/memory/types.d.ts +204 -0
- package/dist/ports/memory/types.js +1 -0
- package/dist/ports/memory/validation.d.ts +19 -0
- package/dist/ports/memory/validation.js +160 -0
- package/dist/ports/memory.d.ts +3 -0
- package/dist/ports/memory.js +3 -0
- package/dist/sessions/index.d.ts +2 -0
- package/dist/sessions/index.js +275 -68
- package/dist/telemetry/shim.d.ts +20 -0
- package/dist/telemetry/shim.js +28 -0
- package/dist/testing/fakeMemoryAdapter.d.ts +16 -0
- package/dist/testing/fakeMemoryAdapter.js +110 -0
- package/dist/testing/index.d.ts +3 -0
- package/dist/testing/index.js +2 -0
- package/package.json +8 -3
package/dist/sessions/index.js
CHANGED
|
@@ -1,55 +1,16 @@
|
|
|
1
|
-
import {
|
|
2
|
-
import { InternalError, OperationCancelledError, OperationTimeoutError, HarnessError, SessionBusyError, StateError, ValidationError, WorkflowNotFoundError, serializeError } from '../errors/index.js';
|
|
1
|
+
import { InternalError, OperationCancelledError, OperationTimeoutError, HarnessError, SessionBusyError, ValidationError, serializeError } from '../errors/index.js';
|
|
3
2
|
import { ulid } from '../ulid/index.js';
|
|
4
3
|
import { runDefaultAgent } from '../agents/index.js';
|
|
5
4
|
import { runWorkflow } from '../workflows/index.js';
|
|
5
|
+
import { createMemoryFacade, createSessionMemory } from '../ports/memory.js';
|
|
6
6
|
import { loadSkillsSync } from '../skills/index.js';
|
|
7
7
|
import { createModelRegistry } from '../models/registry.js';
|
|
8
|
-
import { createTelemetryShim } from '../telemetry/index.js';
|
|
8
|
+
import { createMetrics, createTelemetryShim } from '../telemetry/index.js';
|
|
9
9
|
import { createMcpRunnerRegistry } from '../tools/mcp/runner.js';
|
|
10
|
-
const
|
|
10
|
+
const NEVER_ABORT_SIGNAL = new AbortController().signal;
|
|
11
11
|
function now() {
|
|
12
12
|
return new Date().toISOString();
|
|
13
13
|
}
|
|
14
|
-
function makeMemory(sessionId, sandboxSession) {
|
|
15
|
-
return {
|
|
16
|
-
async read(key) {
|
|
17
|
-
validateMemoryKey(key);
|
|
18
|
-
const path = `/memory/${key}.json`;
|
|
19
|
-
if (!(await sandboxSession.exists(path))) {
|
|
20
|
-
return undefined;
|
|
21
|
-
}
|
|
22
|
-
return JSON.parse(await sandboxSession.readText(path));
|
|
23
|
-
},
|
|
24
|
-
async write(key, value) {
|
|
25
|
-
validateMemoryKey(key);
|
|
26
|
-
let encoded;
|
|
27
|
-
try {
|
|
28
|
-
encoded = JSON.stringify(value);
|
|
29
|
-
}
|
|
30
|
-
catch (error) {
|
|
31
|
-
throw new ValidationError('Memory value must be JSON-serializable.', { where: 'memory_value', issues: { key } }, error);
|
|
32
|
-
}
|
|
33
|
-
await sandboxSession.write(`/memory/${key}.json`, encoded);
|
|
34
|
-
},
|
|
35
|
-
async delete(key) {
|
|
36
|
-
validateMemoryKey(key);
|
|
37
|
-
await sandboxSession.remove(`/memory/${key}.json`).catch(() => undefined);
|
|
38
|
-
},
|
|
39
|
-
async list() {
|
|
40
|
-
const entries = await sandboxSession.list('/memory').catch(() => []);
|
|
41
|
-
return entries
|
|
42
|
-
.filter((entry) => entry.kind === 'file' && entry.name.endsWith('.json'))
|
|
43
|
-
.map((entry) => entry.name.slice(0, -5))
|
|
44
|
-
.sort();
|
|
45
|
-
}
|
|
46
|
-
};
|
|
47
|
-
}
|
|
48
|
-
function validateMemoryKey(key) {
|
|
49
|
-
if (!MEMORY_KEY_PATTERN.test(key)) {
|
|
50
|
-
throw new ValidationError('Invalid session memory key.', { where: 'memory_key', issues: { key } });
|
|
51
|
-
}
|
|
52
|
-
}
|
|
53
14
|
function validateInvokeOptions(opts) {
|
|
54
15
|
if (opts?.historyWindow !== undefined && opts.historyWindow < 0) {
|
|
55
16
|
throw new ValidationError('Invoke options are invalid.', { where: 'invoke_options', issues: { historyWindow: opts.historyWindow } });
|
|
@@ -69,11 +30,15 @@ function normalizeMessage(message, sessionId) {
|
|
|
69
30
|
export function createSessionHarness(definition) {
|
|
70
31
|
const resolvedSkills = loadSkillsSync(definition.skills);
|
|
71
32
|
const sessionStates = new Map();
|
|
72
|
-
const
|
|
33
|
+
const contentCaptureMode = resolveContentCaptureMode(definition.telemetry);
|
|
34
|
+
const telemetry = withTelemetryFlavor(definition.telemetryShim ?? createTelemetryShim(), definition.telemetry);
|
|
35
|
+
const adapterMetrics = createMetrics(telemetry, { 'harness.name': definition.name });
|
|
73
36
|
const adapterContext = {
|
|
74
37
|
harnessName: definition.name,
|
|
75
38
|
logger: definition.logger,
|
|
76
39
|
telemetry,
|
|
40
|
+
metrics: adapterMetrics,
|
|
41
|
+
contentCaptureMode,
|
|
77
42
|
defaults: {
|
|
78
43
|
agentMaxIterations: definition.defaults.agentMaxIterations ?? 16,
|
|
79
44
|
runTimeoutMs: definition.defaults.runTimeoutMs ?? 600_000,
|
|
@@ -83,10 +48,9 @@ export function createSessionHarness(definition) {
|
|
|
83
48
|
...(definition.defaults.historyWindow !== undefined ? { historyWindow: definition.defaults.historyWindow } : {})
|
|
84
49
|
}
|
|
85
50
|
};
|
|
86
|
-
configureHarnessAdapters(adapterContext, definition.models, definition.state, definition.sandbox, definition.tools);
|
|
51
|
+
configureHarnessAdapters(adapterContext, definition.models, definition.state, definition.sandbox, definition.memory, definition.tools);
|
|
87
52
|
const modelRegistry = createModelRegistry(definition.models, { telemetry, harnessName: definition.name });
|
|
88
53
|
const mcpRegistry = createMcpRunnerRegistry();
|
|
89
|
-
const captureContent = definition.telemetry?.captureContent === true;
|
|
90
54
|
async function ensureSessionRecord(sessionId) {
|
|
91
55
|
const existing = await definition.state.getSession(sessionId);
|
|
92
56
|
if (existing) {
|
|
@@ -120,6 +84,63 @@ export function createSessionHarness(definition) {
|
|
|
120
84
|
definition.logger.error('Failed to persist run events.', { harness: definition.name, run_id: runId, error: serializeError(error) });
|
|
121
85
|
}
|
|
122
86
|
}
|
|
87
|
+
/**
 * Builds an aggregate summary of one run from its stored record and events.
 *
 * Resolves to `undefined` when the state store has no run with this id.
 * Counting rules:
 * - `agentCalls`: events of type `agent.started`.
 * - `toolCalls`: events of type `tool.started`.
 * - `modelCalls`: events whose type starts with `model.` and ends with
 *   `.completed`, plus events of type `model.object`.
 * - `tokenTotals`: sum over every event whose payload is a non-array object
 *   carrying a `usage` record with numeric input/output/total token counts.
 *
 * `finishedAt` and `error` are only present when the run record has them.
 */
async function getRunSummary(runId) {
    const run = await definition.state.getRun(runId);
    if (!run) {
        return undefined;
    }
    const events = await definition.state.listEvents(runId);
    const tokenTotals = { inputTokens: 0, outputTokens: 0, totalTokens: 0 };
    const calls = { model: 0, tool: 0, agent: 0 };
    for (const entry of events) {
        const kind = entry.type;
        if (kind === 'agent.started') {
            calls.agent += 1;
        }
        if (kind === 'tool.started') {
            calls.tool += 1;
        }
        // The two model conditions are mutually exclusive, so each event counts at most once.
        const completedModelCall = kind.startsWith('model.') && kind.endsWith('.completed');
        if (completedModelCall || kind === 'model.object') {
            calls.model += 1;
        }
        const body = entry.payload;
        if (isJsonRecord(body) && isTokenUsage(body['usage'])) {
            const usage = body['usage'];
            tokenTotals.inputTokens += usage.inputTokens;
            tokenTotals.outputTokens += usage.outputTokens;
            tokenTotals.totalTokens += usage.totalTokens;
        }
    }
    const finished = run.finishedAt ? { finishedAt: run.finishedAt } : {};
    const failure = run.error ? { error: normalizeSerializedRunError(run.error) } : {};
    return {
        runId: run.id,
        sessionId: run.sessionId,
        status: run.status,
        startedAt: run.startedAt,
        ...finished,
        tokenTotals,
        modelCalls: calls.model,
        toolCalls: calls.tool,
        agentCalls: calls.agent,
        ...failure
    };
}
|
|
125
|
+
/**
 * Assembles the option bag handed to the memory helpers for one session.
 *
 * Harness-wide values (adapter, logger, telemetry, content capture mode,
 * harness name) come from the enclosing harness; the caller supplies the
 * session id, sandbox session and abort signal. `runId`, `agentId` and
 * `workflowId` are copied from `opts` only when truthy, and `metadata`
 * falls back to an empty object.
 */
function memoryOptions(sessionId, sandboxSession, signal, opts = {}) {
    const scopeIds = {};
    for (const field of ['runId', 'agentId', 'workflowId']) {
        if (opts[field]) {
            scopeIds[field] = opts[field];
        }
    }
    return {
        adapter: definition.memory,
        logger: definition.logger,
        telemetry,
        contentCaptureMode,
        signal,
        sandbox: sandboxSession,
        harnessName: definition.name,
        sessionId,
        ...scopeIds,
        metadata: opts.metadata ?? {}
    };
}
|
|
141
|
+
/**
 * Creates a memory facade for the scope described by `opts`.
 *
 * `opts` must carry `sessionId`, `sandboxSession` and `signal`; it is also
 * passed through whole so optional run/agent/workflow ids and metadata reach
 * `memoryOptions`.
 */
function memoryFacade(opts) {
    const { sessionId, sandboxSession, signal } = opts;
    const options = memoryOptions(sessionId, sandboxSession, signal, opts);
    return createMemoryFacade(options);
}
|
|
123
144
|
return {
|
|
124
145
|
inspect() {
|
|
125
146
|
return definition.inspection;
|
|
@@ -127,7 +148,7 @@ export function createSessionHarness(definition) {
|
|
|
127
148
|
async getSession(sessionId) {
|
|
128
149
|
await ensureSessionRecord(sessionId);
|
|
129
150
|
const state = await getSessionState(sessionId);
|
|
130
|
-
const memory =
|
|
151
|
+
const memory = createSessionMemory(memoryOptions(sessionId, state.sandboxSession, NEVER_ABORT_SIGNAL), { kind: 'session', sessionId });
|
|
131
152
|
const workflowEntries = Object.entries(definition.workflows).map(([workflowId, workflow]) => {
|
|
132
153
|
const invoker = {
|
|
133
154
|
prompt: (input, opts) => runWorkflowCall(sessionId, workflowId, workflow, input, opts),
|
|
@@ -160,6 +181,9 @@ export function createSessionHarness(definition) {
|
|
|
160
181
|
history: {
|
|
161
182
|
list: (opts) => definition.state.listMessages(sessionId, opts)
|
|
162
183
|
},
|
|
184
|
+
async getRunSummary(runId) {
|
|
185
|
+
return getRunSummary(runId);
|
|
186
|
+
},
|
|
163
187
|
async clearHistory() {
|
|
164
188
|
if (state.busy) {
|
|
165
189
|
throw new SessionBusyError('Session is busy.', { session_id: sessionId, reason: 'history_clear_during_run' });
|
|
@@ -213,6 +237,12 @@ export function createSessionHarness(definition) {
|
|
|
213
237
|
catch (error) {
|
|
214
238
|
errors.push(error instanceof HarnessError ? error : new InternalError('Failed to close state store.', undefined, error));
|
|
215
239
|
}
|
|
240
|
+
try {
|
|
241
|
+
await definition.memory.close?.();
|
|
242
|
+
}
|
|
243
|
+
catch (error) {
|
|
244
|
+
errors.push(error instanceof HarnessError ? error : new InternalError('Failed to close memory adapter.', undefined, error));
|
|
245
|
+
}
|
|
216
246
|
return { errors };
|
|
217
247
|
},
|
|
218
248
|
$infer: {}
|
|
@@ -268,13 +298,20 @@ export function createSessionHarness(definition) {
|
|
|
268
298
|
}
|
|
269
299
|
const runSignal = createRunSignal(opts?.signal, opts?.timeoutMs ?? definition.defaults.runTimeoutMs);
|
|
270
300
|
const state = await getSessionState(sessionId);
|
|
271
|
-
const memory = makeMemory(sessionId, state.sandboxSession);
|
|
272
301
|
if (state.busy) {
|
|
273
302
|
throw new SessionBusyError('Session is busy.', { session_id: sessionId, reason: 'concurrent_run' });
|
|
274
303
|
}
|
|
275
304
|
state.busy = true;
|
|
276
305
|
const startedAt = now();
|
|
277
306
|
const runId = ulid();
|
|
307
|
+
const memory = memoryFacade({
|
|
308
|
+
sessionId,
|
|
309
|
+
runId,
|
|
310
|
+
agentId,
|
|
311
|
+
signal: runSignal.signal,
|
|
312
|
+
sandboxSession: state.sandboxSession,
|
|
313
|
+
metadata: opts?.metadata ?? {}
|
|
314
|
+
});
|
|
278
315
|
const runRecord = {
|
|
279
316
|
id: runId,
|
|
280
317
|
sessionId,
|
|
@@ -287,7 +324,7 @@ export function createSessionHarness(definition) {
|
|
|
287
324
|
const emit = async (event) => {
|
|
288
325
|
const eventAt = 'at' in event ? event.at : now();
|
|
289
326
|
await onEvent?.(event);
|
|
290
|
-
await appendEvents(runId, [{ id: ulid(), runId, at: eventAt, type: event.type, payload: sanitizeEventForPersistence(event
|
|
327
|
+
await appendEvents(runId, [{ id: ulid(), runId, at: eventAt, type: event.type, payload: sanitizeEventForPersistence(event) }]);
|
|
291
328
|
};
|
|
292
329
|
try {
|
|
293
330
|
await definition.state.createRun(runRecord);
|
|
@@ -297,11 +334,13 @@ export function createSessionHarness(definition) {
|
|
|
297
334
|
throw error;
|
|
298
335
|
}
|
|
299
336
|
try {
|
|
300
|
-
const result = await telemetry.span('harness.session.agent_prompt', {
|
|
337
|
+
const result = await withIncomingTraceContext(telemetry, opts, definition.logger, async () => telemetry.span('harness.session.agent_prompt', {
|
|
301
338
|
'harness.name': definition.name,
|
|
302
339
|
'harness.session.id': sessionId,
|
|
303
340
|
'harness.run.id': runId,
|
|
304
|
-
'harness.agent.id': agentId
|
|
341
|
+
'harness.agent.id': agentId,
|
|
342
|
+
'harness.telemetry.content_capture_mode': contentCaptureMode,
|
|
343
|
+
...metadataSpanAttrs(opts?.metadata)
|
|
305
344
|
}, async () => {
|
|
306
345
|
await emit({ type: 'run.started', runId, at: startedAt });
|
|
307
346
|
const resolvedHistoryWindow = opts?.historyWindow ?? definition.defaults.historyWindow;
|
|
@@ -326,13 +365,14 @@ export function createSessionHarness(definition) {
|
|
|
326
365
|
toolTimeoutMs: definition.defaults.toolTimeoutMs ?? 120_000,
|
|
327
366
|
logger: definition.logger,
|
|
328
367
|
telemetry,
|
|
329
|
-
emitEvent: emit
|
|
368
|
+
emitEvent: emit,
|
|
369
|
+
metadata: opts?.metadata ?? {}
|
|
330
370
|
});
|
|
331
371
|
if (run.emitted.length > 0) {
|
|
332
372
|
await definition.state.appendMessages(sessionId, run.emitted);
|
|
333
373
|
}
|
|
334
374
|
return run.output;
|
|
335
|
-
});
|
|
375
|
+
}));
|
|
336
376
|
const finishedAt = now();
|
|
337
377
|
await emit({ type: 'run.finished', runId, at: finishedAt, output: result });
|
|
338
378
|
await definition.state.finishRun(runId, { status: 'succeeded', finishedAt, output: result });
|
|
@@ -428,13 +468,20 @@ export function createSessionHarness(definition) {
|
|
|
428
468
|
}
|
|
429
469
|
const runSignal = createRunSignal(opts?.signal, opts?.timeoutMs ?? definition.defaults.runTimeoutMs);
|
|
430
470
|
const state = await getSessionState(sessionId);
|
|
431
|
-
const memory = makeMemory(sessionId, state.sandboxSession);
|
|
432
471
|
if (state.busy) {
|
|
433
472
|
throw new SessionBusyError('Session is busy.', { session_id: sessionId, reason: 'concurrent_run' });
|
|
434
473
|
}
|
|
435
474
|
state.busy = true;
|
|
436
475
|
const startedAt = now();
|
|
437
476
|
const runId = ulid();
|
|
477
|
+
const memory = memoryFacade({
|
|
478
|
+
sessionId,
|
|
479
|
+
runId,
|
|
480
|
+
workflowId,
|
|
481
|
+
signal: runSignal.signal,
|
|
482
|
+
sandboxSession: state.sandboxSession,
|
|
483
|
+
metadata: opts?.metadata ?? {}
|
|
484
|
+
});
|
|
438
485
|
const runRecord = {
|
|
439
486
|
id: runId,
|
|
440
487
|
sessionId,
|
|
@@ -447,7 +494,7 @@ export function createSessionHarness(definition) {
|
|
|
447
494
|
const emit = async (event) => {
|
|
448
495
|
const eventAt = 'at' in event ? event.at : now();
|
|
449
496
|
await onEvent?.(event);
|
|
450
|
-
await appendEvents(runId, [{ id: ulid(), runId, at: eventAt, type: event.type, payload: sanitizeEventForPersistence(event
|
|
497
|
+
await appendEvents(runId, [{ id: ulid(), runId, at: eventAt, type: event.type, payload: sanitizeEventForPersistence(event) }]);
|
|
451
498
|
};
|
|
452
499
|
try {
|
|
453
500
|
await definition.state.createRun(runRecord);
|
|
@@ -457,14 +504,22 @@ export function createSessionHarness(definition) {
|
|
|
457
504
|
throw error;
|
|
458
505
|
}
|
|
459
506
|
try {
|
|
460
|
-
const result = await telemetry.span('harness.session.prompt', {
|
|
507
|
+
const result = await withIncomingTraceContext(telemetry, opts, definition.logger, async () => telemetry.span('harness.session.prompt', {
|
|
461
508
|
'harness.name': definition.name,
|
|
462
509
|
'harness.session.id': sessionId,
|
|
463
510
|
'harness.run.id': runId,
|
|
464
|
-
'harness.workflow.id': workflowId
|
|
511
|
+
'harness.workflow.id': workflowId,
|
|
512
|
+
'harness.telemetry.content_capture_mode': contentCaptureMode,
|
|
513
|
+
...metadataSpanAttrs(opts?.metadata)
|
|
465
514
|
}, async () => {
|
|
466
515
|
const runStarted = { type: 'run.started', runId, at: startedAt };
|
|
467
516
|
await emit(runStarted);
|
|
517
|
+
const workflowMetrics = createMetrics(telemetry, {
|
|
518
|
+
'harness.name': definition.name,
|
|
519
|
+
'harness.session.id': sessionId,
|
|
520
|
+
'harness.run.id': runId,
|
|
521
|
+
'harness.workflow.id': workflowId
|
|
522
|
+
});
|
|
468
523
|
const workflowArgs = {
|
|
469
524
|
workflowId,
|
|
470
525
|
workflow,
|
|
@@ -474,12 +529,25 @@ export function createSessionHarness(definition) {
|
|
|
474
529
|
runId,
|
|
475
530
|
sessionId,
|
|
476
531
|
models: modelRegistry,
|
|
532
|
+
metadata: opts?.metadata ?? {},
|
|
533
|
+
metrics: workflowMetrics,
|
|
534
|
+
memory,
|
|
477
535
|
agents: Object.fromEntries(Object.entries(definition.agents).map(([agentId, agent]) => [
|
|
478
536
|
agentId,
|
|
479
537
|
async (agentInput, agentOpts) => {
|
|
480
538
|
const agentSignal = combineSignals(runSignal.signal, agentOpts?.signal);
|
|
481
539
|
try {
|
|
482
540
|
const resolvedHistoryWindow = agentOpts?.historyWindow ?? opts?.historyWindow ?? definition.defaults.historyWindow;
|
|
541
|
+
const agentMetadata = { ...(opts?.metadata ?? {}), ...(agentOpts?.metadata ?? {}) };
|
|
542
|
+
const agentMemory = memoryFacade({
|
|
543
|
+
sessionId,
|
|
544
|
+
runId,
|
|
545
|
+
workflowId,
|
|
546
|
+
agentId,
|
|
547
|
+
signal: agentSignal.signal,
|
|
548
|
+
sandboxSession: state.sandboxSession,
|
|
549
|
+
metadata: agentMetadata
|
|
550
|
+
});
|
|
483
551
|
const run = await runDefaultAgent({
|
|
484
552
|
harnessName: definition.name,
|
|
485
553
|
agentId,
|
|
@@ -494,7 +562,7 @@ export function createSessionHarness(definition) {
|
|
|
494
562
|
customTools: definition.tools,
|
|
495
563
|
mcpRegistry,
|
|
496
564
|
session: state.sandboxSession,
|
|
497
|
-
memory,
|
|
565
|
+
memory: agentMemory,
|
|
498
566
|
mountedSkills: state.mountedSkills,
|
|
499
567
|
...(resolvedHistoryWindow !== undefined ? { historyWindow: resolvedHistoryWindow } : {}),
|
|
500
568
|
maxSteps: definition.defaults.agentMaxIterations ?? 16,
|
|
@@ -502,7 +570,8 @@ export function createSessionHarness(definition) {
|
|
|
502
570
|
toolTimeoutMs: definition.defaults.toolTimeoutMs ?? 120_000,
|
|
503
571
|
logger: definition.logger,
|
|
504
572
|
telemetry,
|
|
505
|
-
emitEvent: emit
|
|
573
|
+
emitEvent: emit,
|
|
574
|
+
metadata: agentMetadata
|
|
506
575
|
});
|
|
507
576
|
if (run.emitted.length > 0) {
|
|
508
577
|
await definition.state.appendMessages(sessionId, run.emitted);
|
|
@@ -520,12 +589,13 @@ export function createSessionHarness(definition) {
|
|
|
520
589
|
'harness.name': definition.name,
|
|
521
590
|
'harness.session.id': sessionId,
|
|
522
591
|
'harness.run.id': runId,
|
|
523
|
-
'harness.workflow.id': workflowId
|
|
592
|
+
'harness.workflow.id': workflowId,
|
|
593
|
+
...metadataSpanAttrs(opts?.metadata)
|
|
524
594
|
}, async () => runWorkflow({
|
|
525
595
|
...workflowArgs,
|
|
526
596
|
...(opts ? { opts: { ...opts, signal: runSignal.signal } } : { opts: { signal: runSignal.signal } })
|
|
527
597
|
}));
|
|
528
|
-
});
|
|
598
|
+
}));
|
|
529
599
|
const finishedAt = now();
|
|
530
600
|
const runFinished = { type: 'run.finished', runId, at: finishedAt, output: result };
|
|
531
601
|
await emit(runFinished);
|
|
@@ -598,13 +668,14 @@ export function createSessionHarness(definition) {
|
|
|
598
668
|
}
|
|
599
669
|
}
|
|
600
670
|
}
|
|
601
|
-
function configureHarnessAdapters(context, models, state, sandbox, tools) {
|
|
671
|
+
function configureHarnessAdapters(context, models, state, sandbox, memory, tools) {
|
|
602
672
|
const seen = new Set();
|
|
603
673
|
for (const alias of Object.values(models)) {
|
|
604
674
|
configureOne(alias.provider, context, seen);
|
|
605
675
|
}
|
|
606
676
|
configureOne(state, context, seen);
|
|
607
677
|
configureOne(sandbox, context, seen);
|
|
678
|
+
configureOne(memory, context, seen);
|
|
608
679
|
for (const tool of Object.values(tools)) {
|
|
609
680
|
configureOne(tool, context, seen);
|
|
610
681
|
}
|
|
@@ -616,6 +687,124 @@ function configureOne(adapter, context, seen) {
|
|
|
616
687
|
configurable.configureHarnessContext(context);
|
|
617
688
|
seen.add(adapter);
|
|
618
689
|
}
|
|
690
|
+
function withTelemetryFlavor(telemetry, options) {
|
|
691
|
+
const flavor = options?.flavor ?? process.env['PURISTA_TELEMETRY_FLAVOR'] ?? 'dual';
|
|
692
|
+
if (flavor === 'dual')
|
|
693
|
+
return telemetry;
|
|
694
|
+
const filtered = {
|
|
695
|
+
span: (name, attrs, fn) => telemetry.span(name, filterTelemetryAttrs(attrs, flavor), (span) => fn(filterSpanAttrs(span, flavor))),
|
|
696
|
+
recordHistogram: (name, value, attrs) => telemetry.recordHistogram(name, value, filterTelemetryAttrs(attrs, flavor)),
|
|
697
|
+
recordCounter: (name, value, attrs) => telemetry.recordCounter(name, value, filterTelemetryAttrs(attrs, flavor)),
|
|
698
|
+
currentTraceparent: () => telemetry.currentTraceparent()
|
|
699
|
+
};
|
|
700
|
+
if (telemetry.withTraceContext) {
|
|
701
|
+
filtered.withTraceContext = (carrier, fn) => telemetry.withTraceContext?.(carrier, fn) ?? fn();
|
|
702
|
+
}
|
|
703
|
+
return filtered;
|
|
704
|
+
}
|
|
705
|
+
async function withIncomingTraceContext(telemetry, opts, logger, fn) {
|
|
706
|
+
if (!opts?.traceparent)
|
|
707
|
+
return fn();
|
|
708
|
+
if (!isValidTraceparent(opts.traceparent) || (opts.tracestate !== undefined && !isValidTracestate(opts.tracestate))) {
|
|
709
|
+
logger.warn('Invalid Trace Context ignored.', {
|
|
710
|
+
'harness.warning.code': 'INVALID_TRACE_CONTEXT',
|
|
711
|
+
traceparent: opts.traceparent,
|
|
712
|
+
tracestate: opts.tracestate
|
|
713
|
+
});
|
|
714
|
+
return fn();
|
|
715
|
+
}
|
|
716
|
+
return telemetry.withTraceContext?.({ traceparent: opts.traceparent, ...(opts.tracestate ? { tracestate: opts.tracestate } : {}) }, fn) ?? fn();
|
|
717
|
+
}
|
|
718
|
+
function resolveContentCaptureMode(options) {
|
|
719
|
+
if (options?.contentCaptureMode !== undefined)
|
|
720
|
+
return options.contentCaptureMode;
|
|
721
|
+
const envValue = process.env['OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT'];
|
|
722
|
+
if (envValue === 'true')
|
|
723
|
+
return 'SPAN_AND_EVENT';
|
|
724
|
+
if (envValue === 'false')
|
|
725
|
+
return 'NO_CONTENT';
|
|
726
|
+
if (envValue === 'NO_CONTENT' || envValue === 'SPAN_ONLY' || envValue === 'EVENT_ONLY' || envValue === 'SPAN_AND_EVENT')
|
|
727
|
+
return envValue;
|
|
728
|
+
return 'NO_CONTENT';
|
|
729
|
+
}
|
|
730
|
+
function metadataSpanAttrs(metadata) {
|
|
731
|
+
const attrs = {};
|
|
732
|
+
for (const [key, value] of Object.entries(metadata ?? {})) {
|
|
733
|
+
if (!/^[a-zA-Z][a-zA-Z0-9_.-]{0,63}$/.test(key))
|
|
734
|
+
continue;
|
|
735
|
+
if (typeof value === 'string') {
|
|
736
|
+
if (value.length <= 256)
|
|
737
|
+
attrs[`harness.metadata.${key}`] = value;
|
|
738
|
+
continue;
|
|
739
|
+
}
|
|
740
|
+
if (typeof value === 'number' && Number.isFinite(value)) {
|
|
741
|
+
attrs[`harness.metadata.${key}`] = value;
|
|
742
|
+
continue;
|
|
743
|
+
}
|
|
744
|
+
if (typeof value === 'boolean') {
|
|
745
|
+
attrs[`harness.metadata.${key}`] = value;
|
|
746
|
+
}
|
|
747
|
+
}
|
|
748
|
+
return attrs;
|
|
749
|
+
}
|
|
750
|
+
function isValidTraceparent(traceparent) {
|
|
751
|
+
const match = /^([0-9a-f]{2})-([0-9a-f]{32})-([0-9a-f]{16})-([0-9a-f]{2})$/.exec(traceparent);
|
|
752
|
+
if (!match)
|
|
753
|
+
return false;
|
|
754
|
+
const [, version, traceId, parentId] = match;
|
|
755
|
+
return version !== 'ff' && traceId !== '00000000000000000000000000000000' && parentId !== '0000000000000000';
|
|
756
|
+
}
|
|
757
|
+
function isValidTracestate(tracestate) {
|
|
758
|
+
return tracestate.length <= 512 && !/[\r\n]/.test(tracestate);
|
|
759
|
+
}
|
|
760
|
+
function filterSpanAttrs(span, flavor) {
|
|
761
|
+
const target = span;
|
|
762
|
+
return new Proxy(span, {
|
|
763
|
+
get(value, property, receiver) {
|
|
764
|
+
if (property === 'setAttribute' && target.setAttribute) {
|
|
765
|
+
return (key, attrValue) => {
|
|
766
|
+
const filtered = filterTelemetryAttrs({ [key]: attrValue }, flavor);
|
|
767
|
+
if (Object.keys(filtered).length === 0)
|
|
768
|
+
return span;
|
|
769
|
+
target.setAttribute?.(key, attrValue);
|
|
770
|
+
return span;
|
|
771
|
+
};
|
|
772
|
+
}
|
|
773
|
+
if (property === 'setAttributes' && target.setAttributes) {
|
|
774
|
+
return (attrs) => {
|
|
775
|
+
target.setAttributes?.(filterTelemetryAttrs(attrs, flavor));
|
|
776
|
+
return span;
|
|
777
|
+
};
|
|
778
|
+
}
|
|
779
|
+
return Reflect.get(value, property, receiver);
|
|
780
|
+
}
|
|
781
|
+
});
|
|
782
|
+
}
|
|
783
|
+
function filterTelemetryAttrs(attrs, flavor) {
|
|
784
|
+
const out = {};
|
|
785
|
+
for (const [key, value] of Object.entries(attrs)) {
|
|
786
|
+
if (value === undefined)
|
|
787
|
+
continue;
|
|
788
|
+
if (flavor === 'gen_ai_only' && isOpenInferenceAttr(key))
|
|
789
|
+
continue;
|
|
790
|
+
if (flavor === 'openinference_only' && key.startsWith('gen_ai.'))
|
|
791
|
+
continue;
|
|
792
|
+
out[key] = value;
|
|
793
|
+
}
|
|
794
|
+
return out;
|
|
795
|
+
}
|
|
796
|
+
function isOpenInferenceAttr(key) {
|
|
797
|
+
return key === 'openinference.span.kind'
|
|
798
|
+
|| key.startsWith('llm.')
|
|
799
|
+
|| key.startsWith('tool.')
|
|
800
|
+
|| key.startsWith('retrieval.')
|
|
801
|
+
|| key.startsWith('embedding.')
|
|
802
|
+
|| key.startsWith('reranker.')
|
|
803
|
+
|| key.startsWith('guardrail.')
|
|
804
|
+
|| key.startsWith('evaluator.')
|
|
805
|
+
|| key === 'input.value'
|
|
806
|
+
|| key === 'output.value';
|
|
807
|
+
}
|
|
619
808
|
function normalizeRunError(error, signal) {
|
|
620
809
|
if (!signal.aborted)
|
|
621
810
|
return error;
|
|
@@ -625,11 +814,7 @@ function normalizeRunError(error, signal) {
|
|
|
625
814
|
return error;
|
|
626
815
|
return new OperationCancelledError('Run was cancelled.', { scope: 'run' }, signal.reason ?? error);
|
|
627
816
|
}
|
|
628
|
-
function sanitizeEventForPersistence(event
|
|
629
|
-
if (captureContent) {
|
|
630
|
-
const { runId: _runId, at: _at, type: _type, ...payload } = event;
|
|
631
|
-
return payload;
|
|
632
|
-
}
|
|
817
|
+
function sanitizeEventForPersistence(event) {
|
|
633
818
|
switch (event.type) {
|
|
634
819
|
case 'run.started':
|
|
635
820
|
return {};
|
|
@@ -663,7 +848,11 @@ function sanitizeEventForPersistence(event, captureContent) {
|
|
|
663
848
|
case 'model.object.partial':
|
|
664
849
|
return { ...(event.agentId ? { agentId: event.agentId } : {}), partial: '[redacted]' };
|
|
665
850
|
case 'model.object':
|
|
666
|
-
return {
|
|
851
|
+
return {
|
|
852
|
+
...(event.agentId ? { agentId: event.agentId } : {}),
|
|
853
|
+
object: '[redacted]',
|
|
854
|
+
...(event.usage ? { usage: event.usage } : {})
|
|
855
|
+
};
|
|
667
856
|
case 'model.embedding.completed':
|
|
668
857
|
return {
|
|
669
858
|
...(event.agentId ? { agentId: event.agentId } : {}),
|
|
@@ -682,6 +871,24 @@ function sanitizeEventForPersistence(event, captureContent) {
|
|
|
682
871
|
return { dropped: event.dropped };
|
|
683
872
|
}
|
|
684
873
|
}
|
|
874
|
+
function isJsonRecord(value) {
|
|
875
|
+
return value !== null && typeof value === 'object' && !Array.isArray(value);
|
|
876
|
+
}
|
|
877
|
+
function isTokenUsage(value) {
|
|
878
|
+
return isJsonRecord(value)
|
|
879
|
+
&& typeof value['inputTokens'] === 'number'
|
|
880
|
+
&& typeof value['outputTokens'] === 'number'
|
|
881
|
+
&& typeof value['totalTokens'] === 'number';
|
|
882
|
+
}
|
|
883
|
+
function normalizeSerializedRunError(error) {
|
|
884
|
+
return {
|
|
885
|
+
code: error?.code ?? 'UNKNOWN',
|
|
886
|
+
category: error?.category ?? 'internal',
|
|
887
|
+
retriable: error?.retriable ?? false,
|
|
888
|
+
message: error?.message ?? 'Unknown error',
|
|
889
|
+
...(error?.meta ? { meta: error.meta } : {})
|
|
890
|
+
};
|
|
891
|
+
}
|
|
685
892
|
function createRunSignal(parent, timeoutMs) {
|
|
686
893
|
const controller = new AbortController();
|
|
687
894
|
const relay = () => controller.abort(parent?.reason);
|
package/dist/telemetry/shim.d.ts
CHANGED
|
@@ -1,5 +1,14 @@
|
|
|
1
1
|
/** Attributes accepted by telemetry span/metric helpers. */
|
|
2
2
|
export type SpanAttrs = Record<string, string | number | boolean | string[] | undefined>;
|
|
3
|
+
/** Developer-facing metric helper exposed in handler contexts. Each method takes optional per-call attributes; the helper built by `createMetrics` merges them over its default attributes, with per-call values winning on key clashes. */
|
|
4
|
+
export interface Metrics {
|
|
5
|
+
/** Adds `value` to the counter instrument `name`. The helper built by `createMetrics` adds 1 when `value` is omitted. */
|
|
6
|
+
counter(name: string, value?: number, attrs?: SpanAttrs): void;
|
|
7
|
+
/** Records `value` as one sample of the histogram instrument `name`. */
|
|
8
|
+
histogram(name: string, value: number, attrs?: SpanAttrs): void;
|
|
9
|
+
/** Awaits `fn` and records its elapsed time, in seconds, as a sample of the histogram `name`; resolves or rejects with the outcome of `fn`. The helper built by `createMetrics` records the sample even when `fn` rejects. */
|
|
10
|
+
duration<T>(name: string, attrs: SpanAttrs | undefined, fn: () => Promise<T>): Promise<T>;
|
|
11
|
+
}
|
|
3
12
|
/** Minimal telemetry abstraction used by harness internals and integrations. */
|
|
4
13
|
export interface TelemetryShim {
|
|
5
14
|
/** Creates a span, executes `fn`, and closes the span with success/error status. */
|
|
@@ -10,6 +19,11 @@ export interface TelemetryShim {
|
|
|
10
19
|
recordCounter(name: string, value: number, attrs: SpanAttrs): void;
|
|
11
20
|
/** Injects the current active trace context into a W3C traceparent carrier. */
|
|
12
21
|
currentTraceparent(): string | undefined;
|
|
22
|
+
/** Runs `fn` with the supplied W3C Trace Context as the active parent context. */
|
|
23
|
+
withTraceContext?<T>(carrier: {
|
|
24
|
+
traceparent: string;
|
|
25
|
+
tracestate?: string;
|
|
26
|
+
}, fn: () => Promise<T>): Promise<T>;
|
|
13
27
|
}
|
|
14
28
|
/** OpenTelemetry-backed implementation of {@link TelemetryShim}. */
|
|
15
29
|
export declare class OtelTelemetryShim implements TelemetryShim {
|
|
@@ -21,6 +35,12 @@ export declare class OtelTelemetryShim implements TelemetryShim {
|
|
|
21
35
|
recordHistogram(name: string, value: number, attrs: SpanAttrs): void;
|
|
22
36
|
recordCounter(name: string, value: number, attrs: SpanAttrs): void;
|
|
23
37
|
currentTraceparent(): string | undefined;
|
|
38
|
+
withTraceContext<T>(carrier: {
|
|
39
|
+
traceparent: string;
|
|
40
|
+
tracestate?: string;
|
|
41
|
+
}, fn: () => Promise<T>): Promise<T>;
|
|
24
42
|
}
|
|
25
43
|
/** Creates the default telemetry shim instance. */
|
|
26
44
|
export declare function createTelemetryShim(): TelemetryShim;
|
|
45
|
+
/** Creates a scoped metrics helper with default attributes merged into every metric. */
|
|
46
|
+
export declare function createMetrics(telemetry: TelemetryShim, defaultAttrs?: SpanAttrs): Metrics;
|
package/dist/telemetry/shim.js
CHANGED
|
@@ -113,8 +113,36 @@ export class OtelTelemetryShim {
|
|
|
113
113
|
propagation.inject(context.active(), carrier);
|
|
114
114
|
return carrier['traceparent'];
|
|
115
115
|
}
|
|
116
|
+
async withTraceContext(carrier, fn) {
|
|
117
|
+
const extracted = propagation.extract(context.active(), {
|
|
118
|
+
traceparent: carrier.traceparent,
|
|
119
|
+
...(carrier.tracestate ? { tracestate: carrier.tracestate } : {})
|
|
120
|
+
});
|
|
121
|
+
return context.with(extracted, fn);
|
|
122
|
+
}
|
|
116
123
|
}
|
|
117
124
|
/** Builds the default telemetry shim: a new `OtelTelemetryShim` instance per call. */
export function createTelemetryShim() {
    const shim = new OtelTelemetryShim();
    return shim;
}
|
|
128
|
+
/** Creates a scoped metrics helper with default attributes merged into every metric. */
|
|
129
|
+
export function createMetrics(telemetry, defaultAttrs = {}) {
|
|
130
|
+
const merge = (attrs) => ({ ...defaultAttrs, ...(attrs ?? {}) });
|
|
131
|
+
return {
|
|
132
|
+
counter(name, value = 1, attrs) {
|
|
133
|
+
telemetry.recordCounter(name, value, merge(attrs));
|
|
134
|
+
},
|
|
135
|
+
histogram(name, value, attrs) {
|
|
136
|
+
telemetry.recordHistogram(name, value, merge(attrs));
|
|
137
|
+
},
|
|
138
|
+
async duration(name, attrs, fn) {
|
|
139
|
+
const started = Date.now();
|
|
140
|
+
try {
|
|
141
|
+
return await fn();
|
|
142
|
+
}
|
|
143
|
+
finally {
|
|
144
|
+
telemetry.recordHistogram(name, (Date.now() - started) / 1000, merge(attrs));
|
|
145
|
+
}
|
|
146
|
+
}
|
|
147
|
+
};
|
|
148
|
+
}
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
import type { MemoryAdapter, MemoryOpenContext, MemoryScope, MemoryStore } from '../ports/memory.js';
|
|
2
|
+
/** Deterministic in-memory adapter for unit tests and adapter contract examples. Declares the same ten `memory.*` capability identifiers on `info.capabilities` and on the top-level `capabilities` tuple. */
|
|
3
|
+
export declare class FakeMemoryAdapter implements MemoryAdapter {
|
|
4
|
+
readonly info: {
|
|
5
|
+
id: string;
|
|
6
|
+
packageName: string;
|
|
7
|
+
capabilities: readonly ["memory.kv", "memory.list", "memory.delete", "memory.search", "memory.run", "memory.session", "memory.agent", "memory.user", "memory.tenant", "memory.persistent"];
|
|
8
|
+
};
|
|
9
|
+
readonly capabilities: readonly ["memory.kv", "memory.list", "memory.delete", "memory.search", "memory.run", "memory.session", "memory.agent", "memory.user", "memory.tenant", "memory.persistent"];
|
|
10
|
+
readonly openedScopes: MemoryScope[]; // NOTE(review): presumably collects the scopes passed to open() so tests can assert on them — confirm against the implementation
|
|
11
|
+
private readonly values; // private backing storage; its shape is not visible in this declaration
|
|
12
|
+
configureHarnessContext(): void; // takes no arguments in this declaration
|
|
13
|
+
open(scope: MemoryScope, _ctx: MemoryOpenContext): Promise<MemoryStore>; // NOTE(review): the `_ctx` name suggests the open context is unused — confirm
|
|
14
|
+
}
|
|
15
|
+
/** Shared contract for memory adapters. `make` is a factory that returns, or resolves to, the adapter under test. NOTE(review): presumably registers contract test cases with the active test runner — confirm against the implementation. */
|
|
16
|
+
export declare function memoryAdapterContract(make: () => MemoryAdapter | Promise<MemoryAdapter>): void;
|