@purista/harness 1.2.6 → 1.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75) hide show
  1. package/README.md +6 -0
  2. package/dist/agents/index.d.ts +7 -1
  3. package/dist/agents/index.js +126 -44
  4. package/dist/errors/catalog.d.ts +18 -2
  5. package/dist/errors/catalog.js +10 -0
  6. package/dist/eval/index.d.ts +3 -3
  7. package/dist/eval/index.js +15 -1
  8. package/dist/harness/defineHarness.d.ts +149 -3
  9. package/dist/harness/defineHarness.js +110 -1
  10. package/dist/index.d.ts +38 -18
  11. package/dist/index.js +30 -16
  12. package/dist/local/index.d.ts +36 -0
  13. package/dist/local/index.js +24 -0
  14. package/dist/local/local-sandbox.d.ts +25 -0
  15. package/dist/local/local-sandbox.js +368 -0
  16. package/dist/local/local-workspace.d.ts +56 -0
  17. package/dist/local/local-workspace.js +496 -0
  18. package/dist/local/ref-hash.d.ts +6 -0
  19. package/dist/local/ref-hash.js +9 -0
  20. package/dist/local/sqlite-storage.d.ts +106 -0
  21. package/dist/local/sqlite-storage.js +680 -0
  22. package/dist/models/adapter-utils.d.ts +52 -0
  23. package/dist/models/adapter-utils.js +81 -0
  24. package/dist/models/registry.js +28 -37
  25. package/dist/models/stream-pump.d.ts +16 -0
  26. package/dist/models/stream-pump.js +77 -0
  27. package/dist/ports/base-model-provider.d.ts +7 -1
  28. package/dist/ports/base-model-provider.js +384 -87
  29. package/dist/ports/capabilities.d.ts +16 -2
  30. package/dist/ports/context-checkpoints.d.ts +63 -0
  31. package/dist/ports/context-checkpoints.js +33 -0
  32. package/dist/ports/index.d.ts +1 -0
  33. package/dist/ports/index.js +1 -0
  34. package/dist/ports/model-provider.d.ts +94 -0
  35. package/dist/runtime/durable.d.ts +11 -0
  36. package/dist/runtime/durable.js +15 -2
  37. package/dist/runtime/sessionDurable.js +47 -21
  38. package/dist/runtime/steps.d.ts +22 -1
  39. package/dist/runtime/steps.js +53 -2
  40. package/dist/sessions/index.d.ts +17 -6
  41. package/dist/sessions/index.js +345 -84
  42. package/dist/skills/index.d.ts +0 -2
  43. package/dist/skills/index.js +0 -8
  44. package/dist/state/in-memory.js +6 -6
  45. package/dist/telemetry/shim.js +2 -6
  46. package/dist/telemetry/span-attrs.d.ts +9 -0
  47. package/dist/telemetry/span-attrs.js +27 -0
  48. package/dist/testing/durableWorkspaceStoreContract.js +69 -0
  49. package/dist/testing/fakeLogger.d.ts +29 -0
  50. package/dist/testing/fakeLogger.js +47 -0
  51. package/dist/testing/fakeSandbox.d.ts +27 -0
  52. package/dist/testing/fakeSandbox.js +153 -0
  53. package/dist/testing/fakeStateStore.d.ts +36 -0
  54. package/dist/testing/fakeStateStore.js +66 -0
  55. package/dist/testing/index.d.ts +10 -4
  56. package/dist/testing/index.js +14 -4
  57. package/dist/testing/loggerContract.d.ts +9 -0
  58. package/dist/testing/loggerContract.js +62 -0
  59. package/dist/testing/modelProviderContract.d.ts +12 -0
  60. package/dist/testing/modelProviderContract.js +222 -0
  61. package/dist/testing/recordEvents.d.ts +3 -0
  62. package/dist/testing/recordEvents.js +8 -0
  63. package/dist/testing/stateStoreContract.js +27 -0
  64. package/dist/tools/index.js +26 -1
  65. package/dist/tools/mcp/http.d.ts +2 -0
  66. package/dist/tools/mcp/http.js +34 -21
  67. package/dist/tools/mcp/runner.d.ts +4 -0
  68. package/dist/tools/mcp/runner.js +75 -21
  69. package/dist/tools/mcp/stdio.d.ts +7 -1
  70. package/dist/tools/mcp/stdio.js +102 -23
  71. package/dist/version.d.ts +1 -1
  72. package/dist/version.js +1 -1
  73. package/dist/workspace/in-memory.d.ts +1 -0
  74. package/dist/workspace/in-memory.js +47 -12
  75. package/package.json +5 -4
@@ -1,11 +1,13 @@
1
1
  import { z } from 'zod';
2
2
  import { type Logger } from '../logger/index.js';
3
- import type { ModelAlias, ModelCapability, TokenUsage } from '../ports/model-provider.js';
3
+ import type { ModelAlias, ModelCapability, ObjectResponse, ToolCallSpec, ModelMessage, ModelToolSpec, TokenUsage, ModelCallOptions } from '../ports/model-provider.js';
4
4
  import type { StateStore } from '../ports/state.js';
5
5
  import type { Metrics, TelemetryShim } from '../telemetry/index.js';
6
6
  import type { HarnessAdapterContext } from '../ports/harness-context.js';
7
7
  import type { MemoryAdapter, MemoryFacade, SessionMemory } from '../ports/memory.js';
8
8
  import type { DurableWorkspaceStore } from '../ports/workspace.js';
9
+ import type { ContextCheckpointStore } from '../ports/context-checkpoints.js';
10
+ import type { ContextCheckpoint, ContextCheckpointQuery } from '../ports/context-checkpoints.js';
9
11
  import type { JsonValue } from '../models/json.js';
10
12
  import type { Message } from '../models/state.js';
11
13
  import type { RunStatus } from '../models/state.js';
@@ -13,6 +15,7 @@ import type { HarnessError } from '../errors/harness-error.js';
13
15
  import { type Sandbox } from '../sandbox/index.js';
14
16
  import type { ModelHandle } from '../models/registry.js';
15
17
  import { type AdapterCapability, type DurableRuntimeAdapter, type HarnessInspection } from '../ports/capabilities.js';
18
+ import type { DurableStepOptions } from '../runtime/steps.js';
16
19
  /** Stable harness version string for diagnostics and generated documentation. */
17
20
  export { HARNESS_VERSION } from '../version.js';
18
21
  /** OpenTelemetry capture controls used by the harness. */
@@ -43,6 +46,32 @@ export interface HarnessDefaults {
43
46
  * `undefined` keeps all history, `0` keeps only system messages.
44
47
  */
45
48
  historyWindow?: number;
49
+ /** Default workflow child-agent delegation budgets. */
50
+ delegation?: DelegationDefaults;
51
+ }
52
/**
 * Harness-wide defaults for workflow child-agent delegation.
 * Child-agent calls remain disabled until delegation is explicitly enabled.
 */
export interface DelegationDefaults {
    /**
     * Turns on child-agent calls for workflows that carry no `delegation`
     * policy of their own. Default: `false`.
     */
    enabled?: boolean;
    /**
     * Upper bound on child-agent calls a single workflow run may start.
     * Default: `32`. A workflow can set its own value through
     * `workflow.delegation.maxChildAgentCalls`.
     */
    maxChildAgentCalls?: number;
    /**
     * Upper bound on child-agent calls that may be active concurrently inside
     * a single workflow run. Default: `8`.
     */
    maxParallelChildAgentCalls?: number;
    /**
     * Upper bound on local delegation depth. Default: `1`.
     * Harness workflows currently invoke leaf agents, so `1` permits ordinary
     * workflow-to-agent calls while `0` disables child-agent delegation.
     */
    maxDepth?: number;
}
47
76
  /** Top-level harness options passed to {@link defineHarness}. */
48
77
  export interface HarnessOptions {
@@ -333,30 +362,96 @@ export interface AgentContextMinimal<S extends BuilderState, I> {
333
362
  runId: string;
334
363
  history: ConversationHistory;
335
364
  memory: MemoryFacade;
365
+ checkpoints: ContextCheckpoints;
336
366
  metadata: Readonly<Record<string, JsonValue>>;
337
367
  metrics: Metrics;
338
368
  }
369
/**
 * Context handed to the `prepareStep` hook ahead of every model call made by
 * the default agent loop. Extends the minimal agent context.
 */
export interface AgentPrepareStepContext<S extends BuilderState, I> extends AgentContextMinimal<S, I> {
    /** Index of the model call within the default loop, starting at zero. */
    step: number;
    /** Model alias chosen for this step, before any override is applied. */
    model: keyof NonNullable<S['models']> & string;
    /** Messages the model would receive for this step. */
    messages: ReadonlyArray<ModelMessage>;
    /** Model-facing tools the model would have available for this step. */
    tools: ReadonlyArray<ModelToolSpec>;
}
380
/**
 * Overrides a `AgentDefinition.prepareStep` hook may return for one step.
 * Every field is optional.
 */
export interface AgentPrepareStepResult<S extends BuilderState> {
    /** Model alias to use for this model call instead of the selected one. */
    model?: keyof NonNullable<S['models']> & string;
    /** Instructions to use for this model call only. */
    instructions?: string;
    /** Names of the model-facing tools that stay active for this model call. */
    activeTools?: ReadonlyArray<string>;
    /** Messages to send for this model call only. */
    messages?: ReadonlyArray<ModelMessage>;
    /** Generation settings applied to this model call only. */
    call?: ModelCallOptions;
}
393
/**
 * Context handed to the `stopWhen` hook once a model call of the default agent
 * loop has returned. Carries everything the prepare-step context does, plus
 * the outcome of the call.
 */
export interface AgentStopWhenContext<S extends BuilderState, I> extends AgentPrepareStepContext<S, I> {
    /** Provider-normalized raw object response of the current model call. */
    response: ObjectResponse<JsonValue>;
    /** Tool calls the current model response asked for. */
    toolCalls: ReadonlyArray<ToolCallSpec>;
}
400
/**
 * Hook invoked to prepare each model call of the default agent loop.
 * May answer synchronously or through a promise, and may return nothing.
 */
export type AgentPrepareStep<S extends BuilderState, I> = (ctx: AgentPrepareStepContext<S, I>) => void | AgentPrepareStepResult<S> | Promise<void | AgentPrepareStepResult<S>>;
402
/**
 * Hook invoked after a model call to decide whether the default loop stops.
 * The boolean may be returned directly or through a promise.
 */
export type AgentStopWhen<S extends BuilderState, I> = (ctx: AgentStopWhenContext<S, I>) => Promise<boolean> | boolean;
404
/**
 * Facade, bound to a run, for explicit long-horizon context checkpoints.
 *
 * A checkpoint is addressed by its `sequence` number together with its `kind`.
 */
export interface ContextCheckpoints {
    /**
     * Writes a checkpoint.
     *
     * @param input - `sequence` and `kind` of the checkpoint, its JSON
     * `payload`, and optional JSON `metadata`.
     */
    write(input: {
        sequence: number;
        kind: ContextCheckpoint['kind'];
        payload: JsonValue;
        metadata?: Record<string, JsonValue>;
    }): Promise<void>;
    /**
     * Lists checkpoints.
     *
     * @param query - Optional filter. The `runId`, `sessionId`, `workflowId`,
     * `agentId` and `signal` fields of `ContextCheckpointQuery` are not
     * accepted here (presumably supplied by the run-bound facade — confirm
     * against the implementation).
     */
    list(query?: Omit<ContextCheckpointQuery, 'runId' | 'sessionId' | 'workflowId' | 'agentId' | 'signal'>): Promise<ReadonlyArray<ContextCheckpoint>>;
    /**
     * Reads the checkpoint addressed by `ref`.
     *
     * @returns The checkpoint, or `undefined`.
     */
    read(ref: {
        sequence: number;
        kind: ContextCheckpoint['kind'];
    }): Promise<ContextCheckpoint | undefined>;
    /** Deletes the checkpoint addressed by `ref`. */
    delete(ref: {
        sequence: number;
        kind: ContextCheckpoint['kind'];
    }): Promise<void>;
}
339
422
  /** Full context passed to workflow handlers. */
340
423
  export interface WorkflowContext<S extends BuilderState, I, O> {
341
424
  input: I;
342
425
  agents: {
343
- [K in keyof NonNullable<S['agents']>]: (input: AgentInput<S, K>, opts?: InvokeOptions) => Promise<AgentOutput<S, K>>;
426
+ [K in keyof NonNullable<S['agents']>]: (input: AgentInput<S, K>, opts?: WorkflowAgentInvokeOptions<S, K>) => Promise<AgentOutput<S, K>>;
344
427
  };
345
428
  models: ModelHandles<S>;
429
+ /** Harness logger scoped for workflow handler code (spec 10 `WorkflowContext`). */
430
+ log: Logger;
346
431
  signal: AbortSignal;
347
432
  runId: string;
348
433
  sessionId: string;
349
434
  metadata: Readonly<Record<string, JsonValue>>;
350
435
  memory: MemoryFacade;
436
+ checkpoints: ContextCheckpoints;
351
437
  metrics: Metrics;
352
438
  /**
353
439
  * Runs `fn` as a durable step. Under a durable invocation the output is
354
440
  * checkpointed and replayed on resume without re-running `fn`; otherwise it is
355
441
  * a transparent pass-through. See spec 10 "Durable steps".
356
442
  */
357
- step<T extends JsonValue>(stepId: string, fn: () => Promise<T>): Promise<T>;
443
+ step<T extends JsonValue>(stepId: string, fn: () => Promise<T>, options?: DurableStepOptions): Promise<T>;
358
444
  output?: O;
359
445
  }
446
/**
 * Options a workflow may pass when it calls one of its child agents.
 * Adds a per-call `model` override on top of `InvokeOptions`.
 *
 * The `K` parameter (the agent key) is not referenced by the members declared
 * here.
 */
export type WorkflowAgentInvokeOptions<S extends BuilderState, K extends keyof NonNullable<S['agents']>> = InvokeOptions & {
    /**
     * Model alias to use for this child-agent call instead of the default.
     * It has to exist on the harness model registry and has to be permitted
     * by the workflow delegation policy.
     */
    model?: keyof NonNullable<S['models']> & string;
};
360
455
  /** Full context passed to custom agent handlers. */
361
456
  export interface AgentContext<S extends BuilderState, I, O> extends AgentContextMinimal<S, I> {
362
457
  models: ModelHandles<S>;
@@ -375,12 +470,31 @@ export interface AgentDefinition<S extends BuilderState, I extends z.ZodTypeAny
375
470
  permissions?: AgentPermissions;
376
471
  onPermission?: OnPermission;
377
472
  maxSteps?: number;
473
+ /**
474
+ * Optional hook for per-round loop control in the default agent loop.
475
+ *
476
+ * @example
477
+ * ```ts
478
+ * prepareStep: ({ step }) => step === 0 ? { activeTools: ['lookup'] } : {}
479
+ * ```
480
+ */
481
+ prepareStep?: AgentPrepareStep<S, z.infer<I>>;
482
+ /**
483
+ * Optional hook that can stop the default loop after a model call.
484
+ *
485
+ * @example
486
+ * ```ts
487
+ * stopWhen: ({ step }) => step >= 2
488
+ * ```
489
+ */
490
+ stopWhen?: AgentStopWhen<S, z.infer<I>>;
378
491
  handler?: (ctx: AgentContext<S, z.infer<I>, z.infer<O>>) => Promise<z.infer<O>>;
379
492
  }
380
493
  /** Workflow definition registered inline within `.workflows(...)`. */
381
494
  export interface WorkflowDefinition<S extends BuilderState, I extends z.ZodTypeAny = z.ZodTypeAny, O extends z.ZodTypeAny = z.ZodTypeAny> {
382
495
  input?: I;
383
496
  output?: O;
497
+ delegation?: WorkflowDelegationPolicy<S>;
384
498
  handler: (ctx: WorkflowContext<S, z.infer<I>, z.infer<O>>) => Promise<z.infer<O>>;
385
499
  }
386
500
  type AgentSchemaFields = {
@@ -398,6 +512,8 @@ type AgentDefinitionResolved<S extends BuilderState, I extends z.ZodTypeAny, O e
398
512
  permissions?: AgentPermissions;
399
513
  onPermission?: OnPermission;
400
514
  maxSteps?: number;
515
+ prepareStep?: AgentPrepareStep<S, z.infer<I>>;
516
+ stopWhen?: AgentStopWhen<S, z.infer<I>>;
401
517
  handler?: (ctx: AgentContext<S, z.infer<I>, z.infer<O>>) => Promise<z.infer<O>>;
402
518
  };
403
519
  type AgentDefinitionFor<S extends BuilderState, D> = D extends {
@@ -423,8 +539,26 @@ type WorkflowSchemaFields = {
423
539
  type WorkflowDefinitionResolved<S extends BuilderState, I extends z.ZodTypeAny, O extends z.ZodTypeAny> = {
424
540
  input?: I;
425
541
  output?: O;
542
+ delegation?: WorkflowDelegationPolicy<S>;
426
543
  handler: (ctx: WorkflowContext<S, z.infer<I>, z.infer<O>>) => Promise<z.infer<O>>;
427
544
  };
545
/**
 * Governs which child agents a workflow may reach through `ctx.agents`, how
 * often, and on which model aliases.
 */
export interface WorkflowDelegationPolicy<S extends BuilderState = BuilderState> {
    /**
     * Switches child-agent calls on or off for this workflow. Supplying a
     * policy object that omits this field enables delegation.
     */
    enabled?: boolean;
    /** Ids of the child agents this workflow may call. When omitted, every registered agent is allowed. */
    agents?: ReadonlyArray<keyof NonNullable<S['agents']> & string>;
    /** Limit on child-agent calls per run. Takes precedence over `defaults.delegation.maxChildAgentCalls`. */
    maxChildAgentCalls?: number;
    /** Limit on simultaneously active child-agent calls per run. Takes precedence over `defaults.delegation.maxParallelChildAgentCalls`. */
    maxParallelChildAgentCalls?: number;
    /** Limit on local delegation depth. Takes precedence over `defaults.delegation.maxDepth`. */
    maxDepth?: number;
    /**
     * Model aliases permitted for every child-agent call made by this
     * workflow. Calls that run on the agent's default `model` are covered too.
     */
    modelAliases?: ReadonlyArray<keyof NonNullable<S['models']> & string>;
    /**
     * Model alias allowlists keyed by child-agent id. For an agent named here
     * the list replaces `modelAliases`.
     */
    agentModelAliases?: Partial<Record<keyof NonNullable<S['agents']> & string, ReadonlyArray<keyof NonNullable<S['models']> & string>>>;
}
428
562
  type WorkflowDefinitionFor<S extends BuilderState, D> = D extends {
429
563
  input: infer I extends z.ZodTypeAny;
430
564
  output: infer O extends z.ZodTypeAny;
@@ -576,11 +710,21 @@ export type RunEvent = {
576
710
  runId: string;
577
711
  agentId: string;
578
712
  at: string;
713
+ workflowId?: string;
714
+ parentAgentId?: string;
715
+ delegationCallId?: string;
716
+ delegationDepth?: number;
717
+ modelAlias?: string;
579
718
  } | {
580
719
  type: 'agent.finished';
581
720
  runId: string;
582
721
  agentId: string;
583
722
  at: string;
723
+ workflowId?: string;
724
+ parentAgentId?: string;
725
+ delegationCallId?: string;
726
+ delegationDepth?: number;
727
+ modelAlias?: string;
584
728
  output?: JsonValue;
585
729
  error?: SerializedError;
586
730
  } | {
@@ -657,6 +801,7 @@ export interface HarnessBuilder<S extends BuilderState = {}> {
657
801
  memory(adapter: MemoryAdapter): HarnessBuilder<S>;
658
802
  runtime(runtime: DurableRuntimeAdapter): HarnessBuilder<S>;
659
803
  workspaceStore(store: DurableWorkspaceStore): HarnessBuilder<S>;
804
+ checkpoints(store: ContextCheckpointStore): HarnessBuilder<S>;
660
805
  requires(capabilities: readonly AdapterCapability[]): HarnessBuilder<S>;
661
806
  defaults(defaults: HarnessDefaults): HarnessBuilder<S>;
662
807
  models<const M extends ModelsConfig>(models: M): HarnessBuilder<S & {
@@ -745,6 +890,7 @@ export interface HarnessBuilder<S extends BuilderState = {}> {
745
890
  * summarize_ticket: {
746
891
  * input: z.object({ ticket: z.string() }),
747
892
  * output: z.string(),
893
+ * delegation: { agents: ['summarize'] },
748
894
  * handler: (ctx) => ctx.agents.summarize(ctx.input.ticket)
749
895
  * }
750
896
  * })
@@ -3,6 +3,7 @@ import { JsonLogger } from '../logger/index.js';
3
3
  import { sandboxMemory } from '../memory/sandbox/index.js';
4
4
  import { validateMemoryAdapter } from '../ports/memory.js';
5
5
  import { validateDurableWorkspaceStore } from '../ports/workspace.js';
6
+ import { validateContextCheckpointStore } from '../ports/context-checkpoints.js';
6
7
  import { InMemoryStateStore } from '../state/in-memory.js';
7
8
  import { HarnessConfigError, SkillManifestError } from '../errors/catalog.js';
8
9
  import { BUILTIN_TOOL_NAMES } from '../tools/index.js';
@@ -47,6 +48,13 @@ class Builder {
47
48
  validateDurableWorkspaceStore(workspaceStore);
48
49
  return this.clone({ workspaceStore });
49
50
  }
51
+ checkpoints(checkpoints) {
52
+ if (this.configured.checkpoints) {
53
+ throw new HarnessConfigError('Context checkpoint store is already configured.', { reason: 'duplicate_adapter', path: 'checkpoints' });
54
+ }
55
+ validateContextCheckpointStore(checkpoints);
56
+ return this.clone({ checkpoints });
57
+ }
50
58
  requires(capabilities) {
51
59
  return this.clone({ requiredCapabilities: uniqueCapabilities(capabilities) });
52
60
  }
@@ -57,6 +65,9 @@ class Builder {
57
65
  if (defaults.maxParallelToolCalls !== undefined && (!Number.isInteger(defaults.maxParallelToolCalls) || defaults.maxParallelToolCalls < 1)) {
58
66
  throw new HarnessConfigError('maxParallelToolCalls must be a positive integer', { reason: 'invalid_defaults', path: 'defaults.maxParallelToolCalls' });
59
67
  }
68
+ validateDelegationBudget(defaults.delegation?.maxChildAgentCalls, 'defaults.delegation.maxChildAgentCalls', { min: 0 });
69
+ validateDelegationBudget(defaults.delegation?.maxParallelChildAgentCalls, 'defaults.delegation.maxParallelChildAgentCalls', { min: 1 });
70
+ validateDelegationBudget(defaults.delegation?.maxDepth, 'defaults.delegation.maxDepth', { min: 0 });
60
71
  return this.clone({ defaults });
61
72
  }
62
73
  models(models) {
@@ -87,6 +98,7 @@ class Builder {
87
98
  const resolved = typeof workflows === 'function'
88
99
  ? workflows({ workflow: (definition) => definition })
89
100
  : workflows;
101
+ this.validateWorkflowDelegationPolicies(resolved);
90
102
  return this.clone({ workflows: resolved });
91
103
  }
92
104
  build() {
@@ -95,11 +107,16 @@ class Builder {
95
107
  throw new HarnessConfigError('At least one model alias is required.', { reason: 'missing_models', path: 'models' });
96
108
  }
97
109
  this.validateToolSkillNamespace();
110
+ // Validated at build time (not in `.agents(...)`) because models may be
111
+ // declared later in the builder chain.
112
+ this.validateAgentModelAndToolReferences(models);
98
113
  const sandbox = this.configured.sandbox ?? autoDetectSandbox();
99
114
  const memory = this.configured.memory ?? sandboxMemory();
100
115
  validateMemoryAdapter(memory);
101
116
  if (this.configured.workspaceStore)
102
117
  validateDurableWorkspaceStore(this.configured.workspaceStore);
118
+ if (this.configured.checkpoints)
119
+ validateContextCheckpointStore(this.configured.checkpoints);
103
120
  const inspection = this.resolveInspection(this.options.name ?? 'agent-harness', sandbox, memory, models);
104
121
  const missing = missingCapabilities(inspection.requiredCapabilities, inspection.capabilities);
105
122
  if (missing.length > 0) {
@@ -118,6 +135,7 @@ class Builder {
118
135
  memory,
119
136
  ...(this.configured.runtime ? { runtime: this.configured.runtime } : {}),
120
137
  ...(this.configured.workspaceStore ? { workspaceStore: this.configured.workspaceStore } : {}),
138
+ ...(this.configured.checkpoints ? { checkpoints: this.configured.checkpoints } : {}),
121
139
  defaults: {
122
140
  agentMaxIterations: this.configured.defaults?.agentMaxIterations ?? 16,
123
141
  runTimeoutMs: this.configured.defaults?.runTimeoutMs ?? 600_000,
@@ -125,7 +143,8 @@ class Builder {
125
143
  skillTimeoutMs: this.configured.defaults?.skillTimeoutMs ?? 60_000,
126
144
  modelTimeoutMs: this.configured.defaults?.modelTimeoutMs ?? 300_000,
127
145
  maxParallelToolCalls: this.configured.defaults?.maxParallelToolCalls ?? 8,
128
- ...(this.configured.defaults?.historyWindow !== undefined ? { historyWindow: this.configured.defaults.historyWindow } : {})
146
+ ...(this.configured.defaults?.historyWindow !== undefined ? { historyWindow: this.configured.defaults.historyWindow } : {}),
147
+ ...(this.configured.defaults?.delegation ? { delegation: this.configured.defaults.delegation } : {})
129
148
  },
130
149
  models,
131
150
  tools: (this.configured.tools ?? {}),
@@ -175,6 +194,27 @@ class Builder {
175
194
  }
176
195
  }
177
196
  }
197
+ validateAgentModelAndToolReferences(models) {
198
+ const configuredTools = new Set(Object.keys(this.configured.tools ?? {}));
199
+ for (const [agentId, agent] of Object.entries(this.configured.agents ?? {})) {
200
+ if (!(agent.model in models)) {
201
+ throw new HarnessConfigError('Agent references an unknown model alias.', {
202
+ reason: 'invalid_agent',
203
+ path: `agents.${agentId}.model`,
204
+ id: agent.model
205
+ });
206
+ }
207
+ for (const toolId of agent.tools ?? []) {
208
+ if (!configuredTools.has(toolId)) {
209
+ throw new HarnessConfigError('Agent references an unknown tool.', {
210
+ reason: 'invalid_agent',
211
+ path: `agents.${agentId}.tools`,
212
+ id: toolId
213
+ });
214
+ }
215
+ }
216
+ }
217
+ }
178
218
  validateAgentSkillReferences(agents) {
179
219
  const configuredSkills = new Set(Object.keys(this.configured.skills ?? {}));
180
220
  for (const [agentId, agent] of Object.entries(agents)) {
@@ -189,6 +229,54 @@ class Builder {
189
229
  }
190
230
  }
191
231
  }
232
+ validateWorkflowDelegationPolicies(workflows) {
233
+ const configuredAgents = new Set(Object.keys(this.configured.agents ?? {}));
234
+ const configuredModels = new Set(Object.keys(this.configured.models ?? {}));
235
+ for (const [workflowId, workflow] of Object.entries(workflows)) {
236
+ const policy = workflow.delegation;
237
+ if (!policy)
238
+ continue;
239
+ validateDelegationBudget(policy.maxChildAgentCalls, `workflows.${workflowId}.delegation.maxChildAgentCalls`, { min: 0 });
240
+ validateDelegationBudget(policy.maxParallelChildAgentCalls, `workflows.${workflowId}.delegation.maxParallelChildAgentCalls`, { min: 1 });
241
+ validateDelegationBudget(policy.maxDepth, `workflows.${workflowId}.delegation.maxDepth`, { min: 0 });
242
+ for (const agentId of policy.agents ?? []) {
243
+ if (!configuredAgents.has(agentId)) {
244
+ throw new HarnessConfigError('Workflow delegation policy references an unknown agent.', {
245
+ reason: 'invalid_workflow',
246
+ path: `workflows.${workflowId}.delegation.agents`,
247
+ id: agentId
248
+ });
249
+ }
250
+ }
251
+ for (const alias of policy.modelAliases ?? []) {
252
+ if (!configuredModels.has(alias)) {
253
+ throw new HarnessConfigError('Workflow delegation policy references an unknown model alias.', {
254
+ reason: 'invalid_workflow',
255
+ path: `workflows.${workflowId}.delegation.modelAliases`,
256
+ id: alias
257
+ });
258
+ }
259
+ }
260
+ for (const [agentId, aliases] of Object.entries(policy.agentModelAliases ?? {})) {
261
+ if (!configuredAgents.has(agentId)) {
262
+ throw new HarnessConfigError('Workflow delegation policy references an unknown agent.', {
263
+ reason: 'invalid_workflow',
264
+ path: `workflows.${workflowId}.delegation.agentModelAliases.${agentId}`,
265
+ id: agentId
266
+ });
267
+ }
268
+ for (const alias of aliases ?? []) {
269
+ if (!configuredModels.has(alias)) {
270
+ throw new HarnessConfigError('Workflow delegation policy references an unknown model alias.', {
271
+ reason: 'invalid_workflow',
272
+ path: `workflows.${workflowId}.delegation.agentModelAliases.${agentId}`,
273
+ id: alias
274
+ });
275
+ }
276
+ }
277
+ }
278
+ }
279
+ }
192
280
  resolveInspection(name, sandbox, memory, models) {
193
281
  const adapters = [];
194
282
  const sandboxCapabilities = hasAdapterCapabilities(sandbox) ? uniqueCapabilities(sandbox.capabilities) : [];
@@ -224,6 +312,16 @@ class Builder {
224
312
  }
225
313
  });
226
314
  }
315
+ if (this.configured.checkpoints) {
316
+ adapters.push({
317
+ kind: 'context_checkpoint',
318
+ id: this.configured.checkpoints.info.id,
319
+ capabilities: uniqueCapabilities(this.configured.checkpoints.info.capabilities),
320
+ metadata: {
321
+ packageName: this.configured.checkpoints.info.packageName
322
+ }
323
+ });
324
+ }
227
325
  for (const [alias, model] of Object.entries(models)) {
228
326
  adapters.push({
229
327
  kind: 'model',
@@ -253,6 +351,16 @@ function getAdapterId(adapter, fallback) {
253
351
  }
254
352
  return fallback;
255
353
  }
354
/**
 * Validates one optional delegation budget value.
 *
 * @param value - Budget to check; `undefined` means "not set" and is accepted.
 * @param path - Configuration path of the value, used in the error message
 * and error details.
 * @param opts - `min` is the smallest accepted integer. Optional `reason`
 * overrides the error reason derived from `path`.
 * @throws HarnessConfigError when `value` is not an integer or is below
 * `opts.min`. The reason is `invalid_workflow` for `workflows.*` paths and
 * `invalid_defaults` otherwise, so workflow policy errors carry the same
 * reason as the other workflow delegation checks.
 */
function validateDelegationBudget(value, path, opts) {
    if (value === undefined)
        return;
    if (!Number.isInteger(value) || value < opts.min) {
        // Derive the reason from the path: this helper is shared between
        // harness defaults and per-workflow delegation policies.
        const derivedReason = path.startsWith('workflows.') ? 'invalid_workflow' : 'invalid_defaults';
        throw new HarnessConfigError(`${path} must be an integer >= ${opts.min}`, {
            reason: opts.reason ?? derivedReason,
            path
        });
    }
}
256
364
  /**
257
365
  * Creates the chainable harness builder used to define a harness system.
258
366
  *
@@ -268,6 +376,7 @@ function getAdapterId(adapter, fallback) {
268
376
  * summarize_ticket: {
269
377
  * input: z.object({ ticket: z.string() }),
270
378
  * output: z.string(),
379
+ * delegation: { agents: ['summarize'] },
271
380
  * handler: (ctx) => ctx.agents.summarize(ctx.input.ticket)
272
381
  * }
273
382
  * })
package/dist/index.d.ts CHANGED
@@ -1,18 +1,38 @@
1
- export * from './errors/index.js';
2
- export * from './logger/index.js';
3
- export * from './telemetry/index.js';
4
- export * from './ulid/index.js';
5
- export * from './ports/index.js';
6
- export { createDurableWorkflowContext, DurableStepError, DurableRunLeaseError, DurableTerminalRunError, inMemoryDurableRuntime, isTerminalRunStatus } from './runtime/index.js';
7
- export type { DurableActiveRunStatus, DurableWorkflowContext, DurableWorkflowContextOptions, DurableStepCommit, DurableRunLease, DurableRunStart, DurableRunStatus, DurableRuntime, DurableTerminalRunStatus, FinishRunPatch, InMemoryDurableRuntimeOptions, RunCheckpoint } from './runtime/index.js';
8
- export * from './state/in-memory.js';
9
- export * from './models/json.js';
10
- export type { SessionRecord, Message, RunRecord, PersistedRunEvent, RunStatus } from './models/state.js';
11
- export * from './models/registry.js';
12
- export * from './eval/index.js';
13
- export * from './memory/sandbox/index.js';
14
- export * from './skills/index.js';
15
- export * from './sandbox/index.js';
16
- export * from './workspace/index.js';
17
- export * from './tools/mcp/index.js';
18
- export * from './harness/defineHarness.js';
1
+ export { HarnessError, isHarnessError, HarnessConfigError, ValidationError, PermissionDeniedError, SandboxError, SandboxNoExecutorError, ModelError, ModelCapabilityError, ToolError, ToolNotFoundError, SkillNotFoundError, SkillManifestError, AgentNotFoundError, AgentLoopBudgetError, DelegationPolicyError, WorkflowNotFoundError, SessionNotFoundError, SessionBusyError, StateError, WorkspaceError, WorkspaceQuotaExceededError, WorkspaceCleanupError, OperationTimeoutError, OperationCancelledError, McpProtocolError, McpAuthError, InternalError, sanitizeProviderMessage, serializeError } from './errors/index.js';
2
+ export type { ErrorCategory } from './errors/index.js';
3
+ export { JsonLogger } from './logger/index.js';
4
+ export type { Logger, LogLevel } from './logger/index.js';
5
+ export type { Metrics, SpanAttrs, TelemetryShim } from './telemetry/index.js';
6
+ export { ulid } from './ulid/index.js';
7
+ export { HARNESS_VERSION } from './version.js';
8
+ export { BaseModelProvider } from './ports/base-model-provider.js';
9
+ export type { BaseModelProviderOptions } from './ports/base-model-provider.js';
10
+ export type { BaseRequest, ContentPart, ContentPartKind, Embedding, EmbeddingRequest, EmbeddingResponse, FinishReason, ModelAlias, ModelCallOptions, ModelCapability, ModelDefaults, ModelFeatureSet, ModelMessage, ModelOutcome, ModelProvider, ModelProviderInfo, ModelRateLimitInfo, ModelRetryKind, ModelRetryOnPolicy, ModelRetryPolicy, ModelRetrySetting, ModelToolSpec, ObjectRequest, ObjectResponse, ObjectStreamChunk, OutputMode, ProviderItems, RerankDocument, RerankRequest, RerankResponse, RerankResult, TextRequest, TextResponse, TextStreamChunk, TokenUsage, ToolCallSpec } from './ports/model-provider.js';
11
+ export type { ModelHandle, ModelInvokeContext } from './models/registry.js';
12
+ export { accumulateStreamToolCallDeltas, createStreamToolCallState, finalizeStreamToolCalls, malformedResponseError, parseProviderJson, redactProviderContent, safePartialJson, toTokenUsage, withoutObjectTool } from './models/adapter-utils.js';
13
+ export type { AdapterCallContext, StreamToolCallState } from './models/adapter-utils.js';
14
+ export type { AdapterCapabilities, AdapterCapability, AdapterInspection, DurableRuntimeAdapter, HarnessInspection } from './ports/capabilities.js';
15
+ export type { HarnessAdapterContext, HarnessContextConfigurable } from './ports/harness-context.js';
16
+ export { StateStoreAdapterBase } from './ports/state.js';
17
+ export type { StateStore } from './ports/state.js';
18
+ export { InMemoryStateStore } from './state/in-memory.js';
19
+ export type { JsonValue } from './models/json.js';
20
+ export type { Message, PersistedRunEvent, RunRecord, RunStatus, SessionRecord } from './models/state.js';
21
+ export type { MemoryAdapter, MemoryAdapterInfo, MemoryCapability, MemoryEntry, MemoryFacade, MemoryListOptions, MemoryOpenContext, MemoryOperation, MemoryOperationContext, MemoryScope, MemoryScopeKind, MemorySearchQuery, MemorySearchResult, MemoryStore, MemoryWriteOptions, SessionMemory } from './ports/memory.js';
22
+ export { sandboxMemory } from './memory/sandbox/index.js';
23
+ export type { FeedbackRecord, FeedbackTarget } from './ports/feedback.js';
24
+ export type { DurableReplayCheckpoint, DurableWorkspacePolicy, DurableWorkspaceStore, DurableWorkspaceStoreInfo, WorkspaceAbortOptions, WorkspaceAbortResult, WorkspaceCheckpoint, WorkspaceCleanupOptions, WorkspaceCleanupResult, WorkspaceEncryptionInfo, WorkspaceHandle, WorkspaceInspection, WorkspaceInspectionOptions, WorkspaceLifecycleState, WorkspacePauseOptions, WorkspaceQuotaPolicy, WorkspaceResumeOptions, WorkspaceRetentionPolicy, WorkspaceStartOptions } from './ports/workspace.js';
25
+ export { InMemoryDurableWorkspaceStore, inMemoryDurableWorkspaceStore } from './workspace/index.js';
26
+ export type { ContextCheckpoint, ContextCheckpointQuery, ContextCheckpointRef, ContextCheckpointStore, ContextCheckpointStoreInfo } from './ports/context-checkpoints.js';
27
+ export { createDurableWorkflowContext, DurableStepError, DurableRunLeaseError, DurableTerminalRunError, inMemoryDurableRuntime, isResumeBlockingRunStatus, isTerminalRunStatus } from './runtime/index.js';
28
+ export type { DurableActiveRunStatus, DurableWorkflowContext, DurableWorkflowContextOptions, DurableStepCommit, DurableStepOptions, DurableStepRetryPolicy, DurableStepRetrySetting, DurableRunLease, DurableRunStart, DurableRunStatus, DurableRuntime, DurableTerminalRunStatus, FinishRunPatch, InMemoryDurableRuntimeOptions, RunCheckpoint } from './runtime/index.js';
29
+ export { bashSandbox, inMemorySandbox } from './sandbox/index.js';
30
+ export type { ExecCapableSandboxSession, HibernateCapableSandbox, ResumeCapableSandbox, Sandbox, SandboxProcess, SandboxResumeOptions, SandboxSession, SandboxSessionBase, SandboxSessionFor, SnapshotCapableSandbox, SnapshotResult, SpawnCapableSandboxSession, SpawnOptions } from './sandbox/index.js';
31
+ export type { DirEntry, ExecOptions, ExecResult, FileStat } from './harness/types.js';
32
+ export { localDirectorySandbox, localDirectoryWorkspaceStore, localDurableExecution, SqliteHarnessStorage, sqliteContextCheckpointStore, sqliteDurableRuntime, sqliteStateStore } from './local/index.js';
33
+ export type { LocalDirectorySandboxOptions, LocalDirectoryWorkspaceStoreOptions, LocalDurableExecution, LocalDurableExecutionOptions, LocalDurableSandbox, LocalExecSandboxCapabilities, LocalFilesOnlySandboxCapabilities, LocalHostExecPolicy, SqliteContextCheckpointStoreOptions, SqliteDurableRuntimeOptions, SqliteStateStoreOptions } from './local/index.js';
34
+ export { discoverSkills } from './skills/index.js';
35
+ export { evaluateDeterministicScorer, evaluatePromptCandidates } from './eval/index.js';
36
+ export type { CandidateScore, DeterministicScorerDefinition, EvaluatePromptCandidatesInput, EvaluationItem, PromptCandidate, ScorerResult, ScorerTarget } from './eval/index.js';
37
+ export { defineHarness } from './harness/defineHarness.js';
38
+ export type { AgentContext, AgentContextMinimal, AgentDefinition, AgentDefinitionHelpers, AgentInput, AgentInvoker, AgentOutput, AgentPermissions, AgentPrepareStep, AgentPrepareStepContext, AgentPrepareStepResult, AgentStopWhen, AgentStopWhenContext, AgentsConfig, BuilderState, BuiltinToolName, ContentCaptureMode, ContextCheckpoints, ConversationHistory, DelegationDefaults, DiscoveredSkills, DiscoverSkillsOptions, DurableInvokeOptions, Harness, HarnessBuilder, HarnessDefaults, HarnessOptions, InferTypes, InvokeOptions, McpAuth, McpHttpToolDefinition, McpStdioToolDefinition, ModelHandles, ModelsConfig, OnPermission, PermissionContext, PermissionDecision, PermissionMode, PermissionPolicy, ResolvedSkill, RunEvent, RunSummary, SerializedError, Session, SkillDefinition, SkillDiagnostic, SkillFrontmatter, SkillsConfig, SkillValidationMode, TelemetryFlavor, TelemetryOptions, ToolDefinition, ToolHandlerContext, ToolsConfig, TsToolDefinition, WorkflowAgentInvokeOptions, WorkflowContext, WorkflowDefinition, WorkflowDefinitionHelpers, WorkflowDelegationPolicy, WorkflowInput, WorkflowInvoker, WorkflowOutput, WorkflowsConfig } from './harness/defineHarness.js';
package/dist/index.js CHANGED
@@ -1,16 +1,30 @@
1
- export * from './errors/index.js';
2
- export * from './logger/index.js';
3
- export * from './telemetry/index.js';
4
- export * from './ulid/index.js';
5
- export * from './ports/index.js';
6
- export { createDurableWorkflowContext, DurableStepError, DurableRunLeaseError, DurableTerminalRunError, inMemoryDurableRuntime, isTerminalRunStatus } from './runtime/index.js';
7
- export * from './state/in-memory.js';
8
- export * from './models/json.js';
9
- export * from './models/registry.js';
10
- export * from './eval/index.js';
11
- export * from './memory/sandbox/index.js';
12
- export * from './skills/index.js';
13
- export * from './sandbox/index.js';
14
- export * from './workspace/index.js';
15
- export * from './tools/mcp/index.js';
16
- export * from './harness/defineHarness.js';
1
+ // Public surface of `@purista/harness`. The export lists below are locked by
2
+ // specs/13-public-api.md and verified by test/public-api.test.ts — keep all
3
+ // three in sync when changing any export.
4
+ // Errors (specs/15-error-catalog.md)
5
+ export { HarnessError, isHarnessError, HarnessConfigError, ValidationError, PermissionDeniedError, SandboxError, SandboxNoExecutorError, ModelError, ModelCapabilityError, ToolError, ToolNotFoundError, SkillNotFoundError, SkillManifestError, AgentNotFoundError, AgentLoopBudgetError, DelegationPolicyError, WorkflowNotFoundError, SessionNotFoundError, SessionBusyError, StateError, WorkspaceError, WorkspaceQuotaExceededError, WorkspaceCleanupError, OperationTimeoutError, OperationCancelledError, McpProtocolError, McpAuthError, InternalError, sanitizeProviderMessage, serializeError } from './errors/index.js';
6
+ // Foundation: logger, telemetry shim types, ULID, version
7
+ export { JsonLogger } from './logger/index.js';
8
+ export { ulid } from './ulid/index.js';
9
+ export { HARNESS_VERSION } from './version.js';
10
+ // Model provider port
11
+ export { BaseModelProvider } from './ports/base-model-provider.js';
12
+ // Shared model adapter helpers (consumed by first-party provider packages)
13
+ export { accumulateStreamToolCallDeltas, createStreamToolCallState, finalizeStreamToolCalls, malformedResponseError, parseProviderJson, redactProviderContent, safePartialJson, toTokenUsage, withoutObjectTool } from './models/adapter-utils.js';
14
+ // State port + in-memory default
15
+ export { StateStoreAdapterBase } from './ports/state.js';
16
+ export { InMemoryStateStore } from './state/in-memory.js';
17
+ export { sandboxMemory } from './memory/sandbox/index.js';
18
+ export { InMemoryDurableWorkspaceStore, inMemoryDurableWorkspaceStore } from './workspace/index.js';
19
+ // Durable runtime
20
+ export { createDurableWorkflowContext, DurableStepError, DurableRunLeaseError, DurableTerminalRunError, inMemoryDurableRuntime, isResumeBlockingRunStatus, isTerminalRunStatus } from './runtime/index.js';
21
+ // Sandbox port + default factories
22
+ export { bashSandbox, inMemorySandbox } from './sandbox/index.js';
23
+ // Local durable execution
24
+ export { localDirectorySandbox, localDirectoryWorkspaceStore, localDurableExecution, SqliteHarnessStorage, sqliteContextCheckpointStore, sqliteDurableRuntime, sqliteStateStore } from './local/index.js';
25
+ // Skills discovery
26
+ export { discoverSkills } from './skills/index.js';
27
+ // AI evaluation core
28
+ export { evaluateDeterministicScorer, evaluatePromptCandidates } from './eval/index.js';
29
+ // Builder, harness, session, and handler context types
30
+ export { defineHarness } from './harness/defineHarness.js';
@@ -0,0 +1,36 @@
1
+ import type { StateStore } from '../ports/state.js';
2
+ import type { DurableRuntime } from '../runtime/durable.js';
3
+ import type { DurableWorkspacePolicy, DurableWorkspaceStore } from '../ports/workspace.js';
4
+ import type { ContextCheckpointStore } from '../ports/context-checkpoints.js';
5
+ import { type LocalDurableSandbox, type LocalHostExecPolicy } from './local-sandbox.js';
6
+ export type { LocalHostExecPolicy, LocalDirectorySandboxOptions, LocalDurableSandbox, LocalFilesOnlySandboxCapabilities, LocalExecSandboxCapabilities } from './local-sandbox.js';
7
+ export type { LocalDirectoryWorkspaceStoreOptions } from './local-workspace.js';
8
+ export type { SqliteDurableRuntimeOptions, SqliteContextCheckpointStoreOptions, SqliteStateStoreOptions } from './sqlite-storage.js';
9
+ export { localDirectorySandbox } from './local-sandbox.js';
10
+ export { localDirectoryWorkspaceStore } from './local-workspace.js';
11
+ export { sqliteContextCheckpointStore, sqliteDurableRuntime, sqliteStateStore, SqliteHarnessStorage } from './sqlite-storage.js';
12
+ /** Options accepted by `localDurableExecution`; only `root` is required. */ export interface LocalDurableExecutionOptions {
13
+ /** Host directory used for SQLite files, active workspaces, and snapshots. `localDurableExecution` normalizes it with `path.resolve` before use. */
14
+ root: string;
15
+ /** SQLite database file. Default: `${root}/runtime.sqlite`. An explicit value is forwarded unchanged; it is not joined onto `root`. */
16
+ databaseFile?: string;
17
+ /** Stable worker id reserved for future policies. `localDurableExecution` does not read it today. */
18
+ workerId?: string;
19
+ /** Host command execution policy. Default: `false` (an omitted value is forwarded to the sandbox as `false`). */
20
+ exec?: false | LocalHostExecPolicy;
21
+ /** Workspace retention/quota/encryption metadata reported by the store. Forwarded to the workspace store only when provided. */
22
+ policy?: Partial<DurableWorkspacePolicy>;
23
+ /** Lease takeover window for crashed workers. Default: `120_000`. Forwarded to the SQLite storage only when not `undefined`. */
24
+ leaseTtlMs?: number;
25
+ }
26
+ /** Adapter bundle returned by `localDurableExecution`. */ export interface LocalDurableExecution {
27
+ /** State store; `localDurableExecution` supplies its `SqliteHarnessStorage` instance here. */ state: StateStore;
28
+ /** Durable runtime; `localDurableExecution` supplies the same storage instance as `state`. */ runtime: DurableRuntime;
29
+ /** Files-only by default; advertises `sandbox.exec` only when `exec` is configured (spec 22 §2). */
30
+ sandbox: LocalDurableSandbox;
31
+ /** Workspace store created over the same resolved `root` as `sandbox`. */ workspaceStore: DurableWorkspaceStore;
32
+ /** Context checkpoint store; `localDurableExecution` supplies the same storage instance as `state`. */ checkpoints: ContextCheckpointStore;
33
+ /** Releases the bundle; the `localDurableExecution` implementation delegates to the storage's `close()`. */ close(): Promise<void>;
34
+ }
35
+ /** Creates the recommended local durable adapter bundle for single-host usage. The returned `state`, `runtime`, and `checkpoints` are one shared `SqliteHarnessStorage` instance, and `close()` closes that storage. */
36
+ export declare function localDurableExecution(options: LocalDurableExecutionOptions): LocalDurableExecution;
@@ -0,0 +1,24 @@
1
+ import { resolve } from 'node:path';
2
+ import { createLocalWorkspaceCoordinator, localDirectoryWorkspaceStore } from './local-workspace.js';
3
+ import { localDirectorySandbox } from './local-sandbox.js';
4
+ import { SqliteHarnessStorage, sqliteContextCheckpointStore, sqliteDurableRuntime, sqliteStateStore } from './sqlite-storage.js';
5
+ export { localDirectorySandbox } from './local-sandbox.js';
6
+ export { localDirectoryWorkspaceStore } from './local-workspace.js';
7
+ export { sqliteContextCheckpointStore, sqliteDurableRuntime, sqliteStateStore, SqliteHarnessStorage } from './sqlite-storage.js';
8
+ /**
+  * Creates the recommended local durable adapter bundle for single-host usage.
+  *
+  * One `SqliteHarnessStorage` instance is constructed and returned under three
+  * keys (`state`, `runtime`, `checkpoints`); the sandbox and workspace store are
+  * built over the same resolved `root` and share one workspace coordinator.
+  * `options.workerId` is not read by this function.
+  *
+  * @param options - Bundle options; `options.root` is required, the rest are optional.
+  * @returns An object with `state`, `runtime`, `checkpoints`, `sandbox`,
+  *   `workspaceStore`, and a `close` function that calls `storage.close()`.
+  */
9
+ export function localDurableExecution(options) {
10
+ const root = resolve(options.root); // normalized once; reused for the default DB path, the sandbox, and the workspace store
11
+ const coordinator = createLocalWorkspaceCoordinator(); // single instance handed to both sandbox and workspace store below (presumably so they coordinate on the same workspaces — confirm in local-workspace.js)
12
+ const storage = new SqliteHarnessStorage({
13
+ file: options.databaseFile ?? resolve(root, 'runtime.sqlite'), // an explicit databaseFile is used as given, not joined onto root
14
+ ...(options.leaseTtlMs !== undefined ? { leaseTtlMs: options.leaseTtlMs } : {}) // omit the key entirely when unset instead of passing `undefined`
15
+ });
16
+ return {
17
+ state: storage, // the same storage object backs state, runtime, and checkpoints
18
+ runtime: storage,
19
+ checkpoints: storage,
20
+ sandbox: localDirectorySandbox({ root, exec: options.exec ?? false, coordinator }), // exec falls back to `false` when not configured
21
+ workspaceStore: localDirectoryWorkspaceStore({ root, ...(options.policy ? { policy: options.policy } : {}), coordinator }), // `policy` key is only included when a policy was supplied
22
+ close: () => storage.close() // only the storage is closed here; sandbox and workspace store are not touched
23
+ };
24
+ }