@principles/core 1.161.0 → 1.162.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (102) hide show
  1. package/dist/runtime-v2/__tests__/adversarial-loop.test.js +24 -78
  2. package/dist/runtime-v2/__tests__/adversarial-loop.test.js.map +1 -1
  3. package/dist/runtime-v2/__tests__/architecture-regression.test.js +8 -5
  4. package/dist/runtime-v2/__tests__/architecture-regression.test.js.map +1 -1
  5. package/dist/runtime-v2/__tests__/artificer-runner-vslice.test.js +32 -80
  6. package/dist/runtime-v2/__tests__/artificer-runner-vslice.test.js.map +1 -1
  7. package/dist/runtime-v2/__tests__/full-chain-real-llm.test.js +2 -2
  8. package/dist/runtime-v2/__tests__/full-chain-real-llm.test.js.map +1 -1
  9. package/dist/runtime-v2/__tests__/principle-compiler-core.test.js +7 -7
  10. package/dist/runtime-v2/__tests__/principle-compiler-core.test.js.map +1 -1
  11. package/dist/runtime-v2/activation/__tests__/production-gate-deps.test.js +24 -1
  12. package/dist/runtime-v2/activation/__tests__/production-gate-deps.test.js.map +1 -1
  13. package/dist/runtime-v2/activation/production-gate-deps.d.ts.map +1 -1
  14. package/dist/runtime-v2/activation/production-gate-deps.js +18 -1
  15. package/dist/runtime-v2/activation/production-gate-deps.js.map +1 -1
  16. package/dist/runtime-v2/adapter/__tests__/artificer-l2-adapter.test.js +277 -475
  17. package/dist/runtime-v2/adapter/__tests__/artificer-l2-adapter.test.js.map +1 -1
  18. package/dist/runtime-v2/adapter/artificer-l2-adapter.d.ts +14 -34
  19. package/dist/runtime-v2/adapter/artificer-l2-adapter.d.ts.map +1 -1
  20. package/dist/runtime-v2/adapter/artificer-l2-adapter.js +182 -222
  21. package/dist/runtime-v2/adapter/artificer-l2-adapter.js.map +1 -1
  22. package/dist/runtime-v2/adapter/pi-ai-runtime-adapter.js +2 -2
  23. package/dist/runtime-v2/adapter/pi-ai-runtime-adapter.js.map +1 -1
  24. package/dist/runtime-v2/adversarial-loop.d.ts.map +1 -1
  25. package/dist/runtime-v2/adversarial-loop.js +5 -27
  26. package/dist/runtime-v2/adversarial-loop.js.map +1 -1
  27. package/dist/runtime-v2/golden-trace-replay-validator.d.ts +8 -0
  28. package/dist/runtime-v2/golden-trace-replay-validator.d.ts.map +1 -1
  29. package/dist/runtime-v2/golden-trace-replay-validator.js +3 -3
  30. package/dist/runtime-v2/golden-trace-replay-validator.js.map +1 -1
  31. package/dist/runtime-v2/golden-trace.d.ts +16 -1
  32. package/dist/runtime-v2/golden-trace.d.ts.map +1 -1
  33. package/dist/runtime-v2/golden-trace.js +13 -4
  34. package/dist/runtime-v2/golden-trace.js.map +1 -1
  35. package/dist/runtime-v2/index.d.ts +8 -5
  36. package/dist/runtime-v2/index.d.ts.map +1 -1
  37. package/dist/runtime-v2/index.js +11 -4
  38. package/dist/runtime-v2/index.js.map +1 -1
  39. package/dist/runtime-v2/internalization/__tests__/artificer-rule-output.test.d.ts +2 -0
  40. package/dist/runtime-v2/internalization/__tests__/artificer-rule-output.test.d.ts.map +1 -0
  41. package/dist/runtime-v2/internalization/__tests__/{artificer-output-v2.test.js → artificer-rule-output.test.js} +126 -127
  42. package/dist/runtime-v2/internalization/__tests__/artificer-rule-output.test.js.map +1 -0
  43. package/dist/runtime-v2/internalization/__tests__/rule-code-dialect.test.d.ts +2 -0
  44. package/dist/runtime-v2/internalization/__tests__/rule-code-dialect.test.d.ts.map +1 -0
  45. package/dist/runtime-v2/internalization/__tests__/rule-code-dialect.test.js +270 -0
  46. package/dist/runtime-v2/internalization/__tests__/rule-code-dialect.test.js.map +1 -0
  47. package/dist/runtime-v2/internalization/__tests__/rule-host-input-builder.test.d.ts +2 -0
  48. package/dist/runtime-v2/internalization/__tests__/rule-host-input-builder.test.d.ts.map +1 -0
  49. package/dist/runtime-v2/internalization/__tests__/rule-host-input-builder.test.js +180 -0
  50. package/dist/runtime-v2/internalization/__tests__/rule-host-input-builder.test.js.map +1 -0
  51. package/dist/runtime-v2/internalization/artificer-output.d.ts +33 -51
  52. package/dist/runtime-v2/internalization/artificer-output.d.ts.map +1 -1
  53. package/dist/runtime-v2/internalization/artificer-output.js +48 -87
  54. package/dist/runtime-v2/internalization/artificer-output.js.map +1 -1
  55. package/dist/runtime-v2/internalization/artificer-runner.d.ts +8 -8
  56. package/dist/runtime-v2/internalization/artificer-runner.d.ts.map +1 -1
  57. package/dist/runtime-v2/internalization/artificer-runner.js +5 -5
  58. package/dist/runtime-v2/internalization/artificer-runner.js.map +1 -1
  59. package/dist/runtime-v2/internalization/evaluator-runner.js +2 -2
  60. package/dist/runtime-v2/internalization/index.d.ts +9 -4
  61. package/dist/runtime-v2/internalization/index.d.ts.map +1 -1
  62. package/dist/runtime-v2/internalization/index.js +8 -3
  63. package/dist/runtime-v2/internalization/index.js.map +1 -1
  64. package/dist/runtime-v2/internalization/rule-code-validator.d.ts +16 -0
  65. package/dist/runtime-v2/internalization/rule-code-validator.d.ts.map +1 -1
  66. package/dist/runtime-v2/internalization/rule-code-validator.js +50 -1
  67. package/dist/runtime-v2/internalization/rule-code-validator.js.map +1 -1
  68. package/dist/runtime-v2/internalization/rule-host-input-builder.d.ts +62 -0
  69. package/dist/runtime-v2/internalization/rule-host-input-builder.d.ts.map +1 -0
  70. package/dist/runtime-v2/internalization/rule-host-input-builder.js +182 -0
  71. package/dist/runtime-v2/internalization/rule-host-input-builder.js.map +1 -0
  72. package/dist/runtime-v2/internalization/rule-host-validator.d.ts.map +1 -1
  73. package/dist/runtime-v2/internalization/rule-host-validator.js +22 -1
  74. package/dist/runtime-v2/internalization/rule-host-validator.js.map +1 -1
  75. package/dist/runtime-v2/internalization/template-generator.d.ts +7 -2
  76. package/dist/runtime-v2/internalization/template-generator.d.ts.map +1 -1
  77. package/dist/runtime-v2/internalization/template-generator.js +10 -5
  78. package/dist/runtime-v2/internalization/template-generator.js.map +1 -1
  79. package/dist/runtime-v2/tools/__tests__/artificer-l2-tool-contract.test.d.ts +2 -0
  80. package/dist/runtime-v2/tools/__tests__/artificer-l2-tool-contract.test.d.ts.map +1 -0
  81. package/dist/runtime-v2/tools/__tests__/artificer-l2-tool-contract.test.js +322 -0
  82. package/dist/runtime-v2/tools/__tests__/artificer-l2-tool-contract.test.js.map +1 -0
  83. package/dist/runtime-v2/tools/__tests__/artificer-output-typebox.test.d.ts +2 -0
  84. package/dist/runtime-v2/tools/__tests__/artificer-output-typebox.test.d.ts.map +1 -0
  85. package/dist/runtime-v2/tools/__tests__/artificer-output-typebox.test.js +149 -0
  86. package/dist/runtime-v2/tools/__tests__/artificer-output-typebox.test.js.map +1 -0
  87. package/dist/runtime-v2/tools/artificer-l2-tool-contract.d.ts +72 -0
  88. package/dist/runtime-v2/tools/artificer-l2-tool-contract.d.ts.map +1 -0
  89. package/dist/runtime-v2/tools/artificer-l2-tool-contract.js +275 -0
  90. package/dist/runtime-v2/tools/artificer-l2-tool-contract.js.map +1 -0
  91. package/dist/runtime-v2/tools/artificer-output-typebox.d.ts +78 -0
  92. package/dist/runtime-v2/tools/artificer-output-typebox.d.ts.map +1 -0
  93. package/dist/runtime-v2/tools/artificer-output-typebox.js +70 -0
  94. package/dist/runtime-v2/tools/artificer-output-typebox.js.map +1 -0
  95. package/dist/telemetry-event.d.ts +2 -2
  96. package/dist/telemetry-event.d.ts.map +1 -1
  97. package/dist/telemetry-event.js +5 -3
  98. package/dist/telemetry-event.js.map +1 -1
  99. package/package.json +1 -1
  100. package/dist/runtime-v2/internalization/__tests__/artificer-output-v2.test.d.ts +0 -2
  101. package/dist/runtime-v2/internalization/__tests__/artificer-output-v2.test.d.ts.map +0 -1
  102. package/dist/runtime-v2/internalization/__tests__/artificer-output-v2.test.js.map +0 -1
@@ -1,64 +1,73 @@
1
1
  /**
2
- * ArtificerL2Adapter tests (RuleHost MVP Activation, ADR-0014 Amendment 2026-06-17,
3
- * PRD Decision 8, test module 7).
2
+ * ArtificerL2Adapter tests (PRI-439 Phase 4 — tool-using L2 agent).
4
3
  *
5
- * TDD Phase 4.1 RED asserts behavior not yet implemented in
6
- * artificer-l2-adapter.ts.
4
+ * Mocks runAgentLoop (no real LLM calls) to verify the adapter's orchestration:
5
+ * - submit_rulecode capture terminates the loop and stores the output
6
+ * - maxTurns cap forces stop when submit_rulecode is never called
7
+ * - beforeToolCall whitelist blocks non-allowlisted tools
8
+ * - shouldStopAfterTurn checks output capture + turn count
9
+ * - no V1/L1 fallback: exhaustion throws PDRuntimeError
10
+ * - timeout: abort signal triggers timed_out failure
11
+ * - telemetry events (artificer_l2_turn / artificer_l2_complete) are emitted
7
12
  *
8
- * The adapter encapsulates a write-test-fix loop (generate code → sandbox replay →
9
- * inject RefinerSandboxFailedCase[] feedback regenerate, max 3 attempts) inside
10
- * a PDRuntimeAdapter. BasePeerRunner sees a single startRun(); the loop is invisible
11
- * to it. This follows the Dreamer L2 precedent (L2AgentLoopAdapter) of putting the
12
- * multi-attempt logic in the adapter, not in succeedTask().
13
- *
14
- * Testability: LLM calls are mocked via an injected `generateCode` function.
15
- * Sandbox replay uses real evaluateRefinerRuleHostGate with a controllable
16
- * RefinerRuleHostGateDeps. No real LLM calls.
17
- *
18
- * Coverage (PRD test module 7):
19
- * - happy path: 1st attempt passes replay → V2 output (1 LLM call)
20
- * - fix path: 1st attempt fails → feedback injected → 2nd passes → V2 (2 LLM calls)
21
- * - exhaustion: 3 attempts all fail → V1 degraded output (no code fields)
22
- * - error types: forbidden_pattern / runtime_error / timeout / validation_failed
23
- * - V1 backward compat: degraded V1 output is NOT detected as V2 by isArtificerOutputV2
24
- *
25
- * ERR checklist (EP-05 Loop State Freshness): each attempt reads fresh sandbox
26
- * errors; the feedback injected into attempt N+1 is from attempt N's failure,
27
- * never stale. (ERR-015/018/019)
13
+ * ERR checklist:
14
+ * - EP-05 Loop State Freshness: each startRun uses fresh outputCapture + turnCount
15
+ * - EP-03 Fail Loud: exhaustion throws PDRuntimeError with structured nextAction
16
+ * - EP-01 Trust Boundary: submit_rulecode validates via injected validator
28
17
  */
29
- import { describe, it, expect } from 'vitest';
18
+ import { describe, it, expect, vi, beforeEach } from 'vitest';
19
+ const hoisted = vi.hoisted(() => {
20
+ return {
21
+ lastLoopConfig: {},
22
+ mockReturn: [],
23
+ impl: null,
24
+ };
25
+ });
26
+ /* eslint-disable @typescript-eslint/max-params -- runAgentLoop mock mirrors the real 5-param signature */
27
+ vi.mock('@earendil-works/pi-agent-core', () => ({
28
+ runAgentLoop: vi.fn(async (prompts, context, config, emit, signal) => {
29
+ hoisted.lastLoopConfig = config;
30
+ if (typeof hoisted.impl === 'function') {
31
+ const fn = hoisted.impl;
32
+ return fn(prompts, context, config, emit, signal);
33
+ }
34
+ return hoisted.mockReturn.slice();
35
+ }),
36
+ }));
37
+ /* eslint-enable @typescript-eslint/max-params */
38
+ // Mock resolveL2Model's pi-ai dependencies (getModel/getProviders) — the adapter
39
+ // uses the custom baseUrl path so these stubs are never called for real.
40
+ vi.mock('@earendil-works/pi-ai', () => ({
41
+ completeSimple: vi.fn(),
42
+ getModel: vi.fn(() => ({ id: 'test', name: 'test', api: 'openai-completions', provider: 'test-provider' })),
43
+ getProviders: vi.fn(() => []),
44
+ }));
45
+ vi.mock('../../store/event-emitter.js', () => ({
46
+ storeEmitter: { emitTelemetry: vi.fn() },
47
+ }));
48
+ import { storeEmitter } from '../../store/event-emitter.js';
30
49
  import { ArtificerL2Adapter } from '../artificer-l2-adapter.js';
31
- import { isArtificerOutputV2, DefaultArtificerValidator } from '../../internalization/artificer-output.js';
32
- import { validateGoldenTrace } from '../../golden-trace.js';
33
- import { Value } from '@sinclair/typebox/value';
34
- import { RunHandleSchema, RuntimeKindSchema } from '../../runtime-protocol.js';
50
+ import { DefaultArtificerValidator } from '../../internalization/artificer-output.js';
51
+ const emitTelemetryMock = storeEmitter.emitTelemetry;
35
52
  const TASK_ID = 'task-artificer-l2-001';
36
- /** A valid V2 output the LLM might produce. */
37
- function makeV2Output(overrides = {}) {
53
+ /** A valid ArtificerRuleOutput the model might submit via submit_rulecode. */
54
+ function makeRuleOutput(overrides = {}) {
38
55
  return {
39
56
  taskId: TASK_ID,
40
- sourceScribeArtifactId: 'pi-art-scribe-001-run-001',
41
- implementationPlan: {
42
- summary: 'Block writes to system dirs',
43
- targetSurface: 'edit gate',
44
- changes: ['path prefix check'],
45
- tests: ['golden trace replay'],
46
- rolloutNotes: ['shadow first'],
47
- confidence: 0.8,
48
- },
49
- sourceTrace: { scribeArtifactId: 'pi-art-scribe-001-run-001' },
50
- risks: [],
51
- generatedAt: '2026-06-17T00:00:00.000Z',
57
+ sourceScribeArtifactId: 'pi-art-scribe-001',
52
58
  implementationCode: 'function evaluate(input, helpers) { return { decision: "allow", matched: false, reason: "ok" }; }',
53
59
  goldenTraceCases: [
54
60
  { caseId: 'negative-1', kind: 'negative', toolName: 'edit', params: { path: '/etc/x' }, expectedDecision: 'block' },
55
61
  { caseId: 'positive-1', kind: 'positive', toolName: 'read', params: { path: '/tmp/y' }, expectedDecision: 'allow' },
56
62
  ],
57
63
  affectedTools: ['edit'],
64
+ implementationSummary: 'Block writes to system dirs',
65
+ risks: [],
66
+ sourceTrace: { scribeArtifactId: 'pi-art-scribe-001' },
67
+ generatedAt: '2026-06-17T00:00:00.000Z',
58
68
  ...overrides,
59
69
  };
60
70
  }
61
- /** Build a gateDeps whose sandbox always accepts (replay passes). */
62
71
  function makeAlwaysPassGateDeps() {
63
72
  const passingResult = {
64
73
  success: true,
@@ -70,469 +79,262 @@ function makeAlwaysPassGateDeps() {
70
79
  evaluateInSandbox: (_code, _trace, _opts) => passingResult,
71
80
  };
72
81
  }
73
- /**
74
- * Build a gateDeps whose sandbox fails N times then passes.
75
- * Each failure carries a distinct RefinerSandboxFailedCase so tests can assert
76
- * that the RIGHT feedback was injected into the next attempt (EP-05 freshness).
77
- */
78
- function makeFailNTimesGateDeps(failures) {
79
- const calls = [];
80
- let attempt = 0;
81
- const deps = {
82
- evaluateInSandbox: (code, _trace, _opts) => {
83
- calls.push({ code });
84
- const result = failures[attempt] ?? { success: true, failedCases: [], executionTimeMs: 1, forbiddenPatternViolations: [] };
85
- attempt += 1;
86
- return result;
87
- },
82
+ function makeStartRun(overrides = {}) {
83
+ return {
84
+ agentSpec: { agentId: 'artificer', schemaVersion: 'v1' },
85
+ taskRef: { taskId: TASK_ID },
86
+ inputPayload: 'initial prompt',
87
+ contextItems: [],
88
+ outputSchemaRef: 'artificer-output-v2',
89
+ timeoutMs: 300_000,
90
+ ...overrides,
88
91
  };
89
- return { deps, calls };
90
92
  }
91
- const FAILED_FORBIDDEN = {
92
- success: false,
93
- failedCases: [{ caseId: '__sandbox__', errorType: 'forbidden_pattern', message: 'require() detected' }],
94
- executionTimeMs: 1,
95
- forbiddenPatternViolations: ['require'],
96
- };
97
- const FAILED_RUNTIME = {
98
- success: false,
99
- failedCases: [{ caseId: 'negative-1', errorType: 'runtime_error', message: 'TypeError: x is undefined' }],
100
- executionTimeMs: 1,
101
- forbiddenPatternViolations: [],
102
- };
103
- const FAILED_TIMEOUT = {
104
- success: false,
105
- failedCases: [{ caseId: 'negative-1', errorType: 'timeout', message: 'exceeded 1000ms' }],
106
- executionTimeMs: 1001,
107
- forbiddenPatternViolations: [],
108
- };
109
- const FAILED_VALIDATION = {
110
- success: false,
111
- failedCases: [{ caseId: 'negative-1', errorType: 'validation_failed', message: 'expected block got allow' }],
112
- executionTimeMs: 1,
113
- forbiddenPatternViolations: [],
114
- };
115
- describe('ArtificerL2Adapter (RuleHost MVP Activation, PRI-424)', () => {
116
- it('retries when the model omits executable RuleCode from an otherwise valid V1 response', async () => {
117
- const prompts = [];
118
- const { implementationCode: _code, goldenTraceCases: _cases, affectedTools: _tools, ...v1 } = makeV2Output();
119
- const generateCode = async (prompt) => {
120
- prompts.push(prompt);
121
- return prompts.length === 1 ? v1 : makeV2Output();
122
- };
123
- const adapter = new ArtificerL2Adapter({
124
- generateCode,
125
- gateDeps: makeAlwaysPassGateDeps(),
126
- validator: new DefaultArtificerValidator(),
127
- });
128
- const handle = await adapter.startRun({
129
- agentSpec: { agentId: 'artificer', schemaVersion: 'v1' },
130
- taskRef: { taskId: TASK_ID },
131
- inputPayload: 'initial prompt',
132
- contextItems: [],
133
- outputSchemaRef: 'artificer-output-v2',
134
- timeoutMs: 300_000,
135
- });
136
- expect(prompts).toHaveLength(2);
137
- expect(prompts[1]).toContain('implementationCode');
138
- const output = await adapter.fetchOutput(handle.runId);
139
- expect(isArtificerOutputV2(output?.payload)).toBe(true);
140
- void _code;
141
- void _cases;
142
- void _tools;
93
+ function makeAdapter(overrides = {}) {
94
+ return new ArtificerL2Adapter({
95
+ provider: 'test-provider',
96
+ model: 'test-model',
97
+ apiKeyEnv: 'TEST_API_KEY',
98
+ baseUrl: 'http://localhost:1234/v1',
99
+ gateDeps: overrides.gateDeps ?? makeAlwaysPassGateDeps(),
100
+ validator: new DefaultArtificerValidator(),
101
+ maxTurns: overrides.maxTurns,
102
+ totalBudgetMs: overrides.totalBudgetMs ?? 60_000,
103
+ maxTokens: overrides.maxTokens,
143
104
  });
144
- // ── happy path ─────────────────────────────────────────────────────────────
145
- it('returns V2 output on 1st attempt when sandbox replay passes (1 LLM call)', async () => {
146
- const generateCalls = [];
147
- const generateCode = async (prompt) => {
148
- generateCalls.push(prompt);
149
- return makeV2Output();
105
+ }
106
+ beforeEach(() => {
107
+ vi.clearAllMocks();
108
+ hoisted.mockReturn = [];
109
+ hoisted.impl = null;
110
+ hoisted.lastLoopConfig = {};
111
+ process.env.TEST_API_KEY = 'test-key';
112
+ });
113
+ // ── submit_rulecode capture (primary extraction) ─────────────────────────────
114
+ describe('PRI-439 ArtificerL2Adapter — submit_rulecode capture', () => {
115
+ it('returns the captured output when submit_rulecode was called', async () => {
116
+ const adapter = makeAdapter();
117
+ hoisted.impl = async (_p, context) => {
118
+ const submit = context.tools?.find((t) => t.name === 'submit_rulecode');
119
+ if (submit) {
120
+ await submit.execute('call-1', makeRuleOutput());
121
+ }
122
+ return [];
150
123
  };
151
- const adapter = new ArtificerL2Adapter({
152
- generateCode,
153
- gateDeps: makeAlwaysPassGateDeps(),
154
- validator: new DefaultArtificerValidator(),
155
- });
156
- const handle = await adapter.startRun({
157
- agentSpec: { agentId: 'artificer', schemaVersion: 'v1' },
158
- taskRef: { taskId: TASK_ID },
159
- inputPayload: 'initial prompt',
160
- contextItems: [],
161
- outputSchemaRef: 'artificer-output-v2',
162
- timeoutMs: 300_000,
163
- });
164
- expect(generateCalls).toHaveLength(1);
124
+ const handle = await adapter.startRun(makeStartRun());
165
125
  const output = await adapter.fetchOutput(handle.runId);
166
126
  expect(output).not.toBeNull();
167
- if (!output)
168
- return;
169
- expect(isArtificerOutputV2(output.payload)).toBe(true);
127
+ expect(output?.payload).toEqual(makeRuleOutput());
170
128
  });
171
- // ── fix path ───────────────────────────────────────────────────────────────
172
- it('injects sandbox failure feedback into 2nd attempt and returns V2 when it passes (2 LLM calls)', async () => {
173
- const generateCalls = [];
174
- const generateCode = async (prompt) => {
175
- generateCalls.push(prompt);
176
- return makeV2Output();
129
+ it('shouldStopAfterTurn returns true after output is captured', async () => {
130
+ const adapter = makeAdapter({ maxTurns: 8 });
131
+ hoisted.impl = async (_p, context) => {
132
+ const submit = context.tools?.find((t) => t.name === 'submit_rulecode');
133
+ if (submit) {
134
+ await submit.execute('call-1', makeRuleOutput());
135
+ }
136
+ return [];
177
137
  };
178
- const { deps } = makeFailNTimesGateDeps([FAILED_RUNTIME]);
179
- const adapter = new ArtificerL2Adapter({
180
- generateCode,
181
- gateDeps: deps,
182
- validator: new DefaultArtificerValidator(),
183
- });
184
- const handle = await adapter.startRun({
185
- agentSpec: { agentId: 'artificer', schemaVersion: 'v1' },
186
- taskRef: { taskId: TASK_ID },
187
- inputPayload: 'initial prompt',
188
- contextItems: [],
189
- outputSchemaRef: 'artificer-output-v2',
190
- timeoutMs: 300_000,
191
- });
192
- expect(generateCalls).toHaveLength(2);
193
- // 2nd prompt MUST contain the failure feedback from attempt 1 (EP-05 freshness).
194
- expect(generateCalls[1]).toContain('TypeError: x is undefined');
195
- const output = await adapter.fetchOutput(handle.runId);
196
- expect(output).not.toBeNull();
197
- if (!output)
138
+ await adapter.startRun(makeStartRun());
139
+ const stopFn = hoisted.lastLoopConfig.shouldStopAfterTurn;
140
+ expect(typeof stopFn).toBe('function');
141
+ if (!stopFn)
198
142
  return;
199
- expect(isArtificerOutputV2(output.payload)).toBe(true);
143
+ // After submit_rulecode captured output, the next shouldStopAfterTurn call returns true.
144
+ expect(stopFn()).toBe(true);
200
145
  });
201
- // ── exhaustion → V1 degradation ────────────────────────────────────────────
202
- it('degrades to V1 output (no code fields) when all 3 attempts fail (3 LLM calls)', async () => {
203
- const generateCalls = [];
204
- const generateCode = async (prompt) => {
205
- generateCalls.push(prompt);
206
- return makeV2Output();
207
- };
208
- const { deps } = makeFailNTimesGateDeps([FAILED_RUNTIME, FAILED_RUNTIME, FAILED_RUNTIME]);
209
- const adapter = new ArtificerL2Adapter({
210
- generateCode,
211
- gateDeps: deps,
212
- validator: new DefaultArtificerValidator(),
213
- });
214
- const handle = await adapter.startRun({
215
- agentSpec: { agentId: 'artificer', schemaVersion: 'v1' },
216
- taskRef: { taskId: TASK_ID },
217
- inputPayload: 'initial prompt',
218
- contextItems: [],
219
- outputSchemaRef: 'artificer-output-v2',
220
- timeoutMs: 300_000,
221
- });
222
- expect(generateCalls).toHaveLength(3);
223
- const output = await adapter.fetchOutput(handle.runId);
224
- expect(output).not.toBeNull();
225
- if (!output)
146
+ });
147
+ // ── maxTurns cap ─────────────────────────────────────────────────────────────
148
+ describe('PRI-439 ArtificerL2Adapter maxTurns cap', () => {
149
+ it('shouldStopAfterTurn returns false below maxTurns and true at/above, WITHOUT submit_rulecode', async () => {
150
+ const adapter = makeAdapter({ maxTurns: 5 });
151
+ hoisted.mockReturn = [
152
+ { role: 'assistant', content: 'thinking...' },
153
+ ];
154
+ await adapter.startRun(makeStartRun()).catch(() => {
155
+ // startRun throws when no output is captured — that's expected here.
156
+ });
157
+ const stopFn = hoisted.lastLoopConfig.shouldStopAfterTurn;
158
+ if (!stopFn) {
159
+ expect.fail('shouldStopAfterTurn not wired');
226
160
  return;
227
- // Degraded output must NOT be detected as V2 — downstream Evaluator skips code review.
228
- expect(isArtificerOutputV2(output.payload)).toBe(false);
229
- // V1 fields preserved (plan, lineage) so principle artifact path still works.
230
- expect(output.payload).toHaveProperty('implementationPlan');
161
+ }
162
+ expect(stopFn()).toBe(false); // turn 1
163
+ expect(stopFn()).toBe(false); // turn 2
164
+ expect(stopFn()).toBe(false); // turn 3
165
+ expect(stopFn()).toBe(false); // turn 4
166
+ expect(stopFn()).toBe(true); // turn 5 (>= maxTurns)
167
+ expect(stopFn()).toBe(true); // turn 6 (still >= maxTurns)
231
168
  });
232
- it('degraded V1 output still passes the V1 validator (principle artifact path intact)', async () => {
233
- const generateCode = async () => makeV2Output();
234
- const { deps } = makeFailNTimesGateDeps([FAILED_RUNTIME, FAILED_RUNTIME, FAILED_RUNTIME]);
235
- const validator = new DefaultArtificerValidator();
236
- const adapter = new ArtificerL2Adapter({
237
- generateCode,
238
- gateDeps: deps,
239
- validator,
240
- });
241
- const handle = await adapter.startRun({
242
- agentSpec: { agentId: 'artificer', schemaVersion: 'v1' },
243
- taskRef: { taskId: TASK_ID },
244
- inputPayload: 'initial prompt',
245
- contextItems: [],
246
- outputSchemaRef: 'artificer-output-v2',
247
- timeoutMs: 300_000,
248
- });
249
- const output = await adapter.fetchOutput(handle.runId);
250
- expect(output).not.toBeNull();
251
- if (!output)
169
+ });
170
+ // ── beforeToolCall whitelist ─────────────────────────────────────────────────
171
+ describe('PRI-439 ArtificerL2Adapter beforeToolCall whitelist', () => {
172
+ it('blocks unknown tools', async () => {
173
+ const adapter = makeAdapter();
174
+ hoisted.mockReturn = [];
175
+ await adapter.startRun(makeStartRun()).catch(() => {
176
+ // startRun throws when no output is captured — expected.
177
+ });
178
+ const beforeFn = hoisted.lastLoopConfig.beforeToolCall;
179
+ expect(typeof beforeFn).toBe('function');
180
+ if (!beforeFn)
252
181
  return;
253
- const result = await validator.validate(output.payload, TASK_ID);
254
- expect(result.valid).toBe(true);
182
+ const result = await beforeFn({ toolCall: { name: 'unknown_tool' } });
183
+ expect(result).toEqual({ block: true, reason: expect.stringContaining('unknown_tool') });
255
184
  });
256
- // ── error type coverage ────────────────────────────────────────────────────
257
- it('handles forbidden_pattern failure and injects it as feedback', async () => {
258
- const generateCalls = [];
259
- const generateCode = async (prompt) => {
260
- generateCalls.push(prompt);
261
- return makeV2Output();
262
- };
263
- const { deps } = makeFailNTimesGateDeps([FAILED_FORBIDDEN]);
264
- const adapter = new ArtificerL2Adapter({
265
- generateCode,
266
- gateDeps: deps,
267
- validator: new DefaultArtificerValidator(),
268
- });
269
- await adapter.startRun({
270
- agentSpec: { agentId: 'artificer', schemaVersion: 'v1' },
271
- taskRef: { taskId: TASK_ID },
272
- inputPayload: 'initial prompt',
273
- contextItems: [],
274
- outputSchemaRef: 'artificer-output-v2',
275
- timeoutMs: 300_000,
276
- });
277
- expect(generateCalls).toHaveLength(2);
278
- expect(generateCalls[1]).toContain('require');
279
- });
280
- it('handles timeout failure and injects it as feedback', async () => {
281
- const generateCalls = [];
282
- const generateCode = async (prompt) => {
283
- generateCalls.push(prompt);
284
- return makeV2Output();
285
- };
286
- const { deps } = makeFailNTimesGateDeps([FAILED_TIMEOUT]);
287
- const adapter = new ArtificerL2Adapter({
288
- generateCode,
289
- gateDeps: deps,
290
- validator: new DefaultArtificerValidator(),
291
- });
292
- await adapter.startRun({
293
- agentSpec: { agentId: 'artificer', schemaVersion: 'v1' },
294
- taskRef: { taskId: TASK_ID },
295
- inputPayload: 'initial prompt',
296
- contextItems: [],
297
- outputSchemaRef: 'artificer-output-v2',
298
- timeoutMs: 300_000,
299
- });
300
- expect(generateCalls[1]).toContain('timeout');
185
+ it('allows whitelisted tools', async () => {
186
+ const adapter = makeAdapter();
187
+ hoisted.mockReturn = [];
188
+ await adapter.startRun(makeStartRun()).catch(() => {
189
+ // startRun throws when no output is captured — expected.
190
+ });
191
+ const beforeFn = hoisted.lastLoopConfig.beforeToolCall;
192
+ if (!beforeFn) {
193
+ expect.fail('beforeToolCall not wired');
194
+ return;
195
+ }
196
+ for (const name of ['read_rulecode_spec', 'validate_rulecode', 'replay_rulecode', 'submit_rulecode']) {
197
+ const result = await beforeFn({ toolCall: { name } });
198
+ expect(result).toBeUndefined();
199
+ }
301
200
  });
302
- it('handles validation_failed failure and injects it as feedback', async () => {
303
- const generateCalls = [];
304
- const generateCode = async (prompt) => {
305
- generateCalls.push(prompt);
306
- return makeV2Output();
307
- };
308
- const { deps } = makeFailNTimesGateDeps([FAILED_VALIDATION]);
309
- const adapter = new ArtificerL2Adapter({
310
- generateCode,
311
- gateDeps: deps,
312
- validator: new DefaultArtificerValidator(),
313
- });
314
- await adapter.startRun({
315
- agentSpec: { agentId: 'artificer', schemaVersion: 'v1' },
316
- taskRef: { taskId: TASK_ID },
317
- inputPayload: 'initial prompt',
318
- contextItems: [],
319
- outputSchemaRef: 'artificer-output-v2',
320
- timeoutMs: 300_000,
321
- });
322
- expect(generateCalls[1]).toContain('expected block got allow');
201
+ });
202
+ // ── exhaustion: no V1/L1 fallback ────────────────────────────────────────────
203
+ describe('PRI-439 ArtificerL2Adapter exhaustion (no fallback)', () => {
204
+ it('throws PDRuntimeError when the loop ends without submit_rulecode', async () => {
205
+ const adapter = makeAdapter({ maxTurns: 3 });
206
+ hoisted.mockReturn = [
207
+ { role: 'assistant', content: 'I cannot produce valid code.' },
208
+ ];
209
+ await expect(adapter.startRun(makeStartRun())).rejects.toThrow(/without a submit_rulecode call/);
210
+ // No output stored for the failed run — fetchOutput returns null.
211
+ const runs = adapter.runs;
212
+ expect(runs.size).toBe(1);
213
+ for (const [, state] of runs) {
214
+ expect(state.output).toBeNull();
215
+ }
323
216
  });
324
- // ── EP-05 freshness: each attempt uses the immediately-prior failure ───────
325
- it('injects attempt-N failure (not stale) into attempt N+1 prompt', async () => {
326
- const generateCalls = [];
327
- const generateCode = async (prompt) => {
328
- generateCalls.push(prompt);
329
- return makeV2Output();
330
- };
331
- // attempt 1 fails with runtime_error, attempt 2 fails with timeout, attempt 3 passes
332
- const { deps } = makeFailNTimesGateDeps([FAILED_RUNTIME, FAILED_TIMEOUT]);
333
- const adapter = new ArtificerL2Adapter({
334
- generateCode,
335
- gateDeps: deps,
336
- validator: new DefaultArtificerValidator(),
337
- });
338
- await adapter.startRun({
339
- agentSpec: { agentId: 'artificer', schemaVersion: 'v1' },
340
- taskRef: { taskId: TASK_ID },
341
- inputPayload: 'initial prompt',
342
- contextItems: [],
343
- outputSchemaRef: 'artificer-output-v2',
344
- timeoutMs: 300_000,
345
- });
346
- // attempt 2 prompt must mention attempt 1's runtime_error, NOT attempt 2's timeout
347
- expect(generateCalls[1]).toContain('TypeError: x is undefined');
348
- expect(generateCalls[1]).not.toContain('exceeded 1000ms');
349
- // attempt 3 prompt must mention attempt 2's timeout, NOT attempt 1's runtime_error
350
- expect(generateCalls[2]).toContain('exceeded 1000ms');
217
+ it('emits artificer_l2_complete telemetry with succeeded=false on exhaustion', async () => {
218
+ const adapter = makeAdapter({ maxTurns: 2 });
219
+ hoisted.mockReturn = [{ role: 'assistant', content: 'no code' }];
220
+ await expect(adapter.startRun(makeStartRun())).rejects.toThrow();
221
+ const completeCalls = emitTelemetryMock.mock.calls.filter((c) => c[0].eventType === 'artificer_l2_complete');
222
+ expect(completeCalls.length).toBe(1);
223
+ const payload = completeCalls[0][0].payload;
224
+ expect(payload.succeeded).toBe(false);
351
225
  });
352
- // ── golden trace used for replay must be valid ──────────────────────────────
353
- it('builds a valid golden trace from the V2 output for sandbox replay', async () => {
354
- const generateCode = async () => makeV2Output();
355
- let capturedTrace = null;
356
- const deps = {
357
- evaluateInSandbox: (_code, trace, _opts) => {
358
- capturedTrace = trace;
359
- return { success: true, failedCases: [], executionTimeMs: 1, forbiddenPatternViolations: [] };
360
- },
226
+ });
227
+ // ── loop error ───────────────────────────────────────────────────────────────
228
+ describe('PRI-439 ArtificerL2Adapter loop error', () => {
229
+ it('throws PDRuntimeError when runAgentLoop throws', async () => {
230
+ const adapter = makeAdapter();
231
+ hoisted.impl = async () => {
232
+ throw new Error('LLM provider unavailable');
361
233
  };
362
- const adapter = new ArtificerL2Adapter({
363
- generateCode,
364
- gateDeps: deps,
365
- validator: new DefaultArtificerValidator(),
366
- });
367
- await adapter.startRun({
368
- agentSpec: { agentId: 'artificer', schemaVersion: 'v1' },
369
- taskRef: { taskId: TASK_ID },
370
- inputPayload: 'initial prompt',
371
- contextItems: [],
372
- outputSchemaRef: 'artificer-output-v2',
373
- timeoutMs: 300_000,
374
- });
375
- expect(capturedTrace).not.toBeNull();
376
- expect(validateGoldenTrace(capturedTrace).valid).toBe(true);
234
+ await expect(adapter.startRun(makeStartRun())).rejects.toThrow(/agent loop threw/);
377
235
  });
378
- // ── invalid LLM output (fails validator) is retried, not silently accepted ─
379
- it('retries when LLM output fails the ArtificerValidator (malformed V2)', async () => {
380
- let attempt = 0;
381
- const generateCode = async () => {
382
- attempt += 1;
383
- if (attempt === 1) {
384
- // Malformed: missing affectedTools
385
- const bad = makeV2Output();
386
- delete bad.affectedTools;
387
- return bad;
236
+ });
237
+ // ── runtime metadata ─────────────────────────────────────────────────────────
238
+ describe('PRI-439 ArtificerL2Adapter runtime metadata', () => {
239
+ it('pollRun returns succeeded status after startRun completes with output', async () => {
240
+ const adapter = makeAdapter();
241
+ hoisted.impl = async (_p, context) => {
242
+ const submit = context.tools?.find((t) => t.name === 'submit_rulecode');
243
+ if (submit) {
244
+ await submit.execute('call-1', makeRuleOutput());
388
245
  }
389
- return makeV2Output();
246
+ return [];
390
247
  };
391
- const adapter = new ArtificerL2Adapter({
392
- generateCode,
393
- gateDeps: makeAlwaysPassGateDeps(),
394
- validator: new DefaultArtificerValidator(),
395
- });
396
- const handle = await adapter.startRun({
397
- agentSpec: { agentId: 'artificer', schemaVersion: 'v1' },
398
- taskRef: { taskId: TASK_ID },
399
- inputPayload: 'initial prompt',
400
- contextItems: [],
401
- outputSchemaRef: 'artificer-output-v2',
402
- timeoutMs: 300_000,
403
- });
404
- const output = await adapter.fetchOutput(handle.runId);
405
- expect(output).not.toBeNull();
406
- if (!output)
407
- return;
408
- // 2nd attempt produces valid V2 → replay passes → V2 output
409
- expect(isArtificerOutputV2(output.payload)).toBe(true);
248
+ const handle = await adapter.startRun(makeStartRun());
249
+ const status = await adapter.pollRun(handle.runId);
250
+ expect(status.status).toBe('succeeded');
410
251
  });
411
- // ── P1+P2 fixes: validator-rejected candidates never degrade, total failure throws ─
412
- it('throws (does NOT degrade) when all 3 attempts fail validation — no validated V2 to degrade from', async () => {
413
- // P2 fix: validator rejection must NOT set lastValidV2. Without a validated
414
- // candidate, degradation is impossible (Runtime Contract Rule 1/3 — never
415
- // emit an unvalidated object). The adapter throws PDRuntimeError instead,
416
- // which BasePeerRunner.handlePostLeaseError catches → task fails.
417
- const generateCode = async () => {
418
- // Every attempt returns malformed V2 (missing affectedTools).
419
- const bad = makeV2Output();
420
- delete bad.affectedTools;
421
- return bad;
422
- };
423
- const adapter = new ArtificerL2Adapter({
424
- generateCode,
425
- gateDeps: makeAlwaysPassGateDeps(),
426
- validator: new DefaultArtificerValidator(),
427
- });
428
- await expect(adapter.startRun({
429
- agentSpec: { agentId: 'artificer', schemaVersion: 'v1' },
430
- taskRef: { taskId: TASK_ID },
431
- inputPayload: 'initial prompt',
432
- contextItems: [],
433
- outputSchemaRef: 'artificer-output-v2',
434
- timeoutMs: 300_000,
435
- })).rejects.toThrow(/without a validated candidate/);
252
+ it('kind() returns pi-ai-l2', () => {
253
+ const adapter = makeAdapter();
254
+ expect(adapter.kind()).toBe('pi-ai-l2');
436
255
  });
437
- it('degrades to V1 only when a VALIDATED V2 candidate existed (replay failed, not validation)', async () => {
438
- // Confirms the positive side of the P2 fix: a validated V2 that fails replay
439
- // CAN degrade. This is the legitimate degradation path (plan is valid, only
440
- // the code was wrong).
441
- const generateCode = async () => makeV2Output();
442
- const { deps } = makeFailNTimesGateDeps([FAILED_RUNTIME, FAILED_RUNTIME, FAILED_RUNTIME]);
443
- const adapter = new ArtificerL2Adapter({
444
- generateCode,
445
- gateDeps: deps,
446
- validator: new DefaultArtificerValidator(),
447
- });
448
- const handle = await adapter.startRun({
449
- agentSpec: { agentId: 'artificer', schemaVersion: 'v1' },
450
- taskRef: { taskId: TASK_ID },
451
- inputPayload: 'initial prompt',
452
- contextItems: [],
453
- outputSchemaRef: 'artificer-output-v2',
454
- timeoutMs: 300_000,
455
- });
456
- const output = await adapter.fetchOutput(handle.runId);
457
- expect(output).not.toBeNull();
458
- if (!output)
459
- return;
460
- expect(isArtificerOutputV2(output.payload)).toBe(false);
256
+ it('getCapabilities reports supportsToolUse=true', async () => {
257
+ const adapter = makeAdapter();
258
+ const caps = await adapter.getCapabilities();
259
+ expect(caps.supportsToolUse).toBe(true);
461
260
  });
462
- // ── runtime metadata ─────────────────────────────────────────────────────────
463
- it('pollRun returns terminal status after startRun completes', async () => {
464
- const generateCode = async () => makeV2Output();
465
- const adapter = new ArtificerL2Adapter({
466
- generateCode,
467
- gateDeps: makeAlwaysPassGateDeps(),
468
- validator: new DefaultArtificerValidator(),
469
- });
470
- const handle = await adapter.startRun({
471
- agentSpec: { agentId: 'artificer', schemaVersion: 'v1' },
472
- taskRef: { taskId: TASK_ID },
473
- inputPayload: 'initial prompt',
474
- contextItems: [],
475
- outputSchemaRef: 'artificer-output-v2',
476
- timeoutMs: 300_000,
477
- });
478
- const status = await adapter.pollRun(handle.runId);
479
- // RunStatus is an object { runId, status, ... }; status.status is the execution state.
480
- expect(['succeeded', 'failed']).toContain(status.status);
261
+ it('healthCheck returns unhealthy when API key is missing', async () => {
262
+ delete process.env.TEST_API_KEY;
263
+ const adapter = makeAdapter();
264
+ const health = await adapter.healthCheck();
265
+ expect(health.healthy).toBe(false);
481
266
  });
482
- it('kind() returns a stable runtime kind identifier', () => {
483
- const adapter = new ArtificerL2Adapter({
484
- generateCode: async () => makeV2Output(),
485
- gateDeps: makeAlwaysPassGateDeps(),
486
- validator: new DefaultArtificerValidator(),
487
- });
488
- expect(Value.Check(RuntimeKindSchema, adapter.kind())).toBe(true);
489
- expect(adapter.kind()).toBe('pi-ai-l2');
267
+ it('healthCheck returns healthy when API key is present', async () => {
268
+ const adapter = makeAdapter();
269
+ const health = await adapter.healthCheck();
270
+ expect(health.healthy).toBe(true);
490
271
  });
491
- it('returns a RunHandle that satisfies the runtime protocol schema', async () => {
492
- const adapter = new ArtificerL2Adapter({
493
- generateCode: async () => makeV2Output(),
494
- gateDeps: makeAlwaysPassGateDeps(),
495
- validator: new DefaultArtificerValidator(),
496
- });
497
- const handle = await adapter.startRun({
498
- agentSpec: { agentId: 'artificer', schemaVersion: 'v1' },
499
- taskRef: { taskId: TASK_ID },
500
- inputPayload: '{}',
501
- contextItems: [],
502
- outputSchemaRef: 'artificer-output-v2',
503
- timeoutMs: 30_000,
272
+ it('startRun throws when API key is missing', async () => {
273
+ delete process.env.TEST_API_KEY;
274
+ const adapter = makeAdapter();
275
+ await expect(adapter.startRun(makeStartRun())).rejects.toThrow(/API key not found/);
276
+ });
277
+ });
278
+ // ── config defaults ──────────────────────────────────────────────────────────
279
+ describe('PRI-439 ArtificerL2Adapter config defaults', () => {
280
+ it('wires maxTokens=8192 default into loopConfig', async () => {
281
+ const adapter = makeAdapter();
282
+ hoisted.mockReturn = [];
283
+ await adapter.startRun(makeStartRun()).catch(() => {
284
+ // expected — no output captured
285
+ });
286
+ expect(hoisted.lastLoopConfig.maxTokens).toBe(8192);
287
+ });
288
+ it('wires custom maxTokens when provided', async () => {
289
+ const adapter = makeAdapter({ maxTokens: 4096 });
290
+ hoisted.mockReturn = [];
291
+ await adapter.startRun(makeStartRun()).catch(() => {
292
+ // expected
504
293
  });
505
- expect(Value.Check(RunHandleSchema, handle)).toBe(true);
294
+ expect(hoisted.lastLoopConfig.maxTokens).toBe(4096);
506
295
  });
507
- it.each([0, -1, 1.5, Number.POSITIVE_INFINITY])('rejects invalid maxAttempts=%s', (maxAttempts) => {
508
- expect(() => new ArtificerL2Adapter({
509
- generateCode: async () => makeV2Output(),
510
- gateDeps: makeAlwaysPassGateDeps(),
511
- validator: new DefaultArtificerValidator(),
512
- maxAttempts,
513
- })).toThrow(/maxAttempts/);
296
+ });
297
+ // ── telemetry ────────────────────────────────────────────────────────────────
298
+ describe('PRI-439 ArtificerL2Adapter — telemetry', () => {
299
+ it('emits artificer_l2_turn with phase=loop_started at start', async () => {
300
+ const adapter = makeAdapter();
301
+ hoisted.mockReturn = [];
302
+ await adapter.startRun(makeStartRun()).catch(() => {
303
+ // expected
304
+ });
305
+ const startCalls = emitTelemetryMock.mock.calls.filter((c) => {
306
+ const evt = c[0];
307
+ return evt.eventType === 'artificer_l2_turn' && evt.payload?.phase === 'loop_started';
308
+ });
309
+ expect(startCalls.length).toBe(1);
514
310
  });
311
+ it('emits artificer_l2_complete with succeeded=true on success', async () => {
312
+ const adapter = makeAdapter();
313
+ hoisted.impl = async (_p, context) => {
314
+ const submit = context.tools?.find((t) => t.name === 'submit_rulecode');
315
+ if (submit) {
316
+ await submit.execute('call-1', makeRuleOutput());
317
+ }
318
+ return [];
319
+ };
320
+ await adapter.startRun(makeStartRun());
321
+ const completeCalls = emitTelemetryMock.mock.calls.filter((c) => c[0].eventType === 'artificer_l2_complete');
322
+ expect(completeCalls.length).toBe(1);
323
+ const payload = completeCalls[0][0].payload;
324
+ expect(payload.succeeded).toBe(true);
325
+ });
326
+ });
327
+ // ── input serialization ──────────────────────────────────────────────────────
328
+ describe('PRI-439 ArtificerL2Adapter — input serialization', () => {
515
329
  it('bounds and safely serializes an unknown prompt payload', async () => {
516
330
  const circular = { text: 'x'.repeat(60_000) };
517
331
  circular.self = circular;
518
- let receivedPrompt = '';
519
- const adapter = new ArtificerL2Adapter({
520
- generateCode: async (prompt) => {
521
- receivedPrompt = prompt;
522
- return makeV2Output();
523
- },
524
- gateDeps: makeAlwaysPassGateDeps(),
525
- validator: new DefaultArtificerValidator(),
526
- });
527
- await adapter.startRun({
528
- agentSpec: { agentId: 'artificer', schemaVersion: 'v1' },
529
- taskRef: { taskId: TASK_ID },
530
- inputPayload: circular,
531
- contextItems: [],
532
- outputSchemaRef: 'artificer-output-v2',
533
- timeoutMs: 30_000,
534
- });
535
- expect(receivedPrompt.length).toBeLessThanOrEqual(50_003);
332
+ const adapter = makeAdapter();
333
+ hoisted.mockReturn = [];
334
+ // The circular payload is safely stringified (safeStringifyPreview handles cycles).
335
+ // startRun still throws because no output is captured, but it should NOT throw
336
+ // a serialization error.
337
+ await expect(adapter.startRun(makeStartRun({ inputPayload: circular }))).rejects.toThrow(/without a submit_rulecode call/);
536
338
  });
537
339
  });
538
340
  //# sourceMappingURL=artificer-l2-adapter.test.js.map