@principles/core 1.160.0 → 1.162.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/runtime-v2/__tests__/adversarial-loop.test.js +24 -78
- package/dist/runtime-v2/__tests__/adversarial-loop.test.js.map +1 -1
- package/dist/runtime-v2/__tests__/architecture-regression.test.js +8 -5
- package/dist/runtime-v2/__tests__/architecture-regression.test.js.map +1 -1
- package/dist/runtime-v2/__tests__/artificer-runner-vslice.test.js +32 -80
- package/dist/runtime-v2/__tests__/artificer-runner-vslice.test.js.map +1 -1
- package/dist/runtime-v2/__tests__/full-chain-real-llm.test.js +2 -2
- package/dist/runtime-v2/__tests__/full-chain-real-llm.test.js.map +1 -1
- package/dist/runtime-v2/__tests__/principle-compiler-core.test.js +7 -7
- package/dist/runtime-v2/__tests__/principle-compiler-core.test.js.map +1 -1
- package/dist/runtime-v2/activation/__tests__/production-gate-deps.test.js +24 -1
- package/dist/runtime-v2/activation/__tests__/production-gate-deps.test.js.map +1 -1
- package/dist/runtime-v2/activation/production-gate-deps.d.ts.map +1 -1
- package/dist/runtime-v2/activation/production-gate-deps.js +18 -1
- package/dist/runtime-v2/activation/production-gate-deps.js.map +1 -1
- package/dist/runtime-v2/adapter/__tests__/artificer-l2-adapter.test.js +272 -442
- package/dist/runtime-v2/adapter/__tests__/artificer-l2-adapter.test.js.map +1 -1
- package/dist/runtime-v2/adapter/artificer-l2-adapter.d.ts +14 -34
- package/dist/runtime-v2/adapter/artificer-l2-adapter.d.ts.map +1 -1
- package/dist/runtime-v2/adapter/artificer-l2-adapter.js +182 -220
- package/dist/runtime-v2/adapter/artificer-l2-adapter.js.map +1 -1
- package/dist/runtime-v2/adapter/pi-ai-runtime-adapter.js +2 -2
- package/dist/runtime-v2/adapter/pi-ai-runtime-adapter.js.map +1 -1
- package/dist/runtime-v2/adversarial-loop.d.ts.map +1 -1
- package/dist/runtime-v2/adversarial-loop.js +5 -27
- package/dist/runtime-v2/adversarial-loop.js.map +1 -1
- package/dist/runtime-v2/golden-trace-replay-validator.d.ts +8 -0
- package/dist/runtime-v2/golden-trace-replay-validator.d.ts.map +1 -1
- package/dist/runtime-v2/golden-trace-replay-validator.js +3 -3
- package/dist/runtime-v2/golden-trace-replay-validator.js.map +1 -1
- package/dist/runtime-v2/golden-trace.d.ts +16 -1
- package/dist/runtime-v2/golden-trace.d.ts.map +1 -1
- package/dist/runtime-v2/golden-trace.js +13 -4
- package/dist/runtime-v2/golden-trace.js.map +1 -1
- package/dist/runtime-v2/index.d.ts +8 -5
- package/dist/runtime-v2/index.d.ts.map +1 -1
- package/dist/runtime-v2/index.js +11 -4
- package/dist/runtime-v2/index.js.map +1 -1
- package/dist/runtime-v2/internalization/__tests__/artificer-rule-output.test.d.ts +2 -0
- package/dist/runtime-v2/internalization/__tests__/artificer-rule-output.test.d.ts.map +1 -0
- package/dist/runtime-v2/internalization/__tests__/{artificer-output-v2.test.js → artificer-rule-output.test.js} +126 -127
- package/dist/runtime-v2/internalization/__tests__/artificer-rule-output.test.js.map +1 -0
- package/dist/runtime-v2/internalization/__tests__/rule-code-dialect.test.d.ts +2 -0
- package/dist/runtime-v2/internalization/__tests__/rule-code-dialect.test.d.ts.map +1 -0
- package/dist/runtime-v2/internalization/__tests__/rule-code-dialect.test.js +270 -0
- package/dist/runtime-v2/internalization/__tests__/rule-code-dialect.test.js.map +1 -0
- package/dist/runtime-v2/internalization/__tests__/rule-host-input-builder.test.d.ts +2 -0
- package/dist/runtime-v2/internalization/__tests__/rule-host-input-builder.test.d.ts.map +1 -0
- package/dist/runtime-v2/internalization/__tests__/rule-host-input-builder.test.js +180 -0
- package/dist/runtime-v2/internalization/__tests__/rule-host-input-builder.test.js.map +1 -0
- package/dist/runtime-v2/internalization/artificer-output.d.ts +33 -51
- package/dist/runtime-v2/internalization/artificer-output.d.ts.map +1 -1
- package/dist/runtime-v2/internalization/artificer-output.js +48 -87
- package/dist/runtime-v2/internalization/artificer-output.js.map +1 -1
- package/dist/runtime-v2/internalization/artificer-runner.d.ts +8 -8
- package/dist/runtime-v2/internalization/artificer-runner.d.ts.map +1 -1
- package/dist/runtime-v2/internalization/artificer-runner.js +5 -5
- package/dist/runtime-v2/internalization/artificer-runner.js.map +1 -1
- package/dist/runtime-v2/internalization/evaluator-runner.js +2 -2
- package/dist/runtime-v2/internalization/index.d.ts +9 -4
- package/dist/runtime-v2/internalization/index.d.ts.map +1 -1
- package/dist/runtime-v2/internalization/index.js +8 -3
- package/dist/runtime-v2/internalization/index.js.map +1 -1
- package/dist/runtime-v2/internalization/rule-code-validator.d.ts +16 -0
- package/dist/runtime-v2/internalization/rule-code-validator.d.ts.map +1 -1
- package/dist/runtime-v2/internalization/rule-code-validator.js +50 -1
- package/dist/runtime-v2/internalization/rule-code-validator.js.map +1 -1
- package/dist/runtime-v2/internalization/rule-host-evaluator.d.ts +1 -0
- package/dist/runtime-v2/internalization/rule-host-evaluator.d.ts.map +1 -1
- package/dist/runtime-v2/internalization/rule-host-evaluator.js +6 -2
- package/dist/runtime-v2/internalization/rule-host-evaluator.js.map +1 -1
- package/dist/runtime-v2/internalization/rule-host-input-builder.d.ts +62 -0
- package/dist/runtime-v2/internalization/rule-host-input-builder.d.ts.map +1 -0
- package/dist/runtime-v2/internalization/rule-host-input-builder.js +182 -0
- package/dist/runtime-v2/internalization/rule-host-input-builder.js.map +1 -0
- package/dist/runtime-v2/internalization/rule-host-validator.d.ts.map +1 -1
- package/dist/runtime-v2/internalization/rule-host-validator.js +22 -1
- package/dist/runtime-v2/internalization/rule-host-validator.js.map +1 -1
- package/dist/runtime-v2/internalization/template-generator.d.ts +7 -2
- package/dist/runtime-v2/internalization/template-generator.d.ts.map +1 -1
- package/dist/runtime-v2/internalization/template-generator.js +10 -5
- package/dist/runtime-v2/internalization/template-generator.js.map +1 -1
- package/dist/runtime-v2/tools/__tests__/artificer-l2-tool-contract.test.d.ts +2 -0
- package/dist/runtime-v2/tools/__tests__/artificer-l2-tool-contract.test.d.ts.map +1 -0
- package/dist/runtime-v2/tools/__tests__/artificer-l2-tool-contract.test.js +322 -0
- package/dist/runtime-v2/tools/__tests__/artificer-l2-tool-contract.test.js.map +1 -0
- package/dist/runtime-v2/tools/__tests__/artificer-output-typebox.test.d.ts +2 -0
- package/dist/runtime-v2/tools/__tests__/artificer-output-typebox.test.d.ts.map +1 -0
- package/dist/runtime-v2/tools/__tests__/artificer-output-typebox.test.js +149 -0
- package/dist/runtime-v2/tools/__tests__/artificer-output-typebox.test.js.map +1 -0
- package/dist/runtime-v2/tools/artificer-l2-tool-contract.d.ts +72 -0
- package/dist/runtime-v2/tools/artificer-l2-tool-contract.d.ts.map +1 -0
- package/dist/runtime-v2/tools/artificer-l2-tool-contract.js +275 -0
- package/dist/runtime-v2/tools/artificer-l2-tool-contract.js.map +1 -0
- package/dist/runtime-v2/tools/artificer-output-typebox.d.ts +78 -0
- package/dist/runtime-v2/tools/artificer-output-typebox.d.ts.map +1 -0
- package/dist/runtime-v2/tools/artificer-output-typebox.js +70 -0
- package/dist/runtime-v2/tools/artificer-output-typebox.js.map +1 -0
- package/dist/telemetry-event.d.ts +2 -2
- package/dist/telemetry-event.d.ts.map +1 -1
- package/dist/telemetry-event.js +5 -3
- package/dist/telemetry-event.js.map +1 -1
- package/package.json +1 -1
- package/dist/runtime-v2/internalization/__tests__/artificer-output-v2.test.d.ts +0 -2
- package/dist/runtime-v2/internalization/__tests__/artificer-output-v2.test.d.ts.map +0 -1
- package/dist/runtime-v2/internalization/__tests__/artificer-output-v2.test.js.map +0 -1
|
@@ -1,64 +1,73 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* ArtificerL2Adapter tests (
|
|
3
|
-
* PRD Decision 8, test module 7).
|
|
2
|
+
* ArtificerL2Adapter tests (PRI-439 Phase 4 — tool-using L2 agent).
|
|
4
3
|
*
|
|
5
|
-
*
|
|
6
|
-
*
|
|
4
|
+
* Mocks runAgentLoop (no real LLM calls) to verify the adapter's orchestration:
|
|
5
|
+
* - submit_rulecode capture terminates the loop and stores the output
|
|
6
|
+
* - maxTurns cap forces stop when submit_rulecode is never called
|
|
7
|
+
* - beforeToolCall whitelist blocks non-allowlisted tools
|
|
8
|
+
* - shouldStopAfterTurn checks output capture + turn count
|
|
9
|
+
* - no V1/L1 fallback: exhaustion throws PDRuntimeError
|
|
10
|
+
* - timeout: abort signal triggers timed_out failure
|
|
11
|
+
* - telemetry events (artificer_l2_turn / artificer_l2_complete) are emitted
|
|
7
12
|
*
|
|
8
|
-
*
|
|
9
|
-
*
|
|
10
|
-
*
|
|
11
|
-
*
|
|
12
|
-
* multi-attempt logic in the adapter, not in succeedTask().
|
|
13
|
-
*
|
|
14
|
-
* Testability: LLM calls are mocked via an injected `generateCode` function.
|
|
15
|
-
* Sandbox replay uses real evaluateRefinerRuleHostGate with a controllable
|
|
16
|
-
* RefinerRuleHostGateDeps. No real LLM calls.
|
|
17
|
-
*
|
|
18
|
-
* Coverage (PRD test module 7):
|
|
19
|
-
* - happy path: 1st attempt passes replay → V2 output (1 LLM call)
|
|
20
|
-
* - fix path: 1st attempt fails → feedback injected → 2nd passes → V2 (2 LLM calls)
|
|
21
|
-
* - exhaustion: 3 attempts all fail → V1 degraded output (no code fields)
|
|
22
|
-
* - error types: forbidden_pattern / runtime_error / timeout / validation_failed
|
|
23
|
-
* - V1 backward compat: degraded V1 output is NOT detected as V2 by isArtificerOutputV2
|
|
24
|
-
*
|
|
25
|
-
* ERR checklist (EP-05 Loop State Freshness): each attempt reads fresh sandbox
|
|
26
|
-
* errors; the feedback injected into attempt N+1 is from attempt N's failure,
|
|
27
|
-
* never stale. (ERR-015/018/019)
|
|
13
|
+
* ERR checklist:
|
|
14
|
+
* - EP-05 Loop State Freshness: each startRun uses fresh outputCapture + turnCount
|
|
15
|
+
* - EP-03 Fail Loud: exhaustion throws PDRuntimeError with structured nextAction
|
|
16
|
+
* - EP-01 Trust Boundary: submit_rulecode validates via injected validator
|
|
28
17
|
*/
|
|
29
|
-
import { describe, it, expect } from 'vitest';
|
|
18
|
+
import { describe, it, expect, vi, beforeEach } from 'vitest';
|
|
19
|
+
const hoisted = vi.hoisted(() => {
|
|
20
|
+
return {
|
|
21
|
+
lastLoopConfig: {},
|
|
22
|
+
mockReturn: [],
|
|
23
|
+
impl: null,
|
|
24
|
+
};
|
|
25
|
+
});
|
|
26
|
+
/* eslint-disable @typescript-eslint/max-params -- runAgentLoop mock mirrors the real 5-param signature */
|
|
27
|
+
vi.mock('@earendil-works/pi-agent-core', () => ({
|
|
28
|
+
runAgentLoop: vi.fn(async (prompts, context, config, emit, signal) => {
|
|
29
|
+
hoisted.lastLoopConfig = config;
|
|
30
|
+
if (typeof hoisted.impl === 'function') {
|
|
31
|
+
const fn = hoisted.impl;
|
|
32
|
+
return fn(prompts, context, config, emit, signal);
|
|
33
|
+
}
|
|
34
|
+
return hoisted.mockReturn.slice();
|
|
35
|
+
}),
|
|
36
|
+
}));
|
|
37
|
+
/* eslint-enable @typescript-eslint/max-params */
|
|
38
|
+
// Mock resolveL2Model's pi-ai dependencies (getModel/getProviders) — the adapter
|
|
39
|
+
// uses the custom baseUrl path so these stubs are never called for real.
|
|
40
|
+
vi.mock('@earendil-works/pi-ai', () => ({
|
|
41
|
+
completeSimple: vi.fn(),
|
|
42
|
+
getModel: vi.fn(() => ({ id: 'test', name: 'test', api: 'openai-completions', provider: 'test-provider' })),
|
|
43
|
+
getProviders: vi.fn(() => []),
|
|
44
|
+
}));
|
|
45
|
+
vi.mock('../../store/event-emitter.js', () => ({
|
|
46
|
+
storeEmitter: { emitTelemetry: vi.fn() },
|
|
47
|
+
}));
|
|
48
|
+
import { storeEmitter } from '../../store/event-emitter.js';
|
|
30
49
|
import { ArtificerL2Adapter } from '../artificer-l2-adapter.js';
|
|
31
|
-
import {
|
|
32
|
-
|
|
33
|
-
import { Value } from '@sinclair/typebox/value';
|
|
34
|
-
import { RunHandleSchema, RuntimeKindSchema } from '../../runtime-protocol.js';
|
|
50
|
+
import { DefaultArtificerValidator } from '../../internalization/artificer-output.js';
|
|
51
|
+
const emitTelemetryMock = storeEmitter.emitTelemetry;
|
|
35
52
|
const TASK_ID = 'task-artificer-l2-001';
|
|
36
|
-
/** A valid
|
|
37
|
-
function
|
|
53
|
+
/** A valid ArtificerRuleOutput the model might submit via submit_rulecode. */
|
|
54
|
+
function makeRuleOutput(overrides = {}) {
|
|
38
55
|
return {
|
|
39
56
|
taskId: TASK_ID,
|
|
40
|
-
sourceScribeArtifactId: 'pi-art-scribe-001
|
|
41
|
-
implementationPlan: {
|
|
42
|
-
summary: 'Block writes to system dirs',
|
|
43
|
-
targetSurface: 'edit gate',
|
|
44
|
-
changes: ['path prefix check'],
|
|
45
|
-
tests: ['golden trace replay'],
|
|
46
|
-
rolloutNotes: ['shadow first'],
|
|
47
|
-
confidence: 0.8,
|
|
48
|
-
},
|
|
49
|
-
sourceTrace: { scribeArtifactId: 'pi-art-scribe-001-run-001' },
|
|
50
|
-
risks: [],
|
|
51
|
-
generatedAt: '2026-06-17T00:00:00.000Z',
|
|
57
|
+
sourceScribeArtifactId: 'pi-art-scribe-001',
|
|
52
58
|
implementationCode: 'function evaluate(input, helpers) { return { decision: "allow", matched: false, reason: "ok" }; }',
|
|
53
59
|
goldenTraceCases: [
|
|
54
60
|
{ caseId: 'negative-1', kind: 'negative', toolName: 'edit', params: { path: '/etc/x' }, expectedDecision: 'block' },
|
|
55
61
|
{ caseId: 'positive-1', kind: 'positive', toolName: 'read', params: { path: '/tmp/y' }, expectedDecision: 'allow' },
|
|
56
62
|
],
|
|
57
63
|
affectedTools: ['edit'],
|
|
64
|
+
implementationSummary: 'Block writes to system dirs',
|
|
65
|
+
risks: [],
|
|
66
|
+
sourceTrace: { scribeArtifactId: 'pi-art-scribe-001' },
|
|
67
|
+
generatedAt: '2026-06-17T00:00:00.000Z',
|
|
58
68
|
...overrides,
|
|
59
69
|
};
|
|
60
70
|
}
|
|
61
|
-
/** Build a gateDeps whose sandbox always accepts (replay passes). */
|
|
62
71
|
function makeAlwaysPassGateDeps() {
|
|
63
72
|
const passingResult = {
|
|
64
73
|
success: true,
|
|
@@ -70,441 +79,262 @@ function makeAlwaysPassGateDeps() {
|
|
|
70
79
|
evaluateInSandbox: (_code, _trace, _opts) => passingResult,
|
|
71
80
|
};
|
|
72
81
|
}
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
evaluateInSandbox: (code, _trace, _opts) => {
|
|
83
|
-
calls.push({ code });
|
|
84
|
-
const result = failures[attempt] ?? { success: true, failedCases: [], executionTimeMs: 1, forbiddenPatternViolations: [] };
|
|
85
|
-
attempt += 1;
|
|
86
|
-
return result;
|
|
87
|
-
},
|
|
82
|
+
function makeStartRun(overrides = {}) {
|
|
83
|
+
return {
|
|
84
|
+
agentSpec: { agentId: 'artificer', schemaVersion: 'v1' },
|
|
85
|
+
taskRef: { taskId: TASK_ID },
|
|
86
|
+
inputPayload: 'initial prompt',
|
|
87
|
+
contextItems: [],
|
|
88
|
+
outputSchemaRef: 'artificer-output-v2',
|
|
89
|
+
timeoutMs: 300_000,
|
|
90
|
+
...overrides,
|
|
88
91
|
};
|
|
89
|
-
return { deps, calls };
|
|
90
92
|
}
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
};
|
|
103
|
-
const FAILED_TIMEOUT = {
|
|
104
|
-
success: false,
|
|
105
|
-
failedCases: [{ caseId: 'negative-1', errorType: 'timeout', message: 'exceeded 1000ms' }],
|
|
106
|
-
executionTimeMs: 1001,
|
|
107
|
-
forbiddenPatternViolations: [],
|
|
108
|
-
};
|
|
109
|
-
const FAILED_VALIDATION = {
|
|
110
|
-
success: false,
|
|
111
|
-
failedCases: [{ caseId: 'negative-1', errorType: 'validation_failed', message: 'expected block got allow' }],
|
|
112
|
-
executionTimeMs: 1,
|
|
113
|
-
forbiddenPatternViolations: [],
|
|
114
|
-
};
|
|
115
|
-
describe('ArtificerL2Adapter (RuleHost MVP Activation, PRI-424)', () => {
|
|
116
|
-
// ── happy path ─────────────────────────────────────────────────────────────
|
|
117
|
-
it('returns V2 output on 1st attempt when sandbox replay passes (1 LLM call)', async () => {
|
|
118
|
-
const generateCalls = [];
|
|
119
|
-
const generateCode = async (prompt) => {
|
|
120
|
-
generateCalls.push(prompt);
|
|
121
|
-
return makeV2Output();
|
|
122
|
-
};
|
|
123
|
-
const adapter = new ArtificerL2Adapter({
|
|
124
|
-
generateCode,
|
|
125
|
-
gateDeps: makeAlwaysPassGateDeps(),
|
|
126
|
-
validator: new DefaultArtificerValidator(),
|
|
127
|
-
});
|
|
128
|
-
const handle = await adapter.startRun({
|
|
129
|
-
agentSpec: { agentId: 'artificer', schemaVersion: 'v1' },
|
|
130
|
-
taskRef: { taskId: TASK_ID },
|
|
131
|
-
inputPayload: 'initial prompt',
|
|
132
|
-
contextItems: [],
|
|
133
|
-
outputSchemaRef: 'artificer-output-v2',
|
|
134
|
-
timeoutMs: 300_000,
|
|
135
|
-
});
|
|
136
|
-
expect(generateCalls).toHaveLength(1);
|
|
137
|
-
const output = await adapter.fetchOutput(handle.runId);
|
|
138
|
-
expect(output).not.toBeNull();
|
|
139
|
-
if (!output)
|
|
140
|
-
return;
|
|
141
|
-
expect(isArtificerOutputV2(output.payload)).toBe(true);
|
|
93
|
+
function makeAdapter(overrides = {}) {
|
|
94
|
+
return new ArtificerL2Adapter({
|
|
95
|
+
provider: 'test-provider',
|
|
96
|
+
model: 'test-model',
|
|
97
|
+
apiKeyEnv: 'TEST_API_KEY',
|
|
98
|
+
baseUrl: 'http://localhost:1234/v1',
|
|
99
|
+
gateDeps: overrides.gateDeps ?? makeAlwaysPassGateDeps(),
|
|
100
|
+
validator: new DefaultArtificerValidator(),
|
|
101
|
+
maxTurns: overrides.maxTurns,
|
|
102
|
+
totalBudgetMs: overrides.totalBudgetMs ?? 60_000,
|
|
103
|
+
maxTokens: overrides.maxTokens,
|
|
142
104
|
});
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
105
|
+
}
|
|
106
|
+
beforeEach(() => {
|
|
107
|
+
vi.clearAllMocks();
|
|
108
|
+
hoisted.mockReturn = [];
|
|
109
|
+
hoisted.impl = null;
|
|
110
|
+
hoisted.lastLoopConfig = {};
|
|
111
|
+
process.env.TEST_API_KEY = 'test-key';
|
|
112
|
+
});
|
|
113
|
+
// ── submit_rulecode capture (primary extraction) ─────────────────────────────
|
|
114
|
+
describe('PRI-439 ArtificerL2Adapter — submit_rulecode capture', () => {
|
|
115
|
+
it('returns the captured output when submit_rulecode was called', async () => {
|
|
116
|
+
const adapter = makeAdapter();
|
|
117
|
+
hoisted.impl = async (_p, context) => {
|
|
118
|
+
const submit = context.tools?.find((t) => t.name === 'submit_rulecode');
|
|
119
|
+
if (submit) {
|
|
120
|
+
await submit.execute('call-1', makeRuleOutput());
|
|
121
|
+
}
|
|
122
|
+
return [];
|
|
149
123
|
};
|
|
150
|
-
const
|
|
151
|
-
const adapter = new ArtificerL2Adapter({
|
|
152
|
-
generateCode,
|
|
153
|
-
gateDeps: deps,
|
|
154
|
-
validator: new DefaultArtificerValidator(),
|
|
155
|
-
});
|
|
156
|
-
const handle = await adapter.startRun({
|
|
157
|
-
agentSpec: { agentId: 'artificer', schemaVersion: 'v1' },
|
|
158
|
-
taskRef: { taskId: TASK_ID },
|
|
159
|
-
inputPayload: 'initial prompt',
|
|
160
|
-
contextItems: [],
|
|
161
|
-
outputSchemaRef: 'artificer-output-v2',
|
|
162
|
-
timeoutMs: 300_000,
|
|
163
|
-
});
|
|
164
|
-
expect(generateCalls).toHaveLength(2);
|
|
165
|
-
// 2nd prompt MUST contain the failure feedback from attempt 1 (EP-05 freshness).
|
|
166
|
-
expect(generateCalls[1]).toContain('TypeError: x is undefined');
|
|
124
|
+
const handle = await adapter.startRun(makeStartRun());
|
|
167
125
|
const output = await adapter.fetchOutput(handle.runId);
|
|
168
126
|
expect(output).not.toBeNull();
|
|
169
|
-
|
|
170
|
-
return;
|
|
171
|
-
expect(isArtificerOutputV2(output.payload)).toBe(true);
|
|
127
|
+
expect(output?.payload).toEqual(makeRuleOutput());
|
|
172
128
|
});
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
129
|
+
it('shouldStopAfterTurn returns true after output is captured', async () => {
|
|
130
|
+
const adapter = makeAdapter({ maxTurns: 8 });
|
|
131
|
+
hoisted.impl = async (_p, context) => {
|
|
132
|
+
const submit = context.tools?.find((t) => t.name === 'submit_rulecode');
|
|
133
|
+
if (submit) {
|
|
134
|
+
await submit.execute('call-1', makeRuleOutput());
|
|
135
|
+
}
|
|
136
|
+
return [];
|
|
179
137
|
};
|
|
180
|
-
|
|
181
|
-
const
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
validator: new DefaultArtificerValidator(),
|
|
185
|
-
});
|
|
186
|
-
const handle = await adapter.startRun({
|
|
187
|
-
agentSpec: { agentId: 'artificer', schemaVersion: 'v1' },
|
|
188
|
-
taskRef: { taskId: TASK_ID },
|
|
189
|
-
inputPayload: 'initial prompt',
|
|
190
|
-
contextItems: [],
|
|
191
|
-
outputSchemaRef: 'artificer-output-v2',
|
|
192
|
-
timeoutMs: 300_000,
|
|
193
|
-
});
|
|
194
|
-
expect(generateCalls).toHaveLength(3);
|
|
195
|
-
const output = await adapter.fetchOutput(handle.runId);
|
|
196
|
-
expect(output).not.toBeNull();
|
|
197
|
-
if (!output)
|
|
138
|
+
await adapter.startRun(makeStartRun());
|
|
139
|
+
const stopFn = hoisted.lastLoopConfig.shouldStopAfterTurn;
|
|
140
|
+
expect(typeof stopFn).toBe('function');
|
|
141
|
+
if (!stopFn)
|
|
198
142
|
return;
|
|
199
|
-
//
|
|
200
|
-
expect(
|
|
201
|
-
// V1 fields preserved (plan, lineage) so principle artifact path still works.
|
|
202
|
-
expect(output.payload).toHaveProperty('implementationPlan');
|
|
143
|
+
// After submit_rulecode captured output, the next shouldStopAfterTurn call returns true.
|
|
144
|
+
expect(stopFn()).toBe(true);
|
|
203
145
|
});
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
const adapter =
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
agentSpec: { agentId: 'artificer', schemaVersion: 'v1' },
|
|
215
|
-
taskRef: { taskId: TASK_ID },
|
|
216
|
-
inputPayload: 'initial prompt',
|
|
217
|
-
contextItems: [],
|
|
218
|
-
outputSchemaRef: 'artificer-output-v2',
|
|
219
|
-
timeoutMs: 300_000,
|
|
146
|
+
});
|
|
147
|
+
// ── maxTurns cap ─────────────────────────────────────────────────────────────
|
|
148
|
+
describe('PRI-439 ArtificerL2Adapter — maxTurns cap', () => {
|
|
149
|
+
it('shouldStopAfterTurn returns false below maxTurns and true at/above, WITHOUT submit_rulecode', async () => {
|
|
150
|
+
const adapter = makeAdapter({ maxTurns: 5 });
|
|
151
|
+
hoisted.mockReturn = [
|
|
152
|
+
{ role: 'assistant', content: 'thinking...' },
|
|
153
|
+
];
|
|
154
|
+
await adapter.startRun(makeStartRun()).catch(() => {
|
|
155
|
+
// startRun throws when no output is captured — that's expected here.
|
|
220
156
|
});
|
|
221
|
-
const
|
|
222
|
-
|
|
223
|
-
|
|
157
|
+
const stopFn = hoisted.lastLoopConfig.shouldStopAfterTurn;
|
|
158
|
+
if (!stopFn) {
|
|
159
|
+
expect.fail('shouldStopAfterTurn not wired');
|
|
224
160
|
return;
|
|
225
|
-
|
|
226
|
-
expect(
|
|
161
|
+
}
|
|
162
|
+
expect(stopFn()).toBe(false); // turn 1
|
|
163
|
+
expect(stopFn()).toBe(false); // turn 2
|
|
164
|
+
expect(stopFn()).toBe(false); // turn 3
|
|
165
|
+
expect(stopFn()).toBe(false); // turn 4
|
|
166
|
+
expect(stopFn()).toBe(true); // turn 5 (>= maxTurns)
|
|
167
|
+
expect(stopFn()).toBe(true); // turn 6 (still >= maxTurns)
|
|
227
168
|
});
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
const adapter = new ArtificerL2Adapter({
|
|
237
|
-
generateCode,
|
|
238
|
-
gateDeps: deps,
|
|
239
|
-
validator: new DefaultArtificerValidator(),
|
|
240
|
-
});
|
|
241
|
-
await adapter.startRun({
|
|
242
|
-
agentSpec: { agentId: 'artificer', schemaVersion: 'v1' },
|
|
243
|
-
taskRef: { taskId: TASK_ID },
|
|
244
|
-
inputPayload: 'initial prompt',
|
|
245
|
-
contextItems: [],
|
|
246
|
-
outputSchemaRef: 'artificer-output-v2',
|
|
247
|
-
timeoutMs: 300_000,
|
|
169
|
+
});
|
|
170
|
+
// ── beforeToolCall whitelist ─────────────────────────────────────────────────
|
|
171
|
+
describe('PRI-439 ArtificerL2Adapter — beforeToolCall whitelist', () => {
|
|
172
|
+
it('blocks unknown tools', async () => {
|
|
173
|
+
const adapter = makeAdapter();
|
|
174
|
+
hoisted.mockReturn = [];
|
|
175
|
+
await adapter.startRun(makeStartRun()).catch(() => {
|
|
176
|
+
// startRun throws when no output is captured — expected.
|
|
248
177
|
});
|
|
249
|
-
|
|
250
|
-
expect(
|
|
178
|
+
const beforeFn = hoisted.lastLoopConfig.beforeToolCall;
|
|
179
|
+
expect(typeof beforeFn).toBe('function');
|
|
180
|
+
if (!beforeFn)
|
|
181
|
+
return;
|
|
182
|
+
const result = await beforeFn({ toolCall: { name: 'unknown_tool' } });
|
|
183
|
+
expect(result).toEqual({ block: true, reason: expect.stringContaining('unknown_tool') });
|
|
251
184
|
});
|
|
252
|
-
it('
|
|
253
|
-
const
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
};
|
|
258
|
-
const { deps } = makeFailNTimesGateDeps([FAILED_TIMEOUT]);
|
|
259
|
-
const adapter = new ArtificerL2Adapter({
|
|
260
|
-
generateCode,
|
|
261
|
-
gateDeps: deps,
|
|
262
|
-
validator: new DefaultArtificerValidator(),
|
|
185
|
+
it('allows whitelisted tools', async () => {
|
|
186
|
+
const adapter = makeAdapter();
|
|
187
|
+
hoisted.mockReturn = [];
|
|
188
|
+
await adapter.startRun(makeStartRun()).catch(() => {
|
|
189
|
+
// startRun throws when no output is captured — expected.
|
|
263
190
|
});
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
191
|
+
const beforeFn = hoisted.lastLoopConfig.beforeToolCall;
|
|
192
|
+
if (!beforeFn) {
|
|
193
|
+
expect.fail('beforeToolCall not wired');
|
|
194
|
+
return;
|
|
195
|
+
}
|
|
196
|
+
for (const name of ['read_rulecode_spec', 'validate_rulecode', 'replay_rulecode', 'submit_rulecode']) {
|
|
197
|
+
const result = await beforeFn({ toolCall: { name } });
|
|
198
|
+
expect(result).toBeUndefined();
|
|
199
|
+
}
|
|
273
200
|
});
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
inputPayload: 'initial prompt',
|
|
290
|
-
contextItems: [],
|
|
291
|
-
outputSchemaRef: 'artificer-output-v2',
|
|
292
|
-
timeoutMs: 300_000,
|
|
293
|
-
});
|
|
294
|
-
expect(generateCalls[1]).toContain('expected block got allow');
|
|
201
|
+
});
|
|
202
|
+
// ── exhaustion: no V1/L1 fallback ────────────────────────────────────────────
|
|
203
|
+
describe('PRI-439 ArtificerL2Adapter — exhaustion (no fallback)', () => {
|
|
204
|
+
it('throws PDRuntimeError when the loop ends without submit_rulecode', async () => {
|
|
205
|
+
const adapter = makeAdapter({ maxTurns: 3 });
|
|
206
|
+
hoisted.mockReturn = [
|
|
207
|
+
{ role: 'assistant', content: 'I cannot produce valid code.' },
|
|
208
|
+
];
|
|
209
|
+
await expect(adapter.startRun(makeStartRun())).rejects.toThrow(/without a submit_rulecode call/);
|
|
210
|
+
// No output stored for the failed run — fetchOutput returns null.
|
|
211
|
+
const runs = adapter.runs;
|
|
212
|
+
expect(runs.size).toBe(1);
|
|
213
|
+
for (const [, state] of runs) {
|
|
214
|
+
expect(state.output).toBeNull();
|
|
215
|
+
}
|
|
295
216
|
});
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
const { deps } = makeFailNTimesGateDeps([FAILED_RUNTIME, FAILED_TIMEOUT]);
|
|
305
|
-
const adapter = new ArtificerL2Adapter({
|
|
306
|
-
generateCode,
|
|
307
|
-
gateDeps: deps,
|
|
308
|
-
validator: new DefaultArtificerValidator(),
|
|
309
|
-
});
|
|
310
|
-
await adapter.startRun({
|
|
311
|
-
agentSpec: { agentId: 'artificer', schemaVersion: 'v1' },
|
|
312
|
-
taskRef: { taskId: TASK_ID },
|
|
313
|
-
inputPayload: 'initial prompt',
|
|
314
|
-
contextItems: [],
|
|
315
|
-
outputSchemaRef: 'artificer-output-v2',
|
|
316
|
-
timeoutMs: 300_000,
|
|
317
|
-
});
|
|
318
|
-
// attempt 2 prompt must mention attempt 1's runtime_error, NOT attempt 2's timeout
|
|
319
|
-
expect(generateCalls[1]).toContain('TypeError: x is undefined');
|
|
320
|
-
expect(generateCalls[1]).not.toContain('exceeded 1000ms');
|
|
321
|
-
// attempt 3 prompt must mention attempt 2's timeout, NOT attempt 1's runtime_error
|
|
322
|
-
expect(generateCalls[2]).toContain('exceeded 1000ms');
|
|
217
|
+
it('emits artificer_l2_complete telemetry with succeeded=false on exhaustion', async () => {
|
|
218
|
+
const adapter = makeAdapter({ maxTurns: 2 });
|
|
219
|
+
hoisted.mockReturn = [{ role: 'assistant', content: 'no code' }];
|
|
220
|
+
await expect(adapter.startRun(makeStartRun())).rejects.toThrow();
|
|
221
|
+
const completeCalls = emitTelemetryMock.mock.calls.filter((c) => c[0].eventType === 'artificer_l2_complete');
|
|
222
|
+
expect(completeCalls.length).toBe(1);
|
|
223
|
+
const payload = completeCalls[0][0].payload;
|
|
224
|
+
expect(payload.succeeded).toBe(false);
|
|
323
225
|
});
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
const
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
return { success: true, failedCases: [], executionTimeMs: 1, forbiddenPatternViolations: [] };
|
|
332
|
-
},
|
|
226
|
+
});
|
|
227
|
+
// ── loop error ───────────────────────────────────────────────────────────────
|
|
228
|
+
describe('PRI-439 ArtificerL2Adapter — loop error', () => {
|
|
229
|
+
it('throws PDRuntimeError when runAgentLoop throws', async () => {
|
|
230
|
+
const adapter = makeAdapter();
|
|
231
|
+
hoisted.impl = async () => {
|
|
232
|
+
throw new Error('LLM provider unavailable');
|
|
333
233
|
};
|
|
334
|
-
|
|
335
|
-
generateCode,
|
|
336
|
-
gateDeps: deps,
|
|
337
|
-
validator: new DefaultArtificerValidator(),
|
|
338
|
-
});
|
|
339
|
-
await adapter.startRun({
|
|
340
|
-
agentSpec: { agentId: 'artificer', schemaVersion: 'v1' },
|
|
341
|
-
taskRef: { taskId: TASK_ID },
|
|
342
|
-
inputPayload: 'initial prompt',
|
|
343
|
-
contextItems: [],
|
|
344
|
-
outputSchemaRef: 'artificer-output-v2',
|
|
345
|
-
timeoutMs: 300_000,
|
|
346
|
-
});
|
|
347
|
-
expect(capturedTrace).not.toBeNull();
|
|
348
|
-
expect(validateGoldenTrace(capturedTrace).valid).toBe(true);
|
|
234
|
+
await expect(adapter.startRun(makeStartRun())).rejects.toThrow(/agent loop threw/);
|
|
349
235
|
});
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
return bad;
|
|
236
|
+
});
|
|
237
|
+
// ── runtime metadata ─────────────────────────────────────────────────────────
|
|
238
|
+
describe('PRI-439 ArtificerL2Adapter — runtime metadata', () => {
|
|
239
|
+
it('pollRun returns succeeded status after startRun completes with output', async () => {
|
|
240
|
+
const adapter = makeAdapter();
|
|
241
|
+
hoisted.impl = async (_p, context) => {
|
|
242
|
+
const submit = context.tools?.find((t) => t.name === 'submit_rulecode');
|
|
243
|
+
if (submit) {
|
|
244
|
+
await submit.execute('call-1', makeRuleOutput());
|
|
360
245
|
}
|
|
361
|
-
return
|
|
246
|
+
return [];
|
|
362
247
|
};
|
|
363
|
-
const
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
validator: new DefaultArtificerValidator(),
|
|
367
|
-
});
|
|
368
|
-
const handle = await adapter.startRun({
|
|
369
|
-
agentSpec: { agentId: 'artificer', schemaVersion: 'v1' },
|
|
370
|
-
taskRef: { taskId: TASK_ID },
|
|
371
|
-
inputPayload: 'initial prompt',
|
|
372
|
-
contextItems: [],
|
|
373
|
-
outputSchemaRef: 'artificer-output-v2',
|
|
374
|
-
timeoutMs: 300_000,
|
|
375
|
-
});
|
|
376
|
-
const output = await adapter.fetchOutput(handle.runId);
|
|
377
|
-
expect(output).not.toBeNull();
|
|
378
|
-
if (!output)
|
|
379
|
-
return;
|
|
380
|
-
// 2nd attempt produces valid V2 → replay passes → V2 output
|
|
381
|
-
expect(isArtificerOutputV2(output.payload)).toBe(true);
|
|
248
|
+
const handle = await adapter.startRun(makeStartRun());
|
|
249
|
+
const status = await adapter.pollRun(handle.runId);
|
|
250
|
+
expect(status.status).toBe('succeeded');
|
|
382
251
|
});
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
// candidate, degradation is impossible (Runtime Contract Rule 1/3 — never
|
|
387
|
-
// emit an unvalidated object). The adapter throws PDRuntimeError instead,
|
|
388
|
-
// which BasePeerRunner.handlePostLeaseError catches → task fails.
|
|
389
|
-
const generateCode = async () => {
|
|
390
|
-
// Every attempt returns malformed V2 (missing affectedTools).
|
|
391
|
-
const bad = makeV2Output();
|
|
392
|
-
delete bad.affectedTools;
|
|
393
|
-
return bad;
|
|
394
|
-
};
|
|
395
|
-
const adapter = new ArtificerL2Adapter({
|
|
396
|
-
generateCode,
|
|
397
|
-
gateDeps: makeAlwaysPassGateDeps(),
|
|
398
|
-
validator: new DefaultArtificerValidator(),
|
|
399
|
-
});
|
|
400
|
-
await expect(adapter.startRun({
|
|
401
|
-
agentSpec: { agentId: 'artificer', schemaVersion: 'v1' },
|
|
402
|
-
taskRef: { taskId: TASK_ID },
|
|
403
|
-
inputPayload: 'initial prompt',
|
|
404
|
-
contextItems: [],
|
|
405
|
-
outputSchemaRef: 'artificer-output-v2',
|
|
406
|
-
timeoutMs: 300_000,
|
|
407
|
-
})).rejects.toThrow(/without a validated candidate/);
|
|
252
|
+
it('kind() returns pi-ai-l2', () => {
|
|
253
|
+
const adapter = makeAdapter();
|
|
254
|
+
expect(adapter.kind()).toBe('pi-ai-l2');
|
|
408
255
|
});
|
|
409
|
-
it('
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
const generateCode = async () => makeV2Output();
|
|
414
|
-
const { deps } = makeFailNTimesGateDeps([FAILED_RUNTIME, FAILED_RUNTIME, FAILED_RUNTIME]);
|
|
415
|
-
const adapter = new ArtificerL2Adapter({
|
|
416
|
-
generateCode,
|
|
417
|
-
gateDeps: deps,
|
|
418
|
-
validator: new DefaultArtificerValidator(),
|
|
419
|
-
});
|
|
420
|
-
const handle = await adapter.startRun({
|
|
421
|
-
agentSpec: { agentId: 'artificer', schemaVersion: 'v1' },
|
|
422
|
-
taskRef: { taskId: TASK_ID },
|
|
423
|
-
inputPayload: 'initial prompt',
|
|
424
|
-
contextItems: [],
|
|
425
|
-
outputSchemaRef: 'artificer-output-v2',
|
|
426
|
-
timeoutMs: 300_000,
|
|
427
|
-
});
|
|
428
|
-
const output = await adapter.fetchOutput(handle.runId);
|
|
429
|
-
expect(output).not.toBeNull();
|
|
430
|
-
if (!output)
|
|
431
|
-
return;
|
|
432
|
-
expect(isArtificerOutputV2(output.payload)).toBe(false);
|
|
256
|
+
it('getCapabilities reports supportsToolUse=true', async () => {
|
|
257
|
+
const adapter = makeAdapter();
|
|
258
|
+
const caps = await adapter.getCapabilities();
|
|
259
|
+
expect(caps.supportsToolUse).toBe(true);
|
|
433
260
|
});
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
const
|
|
437
|
-
const
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
const
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
|
|
261
|
+
it('healthCheck returns unhealthy when API key is missing', async () => {
|
|
262
|
+
delete process.env.TEST_API_KEY;
|
|
263
|
+
const adapter = makeAdapter();
|
|
264
|
+
const health = await adapter.healthCheck();
|
|
265
|
+
expect(health.healthy).toBe(false);
|
|
266
|
+
});
|
|
267
|
+
it('healthCheck returns healthy when API key is present', async () => {
|
|
268
|
+
const adapter = makeAdapter();
|
|
269
|
+
const health = await adapter.healthCheck();
|
|
270
|
+
expect(health.healthy).toBe(true);
|
|
271
|
+
});
|
|
272
|
+
it('startRun throws when API key is missing', async () => {
|
|
273
|
+
delete process.env.TEST_API_KEY;
|
|
274
|
+
const adapter = makeAdapter();
|
|
275
|
+
await expect(adapter.startRun(makeStartRun())).rejects.toThrow(/API key not found/);
|
|
276
|
+
});
|
|
277
|
+
});
|
|
278
|
+
// ── config defaults ──────────────────────────────────────────────────────────
|
|
279
|
+
describe('PRI-439 ArtificerL2Adapter — config defaults', () => {
|
|
280
|
+
it('wires maxTokens=8192 default into loopConfig', async () => {
|
|
281
|
+
const adapter = makeAdapter();
|
|
282
|
+
hoisted.mockReturn = [];
|
|
283
|
+
await adapter.startRun(makeStartRun()).catch(() => {
|
|
284
|
+
// expected — no output captured
|
|
449
285
|
});
|
|
450
|
-
|
|
451
|
-
// RunStatus is an object { runId, status, ... }; status.status is the execution state.
|
|
452
|
-
expect(['succeeded', 'failed']).toContain(status.status);
|
|
286
|
+
expect(hoisted.lastLoopConfig.maxTokens).toBe(8192);
|
|
453
287
|
});
|
|
454
|
-
it('
|
|
455
|
-
const adapter =
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
|
|
288
|
+
it('wires custom maxTokens when provided', async () => {
|
|
289
|
+
const adapter = makeAdapter({ maxTokens: 4096 });
|
|
290
|
+
hoisted.mockReturn = [];
|
|
291
|
+
await adapter.startRun(makeStartRun()).catch(() => {
|
|
292
|
+
// expected
|
|
459
293
|
});
|
|
460
|
-
expect(
|
|
461
|
-
expect(adapter.kind()).toBe('pi-ai-l2');
|
|
294
|
+
expect(hoisted.lastLoopConfig.maxTokens).toBe(4096);
|
|
462
295
|
});
|
|
463
|
-
|
|
464
|
-
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
|
|
296
|
+
});
|
|
297
|
+
// ── telemetry ────────────────────────────────────────────────────────────────
|
|
298
|
+
describe('PRI-439 ArtificerL2Adapter — telemetry', () => {
|
|
299
|
+
it('emits artificer_l2_turn with phase=loop_started at start', async () => {
|
|
300
|
+
const adapter = makeAdapter();
|
|
301
|
+
hoisted.mockReturn = [];
|
|
302
|
+
await adapter.startRun(makeStartRun()).catch(() => {
|
|
303
|
+
// expected
|
|
468
304
|
});
|
|
469
|
-
const
|
|
470
|
-
|
|
471
|
-
|
|
472
|
-
inputPayload: '{}',
|
|
473
|
-
contextItems: [],
|
|
474
|
-
outputSchemaRef: 'artificer-output-v2',
|
|
475
|
-
timeoutMs: 30_000,
|
|
305
|
+
const startCalls = emitTelemetryMock.mock.calls.filter((c) => {
|
|
306
|
+
const evt = c[0];
|
|
307
|
+
return evt.eventType === 'artificer_l2_turn' && evt.payload?.phase === 'loop_started';
|
|
476
308
|
});
|
|
477
|
-
expect(
|
|
309
|
+
expect(startCalls.length).toBe(1);
|
|
478
310
|
});
|
|
479
|
-
it
|
|
480
|
-
|
|
481
|
-
|
|
482
|
-
|
|
483
|
-
|
|
484
|
-
|
|
485
|
-
|
|
311
|
+
it('emits artificer_l2_complete with succeeded=true on success', async () => {
|
|
312
|
+
const adapter = makeAdapter();
|
|
313
|
+
hoisted.impl = async (_p, context) => {
|
|
314
|
+
const submit = context.tools?.find((t) => t.name === 'submit_rulecode');
|
|
315
|
+
if (submit) {
|
|
316
|
+
await submit.execute('call-1', makeRuleOutput());
|
|
317
|
+
}
|
|
318
|
+
return [];
|
|
319
|
+
};
|
|
320
|
+
await adapter.startRun(makeStartRun());
|
|
321
|
+
const completeCalls = emitTelemetryMock.mock.calls.filter((c) => c[0].eventType === 'artificer_l2_complete');
|
|
322
|
+
expect(completeCalls.length).toBe(1);
|
|
323
|
+
const payload = completeCalls[0][0].payload;
|
|
324
|
+
expect(payload.succeeded).toBe(true);
|
|
486
325
|
});
|
|
326
|
+
});
|
|
327
|
+
// ── input serialization ──────────────────────────────────────────────────────
|
|
328
|
+
describe('PRI-439 ArtificerL2Adapter — input serialization', () => {
|
|
487
329
|
it('bounds and safely serializes an unknown prompt payload', async () => {
|
|
488
330
|
const circular = { text: 'x'.repeat(60_000) };
|
|
489
331
|
circular.self = circular;
|
|
490
|
-
|
|
491
|
-
|
|
492
|
-
|
|
493
|
-
|
|
494
|
-
|
|
495
|
-
|
|
496
|
-
gateDeps: makeAlwaysPassGateDeps(),
|
|
497
|
-
validator: new DefaultArtificerValidator(),
|
|
498
|
-
});
|
|
499
|
-
await adapter.startRun({
|
|
500
|
-
agentSpec: { agentId: 'artificer', schemaVersion: 'v1' },
|
|
501
|
-
taskRef: { taskId: TASK_ID },
|
|
502
|
-
inputPayload: circular,
|
|
503
|
-
contextItems: [],
|
|
504
|
-
outputSchemaRef: 'artificer-output-v2',
|
|
505
|
-
timeoutMs: 30_000,
|
|
506
|
-
});
|
|
507
|
-
expect(receivedPrompt.length).toBeLessThanOrEqual(50_003);
|
|
332
|
+
const adapter = makeAdapter();
|
|
333
|
+
hoisted.mockReturn = [];
|
|
334
|
+
// The circular payload is safely stringified (safeStringifyPreview handles cycles).
|
|
335
|
+
// startRun still throws because no output is captured, but it should NOT throw
|
|
336
|
+
// a serialization error.
|
|
337
|
+
await expect(adapter.startRun(makeStartRun({ inputPayload: circular }))).rejects.toThrow(/without a submit_rulecode call/);
|
|
508
338
|
});
|
|
509
339
|
});
|
|
510
340
|
//# sourceMappingURL=artificer-l2-adapter.test.js.map
|